target-arm/translate-a64.c

   1 /*
   2  *  AArch64 translation
   3  *
   4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include <stdarg.h>
  20 #include <stdlib.h>
  21 #include <stdio.h>
  22 #include <string.h>
  23 #include <inttypes.h>
  24
  25 #include "cpu.h"
  26 #include "tcg-op.h"
  27 #include "qemu/log.h"
  28 #include "arm_ldst.h"
  29 #include "translate.h"
  30 #include "internals.h"
  31 #include "qemu/host-utils.h"
  32
  33 #include "exec/semihost.h"
  34 #include "exec/gen-icount.h"
  35
  36 #include "exec/helper-proto.h"
  37 #include "exec/helper-gen.h"
  38
  39 #include "trace-tcg.h"
  40
/* TCG globals for the A64 general purpose register file (cpu_X[]) and the
 * program counter (cpu_pc); a64_translate_init() binds them to the
 * xregs[] and pc fields of CPUARMState.
 */
static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling: TCG global bound to the exclusive_high
 * field of CPUARMState by a64_translate_init().
 */
static TCGv_i64 cpu_exclusive_high;
  46
  47 static const char *regnames[] = {
  48     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
  49     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
  50     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
  51     "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
  52 };
  53
/* Shift kinds, numbered 0..3 in the order LSL, LSR, ASR, ROR.
 * NOTE(review): presumably these values match the 2-bit shift-type field
 * of the A64 instruction encodings - confirm against the decoder.
 */
enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};
  60
/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */

/* Signature of a decode function: translation context plus the raw
 * 32-bit instruction word.
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

/* One decode table entry.
 * NOTE(review): presumably an entry is selected when
 * (insn & mask) == pattern - the lookup code is not in this part of the
 * file; confirm there.
 */
typedef struct AArch64DecodeTable {
    uint32_t pattern;           /* bit pattern to compare against */
    uint32_t mask;              /* bits of the instruction to compare */
    AArch64DecodeFn *disas_fn;  /* decode function for this entry */
} AArch64DecodeTable;
  71
/* Function prototypes for the gen_ functions that call Neon helpers,
 * plus two for the crypto helpers. The type names encode the operand
 * count and width; the parameter lists below are the authoritative
 * description of each signature.
 */
typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
  86
  87 /* initialize TCG globals.  */
  88 void a64_translate_init(void)
  89 {
  90     int i;
  91
  92     cpu_pc = tcg_global_mem_new_i64(TCG_AREG0,
  93                                     offsetof(CPUARMState, pc),
  94                                     "pc");
  95     for (i = 0; i < 32; i++) {
  96         cpu_X[i] = tcg_global_mem_new_i64(TCG_AREG0,
  97                                           offsetof(CPUARMState, xregs[i]),
  98                                           regnames[i]);
  99     }
 100
 101     cpu_exclusive_high = tcg_global_mem_new_i64(TCG_AREG0,
 102         offsetof(CPUARMState, exclusive_high), "exclusive_high");
 103 }
 104
 105 static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s)
 106 {
 107     /* Return the mmu_idx to use for A64 "unprivileged load/store" insns:
 108      *  if EL1, access as if EL0; otherwise access at current EL
 109      */
 110     switch (s->mmu_idx) {
 111     case ARMMMUIdx_S12NSE1:
 112         return ARMMMUIdx_S12NSE0;
 113     case ARMMMUIdx_S1SE1:
 114         return ARMMMUIdx_S1SE0;
 115     case ARMMMUIdx_S2NS:
 116         g_assert_not_reached();
 117     default:
 118         return s->mmu_idx;
 119     }
 120 }
 121
 122 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
 123                             fprintf_function cpu_fprintf, int flags)
 124 {
 125     ARMCPU *cpu = ARM_CPU(cs);
 126     CPUARMState *env = &cpu->env;
 127     uint32_t psr = pstate_read(env);
 128     int i;
 129     int el = arm_current_el(env);
 130     const char *ns_status;
 131
 132     cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
 133             env->pc, env->xregs[31]);
 134     for (i = 0; i < 31; i++) {
 135         cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
 136         if ((i % 4) == 3) {
 137             cpu_fprintf(f, "\n");
 138         } else {
 139             cpu_fprintf(f, " ");
 140         }
 141     }
 142
 143     if (arm_feature(env, ARM_FEATURE_EL3) && el != 3) {
 144         ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
 145     } else {
 146         ns_status = "";
 147     }
 148
 149     cpu_fprintf(f, "\nPSTATE=%08x %c%c%c%c %sEL%d%c\n",
 150                 psr,
 151                 psr & PSTATE_N ? 'N' : '-',
 152                 psr & PSTATE_Z ? 'Z' : '-',
 153                 psr & PSTATE_C ? 'C' : '-',
 154                 psr & PSTATE_V ? 'V' : '-',
 155                 ns_status,
 156                 el,
 157                 psr & PSTATE_SP ? 'h' : 't');
 158
 159     if (flags & CPU_DUMP_FPU) {
 160         int numvfpregs = 32;
 161         for (i = 0; i < numvfpregs; i += 2) {
 162             uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
 163             uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
 164             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
 165                         i, vhi, vlo);
 166             vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
 167             vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
 168             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
 169                         i + 1, vhi, vlo);
 170         }
 171         cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
 172                     vfp_get_fpcr(env), vfp_get_fpsr(env));
 173     }
 174 }
 175
/* Emit a TCG op that loads the immediate @val into the cpu_pc global. */
void gen_a64_set_pc_im(uint64_t val)
{
    tcg_gen_movi_i64(cpu_pc, val);
}
 180
/* 64-bit form of a condition test, filled in by a64_test_cc() and
 * released with a64_free_cc().
 */
typedef struct DisasCompare64 {
    TCGCond cond;    /* condition, copied from the 32-bit DisasCompare */
    TCGv_i64 value;  /* temporary holding the sign-extended 32-bit value */
} DisasCompare64;
 185
 186 static void a64_test_cc(DisasCompare64 *c64, int cc)
 187 {
 188     DisasCompare c32;
 189
 190     arm_test_cc(&c32, cc);
 191
 192     /* Sign-extend the 32-bit value so that the GE/LT comparisons work
 193        * properly.  The NE/EQ comparisons are also fine with this choice.  */
 194     c64->cond = c32.cond;
 195     c64->value = tcg_temp_new_i64();
 196     tcg_gen_ext_i32_i64(c64->value, c32.value);
 197
 198     arm_free_cc(&c32);
 199 }
 200
/* Free the i64 temporary that a64_test_cc() stored in @c64->value. */
static void a64_free_cc(DisasCompare64 *c64)
{
    tcg_temp_free_i64(c64->value);
}
 205
 206 static void gen_exception_internal(int excp)
 207 {
 208     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 209
 210     assert(excp_is_internal(excp));
 211     gen_helper_exception_internal(cpu_env, tcg_excp);
 212     tcg_temp_free_i32(tcg_excp);
 213 }
 214
 215 static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
 216 {
 217     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 218     TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
 219     TCGv_i32 tcg_el = tcg_const_i32(target_el);
 220
 221     gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
 222                                        tcg_syn, tcg_el);
 223     tcg_temp_free_i32(tcg_el);
 224     tcg_temp_free_i32(tcg_syn);
 225     tcg_temp_free_i32(tcg_excp);
 226 }
 227
 228 static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
 229 {
 230     gen_a64_set_pc_im(s->pc - offset);
 231     gen_exception_internal(excp);
 232     s->is_jmp = DISAS_EXC;
 233 }
 234
 235 static void gen_exception_insn(DisasContext *s, int offset, int excp,
 236                                uint32_t syndrome, uint32_t target_el)
 237 {
 238     gen_a64_set_pc_im(s->pc - offset);
 239     gen_exception(excp, syndrome, target_el);
 240     s->is_jmp = DISAS_EXC;
 241 }
 242
 243 static void gen_ss_advance(DisasContext *s)
 244 {
 245     /* If the singlestep state is Active-not-pending, advance to
 246      * Active-pending.
 247      */
 248     if (s->ss_active) {
 249         s->pstate_ss = 0;
 250         gen_helper_clear_pstate_ss(cpu_env);
 251     }
 252 }
 253
 254 static void gen_step_complete_exception(DisasContext *s)
 255 {
 256     /* We just completed step of an insn. Move from Active-not-pending
 257      * to Active-pending, and then also take the swstep exception.
 258      * This corresponds to making the (IMPDEF) choice to prioritize
 259      * swstep exceptions over asynchronous exceptions taken to an exception
 260      * level where debug is disabled. This choice has the advantage that
 261      * we do not need to maintain internal state corresponding to the
 262      * ISV/EX syndrome bits between completion of the step and generation
 263      * of the exception, and our syndrome information is always correct.
 264      */
 265     gen_ss_advance(s);
 266     gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
 267                   default_exception_el(s));
 268     s->is_jmp = DISAS_EXC;
 269 }
 270
 271 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
 272 {
 273     /* No direct tb linking with singlestep (either QEMU's or the ARM
 274      * debug architecture kind) or deterministic io
 275      */
 276     if (s->singlestep_enabled || s->ss_active || (s->tb->cflags & CF_LAST_IO)) {
 277         return false;
 278     }
 279
 280     /* Only link tbs from inside the same guest page */
 281     if ((s->tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
 282         return false;
 283     }
 284
 285     return true;
 286 }
 287
 288 static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
 289 {
 290     TranslationBlock *tb;
 291
 292     tb = s->tb;
 293     if (use_goto_tb(s, n, dest)) {
 294         tcg_gen_goto_tb(n);
 295         gen_a64_set_pc_im(dest);
 296         tcg_gen_exit_tb((intptr_t)tb + n);
 297         s->is_jmp = DISAS_TB_JUMP;
 298     } else {
 299         gen_a64_set_pc_im(dest);
 300         if (s->ss_active) {
 301             gen_step_complete_exception(s);
 302         } else if (s->singlestep_enabled) {
 303             gen_exception_internal(EXCP_DEBUG);
 304         } else {
 305             tcg_gen_exit_tb(0);
 306             s->is_jmp = DISAS_TB_JUMP;
 307         }
 308     }
 309 }
 310
/* Raise EXCP_UDEF for the instruction being translated. The offset of 4
 * passed to gen_exception_insn() makes the reported PC s->pc - 4.
 */
static void unallocated_encoding(DisasContext *s)
{
    /* Unallocated and reserved encodings are uncategorized */
    gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
                       default_exception_el(s));
}
 317
 318 #define unsupported_encoding(s, insn)                                    \
 319     do {                                                                 \
 320         qemu_log_mask(LOG_UNIMP,                                         \
 321                       "%s:%d: unsupported instruction encoding 0x%08x "  \
 322                       "at pc=%016" PRIx64 "\n",                          \
 323                       __FILE__, __LINE__, insn, s->pc - 4);              \
 324         unallocated_encoding(s);                                         \
 325     } while (0);
 326
 327 static void init_tmp_a64_array(DisasContext *s)
 328 {
 329 #ifdef CONFIG_DEBUG_TCG
 330     int i;
 331     for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
 332         TCGV_UNUSED_I64(s->tmp_a64[i]);
 333     }
 334 #endif
 335     s->tmp_a64_count = 0;
 336 }
 337
 338 static void free_tmp_a64(DisasContext *s)
 339 {
 340     int i;
 341     for (i = 0; i < s->tmp_a64_count; i++) {
 342         tcg_temp_free_i64(s->tmp_a64[i]);
 343     }
 344     init_tmp_a64_array(s);
 345 }
 346
 347 static TCGv_i64 new_tmp_a64(DisasContext *s)
 348 {
 349     assert(s->tmp_a64_count < TMP_A64_MAX);
 350     return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
 351 }
 352
 353 static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
 354 {
 355     TCGv_i64 t = new_tmp_a64(s);
 356     tcg_gen_movi_i64(t, 0);
 357     return t;
 358 }
 359
 360 /*
 361  * Register access functions
 362  *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 367  *
 368  * B1.2.1 Register mappings
 369  *
 370  * In instruction register encoding 31 can refer to ZR (zero register) or
 371  * the SP (stack pointer) depending on context. In QEMU's case we map SP
 372  * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 373  * This is the point of the _sp forms.
 374  */
 375 static TCGv_i64 cpu_reg(DisasContext *s, int reg)
 376 {
 377     if (reg == 31) {
 378         return new_tmp_a64_zero(s);
 379     } else {
 380         return cpu_X[reg];
 381     }
 382 }
 383
/* register access for when 31 == SP: every register number, including
 * 31, maps directly onto its cpu_X[] global. @s is not used.
 */
static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}
 389
 390 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 391  * representing the register contents. This TCGv is an auto-freed
 392  * temporary so it need not be explicitly freed, and may be modified.
 393  */
 394 static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
 395 {
 396     TCGv_i64 v = new_tmp_a64(s);
 397     if (reg != 31) {
 398         if (sf) {
 399             tcg_gen_mov_i64(v, cpu_X[reg]);
 400         } else {
 401             tcg_gen_ext32u_i64(v, cpu_X[reg]);
 402         }
 403     } else {
 404         tcg_gen_movi_i64(v, 0);
 405     }
 406     return v;
 407 }
 408
 409 static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
 410 {
 411     TCGv_i64 v = new_tmp_a64(s);
 412     if (sf) {
 413         tcg_gen_mov_i64(v, cpu_X[reg]);
 414     } else {
 415         tcg_gen_ext32u_i64(v, cpu_X[reg]);
 416     }
 417     return v;
 418 }
 419
 420 /* We should have at some point before trying to access an FP register
 421  * done the necessary access check, so assert that
 422  * (a) we did the check and
 423  * (b) we didn't then just plough ahead anyway if it failed.
 424  * Print the instruction pattern in the abort message so we can figure
 425  * out what we need to fix if a user encounters this problem in the wild.
 426  */
 427 static inline void assert_fp_access_checked(DisasContext *s)
 428 {
 429 #ifdef CONFIG_DEBUG_TCG
 430     if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
 431         fprintf(stderr, "target-arm: FP access check missing for "
 432                 "instruction 0x%08x\n", s->insn);
 433         abort();
 434     }
 435 #endif
 436 }
 437
 438 /* Return the offset into CPUARMState of an element of specified
 439  * size, 'element' places in from the least significant end of
 440  * the FP/vector register Qn.
 441  */
 442 static inline int vec_reg_offset(DisasContext *s, int regno,
 443                                  int element, TCGMemOp size)
 444 {
 445     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 446 #ifdef HOST_WORDS_BIGENDIAN
 447     /* This is complicated slightly because vfp.regs[2n] is
 448      * still the low half and  vfp.regs[2n+1] the high half
 449      * of the 128 bit vector, even on big endian systems.
 450      * Calculate the offset assuming a fully bigendian 128 bits,
 451      * then XOR to account for the order of the two 64 bit halves.
 452      */
 453     offs += (16 - ((element + 1) * (1 << size)));
 454     offs ^= 8;
 455 #else
 456     offs += element * (1 << size);
 457 #endif
 458     assert_fp_access_checked(s);
 459     return offs;
 460 }
 461
 462 /* Return the offset into CPUARMState of a slice (from
 463  * the least significant end) of FP register Qn (ie
 464  * Dn, Sn, Hn or Bn).
 465  * (Note that this is not the same mapping as for A32; see cpu.h)
 466  */
 467 static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
 468 {
 469     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 470 #ifdef HOST_WORDS_BIGENDIAN
 471     offs += (8 - (1 << size));
 472 #endif
 473     assert_fp_access_checked(s);
 474     return offs;
 475 }
 476
 477 /* Offset of the high half of the 128 bit vector Qn */
 478 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
 479 {
 480     assert_fp_access_checked(s);
 481     return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
 482 }
 483
 484 /* Convenience accessors for reading and writing single and double
 485  * FP registers. Writing clears the upper parts of the associated
 486  * 128 bit vector register, as required by the architecture.
 487  * Note that unlike the GP register accessors, the values returned
 488  * by the read functions must be manually freed.
 489  */
 490 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
 491 {
 492     TCGv_i64 v = tcg_temp_new_i64();
 493
 494     tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 495     return v;
 496 }
 497
 498 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
 499 {
 500     TCGv_i32 v = tcg_temp_new_i32();
 501
 502     tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
 503     return v;
 504 }
 505
 506 static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
 507 {
 508     TCGv_i64 tcg_zero = tcg_const_i64(0);
 509
 510     tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 511     tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg));
 512     tcg_temp_free_i64(tcg_zero);
 513 }
 514
 515 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
 516 {
 517     TCGv_i64 tmp = tcg_temp_new_i64();
 518
 519     tcg_gen_extu_i32_i64(tmp, v);
 520     write_fp_dreg(s, reg, tmp);
 521     tcg_temp_free_i64(tmp);
 522 }
 523
 524 static TCGv_ptr get_fpstatus_ptr(void)
 525 {
 526     TCGv_ptr statusptr = tcg_temp_new_ptr();
 527     int offset;
 528
 529     /* In A64 all instructions (both FP and Neon) use the FPCR;
 530      * there is no equivalent of the A32 Neon "standard FPSCR value"
 531      * and all operations use vfp.fp_status.
 532      */
 533     offset = offsetof(CPUARMState, vfp.fp_status);
 534     tcg_gen_addi_ptr(statusptr, cpu_env, offset);
 535     return statusptr;
 536 }
 537
/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 *
 * NOTE(review): this relies on the flag representation used by the
 * rest of the target (presumably Z is "ZF == 0" and N is bit 31 of NF);
 * that convention is defined outside this part of the file - confirm.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    /* Split the result: low 32 bits into ZF, high 32 bits into NF */
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    /* Fold the high half into ZF so ZF is zero only if all 64 bits are */
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
 546
 547 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
 548 static inline void gen_logic_CC(int sf, TCGv_i64 result)
 549 {
 550     if (sf) {
 551         gen_set_NZ64(result);
 552     } else {
 553         tcg_gen_extrl_i64_i32(cpu_ZF, result);
 554         tcg_gen_mov_i32(cpu_NF, cpu_ZF);
 555     }
 556     tcg_gen_movi_i32(cpu_CF, 0);
 557     tcg_gen_movi_i32(cpu_VF, 0);
 558 }
 559
 560 /* dest = T0 + T1; compute C, N, V and Z flags */
 561 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 562 {
 563     if (sf) {
 564         TCGv_i64 result, flag, tmp;
 565         result = tcg_temp_new_i64();
 566         flag = tcg_temp_new_i64();
 567         tmp = tcg_temp_new_i64();
 568
 569         tcg_gen_movi_i64(tmp, 0);
 570         tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
 571
 572         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 573
 574         gen_set_NZ64(result);
 575
 576         tcg_gen_xor_i64(flag, result, t0);
 577         tcg_gen_xor_i64(tmp, t0, t1);
 578         tcg_gen_andc_i64(flag, flag, tmp);
 579         tcg_temp_free_i64(tmp);
 580         tcg_gen_extrh_i64_i32(cpu_VF, flag);
 581
 582         tcg_gen_mov_i64(dest, result);
 583         tcg_temp_free_i64(result);
 584         tcg_temp_free_i64(flag);
 585     } else {
 586         /* 32 bit arithmetic */
 587         TCGv_i32 t0_32 = tcg_temp_new_i32();
 588         TCGv_i32 t1_32 = tcg_temp_new_i32();
 589         TCGv_i32 tmp = tcg_temp_new_i32();
 590
 591         tcg_gen_movi_i32(tmp, 0);
 592         tcg_gen_extrl_i64_i32(t0_32, t0);
 593         tcg_gen_extrl_i64_i32(t1_32, t1);
 594         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
 595         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 596         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 597         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 598         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 599         tcg_gen_extu_i32_i64(dest, cpu_NF);
 600
 601         tcg_temp_free_i32(tmp);
 602         tcg_temp_free_i32(t0_32);
 603         tcg_temp_free_i32(t1_32);
 604     }
 605 }
 606
 607 /* dest = T0 - T1; compute C, N, V and Z flags */
 608 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 609 {
 610     if (sf) {
 611         /* 64 bit arithmetic */
 612         TCGv_i64 result, flag, tmp;
 613
 614         result = tcg_temp_new_i64();
 615         flag = tcg_temp_new_i64();
 616         tcg_gen_sub_i64(result, t0, t1);
 617
 618         gen_set_NZ64(result);
 619
 620         tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
 621         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 622
 623         tcg_gen_xor_i64(flag, result, t0);
 624         tmp = tcg_temp_new_i64();
 625         tcg_gen_xor_i64(tmp, t0, t1);
 626         tcg_gen_and_i64(flag, flag, tmp);
 627         tcg_temp_free_i64(tmp);
 628         tcg_gen_extrh_i64_i32(cpu_VF, flag);
 629         tcg_gen_mov_i64(dest, result);
 630         tcg_temp_free_i64(flag);
 631         tcg_temp_free_i64(result);
 632     } else {
 633         /* 32 bit arithmetic */
 634         TCGv_i32 t0_32 = tcg_temp_new_i32();
 635         TCGv_i32 t1_32 = tcg_temp_new_i32();
 636         TCGv_i32 tmp;
 637
 638         tcg_gen_extrl_i64_i32(t0_32, t0);
 639         tcg_gen_extrl_i64_i32(t1_32, t1);
 640         tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
 641         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 642         tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
 643         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 644         tmp = tcg_temp_new_i32();
 645         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 646         tcg_temp_free_i32(t0_32);
 647         tcg_temp_free_i32(t1_32);
 648         tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 649         tcg_temp_free_i32(tmp);
 650         tcg_gen_extu_i32_i64(dest, cpu_NF);
 651     }
 652 }
 653
 654 /* dest = T0 + T1 + CF; do not compute flags. */
 655 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 656 {
 657     TCGv_i64 flag = tcg_temp_new_i64();
 658     tcg_gen_extu_i32_i64(flag, cpu_CF);
 659     tcg_gen_add_i64(dest, t0, t1);
 660     tcg_gen_add_i64(dest, dest, flag);
 661     tcg_temp_free_i64(flag);
 662
 663     if (!sf) {
 664         tcg_gen_ext32u_i64(dest, dest);
 665     }
 666 }
 667
 668 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
 669 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 670 {
 671     if (sf) {
 672         TCGv_i64 result, cf_64, vf_64, tmp;
 673         result = tcg_temp_new_i64();
 674         cf_64 = tcg_temp_new_i64();
 675         vf_64 = tcg_temp_new_i64();
 676         tmp = tcg_const_i64(0);
 677
 678         tcg_gen_extu_i32_i64(cf_64, cpu_CF);
 679         tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
 680         tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
 681         tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
 682         gen_set_NZ64(result);
 683
 684         tcg_gen_xor_i64(vf_64, result, t0);
 685         tcg_gen_xor_i64(tmp, t0, t1);
 686         tcg_gen_andc_i64(vf_64, vf_64, tmp);
 687         tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
 688
 689         tcg_gen_mov_i64(dest, result);
 690
 691         tcg_temp_free_i64(tmp);
 692         tcg_temp_free_i64(vf_64);
 693         tcg_temp_free_i64(cf_64);
 694         tcg_temp_free_i64(result);
 695     } else {
 696         TCGv_i32 t0_32, t1_32, tmp;
 697         t0_32 = tcg_temp_new_i32();
 698         t1_32 = tcg_temp_new_i32();
 699         tmp = tcg_const_i32(0);
 700
 701         tcg_gen_extrl_i64_i32(t0_32, t0);
 702         tcg_gen_extrl_i64_i32(t1_32, t1);
 703         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
 704         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);
 705
 706         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 707         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 708         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 709         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 710         tcg_gen_extu_i32_i64(dest, cpu_NF);
 711
 712         tcg_temp_free_i32(tmp);
 713         tcg_temp_free_i32(t1_32);
 714         tcg_temp_free_i32(t0_32);
 715     }
 716 }
 717
 718 /*
 719  * Load/Store generators
 720  */
 721
 722 /*
 723  * Store from GPR register to memory.
 724  */
 725 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
 726                              TCGv_i64 tcg_addr, int size, int memidx)
 727 {
 728     g_assert(size <= 3);
 729     tcg_gen_qemu_st_i64(source, tcg_addr, memidx, MO_TE + size);
 730 }
 731
/* Store from GPR register to memory using the memory index returned by
 * get_mem_index(s); see do_gpr_st_memidx() for the meaning of @size.
 */
static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, int size)
{
    do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s));
}
 737
 738 /*
 739  * Load from memory to GPR register
 740  */
 741 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
 742                              int size, bool is_signed, bool extend, int memidx)
 743 {
 744     TCGMemOp memop = MO_TE + size;
 745
 746     g_assert(size <= 3);
 747
 748     if (is_signed) {
 749         memop += MO_SIGN;
 750     }
 751
 752     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
 753
 754     if (extend && is_signed) {
 755         g_assert(size < 3);
 756         tcg_gen_ext32u_i64(dest, dest);
 757     }
 758 }
 759
/* Load from memory to GPR register using the memory index returned by
 * get_mem_index(s); all other arguments are passed through unchanged to
 * do_gpr_ld_memidx().
 */
static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      int size, bool is_signed, bool extend)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
                     get_mem_index(s));
}
 766
 767 /*
 768  * Store from FP register to memory
 769  */
 770 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
 771 {
 772     /* This writes the bottom N bits of a 128 bit wide vector to memory */
 773     TCGv_i64 tmp = tcg_temp_new_i64();
 774     tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
 775     if (size < 4) {
 776         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TE + size);
 777     } else {
 778         TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
 779         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TEQ);
 780         tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
 781         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 782         tcg_gen_qemu_st_i64(tmp, tcg_hiaddr, get_mem_index(s), MO_TEQ);
 783         tcg_temp_free_i64(tcg_hiaddr);
 784     }
 785
 786     tcg_temp_free_i64(tmp);
 787 }
 788
 789 /*
 790  * Load from memory to FP register
 791  */
 792 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
 793 {
 794     /* This always zero-extends and writes to a full 128 bit wide vector */
 795     TCGv_i64 tmplo = tcg_temp_new_i64();
 796     TCGv_i64 tmphi;
 797
 798     if (size < 4) {
 799         TCGMemOp memop = MO_TE + size;
 800         tmphi = tcg_const_i64(0);
 801         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
 802     } else {
 803         TCGv_i64 tcg_hiaddr;
 804         tmphi = tcg_temp_new_i64();
 805         tcg_hiaddr = tcg_temp_new_i64();
 806
 807         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), MO_TEQ);
 808         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 809         tcg_gen_qemu_ld_i64(tmphi, tcg_hiaddr, get_mem_index(s), MO_TEQ);
 810         tcg_temp_free_i64(tcg_hiaddr);
 811     }
 812
 813     tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
 814     tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
 815
 816     tcg_temp_free_i64(tmplo);
 817     tcg_temp_free_i64(tmphi);
 818 }
 819
 820 /*
 821  * Vector load/store helpers.
 822  *
 823  * The principal difference between this and a FP load is that we don't
 824  * zero extend as we are filling a partial chunk of the vector register.
 825  * These functions don't support 128 bit loads/stores, which would be
 826  * normal load/store operations.
 827  *
 828  * The _i32 versions are useful when operating on 32 bit quantities
 829  * (eg for floating point single or using Neon helper functions).
 830  */
 831
 832 /* Get value of an element within a vector register */
 833 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
 834                              int element, TCGMemOp memop)
 835 {
 836     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
 837     switch (memop) {
 838     case MO_8:
 839         tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
 840         break;
 841     case MO_16:
 842         tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
 843         break;
 844     case MO_32:
 845         tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
 846         break;
 847     case MO_8|MO_SIGN:
 848         tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
 849         break;
 850     case MO_16|MO_SIGN:
 851         tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
 852         break;
 853     case MO_32|MO_SIGN:
 854         tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
 855         break;
 856     case MO_64:
 857     case MO_64|MO_SIGN:
 858         tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
 859         break;
 860     default:
 861         g_assert_not_reached();
 862     }
 863 }
 864
 865 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
 866                                  int element, TCGMemOp memop)
 867 {
 868     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
 869     switch (memop) {
 870     case MO_8:
 871         tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
 872         break;
 873     case MO_16:
 874         tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
 875         break;
 876     case MO_8|MO_SIGN:
 877         tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
 878         break;
 879     case MO_16|MO_SIGN:
 880         tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
 881         break;
 882     case MO_32:
 883     case MO_32|MO_SIGN:
 884         tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
 885         break;
 886     default:
 887         g_assert_not_reached();
 888     }
 889 }
 890
 891 /* Set value of an element within a vector register */
 892 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
 893                               int element, TCGMemOp memop)
 894 {
 895     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
 896     switch (memop) {
 897     case MO_8:
 898         tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
 899         break;
 900     case MO_16:
 901         tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
 902         break;
 903     case MO_32:
 904         tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
 905         break;
 906     case MO_64:
 907         tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
 908         break;
 909     default:
 910         g_assert_not_reached();
 911     }
 912 }
 913
 914 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
 915                                   int destidx, int element, TCGMemOp memop)
 916 {
 917     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
 918     switch (memop) {
 919     case MO_8:
 920         tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
 921         break;
 922     case MO_16:
 923         tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
 924         break;
 925     case MO_32:
 926         tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
 927         break;
 928     default:
 929         g_assert_not_reached();
 930     }
 931 }
 932
 933 /* Clear the high 64 bits of a 128 bit vector (in general non-quad
 934  * vector ops all need to do this).
 935  */
 936 static void clear_vec_high(DisasContext *s, int rd)
 937 {
 938     TCGv_i64 tcg_zero = tcg_const_i64(0);
 939
 940     write_vec_element(s, tcg_zero, rd, 1, MO_64);
 941     tcg_temp_free_i64(tcg_zero);
 942 }
 943
 944 /* Store from vector register to memory */
 945 static void do_vec_st(DisasContext *s, int srcidx, int element,
 946                       TCGv_i64 tcg_addr, int size)
 947 {
 948     TCGMemOp memop = MO_TE + size;
 949     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
 950
 951     read_vec_element(s, tcg_tmp, srcidx, element, size);
 952     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
 953
 954     tcg_temp_free_i64(tcg_tmp);
 955 }
 956
 957 /* Load from memory to vector register */
 958 static void do_vec_ld(DisasContext *s, int destidx, int element,
 959                       TCGv_i64 tcg_addr, int size)
 960 {
 961     TCGMemOp memop = MO_TE + size;
 962     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
 963
 964     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
 965     write_vec_element(s, tcg_tmp, destidx, element, size);
 966
 967     tcg_temp_free_i64(tcg_tmp);
 968 }
 969
 970 /* Check that FP/Neon access is enabled. If it is, return
 971  * true. If not, emit code to generate an appropriate exception,
 972  * and return false; the caller should not emit any code for
 973  * the instruction. Note that this check must happen after all
 974  * unallocated-encoding checks (otherwise the syndrome information
 975  * for the resulting exception will be incorrect).
 976  */
 977 static inline bool fp_access_check(DisasContext *s)
 978 {
 979     assert(!s->fp_access_checked);
 980     s->fp_access_checked = true;
 981
 982     if (!s->fp_excp_el) {
 983         return true;
 984     }
 985
 986     gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
 987                        s->fp_excp_el);
 988     return false;
 989 }
 990
 991 /*
 992  * This utility function is for doing register extension with an
 993  * optional shift. You will likely want to pass a temporary for the
 994  * destination register. See DecodeRegExtend() in the ARM ARM.
 995  */
 996 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
 997                               int option, unsigned int shift)
 998 {
 999     int extsize = extract32(option, 0, 2);
1000     bool is_signed = extract32(option, 2, 1);
1001
1002     if (is_signed) {
1003         switch (extsize) {
1004         case 0:
1005             tcg_gen_ext8s_i64(tcg_out, tcg_in);
1006             break;
1007         case 1:
1008             tcg_gen_ext16s_i64(tcg_out, tcg_in);
1009             break;
1010         case 2:
1011             tcg_gen_ext32s_i64(tcg_out, tcg_in);
1012             break;
1013         case 3:
1014             tcg_gen_mov_i64(tcg_out, tcg_in);
1015             break;
1016         }
1017     } else {
1018         switch (extsize) {
1019         case 0:
1020             tcg_gen_ext8u_i64(tcg_out, tcg_in);
1021             break;
1022         case 1:
1023             tcg_gen_ext16u_i64(tcg_out, tcg_in);
1024             break;
1025         case 2:
1026             tcg_gen_ext32u_i64(tcg_out, tcg_in);
1027             break;
1028         case 3:
1029             tcg_gen_mov_i64(tcg_out, tcg_in);
1030             break;
1031         }
1032     }
1033
1034     if (shift) {
1035         tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1036     }
1037 }
1038
/* Hook for the SP alignment check on SP-relative loads and stores.
 * Currently this emits no code at all; @s is unused.
 */
static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}
1051
1052 /*
1053  * This provides a simple table based table lookup decoder. It is
1054  * intended to be used when the relevant bits for decode are too
1055  * awkwardly placed and switch/if based logic would be confusing and
1056  * deeply nested. Since it's a linear search through the table, tables
1057  * should be kept small.
1058  *
1059  * It returns the first handler where insn & mask == pattern, or
1060  * NULL if there is no match.
1061  * The table is terminated by an empty mask (i.e. 0)
1062  */
1063 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1064                                                uint32_t insn)
1065 {
1066     const AArch64DecodeTable *tptr = table;
1067
1068     while (tptr->mask) {
1069         if ((insn & tptr->mask) == tptr->pattern) {
1070             return tptr->disas_fn;
1071         }
1072         tptr++;
1073     }
1074     return NULL;
1075 }
1076
1077 /*
1078  * the instruction disassembly implemented here matches
1079  * the instruction encoding classifications in chapter 3 (C3)
1080  * of the ARM Architecture Reference Manual (DDI0487A_a)
1081  */
1082
1083 /* C3.2.7 Unconditional branch (immediate)
1084  *   31  30       26 25                                  0
1085  * +----+-----------+-------------------------------------+
1086  * | op | 0 0 1 0 1 |                 imm26               |
1087  * +----+-----------+-------------------------------------+
1088  */
1089 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1090 {
1091     uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
1092
1093     if (insn & (1U << 31)) {
1094         /* C5.6.26 BL Branch with link */
1095         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1096     }
1097
1098     /* C5.6.20 B Branch / C5.6.26 BL Branch with link */
1099     gen_goto_tb(s, 0, addr);
1100 }
1101
1102 /* C3.2.1 Compare & branch (immediate)
1103  *   31  30         25  24  23                  5 4      0
1104  * +----+-------------+----+---------------------+--------+
1105  * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
1106  * +----+-------------+----+---------------------+--------+
1107  */
1108 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1109 {
1110     unsigned int sf, op, rt;
1111     uint64_t addr;
1112     TCGLabel *label_match;
1113     TCGv_i64 tcg_cmp;
1114
1115     sf = extract32(insn, 31, 1);
1116     op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1117     rt = extract32(insn, 0, 5);
1118     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1119
1120     tcg_cmp = read_cpu_reg(s, rt, sf);
1121     label_match = gen_new_label();
1122
1123     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1124                         tcg_cmp, 0, label_match);
1125
1126     gen_goto_tb(s, 0, s->pc);
1127     gen_set_label(label_match);
1128     gen_goto_tb(s, 1, addr);
1129 }
1130
1131 /* C3.2.5 Test & branch (immediate)
1132  *   31  30         25  24  23   19 18          5 4    0
1133  * +----+-------------+----+-------+-------------+------+
1134  * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
1135  * +----+-------------+----+-------+-------------+------+
1136  */
1137 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1138 {
1139     unsigned int bit_pos, op, rt;
1140     uint64_t addr;
1141     TCGLabel *label_match;
1142     TCGv_i64 tcg_cmp;
1143
1144     bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1145     op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1146     addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
1147     rt = extract32(insn, 0, 5);
1148
1149     tcg_cmp = tcg_temp_new_i64();
1150     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1151     label_match = gen_new_label();
1152     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1153                         tcg_cmp, 0, label_match);
1154     tcg_temp_free_i64(tcg_cmp);
1155     gen_goto_tb(s, 0, s->pc);
1156     gen_set_label(label_match);
1157     gen_goto_tb(s, 1, addr);
1158 }
1159
1160 /* C3.2.2 / C5.6.19 Conditional branch (immediate)
1161  *  31           25  24  23                  5   4  3    0
1162  * +---------------+----+---------------------+----+------+
1163  * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
1164  * +---------------+----+---------------------+----+------+
1165  */
1166 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1167 {
1168     unsigned int cond;
1169     uint64_t addr;
1170
1171     if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1172         unallocated_encoding(s);
1173         return;
1174     }
1175     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1176     cond = extract32(insn, 0, 4);
1177
1178     if (cond < 0x0e) {
1179         /* genuinely conditional branches */
1180         TCGLabel *label_match = gen_new_label();
1181         arm_gen_test_cc(cond, label_match);
1182         gen_goto_tb(s, 0, s->pc);
1183         gen_set_label(label_match);
1184         gen_goto_tb(s, 1, addr);
1185     } else {
1186         /* 0xe and 0xf are both "always" conditions */
1187         gen_goto_tb(s, 0, addr);
1188     }
1189 }
1190
1191 /* C5.6.68 HINT */
1192 static void handle_hint(DisasContext *s, uint32_t insn,
1193                         unsigned int op1, unsigned int op2, unsigned int crm)
1194 {
1195     unsigned int selector = crm << 3 | op2;
1196
1197     if (op1 != 3) {
1198         unallocated_encoding(s);
1199         return;
1200     }
1201
1202     switch (selector) {
1203     case 0: /* NOP */
1204         return;
1205     case 3: /* WFI */
1206         s->is_jmp = DISAS_WFI;
1207         return;
1208     case 1: /* YIELD */
1209         s->is_jmp = DISAS_YIELD;
1210         return;
1211     case 2: /* WFE */
1212         s->is_jmp = DISAS_WFE;
1213         return;
1214     case 4: /* SEV */
1215     case 5: /* SEVL */
1216         /* we treat all as NOP at least for now */
1217         return;
1218     default:
1219         /* default specified as NOP equivalent */
1220         return;
1221     }
1222 }
1223
/* CLREX: emit code that sets cpu_exclusive_addr to -1. @s and @insn
 * are unused.
 *
 * The system-mode gen_store_exclusive() fails the store when its
 * address differs from cpu_exclusive_addr, and writes the same -1 when
 * it finishes, so -1 presumably serves as the "no exclusive access
 * outstanding" marker.
 */
static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
1228
1229 /* CLREX, DSB, DMB, ISB */
1230 static void handle_sync(DisasContext *s, uint32_t insn,
1231                         unsigned int op1, unsigned int op2, unsigned int crm)
1232 {
1233     if (op1 != 3) {
1234         unallocated_encoding(s);
1235         return;
1236     }
1237
1238     switch (op2) {
1239     case 2: /* CLREX */
1240         gen_clrex(s, insn);
1241         return;
1242     case 4: /* DSB */
1243     case 5: /* DMB */
1244         /* We don't emulate caches so barriers are no-ops */
1245         return;
1246     case 6: /* ISB */
1247         /* We need to break the TB after this insn to execute
1248          * a self-modified code correctly and also to take
1249          * any pending interrupts immediately.
1250          */
1251         s->is_jmp = DISAS_UPDATE;
1252         return;
1253     default:
1254         unallocated_encoding(s);
1255         return;
1256     }
1257 }
1258
1259 /* C5.6.130 MSR (immediate) - move immediate to processor state field */
1260 static void handle_msr_i(DisasContext *s, uint32_t insn,
1261                          unsigned int op1, unsigned int op2, unsigned int crm)
1262 {
1263     int op = op1 << 3 | op2;
1264     switch (op) {
1265     case 0x05: /* SPSel */
1266         if (s->current_el == 0) {
1267             unallocated_encoding(s);
1268             return;
1269         }
1270         /* fall through */
1271     case 0x1e: /* DAIFSet */
1272     case 0x1f: /* DAIFClear */
1273     {
1274         TCGv_i32 tcg_imm = tcg_const_i32(crm);
1275         TCGv_i32 tcg_op = tcg_const_i32(op);
1276         gen_a64_set_pc_im(s->pc - 4);
1277         gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
1278         tcg_temp_free_i32(tcg_imm);
1279         tcg_temp_free_i32(tcg_op);
1280         s->is_jmp = DISAS_UPDATE;
1281         break;
1282     }
1283     default:
1284         unallocated_encoding(s);
1285         return;
1286     }
1287 }
1288
1289 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1290 {
1291     TCGv_i32 tmp = tcg_temp_new_i32();
1292     TCGv_i32 nzcv = tcg_temp_new_i32();
1293
1294     /* build bit 31, N */
1295     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1296     /* build bit 30, Z */
1297     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1298     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1299     /* build bit 29, C */
1300     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1301     /* build bit 28, V */
1302     tcg_gen_shri_i32(tmp, cpu_VF, 31);
1303     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1304     /* generate result */
1305     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1306
1307     tcg_temp_free_i32(nzcv);
1308     tcg_temp_free_i32(tmp);
1309 }
1310
1311 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1312
1313 {
1314     TCGv_i32 nzcv = tcg_temp_new_i32();
1315
1316     /* take NZCV from R[t] */
1317     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1318
1319     /* bit 31, N */
1320     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1321     /* bit 30, Z */
1322     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1323     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1324     /* bit 29, C */
1325     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1326     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1327     /* bit 28, V */
1328     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1329     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1330     tcg_temp_free_i32(nzcv);
1331 }
1332
/* C5.6.129 MRS - move from system register
 * C5.6.131 MSR (register) - move to system register
 * C5.6.204 SYS
 * C5.6.205 SYSL
 * These are all essentially the same insn in 'read' and 'write'
 * versions, with varying op0 fields.
 *
 * @isread selects the direction: true reads the system register into
 * Rt, false writes Rt to it. @op0, @op1, @op2, @crn and @crm identify
 * the register and @rt is the general purpose register number.
 *
 * An unknown register, or one for which cp_access_ok() refuses the
 * access, is treated as an unallocated encoding.
 */
static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    const ARMCPRegInfo *ri;
    TCGv_i64 tcg_rt;

    /* Look the register up by its encoded (crn, crm, op0, op1, op2) key */
    ri = get_arm_cp_reginfo(s->cp_regs,
                            ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                               crn, crm, op0, op1, op2));

    if (!ri) {
        /* Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
                      isread ? "read" : "write", op0, op1, crn, crm, op2);
        unallocated_encoding(s);
        return;
    }

    /* Check access permissions */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        unallocated_encoding(s);
        return;
    }

    if (ri->accessfn) {
        /* Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         */
        TCGv_ptr tmpptr;
        TCGv_i32 tcg_syn;
        uint32_t syndrome;

        /* Set the PC to this insn before calling the helper */
        gen_a64_set_pc_im(s->pc - 4);
        tmpptr = tcg_const_ptr(ri);
        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
        tcg_syn = tcg_const_i32(syndrome);
        gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn);
        tcg_temp_free_ptr(tmpptr);
        tcg_temp_free_i32(tcg_syn);
    }

    /* Handle special cases first */
    /* The mask clears every flag bit except ARM_CP_SPECIAL, so the
     * remaining value can be compared with the special-case constants.
     * All of these cases return without ending the TB.
     */
    switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
    case ARM_CP_NOP:
        return;
    case ARM_CP_NZCV:
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_rt);
        } else {
            gen_set_nzcv(tcg_rt);
        }
        return;
    case ARM_CP_CURRENTEL:
        /* Reads as current EL value from pstate, which is
         * guaranteed to be constant by the tb flags.
         */
        tcg_rt = cpu_reg(s, rt);
        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
        return;
    case ARM_CP_DC_ZVA:
        /* Writes clear the aligned block of memory which rt points into. */
        tcg_rt = cpu_reg(s, rt);
        gen_helper_dc_zva(cpu_env, tcg_rt);
        return;
    default:
        break;
    }

    /* With icount in use, an access to an I/O register is bracketed by
     * gen_io_start() here and gen_io_end() further down.
     */
    if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        gen_io_start();
    }

    tcg_rt = cpu_reg(s, rt);

    if (isread) {
        /* In order of preference: constant value, read helper, or a
         * direct load from the CPU state at ri->fieldoffset.
         */
        if (ri->type & ARM_CP_CONST) {
            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
        } else if (ri->readfn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
            tcg_temp_free_ptr(tmpptr);
        } else {
            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    } else {
        /* Likewise for writes: ignored, write helper, or direct store */
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            /* NOTE(review): this early return skips the gen_io_end()
             * below even if gen_io_start() was emitted above. That can
             * only matter for a register flagged both ARM_CP_CONST and
             * ARM_CP_IO while icount is in use -- confirm none exists.
             */
            return;
        } else if (ri->writefn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
            tcg_temp_free_ptr(tmpptr);
        } else {
            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    }

    if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        /* I/O operations must end the TB here (whether read or write) */
        gen_io_end();
        s->is_jmp = DISAS_UPDATE;
    } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /* We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        s->is_jmp = DISAS_UPDATE;
    }
}
1456
1457 /* C3.2.4 System
1458  *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1459  * +---------------------+---+-----+-----+-------+-------+-----+------+
1460  * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1461  * +---------------------+---+-----+-----+-------+-------+-----+------+
1462  */
1463 static void disas_system(DisasContext *s, uint32_t insn)
1464 {
1465     unsigned int l, op0, op1, crn, crm, op2, rt;
1466     l = extract32(insn, 21, 1);
1467     op0 = extract32(insn, 19, 2);
1468     op1 = extract32(insn, 16, 3);
1469     crn = extract32(insn, 12, 4);
1470     crm = extract32(insn, 8, 4);
1471     op2 = extract32(insn, 5, 3);
1472     rt = extract32(insn, 0, 5);
1473
1474     if (op0 == 0) {
1475         if (l || rt != 31) {
1476             unallocated_encoding(s);
1477             return;
1478         }
1479         switch (crn) {
1480         case 2: /* C5.6.68 HINT */
1481             handle_hint(s, insn, op1, op2, crm);
1482             break;
1483         case 3: /* CLREX, DSB, DMB, ISB */
1484             handle_sync(s, insn, op1, op2, crm);
1485             break;
1486         case 4: /* C5.6.130 MSR (immediate) */
1487             handle_msr_i(s, insn, op1, op2, crm);
1488             break;
1489         default:
1490             unallocated_encoding(s);
1491             break;
1492         }
1493         return;
1494     }
1495     handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1496 }
1497
/* C3.2.3 Exception generation
 *
 *  31             24 23 21 20                     5 4   2 1  0
 * +-----------------+-----+------------------------+-----+----+
 * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
 * +-----------------------+------------------------+----------+
 *
 * Decoded on opc, then on the combined op2:LL field (op2_ll):
 *   opc 0, op2_ll 1/2/3: SVC / HVC / SMC
 *   opc 1, op2_ll 0:     BRK
 *   opc 2, op2_ll 0:     HLT (only the semihosting call is implemented)
 *   opc 5, op2_ll 1..3:  DCPS1/2/3 (not implemented)
 * Everything else is an unallocated encoding.
 */
static void disas_exc(DisasContext *s, uint32_t insn)
{
    int opc = extract32(insn, 21, 3);
    int op2_ll = extract32(insn, 0, 5);
    int imm16 = extract32(insn, 5, 16);
    TCGv_i32 tmp;

    switch (opc) {
    case 0:
        /* For SVC, HVC and SMC we advance the single-step state
         * machine before taking the exception. This is architecturally
         * mandated, to ensure that single-stepping a system call
         * instruction works properly.
         */
        switch (op2_ll) {
        case 1: /* SVC */
            gen_ss_advance(s);
            gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
                               default_exception_el(s));
            break;
        case 2: /* HVC: not available at EL0 */
            if (s->current_el == 0) {
                unallocated_encoding(s);
                break;
            }
            /* The pre HVC helper handles cases when HVC gets trapped
             * as an undefined insn by runtime configuration.
             */
            gen_a64_set_pc_im(s->pc - 4);
            gen_helper_pre_hvc(cpu_env);
            gen_ss_advance(s);
            gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
            break;
        case 3: /* SMC: not available at EL0 */
            if (s->current_el == 0) {
                unallocated_encoding(s);
                break;
            }
            /* As for HVC, a helper runs first; it is given the syndrome */
            gen_a64_set_pc_im(s->pc - 4);
            tmp = tcg_const_i32(syn_aa64_smc(imm16));
            gen_helper_pre_smc(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            gen_ss_advance(s);
            gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
            break;
        default:
            unallocated_encoding(s);
            break;
        }
        break;
    case 1:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* BRK */
        gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16),
                           default_exception_el(s));
        break;
    case 2:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* HLT. This has two purposes.
         * Architecturally, it is an external halting debug instruction.
         * Since QEMU doesn't implement external debug, we treat this as
         * it is required for halting debug disabled: it will UNDEF.
         * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
         */
        if (semihosting_enabled() && imm16 == 0xf000) {
#ifndef CONFIG_USER_ONLY
            /* In system mode, don't allow userspace access to semihosting,
             * to provide some semblance of security (and for consistency
             * with our 32-bit semihosting).
             */
            if (s->current_el == 0) {
                unsupported_encoding(s, insn);
                break;
            }
#endif
            gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
        } else {
            unsupported_encoding(s, insn);
        }
        break;
    case 5:
        if (op2_ll < 1 || op2_ll > 3) {
            unallocated_encoding(s);
            break;
        }
        /* DCPS1, DCPS2, DCPS3 */
        unsupported_encoding(s, insn);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}
1604
1605 /* C3.2.7 Unconditional branch (register)
1606  *  31           25 24   21 20   16 15   10 9    5 4     0
1607  * +---------------+-------+-------+-------+------+-------+
1608  * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1609  * +---------------+-------+-------+-------+------+-------+
1610  */
1611 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1612 {
1613     unsigned int opc, op2, op3, rn, op4;
1614
1615     opc = extract32(insn, 21, 4);
1616     op2 = extract32(insn, 16, 5);
1617     op3 = extract32(insn, 10, 6);
1618     rn = extract32(insn, 5, 5);
1619     op4 = extract32(insn, 0, 5);
1620
1621     if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1622         unallocated_encoding(s);
1623         return;
1624     }
1625
1626     switch (opc) {
1627     case 0: /* BR */
1628     case 2: /* RET */
1629         tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1630         break;
1631     case 1: /* BLR */
1632         tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1633         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1634         break;
1635     case 4: /* ERET */
1636         if (s->current_el == 0) {
1637             unallocated_encoding(s);
1638             return;
1639         }
1640         gen_helper_exception_return(cpu_env);
1641         s->is_jmp = DISAS_JUMP;
1642         return;
1643     case 5: /* DRPS */
1644         if (rn != 0x1f) {
1645             unallocated_encoding(s);
1646         } else {
1647             unsupported_encoding(s, insn);
1648         }
1649         return;
1650     default:
1651         unallocated_encoding(s);
1652         return;
1653     }
1654
1655     s->is_jmp = DISAS_JUMP;
1656 }
1657
1658 /* C3.2 Branches, exception generating and system instructions */
1659 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1660 {
1661     switch (extract32(insn, 25, 7)) {
1662     case 0x0a: case 0x0b:
1663     case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1664         disas_uncond_b_imm(s, insn);
1665         break;
1666     case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1667         disas_comp_b_imm(s, insn);
1668         break;
1669     case 0x1b: case 0x5b: /* Test & branch (immediate) */
1670         disas_test_b_imm(s, insn);
1671         break;
1672     case 0x2a: /* Conditional branch (immediate) */
1673         disas_cond_b_imm(s, insn);
1674         break;
1675     case 0x6a: /* Exception generation / System */
1676         if (insn & (1 << 24)) {
1677             disas_system(s, insn);
1678         } else {
1679             disas_exc(s, insn);
1680         }
1681         break;
1682     case 0x6b: /* Unconditional branch (register) */
1683         disas_uncond_b_reg(s, insn);
1684         break;
1685     default:
1686         unallocated_encoding(s);
1687         break;
1688     }
1689 }
1690
1691 /*
1692  * Load/Store exclusive instructions are implemented by remembering
1693  * the value/address loaded, and seeing if these are the same
1694  * when the store is performed. This is not actually the architecturally
1695  * mandated semantics, but it works for typical guest code sequences
1696  * and avoids having to monitor regular stores.
1697  *
1698  * In system emulation mode only one CPU will be running at once, so
1699  * this sequence is effectively atomic.  In user emulation mode we
1700  * throw an exception and handle the atomic operation elsewhere.
1701  */
1702 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
1703                                TCGv_i64 addr, int size, bool is_pair)
1704 {
1705     TCGv_i64 tmp = tcg_temp_new_i64();
1706     TCGMemOp memop = MO_TE + size;
1707
1708     g_assert(size <= 3);
1709     tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);
1710
1711     if (is_pair) {
1712         TCGv_i64 addr2 = tcg_temp_new_i64();
1713         TCGv_i64 hitmp = tcg_temp_new_i64();
1714
1715         g_assert(size >= 2);
1716         tcg_gen_addi_i64(addr2, addr, 1 << size);
1717         tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
1718         tcg_temp_free_i64(addr2);
1719         tcg_gen_mov_i64(cpu_exclusive_high, hitmp);
1720         tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
1721         tcg_temp_free_i64(hitmp);
1722     }
1723
1724     tcg_gen_mov_i64(cpu_exclusive_val, tmp);
1725     tcg_gen_mov_i64(cpu_reg(s, rt), tmp);
1726
1727     tcg_temp_free_i64(tmp);
1728     tcg_gen_mov_i64(cpu_exclusive_addr, addr);
1729 }
1730
/* Emit a store-exclusive of Rt (and Rt2 when @is_pair) to an address,
 * with the status result going to Rd. There is one implementation for
 * user emulation and one for system emulation.
 */
#ifdef CONFIG_USER_ONLY
/* User emulation: no store is generated here. The address is saved in
 * cpu_exclusive_test and the operands are packed into
 * cpu_exclusive_info, then EXCP_STREX is raised.
 *
 * cpu_exclusive_info layout: size in bits [1:0], is_pair at bit 2,
 * rd from bit 4, rt from bit 9, rt2 from bit 14.
 */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 addr, int size, int is_pair)
{
    tcg_gen_mov_i64(cpu_exclusive_test, addr);
    tcg_gen_movi_i32(cpu_exclusive_info,
                     size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14));
    gen_exception_internal_insn(s, 4, EXCP_STREX);
}
#else
/* System emulation: the check and the store are generated inline,
 * following the pseudo-code at the top of the function body.
 */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 inaddr, int size, int is_pair)
{
    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
     *     [addr] = {Rt};
     *     if (is_pair) {
     *         [addr + datasize] = {Rt2};
     *     }
     *     {Rd} = 0;
     * } else {
     *     {Rd} = 1;
     * }
     * env->exclusive_addr = -1;
     */
    TCGLabel *fail_label = gen_new_label();
    TCGLabel *done_label = gen_new_label();
    TCGv_i64 addr = tcg_temp_local_new_i64();
    TCGv_i64 tmp;

    /* Copy input into a local temp so it is not trashed when the
     * basic block ends at the branch insn.
     */
    tcg_gen_mov_i64(addr, inaddr);
    /* Check 1: the address must match the remembered one */
    tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);

    /* Check 2: memory must still hold the remembered value */
    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), MO_TE + size);
    tcg_gen_brcond_i64(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label);
    tcg_temp_free_i64(tmp);

    if (is_pair) {
        /* Check 3 (pairs only): the same for the second element */
        TCGv_i64 addrhi = tcg_temp_new_i64();
        TCGv_i64 tmphi = tcg_temp_new_i64();

        tcg_gen_addi_i64(addrhi, addr, 1 << size);
        tcg_gen_qemu_ld_i64(tmphi, addrhi, get_mem_index(s), MO_TE + size);
        tcg_gen_brcond_i64(TCG_COND_NE, tmphi, cpu_exclusive_high, fail_label);

        tcg_temp_free_i64(tmphi);
        tcg_temp_free_i64(addrhi);
    }

    /* We seem to still have the exclusive monitor, so do the store */
    tcg_gen_qemu_st_i64(cpu_reg(s, rt), addr, get_mem_index(s), MO_TE + size);
    if (is_pair) {
        TCGv_i64 addrhi = tcg_temp_new_i64();

        tcg_gen_addi_i64(addrhi, addr, 1 << size);
        tcg_gen_qemu_st_i64(cpu_reg(s, rt2), addrhi,
                            get_mem_index(s), MO_TE + size);
        tcg_temp_free_i64(addrhi);
    }

    tcg_temp_free_i64(addr);

    /* Success path: Rd = 0 */
    tcg_gen_movi_i64(cpu_reg(s, rd), 0);
    tcg_gen_br(done_label);
    /* Failure path: Rd = 1, nothing was stored */
    gen_set_label(fail_label);
    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
    gen_set_label(done_label);
    /* Both paths reset the remembered address */
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);

}
#endif
1806
1807 /* C3.3.6 Load/store exclusive
1808  *
1809  *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
1810  * +-----+-------------+----+---+----+------+----+-------+------+------+
1811  * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
1812  * +-----+-------------+----+---+----+------+----+-------+------+------+
1813  *
1814  *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
1815  *   L: 0 -> store, 1 -> load
1816  *  o2: 0 -> exclusive, 1 -> not
1817  *  o1: 0 -> single register, 1 -> register pair
1818  *  o0: 1 -> load-acquire/store-release, 0 -> not
1819  *
1820  *  o0 == 0 AND o2 == 1 is un-allocated
1821  *  o1 == 1 is un-allocated except for 32 and 64 bit sizes
1822  */
1823 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
1824 {
1825     int rt = extract32(insn, 0, 5);
1826     int rn = extract32(insn, 5, 5);
1827     int rt2 = extract32(insn, 10, 5);
1828     int is_lasr = extract32(insn, 15, 1);
1829     int rs = extract32(insn, 16, 5);
1830     int is_pair = extract32(insn, 21, 1);
1831     int is_store = !extract32(insn, 22, 1);
1832     int is_excl = !extract32(insn, 23, 1);
1833     int size = extract32(insn, 30, 2);
1834     TCGv_i64 tcg_addr;
1835
1836     if ((!is_excl && !is_lasr) ||
1837         (is_pair && size < 2)) {
1838         unallocated_encoding(s);
1839         return;
1840     }
1841
1842     if (rn == 31) {
1843         gen_check_sp_alignment(s);
1844     }
1845     tcg_addr = read_cpu_reg_sp(s, rn, 1);
1846
1847     /* Note that since TCG is single threaded load-acquire/store-release
1848      * semantics require no extra if (is_lasr) { ... } handling.
1849      */
1850
1851     if (is_excl) {
1852         if (!is_store) {
1853             s->is_ldex = true;
1854             gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
1855         } else {
1856             gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
1857         }
1858     } else {
1859         TCGv_i64 tcg_rt = cpu_reg(s, rt);
1860         if (is_store) {
1861             do_gpr_st(s, tcg_rt, tcg_addr, size);
1862         } else {
1863             do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false);
1864         }
1865         if (is_pair) {
1866             TCGv_i64 tcg_rt2 = cpu_reg(s, rt);
1867             tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
1868             if (is_store) {
1869                 do_gpr_st(s, tcg_rt2, tcg_addr, size);
1870             } else {
1871                 do_gpr_ld(s, tcg_rt2, tcg_addr, size, false, false);
1872             }
1873         }
1874     }
1875 }
1876
1877 /*
1878  * C3.3.5 Load register (literal)
1879  *
1880  *  31 30 29   27  26 25 24 23                5 4     0
1881  * +-----+-------+---+-----+-------------------+-------+
1882  * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
1883  * +-----+-------+---+-----+-------------------+-------+
1884  *
1885  * V: 1 -> vector (simd/fp)
1886  * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
1887  *                   10-> 32 bit signed, 11 -> prefetch
1888  * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
1889  */
1890 static void disas_ld_lit(DisasContext *s, uint32_t insn)
1891 {
1892     int rt = extract32(insn, 0, 5);
1893     int64_t imm = sextract32(insn, 5, 19) << 2;
1894     bool is_vector = extract32(insn, 26, 1);
1895     int opc = extract32(insn, 30, 2);
1896     bool is_signed = false;
1897     int size = 2;
1898     TCGv_i64 tcg_rt, tcg_addr;
1899
1900     if (is_vector) {
1901         if (opc == 3) {
1902             unallocated_encoding(s);
1903             return;
1904         }
1905         size = 2 + opc;
1906         if (!fp_access_check(s)) {
1907             return;
1908         }
1909     } else {
1910         if (opc == 3) {
1911             /* PRFM (literal) : prefetch */
1912             return;
1913         }
1914         size = 2 + extract32(opc, 0, 1);
1915         is_signed = extract32(opc, 1, 1);
1916     }
1917
1918     tcg_rt = cpu_reg(s, rt);
1919
1920     tcg_addr = tcg_const_i64((s->pc - 4) + imm);
1921     if (is_vector) {
1922         do_fp_ld(s, rt, tcg_addr, size);
1923     } else {
1924         do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
1925     }
1926     tcg_temp_free_i64(tcg_addr);
1927 }
1928
1929 /*
1930  * C5.6.80 LDNP (Load Pair - non-temporal hint)
1931  * C5.6.81 LDP (Load Pair - non vector)
1932  * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
1933  * C5.6.176 STNP (Store Pair - non-temporal hint)
1934  * C5.6.177 STP (Store Pair - non vector)
1935  * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
1936  * C6.3.165 LDP (Load Pair of SIMD&FP)
1937  * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
1938  * C6.3.284 STP (Store Pair of SIMD&FP)
1939  *
1940  *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
1941  * +-----+-------+---+---+-------+---+-----------------------------+
1942  * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
1943  * +-----+-------+---+---+-------+---+-------+-------+------+------+
1944  *
1945  * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
1946  *      LDPSW                    01
1947  *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
1948  *   V: 0 -> GPR, 1 -> Vector
1949  * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
1950  *      10 -> signed offset, 11 -> pre-index
1951  *   L: 0 -> Store 1 -> Load
1952  *
1953  * Rt, Rt2 = GPR or SIMD registers to be stored
1954  * Rn = general purpose register containing address
1955  * imm7 = signed offset (multiple of 4 or 8 depending on size)
1956  */
1957 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
1958 {
1959     int rt = extract32(insn, 0, 5);
1960     int rn = extract32(insn, 5, 5);
1961     int rt2 = extract32(insn, 10, 5);
1962     uint64_t offset = sextract64(insn, 15, 7);
1963     int index = extract32(insn, 23, 2);
1964     bool is_vector = extract32(insn, 26, 1);
1965     bool is_load = extract32(insn, 22, 1);
1966     int opc = extract32(insn, 30, 2);
1967
1968     bool is_signed = false;
1969     bool postindex = false;
1970     bool wback = false;
1971
1972     TCGv_i64 tcg_addr; /* calculated address */
1973     int size;
1974
1975     if (opc == 3) {
1976         unallocated_encoding(s);
1977         return;
1978     }
1979
1980     if (is_vector) {
1981         size = 2 + opc;
1982     } else {
1983         size = 2 + extract32(opc, 1, 1);
1984         is_signed = extract32(opc, 0, 1);
1985         if (!is_load && is_signed) {
1986             unallocated_encoding(s);
1987             return;
1988         }
1989     }
1990
1991     switch (index) {
1992     case 1: /* post-index */
1993         postindex = true;
1994         wback = true;
1995         break;
1996     case 0:
1997         /* signed offset with "non-temporal" hint. Since we don't emulate
1998          * caches we don't care about hints to the cache system about
1999          * data access patterns, and handle this identically to plain
2000          * signed offset.
2001          */
2002         if (is_signed) {
2003             /* There is no non-temporal-hint version of LDPSW */
2004             unallocated_encoding(s);
2005             return;
2006         }
2007         postindex = false;
2008         break;
2009     case 2: /* signed offset, rn not updated */
2010         postindex = false;
2011         break;
2012     case 3: /* pre-index */
2013         postindex = false;
2014         wback = true;
2015         break;
2016     }
2017
2018     if (is_vector && !fp_access_check(s)) {
2019         return;
2020     }
2021
2022     offset <<= size;
2023
2024     if (rn == 31) {
2025         gen_check_sp_alignment(s);
2026     }
2027
2028     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2029
2030     if (!postindex) {
2031         tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2032     }
2033
2034     if (is_vector) {
2035         if (is_load) {
2036             do_fp_ld(s, rt, tcg_addr, size);
2037         } else {
2038             do_fp_st(s, rt, tcg_addr, size);
2039         }
2040     } else {
2041         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2042         if (is_load) {
2043             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
2044         } else {
2045             do_gpr_st(s, tcg_rt, tcg_addr, size);
2046         }
2047     }
2048     tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2049     if (is_vector) {
2050         if (is_load) {
2051             do_fp_ld(s, rt2, tcg_addr, size);
2052         } else {
2053             do_fp_st(s, rt2, tcg_addr, size);
2054         }
2055     } else {
2056         TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2057         if (is_load) {
2058             do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false);
2059         } else {
2060             do_gpr_st(s, tcg_rt2, tcg_addr, size);
2061         }
2062     }
2063
2064     if (wback) {
2065         if (postindex) {
2066             tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
2067         } else {
2068             tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
2069         }
2070         tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
2071     }
2072 }
2073
2074 /*
2075  * C3.3.8 Load/store (immediate post-indexed)
2076  * C3.3.9 Load/store (immediate pre-indexed)
2077  * C3.3.12 Load/store (unscaled immediate)
2078  *
2079  * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2080  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2081  * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2082  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2083  *
2084  * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
 *       10 -> unprivileged
2086  * V = 0 -> non-vector
2087  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2088  * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2089  */
2090 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
2091 {
2092     int rt = extract32(insn, 0, 5);
2093     int rn = extract32(insn, 5, 5);
2094     int imm9 = sextract32(insn, 12, 9);
2095     int opc = extract32(insn, 22, 2);
2096     int size = extract32(insn, 30, 2);
2097     int idx = extract32(insn, 10, 2);
2098     bool is_signed = false;
2099     bool is_store = false;
2100     bool is_extended = false;
2101     bool is_unpriv = (idx == 2);
2102     bool is_vector = extract32(insn, 26, 1);
2103     bool post_index;
2104     bool writeback;
2105
2106     TCGv_i64 tcg_addr;
2107
2108     if (is_vector) {
2109         size |= (opc & 2) << 1;
2110         if (size > 4 || is_unpriv) {
2111             unallocated_encoding(s);
2112             return;
2113         }
2114         is_store = ((opc & 1) == 0);
2115         if (!fp_access_check(s)) {
2116             return;
2117         }
2118     } else {
2119         if (size == 3 && opc == 2) {
2120             /* PRFM - prefetch */
2121             if (is_unpriv) {
2122                 unallocated_encoding(s);
2123                 return;
2124             }
2125             return;
2126         }
2127         if (opc == 3 && size > 1) {
2128             unallocated_encoding(s);
2129             return;
2130         }
2131         is_store = (opc == 0);
2132         is_signed = opc & (1<<1);
2133         is_extended = (size < 3) && (opc & 1);
2134     }
2135
2136     switch (idx) {
2137     case 0:
2138     case 2:
2139         post_index = false;
2140         writeback = false;
2141         break;
2142     case 1:
2143         post_index = true;
2144         writeback = true;
2145         break;
2146     case 3:
2147         post_index = false;
2148         writeback = true;
2149         break;
2150     }
2151
2152     if (rn == 31) {
2153         gen_check_sp_alignment(s);
2154     }
2155     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2156
2157     if (!post_index) {
2158         tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2159     }
2160
2161     if (is_vector) {
2162         if (is_store) {
2163             do_fp_st(s, rt, tcg_addr, size);
2164         } else {
2165             do_fp_ld(s, rt, tcg_addr, size);
2166         }
2167     } else {
2168         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2169         int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2170
2171         if (is_store) {
2172             do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx);
2173         } else {
2174             do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
2175                              is_signed, is_extended, memidx);
2176         }
2177     }
2178
2179     if (writeback) {
2180         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2181         if (post_index) {
2182             tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2183         }
2184         tcg_gen_mov_i64(tcg_rn, tcg_addr);
2185     }
2186 }
2187
2188 /*
2189  * C3.3.10 Load/store (register offset)
2190  *
2191  * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2192  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2193  * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2194  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2195  *
2196  * For non-vector:
2197  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2198  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2199  * For vector:
2200  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2201  *   opc<0>: 0 -> store, 1 -> load
2202  * V: 1 -> vector/simd
2203  * opt: extend encoding (see DecodeRegExtend)
2204  * S: if S=1 then scale (essentially index by sizeof(size))
2205  * Rt: register to transfer into/out of
2206  * Rn: address register or SP for base
2207  * Rm: offset register or ZR for offset
2208  */
2209 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn)
2210 {
2211     int rt = extract32(insn, 0, 5);
2212     int rn = extract32(insn, 5, 5);
2213     int shift = extract32(insn, 12, 1);
2214     int rm = extract32(insn, 16, 5);
2215     int opc = extract32(insn, 22, 2);
2216     int opt = extract32(insn, 13, 3);
2217     int size = extract32(insn, 30, 2);
2218     bool is_signed = false;
2219     bool is_store = false;
2220     bool is_extended = false;
2221     bool is_vector = extract32(insn, 26, 1);
2222
2223     TCGv_i64 tcg_rm;
2224     TCGv_i64 tcg_addr;
2225
2226     if (extract32(opt, 1, 1) == 0) {
2227         unallocated_encoding(s);
2228         return;
2229     }
2230
2231     if (is_vector) {
2232         size |= (opc & 2) << 1;
2233         if (size > 4) {
2234             unallocated_encoding(s);
2235             return;
2236         }
2237         is_store = !extract32(opc, 0, 1);
2238         if (!fp_access_check(s)) {
2239             return;
2240         }
2241     } else {
2242         if (size == 3 && opc == 2) {
2243             /* PRFM - prefetch */
2244             return;
2245         }
2246         if (opc == 3 && size > 1) {
2247             unallocated_encoding(s);
2248             return;
2249         }
2250         is_store = (opc == 0);
2251         is_signed = extract32(opc, 1, 1);
2252         is_extended = (size < 3) && extract32(opc, 0, 1);
2253     }
2254
2255     if (rn == 31) {
2256         gen_check_sp_alignment(s);
2257     }
2258     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2259
2260     tcg_rm = read_cpu_reg(s, rm, 1);
2261     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2262
2263     tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
2264
2265     if (is_vector) {
2266         if (is_store) {
2267             do_fp_st(s, rt, tcg_addr, size);
2268         } else {
2269             do_fp_ld(s, rt, tcg_addr, size);
2270         }
2271     } else {
2272         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2273         if (is_store) {
2274             do_gpr_st(s, tcg_rt, tcg_addr, size);
2275         } else {
2276             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
2277         }
2278     }
2279 }
2280
2281 /*
2282  * C3.3.13 Load/store (unsigned immediate)
2283  *
2284  * 31 30 29   27  26 25 24 23 22 21        10 9     5
2285  * +----+-------+---+-----+-----+------------+-------+------+
2286  * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
2287  * +----+-------+---+-----+-----+------------+-------+------+
2288  *
2289  * For non-vector:
2290  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2291  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2292  * For vector:
2293  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2294  *   opc<0>: 0 -> store, 1 -> load
2295  * Rn: base address register (inc SP)
2296  * Rt: target register
2297  */
2298 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn)
2299 {
2300     int rt = extract32(insn, 0, 5);
2301     int rn = extract32(insn, 5, 5);
2302     unsigned int imm12 = extract32(insn, 10, 12);
2303     bool is_vector = extract32(insn, 26, 1);
2304     int size = extract32(insn, 30, 2);
2305     int opc = extract32(insn, 22, 2);
2306     unsigned int offset;
2307
2308     TCGv_i64 tcg_addr;
2309
2310     bool is_store;
2311     bool is_signed = false;
2312     bool is_extended = false;
2313
2314     if (is_vector) {
2315         size |= (opc & 2) << 1;
2316         if (size > 4) {
2317             unallocated_encoding(s);
2318             return;
2319         }
2320         is_store = !extract32(opc, 0, 1);
2321         if (!fp_access_check(s)) {
2322             return;
2323         }
2324     } else {
2325         if (size == 3 && opc == 2) {
2326             /* PRFM - prefetch */
2327             return;
2328         }
2329         if (opc == 3 && size > 1) {
2330             unallocated_encoding(s);
2331             return;
2332         }
2333         is_store = (opc == 0);
2334         is_signed = extract32(opc, 1, 1);
2335         is_extended = (size < 3) && extract32(opc, 0, 1);
2336     }
2337
2338     if (rn == 31) {
2339         gen_check_sp_alignment(s);
2340     }
2341     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2342     offset = imm12 << size;
2343     tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2344
2345     if (is_vector) {
2346         if (is_store) {
2347             do_fp_st(s, rt, tcg_addr, size);
2348         } else {
2349             do_fp_ld(s, rt, tcg_addr, size);
2350         }
2351     } else {
2352         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2353         if (is_store) {
2354             do_gpr_st(s, tcg_rt, tcg_addr, size);
2355         } else {
2356             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
2357         }
2358     }
2359 }
2360
2361 /* Load/store register (all forms) */
2362 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2363 {
2364     switch (extract32(insn, 24, 2)) {
2365     case 0:
2366         if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2367             disas_ldst_reg_roffset(s, insn);
2368         } else {
2369             /* Load/store register (unscaled immediate)
2370              * Load/store immediate pre/post-indexed
2371              * Load/store register unprivileged
2372              */
2373             disas_ldst_reg_imm9(s, insn);
2374         }
2375         break;
2376     case 1:
2377         disas_ldst_reg_unsigned_imm(s, insn);
2378         break;
2379     default:
2380         unallocated_encoding(s);
2381         break;
2382     }
2383 }
2384
2385 /* C3.3.1 AdvSIMD load/store multiple structures
2386  *
2387  *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
2388  * +---+---+---------------+---+-------------+--------+------+------+------+
2389  * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
2390  * +---+---+---------------+---+-------------+--------+------+------+------+
2391  *
2392  * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
2393  *
2394  *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
2395  * +---+---+---------------+---+---+---------+--------+------+------+------+
2396  * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
2397  * +---+---+---------------+---+---+---------+--------+------+------+------+
2398  *
2399  * Rt: first (or only) SIMD&FP register to be transferred
2400  * Rn: base address or SP
2401  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2402  */
2403 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
2404 {
2405     int rt = extract32(insn, 0, 5);
2406     int rn = extract32(insn, 5, 5);
2407     int size = extract32(insn, 10, 2);
2408     int opcode = extract32(insn, 12, 4);
2409     bool is_store = !extract32(insn, 22, 1);
2410     bool is_postidx = extract32(insn, 23, 1);
2411     bool is_q = extract32(insn, 30, 1);
2412     TCGv_i64 tcg_addr, tcg_rn;
2413
2414     int ebytes = 1 << size;
2415     int elements = (is_q ? 128 : 64) / (8 << size);
2416     int rpt;    /* num iterations */
2417     int selem;  /* structure elements */
2418     int r;
2419
2420     if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
2421         unallocated_encoding(s);
2422         return;
2423     }
2424
2425     /* From the shared decode logic */
2426     switch (opcode) {
2427     case 0x0:
2428         rpt = 1;
2429         selem = 4;
2430         break;
2431     case 0x2:
2432         rpt = 4;
2433         selem = 1;
2434         break;
2435     case 0x4:
2436         rpt = 1;
2437         selem = 3;
2438         break;
2439     case 0x6:
2440         rpt = 3;
2441         selem = 1;
2442         break;
2443     case 0x7:
2444         rpt = 1;
2445         selem = 1;
2446         break;
2447     case 0x8:
2448         rpt = 1;
2449         selem = 2;
2450         break;
2451     case 0xa:
2452         rpt = 2;
2453         selem = 1;
2454         break;
2455     default:
2456         unallocated_encoding(s);
2457         return;
2458     }
2459
2460     if (size == 3 && !is_q && selem != 1) {
2461         /* reserved */
2462         unallocated_encoding(s);
2463         return;
2464     }
2465
2466     if (!fp_access_check(s)) {
2467         return;
2468     }
2469
2470     if (rn == 31) {
2471         gen_check_sp_alignment(s);
2472     }
2473
2474     tcg_rn = cpu_reg_sp(s, rn);
2475     tcg_addr = tcg_temp_new_i64();
2476     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2477
2478     for (r = 0; r < rpt; r++) {
2479         int e;
2480         for (e = 0; e < elements; e++) {
2481             int tt = (rt + r) % 32;
2482             int xs;
2483             for (xs = 0; xs < selem; xs++) {
2484                 if (is_store) {
2485                     do_vec_st(s, tt, e, tcg_addr, size);
2486                 } else {
2487                     do_vec_ld(s, tt, e, tcg_addr, size);
2488
2489                     /* For non-quad operations, setting a slice of the low
2490                      * 64 bits of the register clears the high 64 bits (in
2491                      * the ARM ARM pseudocode this is implicit in the fact
2492                      * that 'rval' is a 64 bit wide variable). We optimize
2493                      * by noticing that we only need to do this the first
2494                      * time we touch a register.
2495                      */
2496                     if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
2497                         clear_vec_high(s, tt);
2498                     }
2499                 }
2500                 tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2501                 tt = (tt + 1) % 32;
2502             }
2503         }
2504     }
2505
2506     if (is_postidx) {
2507         int rm = extract32(insn, 16, 5);
2508         if (rm == 31) {
2509             tcg_gen_mov_i64(tcg_rn, tcg_addr);
2510         } else {
2511             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2512         }
2513     }
2514     tcg_temp_free_i64(tcg_addr);
2515 }
2516
2517 /* C3.3.3 AdvSIMD load/store single structure
2518  *
2519  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2520  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2521  * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
2522  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2523  *
2524  * C3.3.4 AdvSIMD load/store single structure (post-indexed)
2525  *
2526  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2527  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2528  * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
2529  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2530  *
2531  * Rt: first (or only) SIMD&FP register to be transferred
2532  * Rn: base address or SP
2533  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2534  * index = encoded in Q:S:size dependent on size
2535  *
2536  * lane_size = encoded in R, opc
2537  * transfer width = encoded in opc, S, size
2538  */
2539 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
2540 {
2541     int rt = extract32(insn, 0, 5);
2542     int rn = extract32(insn, 5, 5);
2543     int size = extract32(insn, 10, 2);
2544     int S = extract32(insn, 12, 1);
2545     int opc = extract32(insn, 13, 3);
2546     int R = extract32(insn, 21, 1);
2547     int is_load = extract32(insn, 22, 1);
2548     int is_postidx = extract32(insn, 23, 1);
2549     int is_q = extract32(insn, 30, 1);
2550
2551     int scale = extract32(opc, 1, 2);
2552     int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
2553     bool replicate = false;
2554     int index = is_q << 3 | S << 2 | size;
2555     int ebytes, xs;
2556     TCGv_i64 tcg_addr, tcg_rn;
2557
2558     switch (scale) {
2559     case 3:
2560         if (!is_load || S) {
2561             unallocated_encoding(s);
2562             return;
2563         }
2564         scale = size;
2565         replicate = true;
2566         break;
2567     case 0:
2568         break;
2569     case 1:
2570         if (extract32(size, 0, 1)) {
2571             unallocated_encoding(s);
2572             return;
2573         }
2574         index >>= 1;
2575         break;
2576     case 2:
2577         if (extract32(size, 1, 1)) {
2578             unallocated_encoding(s);
2579             return;
2580         }
2581         if (!extract32(size, 0, 1)) {
2582             index >>= 2;
2583         } else {
2584             if (S) {
2585                 unallocated_encoding(s);
2586                 return;
2587             }
2588             index >>= 3;
2589             scale = 3;
2590         }
2591         break;
2592     default:
2593         g_assert_not_reached();
2594     }
2595
2596     if (!fp_access_check(s)) {
2597         return;
2598     }
2599
2600     ebytes = 1 << scale;
2601
2602     if (rn == 31) {
2603         gen_check_sp_alignment(s);
2604     }
2605
2606     tcg_rn = cpu_reg_sp(s, rn);
2607     tcg_addr = tcg_temp_new_i64();
2608     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2609
2610     for (xs = 0; xs < selem; xs++) {
2611         if (replicate) {
2612             /* Load and replicate to all elements */
2613             uint64_t mulconst;
2614             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
2615
2616             tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
2617                                 get_mem_index(s), MO_TE + scale);
2618             switch (scale) {
2619             case 0:
2620                 mulconst = 0x0101010101010101ULL;
2621                 break;
2622             case 1:
2623                 mulconst = 0x0001000100010001ULL;
2624                 break;
2625             case 2:
2626                 mulconst = 0x0000000100000001ULL;
2627                 break;
2628             case 3:
2629                 mulconst = 0;
2630                 break;
2631             default:
2632                 g_assert_not_reached();
2633             }
2634             if (mulconst) {
2635                 tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
2636             }
2637             write_vec_element(s, tcg_tmp, rt, 0, MO_64);
2638             if (is_q) {
2639                 write_vec_element(s, tcg_tmp, rt, 1, MO_64);
2640             } else {
2641                 clear_vec_high(s, rt);
2642             }
2643             tcg_temp_free_i64(tcg_tmp);
2644         } else {
2645             /* Load/store one element per register */
2646             if (is_load) {
2647                 do_vec_ld(s, rt, index, tcg_addr, MO_TE + scale);
2648             } else {
2649                 do_vec_st(s, rt, index, tcg_addr, MO_TE + scale);
2650             }
2651         }
2652         tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2653         rt = (rt + 1) % 32;
2654     }
2655
2656     if (is_postidx) {
2657         int rm = extract32(insn, 16, 5);
2658         if (rm == 31) {
2659             tcg_gen_mov_i64(tcg_rn, tcg_addr);
2660         } else {
2661             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2662         }
2663     }
2664     tcg_temp_free_i64(tcg_addr);
2665 }
2666
2667 /* C3.3 Loads and stores */
2668 static void disas_ldst(DisasContext *s, uint32_t insn)
2669 {
2670     switch (extract32(insn, 24, 6)) {
2671     case 0x08: /* Load/store exclusive */
2672         disas_ldst_excl(s, insn);
2673         break;
2674     case 0x18: case 0x1c: /* Load register (literal) */
2675         disas_ld_lit(s, insn);
2676         break;
2677     case 0x28: case 0x29:
2678     case 0x2c: case 0x2d: /* Load/store pair (all forms) */
2679         disas_ldst_pair(s, insn);
2680         break;
2681     case 0x38: case 0x39:
2682     case 0x3c: case 0x3d: /* Load/store register (all forms) */
2683         disas_ldst_reg(s, insn);
2684         break;
2685     case 0x0c: /* AdvSIMD load/store multiple structures */
2686         disas_ldst_multiple_struct(s, insn);
2687         break;
2688     case 0x0d: /* AdvSIMD load/store single structure */
2689         disas_ldst_single_struct(s, insn);
2690         break;
2691     default:
2692         unallocated_encoding(s);
2693         break;
2694     }
2695 }
2696
2697 /* C3.4.6 PC-rel. addressing
2698  *   31  30   29 28       24 23                5 4    0
2699  * +----+-------+-----------+-------------------+------+
2700  * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
2701  * +----+-------+-----------+-------------------+------+
2702  */
2703 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
2704 {
2705     unsigned int page, rd;
2706     uint64_t base;
2707     uint64_t offset;
2708
2709     page = extract32(insn, 31, 1);
2710     /* SignExtend(immhi:immlo) -> offset */
2711     offset = sextract64(insn, 5, 19);
2712     offset = offset << 2 | extract32(insn, 29, 2);
2713     rd = extract32(insn, 0, 5);
2714     base = s->pc - 4;
2715
2716     if (page) {
2717         /* ADRP (page based) */
2718         base &= ~0xfff;
2719         offset <<= 12;
2720     }
2721
2722     tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
2723 }
2724
2725 /*
2726  * C3.4.1 Add/subtract (immediate)
2727  *
2728  *  31 30 29 28       24 23 22 21         10 9   5 4   0
2729  * +--+--+--+-----------+-----+-------------+-----+-----+
2730  * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
2731  * +--+--+--+-----------+-----+-------------+-----+-----+
2732  *
2733  *    sf: 0 -> 32bit, 1 -> 64bit
2734  *    op: 0 -> add  , 1 -> sub
2735  *     S: 1 -> set flags
2736  * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
2737  */
2738 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
2739 {
2740     int rd = extract32(insn, 0, 5);
2741     int rn = extract32(insn, 5, 5);
2742     uint64_t imm = extract32(insn, 10, 12);
2743     int shift = extract32(insn, 22, 2);
2744     bool setflags = extract32(insn, 29, 1);
2745     bool sub_op = extract32(insn, 30, 1);
2746     bool is_64bit = extract32(insn, 31, 1);
2747
2748     TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2749     TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
2750     TCGv_i64 tcg_result;
2751
2752     switch (shift) {
2753     case 0x0:
2754         break;
2755     case 0x1:
2756         imm <<= 12;
2757         break;
2758     default:
2759         unallocated_encoding(s);
2760         return;
2761     }
2762
2763     tcg_result = tcg_temp_new_i64();
2764     if (!setflags) {
2765         if (sub_op) {
2766             tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
2767         } else {
2768             tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
2769         }
2770     } else {
2771         TCGv_i64 tcg_imm = tcg_const_i64(imm);
2772         if (sub_op) {
2773             gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2774         } else {
2775             gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2776         }
2777         tcg_temp_free_i64(tcg_imm);
2778     }
2779
2780     if (is_64bit) {
2781         tcg_gen_mov_i64(tcg_rd, tcg_result);
2782     } else {
2783         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
2784     }
2785
2786     tcg_temp_free_i64(tcg_result);
2787 }
2788
/* Replicate an element across a 64 bit value.
 *
 * mask holds the element in its bottom e bits (all higher bits zero);
 * the return value has that element copied into every e-bit slot of a
 * 64 bit integer.  e must be non-zero.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int width;

    assert(e != 0);
    /* Each pass doubles the populated width until all 64 bits are filled */
    for (width = e; width < 64; width *= 2) {
        mask |= mask << width;
    }
    return mask;
}
2802
/* Build a mask whose lowest 'length' bits are ones and whose remaining
 * bits are zero.  length must satisfy 0 < length <= 64.
 */
static inline uint64_t bitmask64(unsigned int length)
{
    const uint64_t all_ones = ~(uint64_t)0;

    assert(length > 0 && length <= 64);
    /* Shift the surplus high bits out rather than shifting ones in, so
     * that length == 64 never needs a shift by the full word width.
     */
    return all_ones >> (64 - length);
}
2809
/* Cut-down version of the pseudocode DecodeBitMasks(), producing only the
 * wmask.
 *
 * On success the decoded 64 bit pattern is stored through result and true
 * is returned.  If immn/imms/immr form a reserved combination (one that
 * should make the guest UNDEF), false is returned and *result is left
 * untouched.
 */
static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                                   unsigned int imms, unsigned int immr)
{
    uint64_t elem;
    unsigned int esize, emask, run, rot;
    int len;

    assert(immn < 2 && imms < 64 && immr < 64);

    /* Every pattern produced here is a 64 bit vector of identical
     * elements, each e = 2, 4, 8, 16, 32 or 64 bits wide.  One element
     * is a run of 1..e-1 set bits rotated right within the element by
     * 0..e-1 positions.
     *
     * immn (1 bit) and imms (6 bits) together encode both the element
     * size and the run length:
     * 64 bit elements: immn = 1, imms = <length of run - 1>
     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
     * The one combination left over, immn = 0 with imms = 11111x, is
     * reserved, and so is any <length of run - 1> that is all-ones.
     *
     * The rotation amount is always immr % e (immr being 6 bits).
     */

    /* Element size: index of the highest set bit in immn:NOT(imms) */
    len = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (len < 1) {
        /* immn == 0 and imms == 0b11111x */
        return false;
    }
    esize = 1 << len;
    emask = esize - 1;

    run = imms & emask;
    rot = immr & emask;
    if (run == emask) {
        /* an all-ones <length of run - 1> is reserved */
        return false;
    }

    /* Build a single element: run+1 low bits set ... */
    elem = bitmask64(run + 1);
    if (rot != 0) {
        /* ... rotated right by rot inside its esize-bit window ... */
        elem = (elem >> rot) | (elem << (esize - rot));
        elem &= bitmask64(esize);
    }

    /* ... and tiled across all 64 bits */
    *result = bitfield_replicate(elem, esize);
    return true;
}
2875
2876 /* C3.4.4 Logical (immediate)
2877  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
2878  * +----+-----+-------------+---+------+------+------+------+
2879  * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
2880  * +----+-----+-------------+---+------+------+------+------+
2881  */
2882 static void disas_logic_imm(DisasContext *s, uint32_t insn)
2883 {
2884     unsigned int sf, opc, is_n, immr, imms, rn, rd;
2885     TCGv_i64 tcg_rd, tcg_rn;
2886     uint64_t wmask;
2887     bool is_and = false;
2888
2889     sf = extract32(insn, 31, 1);
2890     opc = extract32(insn, 29, 2);
2891     is_n = extract32(insn, 22, 1);
2892     immr = extract32(insn, 16, 6);
2893     imms = extract32(insn, 10, 6);
2894     rn = extract32(insn, 5, 5);
2895     rd = extract32(insn, 0, 5);
2896
2897     if (!sf && is_n) {
2898         unallocated_encoding(s);
2899         return;
2900     }
2901
2902     if (opc == 0x3) { /* ANDS */
2903         tcg_rd = cpu_reg(s, rd);
2904     } else {
2905         tcg_rd = cpu_reg_sp(s, rd);
2906     }
2907     tcg_rn = cpu_reg(s, rn);
2908
2909     if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
2910         /* some immediate field values are reserved */
2911         unallocated_encoding(s);
2912         return;
2913     }
2914
2915     if (!sf) {
2916         wmask &= 0xffffffff;
2917     }
2918
2919     switch (opc) {
2920     case 0x3: /* ANDS */
2921     case 0x0: /* AND */
2922         tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
2923         is_and = true;
2924         break;
2925     case 0x1: /* ORR */
2926         tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
2927         break;
2928     case 0x2: /* EOR */
2929         tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
2930         break;
2931     default:
2932         assert(FALSE); /* must handle all above */
2933         break;
2934     }
2935
2936     if (!sf && !is_and) {
2937         /* zero extend final result; we know we can skip this for AND
2938          * since the immediate had the high 32 bits clear.
2939          */
2940         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2941     }
2942
2943     if (opc == 3) { /* ANDS */
2944         gen_logic_CC(sf, tcg_rd);
2945     }
2946 }
2947
2948 /*
2949  * C3.4.5 Move wide (immediate)
2950  *
2951  *  31 30 29 28         23 22 21 20             5 4    0
2952  * +--+-----+-------------+-----+----------------+------+
2953  * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
2954  * +--+-----+-------------+-----+----------------+------+
2955  *
2956  * sf: 0 -> 32 bit, 1 -> 64 bit
2957  * opc: 00 -> N, 10 -> Z, 11 -> K
2958  * hw: shift/16 (0,16, and sf only 32, 48)
2959  */
2960 static void disas_movw_imm(DisasContext *s, uint32_t insn)
2961 {
2962     int rd = extract32(insn, 0, 5);
2963     uint64_t imm = extract32(insn, 5, 16);
2964     int sf = extract32(insn, 31, 1);
2965     int opc = extract32(insn, 29, 2);
2966     int pos = extract32(insn, 21, 2) << 4;
2967     TCGv_i64 tcg_rd = cpu_reg(s, rd);
2968     TCGv_i64 tcg_imm;
2969
2970     if (!sf && (pos >= 32)) {
2971         unallocated_encoding(s);
2972         return;
2973     }
2974
2975     switch (opc) {
2976     case 0: /* MOVN */
2977     case 2: /* MOVZ */
2978         imm <<= pos;
2979         if (opc == 0) {
2980             imm = ~imm;
2981         }
2982         if (!sf) {
2983             imm &= 0xffffffffu;
2984         }
2985         tcg_gen_movi_i64(tcg_rd, imm);
2986         break;
2987     case 3: /* MOVK */
2988         tcg_imm = tcg_const_i64(imm);
2989         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
2990         tcg_temp_free_i64(tcg_imm);
2991         if (!sf) {
2992             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2993         }
2994         break;
2995     default:
2996         unallocated_encoding(s);
2997         break;
2998     }
2999 }
3000
3001 /* C3.4.2 Bitfield
3002  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3003  * +----+-----+-------------+---+------+------+------+------+
3004  * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
3005  * +----+-----+-------------+---+------+------+------+------+
3006  */
3007 static void disas_bitfield(DisasContext *s, uint32_t insn)
3008 {
3009     unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
3010     TCGv_i64 tcg_rd, tcg_tmp;
3011
3012     sf = extract32(insn, 31, 1);
3013     opc = extract32(insn, 29, 2);
3014     n = extract32(insn, 22, 1);
3015     ri = extract32(insn, 16, 6);
3016     si = extract32(insn, 10, 6);
3017     rn = extract32(insn, 5, 5);
3018     rd = extract32(insn, 0, 5);
3019     bitsize = sf ? 64 : 32;
3020
3021     if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3022         unallocated_encoding(s);
3023         return;
3024     }
3025
3026     tcg_rd = cpu_reg(s, rd);
3027
3028     /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
3029        to be smaller than bitsize, we'll never reference data outside the
3030        low 32-bits anyway.  */
3031     tcg_tmp = read_cpu_reg(s, rn, 1);
3032
3033     /* Recognize the common aliases.  */
3034     if (opc == 0) { /* SBFM */
3035         if (ri == 0) {
3036             if (si == 7) { /* SXTB */
3037                 tcg_gen_ext8s_i64(tcg_rd, tcg_tmp);
3038                 goto done;
3039             } else if (si == 15) { /* SXTH */
3040                 tcg_gen_ext16s_i64(tcg_rd, tcg_tmp);
3041                 goto done;
3042             } else if (si == 31) { /* SXTW */
3043                 tcg_gen_ext32s_i64(tcg_rd, tcg_tmp);
3044                 goto done;
3045             }
3046         }
3047         if (si == 63 || (si == 31 && ri <= si)) { /* ASR */
3048             if (si == 31) {
3049                 tcg_gen_ext32s_i64(tcg_tmp, tcg_tmp);
3050             }
3051             tcg_gen_sari_i64(tcg_rd, tcg_tmp, ri);
3052             goto done;
3053         }
3054     } else if (opc == 2) { /* UBFM */
3055         if (ri == 0) { /* UXTB, UXTH, plus non-canonical AND */
3056             tcg_gen_andi_i64(tcg_rd, tcg_tmp, bitmask64(si + 1));
3057             return;
3058         }
3059         if (si == 63 || (si == 31 && ri <= si)) { /* LSR */
3060             if (si == 31) {
3061                 tcg_gen_ext32u_i64(tcg_tmp, tcg_tmp);
3062             }
3063             tcg_gen_shri_i64(tcg_rd, tcg_tmp, ri);
3064             return;
3065         }
3066         if (si + 1 == ri && si != bitsize - 1) { /* LSL */
3067             int shift = bitsize - 1 - si;
3068             tcg_gen_shli_i64(tcg_rd, tcg_tmp, shift);
3069             goto done;
3070         }
3071     }
3072
3073     if (opc != 1) { /* SBFM or UBFM */
3074         tcg_gen_movi_i64(tcg_rd, 0);
3075     }
3076
3077     /* do the bit move operation */
3078     if (si >= ri) {
3079         /* Wd<s-r:0> = Wn<s:r> */
3080         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
3081         pos = 0;
3082         len = (si - ri) + 1;
3083     } else {
3084         /* Wd<32+s-r,32-r> = Wn<s:0> */
3085         pos = bitsize - ri;
3086         len = si + 1;
3087     }
3088
3089     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
3090
3091     if (opc == 0) { /* SBFM - sign extend the destination field */
3092         tcg_gen_shli_i64(tcg_rd, tcg_rd, 64 - (pos + len));
3093         tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len));
3094     }
3095
3096  done:
3097     if (!sf) { /* zero extend final result */
3098         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3099     }
3100 }
3101
3102 /* C3.4.3 Extract
3103  *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3104  * +----+------+-------------+---+----+------+--------+------+------+
3105  * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3106  * +----+------+-------------+---+----+------+--------+------+------+
3107  */
3108 static void disas_extract(DisasContext *s, uint32_t insn)
3109 {
3110     unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3111
3112     sf = extract32(insn, 31, 1);
3113     n = extract32(insn, 22, 1);
3114     rm = extract32(insn, 16, 5);
3115     imm = extract32(insn, 10, 6);
3116     rn = extract32(insn, 5, 5);
3117     rd = extract32(insn, 0, 5);
3118     op21 = extract32(insn, 29, 2);
3119     op0 = extract32(insn, 21, 1);
3120     bitsize = sf ? 64 : 32;
3121
3122     if (sf != n || op21 || op0 || imm >= bitsize) {
3123         unallocated_encoding(s);
3124     } else {
3125         TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3126
3127         tcg_rd = cpu_reg(s, rd);
3128
3129         if (unlikely(imm == 0)) {
3130             /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3131              * so an extract from bit 0 is a special case.
3132              */
3133             if (sf) {
3134                 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3135             } else {
3136                 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3137             }
3138         } else if (rm == rn) { /* ROR */
3139             tcg_rm = cpu_reg(s, rm);
3140             if (sf) {
3141                 tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
3142             } else {
3143                 TCGv_i32 tmp = tcg_temp_new_i32();
3144                 tcg_gen_extrl_i64_i32(tmp, tcg_rm);
3145                 tcg_gen_rotri_i32(tmp, tmp, imm);
3146                 tcg_gen_extu_i32_i64(tcg_rd, tmp);
3147                 tcg_temp_free_i32(tmp);
3148             }
3149         } else {
3150             tcg_rm = read_cpu_reg(s, rm, sf);
3151             tcg_rn = read_cpu_reg(s, rn, sf);
3152             tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
3153             tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
3154             tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
3155             if (!sf) {
3156                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3157             }
3158         }
3159     }
3160 }
3161
3162 /* C3.4 Data processing - immediate */
3163 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3164 {
3165     switch (extract32(insn, 23, 6)) {
3166     case 0x20: case 0x21: /* PC-rel. addressing */
3167         disas_pc_rel_adr(s, insn);
3168         break;
3169     case 0x22: case 0x23: /* Add/subtract (immediate) */
3170         disas_add_sub_imm(s, insn);
3171         break;
3172     case 0x24: /* Logical (immediate) */
3173         disas_logic_imm(s, insn);
3174         break;
3175     case 0x25: /* Move wide (immediate) */
3176         disas_movw_imm(s, insn);
3177         break;
3178     case 0x26: /* Bitfield */
3179         disas_bitfield(s, insn);
3180         break;
3181     case 0x27: /* Extract */
3182         disas_extract(s, insn);
3183         break;
3184     default:
3185         unallocated_encoding(s);
3186         break;
3187     }
3188 }
3189
3190 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
3191  * Note that it is the caller's responsibility to ensure that the
3192  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
3193  * mandated semantics for out of range shifts.
3194  */
3195 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
3196                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3197 {
3198     switch (shift_type) {
3199     case A64_SHIFT_TYPE_LSL:
3200         tcg_gen_shl_i64(dst, src, shift_amount);
3201         break;
3202     case A64_SHIFT_TYPE_LSR:
3203         tcg_gen_shr_i64(dst, src, shift_amount);
3204         break;
3205     case A64_SHIFT_TYPE_ASR:
3206         if (!sf) {
3207             tcg_gen_ext32s_i64(dst, src);
3208         }
3209         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
3210         break;
3211     case A64_SHIFT_TYPE_ROR:
3212         if (sf) {
3213             tcg_gen_rotr_i64(dst, src, shift_amount);
3214         } else {
3215             TCGv_i32 t0, t1;
3216             t0 = tcg_temp_new_i32();
3217             t1 = tcg_temp_new_i32();
3218             tcg_gen_extrl_i64_i32(t0, src);
3219             tcg_gen_extrl_i64_i32(t1, shift_amount);
3220             tcg_gen_rotr_i32(t0, t0, t1);
3221             tcg_gen_extu_i32_i64(dst, t0);
3222             tcg_temp_free_i32(t0);
3223             tcg_temp_free_i32(t1);
3224         }
3225         break;
3226     default:
3227         assert(FALSE); /* all shift types should be handled */
3228         break;
3229     }
3230
3231     if (!sf) { /* zero extend final result */
3232         tcg_gen_ext32u_i64(dst, dst);
3233     }
3234 }
3235
3236 /* Shift a TCGv src by immediate, put result in dst.
3237  * The shift amount must be in range (this should always be true as the
3238  * relevant instructions will UNDEF on bad shift immediates).
3239  */
3240 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
3241                           enum a64_shift_type shift_type, unsigned int shift_i)
3242 {
3243     assert(shift_i < (sf ? 64 : 32));
3244
3245     if (shift_i == 0) {
3246         tcg_gen_mov_i64(dst, src);
3247     } else {
3248         TCGv_i64 shift_const;
3249
3250         shift_const = tcg_const_i64(shift_i);
3251         shift_reg(dst, src, sf, shift_type, shift_const);
3252         tcg_temp_free_i64(shift_const);
3253     }
3254 }
3255
3256 /* C3.5.10 Logical (shifted register)
3257  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
3258  * +----+-----+-----------+-------+---+------+--------+------+------+
3259  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
3260  * +----+-----+-----------+-------+---+------+--------+------+------+
3261  */
3262 static void disas_logic_reg(DisasContext *s, uint32_t insn)
3263 {
3264     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3265     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3266
3267     sf = extract32(insn, 31, 1);
3268     opc = extract32(insn, 29, 2);
3269     shift_type = extract32(insn, 22, 2);
3270     invert = extract32(insn, 21, 1);
3271     rm = extract32(insn, 16, 5);
3272     shift_amount = extract32(insn, 10, 6);
3273     rn = extract32(insn, 5, 5);
3274     rd = extract32(insn, 0, 5);
3275
3276     if (!sf && (shift_amount & (1 << 5))) {
3277         unallocated_encoding(s);
3278         return;
3279     }
3280
3281     tcg_rd = cpu_reg(s, rd);
3282
3283     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3284         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3285          * register-register MOV and MVN, so it is worth special casing.
3286          */
3287         tcg_rm = cpu_reg(s, rm);
3288         if (invert) {
3289             tcg_gen_not_i64(tcg_rd, tcg_rm);
3290             if (!sf) {
3291                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3292             }
3293         } else {
3294             if (sf) {
3295                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
3296             } else {
3297                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
3298             }
3299         }
3300         return;
3301     }
3302
3303     tcg_rm = read_cpu_reg(s, rm, sf);
3304
3305     if (shift_amount) {
3306         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3307     }
3308
3309     tcg_rn = cpu_reg(s, rn);
3310
3311     switch (opc | (invert << 2)) {
3312     case 0: /* AND */
3313     case 3: /* ANDS */
3314         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3315         break;
3316     case 1: /* ORR */
3317         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3318         break;
3319     case 2: /* EOR */
3320         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3321         break;
3322     case 4: /* BIC */
3323     case 7: /* BICS */
3324         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3325         break;
3326     case 5: /* ORN */
3327         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3328         break;
3329     case 6: /* EON */
3330         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3331         break;
3332     default:
3333         assert(FALSE);
3334         break;
3335     }
3336
3337     if (!sf) {
3338         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3339     }
3340
3341     if (opc == 3) {
3342         gen_logic_CC(sf, tcg_rd);
3343     }
3344 }
3345
3346 /*
3347  * C3.5.1 Add/subtract (extended register)
3348  *
3349  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
3350  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3351  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
3352  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3353  *
3354  *  sf: 0 -> 32bit, 1 -> 64bit
3355  *  op: 0 -> add  , 1 -> sub
3356  *   S: 1 -> set flags
3357  * opt: 00
3358  * option: extension type (see DecodeRegExtend)
3359  * imm3: optional shift to Rm
3360  *
3361  * Rd = Rn + LSL(extend(Rm), amount)
3362  */
3363 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3364 {
3365     int rd = extract32(insn, 0, 5);
3366     int rn = extract32(insn, 5, 5);
3367     int imm3 = extract32(insn, 10, 3);
3368     int option = extract32(insn, 13, 3);
3369     int rm = extract32(insn, 16, 5);
3370     bool setflags = extract32(insn, 29, 1);
3371     bool sub_op = extract32(insn, 30, 1);
3372     bool sf = extract32(insn, 31, 1);
3373
3374     TCGv_i64 tcg_rm, tcg_rn; /* temps */
3375     TCGv_i64 tcg_rd;
3376     TCGv_i64 tcg_result;
3377
3378     if (imm3 > 4) {
3379         unallocated_encoding(s);
3380         return;
3381     }
3382
3383     /* non-flag setting ops may use SP */
3384     if (!setflags) {
3385         tcg_rd = cpu_reg_sp(s, rd);
3386     } else {
3387         tcg_rd = cpu_reg(s, rd);
3388     }
3389     tcg_rn = read_cpu_reg_sp(s, rn, sf);
3390
3391     tcg_rm = read_cpu_reg(s, rm, sf);
3392     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3393
3394     tcg_result = tcg_temp_new_i64();
3395
3396     if (!setflags) {
3397         if (sub_op) {
3398             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3399         } else {
3400             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3401         }
3402     } else {
3403         if (sub_op) {
3404             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3405         } else {
3406             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3407         }
3408     }
3409
3410     if (sf) {
3411         tcg_gen_mov_i64(tcg_rd, tcg_result);
3412     } else {
3413         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3414     }
3415
3416     tcg_temp_free_i64(tcg_result);
3417 }
3418
3419 /*
3420  * C3.5.2 Add/subtract (shifted register)
3421  *
3422  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
3423  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3424  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
3425  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3426  *
3427  *    sf: 0 -> 32bit, 1 -> 64bit
3428  *    op: 0 -> add  , 1 -> sub
3429  *     S: 1 -> set flags
3430  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3431  *  imm6: Shift amount to apply to Rm before the add/sub
3432  */
3433 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3434 {
3435     int rd = extract32(insn, 0, 5);
3436     int rn = extract32(insn, 5, 5);
3437     int imm6 = extract32(insn, 10, 6);
3438     int rm = extract32(insn, 16, 5);
3439     int shift_type = extract32(insn, 22, 2);
3440     bool setflags = extract32(insn, 29, 1);
3441     bool sub_op = extract32(insn, 30, 1);
3442     bool sf = extract32(insn, 31, 1);
3443
3444     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3445     TCGv_i64 tcg_rn, tcg_rm;
3446     TCGv_i64 tcg_result;
3447
3448     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3449         unallocated_encoding(s);
3450         return;
3451     }
3452
3453     tcg_rn = read_cpu_reg(s, rn, sf);
3454     tcg_rm = read_cpu_reg(s, rm, sf);
3455
3456     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
3457
3458     tcg_result = tcg_temp_new_i64();
3459
3460     if (!setflags) {
3461         if (sub_op) {
3462             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3463         } else {
3464             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3465         }
3466     } else {
3467         if (sub_op) {
3468             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3469         } else {
3470             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3471         }
3472     }
3473
3474     if (sf) {
3475         tcg_gen_mov_i64(tcg_rd, tcg_result);
3476     } else {
3477         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3478     }
3479
3480     tcg_temp_free_i64(tcg_result);
3481 }
3482
3483 /* C3.5.9 Data-processing (3 source)
3484
3485    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
3486   +--+------+-----------+------+------+----+------+------+------+
3487   |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
3488   +--+------+-----------+------+------+----+------+------+------+
3489
3490  */
3491 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
3492 {
3493     int rd = extract32(insn, 0, 5);
3494     int rn = extract32(insn, 5, 5);
3495     int ra = extract32(insn, 10, 5);
3496     int rm = extract32(insn, 16, 5);
3497     int op_id = (extract32(insn, 29, 3) << 4) |
3498         (extract32(insn, 21, 3) << 1) |
3499         extract32(insn, 15, 1);
3500     bool sf = extract32(insn, 31, 1);
3501     bool is_sub = extract32(op_id, 0, 1);
3502     bool is_high = extract32(op_id, 2, 1);
3503     bool is_signed = false;
3504     TCGv_i64 tcg_op1;
3505     TCGv_i64 tcg_op2;
3506     TCGv_i64 tcg_tmp;
3507
3508     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
3509     switch (op_id) {
3510     case 0x42: /* SMADDL */
3511     case 0x43: /* SMSUBL */
3512     case 0x44: /* SMULH */
3513         is_signed = true;
3514         break;
3515     case 0x0: /* MADD (32bit) */
3516     case 0x1: /* MSUB (32bit) */
3517     case 0x40: /* MADD (64bit) */
3518     case 0x41: /* MSUB (64bit) */
3519     case 0x4a: /* UMADDL */
3520     case 0x4b: /* UMSUBL */
3521     case 0x4c: /* UMULH */
3522         break;
3523     default:
3524         unallocated_encoding(s);
3525         return;
3526     }
3527
3528     if (is_high) {
3529         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
3530         TCGv_i64 tcg_rd = cpu_reg(s, rd);
3531         TCGv_i64 tcg_rn = cpu_reg(s, rn);
3532         TCGv_i64 tcg_rm = cpu_reg(s, rm);
3533
3534         if (is_signed) {
3535             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3536         } else {
3537             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3538         }
3539
3540         tcg_temp_free_i64(low_bits);
3541         return;
3542     }
3543
3544     tcg_op1 = tcg_temp_new_i64();
3545     tcg_op2 = tcg_temp_new_i64();
3546     tcg_tmp = tcg_temp_new_i64();
3547
3548     if (op_id < 0x42) {
3549         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
3550         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
3551     } else {
3552         if (is_signed) {
3553             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
3554             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
3555         } else {
3556             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
3557             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
3558         }
3559     }
3560
3561     if (ra == 31 && !is_sub) {
3562         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
3563         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
3564     } else {
3565         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
3566         if (is_sub) {
3567             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3568         } else {
3569             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3570         }
3571     }
3572
3573     if (!sf) {
3574         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
3575     }
3576
3577     tcg_temp_free_i64(tcg_op1);
3578     tcg_temp_free_i64(tcg_op2);
3579     tcg_temp_free_i64(tcg_tmp);
3580 }
3581
3582 /* C3.5.3 - Add/subtract (with carry)
3583  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
3584  * +--+--+--+------------------------+------+---------+------+-----+
3585  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
3586  * +--+--+--+------------------------+------+---------+------+-----+
3587  *                                            [000000]
3588  */
3589
3590 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
3591 {
3592     unsigned int sf, op, setflags, rm, rn, rd;
3593     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
3594
3595     if (extract32(insn, 10, 6) != 0) {
3596         unallocated_encoding(s);
3597         return;
3598     }
3599
3600     sf = extract32(insn, 31, 1);
3601     op = extract32(insn, 30, 1);
3602     setflags = extract32(insn, 29, 1);
3603     rm = extract32(insn, 16, 5);
3604     rn = extract32(insn, 5, 5);
3605     rd = extract32(insn, 0, 5);
3606
3607     tcg_rd = cpu_reg(s, rd);
3608     tcg_rn = cpu_reg(s, rn);
3609
3610     if (op) {
3611         tcg_y = new_tmp_a64(s);
3612         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
3613     } else {
3614         tcg_y = cpu_reg(s, rm);
3615     }
3616
3617     if (setflags) {
3618         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
3619     } else {
3620         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
3621     }
3622 }
3623
3624 /* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
3625  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
3626  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3627  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
3628  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3629  *        [1]                             y                [0]       [0]
3630  */
3631 static void disas_cc(DisasContext *s, uint32_t insn)
3632 {
3633     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
3634     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
3635     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
3636     DisasCompare c;
3637
3638     if (!extract32(insn, 29, 1)) {
3639         unallocated_encoding(s);
3640         return;
3641     }
3642     if (insn & (1 << 10 | 1 << 4)) {
3643         unallocated_encoding(s);
3644         return;
3645     }
3646     sf = extract32(insn, 31, 1);
3647     op = extract32(insn, 30, 1);
3648     is_imm = extract32(insn, 11, 1);
3649     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
3650     cond = extract32(insn, 12, 4);
3651     rn = extract32(insn, 5, 5);
3652     nzcv = extract32(insn, 0, 4);
3653
3654     /* Set T0 = !COND.  */
3655     tcg_t0 = tcg_temp_new_i32();
3656     arm_test_cc(&c, cond);
3657     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
3658     arm_free_cc(&c);
3659
3660     /* Load the arguments for the new comparison.  */
3661     if (is_imm) {
3662         tcg_y = new_tmp_a64(s);
3663         tcg_gen_movi_i64(tcg_y, y);
3664     } else {
3665         tcg_y = cpu_reg(s, y);
3666     }
3667     tcg_rn = cpu_reg(s, rn);
3668
3669     /* Set the flags for the new comparison.  */
3670     tcg_tmp = tcg_temp_new_i64();
3671     if (op) {
3672         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3673     } else {
3674         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3675     }
3676     tcg_temp_free_i64(tcg_tmp);
3677
3678     /* If COND was false, force the flags to #nzcv.  Compute two masks
3679      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
3680      * For tcg hosts that support ANDC, we can make do with just T1.
3681      * In either case, allow the tcg optimizer to delete any unused mask.
3682      */
3683     tcg_t1 = tcg_temp_new_i32();
3684     tcg_t2 = tcg_temp_new_i32();
3685     tcg_gen_neg_i32(tcg_t1, tcg_t0);
3686     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
3687
3688     if (nzcv & 8) { /* N */
3689         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
3690     } else {
3691         if (TCG_TARGET_HAS_andc_i32) {
3692             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
3693         } else {
3694             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
3695         }
3696     }
3697     if (nzcv & 4) { /* Z */
3698         if (TCG_TARGET_HAS_andc_i32) {
3699             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
3700         } else {
3701             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
3702         }
3703     } else {
3704         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
3705     }
3706     if (nzcv & 2) { /* C */
3707         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
3708     } else {
3709         if (TCG_TARGET_HAS_andc_i32) {
3710             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
3711         } else {
3712             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
3713         }
3714     }
3715     if (nzcv & 1) { /* V */
3716         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
3717     } else {
3718         if (TCG_TARGET_HAS_andc_i32) {
3719             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
3720         } else {
3721             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
3722         }
3723     }
3724     tcg_temp_free_i32(tcg_t0);
3725     tcg_temp_free_i32(tcg_t1);
3726     tcg_temp_free_i32(tcg_t2);
3727 }
3728
3729 /* C3.5.6 Conditional select
3730  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
3731  * +----+----+---+-----------------+------+------+-----+------+------+
3732  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
3733  * +----+----+---+-----------------+------+------+-----+------+------+
3734  */
3735 static void disas_cond_select(DisasContext *s, uint32_t insn)
3736 {
3737     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
3738     TCGv_i64 tcg_rd, zero;
3739     DisasCompare64 c;
3740
3741     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
3742         /* S == 1 or op2<1> == 1 */
3743         unallocated_encoding(s);
3744         return;
3745     }
3746     sf = extract32(insn, 31, 1);
3747     else_inv = extract32(insn, 30, 1);
3748     rm = extract32(insn, 16, 5);
3749     cond = extract32(insn, 12, 4);
3750     else_inc = extract32(insn, 10, 1);
3751     rn = extract32(insn, 5, 5);
3752     rd = extract32(insn, 0, 5);
3753
3754     tcg_rd = cpu_reg(s, rd);
3755
3756     a64_test_cc(&c, cond);
3757     zero = tcg_const_i64(0);
3758
3759     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
3760         /* CSET & CSETM.  */
3761         tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
3762         if (else_inv) {
3763             tcg_gen_neg_i64(tcg_rd, tcg_rd);
3764         }
3765     } else {
3766         TCGv_i64 t_true = cpu_reg(s, rn);
3767         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
3768         if (else_inv && else_inc) {
3769             tcg_gen_neg_i64(t_false, t_false);
3770         } else if (else_inv) {
3771             tcg_gen_not_i64(t_false, t_false);
3772         } else if (else_inc) {
3773             tcg_gen_addi_i64(t_false, t_false, 1);
3774         }
3775         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
3776     }
3777
3778     tcg_temp_free_i64(zero);
3779     a64_free_cc(&c);
3780
3781     if (!sf) {
3782         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3783     }
3784 }
3785
3786 static void handle_clz(DisasContext *s, unsigned int sf,
3787                        unsigned int rn, unsigned int rd)
3788 {
3789     TCGv_i64 tcg_rd, tcg_rn;
3790     tcg_rd = cpu_reg(s, rd);
3791     tcg_rn = cpu_reg(s, rn);
3792
3793     if (sf) {
3794         gen_helper_clz64(tcg_rd, tcg_rn);
3795     } else {
3796         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3797         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3798         gen_helper_clz(tcg_tmp32, tcg_tmp32);
3799         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3800         tcg_temp_free_i32(tcg_tmp32);
3801     }
3802 }
3803
3804 static void handle_cls(DisasContext *s, unsigned int sf,
3805                        unsigned int rn, unsigned int rd)
3806 {
3807     TCGv_i64 tcg_rd, tcg_rn;
3808     tcg_rd = cpu_reg(s, rd);
3809     tcg_rn = cpu_reg(s, rn);
3810
3811     if (sf) {
3812         gen_helper_cls64(tcg_rd, tcg_rn);
3813     } else {
3814         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3815         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3816         gen_helper_cls32(tcg_tmp32, tcg_tmp32);
3817         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3818         tcg_temp_free_i32(tcg_tmp32);
3819     }
3820 }
3821
3822 static void handle_rbit(DisasContext *s, unsigned int sf,
3823                         unsigned int rn, unsigned int rd)
3824 {
3825     TCGv_i64 tcg_rd, tcg_rn;
3826     tcg_rd = cpu_reg(s, rd);
3827     tcg_rn = cpu_reg(s, rn);
3828
3829     if (sf) {
3830         gen_helper_rbit64(tcg_rd, tcg_rn);
3831     } else {
3832         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3833         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3834         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
3835         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3836         tcg_temp_free_i32(tcg_tmp32);
3837     }
3838 }
3839
3840 /* C5.6.149 REV with sf==1, opcode==3 ("REV64") */
3841 static void handle_rev64(DisasContext *s, unsigned int sf,
3842                          unsigned int rn, unsigned int rd)
3843 {
3844     if (!sf) {
3845         unallocated_encoding(s);
3846         return;
3847     }
3848     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
3849 }
3850
3851 /* C5.6.149 REV with sf==0, opcode==2
3852  * C5.6.151 REV32 (sf==1, opcode==2)
3853  */
3854 static void handle_rev32(DisasContext *s, unsigned int sf,
3855                          unsigned int rn, unsigned int rd)
3856 {
3857     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3858
3859     if (sf) {
3860         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3861         TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3862
3863         /* bswap32_i64 requires zero high word */
3864         tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
3865         tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
3866         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3867         tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
3868         tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
3869
3870         tcg_temp_free_i64(tcg_tmp);
3871     } else {
3872         tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
3873         tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
3874     }
3875 }
3876
3877 /* C5.6.150 REV16 (opcode==1) */
3878 static void handle_rev16(DisasContext *s, unsigned int sf,
3879                          unsigned int rn, unsigned int rd)
3880 {
3881     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3882     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3883     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3884
3885     tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff);
3886     tcg_gen_bswap16_i64(tcg_rd, tcg_tmp);
3887
3888     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16);
3889     tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
3890     tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3891     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16);
3892
3893     if (sf) {
3894         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3895         tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
3896         tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3897         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16);
3898
3899         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48);
3900         tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3901         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16);
3902     }
3903
3904     tcg_temp_free_i64(tcg_tmp);
3905 }
3906
3907 /* C3.5.7 Data-processing (1 source)
3908  *   31  30  29  28             21 20     16 15    10 9    5 4    0
3909  * +----+---+---+-----------------+---------+--------+------+------+
3910  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
3911  * +----+---+---+-----------------+---------+--------+------+------+
3912  */
3913 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
3914 {
3915     unsigned int sf, opcode, rn, rd;
3916
3917     if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
3918         unallocated_encoding(s);
3919         return;
3920     }
3921
3922     sf = extract32(insn, 31, 1);
3923     opcode = extract32(insn, 10, 6);
3924     rn = extract32(insn, 5, 5);
3925     rd = extract32(insn, 0, 5);
3926
3927     switch (opcode) {
3928     case 0: /* RBIT */
3929         handle_rbit(s, sf, rn, rd);
3930         break;
3931     case 1: /* REV16 */
3932         handle_rev16(s, sf, rn, rd);
3933         break;
3934     case 2: /* REV32 */
3935         handle_rev32(s, sf, rn, rd);
3936         break;
3937     case 3: /* REV64 */
3938         handle_rev64(s, sf, rn, rd);
3939         break;
3940     case 4: /* CLZ */
3941         handle_clz(s, sf, rn, rd);
3942         break;
3943     case 5: /* CLS */
3944         handle_cls(s, sf, rn, rd);
3945         break;
3946     }
3947 }
3948
3949 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
3950                        unsigned int rm, unsigned int rn, unsigned int rd)
3951 {
3952     TCGv_i64 tcg_n, tcg_m, tcg_rd;
3953     tcg_rd = cpu_reg(s, rd);
3954
3955     if (!sf && is_signed) {
3956         tcg_n = new_tmp_a64(s);
3957         tcg_m = new_tmp_a64(s);
3958         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
3959         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
3960     } else {
3961         tcg_n = read_cpu_reg(s, rn, sf);
3962         tcg_m = read_cpu_reg(s, rm, sf);
3963     }
3964
3965     if (is_signed) {
3966         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
3967     } else {
3968         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
3969     }
3970
3971     if (!sf) { /* zero extend final result */
3972         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3973     }
3974 }
3975
3976 /* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */
3977 static void handle_shift_reg(DisasContext *s,
3978                              enum a64_shift_type shift_type, unsigned int sf,
3979                              unsigned int rm, unsigned int rn, unsigned int rd)
3980 {
3981     TCGv_i64 tcg_shift = tcg_temp_new_i64();
3982     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3983     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3984
3985     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
3986     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
3987     tcg_temp_free_i64(tcg_shift);
3988 }
3989
3990 /* CRC32[BHWX], CRC32C[BHWX] */
3991 static void handle_crc32(DisasContext *s,
3992                          unsigned int sf, unsigned int sz, bool crc32c,
3993                          unsigned int rm, unsigned int rn, unsigned int rd)
3994 {
3995     TCGv_i64 tcg_acc, tcg_val;
3996     TCGv_i32 tcg_bytes;
3997
3998     if (!arm_dc_feature(s, ARM_FEATURE_CRC)
3999         || (sf == 1 && sz != 3)
4000         || (sf == 0 && sz == 3)) {
4001         unallocated_encoding(s);
4002         return;
4003     }
4004
4005     if (sz == 3) {
4006         tcg_val = cpu_reg(s, rm);
4007     } else {
4008         uint64_t mask;
4009         switch (sz) {
4010         case 0:
4011             mask = 0xFF;
4012             break;
4013         case 1:
4014             mask = 0xFFFF;
4015             break;
4016         case 2:
4017             mask = 0xFFFFFFFF;
4018             break;
4019         default:
4020             g_assert_not_reached();
4021         }
4022         tcg_val = new_tmp_a64(s);
4023         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
4024     }
4025
4026     tcg_acc = cpu_reg(s, rn);
4027     tcg_bytes = tcg_const_i32(1 << sz);
4028
4029     if (crc32c) {
4030         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4031     } else {
4032         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4033     }
4034
4035     tcg_temp_free_i32(tcg_bytes);
4036 }
4037
4038 /* C3.5.8 Data-processing (2 source)
4039  *   31   30  29 28             21 20  16 15    10 9    5 4    0
4040  * +----+---+---+-----------------+------+--------+------+------+
4041  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
4042  * +----+---+---+-----------------+------+--------+------+------+
4043  */
4044 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
4045 {
4046     unsigned int sf, rm, opcode, rn, rd;
4047     sf = extract32(insn, 31, 1);
4048     rm = extract32(insn, 16, 5);
4049     opcode = extract32(insn, 10, 6);
4050     rn = extract32(insn, 5, 5);
4051     rd = extract32(insn, 0, 5);
4052
4053     if (extract32(insn, 29, 1)) {
4054         unallocated_encoding(s);
4055         return;
4056     }
4057
4058     switch (opcode) {
4059     case 2: /* UDIV */
4060         handle_div(s, false, sf, rm, rn, rd);
4061         break;
4062     case 3: /* SDIV */
4063         handle_div(s, true, sf, rm, rn, rd);
4064         break;
4065     case 8: /* LSLV */
4066         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
4067         break;
4068     case 9: /* LSRV */
4069         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
4070         break;
4071     case 10: /* ASRV */
4072         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
4073         break;
4074     case 11: /* RORV */
4075         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
4076         break;
4077     case 16:
4078     case 17:
4079     case 18:
4080     case 19:
4081     case 20:
4082     case 21:
4083     case 22:
4084     case 23: /* CRC32 */
4085     {
4086         int sz = extract32(opcode, 0, 2);
4087         bool crc32c = extract32(opcode, 2, 1);
4088         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
4089         break;
4090     }
4091     default:
4092         unallocated_encoding(s);
4093         break;
4094     }
4095 }
4096
4097 /* C3.5 Data processing - register */
4098 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
4099 {
4100     switch (extract32(insn, 24, 5)) {
4101     case 0x0a: /* Logical (shifted register) */
4102         disas_logic_reg(s, insn);
4103         break;
4104     case 0x0b: /* Add/subtract */
4105         if (insn & (1 << 21)) { /* (extended register) */
4106             disas_add_sub_ext_reg(s, insn);
4107         } else {
4108             disas_add_sub_reg(s, insn);
4109         }
4110         break;
4111     case 0x1b: /* Data-processing (3 source) */
4112         disas_data_proc_3src(s, insn);
4113         break;
4114     case 0x1a:
4115         switch (extract32(insn, 21, 3)) {
4116         case 0x0: /* Add/subtract (with carry) */
4117             disas_adc_sbc(s, insn);
4118             break;
4119         case 0x2: /* Conditional compare */
4120             disas_cc(s, insn); /* both imm and reg forms */
4121             break;
4122         case 0x4: /* Conditional select */
4123             disas_cond_select(s, insn);
4124             break;
4125         case 0x6: /* Data-processing */
4126             if (insn & (1 << 30)) { /* (1 source) */
4127                 disas_data_proc_1src(s, insn);
4128             } else {            /* (2 source) */
4129                 disas_data_proc_2src(s, insn);
4130             }
4131             break;
4132         default:
4133             unallocated_encoding(s);
4134             break;
4135         }
4136         break;
4137     default:
4138         unallocated_encoding(s);
4139         break;
4140     }
4141 }
4142
4143 static void handle_fp_compare(DisasContext *s, bool is_double,
4144                               unsigned int rn, unsigned int rm,
4145                               bool cmp_with_zero, bool signal_all_nans)
4146 {
4147     TCGv_i64 tcg_flags = tcg_temp_new_i64();
4148     TCGv_ptr fpst = get_fpstatus_ptr();
4149
4150     if (is_double) {
4151         TCGv_i64 tcg_vn, tcg_vm;
4152
4153         tcg_vn = read_fp_dreg(s, rn);
4154         if (cmp_with_zero) {
4155             tcg_vm = tcg_const_i64(0);
4156         } else {
4157             tcg_vm = read_fp_dreg(s, rm);
4158         }
4159         if (signal_all_nans) {
4160             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4161         } else {
4162             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4163         }
4164         tcg_temp_free_i64(tcg_vn);
4165         tcg_temp_free_i64(tcg_vm);
4166     } else {
4167         TCGv_i32 tcg_vn, tcg_vm;
4168
4169         tcg_vn = read_fp_sreg(s, rn);
4170         if (cmp_with_zero) {
4171             tcg_vm = tcg_const_i32(0);
4172         } else {
4173             tcg_vm = read_fp_sreg(s, rm);
4174         }
4175         if (signal_all_nans) {
4176             gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4177         } else {
4178             gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4179         }
4180         tcg_temp_free_i32(tcg_vn);
4181         tcg_temp_free_i32(tcg_vm);
4182     }
4183
4184     tcg_temp_free_ptr(fpst);
4185
4186     gen_set_nzcv(tcg_flags);
4187
4188     tcg_temp_free_i64(tcg_flags);
4189 }
4190
4191 /* C3.6.22 Floating point compare
4192  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
4193  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4194  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
4195  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4196  */
4197 static void disas_fp_compare(DisasContext *s, uint32_t insn)
4198 {
4199     unsigned int mos, type, rm, op, rn, opc, op2r;
4200
4201     mos = extract32(insn, 29, 3);
4202     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4203     rm = extract32(insn, 16, 5);
4204     op = extract32(insn, 14, 2);
4205     rn = extract32(insn, 5, 5);
4206     opc = extract32(insn, 3, 2);
4207     op2r = extract32(insn, 0, 3);
4208
4209     if (mos || op || op2r || type > 1) {
4210         unallocated_encoding(s);
4211         return;
4212     }
4213
4214     if (!fp_access_check(s)) {
4215         return;
4216     }
4217
4218     handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
4219 }
4220
4221 /* C3.6.23 Floating point conditional compare
4222  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
4223  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4224  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
4225  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4226  */
4227 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
4228 {
4229     unsigned int mos, type, rm, cond, rn, op, nzcv;
4230     TCGv_i64 tcg_flags;
4231     TCGLabel *label_continue = NULL;
4232
4233     mos = extract32(insn, 29, 3);
4234     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4235     rm = extract32(insn, 16, 5);
4236     cond = extract32(insn, 12, 4);
4237     rn = extract32(insn, 5, 5);
4238     op = extract32(insn, 4, 1);
4239     nzcv = extract32(insn, 0, 4);
4240
4241     if (mos || type > 1) {
4242         unallocated_encoding(s);
4243         return;
4244     }
4245
4246     if (!fp_access_check(s)) {
4247         return;
4248     }
4249
4250     if (cond < 0x0e) { /* not always */
4251         TCGLabel *label_match = gen_new_label();
4252         label_continue = gen_new_label();
4253         arm_gen_test_cc(cond, label_match);
4254         /* nomatch: */
4255         tcg_flags = tcg_const_i64(nzcv << 28);
4256         gen_set_nzcv(tcg_flags);
4257         tcg_temp_free_i64(tcg_flags);
4258         tcg_gen_br(label_continue);
4259         gen_set_label(label_match);
4260     }
4261
4262     handle_fp_compare(s, type, rn, rm, false, op);
4263
4264     if (cond < 0x0e) {
4265         gen_set_label(label_continue);
4266     }
4267 }
4268
4269 /* C3.6.24 Floating point conditional select
4270  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
4271  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4272  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
4273  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4274  */
4275 static void disas_fp_csel(DisasContext *s, uint32_t insn)
4276 {
4277     unsigned int mos, type, rm, cond, rn, rd;
4278     TCGv_i64 t_true, t_false, t_zero;
4279     DisasCompare64 c;
4280
4281     mos = extract32(insn, 29, 3);
4282     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4283     rm = extract32(insn, 16, 5);
4284     cond = extract32(insn, 12, 4);
4285     rn = extract32(insn, 5, 5);
4286     rd = extract32(insn, 0, 5);
4287
4288     if (mos || type > 1) {
4289         unallocated_encoding(s);
4290         return;
4291     }
4292
4293     if (!fp_access_check(s)) {
4294         return;
4295     }
4296
4297     /* Zero extend sreg inputs to 64 bits now.  */
4298     t_true = tcg_temp_new_i64();
4299     t_false = tcg_temp_new_i64();
4300     read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
4301     read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);
4302
4303     a64_test_cc(&c, cond);
4304     t_zero = tcg_const_i64(0);
4305     tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
4306     tcg_temp_free_i64(t_zero);
4307     tcg_temp_free_i64(t_false);
4308     a64_free_cc(&c);
4309
4310     /* Note that sregs write back zeros to the high bits,
4311        and we've already done the zero-extension.  */
4312     write_fp_dreg(s, rd, t_true);
4313     tcg_temp_free_i64(t_true);
4314 }
4315
4316 /* C3.6.25 Floating-point data-processing (1 source) - single precision */
4317 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
4318 {
4319     TCGv_ptr fpst;
4320     TCGv_i32 tcg_op;
4321     TCGv_i32 tcg_res;
4322
4323     fpst = get_fpstatus_ptr();
4324     tcg_op = read_fp_sreg(s, rn);
4325     tcg_res = tcg_temp_new_i32();
4326
4327     switch (opcode) {
4328     case 0x0: /* FMOV */
4329         tcg_gen_mov_i32(tcg_res, tcg_op);
4330         break;
4331     case 0x1: /* FABS */
4332         gen_helper_vfp_abss(tcg_res, tcg_op);
4333         break;
4334     case 0x2: /* FNEG */
4335         gen_helper_vfp_negs(tcg_res, tcg_op);
4336         break;
4337     case 0x3: /* FSQRT */
4338         gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
4339         break;
4340     case 0x8: /* FRINTN */
4341     case 0x9: /* FRINTP */
4342     case 0xa: /* FRINTM */
4343     case 0xb: /* FRINTZ */
4344     case 0xc: /* FRINTA */
4345     {
4346         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4347
4348         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4349         gen_helper_rints(tcg_res, tcg_op, fpst);
4350
4351         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4352         tcg_temp_free_i32(tcg_rmode);
4353         break;
4354     }
4355     case 0xe: /* FRINTX */
4356         gen_helper_rints_exact(tcg_res, tcg_op, fpst);
4357         break;
4358     case 0xf: /* FRINTI */
4359         gen_helper_rints(tcg_res, tcg_op, fpst);
4360         break;
4361     default:
4362         abort();
4363     }
4364
4365     write_fp_sreg(s, rd, tcg_res);
4366
4367     tcg_temp_free_ptr(fpst);
4368     tcg_temp_free_i32(tcg_op);
4369     tcg_temp_free_i32(tcg_res);
4370 }
4371
4372 /* C3.6.25 Floating-point data-processing (1 source) - double precision */
4373 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
4374 {
4375     TCGv_ptr fpst;
4376     TCGv_i64 tcg_op;
4377     TCGv_i64 tcg_res;
4378
4379     fpst = get_fpstatus_ptr();
4380     tcg_op = read_fp_dreg(s, rn);
4381     tcg_res = tcg_temp_new_i64();
4382
4383     switch (opcode) {
4384     case 0x0: /* FMOV */
4385         tcg_gen_mov_i64(tcg_res, tcg_op);
4386         break;
4387     case 0x1: /* FABS */
4388         gen_helper_vfp_absd(tcg_res, tcg_op);
4389         break;
4390     case 0x2: /* FNEG */
4391         gen_helper_vfp_negd(tcg_res, tcg_op);
4392         break;
4393     case 0x3: /* FSQRT */
4394         gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
4395         break;
4396     case 0x8: /* FRINTN */
4397     case 0x9: /* FRINTP */
4398     case 0xa: /* FRINTM */
4399     case 0xb: /* FRINTZ */
4400     case 0xc: /* FRINTA */
4401     {
4402         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4403
4404         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4405         gen_helper_rintd(tcg_res, tcg_op, fpst);
4406
4407         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4408         tcg_temp_free_i32(tcg_rmode);
4409         break;
4410     }
4411     case 0xe: /* FRINTX */
4412         gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
4413         break;
4414     case 0xf: /* FRINTI */
4415         gen_helper_rintd(tcg_res, tcg_op, fpst);
4416         break;
4417     default:
4418         abort();
4419     }
4420
4421     write_fp_dreg(s, rd, tcg_res);
4422
4423     tcg_temp_free_ptr(fpst);
4424     tcg_temp_free_i64(tcg_op);
4425     tcg_temp_free_i64(tcg_res);
4426 }
4427
4428 static void handle_fp_fcvt(DisasContext *s, int opcode,
4429                            int rd, int rn, int dtype, int ntype)
4430 {
4431     switch (ntype) {
4432     case 0x0:
4433     {
4434         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4435         if (dtype == 1) {
4436             /* Single to double */
4437             TCGv_i64 tcg_rd = tcg_temp_new_i64();
4438             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
4439             write_fp_dreg(s, rd, tcg_rd);
4440             tcg_temp_free_i64(tcg_rd);
4441         } else {
4442             /* Single to half */
4443             TCGv_i32 tcg_rd = tcg_temp_new_i32();
4444             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
4445             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4446             write_fp_sreg(s, rd, tcg_rd);
4447             tcg_temp_free_i32(tcg_rd);
4448         }
4449         tcg_temp_free_i32(tcg_rn);
4450         break;
4451     }
4452     case 0x1:
4453     {
4454         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
4455         TCGv_i32 tcg_rd = tcg_temp_new_i32();
4456         if (dtype == 0) {
4457             /* Double to single */
4458             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
4459         } else {
4460             /* Double to half */
4461             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
4462             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4463         }
4464         write_fp_sreg(s, rd, tcg_rd);
4465         tcg_temp_free_i32(tcg_rd);
4466         tcg_temp_free_i64(tcg_rn);
4467         break;
4468     }
4469     case 0x3:
4470     {
4471         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4472         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
4473         if (dtype == 0) {
4474             /* Half to single */
4475             TCGv_i32 tcg_rd = tcg_temp_new_i32();
4476             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
4477             write_fp_sreg(s, rd, tcg_rd);
4478             tcg_temp_free_i32(tcg_rd);
4479         } else {
4480             /* Half to double */
4481             TCGv_i64 tcg_rd = tcg_temp_new_i64();
4482             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
4483             write_fp_dreg(s, rd, tcg_rd);
4484             tcg_temp_free_i64(tcg_rd);
4485         }
4486         tcg_temp_free_i32(tcg_rn);
4487         break;
4488     }
4489     default:
4490         abort();
4491     }
4492 }
4493
4494 /* C3.6.25 Floating point data-processing (1 source)
4495  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
4496  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4497  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
4498  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4499  */
4500 static void disas_fp_1src(DisasContext *s, uint32_t insn)
4501 {
4502     int type = extract32(insn, 22, 2);
4503     int opcode = extract32(insn, 15, 6);
4504     int rn = extract32(insn, 5, 5);
4505     int rd = extract32(insn, 0, 5);
4506
4507     switch (opcode) {
4508     case 0x4: case 0x5: case 0x7:
4509     {
4510         /* FCVT between half, single and double precision */
4511         int dtype = extract32(opcode, 0, 2);
4512         if (type == 2 || dtype == type) {
4513             unallocated_encoding(s);
4514             return;
4515         }
4516         if (!fp_access_check(s)) {
4517             return;
4518         }
4519
4520         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
4521         break;
4522     }
4523     case 0x0 ... 0x3:
4524     case 0x8 ... 0xc:
4525     case 0xe ... 0xf:
4526         /* 32-to-32 and 64-to-64 ops */
4527         switch (type) {
4528         case 0:
4529             if (!fp_access_check(s)) {
4530                 return;
4531             }
4532
4533             handle_fp_1src_single(s, opcode, rd, rn);
4534             break;
4535         case 1:
4536             if (!fp_access_check(s)) {
4537                 return;
4538             }
4539
4540             handle_fp_1src_double(s, opcode, rd, rn);
4541             break;
4542         default:
4543             unallocated_encoding(s);
4544         }
4545         break;
4546     default:
4547         unallocated_encoding(s);
4548         break;
4549     }
4550 }
4551
4552 /* C3.6.26 Floating-point data-processing (2 source) - single precision */
4553 static void handle_fp_2src_single(DisasContext *s, int opcode,
4554                                   int rd, int rn, int rm)
4555 {
4556     TCGv_i32 tcg_op1;
4557     TCGv_i32 tcg_op2;
4558     TCGv_i32 tcg_res;
4559     TCGv_ptr fpst;
4560
4561     tcg_res = tcg_temp_new_i32();
4562     fpst = get_fpstatus_ptr();
4563     tcg_op1 = read_fp_sreg(s, rn);
4564     tcg_op2 = read_fp_sreg(s, rm);
4565
4566     switch (opcode) {
4567     case 0x0: /* FMUL */
4568         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4569         break;
4570     case 0x1: /* FDIV */
4571         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
4572         break;
4573     case 0x2: /* FADD */
4574         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
4575         break;
4576     case 0x3: /* FSUB */
4577         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
4578         break;
4579     case 0x4: /* FMAX */
4580         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
4581         break;
4582     case 0x5: /* FMIN */
4583         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
4584         break;
4585     case 0x6: /* FMAXNM */
4586         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
4587         break;
4588     case 0x7: /* FMINNM */
4589         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
4590         break;
4591     case 0x8: /* FNMUL */
4592         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4593         gen_helper_vfp_negs(tcg_res, tcg_res);
4594         break;
4595     }
4596
4597     write_fp_sreg(s, rd, tcg_res);
4598
4599     tcg_temp_free_ptr(fpst);
4600     tcg_temp_free_i32(tcg_op1);
4601     tcg_temp_free_i32(tcg_op2);
4602     tcg_temp_free_i32(tcg_res);
4603 }
4604
4605 /* C3.6.26 Floating-point data-processing (2 source) - double precision */
4606 static void handle_fp_2src_double(DisasContext *s, int opcode,
4607                                   int rd, int rn, int rm)
4608 {
4609     TCGv_i64 tcg_op1;
4610     TCGv_i64 tcg_op2;
4611     TCGv_i64 tcg_res;
4612     TCGv_ptr fpst;
4613
4614     tcg_res = tcg_temp_new_i64();
4615     fpst = get_fpstatus_ptr();
4616     tcg_op1 = read_fp_dreg(s, rn);
4617     tcg_op2 = read_fp_dreg(s, rm);
4618
4619     switch (opcode) {
4620     case 0x0: /* FMUL */
4621         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4622         break;
4623     case 0x1: /* FDIV */
4624         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
4625         break;
4626     case 0x2: /* FADD */
4627         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
4628         break;
4629     case 0x3: /* FSUB */
4630         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
4631         break;
4632     case 0x4: /* FMAX */
4633         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
4634         break;
4635     case 0x5: /* FMIN */
4636         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
4637         break;
4638     case 0x6: /* FMAXNM */
4639         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4640         break;
4641     case 0x7: /* FMINNM */
4642         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4643         break;
4644     case 0x8: /* FNMUL */
4645         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4646         gen_helper_vfp_negd(tcg_res, tcg_res);
4647         break;
4648     }
4649
4650     write_fp_dreg(s, rd, tcg_res);
4651
4652     tcg_temp_free_ptr(fpst);
4653     tcg_temp_free_i64(tcg_op1);
4654     tcg_temp_free_i64(tcg_op2);
4655     tcg_temp_free_i64(tcg_res);
4656 }
4657
4658 /* C3.6.26 Floating point data-processing (2 source)
4659  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
4660  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4661  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
4662  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4663  */
4664 static void disas_fp_2src(DisasContext *s, uint32_t insn)
4665 {
4666     int type = extract32(insn, 22, 2);
4667     int rd = extract32(insn, 0, 5);
4668     int rn = extract32(insn, 5, 5);
4669     int rm = extract32(insn, 16, 5);
4670     int opcode = extract32(insn, 12, 4);
4671
4672     if (opcode > 8) {
4673         unallocated_encoding(s);
4674         return;
4675     }
4676
4677     switch (type) {
4678     case 0:
4679         if (!fp_access_check(s)) {
4680             return;
4681         }
4682         handle_fp_2src_single(s, opcode, rd, rn, rm);
4683         break;
4684     case 1:
4685         if (!fp_access_check(s)) {
4686             return;
4687         }
4688         handle_fp_2src_double(s, opcode, rd, rn, rm);
4689         break;
4690     default:
4691         unallocated_encoding(s);
4692     }
4693 }
4694
4695 /* C3.6.27 Floating-point data-processing (3 source) - single precision */
4696 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
4697                                   int rd, int rn, int rm, int ra)
4698 {
4699     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
4700     TCGv_i32 tcg_res = tcg_temp_new_i32();
4701     TCGv_ptr fpst = get_fpstatus_ptr();
4702
4703     tcg_op1 = read_fp_sreg(s, rn);
4704     tcg_op2 = read_fp_sreg(s, rm);
4705     tcg_op3 = read_fp_sreg(s, ra);
4706
4707     /* These are fused multiply-add, and must be done as one
4708      * floating point operation with no rounding between the
4709      * multiplication and addition steps.
4710      * NB that doing the negations here as separate steps is
4711      * correct : an input NaN should come out with its sign bit
4712      * flipped if it is a negated-input.
4713      */
4714     if (o1 == true) {
4715         gen_helper_vfp_negs(tcg_op3, tcg_op3);
4716     }
4717
4718     if (o0 != o1) {
4719         gen_helper_vfp_negs(tcg_op1, tcg_op1);
4720     }
4721
4722     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4723
4724     write_fp_sreg(s, rd, tcg_res);
4725
4726     tcg_temp_free_ptr(fpst);
4727     tcg_temp_free_i32(tcg_op1);
4728     tcg_temp_free_i32(tcg_op2);
4729     tcg_temp_free_i32(tcg_op3);
4730     tcg_temp_free_i32(tcg_res);
4731 }
4732
4733 /* C3.6.27 Floating-point data-processing (3 source) - double precision */
4734 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
4735                                   int rd, int rn, int rm, int ra)
4736 {
4737     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
4738     TCGv_i64 tcg_res = tcg_temp_new_i64();
4739     TCGv_ptr fpst = get_fpstatus_ptr();
4740
4741     tcg_op1 = read_fp_dreg(s, rn);
4742     tcg_op2 = read_fp_dreg(s, rm);
4743     tcg_op3 = read_fp_dreg(s, ra);
4744
4745     /* These are fused multiply-add, and must be done as one
4746      * floating point operation with no rounding between the
4747      * multiplication and addition steps.
4748      * NB that doing the negations here as separate steps is
4749      * correct : an input NaN should come out with its sign bit
4750      * flipped if it is a negated-input.
4751      */
4752     if (o1 == true) {
4753         gen_helper_vfp_negd(tcg_op3, tcg_op3);
4754     }
4755
4756     if (o0 != o1) {
4757         gen_helper_vfp_negd(tcg_op1, tcg_op1);
4758     }
4759
4760     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4761
4762     write_fp_dreg(s, rd, tcg_res);
4763
4764     tcg_temp_free_ptr(fpst);
4765     tcg_temp_free_i64(tcg_op1);
4766     tcg_temp_free_i64(tcg_op2);
4767     tcg_temp_free_i64(tcg_op3);
4768     tcg_temp_free_i64(tcg_res);
4769 }
4770
4771 /* C3.6.27 Floating point data-processing (3 source)
4772  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
4773  * +---+---+---+-----------+------+----+------+----+------+------+------+
4774  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4775  * +---+---+---+-----------+------+----+------+----+------+------+------+
4776  */
4777 static void disas_fp_3src(DisasContext *s, uint32_t insn)
4778 {
4779     int type = extract32(insn, 22, 2);
4780     int rd = extract32(insn, 0, 5);
4781     int rn = extract32(insn, 5, 5);
4782     int ra = extract32(insn, 10, 5);
4783     int rm = extract32(insn, 16, 5);
4784     bool o0 = extract32(insn, 15, 1);
4785     bool o1 = extract32(insn, 21, 1);
4786
4787     switch (type) {
4788     case 0:
4789         if (!fp_access_check(s)) {
4790             return;
4791         }
4792         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
4793         break;
4794     case 1:
4795         if (!fp_access_check(s)) {
4796             return;
4797         }
4798         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
4799         break;
4800     default:
4801         unallocated_encoding(s);
4802     }
4803 }
4804
4805 /* C3.6.28 Floating point immediate
4806  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
4807  * +---+---+---+-----------+------+---+------------+-------+------+------+
4808  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
4809  * +---+---+---+-----------+------+---+------------+-------+------+------+
4810  */
4811 static void disas_fp_imm(DisasContext *s, uint32_t insn)
4812 {
4813     int rd = extract32(insn, 0, 5);
4814     int imm8 = extract32(insn, 13, 8);
4815     int is_double = extract32(insn, 22, 2);
4816     uint64_t imm;
4817     TCGv_i64 tcg_res;
4818
4819     if (is_double > 1) {
4820         unallocated_encoding(s);
4821         return;
4822     }
4823
4824     if (!fp_access_check(s)) {
4825         return;
4826     }
4827
4828     /* The imm8 encodes the sign bit, enough bits to represent
4829      * an exponent in the range 01....1xx to 10....0xx,
4830      * and the most significant 4 bits of the mantissa; see
4831      * VFPExpandImm() in the v8 ARM ARM.
4832      */
4833     if (is_double) {
4834         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4835             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
4836             extract32(imm8, 0, 6);
4837         imm <<= 48;
4838     } else {
4839         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4840             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
4841             (extract32(imm8, 0, 6) << 3);
4842         imm <<= 16;
4843     }
4844
4845     tcg_res = tcg_const_i64(imm);
4846     write_fp_dreg(s, rd, tcg_res);
4847     tcg_temp_free_i64(tcg_res);
4848 }
4849
4850 /* Handle floating point <=> fixed point conversions. Note that we can
4851  * also deal with fp <=> integer conversions as a special case (scale == 64)
4852  * OPTME: consider handling that special case specially or at least skipping
4853  * the call to scalbn in the helpers for zero shifts.
4854  */
4855 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
4856                            bool itof, int rmode, int scale, int sf, int type)
4857 {
4858     bool is_signed = !(opcode & 1);
4859     bool is_double = type;
4860     TCGv_ptr tcg_fpstatus;
4861     TCGv_i32 tcg_shift;
4862
4863     tcg_fpstatus = get_fpstatus_ptr();
4864
4865     tcg_shift = tcg_const_i32(64 - scale);
4866
4867     if (itof) {
4868         TCGv_i64 tcg_int = cpu_reg(s, rn);
4869         if (!sf) {
4870             TCGv_i64 tcg_extend = new_tmp_a64(s);
4871
4872             if (is_signed) {
4873                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
4874             } else {
4875                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
4876             }
4877
4878             tcg_int = tcg_extend;
4879         }
4880
4881         if (is_double) {
4882             TCGv_i64 tcg_double = tcg_temp_new_i64();
4883             if (is_signed) {
4884                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
4885                                      tcg_shift, tcg_fpstatus);
4886             } else {
4887                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
4888                                      tcg_shift, tcg_fpstatus);
4889             }
4890             write_fp_dreg(s, rd, tcg_double);
4891             tcg_temp_free_i64(tcg_double);
4892         } else {
4893             TCGv_i32 tcg_single = tcg_temp_new_i32();
4894             if (is_signed) {
4895                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
4896                                      tcg_shift, tcg_fpstatus);
4897             } else {
4898                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
4899                                      tcg_shift, tcg_fpstatus);
4900             }
4901             write_fp_sreg(s, rd, tcg_single);
4902             tcg_temp_free_i32(tcg_single);
4903         }
4904     } else {
4905         TCGv_i64 tcg_int = cpu_reg(s, rd);
4906         TCGv_i32 tcg_rmode;
4907
4908         if (extract32(opcode, 2, 1)) {
4909             /* There are too many rounding modes to all fit into rmode,
4910              * so FCVTA[US] is a special case.
4911              */
4912             rmode = FPROUNDING_TIEAWAY;
4913         }
4914
4915         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
4916
4917         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4918
4919         if (is_double) {
4920             TCGv_i64 tcg_double = read_fp_dreg(s, rn);
4921             if (is_signed) {
4922                 if (!sf) {
4923                     gen_helper_vfp_tosld(tcg_int, tcg_double,
4924                                          tcg_shift, tcg_fpstatus);
4925                 } else {
4926                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
4927                                          tcg_shift, tcg_fpstatus);
4928                 }
4929             } else {
4930                 if (!sf) {
4931                     gen_helper_vfp_tould(tcg_int, tcg_double,
4932                                          tcg_shift, tcg_fpstatus);
4933                 } else {
4934                     gen_helper_vfp_touqd(tcg_int, tcg_double,
4935                                          tcg_shift, tcg_fpstatus);
4936                 }
4937             }
4938             tcg_temp_free_i64(tcg_double);
4939         } else {
4940             TCGv_i32 tcg_single = read_fp_sreg(s, rn);
4941             if (sf) {
4942                 if (is_signed) {
4943                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
4944                                          tcg_shift, tcg_fpstatus);
4945                 } else {
4946                     gen_helper_vfp_touqs(tcg_int, tcg_single,
4947                                          tcg_shift, tcg_fpstatus);
4948                 }
4949             } else {
4950                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
4951                 if (is_signed) {
4952                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
4953                                          tcg_shift, tcg_fpstatus);
4954                 } else {
4955                     gen_helper_vfp_touls(tcg_dest, tcg_single,
4956                                          tcg_shift, tcg_fpstatus);
4957                 }
4958                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
4959                 tcg_temp_free_i32(tcg_dest);
4960             }
4961             tcg_temp_free_i32(tcg_single);
4962         }
4963
4964         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4965         tcg_temp_free_i32(tcg_rmode);
4966
4967         if (!sf) {
4968             tcg_gen_ext32u_i64(tcg_int, tcg_int);
4969         }
4970     }
4971
4972     tcg_temp_free_ptr(tcg_fpstatus);
4973     tcg_temp_free_i32(tcg_shift);
4974 }
4975
4976 /* C3.6.29 Floating point <-> fixed point conversions
4977  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
4978  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4979  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
4980  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4981  */
4982 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
4983 {
4984     int rd = extract32(insn, 0, 5);
4985     int rn = extract32(insn, 5, 5);
4986     int scale = extract32(insn, 10, 6);
4987     int opcode = extract32(insn, 16, 3);
4988     int rmode = extract32(insn, 19, 2);
4989     int type = extract32(insn, 22, 2);
4990     bool sbit = extract32(insn, 29, 1);
4991     bool sf = extract32(insn, 31, 1);
4992     bool itof;
4993
4994     if (sbit || (type > 1)
4995         || (!sf && scale < 32)) {
4996         unallocated_encoding(s);
4997         return;
4998     }
4999
5000     switch ((rmode << 3) | opcode) {
5001     case 0x2: /* SCVTF */
5002     case 0x3: /* UCVTF */
5003         itof = true;
5004         break;
5005     case 0x18: /* FCVTZS */
5006     case 0x19: /* FCVTZU */
5007         itof = false;
5008         break;
5009     default:
5010         unallocated_encoding(s);
5011         return;
5012     }
5013
5014     if (!fp_access_check(s)) {
5015         return;
5016     }
5017
5018     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
5019 }
5020
5021 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
5022 {
5023     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
5024      * without conversion.
5025      */
5026
5027     if (itof) {
5028         TCGv_i64 tcg_rn = cpu_reg(s, rn);
5029
5030         switch (type) {
5031         case 0:
5032         {
5033             /* 32 bit */
5034             TCGv_i64 tmp = tcg_temp_new_i64();
5035             tcg_gen_ext32u_i64(tmp, tcg_rn);
5036             tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
5037             tcg_gen_movi_i64(tmp, 0);
5038             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5039             tcg_temp_free_i64(tmp);
5040             break;
5041         }
5042         case 1:
5043         {
5044             /* 64 bit */
5045             TCGv_i64 tmp = tcg_const_i64(0);
5046             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
5047             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5048             tcg_temp_free_i64(tmp);
5049             break;
5050         }
5051         case 2:
5052             /* 64 bit to top half. */
5053             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
5054             break;
5055         }
5056     } else {
5057         TCGv_i64 tcg_rd = cpu_reg(s, rd);
5058
5059         switch (type) {
5060         case 0:
5061             /* 32 bit */
5062             tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
5063             break;
5064         case 1:
5065             /* 64 bit */
5066             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
5067             break;
5068         case 2:
5069             /* 64 bits from top half */
5070             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
5071             break;
5072         }
5073     }
5074 }
5075
5076 /* C3.6.30 Floating point <-> integer conversions
5077  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
5078  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5079  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
5080  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5081  */
5082 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
5083 {
5084     int rd = extract32(insn, 0, 5);
5085     int rn = extract32(insn, 5, 5);
5086     int opcode = extract32(insn, 16, 3);
5087     int rmode = extract32(insn, 19, 2);
5088     int type = extract32(insn, 22, 2);
5089     bool sbit = extract32(insn, 29, 1);
5090     bool sf = extract32(insn, 31, 1);
5091
5092     if (sbit) {
5093         unallocated_encoding(s);
5094         return;
5095     }
5096
5097     if (opcode > 5) {
5098         /* FMOV */
5099         bool itof = opcode & 1;
5100
5101         if (rmode >= 2) {
5102             unallocated_encoding(s);
5103             return;
5104         }
5105
5106         switch (sf << 3 | type << 1 | rmode) {
5107         case 0x0: /* 32 bit */
5108         case 0xa: /* 64 bit */
5109         case 0xd: /* 64 bit to top half of quad */
5110             break;
5111         default:
5112             /* all other sf/type/rmode combinations are invalid */
5113             unallocated_encoding(s);
5114             break;
5115         }
5116
5117         if (!fp_access_check(s)) {
5118             return;
5119         }
5120         handle_fmov(s, rd, rn, type, itof);
5121     } else {
5122         /* actual FP conversions */
5123         bool itof = extract32(opcode, 1, 1);
5124
5125         if (type > 1 || (rmode != 0 && opcode > 1)) {
5126             unallocated_encoding(s);
5127             return;
5128         }
5129
5130         if (!fp_access_check(s)) {
5131             return;
5132         }
5133         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
5134     }
5135 }
5136
5137 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
5138  *   31  30  29 28     25 24                          0
5139  * +---+---+---+---------+-----------------------------+
5140  * |   | 0 |   | 1 1 1 1 |                             |
5141  * +---+---+---+---------+-----------------------------+
5142  */
5143 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
5144 {
5145     if (extract32(insn, 24, 1)) {
5146         /* Floating point data-processing (3 source) */
5147         disas_fp_3src(s, insn);
5148     } else if (extract32(insn, 21, 1) == 0) {
5149         /* Floating point to fixed point conversions */
5150         disas_fp_fixed_conv(s, insn);
5151     } else {
5152         switch (extract32(insn, 10, 2)) {
5153         case 1:
5154             /* Floating point conditional compare */
5155             disas_fp_ccomp(s, insn);
5156             break;
5157         case 2:
5158             /* Floating point data-processing (2 source) */
5159             disas_fp_2src(s, insn);
5160             break;
5161         case 3:
5162             /* Floating point conditional select */
5163             disas_fp_csel(s, insn);
5164             break;
5165         case 0:
5166             switch (ctz32(extract32(insn, 12, 4))) {
5167             case 0: /* [15:12] == xxx1 */
5168                 /* Floating point immediate */
5169                 disas_fp_imm(s, insn);
5170                 break;
5171             case 1: /* [15:12] == xx10 */
5172                 /* Floating point compare */
5173                 disas_fp_compare(s, insn);
5174                 break;
5175             case 2: /* [15:12] == x100 */
5176                 /* Floating point data-processing (1 source) */
5177                 disas_fp_1src(s, insn);
5178                 break;
5179             case 3: /* [15:12] == 1000 */
5180                 unallocated_encoding(s);
5181                 break;
5182             default: /* [15:12] == 0000 */
5183                 /* Floating point <-> integer conversions */
5184                 disas_fp_int_conv(s, insn);
5185                 break;
5186             }
5187             break;
5188         }
5189     }
5190 }
5191
5192 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
5193                      int pos)
5194 {
5195     /* Extract 64 bits from the middle of two concatenated 64 bit
5196      * vector register slices left:right. The extracted bits start
5197      * at 'pos' bits into the right (least significant) side.
5198      * We return the result in tcg_right, and guarantee not to
5199      * trash tcg_left.
5200      */
5201     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5202     assert(pos > 0 && pos < 64);
5203
5204     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
5205     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
5206     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
5207
5208     tcg_temp_free_i64(tcg_tmp);
5209 }
5210
5211 /* C3.6.1 EXT
5212  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
5213  * +---+---+-------------+-----+---+------+---+------+---+------+------+
5214  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
5215  * +---+---+-------------+-----+---+------+---+------+---+------+------+
5216  */
5217 static void disas_simd_ext(DisasContext *s, uint32_t insn)
5218 {
5219     int is_q = extract32(insn, 30, 1);
5220     int op2 = extract32(insn, 22, 2);
5221     int imm4 = extract32(insn, 11, 4);
5222     int rm = extract32(insn, 16, 5);
5223     int rn = extract32(insn, 5, 5);
5224     int rd = extract32(insn, 0, 5);
5225     int pos = imm4 << 3;
5226     TCGv_i64 tcg_resl, tcg_resh;
5227
5228     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
5229         unallocated_encoding(s);
5230         return;
5231     }
5232
5233     if (!fp_access_check(s)) {
5234         return;
5235     }
5236
5237     tcg_resh = tcg_temp_new_i64();
5238     tcg_resl = tcg_temp_new_i64();
5239
5240     /* Vd gets bits starting at pos bits into Vm:Vn. This is
5241      * either extracting 128 bits from a 128:128 concatenation, or
5242      * extracting 64 bits from a 64:64 concatenation.
5243      */
5244     if (!is_q) {
5245         read_vec_element(s, tcg_resl, rn, 0, MO_64);
5246         if (pos != 0) {
5247             read_vec_element(s, tcg_resh, rm, 0, MO_64);
5248             do_ext64(s, tcg_resh, tcg_resl, pos);
5249         }
5250         tcg_gen_movi_i64(tcg_resh, 0);
5251     } else {
5252         TCGv_i64 tcg_hh;
5253         typedef struct {
5254             int reg;
5255             int elt;
5256         } EltPosns;
5257         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
5258         EltPosns *elt = eltposns;
5259
5260         if (pos >= 64) {
5261             elt++;
5262             pos -= 64;
5263         }
5264
5265         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
5266         elt++;
5267         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
5268         elt++;
5269         if (pos != 0) {
5270             do_ext64(s, tcg_resh, tcg_resl, pos);
5271             tcg_hh = tcg_temp_new_i64();
5272             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
5273             do_ext64(s, tcg_hh, tcg_resh, pos);
5274             tcg_temp_free_i64(tcg_hh);
5275         }
5276     }
5277
5278     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5279     tcg_temp_free_i64(tcg_resl);
5280     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5281     tcg_temp_free_i64(tcg_resh);
5282 }
5283
5284 /* C3.6.2 TBL/TBX
5285  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
5286  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5287  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
5288  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5289  */
5290 static void disas_simd_tb(DisasContext *s, uint32_t insn)
5291 {
5292     int op2 = extract32(insn, 22, 2);
5293     int is_q = extract32(insn, 30, 1);
5294     int rm = extract32(insn, 16, 5);
5295     int rn = extract32(insn, 5, 5);
5296     int rd = extract32(insn, 0, 5);
5297     int is_tblx = extract32(insn, 12, 1);
5298     int len = extract32(insn, 13, 2);
5299     TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
5300     TCGv_i32 tcg_regno, tcg_numregs;
5301
5302     if (op2 != 0) {
5303         unallocated_encoding(s);
5304         return;
5305     }
5306
5307     if (!fp_access_check(s)) {
5308         return;
5309     }
5310
5311     /* This does a table lookup: for every byte element in the input
5312      * we index into a table formed from up to four vector registers,
5313      * and then the output is the result of the lookups. Our helper
5314      * function does the lookup operation for a single 64 bit part of
5315      * the input.
5316      */
5317     tcg_resl = tcg_temp_new_i64();
5318     tcg_resh = tcg_temp_new_i64();
5319
5320     if (is_tblx) {
5321         read_vec_element(s, tcg_resl, rd, 0, MO_64);
5322     } else {
5323         tcg_gen_movi_i64(tcg_resl, 0);
5324     }
5325     if (is_tblx && is_q) {
5326         read_vec_element(s, tcg_resh, rd, 1, MO_64);
5327     } else {
5328         tcg_gen_movi_i64(tcg_resh, 0);
5329     }
5330
5331     tcg_idx = tcg_temp_new_i64();
5332     tcg_regno = tcg_const_i32(rn);
5333     tcg_numregs = tcg_const_i32(len + 1);
5334     read_vec_element(s, tcg_idx, rm, 0, MO_64);
5335     gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
5336                         tcg_regno, tcg_numregs);
5337     if (is_q) {
5338         read_vec_element(s, tcg_idx, rm, 1, MO_64);
5339         gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
5340                             tcg_regno, tcg_numregs);
5341     }
5342     tcg_temp_free_i64(tcg_idx);
5343     tcg_temp_free_i32(tcg_regno);
5344     tcg_temp_free_i32(tcg_numregs);
5345
5346     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5347     tcg_temp_free_i64(tcg_resl);
5348     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5349     tcg_temp_free_i64(tcg_resh);
5350 }
5351
5352 /* C3.6.3 ZIP/UZP/TRN
5353  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
5354  * +---+---+-------------+------+---+------+---+------------------+------+
5355  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
5356  * +---+---+-------------+------+---+------+---+------------------+------+
5357  */
5358 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
5359 {
5360     int rd = extract32(insn, 0, 5);
5361     int rn = extract32(insn, 5, 5);
5362     int rm = extract32(insn, 16, 5);
5363     int size = extract32(insn, 22, 2);
5364     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
5365      * bit 2 indicates 1 vs 2 variant of the insn.
5366      */
5367     int opcode = extract32(insn, 12, 2);
5368     bool part = extract32(insn, 14, 1);
5369     bool is_q = extract32(insn, 30, 1);
5370     int esize = 8 << size;
5371     int i, ofs;
5372     int datasize = is_q ? 128 : 64;
5373     int elements = datasize / esize;
5374     TCGv_i64 tcg_res, tcg_resl, tcg_resh;
5375
5376     if (opcode == 0 || (size == 3 && !is_q)) {
5377         unallocated_encoding(s);
5378         return;
5379     }
5380
5381     if (!fp_access_check(s)) {
5382         return;
5383     }
5384
5385     tcg_resl = tcg_const_i64(0);
5386     tcg_resh = tcg_const_i64(0);
5387     tcg_res = tcg_temp_new_i64();
5388
5389     for (i = 0; i < elements; i++) {
5390         switch (opcode) {
5391         case 1: /* UZP1/2 */
5392         {
5393             int midpoint = elements / 2;
5394             if (i < midpoint) {
5395                 read_vec_element(s, tcg_res, rn, 2 * i + part, size);
5396             } else {
5397                 read_vec_element(s, tcg_res, rm,
5398                                  2 * (i - midpoint) + part, size);
5399             }
5400             break;
5401         }
5402         case 2: /* TRN1/2 */
5403             if (i & 1) {
5404                 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
5405             } else {
5406                 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
5407             }
5408             break;
5409         case 3: /* ZIP1/2 */
5410         {
5411             int base = part * elements / 2;
5412             if (i & 1) {
5413                 read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
5414             } else {
5415                 read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
5416             }
5417             break;
5418         }
5419         default:
5420             g_assert_not_reached();
5421         }
5422
5423         ofs = i * esize;
5424         if (ofs < 64) {
5425             tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
5426             tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
5427         } else {
5428             tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
5429             tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
5430         }
5431     }
5432
5433     tcg_temp_free_i64(tcg_res);
5434
5435     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5436     tcg_temp_free_i64(tcg_resl);
5437     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5438     tcg_temp_free_i64(tcg_resh);
5439 }
5440
5441 static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
5442                         int opc, bool is_min, TCGv_ptr fpst)
5443 {
5444     /* Helper function for disas_simd_across_lanes: do a single precision
5445      * min/max operation on the specified two inputs,
5446      * and return the result in tcg_elt1.
5447      */
5448     if (opc == 0xc) {
5449         if (is_min) {
5450             gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5451         } else {
5452             gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5453         }
5454     } else {
5455         assert(opc == 0xf);
5456         if (is_min) {
5457             gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5458         } else {
5459             gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5460         }
5461     }
5462 }
5463
5464 /* C3.6.4 AdvSIMD across lanes
5465  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
5466  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5467  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
5468  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5469  */
5470 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
5471 {
5472     int rd = extract32(insn, 0, 5);
5473     int rn = extract32(insn, 5, 5);
5474     int size = extract32(insn, 22, 2);
5475     int opcode = extract32(insn, 12, 5);
5476     bool is_q = extract32(insn, 30, 1);
5477     bool is_u = extract32(insn, 29, 1);
5478     bool is_fp = false;
5479     bool is_min = false;
5480     int esize;
5481     int elements;
5482     int i;
5483     TCGv_i64 tcg_res, tcg_elt;
5484
5485     switch (opcode) {
5486     case 0x1b: /* ADDV */
5487         if (is_u) {
5488             unallocated_encoding(s);
5489             return;
5490         }
5491         /* fall through */
5492     case 0x3: /* SADDLV, UADDLV */
5493     case 0xa: /* SMAXV, UMAXV */
5494     case 0x1a: /* SMINV, UMINV */
5495         if (size == 3 || (size == 2 && !is_q)) {
5496             unallocated_encoding(s);
5497             return;
5498         }
5499         break;
5500     case 0xc: /* FMAXNMV, FMINNMV */
5501     case 0xf: /* FMAXV, FMINV */
5502         if (!is_u || !is_q || extract32(size, 0, 1)) {
5503             unallocated_encoding(s);
5504             return;
5505         }
5506         /* Bit 1 of size field encodes min vs max, and actual size is always
5507          * 32 bits: adjust the size variable so following code can rely on it
5508          */
5509         is_min = extract32(size, 1, 1);
5510         is_fp = true;
5511         size = 2;
5512         break;
5513     default:
5514         unallocated_encoding(s);
5515         return;
5516     }
5517
5518     if (!fp_access_check(s)) {
5519         return;
5520     }
5521
5522     esize = 8 << size;
5523     elements = (is_q ? 128 : 64) / esize;
5524
5525     tcg_res = tcg_temp_new_i64();
5526     tcg_elt = tcg_temp_new_i64();
5527
5528     /* These instructions operate across all lanes of a vector
5529      * to produce a single result. We can guarantee that a 64
5530      * bit intermediate is sufficient:
5531      *  + for [US]ADDLV the maximum element size is 32 bits, and
5532      *    the result type is 64 bits
5533      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
5534      *    same as the element size, which is 32 bits at most
5535      * For the integer operations we can choose to work at 64
5536      * or 32 bits and truncate at the end; for simplicity
5537      * we use 64 bits always. The floating point
5538      * ops do require 32 bit intermediates, though.
5539      */
5540     if (!is_fp) {
5541         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
5542
5543         for (i = 1; i < elements; i++) {
5544             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
5545
5546             switch (opcode) {
5547             case 0x03: /* SADDLV / UADDLV */
5548             case 0x1b: /* ADDV */
5549                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
5550                 break;
5551             case 0x0a: /* SMAXV / UMAXV */
5552                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
5553                                     tcg_res,
5554                                     tcg_res, tcg_elt, tcg_res, tcg_elt);
5555                 break;
5556             case 0x1a: /* SMINV / UMINV */
5557                 tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
5558                                     tcg_res,
5559                                     tcg_res, tcg_elt, tcg_res, tcg_elt);
5560                 break;
5561                 break;
5562             default:
5563                 g_assert_not_reached();
5564             }
5565
5566         }
5567     } else {
5568         /* Floating point ops which work on 32 bit (single) intermediates.
5569          * Note that correct NaN propagation requires that we do these
5570          * operations in exactly the order specified by the pseudocode.
5571          */
5572         TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
5573         TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
5574         TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
5575         TCGv_ptr fpst = get_fpstatus_ptr();
5576
5577         assert(esize == 32);
5578         assert(elements == 4);
5579
5580         read_vec_element(s, tcg_elt, rn, 0, MO_32);
5581         tcg_gen_extrl_i64_i32(tcg_elt1, tcg_elt);
5582         read_vec_element(s, tcg_elt, rn, 1, MO_32);
5583         tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5584
5585         do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5586
5587         read_vec_element(s, tcg_elt, rn, 2, MO_32);
5588         tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5589         read_vec_element(s, tcg_elt, rn, 3, MO_32);
5590         tcg_gen_extrl_i64_i32(tcg_elt3, tcg_elt);
5591
5592         do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
5593
5594         do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5595
5596         tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
5597         tcg_temp_free_i32(tcg_elt1);
5598         tcg_temp_free_i32(tcg_elt2);
5599         tcg_temp_free_i32(tcg_elt3);
5600         tcg_temp_free_ptr(fpst);
5601     }
5602
5603     tcg_temp_free_i64(tcg_elt);
5604
5605     /* Now truncate the result to the width required for the final output */
5606     if (opcode == 0x03) {
5607         /* SADDLV, UADDLV: result is 2*esize */
5608         size++;
5609     }
5610
5611     switch (size) {
5612     case 0:
5613         tcg_gen_ext8u_i64(tcg_res, tcg_res);
5614         break;
5615     case 1:
5616         tcg_gen_ext16u_i64(tcg_res, tcg_res);
5617         break;
5618     case 2:
5619         tcg_gen_ext32u_i64(tcg_res, tcg_res);
5620         break;
5621     case 3:
5622         break;
5623     default:
5624         g_assert_not_reached();
5625     }
5626
5627     write_fp_dreg(s, rd, tcg_res);
5628     tcg_temp_free_i64(tcg_res);
5629 }
5630
5631 /* C6.3.31 DUP (Element, Vector)
5632  *
5633  *  31  30   29              21 20    16 15        10  9    5 4    0
5634  * +---+---+-------------------+--------+-------------+------+------+
5635  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5636  * +---+---+-------------------+--------+-------------+------+------+
5637  *
5638  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5639  */
5640 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
5641                              int imm5)
5642 {
5643     int size = ctz32(imm5);
5644     int esize = 8 << size;
5645     int elements = (is_q ? 128 : 64) / esize;
5646     int index, i;
5647     TCGv_i64 tmp;
5648
5649     if (size > 3 || (size == 3 && !is_q)) {
5650         unallocated_encoding(s);
5651         return;
5652     }
5653
5654     if (!fp_access_check(s)) {
5655         return;
5656     }
5657
5658     index = imm5 >> (size + 1);
5659
5660     tmp = tcg_temp_new_i64();
5661     read_vec_element(s, tmp, rn, index, size);
5662
5663     for (i = 0; i < elements; i++) {
5664         write_vec_element(s, tmp, rd, i, size);
5665     }
5666
5667     if (!is_q) {
5668         clear_vec_high(s, rd);
5669     }
5670
5671     tcg_temp_free_i64(tmp);
5672 }
5673
5674 /* C6.3.31 DUP (element, scalar)
5675  *  31                   21 20    16 15        10  9    5 4    0
5676  * +-----------------------+--------+-------------+------+------+
5677  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5678  * +-----------------------+--------+-------------+------+------+
5679  */
5680 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
5681                               int imm5)
5682 {
5683     int size = ctz32(imm5);
5684     int index;
5685     TCGv_i64 tmp;
5686
5687     if (size > 3) {
5688         unallocated_encoding(s);
5689         return;
5690     }
5691
5692     if (!fp_access_check(s)) {
5693         return;
5694     }
5695
5696     index = imm5 >> (size + 1);
5697
5698     /* This instruction just extracts the specified element and
5699      * zero-extends it into the bottom of the destination register.
5700      */
5701     tmp = tcg_temp_new_i64();
5702     read_vec_element(s, tmp, rn, index, size);
5703     write_fp_dreg(s, rd, tmp);
5704     tcg_temp_free_i64(tmp);
5705 }
5706
5707 /* C6.3.32 DUP (General)
5708  *
5709  *  31  30   29              21 20    16 15        10  9    5 4    0
5710  * +---+---+-------------------+--------+-------------+------+------+
5711  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
5712  * +---+---+-------------------+--------+-------------+------+------+
5713  *
5714  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5715  */
5716 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
5717                              int imm5)
5718 {
5719     int size = ctz32(imm5);
5720     int esize = 8 << size;
5721     int elements = (is_q ? 128 : 64)/esize;
5722     int i = 0;
5723
5724     if (size > 3 || ((size == 3) && !is_q)) {
5725         unallocated_encoding(s);
5726         return;
5727     }
5728
5729     if (!fp_access_check(s)) {
5730         return;
5731     }
5732
5733     for (i = 0; i < elements; i++) {
5734         write_vec_element(s, cpu_reg(s, rn), rd, i, size);
5735     }
5736     if (!is_q) {
5737         clear_vec_high(s, rd);
5738     }
5739 }
5740
5741 /* C6.3.150 INS (Element)
5742  *
5743  *  31                   21 20    16 15  14    11  10 9    5 4    0
5744  * +-----------------------+--------+------------+---+------+------+
5745  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
5746  * +-----------------------+--------+------------+---+------+------+
5747  *
5748  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5749  * index: encoded in imm5<4:size+1>
5750  */
5751 static void handle_simd_inse(DisasContext *s, int rd, int rn,
5752                              int imm4, int imm5)
5753 {
5754     int size = ctz32(imm5);
5755     int src_index, dst_index;
5756     TCGv_i64 tmp;
5757
5758     if (size > 3) {
5759         unallocated_encoding(s);
5760         return;
5761     }
5762
5763     if (!fp_access_check(s)) {
5764         return;
5765     }
5766
5767     dst_index = extract32(imm5, 1+size, 5);
5768     src_index = extract32(imm4, size, 4);
5769
5770     tmp = tcg_temp_new_i64();
5771
5772     read_vec_element(s, tmp, rn, src_index, size);
5773     write_vec_element(s, tmp, rd, dst_index, size);
5774
5775     tcg_temp_free_i64(tmp);
5776 }
5777
5778
5779 /* C6.3.151 INS (General)
5780  *
5781  *  31                   21 20    16 15        10  9    5 4    0
5782  * +-----------------------+--------+-------------+------+------+
5783  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
5784  * +-----------------------+--------+-------------+------+------+
5785  *
5786  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5787  * index: encoded in imm5<4:size+1>
5788  */
5789 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
5790 {
5791     int size = ctz32(imm5);
5792     int idx;
5793
5794     if (size > 3) {
5795         unallocated_encoding(s);
5796         return;
5797     }
5798
5799     if (!fp_access_check(s)) {
5800         return;
5801     }
5802
5803     idx = extract32(imm5, 1 + size, 4 - size);
5804     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
5805 }
5806
5807 /*
5808  * C6.3.321 UMOV (General)
5809  * C6.3.237 SMOV (General)
5810  *
5811  *  31  30   29              21 20    16 15    12   10 9    5 4    0
5812  * +---+---+-------------------+--------+-------------+------+------+
5813  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
5814  * +---+---+-------------------+--------+-------------+------+------+
5815  *
5816  * U: unsigned when set
5817  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5818  */
5819 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
5820                                   int rn, int rd, int imm5)
5821 {
5822     int size = ctz32(imm5);
5823     int element;
5824     TCGv_i64 tcg_rd;
5825
5826     /* Check for UnallocatedEncodings */
5827     if (is_signed) {
5828         if (size > 2 || (size == 2 && !is_q)) {
5829             unallocated_encoding(s);
5830             return;
5831         }
5832     } else {
5833         if (size > 3
5834             || (size < 3 && is_q)
5835             || (size == 3 && !is_q)) {
5836             unallocated_encoding(s);
5837             return;
5838         }
5839     }
5840
5841     if (!fp_access_check(s)) {
5842         return;
5843     }
5844
5845     element = extract32(imm5, 1+size, 4);
5846
5847     tcg_rd = cpu_reg(s, rd);
5848     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
5849     if (is_signed && !is_q) {
5850         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5851     }
5852 }
5853
5854 /* C3.6.5 AdvSIMD copy
5855  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
5856  * +---+---+----+-----------------+------+---+------+---+------+------+
5857  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
5858  * +---+---+----+-----------------+------+---+------+---+------+------+
5859  */
5860 static void disas_simd_copy(DisasContext *s, uint32_t insn)
5861 {
5862     int rd = extract32(insn, 0, 5);
5863     int rn = extract32(insn, 5, 5);
5864     int imm4 = extract32(insn, 11, 4);
5865     int op = extract32(insn, 29, 1);
5866     int is_q = extract32(insn, 30, 1);
5867     int imm5 = extract32(insn, 16, 5);
5868
5869     if (op) {
5870         if (is_q) {
5871             /* INS (element) */
5872             handle_simd_inse(s, rd, rn, imm4, imm5);
5873         } else {
5874             unallocated_encoding(s);
5875         }
5876     } else {
5877         switch (imm4) {
5878         case 0:
5879             /* DUP (element - vector) */
5880             handle_simd_dupe(s, is_q, rd, rn, imm5);
5881             break;
5882         case 1:
5883             /* DUP (general) */
5884             handle_simd_dupg(s, is_q, rd, rn, imm5);
5885             break;
5886         case 3:
5887             if (is_q) {
5888                 /* INS (general) */
5889                 handle_simd_insg(s, rd, rn, imm5);
5890             } else {
5891                 unallocated_encoding(s);
5892             }
5893             break;
5894         case 5:
5895         case 7:
5896             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
5897             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
5898             break;
5899         default:
5900             unallocated_encoding(s);
5901             break;
5902         }
5903     }
5904 }
5905
5906 /* C3.6.6 AdvSIMD modified immediate
5907  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
5908  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
5909  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
5910  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
5911  *
5912  * There are a number of operations that can be carried out here:
5913  *   MOVI - move (shifted) imm into register
5914  *   MVNI - move inverted (shifted) imm into register
5915  *   ORR  - bitwise OR of (shifted) imm with register
5916  *   BIC  - bitwise clear of (shifted) imm with register
5917  */
5918 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
5919 {
5920     int rd = extract32(insn, 0, 5);
5921     int cmode = extract32(insn, 12, 4);
5922     int cmode_3_1 = extract32(cmode, 1, 3);
5923     int cmode_0 = extract32(cmode, 0, 1);
5924     int o2 = extract32(insn, 11, 1);
5925     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
5926     bool is_neg = extract32(insn, 29, 1);
5927     bool is_q = extract32(insn, 30, 1);
5928     uint64_t imm = 0;
5929     TCGv_i64 tcg_rd, tcg_imm;
5930     int i;
5931
5932     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
5933         unallocated_encoding(s);
5934         return;
5935     }
5936
5937     if (!fp_access_check(s)) {
5938         return;
5939     }
5940
5941     /* See AdvSIMDExpandImm() in ARM ARM */
5942     switch (cmode_3_1) {
5943     case 0: /* Replicate(Zeros(24):imm8, 2) */
5944     case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
5945     case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
5946     case 3: /* Replicate(imm8:Zeros(24), 2) */
5947     {
5948         int shift = cmode_3_1 * 8;
5949         imm = bitfield_replicate(abcdefgh << shift, 32);
5950         break;
5951     }
5952     case 4: /* Replicate(Zeros(8):imm8, 4) */
5953     case 5: /* Replicate(imm8:Zeros(8), 4) */
5954     {
5955         int shift = (cmode_3_1 & 0x1) * 8;
5956         imm = bitfield_replicate(abcdefgh << shift, 16);
5957         break;
5958     }
5959     case 6:
5960         if (cmode_0) {
5961             /* Replicate(Zeros(8):imm8:Ones(16), 2) */
5962             imm = (abcdefgh << 16) | 0xffff;
5963         } else {
5964             /* Replicate(Zeros(16):imm8:Ones(8), 2) */
5965             imm = (abcdefgh << 8) | 0xff;
5966         }
5967         imm = bitfield_replicate(imm, 32);
5968         break;
5969     case 7:
5970         if (!cmode_0 && !is_neg) {
5971             imm = bitfield_replicate(abcdefgh, 8);
5972         } else if (!cmode_0 && is_neg) {
5973             int i;
5974             imm = 0;
5975             for (i = 0; i < 8; i++) {
5976                 if ((abcdefgh) & (1 << i)) {
5977                     imm |= 0xffULL << (i * 8);
5978                 }
5979             }
5980         } else if (cmode_0) {
5981             if (is_neg) {
5982                 imm = (abcdefgh & 0x3f) << 48;
5983                 if (abcdefgh & 0x80) {
5984                     imm |= 0x8000000000000000ULL;
5985                 }
5986                 if (abcdefgh & 0x40) {
5987                     imm |= 0x3fc0000000000000ULL;
5988                 } else {
5989                     imm |= 0x4000000000000000ULL;
5990                 }
5991             } else {
5992                 imm = (abcdefgh & 0x3f) << 19;
5993                 if (abcdefgh & 0x80) {
5994                     imm |= 0x80000000;
5995                 }
5996                 if (abcdefgh & 0x40) {
5997                     imm |= 0x3e000000;
5998                 } else {
5999                     imm |= 0x40000000;
6000                 }
6001                 imm |= (imm << 32);
6002             }
6003         }
6004         break;
6005     }
6006
6007     if (cmode_3_1 != 7 && is_neg) {
6008         imm = ~imm;
6009     }
6010
6011     tcg_imm = tcg_const_i64(imm);
6012     tcg_rd = new_tmp_a64(s);
6013
6014     for (i = 0; i < 2; i++) {
6015         int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64);
6016
6017         if (i == 1 && !is_q) {
6018             /* non-quad ops clear high half of vector */
6019             tcg_gen_movi_i64(tcg_rd, 0);
6020         } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
6021             tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
6022             if (is_neg) {
6023                 /* AND (BIC) */
6024                 tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
6025             } else {
6026                 /* ORR */
6027                 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
6028             }
6029         } else {
6030             /* MOVI */
6031             tcg_gen_mov_i64(tcg_rd, tcg_imm);
6032         }
6033         tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
6034     }
6035
6036     tcg_temp_free_i64(tcg_imm);
6037 }
6038
6039 /* C3.6.7 AdvSIMD scalar copy
6040  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
6041  * +-----+----+-----------------+------+---+------+---+------+------+
6042  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6043  * +-----+----+-----------------+------+---+------+---+------+------+
6044  */
6045 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
6046 {
6047     int rd = extract32(insn, 0, 5);
6048     int rn = extract32(insn, 5, 5);
6049     int imm4 = extract32(insn, 11, 4);
6050     int imm5 = extract32(insn, 16, 5);
6051     int op = extract32(insn, 29, 1);
6052
6053     if (op != 0 || imm4 != 0) {
6054         unallocated_encoding(s);
6055         return;
6056     }
6057
6058     /* DUP (element, scalar) */
6059     handle_simd_dupes(s, rd, rn, imm5);
6060 }
6061
6062 /* C3.6.8 AdvSIMD scalar pairwise
6063  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
6064  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6065  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6066  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6067  */
6068 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
6069 {
6070     int u = extract32(insn, 29, 1);
6071     int size = extract32(insn, 22, 2);
6072     int opcode = extract32(insn, 12, 5);
6073     int rn = extract32(insn, 5, 5);
6074     int rd = extract32(insn, 0, 5);
6075     TCGv_ptr fpst;
6076
6077     /* For some ops (the FP ones), size[1] is part of the encoding.
6078      * For ADDP strictly it is not but size[1] is always 1 for valid
6079      * encodings.
6080      */
6081     opcode |= (extract32(size, 1, 1) << 5);
6082
6083     switch (opcode) {
6084     case 0x3b: /* ADDP */
6085         if (u || size != 3) {
6086             unallocated_encoding(s);
6087             return;
6088         }
6089         if (!fp_access_check(s)) {
6090             return;
6091         }
6092
6093         TCGV_UNUSED_PTR(fpst);
6094         break;
6095     case 0xc: /* FMAXNMP */
6096     case 0xd: /* FADDP */
6097     case 0xf: /* FMAXP */
6098     case 0x2c: /* FMINNMP */
6099     case 0x2f: /* FMINP */
6100         /* FP op, size[0] is 32 or 64 bit */
6101         if (!u) {
6102             unallocated_encoding(s);
6103             return;
6104         }
6105         if (!fp_access_check(s)) {
6106             return;
6107         }
6108
6109         size = extract32(size, 0, 1) ? 3 : 2;
6110         fpst = get_fpstatus_ptr();
6111         break;
6112     default:
6113         unallocated_encoding(s);
6114         return;
6115     }
6116
6117     if (size == 3) {
6118         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6119         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6120         TCGv_i64 tcg_res = tcg_temp_new_i64();
6121
6122         read_vec_element(s, tcg_op1, rn, 0, MO_64);
6123         read_vec_element(s, tcg_op2, rn, 1, MO_64);
6124
6125         switch (opcode) {
6126         case 0x3b: /* ADDP */
6127             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
6128             break;
6129         case 0xc: /* FMAXNMP */
6130             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6131             break;
6132         case 0xd: /* FADDP */
6133             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6134             break;
6135         case 0xf: /* FMAXP */
6136             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6137             break;
6138         case 0x2c: /* FMINNMP */
6139             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6140             break;
6141         case 0x2f: /* FMINP */
6142             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6143             break;
6144         default:
6145             g_assert_not_reached();
6146         }
6147
6148         write_fp_dreg(s, rd, tcg_res);
6149
6150         tcg_temp_free_i64(tcg_op1);
6151         tcg_temp_free_i64(tcg_op2);
6152         tcg_temp_free_i64(tcg_res);
6153     } else {
6154         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6155         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6156         TCGv_i32 tcg_res = tcg_temp_new_i32();
6157
6158         read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
6159         read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);
6160
6161         switch (opcode) {
6162         case 0xc: /* FMAXNMP */
6163             gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6164             break;
6165         case 0xd: /* FADDP */
6166             gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6167             break;
6168         case 0xf: /* FMAXP */
6169             gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6170             break;
6171         case 0x2c: /* FMINNMP */
6172             gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6173             break;
6174         case 0x2f: /* FMINP */
6175             gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6176             break;
6177         default:
6178             g_assert_not_reached();
6179         }
6180
6181         write_fp_sreg(s, rd, tcg_res);
6182
6183         tcg_temp_free_i32(tcg_op1);
6184         tcg_temp_free_i32(tcg_op2);
6185         tcg_temp_free_i32(tcg_res);
6186     }
6187
6188     if (!TCGV_IS_UNUSED_PTR(fpst)) {
6189         tcg_temp_free_ptr(fpst);
6190     }
6191 }
6192
6193 /*
6194  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
6195  *
6196  * This code is handles the common shifting code and is used by both
6197  * the vector and scalar code.
6198  */
6199 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6200                                     TCGv_i64 tcg_rnd, bool accumulate,
6201                                     bool is_u, int size, int shift)
6202 {
6203     bool extended_result = false;
6204     bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
6205     int ext_lshift = 0;
6206     TCGv_i64 tcg_src_hi;
6207
6208     if (round && size == 3) {
6209         extended_result = true;
6210         ext_lshift = 64 - shift;
6211         tcg_src_hi = tcg_temp_new_i64();
6212     } else if (shift == 64) {
6213         if (!accumulate && is_u) {
6214             /* result is zero */
6215             tcg_gen_movi_i64(tcg_res, 0);
6216             return;
6217         }
6218     }
6219
6220     /* Deal with the rounding step */
6221     if (round) {
6222         if (extended_result) {
6223             TCGv_i64 tcg_zero = tcg_const_i64(0);
6224             if (!is_u) {
6225                 /* take care of sign extending tcg_res */
6226                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
6227                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6228                                  tcg_src, tcg_src_hi,
6229                                  tcg_rnd, tcg_zero);
6230             } else {
6231                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6232                                  tcg_src, tcg_zero,
6233                                  tcg_rnd, tcg_zero);
6234             }
6235             tcg_temp_free_i64(tcg_zero);
6236         } else {
6237             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
6238         }
6239     }
6240
6241     /* Now do the shift right */
6242     if (round && extended_result) {
6243         /* extended case, >64 bit precision required */
6244         if (ext_lshift == 0) {
6245             /* special case, only high bits matter */
6246             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
6247         } else {
6248             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6249             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
6250             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
6251         }
6252     } else {
6253         if (is_u) {
6254             if (shift == 64) {
6255                 /* essentially shifting in 64 zeros */
6256                 tcg_gen_movi_i64(tcg_src, 0);
6257             } else {
6258                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6259             }
6260         } else {
6261             if (shift == 64) {
6262                 /* effectively extending the sign-bit */
6263                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
6264             } else {
6265                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
6266             }
6267         }
6268     }
6269
6270     if (accumulate) {
6271         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
6272     } else {
6273         tcg_gen_mov_i64(tcg_res, tcg_src);
6274     }
6275
6276     if (extended_result) {
6277         tcg_temp_free_i64(tcg_src_hi);
6278     }
6279 }
6280
6281 /* Common SHL/SLI - Shift left with an optional insert */
6282 static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6283                                  bool insert, int shift)
6284 {
6285     if (insert) { /* SLI */
6286         tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
6287     } else { /* SHL */
6288         tcg_gen_shli_i64(tcg_res, tcg_src, shift);
6289     }
6290 }
6291
6292 /* SRI: shift right with insert */
6293 static void handle_shri_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6294                                  int size, int shift)
6295 {
6296     int esize = 8 << size;
6297
6298     /* shift count same as element size is valid but does nothing;
6299      * special case to avoid potential shift by 64.
6300      */
6301     if (shift != esize) {
6302         tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6303         tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, 0, esize - shift);
6304     }
6305 }
6306
6307 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
6308 static void handle_scalar_simd_shri(DisasContext *s,
6309                                     bool is_u, int immh, int immb,
6310                                     int opcode, int rn, int rd)
6311 {
6312     const int size = 3;
6313     int immhb = immh << 3 | immb;
6314     int shift = 2 * (8 << size) - immhb;
6315     bool accumulate = false;
6316     bool round = false;
6317     bool insert = false;
6318     TCGv_i64 tcg_rn;
6319     TCGv_i64 tcg_rd;
6320     TCGv_i64 tcg_round;
6321
6322     if (!extract32(immh, 3, 1)) {
6323         unallocated_encoding(s);
6324         return;
6325     }
6326
6327     if (!fp_access_check(s)) {
6328         return;
6329     }
6330
6331     switch (opcode) {
6332     case 0x02: /* SSRA / USRA (accumulate) */
6333         accumulate = true;
6334         break;
6335     case 0x04: /* SRSHR / URSHR (rounding) */
6336         round = true;
6337         break;
6338     case 0x06: /* SRSRA / URSRA (accum + rounding) */
6339         accumulate = round = true;
6340         break;
6341     case 0x08: /* SRI */
6342         insert = true;
6343         break;
6344     }
6345
6346     if (round) {
6347         uint64_t round_const = 1ULL << (shift - 1);
6348         tcg_round = tcg_const_i64(round_const);
6349     } else {
6350         TCGV_UNUSED_I64(tcg_round);
6351     }
6352
6353     tcg_rn = read_fp_dreg(s, rn);
6354     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6355
6356     if (insert) {
6357         handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
6358     } else {
6359         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6360                                 accumulate, is_u, size, shift);
6361     }
6362
6363     write_fp_dreg(s, rd, tcg_rd);
6364
6365     tcg_temp_free_i64(tcg_rn);
6366     tcg_temp_free_i64(tcg_rd);
6367     if (round) {
6368         tcg_temp_free_i64(tcg_round);
6369     }
6370 }
6371
6372 /* SHL/SLI - Scalar shift left */
6373 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
6374                                     int immh, int immb, int opcode,
6375                                     int rn, int rd)
6376 {
6377     int size = 32 - clz32(immh) - 1;
6378     int immhb = immh << 3 | immb;
6379     int shift = immhb - (8 << size);
6380     TCGv_i64 tcg_rn = new_tmp_a64(s);
6381     TCGv_i64 tcg_rd = new_tmp_a64(s);
6382
6383     if (!extract32(immh, 3, 1)) {
6384         unallocated_encoding(s);
6385         return;
6386     }
6387
6388     if (!fp_access_check(s)) {
6389         return;
6390     }
6391
6392     tcg_rn = read_fp_dreg(s, rn);
6393     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6394
6395     handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
6396
6397     write_fp_dreg(s, rd, tcg_rd);
6398
6399     tcg_temp_free_i64(tcg_rn);
6400     tcg_temp_free_i64(tcg_rd);
6401 }
6402
6403 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
6404  * (signed/unsigned) narrowing */
6405 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
6406                                    bool is_u_shift, bool is_u_narrow,
6407                                    int immh, int immb, int opcode,
6408                                    int rn, int rd)
6409 {
6410     int immhb = immh << 3 | immb;
6411     int size = 32 - clz32(immh) - 1;
6412     int esize = 8 << size;
6413     int shift = (2 * esize) - immhb;
6414     int elements = is_scalar ? 1 : (64 / esize);
6415     bool round = extract32(opcode, 0, 1);
6416     TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
6417     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
6418     TCGv_i32 tcg_rd_narrowed;
6419     TCGv_i64 tcg_final;
6420
6421     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
6422         { gen_helper_neon_narrow_sat_s8,
6423           gen_helper_neon_unarrow_sat8 },
6424         { gen_helper_neon_narrow_sat_s16,
6425           gen_helper_neon_unarrow_sat16 },
6426         { gen_helper_neon_narrow_sat_s32,
6427           gen_helper_neon_unarrow_sat32 },
6428         { NULL, NULL },
6429     };
6430     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
6431         gen_helper_neon_narrow_sat_u8,
6432         gen_helper_neon_narrow_sat_u16,
6433         gen_helper_neon_narrow_sat_u32,
6434         NULL
6435     };
6436     NeonGenNarrowEnvFn *narrowfn;
6437
6438     int i;
6439
6440     assert(size < 4);
6441
6442     if (extract32(immh, 3, 1)) {
6443         unallocated_encoding(s);
6444         return;
6445     }
6446
6447     if (!fp_access_check(s)) {
6448         return;
6449     }
6450
6451     if (is_u_shift) {
6452         narrowfn = unsigned_narrow_fns[size];
6453     } else {
6454         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
6455     }
6456
6457     tcg_rn = tcg_temp_new_i64();
6458     tcg_rd = tcg_temp_new_i64();
6459     tcg_rd_narrowed = tcg_temp_new_i32();
6460     tcg_final = tcg_const_i64(0);
6461
6462     if (round) {
6463         uint64_t round_const = 1ULL << (shift - 1);
6464         tcg_round = tcg_const_i64(round_const);
6465     } else {
6466         TCGV_UNUSED_I64(tcg_round);
6467     }
6468
6469     for (i = 0; i < elements; i++) {
6470         read_vec_element(s, tcg_rn, rn, i, ldop);
6471         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6472                                 false, is_u_shift, size+1, shift);
6473         narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
6474         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
6475         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
6476     }
6477
6478     if (!is_q) {
6479         clear_vec_high(s, rd);
6480         write_vec_element(s, tcg_final, rd, 0, MO_64);
6481     } else {
6482         write_vec_element(s, tcg_final, rd, 1, MO_64);
6483     }
6484
6485     if (round) {
6486         tcg_temp_free_i64(tcg_round);
6487     }
6488     tcg_temp_free_i64(tcg_rn);
6489     tcg_temp_free_i64(tcg_rd);
6490     tcg_temp_free_i32(tcg_rd_narrowed);
6491     tcg_temp_free_i64(tcg_final);
6492     return;
6493 }
6494
6495 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
6496 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
6497                              bool src_unsigned, bool dst_unsigned,
6498                              int immh, int immb, int rn, int rd)
6499 {
6500     int immhb = immh << 3 | immb;
6501     int size = 32 - clz32(immh) - 1;
6502     int shift = immhb - (8 << size);
6503     int pass;
6504
6505     assert(immh != 0);
6506     assert(!(scalar && is_q));
6507
6508     if (!scalar) {
6509         if (!is_q && extract32(immh, 3, 1)) {
6510             unallocated_encoding(s);
6511             return;
6512         }
6513
6514         /* Since we use the variable-shift helpers we must
6515          * replicate the shift count into each element of
6516          * the tcg_shift value.
6517          */
6518         switch (size) {
6519         case 0:
6520             shift |= shift << 8;
6521             /* fall through */
6522         case 1:
6523             shift |= shift << 16;
6524             break;
6525         case 2:
6526         case 3:
6527             break;
6528         default:
6529             g_assert_not_reached();
6530         }
6531     }
6532
6533     if (!fp_access_check(s)) {
6534         return;
6535     }
6536
6537     if (size == 3) {
6538         TCGv_i64 tcg_shift = tcg_const_i64(shift);
6539         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
6540             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
6541             { NULL, gen_helper_neon_qshl_u64 },
6542         };
6543         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
6544         int maxpass = is_q ? 2 : 1;
6545
6546         for (pass = 0; pass < maxpass; pass++) {
6547             TCGv_i64 tcg_op = tcg_temp_new_i64();
6548
6549             read_vec_element(s, tcg_op, rn, pass, MO_64);
6550             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6551             write_vec_element(s, tcg_op, rd, pass, MO_64);
6552
6553             tcg_temp_free_i64(tcg_op);
6554         }
6555         tcg_temp_free_i64(tcg_shift);
6556
6557         if (!is_q) {
6558             clear_vec_high(s, rd);
6559         }
6560     } else {
6561         TCGv_i32 tcg_shift = tcg_const_i32(shift);
6562         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
6563             {
6564                 { gen_helper_neon_qshl_s8,
6565                   gen_helper_neon_qshl_s16,
6566                   gen_helper_neon_qshl_s32 },
6567                 { gen_helper_neon_qshlu_s8,
6568                   gen_helper_neon_qshlu_s16,
6569                   gen_helper_neon_qshlu_s32 }
6570             }, {
6571                 { NULL, NULL, NULL },
6572                 { gen_helper_neon_qshl_u8,
6573                   gen_helper_neon_qshl_u16,
6574                   gen_helper_neon_qshl_u32 }
6575             }
6576         };
6577         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
6578         TCGMemOp memop = scalar ? size : MO_32;
6579         int maxpass = scalar ? 1 : is_q ? 4 : 2;
6580
6581         for (pass = 0; pass < maxpass; pass++) {
6582             TCGv_i32 tcg_op = tcg_temp_new_i32();
6583
6584             read_vec_element_i32(s, tcg_op, rn, pass, memop);
6585             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6586             if (scalar) {
6587                 switch (size) {
6588                 case 0:
6589                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
6590                     break;
6591                 case 1:
6592                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
6593                     break;
6594                 case 2:
6595                     break;
6596                 default:
6597                     g_assert_not_reached();
6598                 }
6599                 write_fp_sreg(s, rd, tcg_op);
6600             } else {
6601                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6602             }
6603
6604             tcg_temp_free_i32(tcg_op);
6605         }
6606         tcg_temp_free_i32(tcg_shift);
6607
6608         if (!is_q && !scalar) {
6609             clear_vec_high(s, rd);
6610         }
6611     }
6612 }
6613
6614 /* Common vector code for handling integer to FP conversion */
6615 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
6616                                    int elements, int is_signed,
6617                                    int fracbits, int size)
6618 {
6619     bool is_double = size == 3 ? true : false;
6620     TCGv_ptr tcg_fpst = get_fpstatus_ptr();
6621     TCGv_i32 tcg_shift = tcg_const_i32(fracbits);
6622     TCGv_i64 tcg_int = tcg_temp_new_i64();
6623     TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
6624     int pass;
6625
6626     for (pass = 0; pass < elements; pass++) {
6627         read_vec_element(s, tcg_int, rn, pass, mop);
6628
6629         if (is_double) {
6630             TCGv_i64 tcg_double = tcg_temp_new_i64();
6631             if (is_signed) {
6632                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6633                                      tcg_shift, tcg_fpst);
6634             } else {
6635                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6636                                      tcg_shift, tcg_fpst);
6637             }
6638             if (elements == 1) {
6639                 write_fp_dreg(s, rd, tcg_double);
6640             } else {
6641                 write_vec_element(s, tcg_double, rd, pass, MO_64);
6642             }
6643             tcg_temp_free_i64(tcg_double);
6644         } else {
6645             TCGv_i32 tcg_single = tcg_temp_new_i32();
6646             if (is_signed) {
6647                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6648                                      tcg_shift, tcg_fpst);
6649             } else {
6650                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6651                                      tcg_shift, tcg_fpst);
6652             }
6653             if (elements == 1) {
6654                 write_fp_sreg(s, rd, tcg_single);
6655             } else {
6656                 write_vec_element_i32(s, tcg_single, rd, pass, MO_32);
6657             }
6658             tcg_temp_free_i32(tcg_single);
6659         }
6660     }
6661
6662     if (!is_double && elements == 2) {
6663         clear_vec_high(s, rd);
6664     }
6665
6666     tcg_temp_free_i64(tcg_int);
6667     tcg_temp_free_ptr(tcg_fpst);
6668     tcg_temp_free_i32(tcg_shift);
6669 }
6670
6671 /* UCVTF/SCVTF - Integer to FP conversion */
6672 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
6673                                          bool is_q, bool is_u,
6674                                          int immh, int immb, int opcode,
6675                                          int rn, int rd)
6676 {
6677     bool is_double = extract32(immh, 3, 1);
6678     int size = is_double ? MO_64 : MO_32;
6679     int elements;
6680     int immhb = immh << 3 | immb;
6681     int fracbits = (is_double ? 128 : 64) - immhb;
6682
6683     if (!extract32(immh, 2, 2)) {
6684         unallocated_encoding(s);
6685         return;
6686     }
6687
6688     if (is_scalar) {
6689         elements = 1;
6690     } else {
6691         elements = is_double ? 2 : is_q ? 4 : 2;
6692         if (is_double && !is_q) {
6693             unallocated_encoding(s);
6694             return;
6695         }
6696     }
6697
6698     if (!fp_access_check(s)) {
6699         return;
6700     }
6701
6702     /* immh == 0 would be a failure of the decode logic */
6703     g_assert(immh);
6704
6705     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
6706 }
6707
6708 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */
6709 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
6710                                          bool is_q, bool is_u,
6711                                          int immh, int immb, int rn, int rd)
6712 {
6713     bool is_double = extract32(immh, 3, 1);
6714     int immhb = immh << 3 | immb;
6715     int fracbits = (is_double ? 128 : 64) - immhb;
6716     int pass;
6717     TCGv_ptr tcg_fpstatus;
6718     TCGv_i32 tcg_rmode, tcg_shift;
6719
6720     if (!extract32(immh, 2, 2)) {
6721         unallocated_encoding(s);
6722         return;
6723     }
6724
6725     if (!is_scalar && !is_q && is_double) {
6726         unallocated_encoding(s);
6727         return;
6728     }
6729
6730     if (!fp_access_check(s)) {
6731         return;
6732     }
6733
6734     assert(!(is_scalar && is_q));
6735
6736     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
6737     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6738     tcg_fpstatus = get_fpstatus_ptr();
6739     tcg_shift = tcg_const_i32(fracbits);
6740
6741     if (is_double) {
6742         int maxpass = is_scalar ? 1 : 2;
6743
6744         for (pass = 0; pass < maxpass; pass++) {
6745             TCGv_i64 tcg_op = tcg_temp_new_i64();
6746
6747             read_vec_element(s, tcg_op, rn, pass, MO_64);
6748             if (is_u) {
6749                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6750             } else {
6751                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6752             }
6753             write_vec_element(s, tcg_op, rd, pass, MO_64);
6754             tcg_temp_free_i64(tcg_op);
6755         }
6756         if (!is_q) {
6757             clear_vec_high(s, rd);
6758         }
6759     } else {
6760         int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
6761         for (pass = 0; pass < maxpass; pass++) {
6762             TCGv_i32 tcg_op = tcg_temp_new_i32();
6763
6764             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
6765             if (is_u) {
6766                 gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6767             } else {
6768                 gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6769             }
6770             if (is_scalar) {
6771                 write_fp_sreg(s, rd, tcg_op);
6772             } else {
6773                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6774             }
6775             tcg_temp_free_i32(tcg_op);
6776         }
6777         if (!is_q && !is_scalar) {
6778             clear_vec_high(s, rd);
6779         }
6780     }
6781
6782     tcg_temp_free_ptr(tcg_fpstatus);
6783     tcg_temp_free_i32(tcg_shift);
6784     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6785     tcg_temp_free_i32(tcg_rmode);
6786 }
6787
6788 /* C3.6.9 AdvSIMD scalar shift by immediate
6789  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
6790  * +-----+---+-------------+------+------+--------+---+------+------+
6791  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
6792  * +-----+---+-------------+------+------+--------+---+------+------+
6793  *
6794  * This is the scalar version so it works on a fixed sized registers
6795  */
6796 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
6797 {
6798     int rd = extract32(insn, 0, 5);
6799     int rn = extract32(insn, 5, 5);
6800     int opcode = extract32(insn, 11, 5);
6801     int immb = extract32(insn, 16, 3);
6802     int immh = extract32(insn, 19, 4);
6803     bool is_u = extract32(insn, 29, 1);
6804
6805     if (immh == 0) {
6806         unallocated_encoding(s);
6807         return;
6808     }
6809
6810     switch (opcode) {
6811     case 0x08: /* SRI */
6812         if (!is_u) {
6813             unallocated_encoding(s);
6814             return;
6815         }
6816         /* fall through */
6817     case 0x00: /* SSHR / USHR */
6818     case 0x02: /* SSRA / USRA */
6819     case 0x04: /* SRSHR / URSHR */
6820     case 0x06: /* SRSRA / URSRA */
6821         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
6822         break;
6823     case 0x0a: /* SHL / SLI */
6824         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
6825         break;
6826     case 0x1c: /* SCVTF, UCVTF */
6827         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
6828                                      opcode, rn, rd);
6829         break;
6830     case 0x10: /* SQSHRUN, SQSHRUN2 */
6831     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
6832         if (!is_u) {
6833             unallocated_encoding(s);
6834             return;
6835         }
6836         handle_vec_simd_sqshrn(s, true, false, false, true,
6837                                immh, immb, opcode, rn, rd);
6838         break;
6839     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
6840     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
6841         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
6842                                immh, immb, opcode, rn, rd);
6843         break;
6844     case 0xc: /* SQSHLU */
6845         if (!is_u) {
6846             unallocated_encoding(s);
6847             return;
6848         }
6849         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
6850         break;
6851     case 0xe: /* SQSHL, UQSHL */
6852         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
6853         break;
6854     case 0x1f: /* FCVTZS, FCVTZU */
6855         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
6856         break;
6857     default:
6858         unallocated_encoding(s);
6859         break;
6860     }
6861 }
6862
6863 /* C3.6.10 AdvSIMD scalar three different
6864  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6865  * +-----+---+-----------+------+---+------+--------+-----+------+------+
6866  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
6867  * +-----+---+-----------+------+---+------+--------+-----+------+------+
6868  */
6869 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
6870 {
6871     bool is_u = extract32(insn, 29, 1);
6872     int size = extract32(insn, 22, 2);
6873     int opcode = extract32(insn, 12, 4);
6874     int rm = extract32(insn, 16, 5);
6875     int rn = extract32(insn, 5, 5);
6876     int rd = extract32(insn, 0, 5);
6877
6878     if (is_u) {
6879         unallocated_encoding(s);
6880         return;
6881     }
6882
6883     switch (opcode) {
6884     case 0x9: /* SQDMLAL, SQDMLAL2 */
6885     case 0xb: /* SQDMLSL, SQDMLSL2 */
6886     case 0xd: /* SQDMULL, SQDMULL2 */
6887         if (size == 0 || size == 3) {
6888             unallocated_encoding(s);
6889             return;
6890         }
6891         break;
6892     default:
6893         unallocated_encoding(s);
6894         return;
6895     }
6896
6897     if (!fp_access_check(s)) {
6898         return;
6899     }
6900
6901     if (size == 2) {
6902         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6903         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6904         TCGv_i64 tcg_res = tcg_temp_new_i64();
6905
6906         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
6907         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
6908
6909         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
6910         gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
6911
6912         switch (opcode) {
6913         case 0xd: /* SQDMULL, SQDMULL2 */
6914             break;
6915         case 0xb: /* SQDMLSL, SQDMLSL2 */
6916             tcg_gen_neg_i64(tcg_res, tcg_res);
6917             /* fall through */
6918         case 0x9: /* SQDMLAL, SQDMLAL2 */
6919             read_vec_element(s, tcg_op1, rd, 0, MO_64);
6920             gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
6921                                               tcg_res, tcg_op1);
6922             break;
6923         default:
6924             g_assert_not_reached();
6925         }
6926
6927         write_fp_dreg(s, rd, tcg_res);
6928
6929         tcg_temp_free_i64(tcg_op1);
6930         tcg_temp_free_i64(tcg_op2);
6931         tcg_temp_free_i64(tcg_res);
6932     } else {
6933         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6934         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6935         TCGv_i64 tcg_res = tcg_temp_new_i64();
6936
6937         read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
6938         read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
6939
6940         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
6941         gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
6942
6943         switch (opcode) {
6944         case 0xd: /* SQDMULL, SQDMULL2 */
6945             break;
6946         case 0xb: /* SQDMLSL, SQDMLSL2 */
6947             gen_helper_neon_negl_u32(tcg_res, tcg_res);
6948             /* fall through */
6949         case 0x9: /* SQDMLAL, SQDMLAL2 */
6950         {
6951             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
6952             read_vec_element(s, tcg_op3, rd, 0, MO_32);
6953             gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
6954                                               tcg_res, tcg_op3);
6955             tcg_temp_free_i64(tcg_op3);
6956             break;
6957         }
6958         default:
6959             g_assert_not_reached();
6960         }
6961
6962         tcg_gen_ext32u_i64(tcg_res, tcg_res);
6963         write_fp_dreg(s, rd, tcg_res);
6964
6965         tcg_temp_free_i32(tcg_op1);
6966         tcg_temp_free_i32(tcg_op2);
6967         tcg_temp_free_i64(tcg_res);
6968     }
6969 }
6970
6971 static void handle_3same_64(DisasContext *s, int opcode, bool u,
6972                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
6973 {
6974     /* Handle 64x64->64 opcodes which are shared between the scalar
6975      * and vector 3-same groups. We cover every opcode where size == 3
6976      * is valid in either the three-reg-same (integer, not pairwise)
6977      * or scalar-three-reg-same groups. (Some opcodes are not yet
6978      * implemented.)
6979      */
6980     TCGCond cond;
6981
6982     switch (opcode) {
6983     case 0x1: /* SQADD */
6984         if (u) {
6985             gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6986         } else {
6987             gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6988         }
6989         break;
6990     case 0x5: /* SQSUB */
6991         if (u) {
6992             gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6993         } else {
6994             gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6995         }
6996         break;
6997     case 0x6: /* CMGT, CMHI */
6998         /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
6999          * We implement this using setcond (test) and then negating.
7000          */
7001         cond = u ? TCG_COND_GTU : TCG_COND_GT;
7002     do_cmop:
7003         tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
7004         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7005         break;
7006     case 0x7: /* CMGE, CMHS */
7007         cond = u ? TCG_COND_GEU : TCG_COND_GE;
7008         goto do_cmop;
7009     case 0x11: /* CMTST, CMEQ */
7010         if (u) {
7011             cond = TCG_COND_EQ;
7012             goto do_cmop;
7013         }
7014         /* CMTST : test is "if (X & Y != 0)". */
7015         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
7016         tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
7017         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7018         break;
7019     case 0x8: /* SSHL, USHL */
7020         if (u) {
7021             gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
7022         } else {
7023             gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
7024         }
7025         break;
7026     case 0x9: /* SQSHL, UQSHL */
7027         if (u) {
7028             gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7029         } else {
7030             gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7031         }
7032         break;
7033     case 0xa: /* SRSHL, URSHL */
7034         if (u) {
7035             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
7036         } else {
7037             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
7038         }
7039         break;
7040     case 0xb: /* SQRSHL, UQRSHL */
7041         if (u) {
7042             gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7043         } else {
7044             gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7045         }
7046         break;
7047     case 0x10: /* ADD, SUB */
7048         if (u) {
7049             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
7050         } else {
7051             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
7052         }
7053         break;
7054     default:
7055         g_assert_not_reached();
7056     }
7057 }
7058
7059 /* Handle the 3-same-operands float operations; shared by the scalar
7060  * and vector encodings. The caller must filter out any encodings
7061  * not allocated for the encoding it is dealing with.
7062  */
7063 static void handle_3same_float(DisasContext *s, int size, int elements,
7064                                int fpopcode, int rd, int rn, int rm)
7065 {
7066     int pass;
7067     TCGv_ptr fpst = get_fpstatus_ptr();
7068
7069     for (pass = 0; pass < elements; pass++) {
7070         if (size) {
7071             /* Double */
7072             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7073             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7074             TCGv_i64 tcg_res = tcg_temp_new_i64();
7075
7076             read_vec_element(s, tcg_op1, rn, pass, MO_64);
7077             read_vec_element(s, tcg_op2, rm, pass, MO_64);
7078
7079             switch (fpopcode) {
7080             case 0x39: /* FMLS */
7081                 /* As usual for ARM, separate negation for fused multiply-add */
7082                 gen_helper_vfp_negd(tcg_op1, tcg_op1);
7083                 /* fall through */
7084             case 0x19: /* FMLA */
7085                 read_vec_element(s, tcg_res, rd, pass, MO_64);
7086                 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
7087                                        tcg_res, fpst);
7088                 break;
7089             case 0x18: /* FMAXNM */
7090                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7091                 break;
7092             case 0x1a: /* FADD */
7093                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7094                 break;
7095             case 0x1b: /* FMULX */
7096                 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
7097                 break;
7098             case 0x1c: /* FCMEQ */
7099                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7100                 break;
7101             case 0x1e: /* FMAX */
7102                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7103                 break;
7104             case 0x1f: /* FRECPS */
7105                 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7106                 break;
7107             case 0x38: /* FMINNM */
7108                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7109                 break;
7110             case 0x3a: /* FSUB */
7111                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7112                 break;
7113             case 0x3e: /* FMIN */
7114                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7115                 break;
7116             case 0x3f: /* FRSQRTS */
7117                 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7118                 break;
7119             case 0x5b: /* FMUL */
7120                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
7121                 break;
7122             case 0x5c: /* FCMGE */
7123                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7124                 break;
7125             case 0x5d: /* FACGE */
7126                 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7127                 break;
7128             case 0x5f: /* FDIV */
7129                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
7130                 break;
7131             case 0x7a: /* FABD */
7132                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7133                 gen_helper_vfp_absd(tcg_res, tcg_res);
7134                 break;
7135             case 0x7c: /* FCMGT */
7136                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7137                 break;
7138             case 0x7d: /* FACGT */
7139                 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7140                 break;
7141             default:
7142                 g_assert_not_reached();
7143             }
7144
7145             write_vec_element(s, tcg_res, rd, pass, MO_64);
7146
7147             tcg_temp_free_i64(tcg_res);
7148             tcg_temp_free_i64(tcg_op1);
7149             tcg_temp_free_i64(tcg_op2);
7150         } else {
7151             /* Single */
7152             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7153             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7154             TCGv_i32 tcg_res = tcg_temp_new_i32();
7155
7156             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
7157             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
7158
7159             switch (fpopcode) {
7160             case 0x39: /* FMLS */
7161                 /* As usual for ARM, separate negation for fused multiply-add */
7162                 gen_helper_vfp_negs(tcg_op1, tcg_op1);
7163                 /* fall through */
7164             case 0x19: /* FMLA */
7165                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7166                 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
7167                                        tcg_res, fpst);
7168                 break;
7169             case 0x1a: /* FADD */
7170                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7171                 break;
7172             case 0x1b: /* FMULX */
7173                 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
7174                 break;
7175             case 0x1c: /* FCMEQ */
7176                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7177                 break;
7178             case 0x1e: /* FMAX */
7179                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7180                 break;
7181             case 0x1f: /* FRECPS */
7182                 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7183                 break;
7184             case 0x18: /* FMAXNM */
7185                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7186                 break;
7187             case 0x38: /* FMINNM */
7188                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7189                 break;
7190             case 0x3a: /* FSUB */
7191                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7192                 break;
7193             case 0x3e: /* FMIN */
7194                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7195                 break;
7196             case 0x3f: /* FRSQRTS */
7197                 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7198                 break;
7199             case 0x5b: /* FMUL */
7200                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
7201                 break;
7202             case 0x5c: /* FCMGE */
7203                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7204                 break;
7205             case 0x5d: /* FACGE */
7206                 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7207                 break;
7208             case 0x5f: /* FDIV */
7209                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
7210                 break;
7211             case 0x7a: /* FABD */
7212                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7213                 gen_helper_vfp_abss(tcg_res, tcg_res);
7214                 break;
7215             case 0x7c: /* FCMGT */
7216                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7217                 break;
7218             case 0x7d: /* FACGT */
7219                 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7220                 break;
7221             default:
7222                 g_assert_not_reached();
7223             }
7224
7225             if (elements == 1) {
7226                 /* scalar single so clear high part */
7227                 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7228
7229                 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
7230                 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
7231                 tcg_temp_free_i64(tcg_tmp);
7232             } else {
7233                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7234             }
7235
7236             tcg_temp_free_i32(tcg_res);
7237             tcg_temp_free_i32(tcg_op1);
7238             tcg_temp_free_i32(tcg_op2);
7239         }
7240     }
7241
7242     tcg_temp_free_ptr(fpst);
7243
7244     if ((elements << size) < 4) {
7245         /* scalar, or non-quad vector op */
7246         clear_vec_high(s, rd);
7247     }
7248 }
7249
/* C3.6.11 AdvSIMD scalar three same
 *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
 * +-----+---+-----------+------+---+------+--------+---+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
 * +-----+---+-----------+------+---+------+--------+---+------+------+
 *
 * Decode one instruction of this group and emit the TCG ops for it.
 * Opcodes >= 0x18 are treated as floating point and handed to
 * handle_3same_float(). The remaining integer opcodes are emitted here:
 * via handle_3same_64() when size == 3, otherwise via a Neon env helper
 * applied to element 0 only. Any encoding not accepted by the switch
 * statements below ends in unallocated_encoding().
 */
static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    TCGv_i64 tcg_rd;

    if (opcode >= 0x18) {
        /* Floating point: U, size[1] and opcode indicate operation.
         * They are combined into one 7-bit value (U in bit 6, size[1]
         * in bit 5) so a single switch can identify the insn.
         */
        int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
        switch (fpopcode) {
        case 0x1b: /* FMULX */
        case 0x1f: /* FRECPS */
        case 0x3f: /* FRSQRTS */
        case 0x5d: /* FACGE */
        case 0x7d: /* FACGT */
        case 0x1c: /* FCMEQ */
        case 0x5c: /* FCMGE */
        case 0x7c: /* FCMGT */
        case 0x7a: /* FABD */
            break;
        default:
            unallocated_encoding(s);
            return;
        }

        /* Emit nothing further if the FP access check fails */
        if (!fp_access_check(s)) {
            return;
        }

        /* size[0] is passed on separately from the combined fpopcode */
        handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
        return;
    }

    /* Integer ops: validate the opcode/size combination before
     * generating any code.
     */
    switch (opcode) {
    case 0x1: /* SQADD, UQADD */
    case 0x5: /* SQSUB, UQSUB */
    case 0x9: /* SQSHL, UQSHL */
    case 0xb: /* SQRSHL, UQRSHL */
        /* accepted for every size */
        break;
    case 0x8: /* SSHL, USHL */
    case 0xa: /* SRSHL, URSHL */
    case 0x6: /* CMGT, CMHI */
    case 0x7: /* CMGE, CMHS */
    case 0x11: /* CMTST, CMEQ */
    case 0x10: /* ADD, SUB (vector) */
        /* accepted for 64-bit elements only */
        if (size != 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x16: /* SQDMULH, SQRDMULH (vector) */
        /* accepted for size 1 and 2 only */
        if (size != 1 && size != 2) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* Both paths below leave a 64-bit result in tcg_rd */
    tcg_rd = tcg_temp_new_i64();

    if (size == 3) {
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i64 tcg_rm = read_fp_dreg(s, rm);

        handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
        tcg_temp_free_i64(tcg_rn);
        tcg_temp_free_i64(tcg_rm);
    } else {
        /* Do a single operation on the lowest element in the vector.
         * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
         * no side effects for all these operations.
         * OPTME: special-purpose helpers would avoid doing some
         * unnecessary work in the helper for the 8 and 16 bit cases.
         */
        NeonGenTwoOpEnvFn *genenvfn;
        TCGv_i32 tcg_rn = tcg_temp_new_i32();
        TCGv_i32 tcg_rm = tcg_temp_new_i32();
        TCGv_i32 tcg_rd32 = tcg_temp_new_i32();

        /* Only element 0 of each source is loaded */
        read_vec_element_i32(s, tcg_rn, rn, 0, size);
        read_vec_element_i32(s, tcg_rm, rm, 0, size);

        /* Pick the helper: tables are indexed [size][u], except for
         * opcode 0x16 whose table starts at size 1.
         */
        switch (opcode) {
        case 0x1: /* SQADD, UQADD */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
                { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
                { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x5: /* SQSUB, UQSUB */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
                { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
                { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x9: /* SQSHL, UQSHL */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
                { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
                { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0xb: /* SQRSHL, UQRSHL */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
                { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
                { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x16: /* SQDMULH, SQRDMULH */
        {
            /* Here u selects the rounding variant (SQRDMULH) */
            static NeonGenTwoOpEnvFn * const fns[2][2] = {
                { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
                { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
            };
            assert(size == 1 || size == 2);
            genenvfn = fns[size - 1][u];
            break;
        }
        default:
            /* the remaining accepted opcodes all require size == 3 */
            g_assert_not_reached();
        }

        genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
        /* Zero-extend the 32-bit helper result to the 64-bit tcg_rd */
        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
        tcg_temp_free_i32(tcg_rd32);
        tcg_temp_free_i32(tcg_rn);
        tcg_temp_free_i32(tcg_rm);
    }

    write_fp_dreg(s, rd, tcg_rd);

    tcg_temp_free_i64(tcg_rd);
}
7415
7416 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
7417                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
7418                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
7419 {
7420     /* Handle 64->64 opcodes which are shared between the scalar and
7421      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
7422      * is valid in either group and also the double-precision fp ops.
7423      * The caller only need provide tcg_rmode and tcg_fpstatus if the op
7424      * requires them.
7425      */
7426     TCGCond cond;
7427
7428     switch (opcode) {
7429     case 0x4: /* CLS, CLZ */
7430         if (u) {
7431             gen_helper_clz64(tcg_rd, tcg_rn);
7432         } else {
7433             gen_helper_cls64(tcg_rd, tcg_rn);
7434         }
7435         break;
7436     case 0x5: /* NOT */
7437         /* This opcode is shared with CNT and RBIT but we have earlier
7438          * enforced that size == 3 if and only if this is the NOT insn.
7439          */
7440         tcg_gen_not_i64(tcg_rd, tcg_rn);
7441         break;
7442     case 0x7: /* SQABS, SQNEG */
7443         if (u) {
7444             gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
7445         } else {
7446             gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
7447         }
7448         break;
7449     case 0xa: /* CMLT */
7450         /* 64 bit integer comparison against zero, result is
7451          * test ? (2^64 - 1) : 0. We implement via setcond(!test) and
7452          * subtracting 1.
7453          */
7454         cond = TCG_COND_LT;
7455     do_cmop:
7456         tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
7457         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7458         break;
7459     case 0x8: /* CMGT, CMGE */
7460         cond = u ? TCG_COND_GE : TCG_COND_GT;
7461         goto do_cmop;
7462     case 0x9: /* CMEQ, CMLE */
7463         cond = u ? TCG_COND_LE : TCG_COND_EQ;
7464         goto do_cmop;
7465     case 0xb: /* ABS, NEG */
7466         if (u) {
7467             tcg_gen_neg_i64(tcg_rd, tcg_rn);
7468         } else {
7469             TCGv_i64 tcg_zero = tcg_const_i64(0);
7470             tcg_gen_neg_i64(tcg_rd, tcg_rn);
7471             tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
7472                                 tcg_rn, tcg_rd);
7473             tcg_temp_free_i64(tcg_zero);
7474         }
7475         break;
7476     case 0x2f: /* FABS */
7477         gen_helper_vfp_absd(tcg_rd, tcg_rn);
7478         break;
7479     case 0x6f: /* FNEG */
7480         gen_helper_vfp_negd(tcg_rd, tcg_rn);
7481         break;
7482     case 0x7f: /* FSQRT */
7483         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
7484         break;
7485     case 0x1a: /* FCVTNS */
7486     case 0x1b: /* FCVTMS */
7487     case 0x1c: /* FCVTAS */
7488     case 0x3a: /* FCVTPS */
7489     case 0x3b: /* FCVTZS */
7490     {
7491         TCGv_i32 tcg_shift = tcg_const_i32(0);
7492         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7493         tcg_temp_free_i32(tcg_shift);
7494         break;
7495     }
7496     case 0x5a: /* FCVTNU */
7497     case 0x5b: /* FCVTMU */
7498     case 0x5c: /* FCVTAU */
7499     case 0x7a: /* FCVTPU */
7500     case 0x7b: /* FCVTZU */
7501     {
7502         TCGv_i32 tcg_shift = tcg_const_i32(0);
7503         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7504         tcg_temp_free_i32(tcg_shift);
7505         break;
7506     }
7507     case 0x18: /* FRINTN */
7508     case 0x19: /* FRINTM */
7509     case 0x38: /* FRINTP */
7510     case 0x39: /* FRINTZ */
7511     case 0x58: /* FRINTA */
7512     case 0x79: /* FRINTI */
7513         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
7514         break;
7515     case 0x59: /* FRINTX */
7516         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
7517         break;
7518     default:
7519         g_assert_not_reached();
7520     }
7521 }
7522
7523 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
7524                                    bool is_scalar, bool is_u, bool is_q,
7525                                    int size, int rn, int rd)
7526 {
7527     bool is_double = (size == 3);
7528     TCGv_ptr fpst;
7529
7530     if (!fp_access_check(s)) {
7531         return;
7532     }
7533
7534     fpst = get_fpstatus_ptr();
7535
7536     if (is_double) {
7537         TCGv_i64 tcg_op = tcg_temp_new_i64();
7538         TCGv_i64 tcg_zero = tcg_const_i64(0);
7539         TCGv_i64 tcg_res = tcg_temp_new_i64();
7540         NeonGenTwoDoubleOPFn *genfn;
7541         bool swap = false;
7542         int pass;
7543
7544         switch (opcode) {
7545         case 0x2e: /* FCMLT (zero) */
7546             swap = true;
7547             /* fallthrough */
7548         case 0x2c: /* FCMGT (zero) */
7549             genfn = gen_helper_neon_cgt_f64;
7550             break;
7551         case 0x2d: /* FCMEQ (zero) */
7552             genfn = gen_helper_neon_ceq_f64;
7553             break;
7554         case 0x6d: /* FCMLE (zero) */
7555             swap = true;
7556             /* fall through */
7557         case 0x6c: /* FCMGE (zero) */
7558             genfn = gen_helper_neon_cge_f64;
7559             break;
7560         default:
7561             g_assert_not_reached();
7562         }
7563
7564         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7565             read_vec_element(s, tcg_op, rn, pass, MO_64);
7566             if (swap) {
7567                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7568             } else {
7569                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7570             }
7571             write_vec_element(s, tcg_res, rd, pass, MO_64);
7572         }
7573         if (is_scalar) {
7574             clear_vec_high(s, rd);
7575         }
7576
7577         tcg_temp_free_i64(tcg_res);
7578         tcg_temp_free_i64(tcg_zero);
7579         tcg_temp_free_i64(tcg_op);
7580     } else {
7581         TCGv_i32 tcg_op = tcg_temp_new_i32();
7582         TCGv_i32 tcg_zero = tcg_const_i32(0);
7583         TCGv_i32 tcg_res = tcg_temp_new_i32();
7584         NeonGenTwoSingleOPFn *genfn;
7585         bool swap = false;
7586         int pass, maxpasses;
7587
7588         switch (opcode) {
7589         case 0x2e: /* FCMLT (zero) */
7590             swap = true;
7591             /* fall through */
7592         case 0x2c: /* FCMGT (zero) */
7593             genfn = gen_helper_neon_cgt_f32;
7594             break;
7595         case 0x2d: /* FCMEQ (zero) */
7596             genfn = gen_helper_neon_ceq_f32;
7597             break;
7598         case 0x6d: /* FCMLE (zero) */
7599             swap = true;
7600             /* fall through */
7601         case 0x6c: /* FCMGE (zero) */
7602             genfn = gen_helper_neon_cge_f32;
7603             break;
7604         default:
7605             g_assert_not_reached();
7606         }
7607
7608         if (is_scalar) {
7609             maxpasses = 1;
7610         } else {
7611             maxpasses = is_q ? 4 : 2;
7612         }
7613
7614         for (pass = 0; pass < maxpasses; pass++) {
7615             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7616             if (swap) {
7617                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7618             } else {
7619                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7620             }
7621             if (is_scalar) {
7622                 write_fp_sreg(s, rd, tcg_res);
7623             } else {
7624                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7625             }
7626         }
7627         tcg_temp_free_i32(tcg_res);
7628         tcg_temp_free_i32(tcg_zero);
7629         tcg_temp_free_i32(tcg_op);
7630         if (!is_q && !is_scalar) {
7631             clear_vec_high(s, rd);
7632         }
7633     }
7634
7635     tcg_temp_free_ptr(fpst);
7636 }
7637
7638 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
7639                                     bool is_scalar, bool is_u, bool is_q,
7640                                     int size, int rn, int rd)
7641 {
7642     bool is_double = (size == 3);
7643     TCGv_ptr fpst = get_fpstatus_ptr();
7644
7645     if (is_double) {
7646         TCGv_i64 tcg_op = tcg_temp_new_i64();
7647         TCGv_i64 tcg_res = tcg_temp_new_i64();
7648         int pass;
7649
7650         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7651             read_vec_element(s, tcg_op, rn, pass, MO_64);
7652             switch (opcode) {
7653             case 0x3d: /* FRECPE */
7654                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
7655                 break;
7656             case 0x3f: /* FRECPX */
7657                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
7658                 break;
7659             case 0x7d: /* FRSQRTE */
7660                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
7661                 break;
7662             default:
7663                 g_assert_not_reached();
7664             }
7665             write_vec_element(s, tcg_res, rd, pass, MO_64);
7666         }
7667         if (is_scalar) {
7668             clear_vec_high(s, rd);
7669         }
7670
7671         tcg_temp_free_i64(tcg_res);
7672         tcg_temp_free_i64(tcg_op);
7673     } else {
7674         TCGv_i32 tcg_op = tcg_temp_new_i32();
7675         TCGv_i32 tcg_res = tcg_temp_new_i32();
7676         int pass, maxpasses;
7677
7678         if (is_scalar) {
7679             maxpasses = 1;
7680         } else {
7681             maxpasses = is_q ? 4 : 2;
7682         }
7683
7684         for (pass = 0; pass < maxpasses; pass++) {
7685             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7686
7687             switch (opcode) {
7688             case 0x3c: /* URECPE */
7689                 gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
7690                 break;
7691             case 0x3d: /* FRECPE */
7692                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
7693                 break;
7694             case 0x3f: /* FRECPX */
7695                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
7696                 break;
7697             case 0x7d: /* FRSQRTE */
7698                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
7699                 break;
7700             default:
7701                 g_assert_not_reached();
7702             }
7703
7704             if (is_scalar) {
7705                 write_fp_sreg(s, rd, tcg_res);
7706             } else {
7707                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7708             }
7709         }
7710         tcg_temp_free_i32(tcg_res);
7711         tcg_temp_free_i32(tcg_op);
7712         if (!is_q && !is_scalar) {
7713             clear_vec_high(s, rd);
7714         }
7715     }
7716     tcg_temp_free_ptr(fpst);
7717 }
7718
7719 static void handle_2misc_narrow(DisasContext *s, bool scalar,
7720                                 int opcode, bool u, bool is_q,
7721                                 int size, int rn, int rd)
7722 {
7723     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
7724      * in the source becomes a size element in the destination).
7725      */
7726     int pass;
7727     TCGv_i32 tcg_res[2];
7728     int destelt = is_q ? 2 : 0;
7729     int passes = scalar ? 1 : 2;
7730
7731     if (scalar) {
7732         tcg_res[1] = tcg_const_i32(0);
7733     }
7734
7735     for (pass = 0; pass < passes; pass++) {
7736         TCGv_i64 tcg_op = tcg_temp_new_i64();
7737         NeonGenNarrowFn *genfn = NULL;
7738         NeonGenNarrowEnvFn *genenvfn = NULL;
7739
7740         if (scalar) {
7741             read_vec_element(s, tcg_op, rn, pass, size + 1);
7742         } else {
7743             read_vec_element(s, tcg_op, rn, pass, MO_64);
7744         }
7745         tcg_res[pass] = tcg_temp_new_i32();
7746
7747         switch (opcode) {
7748         case 0x12: /* XTN, SQXTUN */
7749         {
7750             static NeonGenNarrowFn * const xtnfns[3] = {
7751                 gen_helper_neon_narrow_u8,
7752                 gen_helper_neon_narrow_u16,
7753                 tcg_gen_extrl_i64_i32,
7754             };
7755             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
7756                 gen_helper_neon_unarrow_sat8,
7757                 gen_helper_neon_unarrow_sat16,
7758                 gen_helper_neon_unarrow_sat32,
7759             };
7760             if (u) {
7761                 genenvfn = sqxtunfns[size];
7762             } else {
7763                 genfn = xtnfns[size];
7764             }
7765             break;
7766         }
7767         case 0x14: /* SQXTN, UQXTN */
7768         {
7769             static NeonGenNarrowEnvFn * const fns[3][2] = {
7770                 { gen_helper_neon_narrow_sat_s8,
7771                   gen_helper_neon_narrow_sat_u8 },
7772                 { gen_helper_neon_narrow_sat_s16,
7773                   gen_helper_neon_narrow_sat_u16 },
7774                 { gen_helper_neon_narrow_sat_s32,
7775                   gen_helper_neon_narrow_sat_u32 },
7776             };
7777             genenvfn = fns[size][u];
7778             break;
7779         }
7780         case 0x16: /* FCVTN, FCVTN2 */
7781             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
7782             if (size == 2) {
7783                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
7784             } else {
7785                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
7786                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
7787                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
7788                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
7789                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
7790                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
7791                 tcg_temp_free_i32(tcg_lo);
7792                 tcg_temp_free_i32(tcg_hi);
7793             }
7794             break;
7795         case 0x56:  /* FCVTXN, FCVTXN2 */
7796             /* 64 bit to 32 bit float conversion
7797              * with von Neumann rounding (round to odd)
7798              */
7799             assert(size == 2);
7800             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
7801             break;
7802         default:
7803             g_assert_not_reached();
7804         }
7805
7806         if (genfn) {
7807             genfn(tcg_res[pass], tcg_op);
7808         } else if (genenvfn) {
7809             genenvfn(tcg_res[pass], cpu_env, tcg_op);
7810         }
7811
7812         tcg_temp_free_i64(tcg_op);
7813     }
7814
7815     for (pass = 0; pass < 2; pass++) {
7816         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
7817         tcg_temp_free_i32(tcg_res[pass]);
7818     }
7819     if (!is_q) {
7820         clear_vec_high(s, rd);
7821     }
7822 }
7823
7824 /* Remaining saturating accumulating ops */
7825 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
7826                                 bool is_q, int size, int rn, int rd)
7827 {
7828     bool is_double = (size == 3);
7829
7830     if (is_double) {
7831         TCGv_i64 tcg_rn = tcg_temp_new_i64();
7832         TCGv_i64 tcg_rd = tcg_temp_new_i64();
7833         int pass;
7834
7835         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7836             read_vec_element(s, tcg_rn, rn, pass, MO_64);
7837             read_vec_element(s, tcg_rd, rd, pass, MO_64);
7838
7839             if (is_u) { /* USQADD */
7840                 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7841             } else { /* SUQADD */
7842                 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7843             }
7844             write_vec_element(s, tcg_rd, rd, pass, MO_64);
7845         }
7846         if (is_scalar) {
7847             clear_vec_high(s, rd);
7848         }
7849
7850         tcg_temp_free_i64(tcg_rd);
7851         tcg_temp_free_i64(tcg_rn);
7852     } else {
7853         TCGv_i32 tcg_rn = tcg_temp_new_i32();
7854         TCGv_i32 tcg_rd = tcg_temp_new_i32();
7855         int pass, maxpasses;
7856
7857         if (is_scalar) {
7858             maxpasses = 1;
7859         } else {
7860             maxpasses = is_q ? 4 : 2;
7861         }
7862
7863         for (pass = 0; pass < maxpasses; pass++) {
7864             if (is_scalar) {
7865                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
7866                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
7867             } else {
7868                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
7869                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
7870             }
7871
7872             if (is_u) { /* USQADD */
7873                 switch (size) {
7874                 case 0:
7875                     gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7876                     break;
7877                 case 1:
7878                     gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7879                     break;
7880                 case 2:
7881                     gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7882                     break;
7883                 default:
7884                     g_assert_not_reached();
7885                 }
7886             } else { /* SUQADD */
7887                 switch (size) {
7888                 case 0:
7889                     gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7890                     break;
7891                 case 1:
7892                     gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7893                     break;
7894                 case 2:
7895                     gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7896                     break;
7897                 default:
7898                     g_assert_not_reached();
7899                 }
7900             }
7901
7902             if (is_scalar) {
7903                 TCGv_i64 tcg_zero = tcg_const_i64(0);
7904                 write_vec_element(s, tcg_zero, rd, 0, MO_64);
7905                 tcg_temp_free_i64(tcg_zero);
7906             }
7907             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
7908         }
7909
7910         if (!is_q) {
7911             clear_vec_high(s, rd);
7912         }
7913
7914         tcg_temp_free_i32(tcg_rd);
7915         tcg_temp_free_i32(tcg_rn);
7916     }
7917 }
7918
/* C3.6.12 AdvSIMD scalar two reg misc
 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 *
 * Decode one instruction of this group. Several opcodes are handed off
 * to the handle_2misc_*() functions or handle_simd_intfp_conv() and
 * return straight away. What is left (SQABS/SQNEG, the 64-bit integer
 * compares against zero, ABS/NEG and the FCVT* float-to-integer
 * conversions) leaves the decode switch via "break" and is emitted at
 * the bottom of the function. Encodings the switch does not accept end
 * in unallocated_encoding().
 */
static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 12, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    bool is_fcvt = false;
    int rmode;          /* only assigned and read when is_fcvt is true */
    TCGv_i32 tcg_rmode;
    TCGv_ptr tcg_fpstatus;

    switch (opcode) {
    case 0x3: /* USQADD / SUQADD*/
        if (!fp_access_check(s)) {
            return;
        }
        handle_2misc_satacc(s, true, u, false, size, rn, rd);
        return;
    case 0x7: /* SQABS / SQNEG */
        /* accepted for every size */
        break;
    case 0xa: /* CMLT */
        if (u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x8: /* CMGT, CMGE */
    case 0x9: /* CMEQ, CMLE */
    case 0xb: /* ABS, NEG */
        /* accepted for 64-bit elements only */
        if (size != 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x12: /* SQXTUN */
        if (!u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x14: /* SQXTN, UQXTN */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
        return;
    case 0xc ... 0xf:
    case 0x16 ... 0x1d:
    case 0x1f:
        /* Floating point: U, size[1] and opcode indicate operation;
         * size[0] indicates single or double precision.
         * From here on opcode is the combined 7-bit value (U in bit 6,
         * size[1] in bit 5) and size has been remapped to 2 or 3.
         */
        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
        size = extract32(size, 0, 1) ? 3 : 2;
        switch (opcode) {
        case 0x2c: /* FCMGT (zero) */
        case 0x2d: /* FCMEQ (zero) */
        case 0x2e: /* FCMLT (zero) */
        case 0x6c: /* FCMGE (zero) */
        case 0x6d: /* FCMLE (zero) */
            /* No fp_access_check() here: handle_2misc_fcmp_zero()
             * performs it itself.
             */
            handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
            return;
        case 0x1d: /* SCVTF */
        case 0x5d: /* UCVTF */
        {
            bool is_signed = (opcode == 0x1d);
            if (!fp_access_check(s)) {
                return;
            }
            handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
            return;
        }
        case 0x3d: /* FRECPE */
        case 0x3f: /* FRECPX */
        case 0x7d: /* FRSQRTE */
            if (!fp_access_check(s)) {
                return;
            }
            handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
            return;
        case 0x1a: /* FCVTNS */
        case 0x1b: /* FCVTMS */
        case 0x3a: /* FCVTPS */
        case 0x3b: /* FCVTZS */
        case 0x5a: /* FCVTNU */
        case 0x5b: /* FCVTMU */
        case 0x7a: /* FCVTPU */
        case 0x7b: /* FCVTZU */
            is_fcvt = true;
            /* rmode bit 0 is opcode bit 5, rmode bit 1 is opcode bit 0 */
            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
            break;
        case 0x1c: /* FCVTAS */
        case 0x5c: /* FCVTAU */
            /* TIEAWAY doesn't fit in the usual rounding mode encoding */
            is_fcvt = true;
            rmode = FPROUNDING_TIEAWAY;
            break;
        case 0x56: /* FCVTXN, FCVTXN2 */
            if (size == 2) {
                unallocated_encoding(s);
                return;
            }
            if (!fp_access_check(s)) {
                return;
            }
            /* size is 3 here, so handle_2misc_narrow() is given 2,
             * which is the value it asserts for this opcode.
             */
            handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
            return;
        default:
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    /* Common emission path for the opcodes which did "break" above */
    if (!fp_access_check(s)) {
        return;
    }

    if (is_fcvt) {
        /* Install the rounding mode chosen above around the conversion.
         * NOTE(review): tcg_rmode is both input and output of
         * gen_helper_set_rmode() and the identical call is repeated
         * after the conversion; presumably the helper hands back the
         * previous mode so that the second call restores it. Confirm
         * against the helper's definition.
         */
        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        tcg_fpstatus = get_fpstatus_ptr();
    } else {
        /* The non-conversion ops below never touch these two */
        TCGV_UNUSED_I32(tcg_rmode);
        TCGV_UNUSED_PTR(tcg_fpstatus);
    }

    if (size == 3) {
        /* 64-bit element: shared code with the vector decoder */
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i64 tcg_rd = tcg_temp_new_i64();

        handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
        write_fp_dreg(s, rd, tcg_rd);
        tcg_temp_free_i64(tcg_rd);
        tcg_temp_free_i64(tcg_rn);
    } else {
        /* Narrower element: operate on element 0 in 32-bit temps */
        TCGv_i32 tcg_rn = tcg_temp_new_i32();
        TCGv_i32 tcg_rd = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_rn, rn, 0, size);

        switch (opcode) {
        case 0x7: /* SQABS, SQNEG */
        {
            /* Table is indexed [size][u]; u selects SQNEG */
            NeonGenOneOpEnvFn *genfn;
            static NeonGenOneOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
                { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
                { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
            };
            genfn = fns[size][u];
            genfn(tcg_rd, cpu_env, tcg_rn);
            break;
        }
        case 0x1a: /* FCVTNS */
        case 0x1b: /* FCVTMS */
        case 0x1c: /* FCVTAS */
        case 0x3a: /* FCVTPS */
        case 0x3b: /* FCVTZS */
        {
            /* Signed conversion with a shift argument of zero */
            TCGv_i32 tcg_shift = tcg_const_i32(0);
            gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
            tcg_temp_free_i32(tcg_shift);
            break;
        }
        case 0x5a: /* FCVTNU */
        case 0x5b: /* FCVTMU */
        case 0x5c: /* FCVTAU */
        case 0x7a: /* FCVTPU */
        case 0x7b: /* FCVTZU */
        {
            /* Unsigned conversion with a shift argument of zero */
            TCGv_i32 tcg_shift = tcg_const_i32(0);
            gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
            tcg_temp_free_i32(tcg_shift);
            break;
        }
        default:
            /* every other opcode reaching here required size == 3 */
            g_assert_not_reached();
        }

        write_fp_sreg(s, rd, tcg_rd);
        tcg_temp_free_i32(tcg_rd);
        tcg_temp_free_i32(tcg_rn);
    }

    if (is_fcvt) {
        /* Second set_rmode call, paired with the one made above */
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        tcg_temp_free_i32(tcg_rmode);
        tcg_temp_free_ptr(tcg_fpstatus);
    }
}
8124
8125 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
8126 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
8127                                  int immh, int immb, int opcode, int rn, int rd)
8128 {
8129     int size = 32 - clz32(immh) - 1;
8130     int immhb = immh << 3 | immb;
8131     int shift = 2 * (8 << size) - immhb;
8132     bool accumulate = false;
8133     bool round = false;
8134     bool insert = false;
8135     int dsize = is_q ? 128 : 64;
8136     int esize = 8 << size;
8137     int elements = dsize/esize;
8138     TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
8139     TCGv_i64 tcg_rn = new_tmp_a64(s);
8140     TCGv_i64 tcg_rd = new_tmp_a64(s);
8141     TCGv_i64 tcg_round;
8142     int i;
8143
8144     if (extract32(immh, 3, 1) && !is_q) {
8145         unallocated_encoding(s);
8146         return;
8147     }
8148
8149     if (size > 3 && !is_q) {
8150         unallocated_encoding(s);
8151         return;
8152     }
8153
8154     if (!fp_access_check(s)) {
8155         return;
8156     }
8157
8158     switch (opcode) {
8159     case 0x02: /* SSRA / USRA (accumulate) */
8160         accumulate = true;
8161         break;
8162     case 0x04: /* SRSHR / URSHR (rounding) */
8163         round = true;
8164         break;
8165     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8166         accumulate = round = true;
8167         break;
8168     case 0x08: /* SRI */
8169         insert = true;
8170         break;
8171     }
8172
8173     if (round) {
8174         uint64_t round_const = 1ULL << (shift - 1);
8175         tcg_round = tcg_const_i64(round_const);
8176     } else {
8177         TCGV_UNUSED_I64(tcg_round);
8178     }
8179
8180     for (i = 0; i < elements; i++) {
8181         read_vec_element(s, tcg_rn, rn, i, memop);
8182         if (accumulate || insert) {
8183             read_vec_element(s, tcg_rd, rd, i, memop);
8184         }
8185
8186         if (insert) {
8187             handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
8188         } else {
8189             handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8190                                     accumulate, is_u, size, shift);
8191         }
8192
8193         write_vec_element(s, tcg_rd, rd, i, size);
8194     }
8195
8196     if (!is_q) {
8197         clear_vec_high(s, rd);
8198     }
8199
8200     if (round) {
8201         tcg_temp_free_i64(tcg_round);
8202     }
8203 }
8204
8205 /* SHL/SLI - Vector shift left */
8206 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
8207                                 int immh, int immb, int opcode, int rn, int rd)
8208 {
8209     int size = 32 - clz32(immh) - 1;
8210     int immhb = immh << 3 | immb;
8211     int shift = immhb - (8 << size);
8212     int dsize = is_q ? 128 : 64;
8213     int esize = 8 << size;
8214     int elements = dsize/esize;
8215     TCGv_i64 tcg_rn = new_tmp_a64(s);
8216     TCGv_i64 tcg_rd = new_tmp_a64(s);
8217     int i;
8218
8219     if (extract32(immh, 3, 1) && !is_q) {
8220         unallocated_encoding(s);
8221         return;
8222     }
8223
8224     if (size > 3 && !is_q) {
8225         unallocated_encoding(s);
8226         return;
8227     }
8228
8229     if (!fp_access_check(s)) {
8230         return;
8231     }
8232
8233     for (i = 0; i < elements; i++) {
8234         read_vec_element(s, tcg_rn, rn, i, size);
8235         if (insert) {
8236             read_vec_element(s, tcg_rd, rd, i, size);
8237         }
8238
8239         handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
8240
8241         write_vec_element(s, tcg_rd, rd, i, size);
8242     }
8243
8244     if (!is_q) {
8245         clear_vec_high(s, rd);
8246     }
8247 }
8248
8249 /* USHLL/SHLL - Vector shift left with widening */
8250 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
8251                                  int immh, int immb, int opcode, int rn, int rd)
8252 {
8253     int size = 32 - clz32(immh) - 1;
8254     int immhb = immh << 3 | immb;
8255     int shift = immhb - (8 << size);
8256     int dsize = 64;
8257     int esize = 8 << size;
8258     int elements = dsize/esize;
8259     TCGv_i64 tcg_rn = new_tmp_a64(s);
8260     TCGv_i64 tcg_rd = new_tmp_a64(s);
8261     int i;
8262
8263     if (size >= 3) {
8264         unallocated_encoding(s);
8265         return;
8266     }
8267
8268     if (!fp_access_check(s)) {
8269         return;
8270     }
8271
8272     /* For the LL variants the store is larger than the load,
8273      * so if rd == rn we would overwrite parts of our input.
8274      * So load everything right now and use shifts in the main loop.
8275      */
8276     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
8277
8278     for (i = 0; i < elements; i++) {
8279         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
8280         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
8281         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
8282         write_vec_element(s, tcg_rd, rd, i, size + 1);
8283     }
8284 }
8285
8286 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
8287 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
8288                                  int immh, int immb, int opcode, int rn, int rd)
8289 {
8290     int immhb = immh << 3 | immb;
8291     int size = 32 - clz32(immh) - 1;
8292     int dsize = 64;
8293     int esize = 8 << size;
8294     int elements = dsize/esize;
8295     int shift = (2 * esize) - immhb;
8296     bool round = extract32(opcode, 0, 1);
8297     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
8298     TCGv_i64 tcg_round;
8299     int i;
8300
8301     if (extract32(immh, 3, 1)) {
8302         unallocated_encoding(s);
8303         return;
8304     }
8305
8306     if (!fp_access_check(s)) {
8307         return;
8308     }
8309
8310     tcg_rn = tcg_temp_new_i64();
8311     tcg_rd = tcg_temp_new_i64();
8312     tcg_final = tcg_temp_new_i64();
8313     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
8314
8315     if (round) {
8316         uint64_t round_const = 1ULL << (shift - 1);
8317         tcg_round = tcg_const_i64(round_const);
8318     } else {
8319         TCGV_UNUSED_I64(tcg_round);
8320     }
8321
8322     for (i = 0; i < elements; i++) {
8323         read_vec_element(s, tcg_rn, rn, i, size+1);
8324         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8325                                 false, true, size+1, shift);
8326
8327         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8328     }
8329
8330     if (!is_q) {
8331         clear_vec_high(s, rd);
8332         write_vec_element(s, tcg_final, rd, 0, MO_64);
8333     } else {
8334         write_vec_element(s, tcg_final, rd, 1, MO_64);
8335     }
8336
8337     if (round) {
8338         tcg_temp_free_i64(tcg_round);
8339     }
8340     tcg_temp_free_i64(tcg_rn);
8341     tcg_temp_free_i64(tcg_rd);
8342     tcg_temp_free_i64(tcg_final);
8343     return;
8344 }
8345
8346
8347 /* C3.6.14 AdvSIMD shift by immediate
8348  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
8349  * +---+---+---+-------------+------+------+--------+---+------+------+
8350  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8351  * +---+---+---+-------------+------+------+--------+---+------+------+
8352  */
8353 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
8354 {
8355     int rd = extract32(insn, 0, 5);
8356     int rn = extract32(insn, 5, 5);
8357     int opcode = extract32(insn, 11, 5);
8358     int immb = extract32(insn, 16, 3);
8359     int immh = extract32(insn, 19, 4);
8360     bool is_u = extract32(insn, 29, 1);
8361     bool is_q = extract32(insn, 30, 1);
8362
8363     switch (opcode) {
8364     case 0x08: /* SRI */
8365         if (!is_u) {
8366             unallocated_encoding(s);
8367             return;
8368         }
8369         /* fall through */
8370     case 0x00: /* SSHR / USHR */
8371     case 0x02: /* SSRA / USRA (accumulate) */
8372     case 0x04: /* SRSHR / URSHR (rounding) */
8373     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8374         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
8375         break;
8376     case 0x0a: /* SHL / SLI */
8377         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8378         break;
8379     case 0x10: /* SHRN */
8380     case 0x11: /* RSHRN / SQRSHRUN */
8381         if (is_u) {
8382             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
8383                                    opcode, rn, rd);
8384         } else {
8385             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
8386         }
8387         break;
8388     case 0x12: /* SQSHRN / UQSHRN */
8389     case 0x13: /* SQRSHRN / UQRSHRN */
8390         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
8391                                opcode, rn, rd);
8392         break;
8393     case 0x14: /* SSHLL / USHLL */
8394         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8395         break;
8396     case 0x1c: /* SCVTF / UCVTF */
8397         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
8398                                      opcode, rn, rd);
8399         break;
8400     case 0xc: /* SQSHLU */
8401         if (!is_u) {
8402             unallocated_encoding(s);
8403             return;
8404         }
8405         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
8406         break;
8407     case 0xe: /* SQSHL, UQSHL */
8408         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
8409         break;
8410     case 0x1f: /* FCVTZS/ FCVTZU */
8411         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
8412         return;
8413     default:
8414         unallocated_encoding(s);
8415         return;
8416     }
8417 }
8418
8419 /* Generate code to do a "long" addition or subtraction, ie one done in
8420  * TCGv_i64 on vector lanes twice the width specified by size.
8421  */
8422 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
8423                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
8424 {
8425     static NeonGenTwo64OpFn * const fns[3][2] = {
8426         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
8427         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
8428         { tcg_gen_add_i64, tcg_gen_sub_i64 },
8429     };
8430     NeonGenTwo64OpFn *genfn;
8431     assert(size < 3);
8432
8433     genfn = fns[size][is_sub];
8434     genfn(tcg_res, tcg_op1, tcg_op2);
8435 }
8436
8437 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
8438                                 int opcode, int rd, int rn, int rm)
8439 {
8440     /* 3-reg-different widening insns: 64 x 64 -> 128 */
8441     TCGv_i64 tcg_res[2];
8442     int pass, accop;
8443
8444     tcg_res[0] = tcg_temp_new_i64();
8445     tcg_res[1] = tcg_temp_new_i64();
8446
8447     /* Does this op do an adding accumulate, a subtracting accumulate,
8448      * or no accumulate at all?
8449      */
8450     switch (opcode) {
8451     case 5:
8452     case 8:
8453     case 9:
8454         accop = 1;
8455         break;
8456     case 10:
8457     case 11:
8458         accop = -1;
8459         break;
8460     default:
8461         accop = 0;
8462         break;
8463     }
8464
8465     if (accop != 0) {
8466         read_vec_element(s, tcg_res[0], rd, 0, MO_64);
8467         read_vec_element(s, tcg_res[1], rd, 1, MO_64);
8468     }
8469
8470     /* size == 2 means two 32x32->64 operations; this is worth special
8471      * casing because we can generally handle it inline.
8472      */
8473     if (size == 2) {
8474         for (pass = 0; pass < 2; pass++) {
8475             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8476             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8477             TCGv_i64 tcg_passres;
8478             TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
8479
8480             int elt = pass + is_q * 2;
8481
8482             read_vec_element(s, tcg_op1, rn, elt, memop);
8483             read_vec_element(s, tcg_op2, rm, elt, memop);
8484
8485             if (accop == 0) {
8486                 tcg_passres = tcg_res[pass];
8487             } else {
8488                 tcg_passres = tcg_temp_new_i64();
8489             }
8490
8491             switch (opcode) {
8492             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8493                 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
8494                 break;
8495             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8496                 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
8497                 break;
8498             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8499             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8500             {
8501                 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
8502                 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
8503
8504                 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
8505                 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
8506                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
8507                                     tcg_passres,
8508                                     tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
8509                 tcg_temp_free_i64(tcg_tmp1);
8510                 tcg_temp_free_i64(tcg_tmp2);
8511                 break;
8512             }
8513             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8514             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8515             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8516                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8517                 break;
8518             case 9: /* SQDMLAL, SQDMLAL2 */
8519             case 11: /* SQDMLSL, SQDMLSL2 */
8520             case 13: /* SQDMULL, SQDMULL2 */
8521                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8522                 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
8523                                                   tcg_passres, tcg_passres);
8524                 break;
8525             default:
8526                 g_assert_not_reached();
8527             }
8528
8529             if (opcode == 9 || opcode == 11) {
8530                 /* saturating accumulate ops */
8531                 if (accop < 0) {
8532                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
8533                 }
8534                 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
8535                                                   tcg_res[pass], tcg_passres);
8536             } else if (accop > 0) {
8537                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8538             } else if (accop < 0) {
8539                 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8540             }
8541
8542             if (accop != 0) {
8543                 tcg_temp_free_i64(tcg_passres);
8544             }
8545
8546             tcg_temp_free_i64(tcg_op1);
8547             tcg_temp_free_i64(tcg_op2);
8548         }
8549     } else {
8550         /* size 0 or 1, generally helper functions */
8551         for (pass = 0; pass < 2; pass++) {
8552             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8553             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8554             TCGv_i64 tcg_passres;
8555             int elt = pass + is_q * 2;
8556
8557             read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
8558             read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
8559
8560             if (accop == 0) {
8561                 tcg_passres = tcg_res[pass];
8562             } else {
8563                 tcg_passres = tcg_temp_new_i64();
8564             }
8565
8566             switch (opcode) {
8567             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8568             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8569             {
8570                 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
8571                 static NeonGenWidenFn * const widenfns[2][2] = {
8572                     { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8573                     { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8574                 };
8575                 NeonGenWidenFn *widenfn = widenfns[size][is_u];
8576
8577                 widenfn(tcg_op2_64, tcg_op2);
8578                 widenfn(tcg_passres, tcg_op1);
8579                 gen_neon_addl(size, (opcode == 2), tcg_passres,
8580                               tcg_passres, tcg_op2_64);
8581                 tcg_temp_free_i64(tcg_op2_64);
8582                 break;
8583             }
8584             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8585             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8586                 if (size == 0) {
8587                     if (is_u) {
8588                         gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
8589                     } else {
8590                         gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
8591                     }
8592                 } else {
8593                     if (is_u) {
8594                         gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
8595                     } else {
8596                         gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
8597                     }
8598                 }
8599                 break;
8600             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8601             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8602             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8603                 if (size == 0) {
8604                     if (is_u) {
8605                         gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
8606                     } else {
8607                         gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
8608                     }
8609                 } else {
8610                     if (is_u) {
8611                         gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
8612                     } else {
8613                         gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8614                     }
8615                 }
8616                 break;
8617             case 9: /* SQDMLAL, SQDMLAL2 */
8618             case 11: /* SQDMLSL, SQDMLSL2 */
8619             case 13: /* SQDMULL, SQDMULL2 */
8620                 assert(size == 1);
8621                 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8622                 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
8623                                                   tcg_passres, tcg_passres);
8624                 break;
8625             case 14: /* PMULL */
8626                 assert(size == 0);
8627                 gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
8628                 break;
8629             default:
8630                 g_assert_not_reached();
8631             }
8632             tcg_temp_free_i32(tcg_op1);
8633             tcg_temp_free_i32(tcg_op2);
8634
8635             if (accop != 0) {
8636                 if (opcode == 9 || opcode == 11) {
8637                     /* saturating accumulate ops */
8638                     if (accop < 0) {
8639                         gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
8640                     }
8641                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
8642                                                       tcg_res[pass],
8643                                                       tcg_passres);
8644                 } else {
8645                     gen_neon_addl(size, (accop < 0), tcg_res[pass],
8646                                   tcg_res[pass], tcg_passres);
8647                 }
8648                 tcg_temp_free_i64(tcg_passres);
8649             }
8650         }
8651     }
8652
8653     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8654     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8655     tcg_temp_free_i64(tcg_res[0]);
8656     tcg_temp_free_i64(tcg_res[1]);
8657 }
8658
8659 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
8660                             int opcode, int rd, int rn, int rm)
8661 {
8662     TCGv_i64 tcg_res[2];
8663     int part = is_q ? 2 : 0;
8664     int pass;
8665
8666     for (pass = 0; pass < 2; pass++) {
8667         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8668         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8669         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
8670         static NeonGenWidenFn * const widenfns[3][2] = {
8671             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8672             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8673             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
8674         };
8675         NeonGenWidenFn *widenfn = widenfns[size][is_u];
8676
8677         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8678         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
8679         widenfn(tcg_op2_wide, tcg_op2);
8680         tcg_temp_free_i32(tcg_op2);
8681         tcg_res[pass] = tcg_temp_new_i64();
8682         gen_neon_addl(size, (opcode == 3),
8683                       tcg_res[pass], tcg_op1, tcg_op2_wide);
8684         tcg_temp_free_i64(tcg_op1);
8685         tcg_temp_free_i64(tcg_op2_wide);
8686     }
8687
8688     for (pass = 0; pass < 2; pass++) {
8689         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8690         tcg_temp_free_i64(tcg_res[pass]);
8691     }
8692 }
8693
/* Rounding narrow of a 64-bit value to its high 32 bits: add 1 << 31 to
 * 'in' and place the upper 32 bits of the sum in 'res'.
 * Note that 'in' is modified by the addition.
 */
static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
{
    tcg_gen_addi_i64(in, in, 1U << 31);
    tcg_gen_extrh_i64_i32(res, in);
}
8699
8700 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
8701                                  int opcode, int rd, int rn, int rm)
8702 {
8703     TCGv_i32 tcg_res[2];
8704     int part = is_q ? 2 : 0;
8705     int pass;
8706
8707     for (pass = 0; pass < 2; pass++) {
8708         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8709         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8710         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
8711         static NeonGenNarrowFn * const narrowfns[3][2] = {
8712             { gen_helper_neon_narrow_high_u8,
8713               gen_helper_neon_narrow_round_high_u8 },
8714             { gen_helper_neon_narrow_high_u16,
8715               gen_helper_neon_narrow_round_high_u16 },
8716             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
8717         };
8718         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
8719
8720         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8721         read_vec_element(s, tcg_op2, rm, pass, MO_64);
8722
8723         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
8724
8725         tcg_temp_free_i64(tcg_op1);
8726         tcg_temp_free_i64(tcg_op2);
8727
8728         tcg_res[pass] = tcg_temp_new_i32();
8729         gennarrow(tcg_res[pass], tcg_wideres);
8730         tcg_temp_free_i64(tcg_wideres);
8731     }
8732
8733     for (pass = 0; pass < 2; pass++) {
8734         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
8735         tcg_temp_free_i32(tcg_res[pass]);
8736     }
8737     if (!is_q) {
8738         clear_vec_high(s, rd);
8739     }
8740 }
8741
8742 static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
8743 {
8744     /* PMULL of 64 x 64 -> 128 is an odd special case because it
8745      * is the only three-reg-diff instruction which produces a
8746      * 128-bit wide result from a single operation. However since
8747      * it's possible to calculate the two halves more or less
8748      * separately we just use two helper calls.
8749      */
8750     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8751     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8752     TCGv_i64 tcg_res = tcg_temp_new_i64();
8753
8754     read_vec_element(s, tcg_op1, rn, is_q, MO_64);
8755     read_vec_element(s, tcg_op2, rm, is_q, MO_64);
8756     gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
8757     write_vec_element(s, tcg_res, rd, 0, MO_64);
8758     gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
8759     write_vec_element(s, tcg_res, rd, 1, MO_64);
8760
8761     tcg_temp_free_i64(tcg_op1);
8762     tcg_temp_free_i64(tcg_op2);
8763     tcg_temp_free_i64(tcg_res);
8764 }
8765
8766 /* C3.6.15 AdvSIMD three different
8767  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8768  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8769  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8770  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8771  */
8772 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
8773 {
8774     /* Instructions in this group fall into three basic classes
8775      * (in each case with the operation working on each element in
8776      * the input vectors):
8777      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
8778      *     128 bit input)
8779      * (2) wide 64 x 128 -> 128
8780      * (3) narrowing 128 x 128 -> 64
8781      * Here we do initial decode, catch unallocated cases and
8782      * dispatch to separate functions for each class.
8783      */
8784     int is_q = extract32(insn, 30, 1);
8785     int is_u = extract32(insn, 29, 1);
8786     int size = extract32(insn, 22, 2);
8787     int opcode = extract32(insn, 12, 4);
8788     int rm = extract32(insn, 16, 5);
8789     int rn = extract32(insn, 5, 5);
8790     int rd = extract32(insn, 0, 5);
8791
8792     switch (opcode) {
8793     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
8794     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
8795         /* 64 x 128 -> 128 */
8796         if (size == 3) {
8797             unallocated_encoding(s);
8798             return;
8799         }
8800         if (!fp_access_check(s)) {
8801             return;
8802         }
8803         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
8804         break;
8805     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
8806     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
8807         /* 128 x 128 -> 64 */
8808         if (size == 3) {
8809             unallocated_encoding(s);
8810             return;
8811         }
8812         if (!fp_access_check(s)) {
8813             return;
8814         }
8815         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
8816         break;
8817     case 14: /* PMULL, PMULL2 */
8818         if (is_u || size == 1 || size == 2) {
8819             unallocated_encoding(s);
8820             return;
8821         }
8822         if (size == 3) {
8823             if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
8824                 unallocated_encoding(s);
8825                 return;
8826             }
8827             if (!fp_access_check(s)) {
8828                 return;
8829             }
8830             handle_pmull_64(s, is_q, rd, rn, rm);
8831             return;
8832         }
8833         goto is_widening;
8834     case 9: /* SQDMLAL, SQDMLAL2 */
8835     case 11: /* SQDMLSL, SQDMLSL2 */
8836     case 13: /* SQDMULL, SQDMULL2 */
8837         if (is_u || size == 0) {
8838             unallocated_encoding(s);
8839             return;
8840         }
8841         /* fall through */
8842     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8843     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8844     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8845     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8846     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8847     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8848     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
8849         /* 64 x 64 -> 128 */
8850         if (size == 3) {
8851             unallocated_encoding(s);
8852             return;
8853         }
8854     is_widening:
8855         if (!fp_access_check(s)) {
8856             return;
8857         }
8858
8859         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
8860         break;
8861     default:
8862         /* opcode 15 not allocated */
8863         unallocated_encoding(s);
8864         break;
8865     }
8866 }
8867
8868 /* Logic op (opcode == 3) subgroup of C3.6.16. */
8869 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
8870 {
8871     int rd = extract32(insn, 0, 5);
8872     int rn = extract32(insn, 5, 5);
8873     int rm = extract32(insn, 16, 5);
8874     int size = extract32(insn, 22, 2);
8875     bool is_u = extract32(insn, 29, 1);
8876     bool is_q = extract32(insn, 30, 1);
8877     TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
8878     int pass;
8879
8880     if (!fp_access_check(s)) {
8881         return;
8882     }
8883
8884     tcg_op1 = tcg_temp_new_i64();
8885     tcg_op2 = tcg_temp_new_i64();
8886     tcg_res[0] = tcg_temp_new_i64();
8887     tcg_res[1] = tcg_temp_new_i64();
8888
8889     for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
8890         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8891         read_vec_element(s, tcg_op2, rm, pass, MO_64);
8892
8893         if (!is_u) {
8894             switch (size) {
8895             case 0: /* AND */
8896                 tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
8897                 break;
8898             case 1: /* BIC */
8899                 tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
8900                 break;
8901             case 2: /* ORR */
8902                 tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
8903                 break;
8904             case 3: /* ORN */
8905                 tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
8906                 break;
8907             }
8908         } else {
8909             if (size != 0) {
8910                 /* B* ops need res loaded to operate on */
8911                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8912             }
8913
8914             switch (size) {
8915             case 0: /* EOR */
8916                 tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
8917                 break;
8918             case 1: /* BSL bitwise select */
8919                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2);
8920                 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8921                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1);
8922                 break;
8923             case 2: /* BIT, bitwise insert if true */
8924                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8925                 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2);
8926                 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
8927                 break;
8928             case 3: /* BIF, bitwise insert if false */
8929                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8930                 tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2);
8931                 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
8932                 break;
8933             }
8934         }
8935     }
8936
8937     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8938     if (!is_q) {
8939         tcg_gen_movi_i64(tcg_res[1], 0);
8940     }
8941     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8942
8943     tcg_temp_free_i64(tcg_op1);
8944     tcg_temp_free_i64(tcg_op2);
8945     tcg_temp_free_i64(tcg_res[0]);
8946     tcg_temp_free_i64(tcg_res[1]);
8947 }
8948
8949 /* Helper functions for 32 bit comparisons */
8950 static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8951 {
8952     tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
8953 }
8954
8955 static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8956 {
8957     tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
8958 }
8959
8960 static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8961 {
8962     tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
8963 }
8964
8965 static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8966 {
8967     tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
8968 }
8969
8970 /* Pairwise op subgroup of C3.6.16.
8971  *
8972  * This is called directly or via the handle_3same_float for float pairwise
8973  * operations where the opcode and size are calculated differently.
8974  */
8975 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
8976                                    int size, int rn, int rm, int rd)
8977 {
8978     TCGv_ptr fpst;
8979     int pass;
8980
8981     /* Floating point operations need fpst */
8982     if (opcode >= 0x58) {
8983         fpst = get_fpstatus_ptr();
8984     } else {
8985         TCGV_UNUSED_PTR(fpst);
8986     }
8987
8988     if (!fp_access_check(s)) {
8989         return;
8990     }
8991
8992     /* These operations work on the concatenated rm:rn, with each pair of
8993      * adjacent elements being operated on to produce an element in the result.
8994      */
8995     if (size == 3) {
8996         TCGv_i64 tcg_res[2];
8997
8998         for (pass = 0; pass < 2; pass++) {
8999             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9000             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9001             int passreg = (pass == 0) ? rn : rm;
9002
9003             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
9004             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
9005             tcg_res[pass] = tcg_temp_new_i64();
9006
9007             switch (opcode) {
9008             case 0x17: /* ADDP */
9009                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9010                 break;
9011             case 0x58: /* FMAXNMP */
9012                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9013                 break;
9014             case 0x5a: /* FADDP */
9015                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9016                 break;
9017             case 0x5e: /* FMAXP */
9018                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9019                 break;
9020             case 0x78: /* FMINNMP */
9021                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9022                 break;
9023             case 0x7e: /* FMINP */
9024                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9025                 break;
9026             default:
9027                 g_assert_not_reached();
9028             }
9029
9030             tcg_temp_free_i64(tcg_op1);
9031             tcg_temp_free_i64(tcg_op2);
9032         }
9033
9034         for (pass = 0; pass < 2; pass++) {
9035             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9036             tcg_temp_free_i64(tcg_res[pass]);
9037         }
9038     } else {
9039         int maxpass = is_q ? 4 : 2;
9040         TCGv_i32 tcg_res[4];
9041
9042         for (pass = 0; pass < maxpass; pass++) {
9043             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9044             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9045             NeonGenTwoOpFn *genfn = NULL;
9046             int passreg = pass < (maxpass / 2) ? rn : rm;
9047             int passelt = (is_q && (pass & 1)) ? 2 : 0;
9048
9049             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
9050             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
9051             tcg_res[pass] = tcg_temp_new_i32();
9052
9053             switch (opcode) {
9054             case 0x17: /* ADDP */
9055             {
9056                 static NeonGenTwoOpFn * const fns[3] = {
9057                     gen_helper_neon_padd_u8,
9058                     gen_helper_neon_padd_u16,
9059                     tcg_gen_add_i32,
9060                 };
9061                 genfn = fns[size];
9062                 break;
9063             }
9064             case 0x14: /* SMAXP, UMAXP */
9065             {
9066                 static NeonGenTwoOpFn * const fns[3][2] = {
9067                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
9068                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
9069                     { gen_max_s32, gen_max_u32 },
9070                 };
9071                 genfn = fns[size][u];
9072                 break;
9073             }
9074             case 0x15: /* SMINP, UMINP */
9075             {
9076                 static NeonGenTwoOpFn * const fns[3][2] = {
9077                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
9078                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
9079                     { gen_min_s32, gen_min_u32 },
9080                 };
9081                 genfn = fns[size][u];
9082                 break;
9083             }
9084             /* The FP operations are all on single floats (32 bit) */
9085             case 0x58: /* FMAXNMP */
9086                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9087                 break;
9088             case 0x5a: /* FADDP */
9089                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9090                 break;
9091             case 0x5e: /* FMAXP */
9092                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9093                 break;
9094             case 0x78: /* FMINNMP */
9095                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9096                 break;
9097             case 0x7e: /* FMINP */
9098                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9099                 break;
9100             default:
9101                 g_assert_not_reached();
9102             }
9103
9104             /* FP ops called directly, otherwise call now */
9105             if (genfn) {
9106                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
9107             }
9108
9109             tcg_temp_free_i32(tcg_op1);
9110             tcg_temp_free_i32(tcg_op2);
9111         }
9112
9113         for (pass = 0; pass < maxpass; pass++) {
9114             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9115             tcg_temp_free_i32(tcg_res[pass]);
9116         }
9117         if (!is_q) {
9118             clear_vec_high(s, rd);
9119         }
9120     }
9121
9122     if (!TCGV_IS_UNUSED_PTR(fpst)) {
9123         tcg_temp_free_ptr(fpst);
9124     }
9125 }
9126
9127 /* Floating point op subgroup of C3.6.16. */
9128 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
9129 {
9130     /* For floating point ops, the U, size[1] and opcode bits
9131      * together indicate the operation. size[0] indicates single
9132      * or double.
9133      */
9134     int fpopcode = extract32(insn, 11, 5)
9135         | (extract32(insn, 23, 1) << 5)
9136         | (extract32(insn, 29, 1) << 6);
9137     int is_q = extract32(insn, 30, 1);
9138     int size = extract32(insn, 22, 1);
9139     int rm = extract32(insn, 16, 5);
9140     int rn = extract32(insn, 5, 5);
9141     int rd = extract32(insn, 0, 5);
9142
9143     int datasize = is_q ? 128 : 64;
9144     int esize = 32 << size;
9145     int elements = datasize / esize;
9146
9147     if (size == 1 && !is_q) {
9148         unallocated_encoding(s);
9149         return;
9150     }
9151
9152     switch (fpopcode) {
9153     case 0x58: /* FMAXNMP */
9154     case 0x5a: /* FADDP */
9155     case 0x5e: /* FMAXP */
9156     case 0x78: /* FMINNMP */
9157     case 0x7e: /* FMINP */
9158         if (size && !is_q) {
9159             unallocated_encoding(s);
9160             return;
9161         }
9162         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
9163                                rn, rm, rd);
9164         return;
9165     case 0x1b: /* FMULX */
9166     case 0x1f: /* FRECPS */
9167     case 0x3f: /* FRSQRTS */
9168     case 0x5d: /* FACGE */
9169     case 0x7d: /* FACGT */
9170     case 0x19: /* FMLA */
9171     case 0x39: /* FMLS */
9172     case 0x18: /* FMAXNM */
9173     case 0x1a: /* FADD */
9174     case 0x1c: /* FCMEQ */
9175     case 0x1e: /* FMAX */
9176     case 0x38: /* FMINNM */
9177     case 0x3a: /* FSUB */
9178     case 0x3e: /* FMIN */
9179     case 0x5b: /* FMUL */
9180     case 0x5c: /* FCMGE */
9181     case 0x5f: /* FDIV */
9182     case 0x7a: /* FABD */
9183     case 0x7c: /* FCMGT */
9184         if (!fp_access_check(s)) {
9185             return;
9186         }
9187
9188         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
9189         return;
9190     default:
9191         unallocated_encoding(s);
9192         return;
9193     }
9194 }
9195
9196 /* Integer op subgroup of C3.6.16. */
9197 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
9198 {
9199     int is_q = extract32(insn, 30, 1);
9200     int u = extract32(insn, 29, 1);
9201     int size = extract32(insn, 22, 2);
9202     int opcode = extract32(insn, 11, 5);
9203     int rm = extract32(insn, 16, 5);
9204     int rn = extract32(insn, 5, 5);
9205     int rd = extract32(insn, 0, 5);
9206     int pass;
9207
9208     switch (opcode) {
9209     case 0x13: /* MUL, PMUL */
9210         if (u && size != 0) {
9211             unallocated_encoding(s);
9212             return;
9213         }
9214         /* fall through */
9215     case 0x0: /* SHADD, UHADD */
9216     case 0x2: /* SRHADD, URHADD */
9217     case 0x4: /* SHSUB, UHSUB */
9218     case 0xc: /* SMAX, UMAX */
9219     case 0xd: /* SMIN, UMIN */
9220     case 0xe: /* SABD, UABD */
9221     case 0xf: /* SABA, UABA */
9222     case 0x12: /* MLA, MLS */
9223         if (size == 3) {
9224             unallocated_encoding(s);
9225             return;
9226         }
9227         break;
9228     case 0x16: /* SQDMULH, SQRDMULH */
9229         if (size == 0 || size == 3) {
9230             unallocated_encoding(s);
9231             return;
9232         }
9233         break;
9234     default:
9235         if (size == 3 && !is_q) {
9236             unallocated_encoding(s);
9237             return;
9238         }
9239         break;
9240     }
9241
9242     if (!fp_access_check(s)) {
9243         return;
9244     }
9245
9246     if (size == 3) {
9247         assert(is_q);
9248         for (pass = 0; pass < 2; pass++) {
9249             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9250             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9251             TCGv_i64 tcg_res = tcg_temp_new_i64();
9252
9253             read_vec_element(s, tcg_op1, rn, pass, MO_64);
9254             read_vec_element(s, tcg_op2, rm, pass, MO_64);
9255
9256             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
9257
9258             write_vec_element(s, tcg_res, rd, pass, MO_64);
9259
9260             tcg_temp_free_i64(tcg_res);
9261             tcg_temp_free_i64(tcg_op1);
9262             tcg_temp_free_i64(tcg_op2);
9263         }
9264     } else {
9265         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
9266             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9267             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9268             TCGv_i32 tcg_res = tcg_temp_new_i32();
9269             NeonGenTwoOpFn *genfn = NULL;
9270             NeonGenTwoOpEnvFn *genenvfn = NULL;
9271
9272             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9273             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9274
9275             switch (opcode) {
9276             case 0x0: /* SHADD, UHADD */
9277             {
9278                 static NeonGenTwoOpFn * const fns[3][2] = {
9279                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
9280                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
9281                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
9282                 };
9283                 genfn = fns[size][u];
9284                 break;
9285             }
9286             case 0x1: /* SQADD, UQADD */
9287             {
9288                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9289                     { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9290                     { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9291                     { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9292                 };
9293                 genenvfn = fns[size][u];
9294                 break;
9295             }
9296             case 0x2: /* SRHADD, URHADD */
9297             {
9298                 static NeonGenTwoOpFn * const fns[3][2] = {
9299                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
9300                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
9301                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
9302                 };
9303                 genfn = fns[size][u];
9304                 break;
9305             }
9306             case 0x4: /* SHSUB, UHSUB */
9307             {
9308                 static NeonGenTwoOpFn * const fns[3][2] = {
9309                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
9310                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
9311                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
9312                 };
9313                 genfn = fns[size][u];
9314                 break;
9315             }
9316             case 0x5: /* SQSUB, UQSUB */
9317             {
9318                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9319                     { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9320                     { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9321                     { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9322                 };
9323                 genenvfn = fns[size][u];
9324                 break;
9325             }
9326             case 0x6: /* CMGT, CMHI */
9327             {
9328                 static NeonGenTwoOpFn * const fns[3][2] = {
9329                     { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
9330                     { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
9331                     { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
9332                 };
9333                 genfn = fns[size][u];
9334                 break;
9335             }
9336             case 0x7: /* CMGE, CMHS */
9337             {
9338                 static NeonGenTwoOpFn * const fns[3][2] = {
9339                     { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
9340                     { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
9341                     { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
9342                 };
9343                 genfn = fns[size][u];
9344                 break;
9345             }
9346             case 0x8: /* SSHL, USHL */
9347             {
9348                 static NeonGenTwoOpFn * const fns[3][2] = {
9349                     { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
9350                     { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
9351                     { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
9352                 };
9353                 genfn = fns[size][u];
9354                 break;
9355             }
9356             case 0x9: /* SQSHL, UQSHL */
9357             {
9358                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9359                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9360                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9361                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9362                 };
9363                 genenvfn = fns[size][u];
9364                 break;
9365             }
9366             case 0xa: /* SRSHL, URSHL */
9367             {
9368                 static NeonGenTwoOpFn * const fns[3][2] = {
9369                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
9370                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
9371                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
9372                 };
9373                 genfn = fns[size][u];
9374                 break;
9375             }
9376             case 0xb: /* SQRSHL, UQRSHL */
9377             {
9378                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9379                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9380                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9381                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9382                 };
9383                 genenvfn = fns[size][u];
9384                 break;
9385             }
9386             case 0xc: /* SMAX, UMAX */
9387             {
9388                 static NeonGenTwoOpFn * const fns[3][2] = {
9389                     { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
9390                     { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
9391                     { gen_max_s32, gen_max_u32 },
9392                 };
9393                 genfn = fns[size][u];
9394                 break;
9395             }
9396
9397             case 0xd: /* SMIN, UMIN */
9398             {
9399                 static NeonGenTwoOpFn * const fns[3][2] = {
9400                     { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
9401                     { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
9402                     { gen_min_s32, gen_min_u32 },
9403                 };
9404                 genfn = fns[size][u];
9405                 break;
9406             }
9407             case 0xe: /* SABD, UABD */
9408             case 0xf: /* SABA, UABA */
9409             {
9410                 static NeonGenTwoOpFn * const fns[3][2] = {
9411                     { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
9412                     { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
9413                     { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
9414                 };
9415                 genfn = fns[size][u];
9416                 break;
9417             }
9418             case 0x10: /* ADD, SUB */
9419             {
9420                 static NeonGenTwoOpFn * const fns[3][2] = {
9421                     { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9422                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9423                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
9424                 };
9425                 genfn = fns[size][u];
9426                 break;
9427             }
9428             case 0x11: /* CMTST, CMEQ */
9429             {
9430                 static NeonGenTwoOpFn * const fns[3][2] = {
9431                     { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
9432                     { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
9433                     { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
9434                 };
9435                 genfn = fns[size][u];
9436                 break;
9437             }
9438             case 0x13: /* MUL, PMUL */
9439                 if (u) {
9440                     /* PMUL */
9441                     assert(size == 0);
9442                     genfn = gen_helper_neon_mul_p8;
9443                     break;
9444                 }
9445                 /* fall through : MUL */
9446             case 0x12: /* MLA, MLS */
9447             {
9448                 static NeonGenTwoOpFn * const fns[3] = {
9449                     gen_helper_neon_mul_u8,
9450                     gen_helper_neon_mul_u16,
9451                     tcg_gen_mul_i32,
9452                 };
9453                 genfn = fns[size];
9454                 break;
9455             }
9456             case 0x16: /* SQDMULH, SQRDMULH */
9457             {
9458                 static NeonGenTwoOpEnvFn * const fns[2][2] = {
9459                     { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9460                     { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9461                 };
9462                 assert(size == 1 || size == 2);
9463                 genenvfn = fns[size - 1][u];
9464                 break;
9465             }
9466             default:
9467                 g_assert_not_reached();
9468             }
9469
9470             if (genenvfn) {
9471                 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
9472             } else {
9473                 genfn(tcg_res, tcg_op1, tcg_op2);
9474             }
9475
9476             if (opcode == 0xf || opcode == 0x12) {
9477                 /* SABA, UABA, MLA, MLS: accumulating ops */
9478                 static NeonGenTwoOpFn * const fns[3][2] = {
9479                     { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9480                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9481                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
9482                 };
9483                 bool is_sub = (opcode == 0x12 && u); /* MLS */
9484
9485                 genfn = fns[size][is_sub];
9486                 read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
9487                 genfn(tcg_res, tcg_op1, tcg_res);
9488             }
9489
9490             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9491
9492             tcg_temp_free_i32(tcg_res);
9493             tcg_temp_free_i32(tcg_op1);
9494             tcg_temp_free_i32(tcg_op2);
9495         }
9496     }
9497
9498     if (!is_q) {
9499         clear_vec_high(s, rd);
9500     }
9501 }
9502
9503 /* C3.6.16 AdvSIMD three same
9504  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
9505  * +---+---+---+-----------+------+---+------+--------+---+------+------+
9506  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9507  * +---+---+---+-----------+------+---+------+--------+---+------+------+
9508  */
9509 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
9510 {
9511     int opcode = extract32(insn, 11, 5);
9512
9513     switch (opcode) {
9514     case 0x3: /* logic ops */
9515         disas_simd_3same_logic(s, insn);
9516         break;
9517     case 0x17: /* ADDP */
9518     case 0x14: /* SMAXP, UMAXP */
9519     case 0x15: /* SMINP, UMINP */
9520     {
9521         /* Pairwise operations */
9522         int is_q = extract32(insn, 30, 1);
9523         int u = extract32(insn, 29, 1);
9524         int size = extract32(insn, 22, 2);
9525         int rm = extract32(insn, 16, 5);
9526         int rn = extract32(insn, 5, 5);
9527         int rd = extract32(insn, 0, 5);
9528         if (opcode == 0x17) {
9529             if (u || (size == 3 && !is_q)) {
9530                 unallocated_encoding(s);
9531                 return;
9532             }
9533         } else {
9534             if (size == 3) {
9535                 unallocated_encoding(s);
9536                 return;
9537             }
9538         }
9539         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
9540         break;
9541     }
9542     case 0x18 ... 0x31:
9543         /* floating point ops, sz[1] and U are part of opcode */
9544         disas_simd_3same_float(s, insn);
9545         break;
9546     default:
9547         disas_simd_3same_int(s, insn);
9548         break;
9549     }
9550 }
9551
9552 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
9553                                   int size, int rn, int rd)
9554 {
9555     /* Handle 2-reg-misc ops which are widening (so each size element
9556      * in the source becomes a 2*size element in the destination.
9557      * The only instruction like this is FCVTL.
9558      */
9559     int pass;
9560
9561     if (size == 3) {
9562         /* 32 -> 64 bit fp conversion */
9563         TCGv_i64 tcg_res[2];
9564         int srcelt = is_q ? 2 : 0;
9565
9566         for (pass = 0; pass < 2; pass++) {
9567             TCGv_i32 tcg_op = tcg_temp_new_i32();
9568             tcg_res[pass] = tcg_temp_new_i64();
9569
9570             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
9571             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
9572             tcg_temp_free_i32(tcg_op);
9573         }
9574         for (pass = 0; pass < 2; pass++) {
9575             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9576             tcg_temp_free_i64(tcg_res[pass]);
9577         }
9578     } else {
9579         /* 16 -> 32 bit fp conversion */
9580         int srcelt = is_q ? 4 : 0;
9581         TCGv_i32 tcg_res[4];
9582
9583         for (pass = 0; pass < 4; pass++) {
9584             tcg_res[pass] = tcg_temp_new_i32();
9585
9586             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
9587             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
9588                                            cpu_env);
9589         }
9590         for (pass = 0; pass < 4; pass++) {
9591             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9592             tcg_temp_free_i32(tcg_res[pass]);
9593         }
9594     }
9595 }
9596
9597 static void handle_rev(DisasContext *s, int opcode, bool u,
9598                        bool is_q, int size, int rn, int rd)
9599 {
9600     int op = (opcode << 1) | u;
9601     int opsz = op + size;
9602     int grp_size = 3 - opsz;
9603     int dsize = is_q ? 128 : 64;
9604     int i;
9605
9606     if (opsz >= 3) {
9607         unallocated_encoding(s);
9608         return;
9609     }
9610
9611     if (!fp_access_check(s)) {
9612         return;
9613     }
9614
9615     if (size == 0) {
9616         /* Special case bytes, use bswap op on each group of elements */
9617         int groups = dsize / (8 << grp_size);
9618
9619         for (i = 0; i < groups; i++) {
9620             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9621
9622             read_vec_element(s, tcg_tmp, rn, i, grp_size);
9623             switch (grp_size) {
9624             case MO_16:
9625                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
9626                 break;
9627             case MO_32:
9628                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
9629                 break;
9630             case MO_64:
9631                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
9632                 break;
9633             default:
9634                 g_assert_not_reached();
9635             }
9636             write_vec_element(s, tcg_tmp, rd, i, grp_size);
9637             tcg_temp_free_i64(tcg_tmp);
9638         }
9639         if (!is_q) {
9640             clear_vec_high(s, rd);
9641         }
9642     } else {
9643         int revmask = (1 << grp_size) - 1;
9644         int esize = 8 << size;
9645         int elements = dsize / esize;
9646         TCGv_i64 tcg_rn = tcg_temp_new_i64();
9647         TCGv_i64 tcg_rd = tcg_const_i64(0);
9648         TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
9649
9650         for (i = 0; i < elements; i++) {
9651             int e_rev = (i & 0xf) ^ revmask;
9652             int off = e_rev * esize;
9653             read_vec_element(s, tcg_rn, rn, i, size);
9654             if (off >= 64) {
9655                 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
9656                                     tcg_rn, off - 64, esize);
9657             } else {
9658                 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
9659             }
9660         }
9661         write_vec_element(s, tcg_rd, rd, 0, MO_64);
9662         write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
9663
9664         tcg_temp_free_i64(tcg_rd_hi);
9665         tcg_temp_free_i64(tcg_rd);
9666         tcg_temp_free_i64(tcg_rn);
9667     }
9668 }
9669
9670 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
9671                                   bool is_q, int size, int rn, int rd)
9672 {
9673     /* Implement the pairwise operations from 2-misc:
9674      * SADDLP, UADDLP, SADALP, UADALP.
9675      * These all add pairs of elements in the input to produce a
9676      * double-width result element in the output (possibly accumulating).
9677      */
9678     bool accum = (opcode == 0x6);
9679     int maxpass = is_q ? 2 : 1;
9680     int pass;
9681     TCGv_i64 tcg_res[2];
9682
9683     if (size == 2) {
9684         /* 32 + 32 -> 64 op */
9685         TCGMemOp memop = size + (u ? 0 : MO_SIGN);
9686
9687         for (pass = 0; pass < maxpass; pass++) {
9688             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9689             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9690
9691             tcg_res[pass] = tcg_temp_new_i64();
9692
9693             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
9694             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
9695             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9696             if (accum) {
9697                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
9698                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9699             }
9700
9701             tcg_temp_free_i64(tcg_op1);
9702             tcg_temp_free_i64(tcg_op2);
9703         }
9704     } else {
9705         for (pass = 0; pass < maxpass; pass++) {
9706             TCGv_i64 tcg_op = tcg_temp_new_i64();
9707             NeonGenOneOpFn *genfn;
9708             static NeonGenOneOpFn * const fns[2][2] = {
9709                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
9710                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
9711             };
9712
9713             genfn = fns[size][u];
9714
9715             tcg_res[pass] = tcg_temp_new_i64();
9716
9717             read_vec_element(s, tcg_op, rn, pass, MO_64);
9718             genfn(tcg_res[pass], tcg_op);
9719
9720             if (accum) {
9721                 read_vec_element(s, tcg_op, rd, pass, MO_64);
9722                 if (size == 0) {
9723                     gen_helper_neon_addl_u16(tcg_res[pass],
9724                                              tcg_res[pass], tcg_op);
9725                 } else {
9726                     gen_helper_neon_addl_u32(tcg_res[pass],
9727                                              tcg_res[pass], tcg_op);
9728                 }
9729             }
9730             tcg_temp_free_i64(tcg_op);
9731         }
9732     }
9733     if (!is_q) {
9734         tcg_res[1] = tcg_const_i64(0);
9735     }
9736     for (pass = 0; pass < 2; pass++) {
9737         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9738         tcg_temp_free_i64(tcg_res[pass]);
9739     }
9740 }
9741
9742 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
9743 {
9744     /* Implement SHLL and SHLL2 */
9745     int pass;
9746     int part = is_q ? 2 : 0;
9747     TCGv_i64 tcg_res[2];
9748
9749     for (pass = 0; pass < 2; pass++) {
9750         static NeonGenWidenFn * const widenfns[3] = {
9751             gen_helper_neon_widen_u8,
9752             gen_helper_neon_widen_u16,
9753             tcg_gen_extu_i32_i64,
9754         };
9755         NeonGenWidenFn *widenfn = widenfns[size];
9756         TCGv_i32 tcg_op = tcg_temp_new_i32();
9757
9758         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
9759         tcg_res[pass] = tcg_temp_new_i64();
9760         widenfn(tcg_res[pass], tcg_op);
9761         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
9762
9763         tcg_temp_free_i32(tcg_op);
9764     }
9765
9766     for (pass = 0; pass < 2; pass++) {
9767         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9768         tcg_temp_free_i64(tcg_res[pass]);
9769     }
9770 }
9771
9772 /* C3.6.17 AdvSIMD two reg misc
9773  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9774  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9775  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9776  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
      *
      * Decode one instruction of this group and emit the TCG ops for it.
      *
      * The function works in two stages:
      *  1. A switch on the opcode field validates the encoding. Each case
      *     either returns (after calling unallocated_encoding(), or after
      *     handing the instruction to a dedicated handle_*() function), or
      *     breaks out to stage 2, possibly after adjusting size/opcode and
      *     setting need_fpstatus / need_rmode / rmode.
      *  2. Shared emission code: after fp_access_check() succeeds, elements
      *     are processed in a loop, 64 bits at a time via handle_2misc_64()
      *     when size == 3, otherwise 32 bits at a time. For !is_q the
      *     function finishes with clear_vec_high() on Rd.
      *
      * @s:    translation context
      * @insn: the 32-bit instruction word; fields are extracted as shown in
      *        the diagram above
9777  */
9778 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
9779 {
9780     int size = extract32(insn, 22, 2);
9781     int opcode = extract32(insn, 12, 5);
9782     bool u = extract32(insn, 29, 1);
9783     bool is_q = extract32(insn, 30, 1);
9784     int rn = extract32(insn, 5, 5);
9785     int rd = extract32(insn, 0, 5);
         /* Requirements of the shared emission code, set by the decode switch:
          * need_fpstatus: fetch a float status pointer into tcg_fpstatus;
          * need_rmode:    install 'rmode' around the operation via tcg_rmode.
          * rmode stays -1 unless a case sets it, and is only read when
          * need_rmode is true.
          */
9786     bool need_fpstatus = false;
9787     bool need_rmode = false;
9788     int rmode = -1;
9789     TCGv_i32 tcg_rmode;
9790     TCGv_ptr tcg_fpstatus;
9791
         /* Stage 1: validate the encoding; 'return' ends decode here, 'break'
          * continues with the shared emission code after the switch.
          */
9792     switch (opcode) {
9793     case 0x0: /* REV64, REV32 */
9794     case 0x1: /* REV16 */
             /* NOTE(review): no fp_access_check() at this call site, unlike the
              * other dedicated-handler cases below; presumably handle_rev()
              * performs it and any size validation itself -- confirm.
              */
9795         handle_rev(s, opcode, u, is_q, size, rn, rd);
9796         return;
9797     case 0x5: /* CNT, NOT, RBIT */
9798         if (u && size == 0) {
9799             /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
9800             size = 3;
9801             break;
9802         } else if (u && size == 1) {
9803             /* RBIT */
9804             break;
9805         } else if (!u && size == 0) {
9806             /* CNT */
9807             break;
9808         }
             /* Any other U/size combination for opcode 0x5 is unallocated */
9809         unallocated_encoding(s);
9810         return;
9811     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
9812     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
9813         if (size == 3) {
9814             unallocated_encoding(s);
9815             return;
9816         }
9817         if (!fp_access_check(s)) {
9818             return;
9819         }
9820
9821         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
9822         return;
9823     case 0x4: /* CLS, CLZ */
9824         if (size == 3) {
9825             unallocated_encoding(s);
9826             return;
9827         }
9828         break;
9829     case 0x2: /* SADDLP, UADDLP */
9830     case 0x6: /* SADALP, UADALP */
9831         if (size == 3) {
9832             unallocated_encoding(s);
9833             return;
9834         }
9835         if (!fp_access_check(s)) {
9836             return;
9837         }
9838         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
9839         return;
9840     case 0x13: /* SHLL, SHLL2 */
9841         if (u == 0 || size == 3) {
9842             unallocated_encoding(s);
9843             return;
9844         }
9845         if (!fp_access_check(s)) {
9846             return;
9847         }
9848         handle_shll(s, is_q, size, rn, rd);
9849         return;
9850     case 0xa: /* CMLT */
9851         if (u == 1) {
9852             unallocated_encoding(s);
9853             return;
9854         }
9855         /* fall through */
9856     case 0x8: /* CMGT, CMGE */
9857     case 0x9: /* CMEQ, CMLE */
9858     case 0xb: /* ABS, NEG */
             /* 64-bit elements are only accepted in the Q (128-bit) form */
9859         if (size == 3 && !is_q) {
9860             unallocated_encoding(s);
9861             return;
9862         }
9863         break;
9864     case 0x3: /* SUQADD, USQADD */
9865         if (size == 3 && !is_q) {
9866             unallocated_encoding(s);
9867             return;
9868         }
9869         if (!fp_access_check(s)) {
9870             return;
9871         }
9872         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
9873         return;
9874     case 0x7: /* SQABS, SQNEG */
9875         if (size == 3 && !is_q) {
9876             unallocated_encoding(s);
9877             return;
9878         }
9879         break;
9880     case 0xc ... 0xf:
9881     case 0x16 ... 0x1d:
9882     case 0x1f:
9883     {
9884         /* Floating point: U, size[1] and opcode indicate operation;
9885          * size[0] indicates single or double precision.
              *
              * From here on 'opcode' is the combined selector built below:
              * bits [4:0] are the opcode field, bit 5 is size[1] and bit 6
              * is U; the inner case labels (and the emission code further
              * down) use these combined values. 'size' is rewritten to the
              * element size: 2 for single, 3 for double precision.
9886          */
9887         int is_double = extract32(size, 0, 1);
9888         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
9889         size = is_double ? 3 : 2;
9890         switch (opcode) {
9891         case 0x2f: /* FABS */
9892         case 0x6f: /* FNEG */
9893             if (size == 3 && !is_q) {
9894                 unallocated_encoding(s);
9895                 return;
9896             }
9897             break;
9898         case 0x1d: /* SCVTF */
9899         case 0x5d: /* UCVTF */
9900         {
9901             bool is_signed = (opcode == 0x1d) ? true : false;
                 /* Element count: 2 doubles (Q form only, checked below),
                  * otherwise 4 singles for Q and 2 for non-Q.
                  */
9902             int elements = is_double ? 2 : is_q ? 4 : 2;
9903             if (is_double && !is_q) {
9904                 unallocated_encoding(s);
9905                 return;
9906             }
9907             if (!fp_access_check(s)) {
9908                 return;
9909             }
9910             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
9911             return;
9912         }
9913         case 0x2c: /* FCMGT (zero) */
9914         case 0x2d: /* FCMEQ (zero) */
9915         case 0x2e: /* FCMLT (zero) */
9916         case 0x6c: /* FCMGE (zero) */
9917         case 0x6d: /* FCMLE (zero) */
9918             if (size == 3 && !is_q) {
9919                 unallocated_encoding(s);
9920                 return;
9921             }
                 /* NOTE(review): no fp_access_check() before this handler,
                  * unlike the neighbouring cases; presumably done inside
                  * handle_2misc_fcmp_zero() -- confirm.
                  */
9922             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
9923             return;
9924         case 0x7f: /* FSQRT */
9925             if (size == 3 && !is_q) {
9926                 unallocated_encoding(s);
9927                 return;
9928             }
9929             break;
9930         case 0x1a: /* FCVTNS */
9931         case 0x1b: /* FCVTMS */
9932         case 0x3a: /* FCVTPS */
9933         case 0x3b: /* FCVTZS */
9934         case 0x5a: /* FCVTNU */
9935         case 0x5b: /* FCVTMU */
9936         case 0x7a: /* FCVTPU */
9937         case 0x7b: /* FCVTZU */
9938             need_fpstatus = true;
9939             need_rmode = true;
                 /* Two-bit rounding mode selector: bit 0 comes from combined
                  * opcode bit 5 (size[1]), bit 1 from opcode bit 0. It is
                  * converted with arm_rmode_to_sf() in the emission code.
                  */
9940             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9941             if (size == 3 && !is_q) {
9942                 unallocated_encoding(s);
9943                 return;
9944             }
9945             break;
9946         case 0x5c: /* FCVTAU */
9947         case 0x1c: /* FCVTAS */
9948             need_fpstatus = true;
9949             need_rmode = true;
9950             rmode = FPROUNDING_TIEAWAY;
9951             if (size == 3 && !is_q) {
9952                 unallocated_encoding(s);
9953                 return;
9954             }
9955             break;
9956         case 0x3c: /* URECPE */
                 /* URECPE rejects size == 3 outright (not just the non-Q form) */
9957             if (size == 3) {
9958                 unallocated_encoding(s);
9959                 return;
9960             }
9961             /* fall through */
9962         case 0x3d: /* FRECPE */
9963         case 0x7d: /* FRSQRTE */
9964             if (size == 3 && !is_q) {
9965                 unallocated_encoding(s);
9966                 return;
9967             }
9968             if (!fp_access_check(s)) {
9969                 return;
9970             }
9971             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
9972             return;
9973         case 0x56: /* FCVTXN, FCVTXN2 */
                 /* size == 2 here means size[0] was clear (single precision
                  * encoding), which is rejected for FCVTXN.
                  */
9974             if (size == 2) {
9975                 unallocated_encoding(s);
9976                 return;
9977             }
9978             /* fall through */
9979         case 0x16: /* FCVTN, FCVTN2 */
9980             /* handle_2misc_narrow does a 2*size -> size operation, but these
9981              * instructions encode the source size rather than dest size.
9982              */
9983             if (!fp_access_check(s)) {
9984                 return;
9985             }
9986             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
9987             return;
9988         case 0x17: /* FCVTL, FCVTL2 */
9989             if (!fp_access_check(s)) {
9990                 return;
9991             }
9992             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
9993             return;
9994         case 0x18: /* FRINTN */
9995         case 0x19: /* FRINTM */
9996         case 0x38: /* FRINTP */
9997         case 0x39: /* FRINTZ */
                 /* Explicit rounding mode, encoded the same way as for the
                  * FCVT{N,M,P,Z}{S,U} cases above.
                  */
9998             need_rmode = true;
9999             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10000             /* fall through */
10001         case 0x59: /* FRINTX */
10002         case 0x79: /* FRINTI */
                  /* FRINTX/FRINTI set no rmode here: need_rmode stays false */
10003             need_fpstatus = true;
10004             if (size == 3 && !is_q) {
10005                 unallocated_encoding(s);
10006                 return;
10007             }
10008             break;
10009         case 0x58: /* FRINTA */
10010             need_rmode = true;
10011             rmode = FPROUNDING_TIEAWAY;
10012             need_fpstatus = true;
10013             if (size == 3 && !is_q) {
10014                 unallocated_encoding(s);
10015                 return;
10016             }
10017             break;
10018         case 0x7c: /* URSQRTE */
10019             if (size == 3) {
10020                 unallocated_encoding(s);
10021                 return;
10022             }
10023             need_fpstatus = true;
10024             break;
10025         default:
10026             unallocated_encoding(s);
10027             return;
10028         }
10029         break;
10030     }
10031     default:
10032         unallocated_encoding(s);
10033         return;
10034     }
10035
          /* Stage 2: shared emission code. Only cases that left the switch via
           * 'break' get here; none of them has called fp_access_check() yet.
           */
10036     if (!fp_access_check(s)) {
10037         return;
10038     }
10039
10040     if (need_fpstatus) {
10041         tcg_fpstatus = get_fpstatus_ptr();
10042     } else {
10043         TCGV_UNUSED_PTR(tcg_fpstatus);
10044     }
10045     if (need_rmode) {
              /* tcg_rmode is both the input and the output of the helper; the
               * identical call at the end of the function passes the returned
               * value back in. Presumably the helper returns the previous
               * rounding mode so that the second call restores it -- confirm
               * against the helper's definition.
               */
10046         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
10047         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10048     } else {
10049         TCGV_UNUSED_I32(tcg_rmode);
10050     }
10051
10052     if (size == 3) {
10053         /* All 64-bit element operations can be shared with scalar 2misc */
10054         int pass;
10055
              /* One pass per 64-bit element: two for Q, one otherwise */
10056         for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
10057             TCGv_i64 tcg_op = tcg_temp_new_i64();
10058             TCGv_i64 tcg_res = tcg_temp_new_i64();
10059
10060             read_vec_element(s, tcg_op, rn, pass, MO_64);
10061
10062             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
10063                             tcg_rmode, tcg_fpstatus);
10064
10065             write_vec_element(s, tcg_res, rd, pass, MO_64);
10066
10067             tcg_temp_free_i64(tcg_res);
10068             tcg_temp_free_i64(tcg_op);
10069         }
10070     } else {
10071         int pass;
10072
              /* The vector is processed in 32-bit chunks whatever the element
               * size: four chunks for Q, two otherwise. For size < 2 each chunk
               * is handed to a Neon helper as a whole.
               */
10073         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
10074             TCGv_i32 tcg_op = tcg_temp_new_i32();
10075             TCGv_i32 tcg_res = tcg_temp_new_i32();
                  /* Only used by the 32-bit compare-against-zero cases */
10076             TCGCond cond;
10077
10078             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
10079
10080             if (size == 2) {
10081                 /* Special cases for 32 bit elements */
10082                 switch (opcode) {
10083                 case 0xa: /* CMLT */
10084                     /* 32 bit integer comparison against zero, result is
10085                      * test ? (2^32 - 1) : 0. We implement via setcond(test)
10086                      * and inverting.
10087                      */
10088                     cond = TCG_COND_LT;
                      /* Shared tail for all five compares; CMGT/CMGE and
                       * CMEQ/CMLE jump here after choosing 'cond'.
                       */
10089                 do_cmop:
10090                     tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
10091                     tcg_gen_neg_i32(tcg_res, tcg_res);
10092                     break;
10093                 case 0x8: /* CMGT, CMGE */
10094                     cond = u ? TCG_COND_GE : TCG_COND_GT;
10095                     goto do_cmop;
10096                 case 0x9: /* CMEQ, CMLE */
10097                     cond = u ? TCG_COND_LE : TCG_COND_EQ;
10098                     goto do_cmop;
10099                 case 0x4: /* CLS, CLZ */
10100                     if (u) {
10101                         gen_helper_clz32(tcg_res, tcg_op);
10102                     } else {
10103                         gen_helper_cls32(tcg_res, tcg_op);
10104                     }
10105                     break;
10106                 case 0x7: /* SQABS, SQNEG */
10107                     if (u) {
10108                         gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
10109                     } else {
10110                         gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
10111                     }
10112                     break;
10113                 case 0xb: /* ABS, NEG */
10114                     if (u) {
10115                         tcg_gen_neg_i32(tcg_res, tcg_op);
10116                     } else {
                              /* ABS: res = (op > 0) ? op : -op */
10117                         TCGv_i32 tcg_zero = tcg_const_i32(0);
10118                         tcg_gen_neg_i32(tcg_res, tcg_op);
10119                         tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
10120                                             tcg_zero, tcg_op, tcg_res);
10121                         tcg_temp_free_i32(tcg_zero);
10122                     }
10123                     break;
10124                 case 0x2f: /* FABS */
10125                     gen_helper_vfp_abss(tcg_res, tcg_op);
10126                     break;
10127                 case 0x6f: /* FNEG */
10128                     gen_helper_vfp_negs(tcg_res, tcg_op);
10129                     break;
10130                 case 0x7f: /* FSQRT */
10131                     gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
10132                     break;
                      /* The float-to-int conversions share one helper per
                       * signedness, called with a shift of 0; the variants differ
                       * only in the rounding mode set up via need_rmode above.
                       */
10133                 case 0x1a: /* FCVTNS */
10134                 case 0x1b: /* FCVTMS */
10135                 case 0x1c: /* FCVTAS */
10136                 case 0x3a: /* FCVTPS */
10137                 case 0x3b: /* FCVTZS */
10138                 {
10139                     TCGv_i32 tcg_shift = tcg_const_i32(0);
10140                     gen_helper_vfp_tosls(tcg_res, tcg_op,
10141                                          tcg_shift, tcg_fpstatus);
10142                     tcg_temp_free_i32(tcg_shift);
10143                     break;
10144                 }
10145                 case 0x5a: /* FCVTNU */
10146                 case 0x5b: /* FCVTMU */
10147                 case 0x5c: /* FCVTAU */
10148                 case 0x7a: /* FCVTPU */
10149                 case 0x7b: /* FCVTZU */
10150                 {
10151                     TCGv_i32 tcg_shift = tcg_const_i32(0);
10152                     gen_helper_vfp_touls(tcg_res, tcg_op,
10153                                          tcg_shift, tcg_fpstatus);
10154                     tcg_temp_free_i32(tcg_shift);
10155                     break;
10156                 }
10157                 case 0x18: /* FRINTN */
10158                 case 0x19: /* FRINTM */
10159                 case 0x38: /* FRINTP */
10160                 case 0x39: /* FRINTZ */
10161                 case 0x58: /* FRINTA */
10162                 case 0x79: /* FRINTI */
10163                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
10164                     break;
10165                 case 0x59: /* FRINTX */
10166                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
10167                     break;
10168                 case 0x7c: /* URSQRTE */
10169                     gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
10170                     break;
10171                 default:
                          /* The decode switch admits no other opcode here */
10172                     g_assert_not_reached();
10173                 }
10174             } else {
10175                 /* Use helpers for 8 and 16 bit elements */
10176                 switch (opcode) {
10177                 case 0x5: /* CNT, RBIT */
10178                     /* For these two insns size is part of the opcode specifier
10179                      * (handled earlier); they always operate on byte elements.
10180                      */
10181                     if (u) {
10182                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
10183                     } else {
10184                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
10185                     }
10186                     break;
10187                 case 0x7: /* SQABS, SQNEG */
10188                 {
10189                     NeonGenOneOpEnvFn *genfn;
                          /* Indexed [size][u]: size 0/1 = 8/16 bit, u = 1 is SQNEG */
10190                     static NeonGenOneOpEnvFn * const fns[2][2] = {
10191                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10192                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10193                     };
10194                     genfn = fns[size][u];
10195                     genfn(tcg_res, cpu_env, tcg_op);
10196                     break;
10197                 }
10198                 case 0x8: /* CMGT, CMGE */
10199                 case 0x9: /* CMEQ, CMLE */
10200                 case 0xa: /* CMLT */
10201                 {
                          /* Indexed [comparison][size]; rows are GT, GE, EQ */
10202                     static NeonGenTwoOpFn * const fns[3][2] = {
10203                         { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
10204                         { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
10205                         { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
10206                     };
10207                     NeonGenTwoOpFn *genfn;
10208                     int comp;
10209                     bool reverse;
10210                     TCGv_i32 tcg_zero = tcg_const_i32(0);
10211
10212                     /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
10213                     comp = (opcode - 0x8) * 2 + u;
10214                     /* ...but LE, LT are implemented as reverse GE, GT */
10215                     reverse = (comp > 2);
10216                     if (reverse) {
                              /* Maps CMLE (3) to the GE row and CMLT (4) to GT */
10217                         comp = 4 - comp;
10218                     }
10219                     genfn = fns[comp][size];
10220                     if (reverse) {
10221                         genfn(tcg_res, tcg_zero, tcg_op);
10222                     } else {
10223                         genfn(tcg_res, tcg_op, tcg_zero);
10224                     }
10225                     tcg_temp_free_i32(tcg_zero);
10226                     break;
10227                 }
10228                 case 0xb: /* ABS, NEG */
10229                     if (u) {
                              /* NEG is emitted as 0 - op with the sub helpers */
10230                         TCGv_i32 tcg_zero = tcg_const_i32(0);
10231                         if (size) {
10232                             gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
10233                         } else {
10234                             gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
10235                         }
10236                         tcg_temp_free_i32(tcg_zero);
10237                     } else {
10238                         if (size) {
10239                             gen_helper_neon_abs_s16(tcg_res, tcg_op);
10240                         } else {
10241                             gen_helper_neon_abs_s8(tcg_res, tcg_op);
10242                         }
10243                     }
10244                     break;
10245                 case 0x4: /* CLS, CLZ */
10246                     if (u) {
10247                         if (size == 0) {
10248                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
10249                         } else {
10250                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
10251                         }
10252                     } else {
10253                         if (size == 0) {
10254                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
10255                         } else {
10256                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
10257                         }
10258                     }
10259                     break;
10260                 default:
10261                     g_assert_not_reached();
10262                 }
10263             }
10264
10265             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10266
10267             tcg_temp_free_i32(tcg_res);
10268             tcg_temp_free_i32(tcg_op);
10269         }
10270     }
          /* The loops above only wrote the low 64 bits of Rd in the non-Q form */
10271     if (!is_q) {
10272         clear_vec_high(s, rd);
10273     }
10274
10275     if (need_rmode) {
              /* Second set_rmode call, fed with the value the first one
               * returned (see the setup above), then release the temp.
               */
10276         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10277         tcg_temp_free_i32(tcg_rmode);
10278     }
10279     if (need_fpstatus) {
10280         tcg_temp_free_ptr(tcg_fpstatus);
10281     }
10282 }
10283
10284 /* C3.6.13 AdvSIMD scalar x indexed element
10285  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10286  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10287  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10288  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10289  * C3.6.18 AdvSIMD vector x indexed element
10290  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10291  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10292  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10293  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10294  */
10295 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
10296 {
10297     /* This encoding has two kinds of instruction:
10298      *  normal, where we perform elt x idxelt => elt for each
10299      *     element in the vector
10300      *  long, where we perform elt x idxelt and generate a result of
10301      *     double the width of the input element
10302      * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
10303      */
10304     bool is_scalar = extract32(insn, 28, 1);
10305     bool is_q = extract32(insn, 30, 1);
10306     bool u = extract32(insn, 29, 1);
10307     int size = extract32(insn, 22, 2);
10308     int l = extract32(insn, 21, 1);
10309     int m = extract32(insn, 20, 1);
10310     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
10311     int rm = extract32(insn, 16, 4);
10312     int opcode = extract32(insn, 12, 4);
10313     int h = extract32(insn, 11, 1);
10314     int rn = extract32(insn, 5, 5);
10315     int rd = extract32(insn, 0, 5);
10316     bool is_long = false;
10317     bool is_fp = false;
10318     int index;
10319     TCGv_ptr fpst;
10320
10321     switch (opcode) {
10322     case 0x0: /* MLA */
10323     case 0x4: /* MLS */
10324         if (!u || is_scalar) {
10325             unallocated_encoding(s);
10326             return;
10327         }
10328         break;
10329     case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10330     case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10331     case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
10332         if (is_scalar) {
10333             unallocated_encoding(s);
10334             return;
10335         }
10336         is_long = true;
10337         break;
10338     case 0x3: /* SQDMLAL, SQDMLAL2 */
10339     case 0x7: /* SQDMLSL, SQDMLSL2 */
10340     case 0xb: /* SQDMULL, SQDMULL2 */
10341         is_long = true;
10342         /* fall through */
10343     case 0xc: /* SQDMULH */
10344     case 0xd: /* SQRDMULH */
10345         if (u) {
10346             unallocated_encoding(s);
10347             return;
10348         }
10349         break;
10350     case 0x8: /* MUL */
10351         if (u || is_scalar) {
10352             unallocated_encoding(s);
10353             return;
10354         }
10355         break;
10356     case 0x1: /* FMLA */
10357     case 0x5: /* FMLS */
10358         if (u) {
10359             unallocated_encoding(s);
10360             return;
10361         }
10362         /* fall through */
10363     case 0x9: /* FMUL, FMULX */
10364         if (!extract32(size, 1, 1)) {
10365             unallocated_encoding(s);
10366             return;
10367         }
10368         is_fp = true;
10369         break;
10370     default:
10371         unallocated_encoding(s);
10372         return;
10373     }
10374
10375     if (is_fp) {
10376         /* low bit of size indicates single/double */
10377         size = extract32(size, 0, 1) ? 3 : 2;
10378         if (size == 2) {
10379             index = h << 1 | l;
10380         } else {
10381             if (l || !is_q) {
10382                 unallocated_encoding(s);
10383                 return;
10384             }
10385             index = h;
10386         }
10387         rm |= (m << 4);
10388     } else {
10389         switch (size) {
10390         case 1:
10391             index = h << 2 | l << 1 | m;
10392             break;
10393         case 2:
10394             index = h << 1 | l;
10395             rm |= (m << 4);
10396             break;
10397         default:
10398             unallocated_encoding(s);
10399             return;
10400         }
10401     }
10402
10403     if (!fp_access_check(s)) {
10404         return;
10405     }
10406
10407     if (is_fp) {
10408         fpst = get_fpstatus_ptr();
10409     } else {
10410         TCGV_UNUSED_PTR(fpst);
10411     }
10412
10413     if (size == 3) {
10414         TCGv_i64 tcg_idx = tcg_temp_new_i64();
10415         int pass;
10416
10417         assert(is_fp && is_q && !is_long);
10418
10419         read_vec_element(s, tcg_idx, rm, index, MO_64);
10420
10421         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10422             TCGv_i64 tcg_op = tcg_temp_new_i64();
10423             TCGv_i64 tcg_res = tcg_temp_new_i64();
10424
10425             read_vec_element(s, tcg_op, rn, pass, MO_64);
10426
10427             switch (opcode) {
10428             case 0x5: /* FMLS */
10429                 /* As usual for ARM, separate negation for fused multiply-add */
10430                 gen_helper_vfp_negd(tcg_op, tcg_op);
10431                 /* fall through */
10432             case 0x1: /* FMLA */
10433                 read_vec_element(s, tcg_res, rd, pass, MO_64);
10434                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10435                 break;
10436             case 0x9: /* FMUL, FMULX */
10437                 if (u) {
10438                     gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
10439                 } else {
10440                     gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
10441                 }
10442                 break;
10443             default:
10444                 g_assert_not_reached();
10445             }
10446
10447             write_vec_element(s, tcg_res, rd, pass, MO_64);
10448             tcg_temp_free_i64(tcg_op);
10449             tcg_temp_free_i64(tcg_res);
10450         }
10451
10452         if (is_scalar) {
10453             clear_vec_high(s, rd);
10454         }
10455
10456         tcg_temp_free_i64(tcg_idx);
10457     } else if (!is_long) {
10458         /* 32 bit floating point, or 16 or 32 bit integer.
10459          * For the 16 bit scalar case we use the usual Neon helpers and
10460          * rely on the fact that 0 op 0 == 0 with no side effects.
10461          */
10462         TCGv_i32 tcg_idx = tcg_temp_new_i32();
10463         int pass, maxpasses;
10464
10465         if (is_scalar) {
10466             maxpasses = 1;
10467         } else {
10468             maxpasses = is_q ? 4 : 2;
10469         }
10470
10471         read_vec_element_i32(s, tcg_idx, rm, index, size);
10472
10473         if (size == 1 && !is_scalar) {
10474             /* The simplest way to handle the 16x16 indexed ops is to duplicate
10475              * the index into both halves of the 32 bit tcg_idx and then use
10476              * the usual Neon helpers.
10477              */
10478             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10479         }
10480
10481         for (pass = 0; pass < maxpasses; pass++) {
10482             TCGv_i32 tcg_op = tcg_temp_new_i32();
10483             TCGv_i32 tcg_res = tcg_temp_new_i32();
10484
10485             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
10486
10487             switch (opcode) {
10488             case 0x0: /* MLA */
10489             case 0x4: /* MLS */
10490             case 0x8: /* MUL */
10491             {
10492                 static NeonGenTwoOpFn * const fns[2][2] = {
10493                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
10494                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
10495                 };
10496                 NeonGenTwoOpFn *genfn;
10497                 bool is_sub = opcode == 0x4;
10498
10499                 if (size == 1) {
10500                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
10501                 } else {
10502                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
10503                 }
10504                 if (opcode == 0x8) {
10505                     break;
10506                 }
10507                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
10508                 genfn = fns[size - 1][is_sub];
10509                 genfn(tcg_res, tcg_op, tcg_res);
10510                 break;
10511             }
10512             case 0x5: /* FMLS */
10513                 /* As usual for ARM, separate negation for fused multiply-add */
10514                 gen_helper_vfp_negs(tcg_op, tcg_op);
10515                 /* fall through */
10516             case 0x1: /* FMLA */
10517                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10518                 gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10519                 break;
10520             case 0x9: /* FMUL, FMULX */
10521                 if (u) {
10522                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
10523                 } else {
10524                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
10525                 }
10526                 break;
10527             case 0xc: /* SQDMULH */
10528                 if (size == 1) {
10529                     gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
10530                                                tcg_op, tcg_idx);
10531                 } else {
10532                     gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
10533                                                tcg_op, tcg_idx);
10534                 }
10535                 break;
10536             case 0xd: /* SQRDMULH */
10537                 if (size == 1) {
10538                     gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
10539                                                 tcg_op, tcg_idx);
10540                 } else {
10541                     gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
10542                                                 tcg_op, tcg_idx);
10543                 }
10544                 break;
10545             default:
10546                 g_assert_not_reached();
10547             }
10548
10549             if (is_scalar) {
10550                 write_fp_sreg(s, rd, tcg_res);
10551             } else {
10552                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10553             }
10554
10555             tcg_temp_free_i32(tcg_op);
10556             tcg_temp_free_i32(tcg_res);
10557         }
10558
10559         tcg_temp_free_i32(tcg_idx);
10560
10561         if (!is_q) {
10562             clear_vec_high(s, rd);
10563         }
10564     } else {
10565         /* long ops: 16x16->32 or 32x32->64 */
10566         TCGv_i64 tcg_res[2];
10567         int pass;
10568         bool satop = extract32(opcode, 0, 1);
10569         TCGMemOp memop = MO_32;
10570
10571         if (satop || !u) {
10572             memop |= MO_SIGN;
10573         }
10574
10575         if (size == 2) {
10576             TCGv_i64 tcg_idx = tcg_temp_new_i64();
10577
10578             read_vec_element(s, tcg_idx, rm, index, memop);
10579
10580             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10581                 TCGv_i64 tcg_op = tcg_temp_new_i64();
10582                 TCGv_i64 tcg_passres;
10583                 int passelt;
10584
10585                 if (is_scalar) {
10586                     passelt = 0;
10587                 } else {
10588                     passelt = pass + (is_q * 2);
10589                 }
10590
10591                 read_vec_element(s, tcg_op, rn, passelt, memop);
10592
10593                 tcg_res[pass] = tcg_temp_new_i64();
10594
10595                 if (opcode == 0xa || opcode == 0xb) {
10596                     /* Non-accumulating ops */
10597                     tcg_passres = tcg_res[pass];
10598                 } else {
10599                     tcg_passres = tcg_temp_new_i64();
10600                 }
10601
10602                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
10603                 tcg_temp_free_i64(tcg_op);
10604
10605                 if (satop) {
10606                     /* saturating, doubling */
10607                     gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10608                                                       tcg_passres, tcg_passres);
10609                 }
10610
10611                 if (opcode == 0xa || opcode == 0xb) {
10612                     continue;
10613                 }
10614
10615                 /* Accumulating op: handle accumulate step */
10616                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10617
10618                 switch (opcode) {
10619                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10620                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10621                     break;
10622                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10623                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10624                     break;
10625                 case 0x7: /* SQDMLSL, SQDMLSL2 */
10626                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
10627                     /* fall through */
10628                 case 0x3: /* SQDMLAL, SQDMLAL2 */
10629                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10630                                                       tcg_res[pass],
10631                                                       tcg_passres);
10632                     break;
10633                 default:
10634                     g_assert_not_reached();
10635                 }
10636                 tcg_temp_free_i64(tcg_passres);
10637             }
10638             tcg_temp_free_i64(tcg_idx);
10639
10640             if (is_scalar) {
10641                 clear_vec_high(s, rd);
10642             }
10643         } else {
10644             TCGv_i32 tcg_idx = tcg_temp_new_i32();
10645
10646             assert(size == 1);
10647             read_vec_element_i32(s, tcg_idx, rm, index, size);
10648
10649             if (!is_scalar) {
10650                 /* The simplest way to handle the 16x16 indexed ops is to
10651                  * duplicate the index into both halves of the 32 bit tcg_idx
10652                  * and then use the usual Neon helpers.
10653                  */
10654                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10655             }
10656
10657             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10658                 TCGv_i32 tcg_op = tcg_temp_new_i32();
10659                 TCGv_i64 tcg_passres;
10660
10661                 if (is_scalar) {
10662                     read_vec_element_i32(s, tcg_op, rn, pass, size);
10663                 } else {
10664                     read_vec_element_i32(s, tcg_op, rn,
10665                                          pass + (is_q * 2), MO_32);
10666                 }
10667
10668                 tcg_res[pass] = tcg_temp_new_i64();
10669
10670                 if (opcode == 0xa || opcode == 0xb) {
10671                     /* Non-accumulating ops */
10672                     tcg_passres = tcg_res[pass];
10673                 } else {
10674                     tcg_passres = tcg_temp_new_i64();
10675                 }
10676
10677                 if (memop & MO_SIGN) {
10678                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
10679                 } else {
10680                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
10681                 }
10682                 if (satop) {
10683                     gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10684                                                       tcg_passres, tcg_passres);
10685                 }
10686                 tcg_temp_free_i32(tcg_op);
10687
10688                 if (opcode == 0xa || opcode == 0xb) {
10689                     continue;
10690                 }
10691
10692                 /* Accumulating op: handle accumulate step */
10693                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10694
10695                 switch (opcode) {
10696                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10697                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
10698                                              tcg_passres);
10699                     break;
10700                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10701                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
10702                                              tcg_passres);
10703                     break;
10704                 case 0x7: /* SQDMLSL, SQDMLSL2 */
10705                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10706                     /* fall through */
10707                 case 0x3: /* SQDMLAL, SQDMLAL2 */
10708                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10709                                                       tcg_res[pass],
10710                                                       tcg_passres);
10711                     break;
10712                 default:
10713                     g_assert_not_reached();
10714                 }
10715                 tcg_temp_free_i64(tcg_passres);
10716             }
10717             tcg_temp_free_i32(tcg_idx);
10718
10719             if (is_scalar) {
10720                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
10721             }
10722         }
10723
10724         if (is_scalar) {
10725             tcg_res[1] = tcg_const_i64(0);
10726         }
10727
10728         for (pass = 0; pass < 2; pass++) {
10729             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10730             tcg_temp_free_i64(tcg_res[pass]);
10731         }
10732     }
10733
10734     if (!TCGV_IS_UNUSED_PTR(fpst)) {
10735         tcg_temp_free_ptr(fpst);
10736     }
10737 }
10738
10739 /* C3.6.19 Crypto AES
10740  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10741  * +-----------------+------+-----------+--------+-----+------+------+
10742  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10743  * +-----------------+------+-----------+--------+-----+------+------+
10744  */
10745 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
10746 {
10747     int size = extract32(insn, 22, 2);
10748     int opcode = extract32(insn, 12, 5);
10749     int rn = extract32(insn, 5, 5);
10750     int rd = extract32(insn, 0, 5);
10751     int decrypt;
10752     TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt;
10753     CryptoThreeOpEnvFn *genfn;
10754
10755     if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
10756         || size != 0) {
10757         unallocated_encoding(s);
10758         return;
10759     }
10760
10761     switch (opcode) {
10762     case 0x4: /* AESE */
10763         decrypt = 0;
10764         genfn = gen_helper_crypto_aese;
10765         break;
10766     case 0x6: /* AESMC */
10767         decrypt = 0;
10768         genfn = gen_helper_crypto_aesmc;
10769         break;
10770     case 0x5: /* AESD */
10771         decrypt = 1;
10772         genfn = gen_helper_crypto_aese;
10773         break;
10774     case 0x7: /* AESIMC */
10775         decrypt = 1;
10776         genfn = gen_helper_crypto_aesmc;
10777         break;
10778     default:
10779         unallocated_encoding(s);
10780         return;
10781     }
10782
10783     /* Note that we convert the Vx register indexes into the
10784      * index within the vfp.regs[] array, so we can share the
10785      * helper with the AArch32 instructions.
10786      */
10787     tcg_rd_regno = tcg_const_i32(rd << 1);
10788     tcg_rn_regno = tcg_const_i32(rn << 1);
10789     tcg_decrypt = tcg_const_i32(decrypt);
10790
10791     genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt);
10792
10793     tcg_temp_free_i32(tcg_rd_regno);
10794     tcg_temp_free_i32(tcg_rn_regno);
10795     tcg_temp_free_i32(tcg_decrypt);
10796 }
10797
10798 /* C3.6.20 Crypto three-reg SHA
10799  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
10800  * +-----------------+------+---+------+---+--------+-----+------+------+
10801  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
10802  * +-----------------+------+---+------+---+--------+-----+------+------+
10803  */
10804 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
10805 {
10806     int size = extract32(insn, 22, 2);
10807     int opcode = extract32(insn, 12, 3);
10808     int rm = extract32(insn, 16, 5);
10809     int rn = extract32(insn, 5, 5);
10810     int rd = extract32(insn, 0, 5);
10811     CryptoThreeOpEnvFn *genfn;
10812     TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno;
10813     int feature = ARM_FEATURE_V8_SHA256;
10814
10815     if (size != 0) {
10816         unallocated_encoding(s);
10817         return;
10818     }
10819
10820     switch (opcode) {
10821     case 0: /* SHA1C */
10822     case 1: /* SHA1P */
10823     case 2: /* SHA1M */
10824     case 3: /* SHA1SU0 */
10825         genfn = NULL;
10826         feature = ARM_FEATURE_V8_SHA1;
10827         break;
10828     case 4: /* SHA256H */
10829         genfn = gen_helper_crypto_sha256h;
10830         break;
10831     case 5: /* SHA256H2 */
10832         genfn = gen_helper_crypto_sha256h2;
10833         break;
10834     case 6: /* SHA256SU1 */
10835         genfn = gen_helper_crypto_sha256su1;
10836         break;
10837     default:
10838         unallocated_encoding(s);
10839         return;
10840     }
10841
10842     if (!arm_dc_feature(s, feature)) {
10843         unallocated_encoding(s);
10844         return;
10845     }
10846
10847     tcg_rd_regno = tcg_const_i32(rd << 1);
10848     tcg_rn_regno = tcg_const_i32(rn << 1);
10849     tcg_rm_regno = tcg_const_i32(rm << 1);
10850
10851     if (genfn) {
10852         genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno);
10853     } else {
10854         TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
10855
10856         gen_helper_crypto_sha1_3reg(cpu_env, tcg_rd_regno,
10857                                     tcg_rn_regno, tcg_rm_regno, tcg_opcode);
10858         tcg_temp_free_i32(tcg_opcode);
10859     }
10860
10861     tcg_temp_free_i32(tcg_rd_regno);
10862     tcg_temp_free_i32(tcg_rn_regno);
10863     tcg_temp_free_i32(tcg_rm_regno);
10864 }
10865
10866 /* C3.6.21 Crypto two-reg SHA
10867  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10868  * +-----------------+------+-----------+--------+-----+------+------+
10869  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10870  * +-----------------+------+-----------+--------+-----+------+------+
10871  */
10872 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
10873 {
10874     int size = extract32(insn, 22, 2);
10875     int opcode = extract32(insn, 12, 5);
10876     int rn = extract32(insn, 5, 5);
10877     int rd = extract32(insn, 0, 5);
10878     CryptoTwoOpEnvFn *genfn;
10879     int feature;
10880     TCGv_i32 tcg_rd_regno, tcg_rn_regno;
10881
10882     if (size != 0) {
10883         unallocated_encoding(s);
10884         return;
10885     }
10886
10887     switch (opcode) {
10888     case 0: /* SHA1H */
10889         feature = ARM_FEATURE_V8_SHA1;
10890         genfn = gen_helper_crypto_sha1h;
10891         break;
10892     case 1: /* SHA1SU1 */
10893         feature = ARM_FEATURE_V8_SHA1;
10894         genfn = gen_helper_crypto_sha1su1;
10895         break;
10896     case 2: /* SHA256SU0 */
10897         feature = ARM_FEATURE_V8_SHA256;
10898         genfn = gen_helper_crypto_sha256su0;
10899         break;
10900     default:
10901         unallocated_encoding(s);
10902         return;
10903     }
10904
10905     if (!arm_dc_feature(s, feature)) {
10906         unallocated_encoding(s);
10907         return;
10908     }
10909
10910     tcg_rd_regno = tcg_const_i32(rd << 1);
10911     tcg_rn_regno = tcg_const_i32(rn << 1);
10912
10913     genfn(cpu_env, tcg_rd_regno, tcg_rn_regno);
10914
10915     tcg_temp_free_i32(tcg_rd_regno);
10916     tcg_temp_free_i32(tcg_rn_regno);
10917 }
10918
10919 /* C3.6 Data processing - SIMD, inc Crypto
10920  *
10921  * As the decode gets a little complex we are using a table based
10922  * approach for this part of the decode.
10923  */
10924 static const AArch64DecodeTable data_proc_simd[] = {
10925     /* pattern  ,  mask     ,  fn                        */
10926     { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
10927     { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
10928     { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
10929     { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
10930     { 0x0e000400, 0x9fe08400, disas_simd_copy },
10931     { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
10932     /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
10933     { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
10934     { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
10935     { 0x0e000000, 0xbf208c00, disas_simd_tb },
10936     { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
10937     { 0x2e000000, 0xbf208400, disas_simd_ext },
10938     { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
10939     { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
10940     { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
10941     { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
10942     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
10943     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
10944     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
10945     { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
10946     { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
10947     { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
10948     { 0x00000000, 0x00000000, NULL }
10949 };
10950
10951 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
10952 {
10953     /* Note that this is called with all non-FP cases from
10954      * table C3-6 so it must UNDEF for entries not specifically
10955      * allocated to instructions in that table.
10956      */
10957     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
10958     if (fn) {
10959         fn(s, insn);
10960     } else {
10961         unallocated_encoding(s);
10962     }
10963 }
10964
10965 /* C3.6 Data processing - SIMD and floating point */
10966 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
10967 {
10968     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
10969         disas_data_proc_fp(s, insn);
10970     } else {
10971         /* SIMD, including crypto */
10972         disas_data_proc_simd(s, insn);
10973     }
10974 }
10975
10976 /* C3.1 A64 instruction index by encoding */
10977 static void disas_a64_insn(CPUARMState *env, DisasContext *s)
10978 {
10979     uint32_t insn;
10980
10981     insn = arm_ldl_code(env, s->pc, s->bswap_code);
10982     s->insn = insn;
10983     s->pc += 4;
10984
10985     s->fp_access_checked = false;
10986
10987     switch (extract32(insn, 25, 4)) {
10988     case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
10989         unallocated_encoding(s);
10990         break;
10991     case 0x8: case 0x9: /* Data processing - immediate */
10992         disas_data_proc_imm(s, insn);
10993         break;
10994     case 0xa: case 0xb: /* Branch, exception generation and system insns */
10995         disas_b_exc_sys(s, insn);
10996         break;
10997     case 0x4:
10998     case 0x6:
10999     case 0xc:
11000     case 0xe:      /* Loads and stores */
11001         disas_ldst(s, insn);
11002         break;
11003     case 0x5:
11004     case 0xd:      /* Data processing - register */
11005         disas_data_proc_reg(s, insn);
11006         break;
11007     case 0x7:
11008     case 0xf:      /* Data processing - SIMD and floating point */
11009         disas_data_proc_simd_fp(s, insn);
11010         break;
11011     default:
11012         assert(FALSE); /* all 15 cases should be handled above */
11013         break;
11014     }
11015
11016     /* if we allocated any temporaries, free them here */
11017     free_tmp_a64(s);
11018 }
11019
11020 void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
11021 {
11022     CPUState *cs = CPU(cpu);
11023     CPUARMState *env = &cpu->env;
11024     DisasContext dc1, *dc = &dc1;
11025     target_ulong pc_start;
11026     target_ulong next_page_start;
11027     int num_insns;
11028     int max_insns;
11029
11030     pc_start = tb->pc;
11031
11032     dc->tb = tb;
11033
11034     dc->is_jmp = DISAS_NEXT;
11035     dc->pc = pc_start;
11036     dc->singlestep_enabled = cs->singlestep_enabled;
11037     dc->condjmp = 0;
11038
11039     dc->aarch64 = 1;
11040     /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
11041      * there is no secure EL1, so we route exceptions to EL3.
11042      */
11043     dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
11044                                !arm_el_is_aa64(env, 3);
11045     dc->thumb = 0;
11046     dc->bswap_code = 0;
11047     dc->condexec_mask = 0;
11048     dc->condexec_cond = 0;
11049     dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags);
11050     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
11051 #if !defined(CONFIG_USER_ONLY)
11052     dc->user = (dc->current_el == 0);
11053 #endif
11054     dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(tb->flags);
11055     dc->vec_len = 0;
11056     dc->vec_stride = 0;
11057     dc->cp_regs = cpu->cp_regs;
11058     dc->features = env->features;
11059
11060     /* Single step state. The code-generation logic here is:
11061      *  SS_ACTIVE == 0:
11062      *   generate code with no special handling for single-stepping (except
11063      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
11064      *   this happens anyway because those changes are all system register or
11065      *   PSTATE writes).
11066      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
11067      *   emit code for one insn
11068      *   emit code to clear PSTATE.SS
11069      *   emit code to generate software step exception for completed step
11070      *   end TB (as usual for having generated an exception)
11071      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
11072      *   emit code to generate a software step exception
11073      *   end the TB
11074      */
11075     dc->ss_active = ARM_TBFLAG_SS_ACTIVE(tb->flags);
11076     dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(tb->flags);
11077     dc->is_ldex = false;
11078     dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);
11079
11080     init_tmp_a64_array(dc);
11081
11082     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
11083     num_insns = 0;
11084     max_insns = tb->cflags & CF_COUNT_MASK;
11085     if (max_insns == 0) {
11086         max_insns = CF_COUNT_MASK;
11087     }
11088     if (max_insns > TCG_MAX_INSNS) {
11089         max_insns = TCG_MAX_INSNS;
11090     }
11091
11092     gen_tb_start(tb);
11093
11094     tcg_clear_temp_count();
11095
11096     do {
11097         tcg_gen_insn_start(dc->pc, 0);
11098         num_insns++;
11099
11100         if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
11101             CPUBreakpoint *bp;
11102             QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
11103                 if (bp->pc == dc->pc) {
11104                     if (bp->flags & BP_CPU) {
11105                         gen_helper_check_breakpoints(cpu_env);
11106                         /* End the TB early; it likely won't be executed */
11107                         dc->is_jmp = DISAS_UPDATE;
11108                     } else {
11109                         gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
11110                         /* The address covered by the breakpoint must be
11111                            included in [tb->pc, tb->pc + tb->size) in order
11112                            to for it to be properly cleared -- thus we
11113                            increment the PC here so that the logic setting
11114                            tb->size below does the right thing.  */
11115                         dc->pc += 4;
11116                         goto done_generating;
11117                     }
11118                     break;
11119                 }
11120             }
11121         }
11122
11123         if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
11124             gen_io_start();
11125         }
11126
11127         if (dc->ss_active && !dc->pstate_ss) {
11128             /* Singlestep state is Active-pending.
11129              * If we're in this state at the start of a TB then either
11130              *  a) we just took an exception to an EL which is being debugged
11131              *     and this is the first insn in the exception handler
11132              *  b) debug exceptions were masked and we just unmasked them
11133              *     without changing EL (eg by clearing PSTATE.D)
11134              * In either case we're going to take a swstep exception in the
11135              * "did not step an insn" case, and so the syndrome ISV and EX
11136              * bits should be zero.
11137              */
11138             assert(num_insns == 1);
11139             gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
11140                           default_exception_el(dc));
11141             dc->is_jmp = DISAS_EXC;
11142             break;
11143         }
11144
11145         disas_a64_insn(env, dc);
11146
11147         if (tcg_check_temp_count()) {
11148             fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
11149                     dc->pc);
11150         }
11151
11152         /* Translation stops when a conditional branch is encountered.
11153          * Otherwise the subsequent code could get translated several times.
11154          * Also stop translation when a page boundary is reached.  This
11155          * ensures prefetch aborts occur at the right place.
11156          */
11157     } while (!dc->is_jmp && !tcg_op_buf_full() &&
11158              !cs->singlestep_enabled &&
11159              !singlestep &&
11160              !dc->ss_active &&
11161              dc->pc < next_page_start &&
11162              num_insns < max_insns);
11163
11164     if (tb->cflags & CF_LAST_IO) {
11165         gen_io_end();
11166     }
11167
11168     if (unlikely(cs->singlestep_enabled || dc->ss_active)
11169         && dc->is_jmp != DISAS_EXC) {
11170         /* Note that this means single stepping WFI doesn't halt the CPU.
11171          * For conditional branch insns this is harmless unreachable code as
11172          * gen_goto_tb() has already handled emitting the debug exception
11173          * (and thus a tb-jump is not possible when singlestepping).
11174          */
11175         assert(dc->is_jmp != DISAS_TB_JUMP);
11176         if (dc->is_jmp != DISAS_JUMP) {
11177             gen_a64_set_pc_im(dc->pc);
11178         }
11179         if (cs->singlestep_enabled) {
11180             gen_exception_internal(EXCP_DEBUG);
11181         } else {
11182             gen_step_complete_exception(dc);
11183         }
11184     } else {
11185         switch (dc->is_jmp) {
11186         case DISAS_NEXT:
11187             gen_goto_tb(dc, 1, dc->pc);
11188             break;
11189         default:
11190         case DISAS_UPDATE:
11191             gen_a64_set_pc_im(dc->pc);
11192             /* fall through */
11193         case DISAS_JUMP:
11194             /* indicate that the hash table must be used to find the next TB */
11195             tcg_gen_exit_tb(0);
11196             break;
11197         case DISAS_TB_JUMP:
11198         case DISAS_EXC:
11199         case DISAS_SWI:
11200             break;
11201         case DISAS_WFE:
11202             gen_a64_set_pc_im(dc->pc);
11203             gen_helper_wfe(cpu_env);
11204             break;
11205         case DISAS_YIELD:
11206             gen_a64_set_pc_im(dc->pc);
11207             gen_helper_yield(cpu_env);
11208             break;
11209         case DISAS_WFI:
11210             /* This is a special case because we don't want to just halt the CPU
11211              * if trying to debug across a WFI.
11212              */
11213             gen_a64_set_pc_im(dc->pc);
11214             gen_helper_wfi(cpu_env);
11215             /* The helper doesn't necessarily throw an exception, but we
11216              * must go back to the main loop to check for interrupts anyway.
11217              */
11218             tcg_gen_exit_tb(0);
11219             break;
11220         }
11221     }
11222
11223 done_generating:
11224     gen_tb_end(tb, num_insns);
11225
11226 #ifdef DEBUG_DISAS
11227     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
11228         qemu_log("----------------\n");
11229         qemu_log("IN: %s\n", lookup_symbol(pc_start));
11230         log_target_disas(cs, pc_start, dc->pc - pc_start,
11231                          4 | (dc->bswap_code << 1));
11232         qemu_log("\n");
11233     }
11234 #endif
11235     tb->size = dc->pc - pc_start;
11236     tb->icount = num_insns;
11237 }