target-arm/translate-a64.c

   1 /*
   2  *  AArch64 translation
   3  *
   4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include <stdarg.h>
  20 #include <stdlib.h>
  21 #include <stdio.h>
  22 #include <string.h>
  23 #include <inttypes.h>
  24
  25 #include "cpu.h"
  26 #include "tcg-op.h"
  27 #include "qemu/log.h"
  28 #include "arm_ldst.h"
  29 #include "translate.h"
  30 #include "internals.h"
  31 #include "qemu/host-utils.h"
  32
  33 #include "exec/semihost.h"
  34 #include "exec/gen-icount.h"
  35
  36 #include "exec/helper-proto.h"
  37 #include "exec/helper-gen.h"
  38
  39 #include "trace-tcg.h"
  40
  41 static TCGv_i64 cpu_X[32];
  42 static TCGv_i64 cpu_pc;
  43
  44 /* Load/store exclusive handling */
  45 static TCGv_i64 cpu_exclusive_high;
  46
  47 static const char *regnames[] = {
  48     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
  49     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
  50     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
  51     "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
  52 };
  53
  54 enum a64_shift_type {
  55     A64_SHIFT_TYPE_LSL = 0,
  56     A64_SHIFT_TYPE_LSR = 1,
  57     A64_SHIFT_TYPE_ASR = 2,
  58     A64_SHIFT_TYPE_ROR = 3
  59 };
  60
  61 /* Table based decoder typedefs - used when the relevant bits for decode
  62  * are too awkwardly scattered across the instruction (eg SIMD).
  63  */
  64 typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);
  65
  66 typedef struct AArch64DecodeTable {
  67     uint32_t pattern;
  68     uint32_t mask;
  69     AArch64DecodeFn *disas_fn;
  70 } AArch64DecodeTable;
  71
  72 /* Function prototype for gen_ functions for calling Neon helpers */
  73 typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
  74 typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
  75 typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
  76 typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
  77 typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
  78 typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
  79 typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
  80 typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
  81 typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
  82 typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
  83 typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
  84 typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
  85 typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
  86
  87 /* initialize TCG globals.  */
  88 void a64_translate_init(void)
  89 {
  90     int i;
  91
  92     cpu_pc = tcg_global_mem_new_i64(TCG_AREG0,
  93                                     offsetof(CPUARMState, pc),
  94                                     "pc");
  95     for (i = 0; i < 32; i++) {
  96         cpu_X[i] = tcg_global_mem_new_i64(TCG_AREG0,
  97                                           offsetof(CPUARMState, xregs[i]),
  98                                           regnames[i]);
  99     }
 100
 101     cpu_exclusive_high = tcg_global_mem_new_i64(TCG_AREG0,
 102         offsetof(CPUARMState, exclusive_high), "exclusive_high");
 103 }
 104
 105 static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s)
 106 {
 107     /* Return the mmu_idx to use for A64 "unprivileged load/store" insns:
 108      *  if EL1, access as if EL0; otherwise access at current EL
 109      */
 110     switch (s->mmu_idx) {
 111     case ARMMMUIdx_S12NSE1:
 112         return ARMMMUIdx_S12NSE0;
 113     case ARMMMUIdx_S1SE1:
 114         return ARMMMUIdx_S1SE0;
 115     case ARMMMUIdx_S2NS:
 116         g_assert_not_reached();
 117     default:
 118         return s->mmu_idx;
 119     }
 120 }
 121
 122 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
 123                             fprintf_function cpu_fprintf, int flags)
 124 {
 125     ARMCPU *cpu = ARM_CPU(cs);
 126     CPUARMState *env = &cpu->env;
 127     uint32_t psr = pstate_read(env);
 128     int i;
 129     int el = arm_current_el(env);
 130     const char *ns_status;
 131
 132     cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
 133             env->pc, env->xregs[31]);
 134     for (i = 0; i < 31; i++) {
 135         cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
 136         if ((i % 4) == 3) {
 137             cpu_fprintf(f, "\n");
 138         } else {
 139             cpu_fprintf(f, " ");
 140         }
 141     }
 142
 143     if (arm_feature(env, ARM_FEATURE_EL3) && el != 3) {
 144         ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
 145     } else {
 146         ns_status = "";
 147     }
 148
 149     cpu_fprintf(f, "\nPSTATE=%08x %c%c%c%c %sEL%d%c\n",
 150                 psr,
 151                 psr & PSTATE_N ? 'N' : '-',
 152                 psr & PSTATE_Z ? 'Z' : '-',
 153                 psr & PSTATE_C ? 'C' : '-',
 154                 psr & PSTATE_V ? 'V' : '-',
 155                 ns_status,
 156                 el,
 157                 psr & PSTATE_SP ? 'h' : 't');
 158
 159     if (flags & CPU_DUMP_FPU) {
 160         int numvfpregs = 32;
 161         for (i = 0; i < numvfpregs; i += 2) {
 162             uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
 163             uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
 164             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
 165                         i, vhi, vlo);
 166             vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
 167             vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
 168             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
 169                         i + 1, vhi, vlo);
 170         }
 171         cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
 172                     vfp_get_fpcr(env), vfp_get_fpsr(env));
 173     }
 174 }
 175
 176 void gen_a64_set_pc_im(uint64_t val)
 177 {
 178     tcg_gen_movi_i64(cpu_pc, val);
 179 }
 180
 181 typedef struct DisasCompare64 {
 182     TCGCond cond;
 183     TCGv_i64 value;
 184 } DisasCompare64;
 185
 186 static void a64_test_cc(DisasCompare64 *c64, int cc)
 187 {
 188     DisasCompare c32;
 189
 190     arm_test_cc(&c32, cc);
 191
 192     /* Sign-extend the 32-bit value so that the GE/LT comparisons work
 193        * properly.  The NE/EQ comparisons are also fine with this choice.  */
 194     c64->cond = c32.cond;
 195     c64->value = tcg_temp_new_i64();
 196     tcg_gen_ext_i32_i64(c64->value, c32.value);
 197
 198     arm_free_cc(&c32);
 199 }
 200
 201 static void a64_free_cc(DisasCompare64 *c64)
 202 {
 203     tcg_temp_free_i64(c64->value);
 204 }
 205
 206 static void gen_exception_internal(int excp)
 207 {
 208     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 209
 210     assert(excp_is_internal(excp));
 211     gen_helper_exception_internal(cpu_env, tcg_excp);
 212     tcg_temp_free_i32(tcg_excp);
 213 }
 214
 215 static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
 216 {
 217     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 218     TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
 219     TCGv_i32 tcg_el = tcg_const_i32(target_el);
 220
 221     gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
 222                                        tcg_syn, tcg_el);
 223     tcg_temp_free_i32(tcg_el);
 224     tcg_temp_free_i32(tcg_syn);
 225     tcg_temp_free_i32(tcg_excp);
 226 }
 227
 228 static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
 229 {
 230     gen_a64_set_pc_im(s->pc - offset);
 231     gen_exception_internal(excp);
 232     s->is_jmp = DISAS_EXC;
 233 }
 234
 235 static void gen_exception_insn(DisasContext *s, int offset, int excp,
 236                                uint32_t syndrome, uint32_t target_el)
 237 {
 238     gen_a64_set_pc_im(s->pc - offset);
 239     gen_exception(excp, syndrome, target_el);
 240     s->is_jmp = DISAS_EXC;
 241 }
 242
 243 static void gen_ss_advance(DisasContext *s)
 244 {
 245     /* If the singlestep state is Active-not-pending, advance to
 246      * Active-pending.
 247      */
 248     if (s->ss_active) {
 249         s->pstate_ss = 0;
 250         gen_helper_clear_pstate_ss(cpu_env);
 251     }
 252 }
 253
 254 static void gen_step_complete_exception(DisasContext *s)
 255 {
 256     /* We just completed step of an insn. Move from Active-not-pending
 257      * to Active-pending, and then also take the swstep exception.
 258      * This corresponds to making the (IMPDEF) choice to prioritize
 259      * swstep exceptions over asynchronous exceptions taken to an exception
 260      * level where debug is disabled. This choice has the advantage that
 261      * we do not need to maintain internal state corresponding to the
 262      * ISV/EX syndrome bits between completion of the step and generation
 263      * of the exception, and our syndrome information is always correct.
 264      */
 265     gen_ss_advance(s);
 266     gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
 267                   default_exception_el(s));
 268     s->is_jmp = DISAS_EXC;
 269 }
 270
 271 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
 272 {
 273     /* No direct tb linking with singlestep (either QEMU's or the ARM
 274      * debug architecture kind) or deterministic io
 275      */
 276     if (s->singlestep_enabled || s->ss_active || (s->tb->cflags & CF_LAST_IO)) {
 277         return false;
 278     }
 279
 280     /* Only link tbs from inside the same guest page */
 281     if ((s->tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
 282         return false;
 283     }
 284
 285     return true;
 286 }
 287
 288 static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
 289 {
 290     TranslationBlock *tb;
 291
 292     tb = s->tb;
 293     if (use_goto_tb(s, n, dest)) {
 294         tcg_gen_goto_tb(n);
 295         gen_a64_set_pc_im(dest);
 296         tcg_gen_exit_tb((intptr_t)tb + n);
 297         s->is_jmp = DISAS_TB_JUMP;
 298     } else {
 299         gen_a64_set_pc_im(dest);
 300         if (s->ss_active) {
 301             gen_step_complete_exception(s);
 302         } else if (s->singlestep_enabled) {
 303             gen_exception_internal(EXCP_DEBUG);
 304         } else {
 305             tcg_gen_exit_tb(0);
 306             s->is_jmp = DISAS_TB_JUMP;
 307         }
 308     }
 309 }
 310
 311 static void unallocated_encoding(DisasContext *s)
 312 {
 313     /* Unallocated and reserved encodings are uncategorized */
 314     gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
 315                        default_exception_el(s));
 316 }
 317
 318 #define unsupported_encoding(s, insn)                                    \
 319     do {                                                                 \
 320         qemu_log_mask(LOG_UNIMP,                                         \
 321                       "%s:%d: unsupported instruction encoding 0x%08x "  \
 322                       "at pc=%016" PRIx64 "\n",                          \
 323                       __FILE__, __LINE__, insn, s->pc - 4);              \
 324         unallocated_encoding(s);                                         \
 325     } while (0);
 326
 327 static void init_tmp_a64_array(DisasContext *s)
 328 {
 329 #ifdef CONFIG_DEBUG_TCG
 330     int i;
 331     for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
 332         TCGV_UNUSED_I64(s->tmp_a64[i]);
 333     }
 334 #endif
 335     s->tmp_a64_count = 0;
 336 }
 337
 338 static void free_tmp_a64(DisasContext *s)
 339 {
 340     int i;
 341     for (i = 0; i < s->tmp_a64_count; i++) {
 342         tcg_temp_free_i64(s->tmp_a64[i]);
 343     }
 344     init_tmp_a64_array(s);
 345 }
 346
 347 static TCGv_i64 new_tmp_a64(DisasContext *s)
 348 {
 349     assert(s->tmp_a64_count < TMP_A64_MAX);
 350     return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
 351 }
 352
 353 static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
 354 {
 355     TCGv_i64 t = new_tmp_a64(s);
 356     tcg_gen_movi_i64(t, 0);
 357     return t;
 358 }
 359
 360 /*
 361  * Register access functions
 362  *
 * These functions are used for directly accessing a register where
 364  * changes to the final register value are likely to be made. If you
 365  * need to use a register for temporary calculation (e.g. index type
 366  * operations) use the read_* form.
 367  *
 368  * B1.2.1 Register mappings
 369  *
 370  * In instruction register encoding 31 can refer to ZR (zero register) or
 371  * the SP (stack pointer) depending on context. In QEMU's case we map SP
 372  * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 373  * This is the point of the _sp forms.
 374  */
 375 static TCGv_i64 cpu_reg(DisasContext *s, int reg)
 376 {
 377     if (reg == 31) {
 378         return new_tmp_a64_zero(s);
 379     } else {
 380         return cpu_X[reg];
 381     }
 382 }
 383
 384 /* register access for when 31 == SP */
 385 static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
 386 {
 387     return cpu_X[reg];
 388 }
 389
 390 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 391  * representing the register contents. This TCGv is an auto-freed
 392  * temporary so it need not be explicitly freed, and may be modified.
 393  */
 394 static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
 395 {
 396     TCGv_i64 v = new_tmp_a64(s);
 397     if (reg != 31) {
 398         if (sf) {
 399             tcg_gen_mov_i64(v, cpu_X[reg]);
 400         } else {
 401             tcg_gen_ext32u_i64(v, cpu_X[reg]);
 402         }
 403     } else {
 404         tcg_gen_movi_i64(v, 0);
 405     }
 406     return v;
 407 }
 408
 409 static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
 410 {
 411     TCGv_i64 v = new_tmp_a64(s);
 412     if (sf) {
 413         tcg_gen_mov_i64(v, cpu_X[reg]);
 414     } else {
 415         tcg_gen_ext32u_i64(v, cpu_X[reg]);
 416     }
 417     return v;
 418 }
 419
 420 /* We should have at some point before trying to access an FP register
 421  * done the necessary access check, so assert that
 422  * (a) we did the check and
 423  * (b) we didn't then just plough ahead anyway if it failed.
 424  * Print the instruction pattern in the abort message so we can figure
 425  * out what we need to fix if a user encounters this problem in the wild.
 426  */
 427 static inline void assert_fp_access_checked(DisasContext *s)
 428 {
 429 #ifdef CONFIG_DEBUG_TCG
 430     if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
 431         fprintf(stderr, "target-arm: FP access check missing for "
 432                 "instruction 0x%08x\n", s->insn);
 433         abort();
 434     }
 435 #endif
 436 }
 437
 438 /* Return the offset into CPUARMState of an element of specified
 439  * size, 'element' places in from the least significant end of
 440  * the FP/vector register Qn.
 441  */
 442 static inline int vec_reg_offset(DisasContext *s, int regno,
 443                                  int element, TCGMemOp size)
 444 {
 445     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 446 #ifdef HOST_WORDS_BIGENDIAN
 447     /* This is complicated slightly because vfp.regs[2n] is
 448      * still the low half and  vfp.regs[2n+1] the high half
 449      * of the 128 bit vector, even on big endian systems.
 450      * Calculate the offset assuming a fully bigendian 128 bits,
 451      * then XOR to account for the order of the two 64 bit halves.
 452      */
 453     offs += (16 - ((element + 1) * (1 << size)));
 454     offs ^= 8;
 455 #else
 456     offs += element * (1 << size);
 457 #endif
 458     assert_fp_access_checked(s);
 459     return offs;
 460 }
 461
 462 /* Return the offset into CPUARMState of a slice (from
 463  * the least significant end) of FP register Qn (ie
 464  * Dn, Sn, Hn or Bn).
 465  * (Note that this is not the same mapping as for A32; see cpu.h)
 466  */
 467 static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
 468 {
 469     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 470 #ifdef HOST_WORDS_BIGENDIAN
 471     offs += (8 - (1 << size));
 472 #endif
 473     assert_fp_access_checked(s);
 474     return offs;
 475 }
 476
 477 /* Offset of the high half of the 128 bit vector Qn */
 478 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
 479 {
 480     assert_fp_access_checked(s);
 481     return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
 482 }
 483
 484 /* Convenience accessors for reading and writing single and double
 485  * FP registers. Writing clears the upper parts of the associated
 486  * 128 bit vector register, as required by the architecture.
 487  * Note that unlike the GP register accessors, the values returned
 488  * by the read functions must be manually freed.
 489  */
 490 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
 491 {
 492     TCGv_i64 v = tcg_temp_new_i64();
 493
 494     tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 495     return v;
 496 }
 497
 498 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
 499 {
 500     TCGv_i32 v = tcg_temp_new_i32();
 501
 502     tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
 503     return v;
 504 }
 505
 506 static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
 507 {
 508     TCGv_i64 tcg_zero = tcg_const_i64(0);
 509
 510     tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 511     tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg));
 512     tcg_temp_free_i64(tcg_zero);
 513 }
 514
 515 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
 516 {
 517     TCGv_i64 tmp = tcg_temp_new_i64();
 518
 519     tcg_gen_extu_i32_i64(tmp, v);
 520     write_fp_dreg(s, reg, tmp);
 521     tcg_temp_free_i64(tmp);
 522 }
 523
 524 static TCGv_ptr get_fpstatus_ptr(void)
 525 {
 526     TCGv_ptr statusptr = tcg_temp_new_ptr();
 527     int offset;
 528
 529     /* In A64 all instructions (both FP and Neon) use the FPCR;
 530      * there is no equivalent of the A32 Neon "standard FPSCR value"
 531      * and all operations use vfp.fp_status.
 532      */
 533     offset = offsetof(CPUARMState, vfp.fp_status);
 534     tcg_gen_addi_ptr(statusptr, cpu_env, offset);
 535     return statusptr;
 536 }
 537
 538 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 539  * than the 32 bit equivalent.
 540  */
 541 static inline void gen_set_NZ64(TCGv_i64 result)
 542 {
 543     tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
 544     tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
 545 }
 546
 547 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
 548 static inline void gen_logic_CC(int sf, TCGv_i64 result)
 549 {
 550     if (sf) {
 551         gen_set_NZ64(result);
 552     } else {
 553         tcg_gen_extrl_i64_i32(cpu_ZF, result);
 554         tcg_gen_mov_i32(cpu_NF, cpu_ZF);
 555     }
 556     tcg_gen_movi_i32(cpu_CF, 0);
 557     tcg_gen_movi_i32(cpu_VF, 0);
 558 }
 559
 560 /* dest = T0 + T1; compute C, N, V and Z flags */
 561 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 562 {
 563     if (sf) {
 564         TCGv_i64 result, flag, tmp;
 565         result = tcg_temp_new_i64();
 566         flag = tcg_temp_new_i64();
 567         tmp = tcg_temp_new_i64();
 568
 569         tcg_gen_movi_i64(tmp, 0);
 570         tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
 571
 572         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 573
 574         gen_set_NZ64(result);
 575
 576         tcg_gen_xor_i64(flag, result, t0);
 577         tcg_gen_xor_i64(tmp, t0, t1);
 578         tcg_gen_andc_i64(flag, flag, tmp);
 579         tcg_temp_free_i64(tmp);
 580         tcg_gen_extrh_i64_i32(cpu_VF, flag);
 581
 582         tcg_gen_mov_i64(dest, result);
 583         tcg_temp_free_i64(result);
 584         tcg_temp_free_i64(flag);
 585     } else {
 586         /* 32 bit arithmetic */
 587         TCGv_i32 t0_32 = tcg_temp_new_i32();
 588         TCGv_i32 t1_32 = tcg_temp_new_i32();
 589         TCGv_i32 tmp = tcg_temp_new_i32();
 590
 591         tcg_gen_movi_i32(tmp, 0);
 592         tcg_gen_extrl_i64_i32(t0_32, t0);
 593         tcg_gen_extrl_i64_i32(t1_32, t1);
 594         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
 595         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 596         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 597         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 598         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 599         tcg_gen_extu_i32_i64(dest, cpu_NF);
 600
 601         tcg_temp_free_i32(tmp);
 602         tcg_temp_free_i32(t0_32);
 603         tcg_temp_free_i32(t1_32);
 604     }
 605 }
 606
 607 /* dest = T0 - T1; compute C, N, V and Z flags */
 608 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 609 {
 610     if (sf) {
 611         /* 64 bit arithmetic */
 612         TCGv_i64 result, flag, tmp;
 613
 614         result = tcg_temp_new_i64();
 615         flag = tcg_temp_new_i64();
 616         tcg_gen_sub_i64(result, t0, t1);
 617
 618         gen_set_NZ64(result);
 619
 620         tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
 621         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 622
 623         tcg_gen_xor_i64(flag, result, t0);
 624         tmp = tcg_temp_new_i64();
 625         tcg_gen_xor_i64(tmp, t0, t1);
 626         tcg_gen_and_i64(flag, flag, tmp);
 627         tcg_temp_free_i64(tmp);
 628         tcg_gen_extrh_i64_i32(cpu_VF, flag);
 629         tcg_gen_mov_i64(dest, result);
 630         tcg_temp_free_i64(flag);
 631         tcg_temp_free_i64(result);
 632     } else {
 633         /* 32 bit arithmetic */
 634         TCGv_i32 t0_32 = tcg_temp_new_i32();
 635         TCGv_i32 t1_32 = tcg_temp_new_i32();
 636         TCGv_i32 tmp;
 637
 638         tcg_gen_extrl_i64_i32(t0_32, t0);
 639         tcg_gen_extrl_i64_i32(t1_32, t1);
 640         tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
 641         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 642         tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
 643         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 644         tmp = tcg_temp_new_i32();
 645         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 646         tcg_temp_free_i32(t0_32);
 647         tcg_temp_free_i32(t1_32);
 648         tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 649         tcg_temp_free_i32(tmp);
 650         tcg_gen_extu_i32_i64(dest, cpu_NF);
 651     }
 652 }
 653
 654 /* dest = T0 + T1 + CF; do not compute flags. */
 655 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 656 {
 657     TCGv_i64 flag = tcg_temp_new_i64();
 658     tcg_gen_extu_i32_i64(flag, cpu_CF);
 659     tcg_gen_add_i64(dest, t0, t1);
 660     tcg_gen_add_i64(dest, dest, flag);
 661     tcg_temp_free_i64(flag);
 662
 663     if (!sf) {
 664         tcg_gen_ext32u_i64(dest, dest);
 665     }
 666 }
 667
 668 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
 669 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 670 {
 671     if (sf) {
 672         TCGv_i64 result, cf_64, vf_64, tmp;
 673         result = tcg_temp_new_i64();
 674         cf_64 = tcg_temp_new_i64();
 675         vf_64 = tcg_temp_new_i64();
 676         tmp = tcg_const_i64(0);
 677
 678         tcg_gen_extu_i32_i64(cf_64, cpu_CF);
 679         tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
 680         tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
 681         tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
 682         gen_set_NZ64(result);
 683
 684         tcg_gen_xor_i64(vf_64, result, t0);
 685         tcg_gen_xor_i64(tmp, t0, t1);
 686         tcg_gen_andc_i64(vf_64, vf_64, tmp);
 687         tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
 688
 689         tcg_gen_mov_i64(dest, result);
 690
 691         tcg_temp_free_i64(tmp);
 692         tcg_temp_free_i64(vf_64);
 693         tcg_temp_free_i64(cf_64);
 694         tcg_temp_free_i64(result);
 695     } else {
 696         TCGv_i32 t0_32, t1_32, tmp;
 697         t0_32 = tcg_temp_new_i32();
 698         t1_32 = tcg_temp_new_i32();
 699         tmp = tcg_const_i32(0);
 700
 701         tcg_gen_extrl_i64_i32(t0_32, t0);
 702         tcg_gen_extrl_i64_i32(t1_32, t1);
 703         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
 704         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);
 705
 706         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 707         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 708         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 709         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 710         tcg_gen_extu_i32_i64(dest, cpu_NF);
 711
 712         tcg_temp_free_i32(tmp);
 713         tcg_temp_free_i32(t1_32);
 714         tcg_temp_free_i32(t0_32);
 715     }
 716 }
 717
 718 /*
 719  * Load/Store generators
 720  */
 721
 722 /*
 723  * Store from GPR register to memory.
 724  */
 725 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
 726                              TCGv_i64 tcg_addr, int size, int memidx)
 727 {
 728     g_assert(size <= 3);
 729     tcg_gen_qemu_st_i64(source, tcg_addr, memidx, MO_TE + size);
 730 }
 731
 732 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
 733                       TCGv_i64 tcg_addr, int size)
 734 {
 735     do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s));
 736 }
 737
 738 /*
 739  * Load from memory to GPR register
 740  */
 741 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
 742                              int size, bool is_signed, bool extend, int memidx)
 743 {
 744     TCGMemOp memop = MO_TE + size;
 745
 746     g_assert(size <= 3);
 747
 748     if (is_signed) {
 749         memop += MO_SIGN;
 750     }
 751
 752     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
 753
 754     if (extend && is_signed) {
 755         g_assert(size < 3);
 756         tcg_gen_ext32u_i64(dest, dest);
 757     }
 758 }
 759
 760 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
 761                       int size, bool is_signed, bool extend)
 762 {
 763     do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
 764                      get_mem_index(s));
 765 }
 766
 767 /*
 768  * Store from FP register to memory
 769  */
 770 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
 771 {
 772     /* This writes the bottom N bits of a 128 bit wide vector to memory */
 773     TCGv_i64 tmp = tcg_temp_new_i64();
 774     tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
 775     if (size < 4) {
 776         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TE + size);
 777     } else {
 778         TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
 779         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TEQ);
 780         tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
 781         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 782         tcg_gen_qemu_st_i64(tmp, tcg_hiaddr, get_mem_index(s), MO_TEQ);
 783         tcg_temp_free_i64(tcg_hiaddr);
 784     }
 785
 786     tcg_temp_free_i64(tmp);
 787 }
 788
 789 /*
 790  * Load from memory to FP register
 791  */
 792 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
 793 {
 794     /* This always zero-extends and writes to a full 128 bit wide vector */
 795     TCGv_i64 tmplo = tcg_temp_new_i64();
 796     TCGv_i64 tmphi;
 797
 798     if (size < 4) {
 799         TCGMemOp memop = MO_TE + size;
 800         tmphi = tcg_const_i64(0);
 801         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
 802     } else {
 803         TCGv_i64 tcg_hiaddr;
 804         tmphi = tcg_temp_new_i64();
 805         tcg_hiaddr = tcg_temp_new_i64();
 806
 807         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), MO_TEQ);
 808         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 809         tcg_gen_qemu_ld_i64(tmphi, tcg_hiaddr, get_mem_index(s), MO_TEQ);
 810         tcg_temp_free_i64(tcg_hiaddr);
 811     }
 812
 813     tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
 814     tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
 815
 816     tcg_temp_free_i64(tmplo);
 817     tcg_temp_free_i64(tmphi);
 818 }
 819
 820 /*
 821  * Vector load/store helpers.
 822  *
 823  * The principal difference between this and a FP load is that we don't
 824  * zero extend as we are filling a partial chunk of the vector register.
 825  * These functions don't support 128 bit loads/stores, which would be
 826  * normal load/store operations.
 827  *
 828  * The _i32 versions are useful when operating on 32 bit quantities
 829  * (eg for floating point single or using Neon helper functions).
 830  */
 831
 832 /* Get value of an element within a vector register */
 833 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
 834                              int element, TCGMemOp memop)
 835 {
 836     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
 837     switch (memop) {
 838     case MO_8:
 839         tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
 840         break;
 841     case MO_16:
 842         tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
 843         break;
 844     case MO_32:
 845         tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
 846         break;
 847     case MO_8|MO_SIGN:
 848         tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
 849         break;
 850     case MO_16|MO_SIGN:
 851         tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
 852         break;
 853     case MO_32|MO_SIGN:
 854         tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
 855         break;
 856     case MO_64:
 857     case MO_64|MO_SIGN:
 858         tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
 859         break;
 860     default:
 861         g_assert_not_reached();
 862     }
 863 }
 864
 865 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
 866                                  int element, TCGMemOp memop)
 867 {
 868     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
 869     switch (memop) {
 870     case MO_8:
 871         tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
 872         break;
 873     case MO_16:
 874         tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
 875         break;
 876     case MO_8|MO_SIGN:
 877         tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
 878         break;
 879     case MO_16|MO_SIGN:
 880         tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
 881         break;
 882     case MO_32:
 883     case MO_32|MO_SIGN:
 884         tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
 885         break;
 886     default:
 887         g_assert_not_reached();
 888     }
 889 }
 890
 891 /* Set value of an element within a vector register */
 892 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
 893                               int element, TCGMemOp memop)
 894 {
 895     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
 896     switch (memop) {
 897     case MO_8:
 898         tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
 899         break;
 900     case MO_16:
 901         tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
 902         break;
 903     case MO_32:
 904         tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
 905         break;
 906     case MO_64:
 907         tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
 908         break;
 909     default:
 910         g_assert_not_reached();
 911     }
 912 }
 913
 914 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
 915                                   int destidx, int element, TCGMemOp memop)
 916 {
 917     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
 918     switch (memop) {
 919     case MO_8:
 920         tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
 921         break;
 922     case MO_16:
 923         tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
 924         break;
 925     case MO_32:
 926         tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
 927         break;
 928     default:
 929         g_assert_not_reached();
 930     }
 931 }
 932
 933 /* Clear the high 64 bits of a 128 bit vector (in general non-quad
 934  * vector ops all need to do this).
 935  */
 936 static void clear_vec_high(DisasContext *s, int rd)
 937 {
 938     TCGv_i64 tcg_zero = tcg_const_i64(0);
 939
 940     write_vec_element(s, tcg_zero, rd, 1, MO_64);
 941     tcg_temp_free_i64(tcg_zero);
 942 }
 943
 944 /* Store from vector register to memory */
 945 static void do_vec_st(DisasContext *s, int srcidx, int element,
 946                       TCGv_i64 tcg_addr, int size)
 947 {
 948     TCGMemOp memop = MO_TE + size;
 949     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
 950
 951     read_vec_element(s, tcg_tmp, srcidx, element, size);
 952     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
 953
 954     tcg_temp_free_i64(tcg_tmp);
 955 }
 956
 957 /* Load from memory to vector register */
 958 static void do_vec_ld(DisasContext *s, int destidx, int element,
 959                       TCGv_i64 tcg_addr, int size)
 960 {
 961     TCGMemOp memop = MO_TE + size;
 962     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
 963
 964     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
 965     write_vec_element(s, tcg_tmp, destidx, element, size);
 966
 967     tcg_temp_free_i64(tcg_tmp);
 968 }
 969
 970 /* Check that FP/Neon access is enabled. If it is, return
 971  * true. If not, emit code to generate an appropriate exception,
 972  * and return false; the caller should not emit any code for
 973  * the instruction. Note that this check must happen after all
 974  * unallocated-encoding checks (otherwise the syndrome information
 975  * for the resulting exception will be incorrect).
 976  */
 977 static inline bool fp_access_check(DisasContext *s)
 978 {
 979     assert(!s->fp_access_checked);
 980     s->fp_access_checked = true;
 981
 982     if (!s->fp_excp_el) {
 983         return true;
 984     }
 985
 986     gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
 987                        s->fp_excp_el);
 988     return false;
 989 }
 990
 991 /*
 992  * This utility function is for doing register extension with an
 993  * optional shift. You will likely want to pass a temporary for the
 994  * destination register. See DecodeRegExtend() in the ARM ARM.
 995  */
 996 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
 997                               int option, unsigned int shift)
 998 {
 999     int extsize = extract32(option, 0, 2);
1000     bool is_signed = extract32(option, 2, 1);
1001
1002     if (is_signed) {
1003         switch (extsize) {
1004         case 0:
1005             tcg_gen_ext8s_i64(tcg_out, tcg_in);
1006             break;
1007         case 1:
1008             tcg_gen_ext16s_i64(tcg_out, tcg_in);
1009             break;
1010         case 2:
1011             tcg_gen_ext32s_i64(tcg_out, tcg_in);
1012             break;
1013         case 3:
1014             tcg_gen_mov_i64(tcg_out, tcg_in);
1015             break;
1016         }
1017     } else {
1018         switch (extsize) {
1019         case 0:
1020             tcg_gen_ext8u_i64(tcg_out, tcg_in);
1021             break;
1022         case 1:
1023             tcg_gen_ext16u_i64(tcg_out, tcg_in);
1024             break;
1025         case 2:
1026             tcg_gen_ext32u_i64(tcg_out, tcg_in);
1027             break;
1028         case 3:
1029             tcg_gen_mov_i64(tcg_out, tcg_in);
1030             break;
1031         }
1032     }
1033
1034     if (shift) {
1035         tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1036     }
1037 }
1038
/* Hook for an SP alignment check; currently emits no code at all.
 * The parameter s is not used.
 */
static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}
1051
1052 /*
1053  * This provides a simple table based table lookup decoder. It is
1054  * intended to be used when the relevant bits for decode are too
1055  * awkwardly placed and switch/if based logic would be confusing and
1056  * deeply nested. Since it's a linear search through the table, tables
1057  * should be kept small.
1058  *
1059  * It returns the first handler where insn & mask == pattern, or
1060  * NULL if there is no match.
1061  * The table is terminated by an empty mask (i.e. 0)
1062  */
1063 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1064                                                uint32_t insn)
1065 {
1066     const AArch64DecodeTable *tptr = table;
1067
1068     while (tptr->mask) {
1069         if ((insn & tptr->mask) == tptr->pattern) {
1070             return tptr->disas_fn;
1071         }
1072         tptr++;
1073     }
1074     return NULL;
1075 }
1076
1077 /*
1078  * the instruction disassembly implemented here matches
1079  * the instruction encoding classifications in chapter 3 (C3)
1080  * of the ARM Architecture Reference Manual (DDI0487A_a)
1081  */
1082
1083 /* C3.2.7 Unconditional branch (immediate)
1084  *   31  30       26 25                                  0
1085  * +----+-----------+-------------------------------------+
1086  * | op | 0 0 1 0 1 |                 imm26               |
1087  * +----+-----------+-------------------------------------+
1088  */
1089 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1090 {
1091     uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
1092
1093     if (insn & (1U << 31)) {
1094         /* C5.6.26 BL Branch with link */
1095         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1096     }
1097
1098     /* C5.6.20 B Branch / C5.6.26 BL Branch with link */
1099     gen_goto_tb(s, 0, addr);
1100 }
1101
1102 /* C3.2.1 Compare & branch (immediate)
1103  *   31  30         25  24  23                  5 4      0
1104  * +----+-------------+----+---------------------+--------+
1105  * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
1106  * +----+-------------+----+---------------------+--------+
1107  */
1108 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1109 {
1110     unsigned int sf, op, rt;
1111     uint64_t addr;
1112     TCGLabel *label_match;
1113     TCGv_i64 tcg_cmp;
1114
1115     sf = extract32(insn, 31, 1);
1116     op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1117     rt = extract32(insn, 0, 5);
1118     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1119
1120     tcg_cmp = read_cpu_reg(s, rt, sf);
1121     label_match = gen_new_label();
1122
1123     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1124                         tcg_cmp, 0, label_match);
1125
1126     gen_goto_tb(s, 0, s->pc);
1127     gen_set_label(label_match);
1128     gen_goto_tb(s, 1, addr);
1129 }
1130
1131 /* C3.2.5 Test & branch (immediate)
1132  *   31  30         25  24  23   19 18          5 4    0
1133  * +----+-------------+----+-------+-------------+------+
1134  * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
1135  * +----+-------------+----+-------+-------------+------+
1136  */
1137 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1138 {
1139     unsigned int bit_pos, op, rt;
1140     uint64_t addr;
1141     TCGLabel *label_match;
1142     TCGv_i64 tcg_cmp;
1143
1144     bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1145     op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1146     addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
1147     rt = extract32(insn, 0, 5);
1148
1149     tcg_cmp = tcg_temp_new_i64();
1150     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1151     label_match = gen_new_label();
1152     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1153                         tcg_cmp, 0, label_match);
1154     tcg_temp_free_i64(tcg_cmp);
1155     gen_goto_tb(s, 0, s->pc);
1156     gen_set_label(label_match);
1157     gen_goto_tb(s, 1, addr);
1158 }
1159
1160 /* C3.2.2 / C5.6.19 Conditional branch (immediate)
1161  *  31           25  24  23                  5   4  3    0
1162  * +---------------+----+---------------------+----+------+
1163  * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
1164  * +---------------+----+---------------------+----+------+
1165  */
1166 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1167 {
1168     unsigned int cond;
1169     uint64_t addr;
1170
1171     if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1172         unallocated_encoding(s);
1173         return;
1174     }
1175     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1176     cond = extract32(insn, 0, 4);
1177
1178     if (cond < 0x0e) {
1179         /* genuinely conditional branches */
1180         TCGLabel *label_match = gen_new_label();
1181         arm_gen_test_cc(cond, label_match);
1182         gen_goto_tb(s, 0, s->pc);
1183         gen_set_label(label_match);
1184         gen_goto_tb(s, 1, addr);
1185     } else {
1186         /* 0xe and 0xf are both "always" conditions */
1187         gen_goto_tb(s, 0, addr);
1188     }
1189 }
1190
1191 /* C5.6.68 HINT */
1192 static void handle_hint(DisasContext *s, uint32_t insn,
1193                         unsigned int op1, unsigned int op2, unsigned int crm)
1194 {
1195     unsigned int selector = crm << 3 | op2;
1196
1197     if (op1 != 3) {
1198         unallocated_encoding(s);
1199         return;
1200     }
1201
1202     switch (selector) {
1203     case 0: /* NOP */
1204         return;
1205     case 3: /* WFI */
1206         s->is_jmp = DISAS_WFI;
1207         return;
1208     case 1: /* YIELD */
1209         s->is_jmp = DISAS_YIELD;
1210         return;
1211     case 2: /* WFE */
1212         s->is_jmp = DISAS_WFE;
1213         return;
1214     case 4: /* SEV */
1215     case 5: /* SEVL */
1216         /* we treat all as NOP at least for now */
1217         return;
1218     default:
1219         /* default specified as NOP equivalent */
1220         return;
1221     }
1222 }
1223
/* CLREX: emit code setting cpu_exclusive_addr to -1. Neither s nor
 * insn is referenced.
 */
static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
1228
1229 /* CLREX, DSB, DMB, ISB */
1230 static void handle_sync(DisasContext *s, uint32_t insn,
1231                         unsigned int op1, unsigned int op2, unsigned int crm)
1232 {
1233     if (op1 != 3) {
1234         unallocated_encoding(s);
1235         return;
1236     }
1237
1238     switch (op2) {
1239     case 2: /* CLREX */
1240         gen_clrex(s, insn);
1241         return;
1242     case 4: /* DSB */
1243     case 5: /* DMB */
1244         /* We don't emulate caches so barriers are no-ops */
1245         return;
1246     case 6: /* ISB */
1247         /* We need to break the TB after this insn to execute
1248          * a self-modified code correctly and also to take
1249          * any pending interrupts immediately.
1250          */
1251         s->is_jmp = DISAS_UPDATE;
1252         return;
1253     default:
1254         unallocated_encoding(s);
1255         return;
1256     }
1257 }
1258
1259 /* C5.6.130 MSR (immediate) - move immediate to processor state field */
1260 static void handle_msr_i(DisasContext *s, uint32_t insn,
1261                          unsigned int op1, unsigned int op2, unsigned int crm)
1262 {
1263     int op = op1 << 3 | op2;
1264     switch (op) {
1265     case 0x05: /* SPSel */
1266         if (s->current_el == 0) {
1267             unallocated_encoding(s);
1268             return;
1269         }
1270         /* fall through */
1271     case 0x1e: /* DAIFSet */
1272     case 0x1f: /* DAIFClear */
1273     {
1274         TCGv_i32 tcg_imm = tcg_const_i32(crm);
1275         TCGv_i32 tcg_op = tcg_const_i32(op);
1276         gen_a64_set_pc_im(s->pc - 4);
1277         gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
1278         tcg_temp_free_i32(tcg_imm);
1279         tcg_temp_free_i32(tcg_op);
1280         s->is_jmp = DISAS_UPDATE;
1281         break;
1282     }
1283     default:
1284         unallocated_encoding(s);
1285         return;
1286     }
1287 }
1288
1289 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1290 {
1291     TCGv_i32 tmp = tcg_temp_new_i32();
1292     TCGv_i32 nzcv = tcg_temp_new_i32();
1293
1294     /* build bit 31, N */
1295     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1296     /* build bit 30, Z */
1297     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1298     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1299     /* build bit 29, C */
1300     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1301     /* build bit 28, V */
1302     tcg_gen_shri_i32(tmp, cpu_VF, 31);
1303     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1304     /* generate result */
1305     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1306
1307     tcg_temp_free_i32(nzcv);
1308     tcg_temp_free_i32(tmp);
1309 }
1310
1311 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1312
1313 {
1314     TCGv_i32 nzcv = tcg_temp_new_i32();
1315
1316     /* take NZCV from R[t] */
1317     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1318
1319     /* bit 31, N */
1320     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1321     /* bit 30, Z */
1322     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1323     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1324     /* bit 29, C */
1325     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1326     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1327     /* bit 28, V */
1328     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1329     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1330     tcg_temp_free_i32(nzcv);
1331 }
1332
/* C5.6.129 MRS - move from system register
 * C5.6.131 MSR (register) - move to system register
 * C5.6.204 SYS
 * C5.6.205 SYSL
 * These are all essentially the same insn in 'read' and 'write'
 * versions, with varying op0 fields.
 *
 * isread selects the read (true) or write (false) form; op0, op1, op2,
 * crn and crm identify the register and rt is the general register
 * used as destination or source. insn is not referenced here.
 *
 * Processing order:
 *  1. look the register up in s->cp_regs; unknown registers are logged
 *     with LOG_UNIMP and treated as unallocated;
 *  2. check access permissions at translate time with cp_access_ok();
 *  3. if the register has an accessfn, emit a runtime access check;
 *  4. handle the special register types (NOP, NZCV, CurrentEL, DC ZVA);
 *  5. otherwise emit the read or write as a constant, a helper call
 *     or a direct load/store at ri->fieldoffset in cpu_env;
 *  6. decide whether the TB has to end after this insn.
 */
static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    const ARMCPRegInfo *ri;
    TCGv_i64 tcg_rt;

    ri = get_arm_cp_reginfo(s->cp_regs,
                            ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                               crn, crm, op0, op1, op2));

    if (!ri) {
        /* Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
                      isread ? "read" : "write", op0, op1, crn, crm, op2);
        unallocated_encoding(s);
        return;
    }

    /* Check access permissions */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        unallocated_encoding(s);
        return;
    }

    if (ri->accessfn) {
        /* Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         */
        TCGv_ptr tmpptr;
        TCGv_i32 tcg_syn;
        uint32_t syndrome;

        /* Set the PC to s->pc - 4 before calling the helper, since
         * the helper may raise an exception.
         */
        gen_a64_set_pc_im(s->pc - 4);
        tmpptr = tcg_const_ptr(ri);
        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
        tcg_syn = tcg_const_i32(syndrome);
        gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn);
        tcg_temp_free_ptr(tmpptr);
        tcg_temp_free_i32(tcg_syn);
    }

    /* Handle special cases first */
    /* The mask clears every ARM_CP_FLAG_MASK bit except ARM_CP_SPECIAL
     * before the type is compared against the special values.
     */
    switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
    case ARM_CP_NOP:
        return;
    case ARM_CP_NZCV:
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_rt);
        } else {
            gen_set_nzcv(tcg_rt);
        }
        return;
    case ARM_CP_CURRENTEL:
        /* Reads as current EL value from pstate, which is
         * guaranteed to be constant by the tb flags.
         */
        tcg_rt = cpu_reg(s, rt);
        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
        return;
    case ARM_CP_DC_ZVA:
        /* Writes clear the aligned block of memory which rt points into. */
        tcg_rt = cpu_reg(s, rt);
        gen_helper_dc_zva(cpu_env, tcg_rt);
        return;
    default:
        break;
    }

    /* With icount in use, accesses to ARM_CP_IO registers are bracketed
     * by gen_io_start() here and gen_io_end() below.
     */
    if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        gen_io_start();
    }

    tcg_rt = cpu_reg(s, rt);

    if (isread) {
        if (ri->type & ARM_CP_CONST) {
            /* Constant registers read as their reset value */
            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
        } else if (ri->readfn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
            tcg_temp_free_ptr(tmpptr);
        } else {
            /* No read hook: load straight from the CPU state field */
            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    } else {
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            return;
        } else if (ri->writefn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
            tcg_temp_free_ptr(tmpptr);
        } else {
            /* No write hook: store straight to the CPU state field */
            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    }

    if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        /* I/O operations must end the TB here (whether read or write) */
        gen_io_end();
        s->is_jmp = DISAS_UPDATE;
    } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /* We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        s->is_jmp = DISAS_UPDATE;
    }
}
1456
1457 /* C3.2.4 System
1458  *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1459  * +---------------------+---+-----+-----+-------+-------+-----+------+
1460  * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1461  * +---------------------+---+-----+-----+-------+-------+-----+------+
1462  */
1463 static void disas_system(DisasContext *s, uint32_t insn)
1464 {
1465     unsigned int l, op0, op1, crn, crm, op2, rt;
1466     l = extract32(insn, 21, 1);
1467     op0 = extract32(insn, 19, 2);
1468     op1 = extract32(insn, 16, 3);
1469     crn = extract32(insn, 12, 4);
1470     crm = extract32(insn, 8, 4);
1471     op2 = extract32(insn, 5, 3);
1472     rt = extract32(insn, 0, 5);
1473
1474     if (op0 == 0) {
1475         if (l || rt != 31) {
1476             unallocated_encoding(s);
1477             return;
1478         }
1479         switch (crn) {
1480         case 2: /* C5.6.68 HINT */
1481             handle_hint(s, insn, op1, op2, crm);
1482             break;
1483         case 3: /* CLREX, DSB, DMB, ISB */
1484             handle_sync(s, insn, op1, op2, crm);
1485             break;
1486         case 4: /* C5.6.130 MSR (immediate) */
1487             handle_msr_i(s, insn, op1, op2, crm);
1488             break;
1489         default:
1490             unallocated_encoding(s);
1491             break;
1492         }
1493         return;
1494     }
1495     handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1496 }
1497
/* C3.2.3 Exception generation
 *
 *  31             24 23 21 20                     5 4   2 1  0
 * +-----------------+-----+------------------------+-----+----+
 * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
 * +-----------------+-----+------------------------+-----+----+
 *
 * Dispatch is on opc, with op2:LL (taken together as a 5-bit field)
 * selecting the instruction within each group:
 *   opc 0: SVC (op2:LL == 1), HVC (2), SMC (3)
 *   opc 1: BRK (op2:LL must be 0)
 *   opc 2: HLT (op2:LL must be 0)
 *   opc 5: DCPS1/2/3 (op2:LL 1..3), reported via unsupported_encoding()
 * Every other combination is an unallocated encoding.
 */
static void disas_exc(DisasContext *s, uint32_t insn)
{
    int opc = extract32(insn, 21, 3);
    int op2_ll = extract32(insn, 0, 5);
    int imm16 = extract32(insn, 5, 16);
    TCGv_i32 tmp;

    switch (opc) {
    case 0:
        /* For SVC, HVC and SMC we advance the single-step state
         * machine before taking the exception. This is architecturally
         * mandated, to ensure that single-stepping a system call
         * instruction works properly.
         */
        switch (op2_ll) {
        case 1:
            /* SVC */
            gen_ss_advance(s);
            gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
                               default_exception_el(s));
            break;
        case 2:
            /* HVC: unallocated when translating for EL0 */
            if (s->current_el == 0) {
                unallocated_encoding(s);
                break;
            }
            /* The pre HVC helper handles cases when HVC gets trapped
             * as an undefined insn by runtime configuration.
             */
            gen_a64_set_pc_im(s->pc - 4);
            gen_helper_pre_hvc(cpu_env);
            gen_ss_advance(s);
            gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
            break;
        case 3:
            /* SMC: unallocated when translating for EL0 */
            if (s->current_el == 0) {
                unallocated_encoding(s);
                break;
            }
            /* The pre SMC helper is called, with the syndrome as its
             * argument, before the single-step state is advanced and
             * the exception is generated.
             */
            gen_a64_set_pc_im(s->pc - 4);
            tmp = tcg_const_i32(syn_aa64_smc(imm16));
            gen_helper_pre_smc(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            gen_ss_advance(s);
            gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
            break;
        default:
            unallocated_encoding(s);
            break;
        }
        break;
    case 1:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* BRK */
        gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16),
                           default_exception_el(s));
        break;
    case 2:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* HLT. This has two purposes.
         * Architecturally, it is an external halting debug instruction.
         * Since QEMU doesn't implement external debug, we treat this as
         * it is required for halting debug disabled: it will UNDEF.
         * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
         */
        if (semihosting_enabled() && imm16 == 0xf000) {
#ifndef CONFIG_USER_ONLY
            /* In system mode, don't allow userspace access to semihosting,
             * to provide some semblance of security (and for consistency
             * with our 32-bit semihosting).
             */
            if (s->current_el == 0) {
                unsupported_encoding(s, insn);
                break;
            }
#endif
            gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
        } else {
            unsupported_encoding(s, insn);
        }
        break;
    case 5:
        if (op2_ll < 1 || op2_ll > 3) {
            unallocated_encoding(s);
            break;
        }
        /* DCPS1, DCPS2, DCPS3 */
        unsupported_encoding(s, insn);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}
1604
1605 /* C3.2.7 Unconditional branch (register)
1606  *  31           25 24   21 20   16 15   10 9    5 4     0
1607  * +---------------+-------+-------+-------+------+-------+
1608  * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1609  * +---------------+-------+-------+-------+------+-------+
1610  */
1611 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1612 {
1613     unsigned int opc, op2, op3, rn, op4;
1614
1615     opc = extract32(insn, 21, 4);
1616     op2 = extract32(insn, 16, 5);
1617     op3 = extract32(insn, 10, 6);
1618     rn = extract32(insn, 5, 5);
1619     op4 = extract32(insn, 0, 5);
1620
1621     if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1622         unallocated_encoding(s);
1623         return;
1624     }
1625
1626     switch (opc) {
1627     case 0: /* BR */
1628     case 2: /* RET */
1629         tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1630         break;
1631     case 1: /* BLR */
1632         tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1633         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1634         break;
1635     case 4: /* ERET */
1636         if (s->current_el == 0) {
1637             unallocated_encoding(s);
1638             return;
1639         }
1640         gen_helper_exception_return(cpu_env);
1641         s->is_jmp = DISAS_JUMP;
1642         return;
1643     case 5: /* DRPS */
1644         if (rn != 0x1f) {
1645             unallocated_encoding(s);
1646         } else {
1647             unsupported_encoding(s, insn);
1648         }
1649         return;
1650     default:
1651         unallocated_encoding(s);
1652         return;
1653     }
1654
1655     s->is_jmp = DISAS_JUMP;
1656 }
1657
1658 /* C3.2 Branches, exception generating and system instructions */
1659 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1660 {
1661     switch (extract32(insn, 25, 7)) {
1662     case 0x0a: case 0x0b:
1663     case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1664         disas_uncond_b_imm(s, insn);
1665         break;
1666     case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1667         disas_comp_b_imm(s, insn);
1668         break;
1669     case 0x1b: case 0x5b: /* Test & branch (immediate) */
1670         disas_test_b_imm(s, insn);
1671         break;
1672     case 0x2a: /* Conditional branch (immediate) */
1673         disas_cond_b_imm(s, insn);
1674         break;
1675     case 0x6a: /* Exception generation / System */
1676         if (insn & (1 << 24)) {
1677             disas_system(s, insn);
1678         } else {
1679             disas_exc(s, insn);
1680         }
1681         break;
1682     case 0x6b: /* Unconditional branch (register) */
1683         disas_uncond_b_reg(s, insn);
1684         break;
1685     default:
1686         unallocated_encoding(s);
1687         break;
1688     }
1689 }
1690
/*
 * Load/Store exclusive instructions are implemented by remembering
 * the value/address loaded, and seeing if these are the same
 * when the store is performed. This is not actually the architecturally
 * mandated semantics, but it works for typical guest code sequences
 * and avoids having to monitor regular stores.
 *
 * In system emulation mode only one CPU will be running at once, so
 * this sequence is effectively atomic.  In user emulation mode we
 * throw an exception and handle the atomic operation elsewhere.
 */
/* Emit a load-exclusive of register rt (and rt2 when is_pair) from addr.
 *
 * size is the log2 of the access size in bytes and must be at most 3;
 * pair accesses additionally require size >= 2. The loaded value(s)
 * are written to the destination register(s) and also recorded in
 * cpu_exclusive_val / cpu_exclusive_high, and addr is recorded in
 * cpu_exclusive_addr.
 */
static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
                               TCGv_i64 addr, int size, bool is_pair)
{
    TCGv_i64 tmp = tcg_temp_new_i64();
    TCGMemOp memop = MO_TE + size;

    g_assert(size <= 3);
    tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);

    if (is_pair) {
        TCGv_i64 addr2 = tcg_temp_new_i64();
        TCGv_i64 hitmp = tcg_temp_new_i64();

        g_assert(size >= 2);
        /* The second element sits directly after the first one */
        tcg_gen_addi_i64(addr2, addr, 1 << size);
        tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
        tcg_temp_free_i64(addr2);
        tcg_gen_mov_i64(cpu_exclusive_high, hitmp);
        tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
        tcg_temp_free_i64(hitmp);
    }

    /* Record the loaded value and hand it to the destination register */
    tcg_gen_mov_i64(cpu_exclusive_val, tmp);
    tcg_gen_mov_i64(cpu_reg(s, rt), tmp);

    tcg_temp_free_i64(tmp);
    /* Record the address last, after all the loads have been emitted */
    tcg_gen_mov_i64(cpu_exclusive_addr, addr);
}
1730
#ifdef CONFIG_USER_ONLY
/* User-only variant of store-exclusive: no store is emitted here.
 *
 * The address is saved in cpu_exclusive_test and the operands are
 * packed into cpu_exclusive_info (size in bits [1:0], is_pair at bit 2,
 * rd from bit 4, rt from bit 9, rt2 from bit 14); then the internal
 * exception EXCP_STREX is raised.
 */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 addr, int size, int is_pair)
{
    tcg_gen_mov_i64(cpu_exclusive_test, addr);
    tcg_gen_movi_i32(cpu_exclusive_info,
                     size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14));
    gen_exception_internal_insn(s, 4, EXCP_STREX);
}
#else
/* System-mode variant of store-exclusive.
 *
 * rd receives the status result (0 when the store was performed, 1 when
 * it was not), rt (and rt2 when is_pair) hold the data to store, inaddr
 * is the address and size is the log2 of the access size in bytes.
 * Whatever the outcome, cpu_exclusive_addr is set to -1 at the end.
 */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 inaddr, int size, int is_pair)
{
    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
     *     [addr] = {Rt};
     *     if (is_pair) {
     *         [addr + datasize] = {Rt2};
     *     }
     *     {Rd} = 0;
     * } else {
     *     {Rd} = 1;
     * }
     * env->exclusive_addr = -1;
     */
    TCGLabel *fail_label = gen_new_label();
    TCGLabel *done_label = gen_new_label();
    TCGv_i64 addr = tcg_temp_local_new_i64();
    TCGv_i64 tmp;

    /* Copy input into a local temp so it is not trashed when the
     * basic block ends at the branch insn.
     */
    tcg_gen_mov_i64(addr, inaddr);
    /* Fail if the address differs from the recorded exclusive address */
    tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);

    /* Fail if memory no longer holds the recorded value */
    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), MO_TE + size);
    tcg_gen_brcond_i64(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label);
    tcg_temp_free_i64(tmp);

    if (is_pair) {
        TCGv_i64 addrhi = tcg_temp_new_i64();
        TCGv_i64 tmphi = tcg_temp_new_i64();

        /* Same comparison for the second element of the pair */
        tcg_gen_addi_i64(addrhi, addr, 1 << size);
        tcg_gen_qemu_ld_i64(tmphi, addrhi, get_mem_index(s), MO_TE + size);
        tcg_gen_brcond_i64(TCG_COND_NE, tmphi, cpu_exclusive_high, fail_label);

        tcg_temp_free_i64(tmphi);
        tcg_temp_free_i64(addrhi);
    }

    /* We seem to still have the exclusive monitor, so do the store */
    tcg_gen_qemu_st_i64(cpu_reg(s, rt), addr, get_mem_index(s), MO_TE + size);
    if (is_pair) {
        /* addrhi is recomputed here from the local temp addr */
        TCGv_i64 addrhi = tcg_temp_new_i64();

        tcg_gen_addi_i64(addrhi, addr, 1 << size);
        tcg_gen_qemu_st_i64(cpu_reg(s, rt2), addrhi,
                            get_mem_index(s), MO_TE + size);
        tcg_temp_free_i64(addrhi);
    }

    tcg_temp_free_i64(addr);

    /* Success path sets Rd to 0 and skips over the failure path */
    tcg_gen_movi_i64(cpu_reg(s, rd), 0);
    tcg_gen_br(done_label);
    gen_set_label(fail_label);
    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
    gen_set_label(done_label);
    /* Both paths join here: reset the recorded exclusive address */
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);

}
#endif
1806
1807 /* C3.3.6 Load/store exclusive
1808  *
1809  *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
1810  * +-----+-------------+----+---+----+------+----+-------+------+------+
1811  * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
1812  * +-----+-------------+----+---+----+------+----+-------+------+------+
1813  *
1814  *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
1815  *   L: 0 -> store, 1 -> load
1816  *  o2: 0 -> exclusive, 1 -> not
1817  *  o1: 0 -> single register, 1 -> register pair
1818  *  o0: 1 -> load-acquire/store-release, 0 -> not
1819  */
1820 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
1821 {
1822     int rt = extract32(insn, 0, 5);
1823     int rn = extract32(insn, 5, 5);
1824     int rt2 = extract32(insn, 10, 5);
1825     int is_lasr = extract32(insn, 15, 1);
1826     int rs = extract32(insn, 16, 5);
1827     int is_pair = extract32(insn, 21, 1);
1828     int is_store = !extract32(insn, 22, 1);
1829     int is_excl = !extract32(insn, 23, 1);
1830     int size = extract32(insn, 30, 2);
1831     TCGv_i64 tcg_addr;
1832
1833     if ((!is_excl && !is_pair && !is_lasr) ||
1834         (!is_excl && is_pair) ||
1835         (is_pair && size < 2)) {
1836         unallocated_encoding(s);
1837         return;
1838     }
1839
1840     if (rn == 31) {
1841         gen_check_sp_alignment(s);
1842     }
1843     tcg_addr = read_cpu_reg_sp(s, rn, 1);
1844
1845     /* Note that since TCG is single threaded load-acquire/store-release
1846      * semantics require no extra if (is_lasr) { ... } handling.
1847      */
1848
1849     if (is_excl) {
1850         if (!is_store) {
1851             s->is_ldex = true;
1852             gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
1853         } else {
1854             gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
1855         }
1856     } else {
1857         TCGv_i64 tcg_rt = cpu_reg(s, rt);
1858         if (is_store) {
1859             do_gpr_st(s, tcg_rt, tcg_addr, size);
1860         } else {
1861             do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false);
1862         }
1863     }
1864 }
1865
1866 /*
1867  * C3.3.5 Load register (literal)
1868  *
1869  *  31 30 29   27  26 25 24 23                5 4     0
1870  * +-----+-------+---+-----+-------------------+-------+
1871  * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
1872  * +-----+-------+---+-----+-------------------+-------+
1873  *
1874  * V: 1 -> vector (simd/fp)
1875  * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
1876  *                   10-> 32 bit signed, 11 -> prefetch
1877  * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
1878  */
1879 static void disas_ld_lit(DisasContext *s, uint32_t insn)
1880 {
1881     int rt = extract32(insn, 0, 5);
1882     int64_t imm = sextract32(insn, 5, 19) << 2;
1883     bool is_vector = extract32(insn, 26, 1);
1884     int opc = extract32(insn, 30, 2);
1885     bool is_signed = false;
1886     int size = 2;
1887     TCGv_i64 tcg_rt, tcg_addr;
1888
1889     if (is_vector) {
1890         if (opc == 3) {
1891             unallocated_encoding(s);
1892             return;
1893         }
1894         size = 2 + opc;
1895         if (!fp_access_check(s)) {
1896             return;
1897         }
1898     } else {
1899         if (opc == 3) {
1900             /* PRFM (literal) : prefetch */
1901             return;
1902         }
1903         size = 2 + extract32(opc, 0, 1);
1904         is_signed = extract32(opc, 1, 1);
1905     }
1906
1907     tcg_rt = cpu_reg(s, rt);
1908
1909     tcg_addr = tcg_const_i64((s->pc - 4) + imm);
1910     if (is_vector) {
1911         do_fp_ld(s, rt, tcg_addr, size);
1912     } else {
1913         do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
1914     }
1915     tcg_temp_free_i64(tcg_addr);
1916 }
1917
1918 /*
1919  * C5.6.80 LDNP (Load Pair - non-temporal hint)
1920  * C5.6.81 LDP (Load Pair - non vector)
1921  * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
1922  * C5.6.176 STNP (Store Pair - non-temporal hint)
1923  * C5.6.177 STP (Store Pair - non vector)
1924  * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
1925  * C6.3.165 LDP (Load Pair of SIMD&FP)
1926  * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
1927  * C6.3.284 STP (Store Pair of SIMD&FP)
1928  *
1929  *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
1930  * +-----+-------+---+---+-------+---+-----------------------------+
1931  * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
1932  * +-----+-------+---+---+-------+---+-------+-------+------+------+
1933  *
1934  * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
1935  *      LDPSW                    01
1936  *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
1937  *   V: 0 -> GPR, 1 -> Vector
1938  * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
1939  *      10 -> signed offset, 11 -> pre-index
1940  *   L: 0 -> Store 1 -> Load
1941  *
1942  * Rt, Rt2 = GPR or SIMD registers to be stored
1943  * Rn = general purpose register containing address
1944  * imm7 = signed offset (multiple of 4 or 8 depending on size)
1945  */
1946 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
1947 {
1948     int rt = extract32(insn, 0, 5);
1949     int rn = extract32(insn, 5, 5);
1950     int rt2 = extract32(insn, 10, 5);
1951     uint64_t offset = sextract64(insn, 15, 7);
1952     int index = extract32(insn, 23, 2);
1953     bool is_vector = extract32(insn, 26, 1);
1954     bool is_load = extract32(insn, 22, 1);
1955     int opc = extract32(insn, 30, 2);
1956
1957     bool is_signed = false;
1958     bool postindex = false;
1959     bool wback = false;
1960
1961     TCGv_i64 tcg_addr; /* calculated address */
1962     int size;
1963
1964     if (opc == 3) {
1965         unallocated_encoding(s);
1966         return;
1967     }
1968
1969     if (is_vector) {
1970         size = 2 + opc;
1971     } else {
1972         size = 2 + extract32(opc, 1, 1);
1973         is_signed = extract32(opc, 0, 1);
1974         if (!is_load && is_signed) {
1975             unallocated_encoding(s);
1976             return;
1977         }
1978     }
1979
1980     switch (index) {
1981     case 1: /* post-index */
1982         postindex = true;
1983         wback = true;
1984         break;
1985     case 0:
1986         /* signed offset with "non-temporal" hint. Since we don't emulate
1987          * caches we don't care about hints to the cache system about
1988          * data access patterns, and handle this identically to plain
1989          * signed offset.
1990          */
1991         if (is_signed) {
1992             /* There is no non-temporal-hint version of LDPSW */
1993             unallocated_encoding(s);
1994             return;
1995         }
1996         postindex = false;
1997         break;
1998     case 2: /* signed offset, rn not updated */
1999         postindex = false;
2000         break;
2001     case 3: /* pre-index */
2002         postindex = false;
2003         wback = true;
2004         break;
2005     }
2006
2007     if (is_vector && !fp_access_check(s)) {
2008         return;
2009     }
2010
2011     offset <<= size;
2012
2013     if (rn == 31) {
2014         gen_check_sp_alignment(s);
2015     }
2016
2017     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2018
2019     if (!postindex) {
2020         tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2021     }
2022
2023     if (is_vector) {
2024         if (is_load) {
2025             do_fp_ld(s, rt, tcg_addr, size);
2026         } else {
2027             do_fp_st(s, rt, tcg_addr, size);
2028         }
2029     } else {
2030         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2031         if (is_load) {
2032             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
2033         } else {
2034             do_gpr_st(s, tcg_rt, tcg_addr, size);
2035         }
2036     }
2037     tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2038     if (is_vector) {
2039         if (is_load) {
2040             do_fp_ld(s, rt2, tcg_addr, size);
2041         } else {
2042             do_fp_st(s, rt2, tcg_addr, size);
2043         }
2044     } else {
2045         TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2046         if (is_load) {
2047             do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false);
2048         } else {
2049             do_gpr_st(s, tcg_rt2, tcg_addr, size);
2050         }
2051     }
2052
2053     if (wback) {
2054         if (postindex) {
2055             tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
2056         } else {
2057             tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
2058         }
2059         tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
2060     }
2061 }
2062
2063 /*
2064  * C3.3.8 Load/store (immediate post-indexed)
2065  * C3.3.9 Load/store (immediate pre-indexed)
2066  * C3.3.12 Load/store (unscaled immediate)
2067  *
2068  * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2069  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2070  * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2071  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2072  *
2073  * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
2074          10 -> unprivileged
2075  * V = 0 -> non-vector
2076  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2077  * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2078  */
2079 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
2080 {
2081     int rt = extract32(insn, 0, 5);
2082     int rn = extract32(insn, 5, 5);
2083     int imm9 = sextract32(insn, 12, 9);
2084     int opc = extract32(insn, 22, 2);
2085     int size = extract32(insn, 30, 2);
2086     int idx = extract32(insn, 10, 2);
2087     bool is_signed = false;
2088     bool is_store = false;
2089     bool is_extended = false;
2090     bool is_unpriv = (idx == 2);
2091     bool is_vector = extract32(insn, 26, 1);
2092     bool post_index;
2093     bool writeback;
2094
2095     TCGv_i64 tcg_addr;
2096
2097     if (is_vector) {
2098         size |= (opc & 2) << 1;
2099         if (size > 4 || is_unpriv) {
2100             unallocated_encoding(s);
2101             return;
2102         }
2103         is_store = ((opc & 1) == 0);
2104         if (!fp_access_check(s)) {
2105             return;
2106         }
2107     } else {
2108         if (size == 3 && opc == 2) {
2109             /* PRFM - prefetch */
2110             if (is_unpriv) {
2111                 unallocated_encoding(s);
2112                 return;
2113             }
2114             return;
2115         }
2116         if (opc == 3 && size > 1) {
2117             unallocated_encoding(s);
2118             return;
2119         }
2120         is_store = (opc == 0);
2121         is_signed = opc & (1<<1);
2122         is_extended = (size < 3) && (opc & 1);
2123     }
2124
2125     switch (idx) {
2126     case 0:
2127     case 2:
2128         post_index = false;
2129         writeback = false;
2130         break;
2131     case 1:
2132         post_index = true;
2133         writeback = true;
2134         break;
2135     case 3:
2136         post_index = false;
2137         writeback = true;
2138         break;
2139     }
2140
2141     if (rn == 31) {
2142         gen_check_sp_alignment(s);
2143     }
2144     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2145
2146     if (!post_index) {
2147         tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2148     }
2149
2150     if (is_vector) {
2151         if (is_store) {
2152             do_fp_st(s, rt, tcg_addr, size);
2153         } else {
2154             do_fp_ld(s, rt, tcg_addr, size);
2155         }
2156     } else {
2157         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2158         int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2159
2160         if (is_store) {
2161             do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx);
2162         } else {
2163             do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
2164                              is_signed, is_extended, memidx);
2165         }
2166     }
2167
2168     if (writeback) {
2169         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2170         if (post_index) {
2171             tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2172         }
2173         tcg_gen_mov_i64(tcg_rn, tcg_addr);
2174     }
2175 }
2176
2177 /*
2178  * C3.3.10 Load/store (register offset)
2179  *
2180  * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2181  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2182  * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2183  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2184  *
2185  * For non-vector:
2186  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2187  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2188  * For vector:
2189  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2190  *   opc<0>: 0 -> store, 1 -> load
2191  * V: 1 -> vector/simd
2192  * opt: extend encoding (see DecodeRegExtend)
2193  * S: if S=1 then scale (essentially index by sizeof(size))
2194  * Rt: register to transfer into/out of
2195  * Rn: address register or SP for base
2196  * Rm: offset register or ZR for offset
2197  */
2198 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn)
2199 {
2200     int rt = extract32(insn, 0, 5);
2201     int rn = extract32(insn, 5, 5);
2202     int shift = extract32(insn, 12, 1);
2203     int rm = extract32(insn, 16, 5);
2204     int opc = extract32(insn, 22, 2);
2205     int opt = extract32(insn, 13, 3);
2206     int size = extract32(insn, 30, 2);
2207     bool is_signed = false;
2208     bool is_store = false;
2209     bool is_extended = false;
2210     bool is_vector = extract32(insn, 26, 1);
2211
2212     TCGv_i64 tcg_rm;
2213     TCGv_i64 tcg_addr;
2214
2215     if (extract32(opt, 1, 1) == 0) {
2216         unallocated_encoding(s);
2217         return;
2218     }
2219
2220     if (is_vector) {
2221         size |= (opc & 2) << 1;
2222         if (size > 4) {
2223             unallocated_encoding(s);
2224             return;
2225         }
2226         is_store = !extract32(opc, 0, 1);
2227         if (!fp_access_check(s)) {
2228             return;
2229         }
2230     } else {
2231         if (size == 3 && opc == 2) {
2232             /* PRFM - prefetch */
2233             return;
2234         }
2235         if (opc == 3 && size > 1) {
2236             unallocated_encoding(s);
2237             return;
2238         }
2239         is_store = (opc == 0);
2240         is_signed = extract32(opc, 1, 1);
2241         is_extended = (size < 3) && extract32(opc, 0, 1);
2242     }
2243
2244     if (rn == 31) {
2245         gen_check_sp_alignment(s);
2246     }
2247     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2248
2249     tcg_rm = read_cpu_reg(s, rm, 1);
2250     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2251
2252     tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
2253
2254     if (is_vector) {
2255         if (is_store) {
2256             do_fp_st(s, rt, tcg_addr, size);
2257         } else {
2258             do_fp_ld(s, rt, tcg_addr, size);
2259         }
2260     } else {
2261         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2262         if (is_store) {
2263             do_gpr_st(s, tcg_rt, tcg_addr, size);
2264         } else {
2265             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
2266         }
2267     }
2268 }
2269
2270 /*
2271  * C3.3.13 Load/store (unsigned immediate)
2272  *
2273  * 31 30 29   27  26 25 24 23 22 21        10 9     5
2274  * +----+-------+---+-----+-----+------------+-------+------+
2275  * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
2276  * +----+-------+---+-----+-----+------------+-------+------+
2277  *
2278  * For non-vector:
2279  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2280  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2281  * For vector:
2282  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2283  *   opc<0>: 0 -> store, 1 -> load
2284  * Rn: base address register (inc SP)
2285  * Rt: target register
2286  */
2287 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn)
2288 {
2289     int rt = extract32(insn, 0, 5);
2290     int rn = extract32(insn, 5, 5);
2291     unsigned int imm12 = extract32(insn, 10, 12);
2292     bool is_vector = extract32(insn, 26, 1);
2293     int size = extract32(insn, 30, 2);
2294     int opc = extract32(insn, 22, 2);
2295     unsigned int offset;
2296
2297     TCGv_i64 tcg_addr;
2298
2299     bool is_store;
2300     bool is_signed = false;
2301     bool is_extended = false;
2302
2303     if (is_vector) {
2304         size |= (opc & 2) << 1;
2305         if (size > 4) {
2306             unallocated_encoding(s);
2307             return;
2308         }
2309         is_store = !extract32(opc, 0, 1);
2310         if (!fp_access_check(s)) {
2311             return;
2312         }
2313     } else {
2314         if (size == 3 && opc == 2) {
2315             /* PRFM - prefetch */
2316             return;
2317         }
2318         if (opc == 3 && size > 1) {
2319             unallocated_encoding(s);
2320             return;
2321         }
2322         is_store = (opc == 0);
2323         is_signed = extract32(opc, 1, 1);
2324         is_extended = (size < 3) && extract32(opc, 0, 1);
2325     }
2326
2327     if (rn == 31) {
2328         gen_check_sp_alignment(s);
2329     }
2330     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2331     offset = imm12 << size;
2332     tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2333
2334     if (is_vector) {
2335         if (is_store) {
2336             do_fp_st(s, rt, tcg_addr, size);
2337         } else {
2338             do_fp_ld(s, rt, tcg_addr, size);
2339         }
2340     } else {
2341         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2342         if (is_store) {
2343             do_gpr_st(s, tcg_rt, tcg_addr, size);
2344         } else {
2345             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
2346         }
2347     }
2348 }
2349
2350 /* Load/store register (all forms) */
2351 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2352 {
2353     switch (extract32(insn, 24, 2)) {
2354     case 0:
2355         if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2356             disas_ldst_reg_roffset(s, insn);
2357         } else {
2358             /* Load/store register (unscaled immediate)
2359              * Load/store immediate pre/post-indexed
2360              * Load/store register unprivileged
2361              */
2362             disas_ldst_reg_imm9(s, insn);
2363         }
2364         break;
2365     case 1:
2366         disas_ldst_reg_unsigned_imm(s, insn);
2367         break;
2368     default:
2369         unallocated_encoding(s);
2370         break;
2371     }
2372 }
2373
2374 /* C3.3.1 AdvSIMD load/store multiple structures
2375  *
2376  *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
2377  * +---+---+---------------+---+-------------+--------+------+------+------+
2378  * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
2379  * +---+---+---------------+---+-------------+--------+------+------+------+
2380  *
2381  * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
2382  *
2383  *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
2384  * +---+---+---------------+---+---+---------+--------+------+------+------+
2385  * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
2386  * +---+---+---------------+---+---+---------+--------+------+------+------+
2387  *
2388  * Rt: first (or only) SIMD&FP register to be transferred
2389  * Rn: base address or SP
2390  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2391  */
2392 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
2393 {
2394     int rt = extract32(insn, 0, 5);
2395     int rn = extract32(insn, 5, 5);
2396     int size = extract32(insn, 10, 2);
2397     int opcode = extract32(insn, 12, 4);
2398     bool is_store = !extract32(insn, 22, 1);
2399     bool is_postidx = extract32(insn, 23, 1);
2400     bool is_q = extract32(insn, 30, 1);
2401     TCGv_i64 tcg_addr, tcg_rn;
2402
2403     int ebytes = 1 << size;
2404     int elements = (is_q ? 128 : 64) / (8 << size);
2405     int rpt;    /* num iterations */
2406     int selem;  /* structure elements */
2407     int r;
2408
2409     if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
2410         unallocated_encoding(s);
2411         return;
2412     }
2413
2414     /* From the shared decode logic */
2415     switch (opcode) {
2416     case 0x0:
2417         rpt = 1;
2418         selem = 4;
2419         break;
2420     case 0x2:
2421         rpt = 4;
2422         selem = 1;
2423         break;
2424     case 0x4:
2425         rpt = 1;
2426         selem = 3;
2427         break;
2428     case 0x6:
2429         rpt = 3;
2430         selem = 1;
2431         break;
2432     case 0x7:
2433         rpt = 1;
2434         selem = 1;
2435         break;
2436     case 0x8:
2437         rpt = 1;
2438         selem = 2;
2439         break;
2440     case 0xa:
2441         rpt = 2;
2442         selem = 1;
2443         break;
2444     default:
2445         unallocated_encoding(s);
2446         return;
2447     }
2448
2449     if (size == 3 && !is_q && selem != 1) {
2450         /* reserved */
2451         unallocated_encoding(s);
2452         return;
2453     }
2454
2455     if (!fp_access_check(s)) {
2456         return;
2457     }
2458
2459     if (rn == 31) {
2460         gen_check_sp_alignment(s);
2461     }
2462
2463     tcg_rn = cpu_reg_sp(s, rn);
2464     tcg_addr = tcg_temp_new_i64();
2465     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2466
2467     for (r = 0; r < rpt; r++) {
2468         int e;
2469         for (e = 0; e < elements; e++) {
2470             int tt = (rt + r) % 32;
2471             int xs;
2472             for (xs = 0; xs < selem; xs++) {
2473                 if (is_store) {
2474                     do_vec_st(s, tt, e, tcg_addr, size);
2475                 } else {
2476                     do_vec_ld(s, tt, e, tcg_addr, size);
2477
2478                     /* For non-quad operations, setting a slice of the low
2479                      * 64 bits of the register clears the high 64 bits (in
2480                      * the ARM ARM pseudocode this is implicit in the fact
2481                      * that 'rval' is a 64 bit wide variable). We optimize
2482                      * by noticing that we only need to do this the first
2483                      * time we touch a register.
2484                      */
2485                     if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
2486                         clear_vec_high(s, tt);
2487                     }
2488                 }
2489                 tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2490                 tt = (tt + 1) % 32;
2491             }
2492         }
2493     }
2494
2495     if (is_postidx) {
2496         int rm = extract32(insn, 16, 5);
2497         if (rm == 31) {
2498             tcg_gen_mov_i64(tcg_rn, tcg_addr);
2499         } else {
2500             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2501         }
2502     }
2503     tcg_temp_free_i64(tcg_addr);
2504 }
2505
2506 /* C3.3.3 AdvSIMD load/store single structure
2507  *
2508  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2509  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2510  * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
2511  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2512  *
2513  * C3.3.4 AdvSIMD load/store single structure (post-indexed)
2514  *
2515  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2516  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2517  * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
2518  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2519  *
2520  * Rt: first (or only) SIMD&FP register to be transferred
2521  * Rn: base address or SP
2522  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2523  * index = encoded in Q:S:size dependent on size
2524  *
2525  * lane_size = encoded in R, opc
2526  * transfer width = encoded in opc, S, size
2527  */
2528 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
2529 {
2530     int rt = extract32(insn, 0, 5);
2531     int rn = extract32(insn, 5, 5);
2532     int size = extract32(insn, 10, 2);
2533     int S = extract32(insn, 12, 1);
2534     int opc = extract32(insn, 13, 3);
2535     int R = extract32(insn, 21, 1);
2536     int is_load = extract32(insn, 22, 1);
2537     int is_postidx = extract32(insn, 23, 1);
2538     int is_q = extract32(insn, 30, 1);
2539
2540     int scale = extract32(opc, 1, 2);
2541     int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
2542     bool replicate = false;
2543     int index = is_q << 3 | S << 2 | size;
2544     int ebytes, xs;
2545     TCGv_i64 tcg_addr, tcg_rn;
2546
2547     switch (scale) {
2548     case 3:
2549         if (!is_load || S) {
2550             unallocated_encoding(s);
2551             return;
2552         }
2553         scale = size;
2554         replicate = true;
2555         break;
2556     case 0:
2557         break;
2558     case 1:
2559         if (extract32(size, 0, 1)) {
2560             unallocated_encoding(s);
2561             return;
2562         }
2563         index >>= 1;
2564         break;
2565     case 2:
2566         if (extract32(size, 1, 1)) {
2567             unallocated_encoding(s);
2568             return;
2569         }
2570         if (!extract32(size, 0, 1)) {
2571             index >>= 2;
2572         } else {
2573             if (S) {
2574                 unallocated_encoding(s);
2575                 return;
2576             }
2577             index >>= 3;
2578             scale = 3;
2579         }
2580         break;
2581     default:
2582         g_assert_not_reached();
2583     }
2584
2585     if (!fp_access_check(s)) {
2586         return;
2587     }
2588
2589     ebytes = 1 << scale;
2590
2591     if (rn == 31) {
2592         gen_check_sp_alignment(s);
2593     }
2594
2595     tcg_rn = cpu_reg_sp(s, rn);
2596     tcg_addr = tcg_temp_new_i64();
2597     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2598
2599     for (xs = 0; xs < selem; xs++) {
2600         if (replicate) {
2601             /* Load and replicate to all elements */
2602             uint64_t mulconst;
2603             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
2604
2605             tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
2606                                 get_mem_index(s), MO_TE + scale);
2607             switch (scale) {
2608             case 0:
2609                 mulconst = 0x0101010101010101ULL;
2610                 break;
2611             case 1:
2612                 mulconst = 0x0001000100010001ULL;
2613                 break;
2614             case 2:
2615                 mulconst = 0x0000000100000001ULL;
2616                 break;
2617             case 3:
2618                 mulconst = 0;
2619                 break;
2620             default:
2621                 g_assert_not_reached();
2622             }
2623             if (mulconst) {
2624                 tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
2625             }
2626             write_vec_element(s, tcg_tmp, rt, 0, MO_64);
2627             if (is_q) {
2628                 write_vec_element(s, tcg_tmp, rt, 1, MO_64);
2629             } else {
2630                 clear_vec_high(s, rt);
2631             }
2632             tcg_temp_free_i64(tcg_tmp);
2633         } else {
2634             /* Load/store one element per register */
2635             if (is_load) {
2636                 do_vec_ld(s, rt, index, tcg_addr, MO_TE + scale);
2637             } else {
2638                 do_vec_st(s, rt, index, tcg_addr, MO_TE + scale);
2639             }
2640         }
2641         tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2642         rt = (rt + 1) % 32;
2643     }
2644
2645     if (is_postidx) {
2646         int rm = extract32(insn, 16, 5);
2647         if (rm == 31) {
2648             tcg_gen_mov_i64(tcg_rn, tcg_addr);
2649         } else {
2650             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2651         }
2652     }
2653     tcg_temp_free_i64(tcg_addr);
2654 }
2655
2656 /* C3.3 Loads and stores */
2657 static void disas_ldst(DisasContext *s, uint32_t insn)
2658 {
2659     switch (extract32(insn, 24, 6)) {
2660     case 0x08: /* Load/store exclusive */
2661         disas_ldst_excl(s, insn);
2662         break;
2663     case 0x18: case 0x1c: /* Load register (literal) */
2664         disas_ld_lit(s, insn);
2665         break;
2666     case 0x28: case 0x29:
2667     case 0x2c: case 0x2d: /* Load/store pair (all forms) */
2668         disas_ldst_pair(s, insn);
2669         break;
2670     case 0x38: case 0x39:
2671     case 0x3c: case 0x3d: /* Load/store register (all forms) */
2672         disas_ldst_reg(s, insn);
2673         break;
2674     case 0x0c: /* AdvSIMD load/store multiple structures */
2675         disas_ldst_multiple_struct(s, insn);
2676         break;
2677     case 0x0d: /* AdvSIMD load/store single structure */
2678         disas_ldst_single_struct(s, insn);
2679         break;
2680     default:
2681         unallocated_encoding(s);
2682         break;
2683     }
2684 }
2685
2686 /* C3.4.6 PC-rel. addressing
2687  *   31  30   29 28       24 23                5 4    0
2688  * +----+-------+-----------+-------------------+------+
2689  * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
2690  * +----+-------+-----------+-------------------+------+
2691  */
2692 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
2693 {
2694     unsigned int page, rd;
2695     uint64_t base;
2696     uint64_t offset;
2697
2698     page = extract32(insn, 31, 1);
2699     /* SignExtend(immhi:immlo) -> offset */
2700     offset = sextract64(insn, 5, 19);
2701     offset = offset << 2 | extract32(insn, 29, 2);
2702     rd = extract32(insn, 0, 5);
2703     base = s->pc - 4;
2704
2705     if (page) {
2706         /* ADRP (page based) */
2707         base &= ~0xfff;
2708         offset <<= 12;
2709     }
2710
2711     tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
2712 }
2713
2714 /*
2715  * C3.4.1 Add/subtract (immediate)
2716  *
2717  *  31 30 29 28       24 23 22 21         10 9   5 4   0
2718  * +--+--+--+-----------+-----+-------------+-----+-----+
2719  * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
2720  * +--+--+--+-----------+-----+-------------+-----+-----+
2721  *
2722  *    sf: 0 -> 32bit, 1 -> 64bit
2723  *    op: 0 -> add  , 1 -> sub
2724  *     S: 1 -> set flags
2725  * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
2726  */
2727 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
2728 {
2729     int rd = extract32(insn, 0, 5);
2730     int rn = extract32(insn, 5, 5);
2731     uint64_t imm = extract32(insn, 10, 12);
2732     int shift = extract32(insn, 22, 2);
2733     bool setflags = extract32(insn, 29, 1);
2734     bool sub_op = extract32(insn, 30, 1);
2735     bool is_64bit = extract32(insn, 31, 1);
2736
2737     TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2738     TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
2739     TCGv_i64 tcg_result;
2740
2741     switch (shift) {
2742     case 0x0:
2743         break;
2744     case 0x1:
2745         imm <<= 12;
2746         break;
2747     default:
2748         unallocated_encoding(s);
2749         return;
2750     }
2751
2752     tcg_result = tcg_temp_new_i64();
2753     if (!setflags) {
2754         if (sub_op) {
2755             tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
2756         } else {
2757             tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
2758         }
2759     } else {
2760         TCGv_i64 tcg_imm = tcg_const_i64(imm);
2761         if (sub_op) {
2762             gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2763         } else {
2764             gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2765         }
2766         tcg_temp_free_i64(tcg_imm);
2767     }
2768
2769     if (is_64bit) {
2770         tcg_gen_mov_i64(tcg_rd, tcg_result);
2771     } else {
2772         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
2773     }
2774
2775     tcg_temp_free_i64(tcg_result);
2776 }
2777
/* Replicate an e-bit element across a 64 bit value.
 *
 * mask must hold the element in its bottom e bits with all higher bits
 * zero.  The populated width is doubled on each step until it covers
 * (at least) 64 bits, and the resulting pattern is returned.
 * e must be non-zero.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int width;

    assert(e != 0);
    for (width = e; width < 64; width *= 2) {
        /* copy the already populated low part into the next slot up */
        mask |= mask << width;
    }
    return mask;
}
2791
/* Build a mask with the low 'length' bits set; length must be in 1..64.
 * (A length of 0 would need a shift by 64, hence the assertion.)
 */
static inline uint64_t bitmask64(unsigned int length)
{
    uint64_t all_ones = ~0ULL;

    assert(length > 0 && length <= 64);
    return all_ones >> (64 - length);
}
2798
/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
 * only require the wmask.
 *
 * Returns true and stores the decoded 64 bit pattern in *result when
 * immn/imms/immr form a valid encoding; returns false (leaving *result
 * untouched) for the reserved encodings, which should cause a guest
 * UNDEF exception.
 *
 * The decoded patterns are vectors of identical elements of size
 * e = 2, 4, 8, 16, 32 or 64 bits.  Each element holds a run of between
 * 1 and e-1 set bits, rotated within the element by 0 to e-1 bits.
 *
 * Element size and run length are packed into immn (1 bit) and
 * imms (6 bits):
 * 64 bit elements: immn = 1, imms = <length of run - 1>
 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
 *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
 *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
 *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
 * immn = 0, imms = 11111x is the one combination matching none of the
 * rows above and is reserved, as is an all-ones <length of run - 1>.
 *
 * The rotation is always immr % e (immr being 6 bits wide).
 */
static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                                   unsigned int imms, unsigned int immr)
{
    unsigned int esize, emask, run, rot;
    uint64_t elem;
    int top;

    assert(immn < 2 && imms < 64 && immr < 64);

    /* The highest set bit of immn:NOT(imms) gives log2 of the element size */
    top = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (top < 1) {
        /* immn == 0 with imms == 11111x (binary) */
        return false;
    }
    esize = 1 << top;
    emask = esize - 1;

    run = imms & emask;
    rot = immr & emask;
    if (run == emask) {
        /* an all-ones <length of run - 1> is reserved */
        return false;
    }

    /* One element: run+1 set bits, rotated right by rot within esize bits */
    elem = bitmask64(run + 1);
    if (rot != 0) {
        elem = (elem >> rot) | (elem << (esize - rot));
        elem &= bitmask64(esize);
    }

    /* Fill the whole 64 bit value with copies of that element */
    *result = bitfield_replicate(elem, esize);
    return true;
}
2864
2865 /* C3.4.4 Logical (immediate)
2866  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
2867  * +----+-----+-------------+---+------+------+------+------+
2868  * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
2869  * +----+-----+-------------+---+------+------+------+------+
2870  */
2871 static void disas_logic_imm(DisasContext *s, uint32_t insn)
2872 {
2873     unsigned int sf, opc, is_n, immr, imms, rn, rd;
2874     TCGv_i64 tcg_rd, tcg_rn;
2875     uint64_t wmask;
2876     bool is_and = false;
2877
2878     sf = extract32(insn, 31, 1);
2879     opc = extract32(insn, 29, 2);
2880     is_n = extract32(insn, 22, 1);
2881     immr = extract32(insn, 16, 6);
2882     imms = extract32(insn, 10, 6);
2883     rn = extract32(insn, 5, 5);
2884     rd = extract32(insn, 0, 5);
2885
2886     if (!sf && is_n) {
2887         unallocated_encoding(s);
2888         return;
2889     }
2890
2891     if (opc == 0x3) { /* ANDS */
2892         tcg_rd = cpu_reg(s, rd);
2893     } else {
2894         tcg_rd = cpu_reg_sp(s, rd);
2895     }
2896     tcg_rn = cpu_reg(s, rn);
2897
2898     if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
2899         /* some immediate field values are reserved */
2900         unallocated_encoding(s);
2901         return;
2902     }
2903
2904     if (!sf) {
2905         wmask &= 0xffffffff;
2906     }
2907
2908     switch (opc) {
2909     case 0x3: /* ANDS */
2910     case 0x0: /* AND */
2911         tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
2912         is_and = true;
2913         break;
2914     case 0x1: /* ORR */
2915         tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
2916         break;
2917     case 0x2: /* EOR */
2918         tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
2919         break;
2920     default:
2921         assert(FALSE); /* must handle all above */
2922         break;
2923     }
2924
2925     if (!sf && !is_and) {
2926         /* zero extend final result; we know we can skip this for AND
2927          * since the immediate had the high 32 bits clear.
2928          */
2929         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2930     }
2931
2932     if (opc == 3) { /* ANDS */
2933         gen_logic_CC(sf, tcg_rd);
2934     }
2935 }
2936
2937 /*
2938  * C3.4.5 Move wide (immediate)
2939  *
2940  *  31 30 29 28         23 22 21 20             5 4    0
2941  * +--+-----+-------------+-----+----------------+------+
2942  * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
2943  * +--+-----+-------------+-----+----------------+------+
2944  *
2945  * sf: 0 -> 32 bit, 1 -> 64 bit
2946  * opc: 00 -> N, 10 -> Z, 11 -> K
2947  * hw: shift/16 (0,16, and sf only 32, 48)
2948  */
2949 static void disas_movw_imm(DisasContext *s, uint32_t insn)
2950 {
2951     int rd = extract32(insn, 0, 5);
2952     uint64_t imm = extract32(insn, 5, 16);
2953     int sf = extract32(insn, 31, 1);
2954     int opc = extract32(insn, 29, 2);
2955     int pos = extract32(insn, 21, 2) << 4;
2956     TCGv_i64 tcg_rd = cpu_reg(s, rd);
2957     TCGv_i64 tcg_imm;
2958
2959     if (!sf && (pos >= 32)) {
2960         unallocated_encoding(s);
2961         return;
2962     }
2963
2964     switch (opc) {
2965     case 0: /* MOVN */
2966     case 2: /* MOVZ */
2967         imm <<= pos;
2968         if (opc == 0) {
2969             imm = ~imm;
2970         }
2971         if (!sf) {
2972             imm &= 0xffffffffu;
2973         }
2974         tcg_gen_movi_i64(tcg_rd, imm);
2975         break;
2976     case 3: /* MOVK */
2977         tcg_imm = tcg_const_i64(imm);
2978         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
2979         tcg_temp_free_i64(tcg_imm);
2980         if (!sf) {
2981             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2982         }
2983         break;
2984     default:
2985         unallocated_encoding(s);
2986         break;
2987     }
2988 }
2989
2990 /* C3.4.2 Bitfield
2991  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
2992  * +----+-----+-------------+---+------+------+------+------+
2993  * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
2994  * +----+-----+-------------+---+------+------+------+------+
2995  */
2996 static void disas_bitfield(DisasContext *s, uint32_t insn)
2997 {
2998     unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
2999     TCGv_i64 tcg_rd, tcg_tmp;
3000
3001     sf = extract32(insn, 31, 1);
3002     opc = extract32(insn, 29, 2);
3003     n = extract32(insn, 22, 1);
3004     ri = extract32(insn, 16, 6);
3005     si = extract32(insn, 10, 6);
3006     rn = extract32(insn, 5, 5);
3007     rd = extract32(insn, 0, 5);
3008     bitsize = sf ? 64 : 32;
3009
3010     if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3011         unallocated_encoding(s);
3012         return;
3013     }
3014
3015     tcg_rd = cpu_reg(s, rd);
3016
3017     /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
3018        to be smaller than bitsize, we'll never reference data outside the
3019        low 32-bits anyway.  */
3020     tcg_tmp = read_cpu_reg(s, rn, 1);
3021
3022     /* Recognize the common aliases.  */
3023     if (opc == 0) { /* SBFM */
3024         if (ri == 0) {
3025             if (si == 7) { /* SXTB */
3026                 tcg_gen_ext8s_i64(tcg_rd, tcg_tmp);
3027                 goto done;
3028             } else if (si == 15) { /* SXTH */
3029                 tcg_gen_ext16s_i64(tcg_rd, tcg_tmp);
3030                 goto done;
3031             } else if (si == 31) { /* SXTW */
3032                 tcg_gen_ext32s_i64(tcg_rd, tcg_tmp);
3033                 goto done;
3034             }
3035         }
3036         if (si == 63 || (si == 31 && ri <= si)) { /* ASR */
3037             if (si == 31) {
3038                 tcg_gen_ext32s_i64(tcg_tmp, tcg_tmp);
3039             }
3040             tcg_gen_sari_i64(tcg_rd, tcg_tmp, ri);
3041             goto done;
3042         }
3043     } else if (opc == 2) { /* UBFM */
3044         if (ri == 0) { /* UXTB, UXTH, plus non-canonical AND */
3045             tcg_gen_andi_i64(tcg_rd, tcg_tmp, bitmask64(si + 1));
3046             return;
3047         }
3048         if (si == 63 || (si == 31 && ri <= si)) { /* LSR */
3049             if (si == 31) {
3050                 tcg_gen_ext32u_i64(tcg_tmp, tcg_tmp);
3051             }
3052             tcg_gen_shri_i64(tcg_rd, tcg_tmp, ri);
3053             return;
3054         }
3055         if (si + 1 == ri && si != bitsize - 1) { /* LSL */
3056             int shift = bitsize - 1 - si;
3057             tcg_gen_shli_i64(tcg_rd, tcg_tmp, shift);
3058             goto done;
3059         }
3060     }
3061
3062     if (opc != 1) { /* SBFM or UBFM */
3063         tcg_gen_movi_i64(tcg_rd, 0);
3064     }
3065
3066     /* do the bit move operation */
3067     if (si >= ri) {
3068         /* Wd<s-r:0> = Wn<s:r> */
3069         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
3070         pos = 0;
3071         len = (si - ri) + 1;
3072     } else {
3073         /* Wd<32+s-r,32-r> = Wn<s:0> */
3074         pos = bitsize - ri;
3075         len = si + 1;
3076     }
3077
3078     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
3079
3080     if (opc == 0) { /* SBFM - sign extend the destination field */
3081         tcg_gen_shli_i64(tcg_rd, tcg_rd, 64 - (pos + len));
3082         tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len));
3083     }
3084
3085  done:
3086     if (!sf) { /* zero extend final result */
3087         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3088     }
3089 }
3090
3091 /* C3.4.3 Extract
3092  *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3093  * +----+------+-------------+---+----+------+--------+------+------+
3094  * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3095  * +----+------+-------------+---+----+------+--------+------+------+
3096  */
3097 static void disas_extract(DisasContext *s, uint32_t insn)
3098 {
3099     unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3100
3101     sf = extract32(insn, 31, 1);
3102     n = extract32(insn, 22, 1);
3103     rm = extract32(insn, 16, 5);
3104     imm = extract32(insn, 10, 6);
3105     rn = extract32(insn, 5, 5);
3106     rd = extract32(insn, 0, 5);
3107     op21 = extract32(insn, 29, 2);
3108     op0 = extract32(insn, 21, 1);
3109     bitsize = sf ? 64 : 32;
3110
3111     if (sf != n || op21 || op0 || imm >= bitsize) {
3112         unallocated_encoding(s);
3113     } else {
3114         TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3115
3116         tcg_rd = cpu_reg(s, rd);
3117
3118         if (unlikely(imm == 0)) {
3119             /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3120              * so an extract from bit 0 is a special case.
3121              */
3122             if (sf) {
3123                 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3124             } else {
3125                 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3126             }
3127         } else if (rm == rn) { /* ROR */
3128             tcg_rm = cpu_reg(s, rm);
3129             if (sf) {
3130                 tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
3131             } else {
3132                 TCGv_i32 tmp = tcg_temp_new_i32();
3133                 tcg_gen_extrl_i64_i32(tmp, tcg_rm);
3134                 tcg_gen_rotri_i32(tmp, tmp, imm);
3135                 tcg_gen_extu_i32_i64(tcg_rd, tmp);
3136                 tcg_temp_free_i32(tmp);
3137             }
3138         } else {
3139             tcg_rm = read_cpu_reg(s, rm, sf);
3140             tcg_rn = read_cpu_reg(s, rn, sf);
3141             tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
3142             tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
3143             tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
3144             if (!sf) {
3145                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3146             }
3147         }
3148     }
3149 }
3150
3151 /* C3.4 Data processing - immediate */
3152 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3153 {
3154     switch (extract32(insn, 23, 6)) {
3155     case 0x20: case 0x21: /* PC-rel. addressing */
3156         disas_pc_rel_adr(s, insn);
3157         break;
3158     case 0x22: case 0x23: /* Add/subtract (immediate) */
3159         disas_add_sub_imm(s, insn);
3160         break;
3161     case 0x24: /* Logical (immediate) */
3162         disas_logic_imm(s, insn);
3163         break;
3164     case 0x25: /* Move wide (immediate) */
3165         disas_movw_imm(s, insn);
3166         break;
3167     case 0x26: /* Bitfield */
3168         disas_bitfield(s, insn);
3169         break;
3170     case 0x27: /* Extract */
3171         disas_extract(s, insn);
3172         break;
3173     default:
3174         unallocated_encoding(s);
3175         break;
3176     }
3177 }
3178
3179 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
3180  * Note that it is the caller's responsibility to ensure that the
3181  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
3182  * mandated semantics for out of range shifts.
3183  */
3184 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
3185                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3186 {
3187     switch (shift_type) {
3188     case A64_SHIFT_TYPE_LSL:
3189         tcg_gen_shl_i64(dst, src, shift_amount);
3190         break;
3191     case A64_SHIFT_TYPE_LSR:
3192         tcg_gen_shr_i64(dst, src, shift_amount);
3193         break;
3194     case A64_SHIFT_TYPE_ASR:
3195         if (!sf) {
3196             tcg_gen_ext32s_i64(dst, src);
3197         }
3198         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
3199         break;
3200     case A64_SHIFT_TYPE_ROR:
3201         if (sf) {
3202             tcg_gen_rotr_i64(dst, src, shift_amount);
3203         } else {
3204             TCGv_i32 t0, t1;
3205             t0 = tcg_temp_new_i32();
3206             t1 = tcg_temp_new_i32();
3207             tcg_gen_extrl_i64_i32(t0, src);
3208             tcg_gen_extrl_i64_i32(t1, shift_amount);
3209             tcg_gen_rotr_i32(t0, t0, t1);
3210             tcg_gen_extu_i32_i64(dst, t0);
3211             tcg_temp_free_i32(t0);
3212             tcg_temp_free_i32(t1);
3213         }
3214         break;
3215     default:
3216         assert(FALSE); /* all shift types should be handled */
3217         break;
3218     }
3219
3220     if (!sf) { /* zero extend final result */
3221         tcg_gen_ext32u_i64(dst, dst);
3222     }
3223 }
3224
3225 /* Shift a TCGv src by immediate, put result in dst.
3226  * The shift amount must be in range (this should always be true as the
3227  * relevant instructions will UNDEF on bad shift immediates).
3228  */
3229 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
3230                           enum a64_shift_type shift_type, unsigned int shift_i)
3231 {
3232     assert(shift_i < (sf ? 64 : 32));
3233
3234     if (shift_i == 0) {
3235         tcg_gen_mov_i64(dst, src);
3236     } else {
3237         TCGv_i64 shift_const;
3238
3239         shift_const = tcg_const_i64(shift_i);
3240         shift_reg(dst, src, sf, shift_type, shift_const);
3241         tcg_temp_free_i64(shift_const);
3242     }
3243 }
3244
3245 /* C3.5.10 Logical (shifted register)
3246  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
3247  * +----+-----+-----------+-------+---+------+--------+------+------+
3248  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
3249  * +----+-----+-----------+-------+---+------+--------+------+------+
3250  */
3251 static void disas_logic_reg(DisasContext *s, uint32_t insn)
3252 {
3253     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3254     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3255
3256     sf = extract32(insn, 31, 1);
3257     opc = extract32(insn, 29, 2);
3258     shift_type = extract32(insn, 22, 2);
3259     invert = extract32(insn, 21, 1);
3260     rm = extract32(insn, 16, 5);
3261     shift_amount = extract32(insn, 10, 6);
3262     rn = extract32(insn, 5, 5);
3263     rd = extract32(insn, 0, 5);
3264
3265     if (!sf && (shift_amount & (1 << 5))) {
3266         unallocated_encoding(s);
3267         return;
3268     }
3269
3270     tcg_rd = cpu_reg(s, rd);
3271
3272     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3273         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3274          * register-register MOV and MVN, so it is worth special casing.
3275          */
3276         tcg_rm = cpu_reg(s, rm);
3277         if (invert) {
3278             tcg_gen_not_i64(tcg_rd, tcg_rm);
3279             if (!sf) {
3280                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3281             }
3282         } else {
3283             if (sf) {
3284                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
3285             } else {
3286                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
3287             }
3288         }
3289         return;
3290     }
3291
3292     tcg_rm = read_cpu_reg(s, rm, sf);
3293
3294     if (shift_amount) {
3295         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3296     }
3297
3298     tcg_rn = cpu_reg(s, rn);
3299
3300     switch (opc | (invert << 2)) {
3301     case 0: /* AND */
3302     case 3: /* ANDS */
3303         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3304         break;
3305     case 1: /* ORR */
3306         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3307         break;
3308     case 2: /* EOR */
3309         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3310         break;
3311     case 4: /* BIC */
3312     case 7: /* BICS */
3313         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3314         break;
3315     case 5: /* ORN */
3316         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3317         break;
3318     case 6: /* EON */
3319         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3320         break;
3321     default:
3322         assert(FALSE);
3323         break;
3324     }
3325
3326     if (!sf) {
3327         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3328     }
3329
3330     if (opc == 3) {
3331         gen_logic_CC(sf, tcg_rd);
3332     }
3333 }
3334
3335 /*
3336  * C3.5.1 Add/subtract (extended register)
3337  *
3338  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
3339  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3340  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
3341  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3342  *
3343  *  sf: 0 -> 32bit, 1 -> 64bit
3344  *  op: 0 -> add  , 1 -> sub
3345  *   S: 1 -> set flags
3346  * opt: 00
3347  * option: extension type (see DecodeRegExtend)
3348  * imm3: optional shift to Rm
3349  *
3350  * Rd = Rn + LSL(extend(Rm), amount)
3351  */
3352 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3353 {
3354     int rd = extract32(insn, 0, 5);
3355     int rn = extract32(insn, 5, 5);
3356     int imm3 = extract32(insn, 10, 3);
3357     int option = extract32(insn, 13, 3);
3358     int rm = extract32(insn, 16, 5);
3359     bool setflags = extract32(insn, 29, 1);
3360     bool sub_op = extract32(insn, 30, 1);
3361     bool sf = extract32(insn, 31, 1);
3362
3363     TCGv_i64 tcg_rm, tcg_rn; /* temps */
3364     TCGv_i64 tcg_rd;
3365     TCGv_i64 tcg_result;
3366
3367     if (imm3 > 4) {
3368         unallocated_encoding(s);
3369         return;
3370     }
3371
3372     /* non-flag setting ops may use SP */
3373     if (!setflags) {
3374         tcg_rd = cpu_reg_sp(s, rd);
3375     } else {
3376         tcg_rd = cpu_reg(s, rd);
3377     }
3378     tcg_rn = read_cpu_reg_sp(s, rn, sf);
3379
3380     tcg_rm = read_cpu_reg(s, rm, sf);
3381     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3382
3383     tcg_result = tcg_temp_new_i64();
3384
3385     if (!setflags) {
3386         if (sub_op) {
3387             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3388         } else {
3389             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3390         }
3391     } else {
3392         if (sub_op) {
3393             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3394         } else {
3395             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3396         }
3397     }
3398
3399     if (sf) {
3400         tcg_gen_mov_i64(tcg_rd, tcg_result);
3401     } else {
3402         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3403     }
3404
3405     tcg_temp_free_i64(tcg_result);
3406 }
3407
3408 /*
3409  * C3.5.2 Add/subtract (shifted register)
3410  *
3411  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
3412  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3413  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
3414  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3415  *
3416  *    sf: 0 -> 32bit, 1 -> 64bit
3417  *    op: 0 -> add  , 1 -> sub
3418  *     S: 1 -> set flags
3419  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3420  *  imm6: Shift amount to apply to Rm before the add/sub
3421  */
3422 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3423 {
3424     int rd = extract32(insn, 0, 5);
3425     int rn = extract32(insn, 5, 5);
3426     int imm6 = extract32(insn, 10, 6);
3427     int rm = extract32(insn, 16, 5);
3428     int shift_type = extract32(insn, 22, 2);
3429     bool setflags = extract32(insn, 29, 1);
3430     bool sub_op = extract32(insn, 30, 1);
3431     bool sf = extract32(insn, 31, 1);
3432
3433     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3434     TCGv_i64 tcg_rn, tcg_rm;
3435     TCGv_i64 tcg_result;
3436
3437     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3438         unallocated_encoding(s);
3439         return;
3440     }
3441
3442     tcg_rn = read_cpu_reg(s, rn, sf);
3443     tcg_rm = read_cpu_reg(s, rm, sf);
3444
3445     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
3446
3447     tcg_result = tcg_temp_new_i64();
3448
3449     if (!setflags) {
3450         if (sub_op) {
3451             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3452         } else {
3453             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3454         }
3455     } else {
3456         if (sub_op) {
3457             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3458         } else {
3459             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3460         }
3461     }
3462
3463     if (sf) {
3464         tcg_gen_mov_i64(tcg_rd, tcg_result);
3465     } else {
3466         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3467     }
3468
3469     tcg_temp_free_i64(tcg_result);
3470 }
3471
3472 /* C3.5.9 Data-processing (3 source)
3473
3474    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
3475   +--+------+-----------+------+------+----+------+------+------+
3476   |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
3477   +--+------+-----------+------+------+----+------+------+------+
3478
3479  */
3480 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
3481 {
3482     int rd = extract32(insn, 0, 5);
3483     int rn = extract32(insn, 5, 5);
3484     int ra = extract32(insn, 10, 5);
3485     int rm = extract32(insn, 16, 5);
3486     int op_id = (extract32(insn, 29, 3) << 4) |
3487         (extract32(insn, 21, 3) << 1) |
3488         extract32(insn, 15, 1);
3489     bool sf = extract32(insn, 31, 1);
3490     bool is_sub = extract32(op_id, 0, 1);
3491     bool is_high = extract32(op_id, 2, 1);
3492     bool is_signed = false;
3493     TCGv_i64 tcg_op1;
3494     TCGv_i64 tcg_op2;
3495     TCGv_i64 tcg_tmp;
3496
3497     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
3498     switch (op_id) {
3499     case 0x42: /* SMADDL */
3500     case 0x43: /* SMSUBL */
3501     case 0x44: /* SMULH */
3502         is_signed = true;
3503         break;
3504     case 0x0: /* MADD (32bit) */
3505     case 0x1: /* MSUB (32bit) */
3506     case 0x40: /* MADD (64bit) */
3507     case 0x41: /* MSUB (64bit) */
3508     case 0x4a: /* UMADDL */
3509     case 0x4b: /* UMSUBL */
3510     case 0x4c: /* UMULH */
3511         break;
3512     default:
3513         unallocated_encoding(s);
3514         return;
3515     }
3516
3517     if (is_high) {
3518         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
3519         TCGv_i64 tcg_rd = cpu_reg(s, rd);
3520         TCGv_i64 tcg_rn = cpu_reg(s, rn);
3521         TCGv_i64 tcg_rm = cpu_reg(s, rm);
3522
3523         if (is_signed) {
3524             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3525         } else {
3526             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3527         }
3528
3529         tcg_temp_free_i64(low_bits);
3530         return;
3531     }
3532
3533     tcg_op1 = tcg_temp_new_i64();
3534     tcg_op2 = tcg_temp_new_i64();
3535     tcg_tmp = tcg_temp_new_i64();
3536
3537     if (op_id < 0x42) {
3538         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
3539         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
3540     } else {
3541         if (is_signed) {
3542             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
3543             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
3544         } else {
3545             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
3546             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
3547         }
3548     }
3549
3550     if (ra == 31 && !is_sub) {
3551         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
3552         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
3553     } else {
3554         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
3555         if (is_sub) {
3556             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3557         } else {
3558             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3559         }
3560     }
3561
3562     if (!sf) {
3563         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
3564     }
3565
3566     tcg_temp_free_i64(tcg_op1);
3567     tcg_temp_free_i64(tcg_op2);
3568     tcg_temp_free_i64(tcg_tmp);
3569 }
3570
3571 /* C3.5.3 - Add/subtract (with carry)
3572  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
3573  * +--+--+--+------------------------+------+---------+------+-----+
3574  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
3575  * +--+--+--+------------------------+------+---------+------+-----+
3576  *                                            [000000]
3577  */
3578
3579 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
3580 {
3581     unsigned int sf, op, setflags, rm, rn, rd;
3582     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
3583
3584     if (extract32(insn, 10, 6) != 0) {
3585         unallocated_encoding(s);
3586         return;
3587     }
3588
3589     sf = extract32(insn, 31, 1);
3590     op = extract32(insn, 30, 1);
3591     setflags = extract32(insn, 29, 1);
3592     rm = extract32(insn, 16, 5);
3593     rn = extract32(insn, 5, 5);
3594     rd = extract32(insn, 0, 5);
3595
3596     tcg_rd = cpu_reg(s, rd);
3597     tcg_rn = cpu_reg(s, rn);
3598
3599     if (op) {
3600         tcg_y = new_tmp_a64(s);
3601         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
3602     } else {
3603         tcg_y = cpu_reg(s, rm);
3604     }
3605
3606     if (setflags) {
3607         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
3608     } else {
3609         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
3610     }
3611 }
3612
3613 /* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
3614  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
3615  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3616  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
3617  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3618  *        [1]                             y                [0]       [0]
3619  */
3620 static void disas_cc(DisasContext *s, uint32_t insn)
3621 {
3622     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
3623     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
3624     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
3625     DisasCompare c;
3626
3627     if (!extract32(insn, 29, 1)) {
3628         unallocated_encoding(s);
3629         return;
3630     }
3631     if (insn & (1 << 10 | 1 << 4)) {
3632         unallocated_encoding(s);
3633         return;
3634     }
3635     sf = extract32(insn, 31, 1);
3636     op = extract32(insn, 30, 1);
3637     is_imm = extract32(insn, 11, 1);
3638     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
3639     cond = extract32(insn, 12, 4);
3640     rn = extract32(insn, 5, 5);
3641     nzcv = extract32(insn, 0, 4);
3642
3643     /* Set T0 = !COND.  */
3644     tcg_t0 = tcg_temp_new_i32();
3645     arm_test_cc(&c, cond);
3646     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
3647     arm_free_cc(&c);
3648
3649     /* Load the arguments for the new comparison.  */
3650     if (is_imm) {
3651         tcg_y = new_tmp_a64(s);
3652         tcg_gen_movi_i64(tcg_y, y);
3653     } else {
3654         tcg_y = cpu_reg(s, y);
3655     }
3656     tcg_rn = cpu_reg(s, rn);
3657
3658     /* Set the flags for the new comparison.  */
3659     tcg_tmp = tcg_temp_new_i64();
3660     if (op) {
3661         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3662     } else {
3663         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3664     }
3665     tcg_temp_free_i64(tcg_tmp);
3666
3667     /* If COND was false, force the flags to #nzcv.  Compute two masks
3668      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
3669      * For tcg hosts that support ANDC, we can make do with just T1.
3670      * In either case, allow the tcg optimizer to delete any unused mask.
3671      */
3672     tcg_t1 = tcg_temp_new_i32();
3673     tcg_t2 = tcg_temp_new_i32();
3674     tcg_gen_neg_i32(tcg_t1, tcg_t0);
3675     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
3676
3677     if (nzcv & 8) { /* N */
3678         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
3679     } else {
3680         if (TCG_TARGET_HAS_andc_i32) {
3681             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
3682         } else {
3683             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
3684         }
3685     }
3686     if (nzcv & 4) { /* Z */
3687         if (TCG_TARGET_HAS_andc_i32) {
3688             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
3689         } else {
3690             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
3691         }
3692     } else {
3693         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
3694     }
3695     if (nzcv & 2) { /* C */
3696         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
3697     } else {
3698         if (TCG_TARGET_HAS_andc_i32) {
3699             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
3700         } else {
3701             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
3702         }
3703     }
3704     if (nzcv & 1) { /* V */
3705         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
3706     } else {
3707         if (TCG_TARGET_HAS_andc_i32) {
3708             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
3709         } else {
3710             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
3711         }
3712     }
3713     tcg_temp_free_i32(tcg_t0);
3714     tcg_temp_free_i32(tcg_t1);
3715     tcg_temp_free_i32(tcg_t2);
3716 }
3717
3718 /* C3.5.6 Conditional select
3719  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
3720  * +----+----+---+-----------------+------+------+-----+------+------+
3721  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
3722  * +----+----+---+-----------------+------+------+-----+------+------+
3723  */
3724 static void disas_cond_select(DisasContext *s, uint32_t insn)
3725 {
3726     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
3727     TCGv_i64 tcg_rd, zero;
3728     DisasCompare64 c;
3729
3730     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
3731         /* S == 1 or op2<1> == 1 */
3732         unallocated_encoding(s);
3733         return;
3734     }
3735     sf = extract32(insn, 31, 1);
3736     else_inv = extract32(insn, 30, 1);
3737     rm = extract32(insn, 16, 5);
3738     cond = extract32(insn, 12, 4);
3739     else_inc = extract32(insn, 10, 1);
3740     rn = extract32(insn, 5, 5);
3741     rd = extract32(insn, 0, 5);
3742
3743     tcg_rd = cpu_reg(s, rd);
3744
3745     a64_test_cc(&c, cond);
3746     zero = tcg_const_i64(0);
3747
3748     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
3749         /* CSET & CSETM.  */
3750         tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
3751         if (else_inv) {
3752             tcg_gen_neg_i64(tcg_rd, tcg_rd);
3753         }
3754     } else {
3755         TCGv_i64 t_true = cpu_reg(s, rn);
3756         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
3757         if (else_inv && else_inc) {
3758             tcg_gen_neg_i64(t_false, t_false);
3759         } else if (else_inv) {
3760             tcg_gen_not_i64(t_false, t_false);
3761         } else if (else_inc) {
3762             tcg_gen_addi_i64(t_false, t_false, 1);
3763         }
3764         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
3765     }
3766
3767     tcg_temp_free_i64(zero);
3768     a64_free_cc(&c);
3769
3770     if (!sf) {
3771         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3772     }
3773 }
3774
3775 static void handle_clz(DisasContext *s, unsigned int sf,
3776                        unsigned int rn, unsigned int rd)
3777 {
3778     TCGv_i64 tcg_rd, tcg_rn;
3779     tcg_rd = cpu_reg(s, rd);
3780     tcg_rn = cpu_reg(s, rn);
3781
3782     if (sf) {
3783         gen_helper_clz64(tcg_rd, tcg_rn);
3784     } else {
3785         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3786         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3787         gen_helper_clz(tcg_tmp32, tcg_tmp32);
3788         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3789         tcg_temp_free_i32(tcg_tmp32);
3790     }
3791 }
3792
3793 static void handle_cls(DisasContext *s, unsigned int sf,
3794                        unsigned int rn, unsigned int rd)
3795 {
3796     TCGv_i64 tcg_rd, tcg_rn;
3797     tcg_rd = cpu_reg(s, rd);
3798     tcg_rn = cpu_reg(s, rn);
3799
3800     if (sf) {
3801         gen_helper_cls64(tcg_rd, tcg_rn);
3802     } else {
3803         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3804         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3805         gen_helper_cls32(tcg_tmp32, tcg_tmp32);
3806         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3807         tcg_temp_free_i32(tcg_tmp32);
3808     }
3809 }
3810
3811 static void handle_rbit(DisasContext *s, unsigned int sf,
3812                         unsigned int rn, unsigned int rd)
3813 {
3814     TCGv_i64 tcg_rd, tcg_rn;
3815     tcg_rd = cpu_reg(s, rd);
3816     tcg_rn = cpu_reg(s, rn);
3817
3818     if (sf) {
3819         gen_helper_rbit64(tcg_rd, tcg_rn);
3820     } else {
3821         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3822         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3823         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
3824         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3825         tcg_temp_free_i32(tcg_tmp32);
3826     }
3827 }
3828
3829 /* C5.6.149 REV with sf==1, opcode==3 ("REV64") */
3830 static void handle_rev64(DisasContext *s, unsigned int sf,
3831                          unsigned int rn, unsigned int rd)
3832 {
3833     if (!sf) {
3834         unallocated_encoding(s);
3835         return;
3836     }
3837     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
3838 }
3839
3840 /* C5.6.149 REV with sf==0, opcode==2
3841  * C5.6.151 REV32 (sf==1, opcode==2)
3842  */
3843 static void handle_rev32(DisasContext *s, unsigned int sf,
3844                          unsigned int rn, unsigned int rd)
3845 {
3846     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3847
3848     if (sf) {
3849         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3850         TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3851
3852         /* bswap32_i64 requires zero high word */
3853         tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
3854         tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
3855         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3856         tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
3857         tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
3858
3859         tcg_temp_free_i64(tcg_tmp);
3860     } else {
3861         tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
3862         tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
3863     }
3864 }
3865
3866 /* C5.6.150 REV16 (opcode==1) */
3867 static void handle_rev16(DisasContext *s, unsigned int sf,
3868                          unsigned int rn, unsigned int rd)
3869 {
3870     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3871     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3872     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3873
3874     tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff);
3875     tcg_gen_bswap16_i64(tcg_rd, tcg_tmp);
3876
3877     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16);
3878     tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
3879     tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3880     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16);
3881
3882     if (sf) {
3883         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3884         tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
3885         tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3886         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16);
3887
3888         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48);
3889         tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3890         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16);
3891     }
3892
3893     tcg_temp_free_i64(tcg_tmp);
3894 }
3895
3896 /* C3.5.7 Data-processing (1 source)
3897  *   31  30  29  28             21 20     16 15    10 9    5 4    0
3898  * +----+---+---+-----------------+---------+--------+------+------+
3899  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
3900  * +----+---+---+-----------------+---------+--------+------+------+
3901  */
3902 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
3903 {
3904     unsigned int sf, opcode, rn, rd;
3905
3906     if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
3907         unallocated_encoding(s);
3908         return;
3909     }
3910
3911     sf = extract32(insn, 31, 1);
3912     opcode = extract32(insn, 10, 6);
3913     rn = extract32(insn, 5, 5);
3914     rd = extract32(insn, 0, 5);
3915
3916     switch (opcode) {
3917     case 0: /* RBIT */
3918         handle_rbit(s, sf, rn, rd);
3919         break;
3920     case 1: /* REV16 */
3921         handle_rev16(s, sf, rn, rd);
3922         break;
3923     case 2: /* REV32 */
3924         handle_rev32(s, sf, rn, rd);
3925         break;
3926     case 3: /* REV64 */
3927         handle_rev64(s, sf, rn, rd);
3928         break;
3929     case 4: /* CLZ */
3930         handle_clz(s, sf, rn, rd);
3931         break;
3932     case 5: /* CLS */
3933         handle_cls(s, sf, rn, rd);
3934         break;
3935     }
3936 }
3937
3938 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
3939                        unsigned int rm, unsigned int rn, unsigned int rd)
3940 {
3941     TCGv_i64 tcg_n, tcg_m, tcg_rd;
3942     tcg_rd = cpu_reg(s, rd);
3943
3944     if (!sf && is_signed) {
3945         tcg_n = new_tmp_a64(s);
3946         tcg_m = new_tmp_a64(s);
3947         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
3948         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
3949     } else {
3950         tcg_n = read_cpu_reg(s, rn, sf);
3951         tcg_m = read_cpu_reg(s, rm, sf);
3952     }
3953
3954     if (is_signed) {
3955         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
3956     } else {
3957         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
3958     }
3959
3960     if (!sf) { /* zero extend final result */
3961         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3962     }
3963 }
3964
3965 /* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */
3966 static void handle_shift_reg(DisasContext *s,
3967                              enum a64_shift_type shift_type, unsigned int sf,
3968                              unsigned int rm, unsigned int rn, unsigned int rd)
3969 {
3970     TCGv_i64 tcg_shift = tcg_temp_new_i64();
3971     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3972     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3973
3974     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
3975     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
3976     tcg_temp_free_i64(tcg_shift);
3977 }
3978
3979 /* CRC32[BHWX], CRC32C[BHWX] */
3980 static void handle_crc32(DisasContext *s,
3981                          unsigned int sf, unsigned int sz, bool crc32c,
3982                          unsigned int rm, unsigned int rn, unsigned int rd)
3983 {
3984     TCGv_i64 tcg_acc, tcg_val;
3985     TCGv_i32 tcg_bytes;
3986
3987     if (!arm_dc_feature(s, ARM_FEATURE_CRC)
3988         || (sf == 1 && sz != 3)
3989         || (sf == 0 && sz == 3)) {
3990         unallocated_encoding(s);
3991         return;
3992     }
3993
3994     if (sz == 3) {
3995         tcg_val = cpu_reg(s, rm);
3996     } else {
3997         uint64_t mask;
3998         switch (sz) {
3999         case 0:
4000             mask = 0xFF;
4001             break;
4002         case 1:
4003             mask = 0xFFFF;
4004             break;
4005         case 2:
4006             mask = 0xFFFFFFFF;
4007             break;
4008         default:
4009             g_assert_not_reached();
4010         }
4011         tcg_val = new_tmp_a64(s);
4012         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
4013     }
4014
4015     tcg_acc = cpu_reg(s, rn);
4016     tcg_bytes = tcg_const_i32(1 << sz);
4017
4018     if (crc32c) {
4019         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4020     } else {
4021         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4022     }
4023
4024     tcg_temp_free_i32(tcg_bytes);
4025 }
4026
4027 /* C3.5.8 Data-processing (2 source)
4028  *   31   30  29 28             21 20  16 15    10 9    5 4    0
4029  * +----+---+---+-----------------+------+--------+------+------+
4030  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
4031  * +----+---+---+-----------------+------+--------+------+------+
4032  */
4033 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
4034 {
4035     unsigned int sf, rm, opcode, rn, rd;
4036     sf = extract32(insn, 31, 1);
4037     rm = extract32(insn, 16, 5);
4038     opcode = extract32(insn, 10, 6);
4039     rn = extract32(insn, 5, 5);
4040     rd = extract32(insn, 0, 5);
4041
4042     if (extract32(insn, 29, 1)) {
4043         unallocated_encoding(s);
4044         return;
4045     }
4046
4047     switch (opcode) {
4048     case 2: /* UDIV */
4049         handle_div(s, false, sf, rm, rn, rd);
4050         break;
4051     case 3: /* SDIV */
4052         handle_div(s, true, sf, rm, rn, rd);
4053         break;
4054     case 8: /* LSLV */
4055         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
4056         break;
4057     case 9: /* LSRV */
4058         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
4059         break;
4060     case 10: /* ASRV */
4061         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
4062         break;
4063     case 11: /* RORV */
4064         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
4065         break;
4066     case 16:
4067     case 17:
4068     case 18:
4069     case 19:
4070     case 20:
4071     case 21:
4072     case 22:
4073     case 23: /* CRC32 */
4074     {
4075         int sz = extract32(opcode, 0, 2);
4076         bool crc32c = extract32(opcode, 2, 1);
4077         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
4078         break;
4079     }
4080     default:
4081         unallocated_encoding(s);
4082         break;
4083     }
4084 }
4085
4086 /* C3.5 Data processing - register */
4087 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
4088 {
4089     switch (extract32(insn, 24, 5)) {
4090     case 0x0a: /* Logical (shifted register) */
4091         disas_logic_reg(s, insn);
4092         break;
4093     case 0x0b: /* Add/subtract */
4094         if (insn & (1 << 21)) { /* (extended register) */
4095             disas_add_sub_ext_reg(s, insn);
4096         } else {
4097             disas_add_sub_reg(s, insn);
4098         }
4099         break;
4100     case 0x1b: /* Data-processing (3 source) */
4101         disas_data_proc_3src(s, insn);
4102         break;
4103     case 0x1a:
4104         switch (extract32(insn, 21, 3)) {
4105         case 0x0: /* Add/subtract (with carry) */
4106             disas_adc_sbc(s, insn);
4107             break;
4108         case 0x2: /* Conditional compare */
4109             disas_cc(s, insn); /* both imm and reg forms */
4110             break;
4111         case 0x4: /* Conditional select */
4112             disas_cond_select(s, insn);
4113             break;
4114         case 0x6: /* Data-processing */
4115             if (insn & (1 << 30)) { /* (1 source) */
4116                 disas_data_proc_1src(s, insn);
4117             } else {            /* (2 source) */
4118                 disas_data_proc_2src(s, insn);
4119             }
4120             break;
4121         default:
4122             unallocated_encoding(s);
4123             break;
4124         }
4125         break;
4126     default:
4127         unallocated_encoding(s);
4128         break;
4129     }
4130 }
4131
4132 static void handle_fp_compare(DisasContext *s, bool is_double,
4133                               unsigned int rn, unsigned int rm,
4134                               bool cmp_with_zero, bool signal_all_nans)
4135 {
4136     TCGv_i64 tcg_flags = tcg_temp_new_i64();
4137     TCGv_ptr fpst = get_fpstatus_ptr();
4138
4139     if (is_double) {
4140         TCGv_i64 tcg_vn, tcg_vm;
4141
4142         tcg_vn = read_fp_dreg(s, rn);
4143         if (cmp_with_zero) {
4144             tcg_vm = tcg_const_i64(0);
4145         } else {
4146             tcg_vm = read_fp_dreg(s, rm);
4147         }
4148         if (signal_all_nans) {
4149             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4150         } else {
4151             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4152         }
4153         tcg_temp_free_i64(tcg_vn);
4154         tcg_temp_free_i64(tcg_vm);
4155     } else {
4156         TCGv_i32 tcg_vn, tcg_vm;
4157
4158         tcg_vn = read_fp_sreg(s, rn);
4159         if (cmp_with_zero) {
4160             tcg_vm = tcg_const_i32(0);
4161         } else {
4162             tcg_vm = read_fp_sreg(s, rm);
4163         }
4164         if (signal_all_nans) {
4165             gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4166         } else {
4167             gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4168         }
4169         tcg_temp_free_i32(tcg_vn);
4170         tcg_temp_free_i32(tcg_vm);
4171     }
4172
4173     tcg_temp_free_ptr(fpst);
4174
4175     gen_set_nzcv(tcg_flags);
4176
4177     tcg_temp_free_i64(tcg_flags);
4178 }
4179
4180 /* C3.6.22 Floating point compare
4181  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
4182  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4183  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
4184  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4185  */
4186 static void disas_fp_compare(DisasContext *s, uint32_t insn)
4187 {
4188     unsigned int mos, type, rm, op, rn, opc, op2r;
4189
4190     mos = extract32(insn, 29, 3);
4191     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4192     rm = extract32(insn, 16, 5);
4193     op = extract32(insn, 14, 2);
4194     rn = extract32(insn, 5, 5);
4195     opc = extract32(insn, 3, 2);
4196     op2r = extract32(insn, 0, 3);
4197
4198     if (mos || op || op2r || type > 1) {
4199         unallocated_encoding(s);
4200         return;
4201     }
4202
4203     if (!fp_access_check(s)) {
4204         return;
4205     }
4206
4207     handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
4208 }
4209
4210 /* C3.6.23 Floating point conditional compare
4211  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
4212  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4213  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
4214  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4215  */
4216 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
4217 {
4218     unsigned int mos, type, rm, cond, rn, op, nzcv;
4219     TCGv_i64 tcg_flags;
4220     TCGLabel *label_continue = NULL;
4221
4222     mos = extract32(insn, 29, 3);
4223     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4224     rm = extract32(insn, 16, 5);
4225     cond = extract32(insn, 12, 4);
4226     rn = extract32(insn, 5, 5);
4227     op = extract32(insn, 4, 1);
4228     nzcv = extract32(insn, 0, 4);
4229
4230     if (mos || type > 1) {
4231         unallocated_encoding(s);
4232         return;
4233     }
4234
4235     if (!fp_access_check(s)) {
4236         return;
4237     }
4238
4239     if (cond < 0x0e) { /* not always */
4240         TCGLabel *label_match = gen_new_label();
4241         label_continue = gen_new_label();
4242         arm_gen_test_cc(cond, label_match);
4243         /* nomatch: */
4244         tcg_flags = tcg_const_i64(nzcv << 28);
4245         gen_set_nzcv(tcg_flags);
4246         tcg_temp_free_i64(tcg_flags);
4247         tcg_gen_br(label_continue);
4248         gen_set_label(label_match);
4249     }
4250
4251     handle_fp_compare(s, type, rn, rm, false, op);
4252
4253     if (cond < 0x0e) {
4254         gen_set_label(label_continue);
4255     }
4256 }
4257
4258 /* C3.6.24 Floating point conditional select
4259  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
4260  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4261  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
4262  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4263  */
4264 static void disas_fp_csel(DisasContext *s, uint32_t insn)
4265 {
4266     unsigned int mos, type, rm, cond, rn, rd;
4267     TCGv_i64 t_true, t_false, t_zero;
4268     DisasCompare64 c;
4269
4270     mos = extract32(insn, 29, 3);
4271     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4272     rm = extract32(insn, 16, 5);
4273     cond = extract32(insn, 12, 4);
4274     rn = extract32(insn, 5, 5);
4275     rd = extract32(insn, 0, 5);
4276
4277     if (mos || type > 1) {
4278         unallocated_encoding(s);
4279         return;
4280     }
4281
4282     if (!fp_access_check(s)) {
4283         return;
4284     }
4285
4286     /* Zero extend sreg inputs to 64 bits now.  */
4287     t_true = tcg_temp_new_i64();
4288     t_false = tcg_temp_new_i64();
4289     read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
4290     read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);
4291
4292     a64_test_cc(&c, cond);
4293     t_zero = tcg_const_i64(0);
4294     tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
4295     tcg_temp_free_i64(t_zero);
4296     tcg_temp_free_i64(t_false);
4297     a64_free_cc(&c);
4298
4299     /* Note that sregs write back zeros to the high bits,
4300        and we've already done the zero-extension.  */
4301     write_fp_dreg(s, rd, t_true);
4302     tcg_temp_free_i64(t_true);
4303 }
4304
4305 /* C3.6.25 Floating-point data-processing (1 source) - single precision */
4306 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
4307 {
4308     TCGv_ptr fpst;
4309     TCGv_i32 tcg_op;
4310     TCGv_i32 tcg_res;
4311
4312     fpst = get_fpstatus_ptr();
4313     tcg_op = read_fp_sreg(s, rn);
4314     tcg_res = tcg_temp_new_i32();
4315
4316     switch (opcode) {
4317     case 0x0: /* FMOV */
4318         tcg_gen_mov_i32(tcg_res, tcg_op);
4319         break;
4320     case 0x1: /* FABS */
4321         gen_helper_vfp_abss(tcg_res, tcg_op);
4322         break;
4323     case 0x2: /* FNEG */
4324         gen_helper_vfp_negs(tcg_res, tcg_op);
4325         break;
4326     case 0x3: /* FSQRT */
4327         gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
4328         break;
4329     case 0x8: /* FRINTN */
4330     case 0x9: /* FRINTP */
4331     case 0xa: /* FRINTM */
4332     case 0xb: /* FRINTZ */
4333     case 0xc: /* FRINTA */
4334     {
4335         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4336
4337         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4338         gen_helper_rints(tcg_res, tcg_op, fpst);
4339
4340         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4341         tcg_temp_free_i32(tcg_rmode);
4342         break;
4343     }
4344     case 0xe: /* FRINTX */
4345         gen_helper_rints_exact(tcg_res, tcg_op, fpst);
4346         break;
4347     case 0xf: /* FRINTI */
4348         gen_helper_rints(tcg_res, tcg_op, fpst);
4349         break;
4350     default:
4351         abort();
4352     }
4353
4354     write_fp_sreg(s, rd, tcg_res);
4355
4356     tcg_temp_free_ptr(fpst);
4357     tcg_temp_free_i32(tcg_op);
4358     tcg_temp_free_i32(tcg_res);
4359 }
4360
4361 /* C3.6.25 Floating-point data-processing (1 source) - double precision */
4362 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
4363 {
4364     TCGv_ptr fpst;
4365     TCGv_i64 tcg_op;
4366     TCGv_i64 tcg_res;
4367
4368     fpst = get_fpstatus_ptr();
4369     tcg_op = read_fp_dreg(s, rn);
4370     tcg_res = tcg_temp_new_i64();
4371
4372     switch (opcode) {
4373     case 0x0: /* FMOV */
4374         tcg_gen_mov_i64(tcg_res, tcg_op);
4375         break;
4376     case 0x1: /* FABS */
4377         gen_helper_vfp_absd(tcg_res, tcg_op);
4378         break;
4379     case 0x2: /* FNEG */
4380         gen_helper_vfp_negd(tcg_res, tcg_op);
4381         break;
4382     case 0x3: /* FSQRT */
4383         gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
4384         break;
4385     case 0x8: /* FRINTN */
4386     case 0x9: /* FRINTP */
4387     case 0xa: /* FRINTM */
4388     case 0xb: /* FRINTZ */
4389     case 0xc: /* FRINTA */
4390     {
4391         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4392
4393         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4394         gen_helper_rintd(tcg_res, tcg_op, fpst);
4395
4396         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4397         tcg_temp_free_i32(tcg_rmode);
4398         break;
4399     }
4400     case 0xe: /* FRINTX */
4401         gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
4402         break;
4403     case 0xf: /* FRINTI */
4404         gen_helper_rintd(tcg_res, tcg_op, fpst);
4405         break;
4406     default:
4407         abort();
4408     }
4409
4410     write_fp_dreg(s, rd, tcg_res);
4411
4412     tcg_temp_free_ptr(fpst);
4413     tcg_temp_free_i64(tcg_op);
4414     tcg_temp_free_i64(tcg_res);
4415 }
4416
4417 static void handle_fp_fcvt(DisasContext *s, int opcode,
4418                            int rd, int rn, int dtype, int ntype)
4419 {
4420     switch (ntype) {
4421     case 0x0:
4422     {
4423         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4424         if (dtype == 1) {
4425             /* Single to double */
4426             TCGv_i64 tcg_rd = tcg_temp_new_i64();
4427             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
4428             write_fp_dreg(s, rd, tcg_rd);
4429             tcg_temp_free_i64(tcg_rd);
4430         } else {
4431             /* Single to half */
4432             TCGv_i32 tcg_rd = tcg_temp_new_i32();
4433             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
4434             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4435             write_fp_sreg(s, rd, tcg_rd);
4436             tcg_temp_free_i32(tcg_rd);
4437         }
4438         tcg_temp_free_i32(tcg_rn);
4439         break;
4440     }
4441     case 0x1:
4442     {
4443         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
4444         TCGv_i32 tcg_rd = tcg_temp_new_i32();
4445         if (dtype == 0) {
4446             /* Double to single */
4447             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
4448         } else {
4449             /* Double to half */
4450             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
4451             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4452         }
4453         write_fp_sreg(s, rd, tcg_rd);
4454         tcg_temp_free_i32(tcg_rd);
4455         tcg_temp_free_i64(tcg_rn);
4456         break;
4457     }
4458     case 0x3:
4459     {
4460         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4461         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
4462         if (dtype == 0) {
4463             /* Half to single */
4464             TCGv_i32 tcg_rd = tcg_temp_new_i32();
4465             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
4466             write_fp_sreg(s, rd, tcg_rd);
4467             tcg_temp_free_i32(tcg_rd);
4468         } else {
4469             /* Half to double */
4470             TCGv_i64 tcg_rd = tcg_temp_new_i64();
4471             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
4472             write_fp_dreg(s, rd, tcg_rd);
4473             tcg_temp_free_i64(tcg_rd);
4474         }
4475         tcg_temp_free_i32(tcg_rn);
4476         break;
4477     }
4478     default:
4479         abort();
4480     }
4481 }
4482
4483 /* C3.6.25 Floating point data-processing (1 source)
4484  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
4485  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4486  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
4487  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4488  */
4489 static void disas_fp_1src(DisasContext *s, uint32_t insn)
4490 {
4491     int type = extract32(insn, 22, 2);
4492     int opcode = extract32(insn, 15, 6);
4493     int rn = extract32(insn, 5, 5);
4494     int rd = extract32(insn, 0, 5);
4495
4496     switch (opcode) {
4497     case 0x4: case 0x5: case 0x7:
4498     {
4499         /* FCVT between half, single and double precision */
4500         int dtype = extract32(opcode, 0, 2);
4501         if (type == 2 || dtype == type) {
4502             unallocated_encoding(s);
4503             return;
4504         }
4505         if (!fp_access_check(s)) {
4506             return;
4507         }
4508
4509         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
4510         break;
4511     }
4512     case 0x0 ... 0x3:
4513     case 0x8 ... 0xc:
4514     case 0xe ... 0xf:
4515         /* 32-to-32 and 64-to-64 ops */
4516         switch (type) {
4517         case 0:
4518             if (!fp_access_check(s)) {
4519                 return;
4520             }
4521
4522             handle_fp_1src_single(s, opcode, rd, rn);
4523             break;
4524         case 1:
4525             if (!fp_access_check(s)) {
4526                 return;
4527             }
4528
4529             handle_fp_1src_double(s, opcode, rd, rn);
4530             break;
4531         default:
4532             unallocated_encoding(s);
4533         }
4534         break;
4535     default:
4536         unallocated_encoding(s);
4537         break;
4538     }
4539 }
4540
4541 /* C3.6.26 Floating-point data-processing (2 source) - single precision */
4542 static void handle_fp_2src_single(DisasContext *s, int opcode,
4543                                   int rd, int rn, int rm)
4544 {
4545     TCGv_i32 tcg_op1;
4546     TCGv_i32 tcg_op2;
4547     TCGv_i32 tcg_res;
4548     TCGv_ptr fpst;
4549
4550     tcg_res = tcg_temp_new_i32();
4551     fpst = get_fpstatus_ptr();
4552     tcg_op1 = read_fp_sreg(s, rn);
4553     tcg_op2 = read_fp_sreg(s, rm);
4554
4555     switch (opcode) {
4556     case 0x0: /* FMUL */
4557         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4558         break;
4559     case 0x1: /* FDIV */
4560         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
4561         break;
4562     case 0x2: /* FADD */
4563         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
4564         break;
4565     case 0x3: /* FSUB */
4566         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
4567         break;
4568     case 0x4: /* FMAX */
4569         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
4570         break;
4571     case 0x5: /* FMIN */
4572         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
4573         break;
4574     case 0x6: /* FMAXNM */
4575         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
4576         break;
4577     case 0x7: /* FMINNM */
4578         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
4579         break;
4580     case 0x8: /* FNMUL */
4581         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4582         gen_helper_vfp_negs(tcg_res, tcg_res);
4583         break;
4584     }
4585
4586     write_fp_sreg(s, rd, tcg_res);
4587
4588     tcg_temp_free_ptr(fpst);
4589     tcg_temp_free_i32(tcg_op1);
4590     tcg_temp_free_i32(tcg_op2);
4591     tcg_temp_free_i32(tcg_res);
4592 }
4593
4594 /* C3.6.26 Floating-point data-processing (2 source) - double precision */
4595 static void handle_fp_2src_double(DisasContext *s, int opcode,
4596                                   int rd, int rn, int rm)
4597 {
4598     TCGv_i64 tcg_op1;
4599     TCGv_i64 tcg_op2;
4600     TCGv_i64 tcg_res;
4601     TCGv_ptr fpst;
4602
4603     tcg_res = tcg_temp_new_i64();
4604     fpst = get_fpstatus_ptr();
4605     tcg_op1 = read_fp_dreg(s, rn);
4606     tcg_op2 = read_fp_dreg(s, rm);
4607
4608     switch (opcode) {
4609     case 0x0: /* FMUL */
4610         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4611         break;
4612     case 0x1: /* FDIV */
4613         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
4614         break;
4615     case 0x2: /* FADD */
4616         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
4617         break;
4618     case 0x3: /* FSUB */
4619         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
4620         break;
4621     case 0x4: /* FMAX */
4622         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
4623         break;
4624     case 0x5: /* FMIN */
4625         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
4626         break;
4627     case 0x6: /* FMAXNM */
4628         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4629         break;
4630     case 0x7: /* FMINNM */
4631         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4632         break;
4633     case 0x8: /* FNMUL */
4634         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4635         gen_helper_vfp_negd(tcg_res, tcg_res);
4636         break;
4637     }
4638
4639     write_fp_dreg(s, rd, tcg_res);
4640
4641     tcg_temp_free_ptr(fpst);
4642     tcg_temp_free_i64(tcg_op1);
4643     tcg_temp_free_i64(tcg_op2);
4644     tcg_temp_free_i64(tcg_res);
4645 }
4646
4647 /* C3.6.26 Floating point data-processing (2 source)
4648  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
4649  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4650  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
4651  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4652  */
4653 static void disas_fp_2src(DisasContext *s, uint32_t insn)
4654 {
4655     int type = extract32(insn, 22, 2);
4656     int rd = extract32(insn, 0, 5);
4657     int rn = extract32(insn, 5, 5);
4658     int rm = extract32(insn, 16, 5);
4659     int opcode = extract32(insn, 12, 4);
4660
4661     if (opcode > 8) {
4662         unallocated_encoding(s);
4663         return;
4664     }
4665
4666     switch (type) {
4667     case 0:
4668         if (!fp_access_check(s)) {
4669             return;
4670         }
4671         handle_fp_2src_single(s, opcode, rd, rn, rm);
4672         break;
4673     case 1:
4674         if (!fp_access_check(s)) {
4675             return;
4676         }
4677         handle_fp_2src_double(s, opcode, rd, rn, rm);
4678         break;
4679     default:
4680         unallocated_encoding(s);
4681     }
4682 }
4683
4684 /* C3.6.27 Floating-point data-processing (3 source) - single precision */
4685 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
4686                                   int rd, int rn, int rm, int ra)
4687 {
4688     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
4689     TCGv_i32 tcg_res = tcg_temp_new_i32();
4690     TCGv_ptr fpst = get_fpstatus_ptr();
4691
4692     tcg_op1 = read_fp_sreg(s, rn);
4693     tcg_op2 = read_fp_sreg(s, rm);
4694     tcg_op3 = read_fp_sreg(s, ra);
4695
4696     /* These are fused multiply-add, and must be done as one
4697      * floating point operation with no rounding between the
4698      * multiplication and addition steps.
4699      * NB that doing the negations here as separate steps is
4700      * correct : an input NaN should come out with its sign bit
4701      * flipped if it is a negated-input.
4702      */
4703     if (o1 == true) {
4704         gen_helper_vfp_negs(tcg_op3, tcg_op3);
4705     }
4706
4707     if (o0 != o1) {
4708         gen_helper_vfp_negs(tcg_op1, tcg_op1);
4709     }
4710
4711     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4712
4713     write_fp_sreg(s, rd, tcg_res);
4714
4715     tcg_temp_free_ptr(fpst);
4716     tcg_temp_free_i32(tcg_op1);
4717     tcg_temp_free_i32(tcg_op2);
4718     tcg_temp_free_i32(tcg_op3);
4719     tcg_temp_free_i32(tcg_res);
4720 }
4721
4722 /* C3.6.27 Floating-point data-processing (3 source) - double precision */
4723 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
4724                                   int rd, int rn, int rm, int ra)
4725 {
4726     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
4727     TCGv_i64 tcg_res = tcg_temp_new_i64();
4728     TCGv_ptr fpst = get_fpstatus_ptr();
4729
4730     tcg_op1 = read_fp_dreg(s, rn);
4731     tcg_op2 = read_fp_dreg(s, rm);
4732     tcg_op3 = read_fp_dreg(s, ra);
4733
4734     /* These are fused multiply-add, and must be done as one
4735      * floating point operation with no rounding between the
4736      * multiplication and addition steps.
4737      * NB that doing the negations here as separate steps is
4738      * correct : an input NaN should come out with its sign bit
4739      * flipped if it is a negated-input.
4740      */
4741     if (o1 == true) {
4742         gen_helper_vfp_negd(tcg_op3, tcg_op3);
4743     }
4744
4745     if (o0 != o1) {
4746         gen_helper_vfp_negd(tcg_op1, tcg_op1);
4747     }
4748
4749     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4750
4751     write_fp_dreg(s, rd, tcg_res);
4752
4753     tcg_temp_free_ptr(fpst);
4754     tcg_temp_free_i64(tcg_op1);
4755     tcg_temp_free_i64(tcg_op2);
4756     tcg_temp_free_i64(tcg_op3);
4757     tcg_temp_free_i64(tcg_res);
4758 }
4759
4760 /* C3.6.27 Floating point data-processing (3 source)
4761  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
4762  * +---+---+---+-----------+------+----+------+----+------+------+------+
4763  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4764  * +---+---+---+-----------+------+----+------+----+------+------+------+
4765  */
4766 static void disas_fp_3src(DisasContext *s, uint32_t insn)
4767 {
4768     int type = extract32(insn, 22, 2);
4769     int rd = extract32(insn, 0, 5);
4770     int rn = extract32(insn, 5, 5);
4771     int ra = extract32(insn, 10, 5);
4772     int rm = extract32(insn, 16, 5);
4773     bool o0 = extract32(insn, 15, 1);
4774     bool o1 = extract32(insn, 21, 1);
4775
4776     switch (type) {
4777     case 0:
4778         if (!fp_access_check(s)) {
4779             return;
4780         }
4781         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
4782         break;
4783     case 1:
4784         if (!fp_access_check(s)) {
4785             return;
4786         }
4787         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
4788         break;
4789     default:
4790         unallocated_encoding(s);
4791     }
4792 }
4793
4794 /* C3.6.28 Floating point immediate
4795  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
4796  * +---+---+---+-----------+------+---+------------+-------+------+------+
4797  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
4798  * +---+---+---+-----------+------+---+------------+-------+------+------+
4799  */
4800 static void disas_fp_imm(DisasContext *s, uint32_t insn)
4801 {
4802     int rd = extract32(insn, 0, 5);
4803     int imm8 = extract32(insn, 13, 8);
4804     int is_double = extract32(insn, 22, 2);
4805     uint64_t imm;
4806     TCGv_i64 tcg_res;
4807
4808     if (is_double > 1) {
4809         unallocated_encoding(s);
4810         return;
4811     }
4812
4813     if (!fp_access_check(s)) {
4814         return;
4815     }
4816
4817     /* The imm8 encodes the sign bit, enough bits to represent
4818      * an exponent in the range 01....1xx to 10....0xx,
4819      * and the most significant 4 bits of the mantissa; see
4820      * VFPExpandImm() in the v8 ARM ARM.
4821      */
4822     if (is_double) {
4823         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4824             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
4825             extract32(imm8, 0, 6);
4826         imm <<= 48;
4827     } else {
4828         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4829             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
4830             (extract32(imm8, 0, 6) << 3);
4831         imm <<= 16;
4832     }
4833
4834     tcg_res = tcg_const_i64(imm);
4835     write_fp_dreg(s, rd, tcg_res);
4836     tcg_temp_free_i64(tcg_res);
4837 }
4838
4839 /* Handle floating point <=> fixed point conversions. Note that we can
4840  * also deal with fp <=> integer conversions as a special case (scale == 64)
4841  * OPTME: consider handling that special case specially or at least skipping
4842  * the call to scalbn in the helpers for zero shifts.
4843  */
4844 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
4845                            bool itof, int rmode, int scale, int sf, int type)
4846 {
4847     bool is_signed = !(opcode & 1);
4848     bool is_double = type;
4849     TCGv_ptr tcg_fpstatus;
4850     TCGv_i32 tcg_shift;
4851
4852     tcg_fpstatus = get_fpstatus_ptr();
4853
4854     tcg_shift = tcg_const_i32(64 - scale);
4855
4856     if (itof) {
4857         TCGv_i64 tcg_int = cpu_reg(s, rn);
4858         if (!sf) {
4859             TCGv_i64 tcg_extend = new_tmp_a64(s);
4860
4861             if (is_signed) {
4862                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
4863             } else {
4864                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
4865             }
4866
4867             tcg_int = tcg_extend;
4868         }
4869
4870         if (is_double) {
4871             TCGv_i64 tcg_double = tcg_temp_new_i64();
4872             if (is_signed) {
4873                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
4874                                      tcg_shift, tcg_fpstatus);
4875             } else {
4876                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
4877                                      tcg_shift, tcg_fpstatus);
4878             }
4879             write_fp_dreg(s, rd, tcg_double);
4880             tcg_temp_free_i64(tcg_double);
4881         } else {
4882             TCGv_i32 tcg_single = tcg_temp_new_i32();
4883             if (is_signed) {
4884                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
4885                                      tcg_shift, tcg_fpstatus);
4886             } else {
4887                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
4888                                      tcg_shift, tcg_fpstatus);
4889             }
4890             write_fp_sreg(s, rd, tcg_single);
4891             tcg_temp_free_i32(tcg_single);
4892         }
4893     } else {
4894         TCGv_i64 tcg_int = cpu_reg(s, rd);
4895         TCGv_i32 tcg_rmode;
4896
4897         if (extract32(opcode, 2, 1)) {
4898             /* There are too many rounding modes to all fit into rmode,
4899              * so FCVTA[US] is a special case.
4900              */
4901             rmode = FPROUNDING_TIEAWAY;
4902         }
4903
4904         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
4905
4906         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4907
4908         if (is_double) {
4909             TCGv_i64 tcg_double = read_fp_dreg(s, rn);
4910             if (is_signed) {
4911                 if (!sf) {
4912                     gen_helper_vfp_tosld(tcg_int, tcg_double,
4913                                          tcg_shift, tcg_fpstatus);
4914                 } else {
4915                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
4916                                          tcg_shift, tcg_fpstatus);
4917                 }
4918             } else {
4919                 if (!sf) {
4920                     gen_helper_vfp_tould(tcg_int, tcg_double,
4921                                          tcg_shift, tcg_fpstatus);
4922                 } else {
4923                     gen_helper_vfp_touqd(tcg_int, tcg_double,
4924                                          tcg_shift, tcg_fpstatus);
4925                 }
4926             }
4927             tcg_temp_free_i64(tcg_double);
4928         } else {
4929             TCGv_i32 tcg_single = read_fp_sreg(s, rn);
4930             if (sf) {
4931                 if (is_signed) {
4932                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
4933                                          tcg_shift, tcg_fpstatus);
4934                 } else {
4935                     gen_helper_vfp_touqs(tcg_int, tcg_single,
4936                                          tcg_shift, tcg_fpstatus);
4937                 }
4938             } else {
4939                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
4940                 if (is_signed) {
4941                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
4942                                          tcg_shift, tcg_fpstatus);
4943                 } else {
4944                     gen_helper_vfp_touls(tcg_dest, tcg_single,
4945                                          tcg_shift, tcg_fpstatus);
4946                 }
4947                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
4948                 tcg_temp_free_i32(tcg_dest);
4949             }
4950             tcg_temp_free_i32(tcg_single);
4951         }
4952
4953         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4954         tcg_temp_free_i32(tcg_rmode);
4955
4956         if (!sf) {
4957             tcg_gen_ext32u_i64(tcg_int, tcg_int);
4958         }
4959     }
4960
4961     tcg_temp_free_ptr(tcg_fpstatus);
4962     tcg_temp_free_i32(tcg_shift);
4963 }
4964
4965 /* C3.6.29 Floating point <-> fixed point conversions
4966  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
4967  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4968  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
4969  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4970  */
4971 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
4972 {
4973     int rd = extract32(insn, 0, 5);
4974     int rn = extract32(insn, 5, 5);
4975     int scale = extract32(insn, 10, 6);
4976     int opcode = extract32(insn, 16, 3);
4977     int rmode = extract32(insn, 19, 2);
4978     int type = extract32(insn, 22, 2);
4979     bool sbit = extract32(insn, 29, 1);
4980     bool sf = extract32(insn, 31, 1);
4981     bool itof;
4982
4983     if (sbit || (type > 1)
4984         || (!sf && scale < 32)) {
4985         unallocated_encoding(s);
4986         return;
4987     }
4988
4989     switch ((rmode << 3) | opcode) {
4990     case 0x2: /* SCVTF */
4991     case 0x3: /* UCVTF */
4992         itof = true;
4993         break;
4994     case 0x18: /* FCVTZS */
4995     case 0x19: /* FCVTZU */
4996         itof = false;
4997         break;
4998     default:
4999         unallocated_encoding(s);
5000         return;
5001     }
5002
5003     if (!fp_access_check(s)) {
5004         return;
5005     }
5006
5007     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
5008 }
5009
5010 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
5011 {
5012     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
5013      * without conversion.
5014      */
5015
5016     if (itof) {
5017         TCGv_i64 tcg_rn = cpu_reg(s, rn);
5018
5019         switch (type) {
5020         case 0:
5021         {
5022             /* 32 bit */
5023             TCGv_i64 tmp = tcg_temp_new_i64();
5024             tcg_gen_ext32u_i64(tmp, tcg_rn);
5025             tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
5026             tcg_gen_movi_i64(tmp, 0);
5027             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5028             tcg_temp_free_i64(tmp);
5029             break;
5030         }
5031         case 1:
5032         {
5033             /* 64 bit */
5034             TCGv_i64 tmp = tcg_const_i64(0);
5035             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
5036             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5037             tcg_temp_free_i64(tmp);
5038             break;
5039         }
5040         case 2:
5041             /* 64 bit to top half. */
5042             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
5043             break;
5044         }
5045     } else {
5046         TCGv_i64 tcg_rd = cpu_reg(s, rd);
5047
5048         switch (type) {
5049         case 0:
5050             /* 32 bit */
5051             tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
5052             break;
5053         case 1:
5054             /* 64 bit */
5055             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
5056             break;
5057         case 2:
5058             /* 64 bits from top half */
5059             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
5060             break;
5061         }
5062     }
5063 }
5064
5065 /* C3.6.30 Floating point <-> integer conversions
5066  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
5067  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5068  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
5069  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5070  */
5071 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
5072 {
5073     int rd = extract32(insn, 0, 5);
5074     int rn = extract32(insn, 5, 5);
5075     int opcode = extract32(insn, 16, 3);
5076     int rmode = extract32(insn, 19, 2);
5077     int type = extract32(insn, 22, 2);
5078     bool sbit = extract32(insn, 29, 1);
5079     bool sf = extract32(insn, 31, 1);
5080
5081     if (sbit) {
5082         unallocated_encoding(s);
5083         return;
5084     }
5085
5086     if (opcode > 5) {
5087         /* FMOV */
5088         bool itof = opcode & 1;
5089
5090         if (rmode >= 2) {
5091             unallocated_encoding(s);
5092             return;
5093         }
5094
5095         switch (sf << 3 | type << 1 | rmode) {
5096         case 0x0: /* 32 bit */
5097         case 0xa: /* 64 bit */
5098         case 0xd: /* 64 bit to top half of quad */
5099             break;
5100         default:
5101             /* all other sf/type/rmode combinations are invalid */
5102             unallocated_encoding(s);
5103             break;
5104         }
5105
5106         if (!fp_access_check(s)) {
5107             return;
5108         }
5109         handle_fmov(s, rd, rn, type, itof);
5110     } else {
5111         /* actual FP conversions */
5112         bool itof = extract32(opcode, 1, 1);
5113
5114         if (type > 1 || (rmode != 0 && opcode > 1)) {
5115             unallocated_encoding(s);
5116             return;
5117         }
5118
5119         if (!fp_access_check(s)) {
5120             return;
5121         }
5122         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
5123     }
5124 }
5125
5126 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
5127  *   31  30  29 28     25 24                          0
5128  * +---+---+---+---------+-----------------------------+
5129  * |   | 0 |   | 1 1 1 1 |                             |
5130  * +---+---+---+---------+-----------------------------+
5131  */
5132 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
5133 {
5134     if (extract32(insn, 24, 1)) {
5135         /* Floating point data-processing (3 source) */
5136         disas_fp_3src(s, insn);
5137     } else if (extract32(insn, 21, 1) == 0) {
5138         /* Floating point to fixed point conversions */
5139         disas_fp_fixed_conv(s, insn);
5140     } else {
5141         switch (extract32(insn, 10, 2)) {
5142         case 1:
5143             /* Floating point conditional compare */
5144             disas_fp_ccomp(s, insn);
5145             break;
5146         case 2:
5147             /* Floating point data-processing (2 source) */
5148             disas_fp_2src(s, insn);
5149             break;
5150         case 3:
5151             /* Floating point conditional select */
5152             disas_fp_csel(s, insn);
5153             break;
5154         case 0:
5155             switch (ctz32(extract32(insn, 12, 4))) {
5156             case 0: /* [15:12] == xxx1 */
5157                 /* Floating point immediate */
5158                 disas_fp_imm(s, insn);
5159                 break;
5160             case 1: /* [15:12] == xx10 */
5161                 /* Floating point compare */
5162                 disas_fp_compare(s, insn);
5163                 break;
5164             case 2: /* [15:12] == x100 */
5165                 /* Floating point data-processing (1 source) */
5166                 disas_fp_1src(s, insn);
5167                 break;
5168             case 3: /* [15:12] == 1000 */
5169                 unallocated_encoding(s);
5170                 break;
5171             default: /* [15:12] == 0000 */
5172                 /* Floating point <-> integer conversions */
5173                 disas_fp_int_conv(s, insn);
5174                 break;
5175             }
5176             break;
5177         }
5178     }
5179 }
5180
5181 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
5182                      int pos)
5183 {
5184     /* Extract 64 bits from the middle of two concatenated 64 bit
5185      * vector register slices left:right. The extracted bits start
5186      * at 'pos' bits into the right (least significant) side.
5187      * We return the result in tcg_right, and guarantee not to
5188      * trash tcg_left.
5189      */
5190     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5191     assert(pos > 0 && pos < 64);
5192
5193     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
5194     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
5195     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
5196
5197     tcg_temp_free_i64(tcg_tmp);
5198 }
5199
5200 /* C3.6.1 EXT
5201  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
5202  * +---+---+-------------+-----+---+------+---+------+---+------+------+
5203  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
5204  * +---+---+-------------+-----+---+------+---+------+---+------+------+
5205  */
5206 static void disas_simd_ext(DisasContext *s, uint32_t insn)
5207 {
5208     int is_q = extract32(insn, 30, 1);
5209     int op2 = extract32(insn, 22, 2);
5210     int imm4 = extract32(insn, 11, 4);
5211     int rm = extract32(insn, 16, 5);
5212     int rn = extract32(insn, 5, 5);
5213     int rd = extract32(insn, 0, 5);
5214     int pos = imm4 << 3;
5215     TCGv_i64 tcg_resl, tcg_resh;
5216
5217     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
5218         unallocated_encoding(s);
5219         return;
5220     }
5221
5222     if (!fp_access_check(s)) {
5223         return;
5224     }
5225
5226     tcg_resh = tcg_temp_new_i64();
5227     tcg_resl = tcg_temp_new_i64();
5228
5229     /* Vd gets bits starting at pos bits into Vm:Vn. This is
5230      * either extracting 128 bits from a 128:128 concatenation, or
5231      * extracting 64 bits from a 64:64 concatenation.
5232      */
5233     if (!is_q) {
5234         read_vec_element(s, tcg_resl, rn, 0, MO_64);
5235         if (pos != 0) {
5236             read_vec_element(s, tcg_resh, rm, 0, MO_64);
5237             do_ext64(s, tcg_resh, tcg_resl, pos);
5238         }
5239         tcg_gen_movi_i64(tcg_resh, 0);
5240     } else {
5241         TCGv_i64 tcg_hh;
5242         typedef struct {
5243             int reg;
5244             int elt;
5245         } EltPosns;
5246         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
5247         EltPosns *elt = eltposns;
5248
5249         if (pos >= 64) {
5250             elt++;
5251             pos -= 64;
5252         }
5253
5254         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
5255         elt++;
5256         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
5257         elt++;
5258         if (pos != 0) {
5259             do_ext64(s, tcg_resh, tcg_resl, pos);
5260             tcg_hh = tcg_temp_new_i64();
5261             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
5262             do_ext64(s, tcg_hh, tcg_resh, pos);
5263             tcg_temp_free_i64(tcg_hh);
5264         }
5265     }
5266
5267     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5268     tcg_temp_free_i64(tcg_resl);
5269     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5270     tcg_temp_free_i64(tcg_resh);
5271 }
5272
5273 /* C3.6.2 TBL/TBX
5274  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
5275  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5276  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
5277  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5278  */
5279 static void disas_simd_tb(DisasContext *s, uint32_t insn)
5280 {
5281     int op2 = extract32(insn, 22, 2);
5282     int is_q = extract32(insn, 30, 1);
5283     int rm = extract32(insn, 16, 5);
5284     int rn = extract32(insn, 5, 5);
5285     int rd = extract32(insn, 0, 5);
5286     int is_tblx = extract32(insn, 12, 1);
5287     int len = extract32(insn, 13, 2);
5288     TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
5289     TCGv_i32 tcg_regno, tcg_numregs;
5290
5291     if (op2 != 0) {
5292         unallocated_encoding(s);
5293         return;
5294     }
5295
5296     if (!fp_access_check(s)) {
5297         return;
5298     }
5299
5300     /* This does a table lookup: for every byte element in the input
5301      * we index into a table formed from up to four vector registers,
5302      * and then the output is the result of the lookups. Our helper
5303      * function does the lookup operation for a single 64 bit part of
5304      * the input.
5305      */
5306     tcg_resl = tcg_temp_new_i64();
5307     tcg_resh = tcg_temp_new_i64();
5308
5309     if (is_tblx) {
5310         read_vec_element(s, tcg_resl, rd, 0, MO_64);
5311     } else {
5312         tcg_gen_movi_i64(tcg_resl, 0);
5313     }
5314     if (is_tblx && is_q) {
5315         read_vec_element(s, tcg_resh, rd, 1, MO_64);
5316     } else {
5317         tcg_gen_movi_i64(tcg_resh, 0);
5318     }
5319
5320     tcg_idx = tcg_temp_new_i64();
5321     tcg_regno = tcg_const_i32(rn);
5322     tcg_numregs = tcg_const_i32(len + 1);
5323     read_vec_element(s, tcg_idx, rm, 0, MO_64);
5324     gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
5325                         tcg_regno, tcg_numregs);
5326     if (is_q) {
5327         read_vec_element(s, tcg_idx, rm, 1, MO_64);
5328         gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
5329                             tcg_regno, tcg_numregs);
5330     }
5331     tcg_temp_free_i64(tcg_idx);
5332     tcg_temp_free_i32(tcg_regno);
5333     tcg_temp_free_i32(tcg_numregs);
5334
5335     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5336     tcg_temp_free_i64(tcg_resl);
5337     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5338     tcg_temp_free_i64(tcg_resh);
5339 }
5340
5341 /* C3.6.3 ZIP/UZP/TRN
5342  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
5343  * +---+---+-------------+------+---+------+---+------------------+------+
5344  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
5345  * +---+---+-------------+------+---+------+---+------------------+------+
5346  */
5347 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
5348 {
5349     int rd = extract32(insn, 0, 5);
5350     int rn = extract32(insn, 5, 5);
5351     int rm = extract32(insn, 16, 5);
5352     int size = extract32(insn, 22, 2);
5353     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
5354      * bit 2 indicates 1 vs 2 variant of the insn.
5355      */
5356     int opcode = extract32(insn, 12, 2);
5357     bool part = extract32(insn, 14, 1);
5358     bool is_q = extract32(insn, 30, 1);
5359     int esize = 8 << size;
5360     int i, ofs;
5361     int datasize = is_q ? 128 : 64;
5362     int elements = datasize / esize;
5363     TCGv_i64 tcg_res, tcg_resl, tcg_resh;
5364
5365     if (opcode == 0 || (size == 3 && !is_q)) {
5366         unallocated_encoding(s);
5367         return;
5368     }
5369
5370     if (!fp_access_check(s)) {
5371         return;
5372     }
5373
5374     tcg_resl = tcg_const_i64(0);
5375     tcg_resh = tcg_const_i64(0);
5376     tcg_res = tcg_temp_new_i64();
5377
5378     for (i = 0; i < elements; i++) {
5379         switch (opcode) {
5380         case 1: /* UZP1/2 */
5381         {
5382             int midpoint = elements / 2;
5383             if (i < midpoint) {
5384                 read_vec_element(s, tcg_res, rn, 2 * i + part, size);
5385             } else {
5386                 read_vec_element(s, tcg_res, rm,
5387                                  2 * (i - midpoint) + part, size);
5388             }
5389             break;
5390         }
5391         case 2: /* TRN1/2 */
5392             if (i & 1) {
5393                 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
5394             } else {
5395                 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
5396             }
5397             break;
5398         case 3: /* ZIP1/2 */
5399         {
5400             int base = part * elements / 2;
5401             if (i & 1) {
5402                 read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
5403             } else {
5404                 read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
5405             }
5406             break;
5407         }
5408         default:
5409             g_assert_not_reached();
5410         }
5411
5412         ofs = i * esize;
5413         if (ofs < 64) {
5414             tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
5415             tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
5416         } else {
5417             tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
5418             tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
5419         }
5420     }
5421
5422     tcg_temp_free_i64(tcg_res);
5423
5424     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5425     tcg_temp_free_i64(tcg_resl);
5426     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5427     tcg_temp_free_i64(tcg_resh);
5428 }
5429
5430 static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
5431                         int opc, bool is_min, TCGv_ptr fpst)
5432 {
5433     /* Helper function for disas_simd_across_lanes: do a single precision
5434      * min/max operation on the specified two inputs,
5435      * and return the result in tcg_elt1.
5436      */
5437     if (opc == 0xc) {
5438         if (is_min) {
5439             gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5440         } else {
5441             gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5442         }
5443     } else {
5444         assert(opc == 0xf);
5445         if (is_min) {
5446             gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5447         } else {
5448             gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5449         }
5450     }
5451 }
5452
5453 /* C3.6.4 AdvSIMD across lanes
5454  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
5455  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5456  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
5457  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5458  */
5459 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
5460 {
5461     int rd = extract32(insn, 0, 5);
5462     int rn = extract32(insn, 5, 5);
5463     int size = extract32(insn, 22, 2);
5464     int opcode = extract32(insn, 12, 5);
5465     bool is_q = extract32(insn, 30, 1);
5466     bool is_u = extract32(insn, 29, 1);
5467     bool is_fp = false;
5468     bool is_min = false;
5469     int esize;
5470     int elements;
5471     int i;
5472     TCGv_i64 tcg_res, tcg_elt;
5473
5474     switch (opcode) {
5475     case 0x1b: /* ADDV */
5476         if (is_u) {
5477             unallocated_encoding(s);
5478             return;
5479         }
5480         /* fall through */
5481     case 0x3: /* SADDLV, UADDLV */
5482     case 0xa: /* SMAXV, UMAXV */
5483     case 0x1a: /* SMINV, UMINV */
5484         if (size == 3 || (size == 2 && !is_q)) {
5485             unallocated_encoding(s);
5486             return;
5487         }
5488         break;
5489     case 0xc: /* FMAXNMV, FMINNMV */
5490     case 0xf: /* FMAXV, FMINV */
5491         if (!is_u || !is_q || extract32(size, 0, 1)) {
5492             unallocated_encoding(s);
5493             return;
5494         }
5495         /* Bit 1 of size field encodes min vs max, and actual size is always
5496          * 32 bits: adjust the size variable so following code can rely on it
5497          */
5498         is_min = extract32(size, 1, 1);
5499         is_fp = true;
5500         size = 2;
5501         break;
5502     default:
5503         unallocated_encoding(s);
5504         return;
5505     }
5506
5507     if (!fp_access_check(s)) {
5508         return;
5509     }
5510
5511     esize = 8 << size;
5512     elements = (is_q ? 128 : 64) / esize;
5513
5514     tcg_res = tcg_temp_new_i64();
5515     tcg_elt = tcg_temp_new_i64();
5516
5517     /* These instructions operate across all lanes of a vector
5518      * to produce a single result. We can guarantee that a 64
5519      * bit intermediate is sufficient:
5520      *  + for [US]ADDLV the maximum element size is 32 bits, and
5521      *    the result type is 64 bits
5522      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
5523      *    same as the element size, which is 32 bits at most
5524      * For the integer operations we can choose to work at 64
5525      * or 32 bits and truncate at the end; for simplicity
5526      * we use 64 bits always. The floating point
5527      * ops do require 32 bit intermediates, though.
5528      */
5529     if (!is_fp) {
5530         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
5531
5532         for (i = 1; i < elements; i++) {
5533             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
5534
5535             switch (opcode) {
5536             case 0x03: /* SADDLV / UADDLV */
5537             case 0x1b: /* ADDV */
5538                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
5539                 break;
5540             case 0x0a: /* SMAXV / UMAXV */
5541                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
5542                                     tcg_res,
5543                                     tcg_res, tcg_elt, tcg_res, tcg_elt);
5544                 break;
5545             case 0x1a: /* SMINV / UMINV */
5546                 tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
5547                                     tcg_res,
5548                                     tcg_res, tcg_elt, tcg_res, tcg_elt);
5549                 break;
5550                 break;
5551             default:
5552                 g_assert_not_reached();
5553             }
5554
5555         }
5556     } else {
5557         /* Floating point ops which work on 32 bit (single) intermediates.
5558          * Note that correct NaN propagation requires that we do these
5559          * operations in exactly the order specified by the pseudocode.
5560          */
5561         TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
5562         TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
5563         TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
5564         TCGv_ptr fpst = get_fpstatus_ptr();
5565
5566         assert(esize == 32);
5567         assert(elements == 4);
5568
5569         read_vec_element(s, tcg_elt, rn, 0, MO_32);
5570         tcg_gen_extrl_i64_i32(tcg_elt1, tcg_elt);
5571         read_vec_element(s, tcg_elt, rn, 1, MO_32);
5572         tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5573
5574         do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5575
5576         read_vec_element(s, tcg_elt, rn, 2, MO_32);
5577         tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5578         read_vec_element(s, tcg_elt, rn, 3, MO_32);
5579         tcg_gen_extrl_i64_i32(tcg_elt3, tcg_elt);
5580
5581         do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
5582
5583         do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5584
5585         tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
5586         tcg_temp_free_i32(tcg_elt1);
5587         tcg_temp_free_i32(tcg_elt2);
5588         tcg_temp_free_i32(tcg_elt3);
5589         tcg_temp_free_ptr(fpst);
5590     }
5591
5592     tcg_temp_free_i64(tcg_elt);
5593
5594     /* Now truncate the result to the width required for the final output */
5595     if (opcode == 0x03) {
5596         /* SADDLV, UADDLV: result is 2*esize */
5597         size++;
5598     }
5599
5600     switch (size) {
5601     case 0:
5602         tcg_gen_ext8u_i64(tcg_res, tcg_res);
5603         break;
5604     case 1:
5605         tcg_gen_ext16u_i64(tcg_res, tcg_res);
5606         break;
5607     case 2:
5608         tcg_gen_ext32u_i64(tcg_res, tcg_res);
5609         break;
5610     case 3:
5611         break;
5612     default:
5613         g_assert_not_reached();
5614     }
5615
5616     write_fp_dreg(s, rd, tcg_res);
5617     tcg_temp_free_i64(tcg_res);
5618 }
5619
5620 /* C6.3.31 DUP (Element, Vector)
5621  *
5622  *  31  30   29              21 20    16 15        10  9    5 4    0
5623  * +---+---+-------------------+--------+-------------+------+------+
5624  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5625  * +---+---+-------------------+--------+-------------+------+------+
5626  *
5627  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5628  */
5629 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
5630                              int imm5)
5631 {
5632     int size = ctz32(imm5);
5633     int esize = 8 << size;
5634     int elements = (is_q ? 128 : 64) / esize;
5635     int index, i;
5636     TCGv_i64 tmp;
5637
5638     if (size > 3 || (size == 3 && !is_q)) {
5639         unallocated_encoding(s);
5640         return;
5641     }
5642
5643     if (!fp_access_check(s)) {
5644         return;
5645     }
5646
5647     index = imm5 >> (size + 1);
5648
5649     tmp = tcg_temp_new_i64();
5650     read_vec_element(s, tmp, rn, index, size);
5651
5652     for (i = 0; i < elements; i++) {
5653         write_vec_element(s, tmp, rd, i, size);
5654     }
5655
5656     if (!is_q) {
5657         clear_vec_high(s, rd);
5658     }
5659
5660     tcg_temp_free_i64(tmp);
5661 }
5662
5663 /* C6.3.31 DUP (element, scalar)
5664  *  31                   21 20    16 15        10  9    5 4    0
5665  * +-----------------------+--------+-------------+------+------+
5666  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5667  * +-----------------------+--------+-------------+------+------+
5668  */
5669 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
5670                               int imm5)
5671 {
5672     int size = ctz32(imm5);
5673     int index;
5674     TCGv_i64 tmp;
5675
5676     if (size > 3) {
5677         unallocated_encoding(s);
5678         return;
5679     }
5680
5681     if (!fp_access_check(s)) {
5682         return;
5683     }
5684
5685     index = imm5 >> (size + 1);
5686
5687     /* This instruction just extracts the specified element and
5688      * zero-extends it into the bottom of the destination register.
5689      */
5690     tmp = tcg_temp_new_i64();
5691     read_vec_element(s, tmp, rn, index, size);
5692     write_fp_dreg(s, rd, tmp);
5693     tcg_temp_free_i64(tmp);
5694 }
5695
5696 /* C6.3.32 DUP (General)
5697  *
5698  *  31  30   29              21 20    16 15        10  9    5 4    0
5699  * +---+---+-------------------+--------+-------------+------+------+
5700  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
5701  * +---+---+-------------------+--------+-------------+------+------+
5702  *
5703  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5704  */
5705 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
5706                              int imm5)
5707 {
5708     int size = ctz32(imm5);
5709     int esize = 8 << size;
5710     int elements = (is_q ? 128 : 64)/esize;
5711     int i = 0;
5712
5713     if (size > 3 || ((size == 3) && !is_q)) {
5714         unallocated_encoding(s);
5715         return;
5716     }
5717
5718     if (!fp_access_check(s)) {
5719         return;
5720     }
5721
5722     for (i = 0; i < elements; i++) {
5723         write_vec_element(s, cpu_reg(s, rn), rd, i, size);
5724     }
5725     if (!is_q) {
5726         clear_vec_high(s, rd);
5727     }
5728 }
5729
5730 /* C6.3.150 INS (Element)
5731  *
5732  *  31                   21 20    16 15  14    11  10 9    5 4    0
5733  * +-----------------------+--------+------------+---+------+------+
5734  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
5735  * +-----------------------+--------+------------+---+------+------+
5736  *
5737  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5738  * index: encoded in imm5<4:size+1>
5739  */
5740 static void handle_simd_inse(DisasContext *s, int rd, int rn,
5741                              int imm4, int imm5)
5742 {
5743     int size = ctz32(imm5);
5744     int src_index, dst_index;
5745     TCGv_i64 tmp;
5746
5747     if (size > 3) {
5748         unallocated_encoding(s);
5749         return;
5750     }
5751
5752     if (!fp_access_check(s)) {
5753         return;
5754     }
5755
5756     dst_index = extract32(imm5, 1+size, 5);
5757     src_index = extract32(imm4, size, 4);
5758
5759     tmp = tcg_temp_new_i64();
5760
5761     read_vec_element(s, tmp, rn, src_index, size);
5762     write_vec_element(s, tmp, rd, dst_index, size);
5763
5764     tcg_temp_free_i64(tmp);
5765 }
5766
5767
5768 /* C6.3.151 INS (General)
5769  *
5770  *  31                   21 20    16 15        10  9    5 4    0
5771  * +-----------------------+--------+-------------+------+------+
5772  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
5773  * +-----------------------+--------+-------------+------+------+
5774  *
5775  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5776  * index: encoded in imm5<4:size+1>
5777  */
5778 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
5779 {
5780     int size = ctz32(imm5);
5781     int idx;
5782
5783     if (size > 3) {
5784         unallocated_encoding(s);
5785         return;
5786     }
5787
5788     if (!fp_access_check(s)) {
5789         return;
5790     }
5791
5792     idx = extract32(imm5, 1 + size, 4 - size);
5793     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
5794 }
5795
5796 /*
5797  * C6.3.321 UMOV (General)
5798  * C6.3.237 SMOV (General)
5799  *
5800  *  31  30   29              21 20    16 15    12   10 9    5 4    0
5801  * +---+---+-------------------+--------+-------------+------+------+
5802  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
5803  * +---+---+-------------------+--------+-------------+------+------+
5804  *
5805  * U: unsigned when set
5806  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5807  */
5808 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
5809                                   int rn, int rd, int imm5)
5810 {
5811     int size = ctz32(imm5);
5812     int element;
5813     TCGv_i64 tcg_rd;
5814
5815     /* Check for UnallocatedEncodings */
5816     if (is_signed) {
5817         if (size > 2 || (size == 2 && !is_q)) {
5818             unallocated_encoding(s);
5819             return;
5820         }
5821     } else {
5822         if (size > 3
5823             || (size < 3 && is_q)
5824             || (size == 3 && !is_q)) {
5825             unallocated_encoding(s);
5826             return;
5827         }
5828     }
5829
5830     if (!fp_access_check(s)) {
5831         return;
5832     }
5833
5834     element = extract32(imm5, 1+size, 4);
5835
5836     tcg_rd = cpu_reg(s, rd);
5837     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
5838     if (is_signed && !is_q) {
5839         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5840     }
5841 }
5842
5843 /* C3.6.5 AdvSIMD copy
5844  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
5845  * +---+---+----+-----------------+------+---+------+---+------+------+
5846  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
5847  * +---+---+----+-----------------+------+---+------+---+------+------+
5848  */
5849 static void disas_simd_copy(DisasContext *s, uint32_t insn)
5850 {
5851     int rd = extract32(insn, 0, 5);
5852     int rn = extract32(insn, 5, 5);
5853     int imm4 = extract32(insn, 11, 4);
5854     int op = extract32(insn, 29, 1);
5855     int is_q = extract32(insn, 30, 1);
5856     int imm5 = extract32(insn, 16, 5);
5857
5858     if (op) {
5859         if (is_q) {
5860             /* INS (element) */
5861             handle_simd_inse(s, rd, rn, imm4, imm5);
5862         } else {
5863             unallocated_encoding(s);
5864         }
5865     } else {
5866         switch (imm4) {
5867         case 0:
5868             /* DUP (element - vector) */
5869             handle_simd_dupe(s, is_q, rd, rn, imm5);
5870             break;
5871         case 1:
5872             /* DUP (general) */
5873             handle_simd_dupg(s, is_q, rd, rn, imm5);
5874             break;
5875         case 3:
5876             if (is_q) {
5877                 /* INS (general) */
5878                 handle_simd_insg(s, rd, rn, imm5);
5879             } else {
5880                 unallocated_encoding(s);
5881             }
5882             break;
5883         case 5:
5884         case 7:
5885             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
5886             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
5887             break;
5888         default:
5889             unallocated_encoding(s);
5890             break;
5891         }
5892     }
5893 }
5894
5895 /* C3.6.6 AdvSIMD modified immediate
5896  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
5897  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
5898  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
5899  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
5900  *
5901  * There are a number of operations that can be carried out here:
5902  *   MOVI - move (shifted) imm into register
5903  *   MVNI - move inverted (shifted) imm into register
5904  *   ORR  - bitwise OR of (shifted) imm with register
5905  *   BIC  - bitwise clear of (shifted) imm with register
5906  */
5907 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
5908 {
5909     int rd = extract32(insn, 0, 5);
5910     int cmode = extract32(insn, 12, 4);
5911     int cmode_3_1 = extract32(cmode, 1, 3);
5912     int cmode_0 = extract32(cmode, 0, 1);
5913     int o2 = extract32(insn, 11, 1);
5914     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
5915     bool is_neg = extract32(insn, 29, 1);
5916     bool is_q = extract32(insn, 30, 1);
5917     uint64_t imm = 0;
5918     TCGv_i64 tcg_rd, tcg_imm;
5919     int i;
5920
5921     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
5922         unallocated_encoding(s);
5923         return;
5924     }
5925
5926     if (!fp_access_check(s)) {
5927         return;
5928     }
5929
5930     /* See AdvSIMDExpandImm() in ARM ARM */
5931     switch (cmode_3_1) {
5932     case 0: /* Replicate(Zeros(24):imm8, 2) */
5933     case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
5934     case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
5935     case 3: /* Replicate(imm8:Zeros(24), 2) */
5936     {
5937         int shift = cmode_3_1 * 8;
5938         imm = bitfield_replicate(abcdefgh << shift, 32);
5939         break;
5940     }
5941     case 4: /* Replicate(Zeros(8):imm8, 4) */
5942     case 5: /* Replicate(imm8:Zeros(8), 4) */
5943     {
5944         int shift = (cmode_3_1 & 0x1) * 8;
5945         imm = bitfield_replicate(abcdefgh << shift, 16);
5946         break;
5947     }
5948     case 6:
5949         if (cmode_0) {
5950             /* Replicate(Zeros(8):imm8:Ones(16), 2) */
5951             imm = (abcdefgh << 16) | 0xffff;
5952         } else {
5953             /* Replicate(Zeros(16):imm8:Ones(8), 2) */
5954             imm = (abcdefgh << 8) | 0xff;
5955         }
5956         imm = bitfield_replicate(imm, 32);
5957         break;
5958     case 7:
5959         if (!cmode_0 && !is_neg) {
5960             imm = bitfield_replicate(abcdefgh, 8);
5961         } else if (!cmode_0 && is_neg) {
5962             int i;
5963             imm = 0;
5964             for (i = 0; i < 8; i++) {
5965                 if ((abcdefgh) & (1 << i)) {
5966                     imm |= 0xffULL << (i * 8);
5967                 }
5968             }
5969         } else if (cmode_0) {
5970             if (is_neg) {
5971                 imm = (abcdefgh & 0x3f) << 48;
5972                 if (abcdefgh & 0x80) {
5973                     imm |= 0x8000000000000000ULL;
5974                 }
5975                 if (abcdefgh & 0x40) {
5976                     imm |= 0x3fc0000000000000ULL;
5977                 } else {
5978                     imm |= 0x4000000000000000ULL;
5979                 }
5980             } else {
5981                 imm = (abcdefgh & 0x3f) << 19;
5982                 if (abcdefgh & 0x80) {
5983                     imm |= 0x80000000;
5984                 }
5985                 if (abcdefgh & 0x40) {
5986                     imm |= 0x3e000000;
5987                 } else {
5988                     imm |= 0x40000000;
5989                 }
5990                 imm |= (imm << 32);
5991             }
5992         }
5993         break;
5994     }
5995
5996     if (cmode_3_1 != 7 && is_neg) {
5997         imm = ~imm;
5998     }
5999
6000     tcg_imm = tcg_const_i64(imm);
6001     tcg_rd = new_tmp_a64(s);
6002
6003     for (i = 0; i < 2; i++) {
6004         int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64);
6005
6006         if (i == 1 && !is_q) {
6007             /* non-quad ops clear high half of vector */
6008             tcg_gen_movi_i64(tcg_rd, 0);
6009         } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
6010             tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
6011             if (is_neg) {
6012                 /* AND (BIC) */
6013                 tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
6014             } else {
6015                 /* ORR */
6016                 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
6017             }
6018         } else {
6019             /* MOVI */
6020             tcg_gen_mov_i64(tcg_rd, tcg_imm);
6021         }
6022         tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
6023     }
6024
6025     tcg_temp_free_i64(tcg_imm);
6026 }
6027
6028 /* C3.6.7 AdvSIMD scalar copy
6029  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
6030  * +-----+----+-----------------+------+---+------+---+------+------+
6031  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6032  * +-----+----+-----------------+------+---+------+---+------+------+
6033  */
6034 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
6035 {
6036     int rd = extract32(insn, 0, 5);
6037     int rn = extract32(insn, 5, 5);
6038     int imm4 = extract32(insn, 11, 4);
6039     int imm5 = extract32(insn, 16, 5);
6040     int op = extract32(insn, 29, 1);
6041
6042     if (op != 0 || imm4 != 0) {
6043         unallocated_encoding(s);
6044         return;
6045     }
6046
6047     /* DUP (element, scalar) */
6048     handle_simd_dupes(s, rd, rn, imm5);
6049 }
6050
6051 /* C3.6.8 AdvSIMD scalar pairwise
6052  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
6053  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6054  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6055  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6056  */
6057 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
6058 {
6059     int u = extract32(insn, 29, 1);
6060     int size = extract32(insn, 22, 2);
6061     int opcode = extract32(insn, 12, 5);
6062     int rn = extract32(insn, 5, 5);
6063     int rd = extract32(insn, 0, 5);
6064     TCGv_ptr fpst;
6065
6066     /* For some ops (the FP ones), size[1] is part of the encoding.
6067      * For ADDP strictly it is not but size[1] is always 1 for valid
6068      * encodings.
6069      */
6070     opcode |= (extract32(size, 1, 1) << 5);
6071
6072     switch (opcode) {
6073     case 0x3b: /* ADDP */
6074         if (u || size != 3) {
6075             unallocated_encoding(s);
6076             return;
6077         }
6078         if (!fp_access_check(s)) {
6079             return;
6080         }
6081
6082         TCGV_UNUSED_PTR(fpst);
6083         break;
6084     case 0xc: /* FMAXNMP */
6085     case 0xd: /* FADDP */
6086     case 0xf: /* FMAXP */
6087     case 0x2c: /* FMINNMP */
6088     case 0x2f: /* FMINP */
6089         /* FP op, size[0] is 32 or 64 bit */
6090         if (!u) {
6091             unallocated_encoding(s);
6092             return;
6093         }
6094         if (!fp_access_check(s)) {
6095             return;
6096         }
6097
6098         size = extract32(size, 0, 1) ? 3 : 2;
6099         fpst = get_fpstatus_ptr();
6100         break;
6101     default:
6102         unallocated_encoding(s);
6103         return;
6104     }
6105
6106     if (size == 3) {
6107         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6108         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6109         TCGv_i64 tcg_res = tcg_temp_new_i64();
6110
6111         read_vec_element(s, tcg_op1, rn, 0, MO_64);
6112         read_vec_element(s, tcg_op2, rn, 1, MO_64);
6113
6114         switch (opcode) {
6115         case 0x3b: /* ADDP */
6116             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
6117             break;
6118         case 0xc: /* FMAXNMP */
6119             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6120             break;
6121         case 0xd: /* FADDP */
6122             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6123             break;
6124         case 0xf: /* FMAXP */
6125             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6126             break;
6127         case 0x2c: /* FMINNMP */
6128             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6129             break;
6130         case 0x2f: /* FMINP */
6131             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6132             break;
6133         default:
6134             g_assert_not_reached();
6135         }
6136
6137         write_fp_dreg(s, rd, tcg_res);
6138
6139         tcg_temp_free_i64(tcg_op1);
6140         tcg_temp_free_i64(tcg_op2);
6141         tcg_temp_free_i64(tcg_res);
6142     } else {
6143         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6144         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6145         TCGv_i32 tcg_res = tcg_temp_new_i32();
6146
6147         read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
6148         read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);
6149
6150         switch (opcode) {
6151         case 0xc: /* FMAXNMP */
6152             gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6153             break;
6154         case 0xd: /* FADDP */
6155             gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6156             break;
6157         case 0xf: /* FMAXP */
6158             gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6159             break;
6160         case 0x2c: /* FMINNMP */
6161             gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6162             break;
6163         case 0x2f: /* FMINP */
6164             gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6165             break;
6166         default:
6167             g_assert_not_reached();
6168         }
6169
6170         write_fp_sreg(s, rd, tcg_res);
6171
6172         tcg_temp_free_i32(tcg_op1);
6173         tcg_temp_free_i32(tcg_op2);
6174         tcg_temp_free_i32(tcg_res);
6175     }
6176
6177     if (!TCGV_IS_UNUSED_PTR(fpst)) {
6178         tcg_temp_free_ptr(fpst);
6179     }
6180 }
6181
6182 /*
6183  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
6184  *
6185  * This code is handles the common shifting code and is used by both
6186  * the vector and scalar code.
6187  */
6188 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6189                                     TCGv_i64 tcg_rnd, bool accumulate,
6190                                     bool is_u, int size, int shift)
6191 {
6192     bool extended_result = false;
6193     bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
6194     int ext_lshift = 0;
6195     TCGv_i64 tcg_src_hi;
6196
6197     if (round && size == 3) {
6198         extended_result = true;
6199         ext_lshift = 64 - shift;
6200         tcg_src_hi = tcg_temp_new_i64();
6201     } else if (shift == 64) {
6202         if (!accumulate && is_u) {
6203             /* result is zero */
6204             tcg_gen_movi_i64(tcg_res, 0);
6205             return;
6206         }
6207     }
6208
6209     /* Deal with the rounding step */
6210     if (round) {
6211         if (extended_result) {
6212             TCGv_i64 tcg_zero = tcg_const_i64(0);
6213             if (!is_u) {
6214                 /* take care of sign extending tcg_res */
6215                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
6216                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6217                                  tcg_src, tcg_src_hi,
6218                                  tcg_rnd, tcg_zero);
6219             } else {
6220                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6221                                  tcg_src, tcg_zero,
6222                                  tcg_rnd, tcg_zero);
6223             }
6224             tcg_temp_free_i64(tcg_zero);
6225         } else {
6226             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
6227         }
6228     }
6229
6230     /* Now do the shift right */
6231     if (round && extended_result) {
6232         /* extended case, >64 bit precision required */
6233         if (ext_lshift == 0) {
6234             /* special case, only high bits matter */
6235             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
6236         } else {
6237             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6238             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
6239             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
6240         }
6241     } else {
6242         if (is_u) {
6243             if (shift == 64) {
6244                 /* essentially shifting in 64 zeros */
6245                 tcg_gen_movi_i64(tcg_src, 0);
6246             } else {
6247                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6248             }
6249         } else {
6250             if (shift == 64) {
6251                 /* effectively extending the sign-bit */
6252                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
6253             } else {
6254                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
6255             }
6256         }
6257     }
6258
6259     if (accumulate) {
6260         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
6261     } else {
6262         tcg_gen_mov_i64(tcg_res, tcg_src);
6263     }
6264
6265     if (extended_result) {
6266         tcg_temp_free_i64(tcg_src_hi);
6267     }
6268 }
6269
6270 /* Common SHL/SLI - Shift left with an optional insert */
6271 static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6272                                  bool insert, int shift)
6273 {
6274     if (insert) { /* SLI */
6275         tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
6276     } else { /* SHL */
6277         tcg_gen_shli_i64(tcg_res, tcg_src, shift);
6278     }
6279 }
6280
6281 /* SRI: shift right with insert */
6282 static void handle_shri_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6283                                  int size, int shift)
6284 {
6285     int esize = 8 << size;
6286
6287     /* shift count same as element size is valid but does nothing;
6288      * special case to avoid potential shift by 64.
6289      */
6290     if (shift != esize) {
6291         tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6292         tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, 0, esize - shift);
6293     }
6294 }
6295
6296 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
6297 static void handle_scalar_simd_shri(DisasContext *s,
6298                                     bool is_u, int immh, int immb,
6299                                     int opcode, int rn, int rd)
6300 {
6301     const int size = 3;
6302     int immhb = immh << 3 | immb;
6303     int shift = 2 * (8 << size) - immhb;
6304     bool accumulate = false;
6305     bool round = false;
6306     bool insert = false;
6307     TCGv_i64 tcg_rn;
6308     TCGv_i64 tcg_rd;
6309     TCGv_i64 tcg_round;
6310
6311     if (!extract32(immh, 3, 1)) {
6312         unallocated_encoding(s);
6313         return;
6314     }
6315
6316     if (!fp_access_check(s)) {
6317         return;
6318     }
6319
6320     switch (opcode) {
6321     case 0x02: /* SSRA / USRA (accumulate) */
6322         accumulate = true;
6323         break;
6324     case 0x04: /* SRSHR / URSHR (rounding) */
6325         round = true;
6326         break;
6327     case 0x06: /* SRSRA / URSRA (accum + rounding) */
6328         accumulate = round = true;
6329         break;
6330     case 0x08: /* SRI */
6331         insert = true;
6332         break;
6333     }
6334
6335     if (round) {
6336         uint64_t round_const = 1ULL << (shift - 1);
6337         tcg_round = tcg_const_i64(round_const);
6338     } else {
6339         TCGV_UNUSED_I64(tcg_round);
6340     }
6341
6342     tcg_rn = read_fp_dreg(s, rn);
6343     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6344
6345     if (insert) {
6346         handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
6347     } else {
6348         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6349                                 accumulate, is_u, size, shift);
6350     }
6351
6352     write_fp_dreg(s, rd, tcg_rd);
6353
6354     tcg_temp_free_i64(tcg_rn);
6355     tcg_temp_free_i64(tcg_rd);
6356     if (round) {
6357         tcg_temp_free_i64(tcg_round);
6358     }
6359 }
6360
6361 /* SHL/SLI - Scalar shift left */
6362 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
6363                                     int immh, int immb, int opcode,
6364                                     int rn, int rd)
6365 {
6366     int size = 32 - clz32(immh) - 1;
6367     int immhb = immh << 3 | immb;
6368     int shift = immhb - (8 << size);
6369     TCGv_i64 tcg_rn = new_tmp_a64(s);
6370     TCGv_i64 tcg_rd = new_tmp_a64(s);
6371
6372     if (!extract32(immh, 3, 1)) {
6373         unallocated_encoding(s);
6374         return;
6375     }
6376
6377     if (!fp_access_check(s)) {
6378         return;
6379     }
6380
6381     tcg_rn = read_fp_dreg(s, rn);
6382     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6383
6384     handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
6385
6386     write_fp_dreg(s, rd, tcg_rd);
6387
6388     tcg_temp_free_i64(tcg_rn);
6389     tcg_temp_free_i64(tcg_rd);
6390 }
6391
6392 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
6393  * (signed/unsigned) narrowing */
6394 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
6395                                    bool is_u_shift, bool is_u_narrow,
6396                                    int immh, int immb, int opcode,
6397                                    int rn, int rd)
6398 {
6399     int immhb = immh << 3 | immb;
6400     int size = 32 - clz32(immh) - 1;
6401     int esize = 8 << size;
6402     int shift = (2 * esize) - immhb;
6403     int elements = is_scalar ? 1 : (64 / esize);
6404     bool round = extract32(opcode, 0, 1);
6405     TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
6406     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
6407     TCGv_i32 tcg_rd_narrowed;
6408     TCGv_i64 tcg_final;
6409
6410     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
6411         { gen_helper_neon_narrow_sat_s8,
6412           gen_helper_neon_unarrow_sat8 },
6413         { gen_helper_neon_narrow_sat_s16,
6414           gen_helper_neon_unarrow_sat16 },
6415         { gen_helper_neon_narrow_sat_s32,
6416           gen_helper_neon_unarrow_sat32 },
6417         { NULL, NULL },
6418     };
6419     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
6420         gen_helper_neon_narrow_sat_u8,
6421         gen_helper_neon_narrow_sat_u16,
6422         gen_helper_neon_narrow_sat_u32,
6423         NULL
6424     };
6425     NeonGenNarrowEnvFn *narrowfn;
6426
6427     int i;
6428
6429     assert(size < 4);
6430
6431     if (extract32(immh, 3, 1)) {
6432         unallocated_encoding(s);
6433         return;
6434     }
6435
6436     if (!fp_access_check(s)) {
6437         return;
6438     }
6439
6440     if (is_u_shift) {
6441         narrowfn = unsigned_narrow_fns[size];
6442     } else {
6443         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
6444     }
6445
6446     tcg_rn = tcg_temp_new_i64();
6447     tcg_rd = tcg_temp_new_i64();
6448     tcg_rd_narrowed = tcg_temp_new_i32();
6449     tcg_final = tcg_const_i64(0);
6450
6451     if (round) {
6452         uint64_t round_const = 1ULL << (shift - 1);
6453         tcg_round = tcg_const_i64(round_const);
6454     } else {
6455         TCGV_UNUSED_I64(tcg_round);
6456     }
6457
6458     for (i = 0; i < elements; i++) {
6459         read_vec_element(s, tcg_rn, rn, i, ldop);
6460         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6461                                 false, is_u_shift, size+1, shift);
6462         narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
6463         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
6464         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
6465     }
6466
6467     if (!is_q) {
6468         clear_vec_high(s, rd);
6469         write_vec_element(s, tcg_final, rd, 0, MO_64);
6470     } else {
6471         write_vec_element(s, tcg_final, rd, 1, MO_64);
6472     }
6473
6474     if (round) {
6475         tcg_temp_free_i64(tcg_round);
6476     }
6477     tcg_temp_free_i64(tcg_rn);
6478     tcg_temp_free_i64(tcg_rd);
6479     tcg_temp_free_i32(tcg_rd_narrowed);
6480     tcg_temp_free_i64(tcg_final);
6481     return;
6482 }
6483
6484 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
6485 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
6486                              bool src_unsigned, bool dst_unsigned,
6487                              int immh, int immb, int rn, int rd)
6488 {
6489     int immhb = immh << 3 | immb;
6490     int size = 32 - clz32(immh) - 1;
6491     int shift = immhb - (8 << size);
6492     int pass;
6493
6494     assert(immh != 0);
6495     assert(!(scalar && is_q));
6496
6497     if (!scalar) {
6498         if (!is_q && extract32(immh, 3, 1)) {
6499             unallocated_encoding(s);
6500             return;
6501         }
6502
6503         /* Since we use the variable-shift helpers we must
6504          * replicate the shift count into each element of
6505          * the tcg_shift value.
6506          */
6507         switch (size) {
6508         case 0:
6509             shift |= shift << 8;
6510             /* fall through */
6511         case 1:
6512             shift |= shift << 16;
6513             break;
6514         case 2:
6515         case 3:
6516             break;
6517         default:
6518             g_assert_not_reached();
6519         }
6520     }
6521
6522     if (!fp_access_check(s)) {
6523         return;
6524     }
6525
6526     if (size == 3) {
6527         TCGv_i64 tcg_shift = tcg_const_i64(shift);
6528         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
6529             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
6530             { NULL, gen_helper_neon_qshl_u64 },
6531         };
6532         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
6533         int maxpass = is_q ? 2 : 1;
6534
6535         for (pass = 0; pass < maxpass; pass++) {
6536             TCGv_i64 tcg_op = tcg_temp_new_i64();
6537
6538             read_vec_element(s, tcg_op, rn, pass, MO_64);
6539             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6540             write_vec_element(s, tcg_op, rd, pass, MO_64);
6541
6542             tcg_temp_free_i64(tcg_op);
6543         }
6544         tcg_temp_free_i64(tcg_shift);
6545
6546         if (!is_q) {
6547             clear_vec_high(s, rd);
6548         }
6549     } else {
6550         TCGv_i32 tcg_shift = tcg_const_i32(shift);
6551         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
6552             {
6553                 { gen_helper_neon_qshl_s8,
6554                   gen_helper_neon_qshl_s16,
6555                   gen_helper_neon_qshl_s32 },
6556                 { gen_helper_neon_qshlu_s8,
6557                   gen_helper_neon_qshlu_s16,
6558                   gen_helper_neon_qshlu_s32 }
6559             }, {
6560                 { NULL, NULL, NULL },
6561                 { gen_helper_neon_qshl_u8,
6562                   gen_helper_neon_qshl_u16,
6563                   gen_helper_neon_qshl_u32 }
6564             }
6565         };
6566         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
6567         TCGMemOp memop = scalar ? size : MO_32;
6568         int maxpass = scalar ? 1 : is_q ? 4 : 2;
6569
6570         for (pass = 0; pass < maxpass; pass++) {
6571             TCGv_i32 tcg_op = tcg_temp_new_i32();
6572
6573             read_vec_element_i32(s, tcg_op, rn, pass, memop);
6574             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6575             if (scalar) {
6576                 switch (size) {
6577                 case 0:
6578                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
6579                     break;
6580                 case 1:
6581                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
6582                     break;
6583                 case 2:
6584                     break;
6585                 default:
6586                     g_assert_not_reached();
6587                 }
6588                 write_fp_sreg(s, rd, tcg_op);
6589             } else {
6590                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6591             }
6592
6593             tcg_temp_free_i32(tcg_op);
6594         }
6595         tcg_temp_free_i32(tcg_shift);
6596
6597         if (!is_q && !scalar) {
6598             clear_vec_high(s, rd);
6599         }
6600     }
6601 }
6602
6603 /* Common vector code for handling integer to FP conversion */
6604 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
6605                                    int elements, int is_signed,
6606                                    int fracbits, int size)
6607 {
6608     bool is_double = size == 3 ? true : false;
6609     TCGv_ptr tcg_fpst = get_fpstatus_ptr();
6610     TCGv_i32 tcg_shift = tcg_const_i32(fracbits);
6611     TCGv_i64 tcg_int = tcg_temp_new_i64();
6612     TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
6613     int pass;
6614
6615     for (pass = 0; pass < elements; pass++) {
6616         read_vec_element(s, tcg_int, rn, pass, mop);
6617
6618         if (is_double) {
6619             TCGv_i64 tcg_double = tcg_temp_new_i64();
6620             if (is_signed) {
6621                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6622                                      tcg_shift, tcg_fpst);
6623             } else {
6624                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6625                                      tcg_shift, tcg_fpst);
6626             }
6627             if (elements == 1) {
6628                 write_fp_dreg(s, rd, tcg_double);
6629             } else {
6630                 write_vec_element(s, tcg_double, rd, pass, MO_64);
6631             }
6632             tcg_temp_free_i64(tcg_double);
6633         } else {
6634             TCGv_i32 tcg_single = tcg_temp_new_i32();
6635             if (is_signed) {
6636                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6637                                      tcg_shift, tcg_fpst);
6638             } else {
6639                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6640                                      tcg_shift, tcg_fpst);
6641             }
6642             if (elements == 1) {
6643                 write_fp_sreg(s, rd, tcg_single);
6644             } else {
6645                 write_vec_element_i32(s, tcg_single, rd, pass, MO_32);
6646             }
6647             tcg_temp_free_i32(tcg_single);
6648         }
6649     }
6650
6651     if (!is_double && elements == 2) {
6652         clear_vec_high(s, rd);
6653     }
6654
6655     tcg_temp_free_i64(tcg_int);
6656     tcg_temp_free_ptr(tcg_fpst);
6657     tcg_temp_free_i32(tcg_shift);
6658 }
6659
6660 /* UCVTF/SCVTF - Integer to FP conversion */
6661 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
6662                                          bool is_q, bool is_u,
6663                                          int immh, int immb, int opcode,
6664                                          int rn, int rd)
6665 {
6666     bool is_double = extract32(immh, 3, 1);
6667     int size = is_double ? MO_64 : MO_32;
6668     int elements;
6669     int immhb = immh << 3 | immb;
6670     int fracbits = (is_double ? 128 : 64) - immhb;
6671
6672     if (!extract32(immh, 2, 2)) {
6673         unallocated_encoding(s);
6674         return;
6675     }
6676
6677     if (is_scalar) {
6678         elements = 1;
6679     } else {
6680         elements = is_double ? 2 : is_q ? 4 : 2;
6681         if (is_double && !is_q) {
6682             unallocated_encoding(s);
6683             return;
6684         }
6685     }
6686
6687     if (!fp_access_check(s)) {
6688         return;
6689     }
6690
6691     /* immh == 0 would be a failure of the decode logic */
6692     g_assert(immh);
6693
6694     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
6695 }
6696
6697 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */
6698 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
6699                                          bool is_q, bool is_u,
6700                                          int immh, int immb, int rn, int rd)
6701 {
6702     bool is_double = extract32(immh, 3, 1);
6703     int immhb = immh << 3 | immb;
6704     int fracbits = (is_double ? 128 : 64) - immhb;
6705     int pass;
6706     TCGv_ptr tcg_fpstatus;
6707     TCGv_i32 tcg_rmode, tcg_shift;
6708
6709     if (!extract32(immh, 2, 2)) {
6710         unallocated_encoding(s);
6711         return;
6712     }
6713
6714     if (!is_scalar && !is_q && is_double) {
6715         unallocated_encoding(s);
6716         return;
6717     }
6718
6719     if (!fp_access_check(s)) {
6720         return;
6721     }
6722
6723     assert(!(is_scalar && is_q));
6724
6725     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
6726     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6727     tcg_fpstatus = get_fpstatus_ptr();
6728     tcg_shift = tcg_const_i32(fracbits);
6729
6730     if (is_double) {
6731         int maxpass = is_scalar ? 1 : 2;
6732
6733         for (pass = 0; pass < maxpass; pass++) {
6734             TCGv_i64 tcg_op = tcg_temp_new_i64();
6735
6736             read_vec_element(s, tcg_op, rn, pass, MO_64);
6737             if (is_u) {
6738                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6739             } else {
6740                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6741             }
6742             write_vec_element(s, tcg_op, rd, pass, MO_64);
6743             tcg_temp_free_i64(tcg_op);
6744         }
6745         if (!is_q) {
6746             clear_vec_high(s, rd);
6747         }
6748     } else {
6749         int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
6750         for (pass = 0; pass < maxpass; pass++) {
6751             TCGv_i32 tcg_op = tcg_temp_new_i32();
6752
6753             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
6754             if (is_u) {
6755                 gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6756             } else {
6757                 gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6758             }
6759             if (is_scalar) {
6760                 write_fp_sreg(s, rd, tcg_op);
6761             } else {
6762                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6763             }
6764             tcg_temp_free_i32(tcg_op);
6765         }
6766         if (!is_q && !is_scalar) {
6767             clear_vec_high(s, rd);
6768         }
6769     }
6770
6771     tcg_temp_free_ptr(tcg_fpstatus);
6772     tcg_temp_free_i32(tcg_shift);
6773     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6774     tcg_temp_free_i32(tcg_rmode);
6775 }
6776
6777 /* C3.6.9 AdvSIMD scalar shift by immediate
6778  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
6779  * +-----+---+-------------+------+------+--------+---+------+------+
6780  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
6781  * +-----+---+-------------+------+------+--------+---+------+------+
6782  *
6783  * This is the scalar version so it works on a fixed sized registers
6784  */
6785 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
6786 {
6787     int rd = extract32(insn, 0, 5);
6788     int rn = extract32(insn, 5, 5);
6789     int opcode = extract32(insn, 11, 5);
6790     int immb = extract32(insn, 16, 3);
6791     int immh = extract32(insn, 19, 4);
6792     bool is_u = extract32(insn, 29, 1);
6793
6794     if (immh == 0) {
6795         unallocated_encoding(s);
6796         return;
6797     }
6798
6799     switch (opcode) {
6800     case 0x08: /* SRI */
6801         if (!is_u) {
6802             unallocated_encoding(s);
6803             return;
6804         }
6805         /* fall through */
6806     case 0x00: /* SSHR / USHR */
6807     case 0x02: /* SSRA / USRA */
6808     case 0x04: /* SRSHR / URSHR */
6809     case 0x06: /* SRSRA / URSRA */
6810         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
6811         break;
6812     case 0x0a: /* SHL / SLI */
6813         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
6814         break;
6815     case 0x1c: /* SCVTF, UCVTF */
6816         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
6817                                      opcode, rn, rd);
6818         break;
6819     case 0x10: /* SQSHRUN, SQSHRUN2 */
6820     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
6821         if (!is_u) {
6822             unallocated_encoding(s);
6823             return;
6824         }
6825         handle_vec_simd_sqshrn(s, true, false, false, true,
6826                                immh, immb, opcode, rn, rd);
6827         break;
6828     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
6829     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
6830         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
6831                                immh, immb, opcode, rn, rd);
6832         break;
6833     case 0xc: /* SQSHLU */
6834         if (!is_u) {
6835             unallocated_encoding(s);
6836             return;
6837         }
6838         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
6839         break;
6840     case 0xe: /* SQSHL, UQSHL */
6841         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
6842         break;
6843     case 0x1f: /* FCVTZS, FCVTZU */
6844         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
6845         break;
6846     default:
6847         unallocated_encoding(s);
6848         break;
6849     }
6850 }
6851
6852 /* C3.6.10 AdvSIMD scalar three different
6853  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6854  * +-----+---+-----------+------+---+------+--------+-----+------+------+
6855  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
6856  * +-----+---+-----------+------+---+------+--------+-----+------+------+
6857  */
6858 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
6859 {
6860     bool is_u = extract32(insn, 29, 1);
6861     int size = extract32(insn, 22, 2);
6862     int opcode = extract32(insn, 12, 4);
6863     int rm = extract32(insn, 16, 5);
6864     int rn = extract32(insn, 5, 5);
6865     int rd = extract32(insn, 0, 5);
6866
6867     if (is_u) {
6868         unallocated_encoding(s);
6869         return;
6870     }
6871
6872     switch (opcode) {
6873     case 0x9: /* SQDMLAL, SQDMLAL2 */
6874     case 0xb: /* SQDMLSL, SQDMLSL2 */
6875     case 0xd: /* SQDMULL, SQDMULL2 */
6876         if (size == 0 || size == 3) {
6877             unallocated_encoding(s);
6878             return;
6879         }
6880         break;
6881     default:
6882         unallocated_encoding(s);
6883         return;
6884     }
6885
6886     if (!fp_access_check(s)) {
6887         return;
6888     }
6889
6890     if (size == 2) {
6891         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6892         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6893         TCGv_i64 tcg_res = tcg_temp_new_i64();
6894
6895         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
6896         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
6897
6898         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
6899         gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
6900
6901         switch (opcode) {
6902         case 0xd: /* SQDMULL, SQDMULL2 */
6903             break;
6904         case 0xb: /* SQDMLSL, SQDMLSL2 */
6905             tcg_gen_neg_i64(tcg_res, tcg_res);
6906             /* fall through */
6907         case 0x9: /* SQDMLAL, SQDMLAL2 */
6908             read_vec_element(s, tcg_op1, rd, 0, MO_64);
6909             gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
6910                                               tcg_res, tcg_op1);
6911             break;
6912         default:
6913             g_assert_not_reached();
6914         }
6915
6916         write_fp_dreg(s, rd, tcg_res);
6917
6918         tcg_temp_free_i64(tcg_op1);
6919         tcg_temp_free_i64(tcg_op2);
6920         tcg_temp_free_i64(tcg_res);
6921     } else {
6922         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6923         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6924         TCGv_i64 tcg_res = tcg_temp_new_i64();
6925
6926         read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
6927         read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
6928
6929         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
6930         gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
6931
6932         switch (opcode) {
6933         case 0xd: /* SQDMULL, SQDMULL2 */
6934             break;
6935         case 0xb: /* SQDMLSL, SQDMLSL2 */
6936             gen_helper_neon_negl_u32(tcg_res, tcg_res);
6937             /* fall through */
6938         case 0x9: /* SQDMLAL, SQDMLAL2 */
6939         {
6940             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
6941             read_vec_element(s, tcg_op3, rd, 0, MO_32);
6942             gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
6943                                               tcg_res, tcg_op3);
6944             tcg_temp_free_i64(tcg_op3);
6945             break;
6946         }
6947         default:
6948             g_assert_not_reached();
6949         }
6950
6951         tcg_gen_ext32u_i64(tcg_res, tcg_res);
6952         write_fp_dreg(s, rd, tcg_res);
6953
6954         tcg_temp_free_i32(tcg_op1);
6955         tcg_temp_free_i32(tcg_op2);
6956         tcg_temp_free_i64(tcg_res);
6957     }
6958 }
6959
6960 static void handle_3same_64(DisasContext *s, int opcode, bool u,
6961                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
6962 {
6963     /* Handle 64x64->64 opcodes which are shared between the scalar
6964      * and vector 3-same groups. We cover every opcode where size == 3
6965      * is valid in either the three-reg-same (integer, not pairwise)
6966      * or scalar-three-reg-same groups. (Some opcodes are not yet
6967      * implemented.)
6968      */
6969     TCGCond cond;
6970
6971     switch (opcode) {
6972     case 0x1: /* SQADD */
6973         if (u) {
6974             gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6975         } else {
6976             gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6977         }
6978         break;
6979     case 0x5: /* SQSUB */
6980         if (u) {
6981             gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6982         } else {
6983             gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6984         }
6985         break;
6986     case 0x6: /* CMGT, CMHI */
6987         /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
6988          * We implement this using setcond (test) and then negating.
6989          */
6990         cond = u ? TCG_COND_GTU : TCG_COND_GT;
6991     do_cmop:
6992         tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
6993         tcg_gen_neg_i64(tcg_rd, tcg_rd);
6994         break;
6995     case 0x7: /* CMGE, CMHS */
6996         cond = u ? TCG_COND_GEU : TCG_COND_GE;
6997         goto do_cmop;
6998     case 0x11: /* CMTST, CMEQ */
6999         if (u) {
7000             cond = TCG_COND_EQ;
7001             goto do_cmop;
7002         }
7003         /* CMTST : test is "if (X & Y != 0)". */
7004         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
7005         tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
7006         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7007         break;
7008     case 0x8: /* SSHL, USHL */
7009         if (u) {
7010             gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
7011         } else {
7012             gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
7013         }
7014         break;
7015     case 0x9: /* SQSHL, UQSHL */
7016         if (u) {
7017             gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7018         } else {
7019             gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7020         }
7021         break;
7022     case 0xa: /* SRSHL, URSHL */
7023         if (u) {
7024             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
7025         } else {
7026             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
7027         }
7028         break;
7029     case 0xb: /* SQRSHL, UQRSHL */
7030         if (u) {
7031             gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7032         } else {
7033             gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7034         }
7035         break;
7036     case 0x10: /* ADD, SUB */
7037         if (u) {
7038             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
7039         } else {
7040             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
7041         }
7042         break;
7043     default:
7044         g_assert_not_reached();
7045     }
7046 }
7047
7048 /* Handle the 3-same-operands float operations; shared by the scalar
7049  * and vector encodings. The caller must filter out any encodings
7050  * not allocated for the encoding it is dealing with.
7051  */
7052 static void handle_3same_float(DisasContext *s, int size, int elements,
7053                                int fpopcode, int rd, int rn, int rm)
7054 {
7055     int pass;
7056     TCGv_ptr fpst = get_fpstatus_ptr();
7057
7058     for (pass = 0; pass < elements; pass++) {
7059         if (size) {
7060             /* Double */
7061             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7062             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7063             TCGv_i64 tcg_res = tcg_temp_new_i64();
7064
7065             read_vec_element(s, tcg_op1, rn, pass, MO_64);
7066             read_vec_element(s, tcg_op2, rm, pass, MO_64);
7067
7068             switch (fpopcode) {
7069             case 0x39: /* FMLS */
7070                 /* As usual for ARM, separate negation for fused multiply-add */
7071                 gen_helper_vfp_negd(tcg_op1, tcg_op1);
7072                 /* fall through */
7073             case 0x19: /* FMLA */
7074                 read_vec_element(s, tcg_res, rd, pass, MO_64);
7075                 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
7076                                        tcg_res, fpst);
7077                 break;
7078             case 0x18: /* FMAXNM */
7079                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7080                 break;
7081             case 0x1a: /* FADD */
7082                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7083                 break;
7084             case 0x1b: /* FMULX */
7085                 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
7086                 break;
7087             case 0x1c: /* FCMEQ */
7088                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7089                 break;
7090             case 0x1e: /* FMAX */
7091                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7092                 break;
7093             case 0x1f: /* FRECPS */
7094                 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7095                 break;
7096             case 0x38: /* FMINNM */
7097                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7098                 break;
7099             case 0x3a: /* FSUB */
7100                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7101                 break;
7102             case 0x3e: /* FMIN */
7103                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7104                 break;
7105             case 0x3f: /* FRSQRTS */
7106                 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7107                 break;
7108             case 0x5b: /* FMUL */
7109                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
7110                 break;
7111             case 0x5c: /* FCMGE */
7112                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7113                 break;
7114             case 0x5d: /* FACGE */
7115                 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7116                 break;
7117             case 0x5f: /* FDIV */
7118                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
7119                 break;
7120             case 0x7a: /* FABD */
7121                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7122                 gen_helper_vfp_absd(tcg_res, tcg_res);
7123                 break;
7124             case 0x7c: /* FCMGT */
7125                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7126                 break;
7127             case 0x7d: /* FACGT */
7128                 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7129                 break;
7130             default:
7131                 g_assert_not_reached();
7132             }
7133
7134             write_vec_element(s, tcg_res, rd, pass, MO_64);
7135
7136             tcg_temp_free_i64(tcg_res);
7137             tcg_temp_free_i64(tcg_op1);
7138             tcg_temp_free_i64(tcg_op2);
7139         } else {
7140             /* Single */
7141             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7142             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7143             TCGv_i32 tcg_res = tcg_temp_new_i32();
7144
7145             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
7146             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
7147
7148             switch (fpopcode) {
7149             case 0x39: /* FMLS */
7150                 /* As usual for ARM, separate negation for fused multiply-add */
7151                 gen_helper_vfp_negs(tcg_op1, tcg_op1);
7152                 /* fall through */
7153             case 0x19: /* FMLA */
7154                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7155                 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
7156                                        tcg_res, fpst);
7157                 break;
7158             case 0x1a: /* FADD */
7159                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7160                 break;
7161             case 0x1b: /* FMULX */
7162                 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
7163                 break;
7164             case 0x1c: /* FCMEQ */
7165                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7166                 break;
7167             case 0x1e: /* FMAX */
7168                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7169                 break;
7170             case 0x1f: /* FRECPS */
7171                 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7172                 break;
7173             case 0x18: /* FMAXNM */
7174                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7175                 break;
7176             case 0x38: /* FMINNM */
7177                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7178                 break;
7179             case 0x3a: /* FSUB */
7180                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7181                 break;
7182             case 0x3e: /* FMIN */
7183                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7184                 break;
7185             case 0x3f: /* FRSQRTS */
7186                 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7187                 break;
7188             case 0x5b: /* FMUL */
7189                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
7190                 break;
7191             case 0x5c: /* FCMGE */
7192                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7193                 break;
7194             case 0x5d: /* FACGE */
7195                 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7196                 break;
7197             case 0x5f: /* FDIV */
7198                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
7199                 break;
7200             case 0x7a: /* FABD */
7201                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7202                 gen_helper_vfp_abss(tcg_res, tcg_res);
7203                 break;
7204             case 0x7c: /* FCMGT */
7205                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7206                 break;
7207             case 0x7d: /* FACGT */
7208                 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7209                 break;
7210             default:
7211                 g_assert_not_reached();
7212             }
7213
7214             if (elements == 1) {
7215                 /* scalar single so clear high part */
7216                 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7217
7218                 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
7219                 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
7220                 tcg_temp_free_i64(tcg_tmp);
7221             } else {
7222                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7223             }
7224
7225             tcg_temp_free_i32(tcg_res);
7226             tcg_temp_free_i32(tcg_op1);
7227             tcg_temp_free_i32(tcg_op2);
7228         }
7229     }
7230
7231     tcg_temp_free_ptr(fpst);
7232
7233     if ((elements << size) < 4) {
7234         /* scalar, or non-quad vector op */
7235         clear_vec_high(s, rd);
7236     }
7237 }
7238
7239 /* C3.6.11 AdvSIMD scalar three same
7240  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
7241  * +-----+---+-----------+------+---+------+--------+---+------+------+
7242  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
7243  * +-----+---+-----------+------+---+------+--------+---+------+------+
7244  */
7245 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
7246 {
7247     int rd = extract32(insn, 0, 5);
7248     int rn = extract32(insn, 5, 5);
7249     int opcode = extract32(insn, 11, 5);
7250     int rm = extract32(insn, 16, 5);
7251     int size = extract32(insn, 22, 2);
7252     bool u = extract32(insn, 29, 1);
7253     TCGv_i64 tcg_rd;
7254
7255     if (opcode >= 0x18) {
7256         /* Floating point: U, size[1] and opcode indicate operation */
7257         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
7258         switch (fpopcode) {
7259         case 0x1b: /* FMULX */
7260         case 0x1f: /* FRECPS */
7261         case 0x3f: /* FRSQRTS */
7262         case 0x5d: /* FACGE */
7263         case 0x7d: /* FACGT */
7264         case 0x1c: /* FCMEQ */
7265         case 0x5c: /* FCMGE */
7266         case 0x7c: /* FCMGT */
7267         case 0x7a: /* FABD */
7268             break;
7269         default:
7270             unallocated_encoding(s);
7271             return;
7272         }
7273
7274         if (!fp_access_check(s)) {
7275             return;
7276         }
7277
7278         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
7279         return;
7280     }
7281
7282     switch (opcode) {
7283     case 0x1: /* SQADD, UQADD */
7284     case 0x5: /* SQSUB, UQSUB */
7285     case 0x9: /* SQSHL, UQSHL */
7286     case 0xb: /* SQRSHL, UQRSHL */
7287         break;
7288     case 0x8: /* SSHL, USHL */
7289     case 0xa: /* SRSHL, URSHL */
7290     case 0x6: /* CMGT, CMHI */
7291     case 0x7: /* CMGE, CMHS */
7292     case 0x11: /* CMTST, CMEQ */
7293     case 0x10: /* ADD, SUB (vector) */
7294         if (size != 3) {
7295             unallocated_encoding(s);
7296             return;
7297         }
7298         break;
7299     case 0x16: /* SQDMULH, SQRDMULH (vector) */
7300         if (size != 1 && size != 2) {
7301             unallocated_encoding(s);
7302             return;
7303         }
7304         break;
7305     default:
7306         unallocated_encoding(s);
7307         return;
7308     }
7309
7310     if (!fp_access_check(s)) {
7311         return;
7312     }
7313
7314     tcg_rd = tcg_temp_new_i64();
7315
7316     if (size == 3) {
7317         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
7318         TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
7319
7320         handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
7321         tcg_temp_free_i64(tcg_rn);
7322         tcg_temp_free_i64(tcg_rm);
7323     } else {
7324         /* Do a single operation on the lowest element in the vector.
7325          * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
7326          * no side effects for all these operations.
7327          * OPTME: special-purpose helpers would avoid doing some
7328          * unnecessary work in the helper for the 8 and 16 bit cases.
7329          */
7330         NeonGenTwoOpEnvFn *genenvfn;
7331         TCGv_i32 tcg_rn = tcg_temp_new_i32();
7332         TCGv_i32 tcg_rm = tcg_temp_new_i32();
7333         TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
7334
7335         read_vec_element_i32(s, tcg_rn, rn, 0, size);
7336         read_vec_element_i32(s, tcg_rm, rm, 0, size);
7337
7338         switch (opcode) {
7339         case 0x1: /* SQADD, UQADD */
7340         {
7341             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7342                 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
7343                 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
7344                 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
7345             };
7346             genenvfn = fns[size][u];
7347             break;
7348         }
7349         case 0x5: /* SQSUB, UQSUB */
7350         {
7351             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7352                 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
7353                 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
7354                 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
7355             };
7356             genenvfn = fns[size][u];
7357             break;
7358         }
7359         case 0x9: /* SQSHL, UQSHL */
7360         {
7361             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7362                 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
7363                 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
7364                 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
7365             };
7366             genenvfn = fns[size][u];
7367             break;
7368         }
7369         case 0xb: /* SQRSHL, UQRSHL */
7370         {
7371             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7372                 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
7373                 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
7374                 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
7375             };
7376             genenvfn = fns[size][u];
7377             break;
7378         }
7379         case 0x16: /* SQDMULH, SQRDMULH */
7380         {
7381             static NeonGenTwoOpEnvFn * const fns[2][2] = {
7382                 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
7383                 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
7384             };
7385             assert(size == 1 || size == 2);
7386             genenvfn = fns[size - 1][u];
7387             break;
7388         }
7389         default:
7390             g_assert_not_reached();
7391         }
7392
7393         genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
7394         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
7395         tcg_temp_free_i32(tcg_rd32);
7396         tcg_temp_free_i32(tcg_rn);
7397         tcg_temp_free_i32(tcg_rm);
7398     }
7399
7400     write_fp_dreg(s, rd, tcg_rd);
7401
7402     tcg_temp_free_i64(tcg_rd);
7403 }
7404
7405 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
7406                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
7407                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
7408 {
7409     /* Handle 64->64 opcodes which are shared between the scalar and
7410      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
7411      * is valid in either group and also the double-precision fp ops.
7412      * The caller only need provide tcg_rmode and tcg_fpstatus if the op
7413      * requires them.
7414      */
7415     TCGCond cond;
7416
7417     switch (opcode) {
7418     case 0x4: /* CLS, CLZ */
7419         if (u) {
7420             gen_helper_clz64(tcg_rd, tcg_rn);
7421         } else {
7422             gen_helper_cls64(tcg_rd, tcg_rn);
7423         }
7424         break;
7425     case 0x5: /* NOT */
7426         /* This opcode is shared with CNT and RBIT but we have earlier
7427          * enforced that size == 3 if and only if this is the NOT insn.
7428          */
7429         tcg_gen_not_i64(tcg_rd, tcg_rn);
7430         break;
7431     case 0x7: /* SQABS, SQNEG */
7432         if (u) {
7433             gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
7434         } else {
7435             gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
7436         }
7437         break;
7438     case 0xa: /* CMLT */
7439         /* 64 bit integer comparison against zero, result is
7440          * test ? (2^64 - 1) : 0. We implement via setcond(!test) and
7441          * subtracting 1.
7442          */
7443         cond = TCG_COND_LT;
7444     do_cmop:
7445         tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
7446         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7447         break;
7448     case 0x8: /* CMGT, CMGE */
7449         cond = u ? TCG_COND_GE : TCG_COND_GT;
7450         goto do_cmop;
7451     case 0x9: /* CMEQ, CMLE */
7452         cond = u ? TCG_COND_LE : TCG_COND_EQ;
7453         goto do_cmop;
7454     case 0xb: /* ABS, NEG */
7455         if (u) {
7456             tcg_gen_neg_i64(tcg_rd, tcg_rn);
7457         } else {
7458             TCGv_i64 tcg_zero = tcg_const_i64(0);
7459             tcg_gen_neg_i64(tcg_rd, tcg_rn);
7460             tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
7461                                 tcg_rn, tcg_rd);
7462             tcg_temp_free_i64(tcg_zero);
7463         }
7464         break;
7465     case 0x2f: /* FABS */
7466         gen_helper_vfp_absd(tcg_rd, tcg_rn);
7467         break;
7468     case 0x6f: /* FNEG */
7469         gen_helper_vfp_negd(tcg_rd, tcg_rn);
7470         break;
7471     case 0x7f: /* FSQRT */
7472         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
7473         break;
7474     case 0x1a: /* FCVTNS */
7475     case 0x1b: /* FCVTMS */
7476     case 0x1c: /* FCVTAS */
7477     case 0x3a: /* FCVTPS */
7478     case 0x3b: /* FCVTZS */
7479     {
7480         TCGv_i32 tcg_shift = tcg_const_i32(0);
7481         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7482         tcg_temp_free_i32(tcg_shift);
7483         break;
7484     }
7485     case 0x5a: /* FCVTNU */
7486     case 0x5b: /* FCVTMU */
7487     case 0x5c: /* FCVTAU */
7488     case 0x7a: /* FCVTPU */
7489     case 0x7b: /* FCVTZU */
7490     {
7491         TCGv_i32 tcg_shift = tcg_const_i32(0);
7492         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7493         tcg_temp_free_i32(tcg_shift);
7494         break;
7495     }
7496     case 0x18: /* FRINTN */
7497     case 0x19: /* FRINTM */
7498     case 0x38: /* FRINTP */
7499     case 0x39: /* FRINTZ */
7500     case 0x58: /* FRINTA */
7501     case 0x79: /* FRINTI */
7502         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
7503         break;
7504     case 0x59: /* FRINTX */
7505         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
7506         break;
7507     default:
7508         g_assert_not_reached();
7509     }
7510 }
7511
7512 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
7513                                    bool is_scalar, bool is_u, bool is_q,
7514                                    int size, int rn, int rd)
7515 {
7516     bool is_double = (size == 3);
7517     TCGv_ptr fpst;
7518
7519     if (!fp_access_check(s)) {
7520         return;
7521     }
7522
7523     fpst = get_fpstatus_ptr();
7524
7525     if (is_double) {
7526         TCGv_i64 tcg_op = tcg_temp_new_i64();
7527         TCGv_i64 tcg_zero = tcg_const_i64(0);
7528         TCGv_i64 tcg_res = tcg_temp_new_i64();
7529         NeonGenTwoDoubleOPFn *genfn;
7530         bool swap = false;
7531         int pass;
7532
7533         switch (opcode) {
7534         case 0x2e: /* FCMLT (zero) */
7535             swap = true;
7536             /* fallthrough */
7537         case 0x2c: /* FCMGT (zero) */
7538             genfn = gen_helper_neon_cgt_f64;
7539             break;
7540         case 0x2d: /* FCMEQ (zero) */
7541             genfn = gen_helper_neon_ceq_f64;
7542             break;
7543         case 0x6d: /* FCMLE (zero) */
7544             swap = true;
7545             /* fall through */
7546         case 0x6c: /* FCMGE (zero) */
7547             genfn = gen_helper_neon_cge_f64;
7548             break;
7549         default:
7550             g_assert_not_reached();
7551         }
7552
7553         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7554             read_vec_element(s, tcg_op, rn, pass, MO_64);
7555             if (swap) {
7556                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7557             } else {
7558                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7559             }
7560             write_vec_element(s, tcg_res, rd, pass, MO_64);
7561         }
7562         if (is_scalar) {
7563             clear_vec_high(s, rd);
7564         }
7565
7566         tcg_temp_free_i64(tcg_res);
7567         tcg_temp_free_i64(tcg_zero);
7568         tcg_temp_free_i64(tcg_op);
7569     } else {
7570         TCGv_i32 tcg_op = tcg_temp_new_i32();
7571         TCGv_i32 tcg_zero = tcg_const_i32(0);
7572         TCGv_i32 tcg_res = tcg_temp_new_i32();
7573         NeonGenTwoSingleOPFn *genfn;
7574         bool swap = false;
7575         int pass, maxpasses;
7576
7577         switch (opcode) {
7578         case 0x2e: /* FCMLT (zero) */
7579             swap = true;
7580             /* fall through */
7581         case 0x2c: /* FCMGT (zero) */
7582             genfn = gen_helper_neon_cgt_f32;
7583             break;
7584         case 0x2d: /* FCMEQ (zero) */
7585             genfn = gen_helper_neon_ceq_f32;
7586             break;
7587         case 0x6d: /* FCMLE (zero) */
7588             swap = true;
7589             /* fall through */
7590         case 0x6c: /* FCMGE (zero) */
7591             genfn = gen_helper_neon_cge_f32;
7592             break;
7593         default:
7594             g_assert_not_reached();
7595         }
7596
7597         if (is_scalar) {
7598             maxpasses = 1;
7599         } else {
7600             maxpasses = is_q ? 4 : 2;
7601         }
7602
7603         for (pass = 0; pass < maxpasses; pass++) {
7604             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7605             if (swap) {
7606                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7607             } else {
7608                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7609             }
7610             if (is_scalar) {
7611                 write_fp_sreg(s, rd, tcg_res);
7612             } else {
7613                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7614             }
7615         }
7616         tcg_temp_free_i32(tcg_res);
7617         tcg_temp_free_i32(tcg_zero);
7618         tcg_temp_free_i32(tcg_op);
7619         if (!is_q && !is_scalar) {
7620             clear_vec_high(s, rd);
7621         }
7622     }
7623
7624     tcg_temp_free_ptr(fpst);
7625 }
7626
7627 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
7628                                     bool is_scalar, bool is_u, bool is_q,
7629                                     int size, int rn, int rd)
7630 {
7631     bool is_double = (size == 3);
7632     TCGv_ptr fpst = get_fpstatus_ptr();
7633
7634     if (is_double) {
7635         TCGv_i64 tcg_op = tcg_temp_new_i64();
7636         TCGv_i64 tcg_res = tcg_temp_new_i64();
7637         int pass;
7638
7639         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7640             read_vec_element(s, tcg_op, rn, pass, MO_64);
7641             switch (opcode) {
7642             case 0x3d: /* FRECPE */
7643                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
7644                 break;
7645             case 0x3f: /* FRECPX */
7646                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
7647                 break;
7648             case 0x7d: /* FRSQRTE */
7649                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
7650                 break;
7651             default:
7652                 g_assert_not_reached();
7653             }
7654             write_vec_element(s, tcg_res, rd, pass, MO_64);
7655         }
7656         if (is_scalar) {
7657             clear_vec_high(s, rd);
7658         }
7659
7660         tcg_temp_free_i64(tcg_res);
7661         tcg_temp_free_i64(tcg_op);
7662     } else {
7663         TCGv_i32 tcg_op = tcg_temp_new_i32();
7664         TCGv_i32 tcg_res = tcg_temp_new_i32();
7665         int pass, maxpasses;
7666
7667         if (is_scalar) {
7668             maxpasses = 1;
7669         } else {
7670             maxpasses = is_q ? 4 : 2;
7671         }
7672
7673         for (pass = 0; pass < maxpasses; pass++) {
7674             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7675
7676             switch (opcode) {
7677             case 0x3c: /* URECPE */
7678                 gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
7679                 break;
7680             case 0x3d: /* FRECPE */
7681                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
7682                 break;
7683             case 0x3f: /* FRECPX */
7684                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
7685                 break;
7686             case 0x7d: /* FRSQRTE */
7687                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
7688                 break;
7689             default:
7690                 g_assert_not_reached();
7691             }
7692
7693             if (is_scalar) {
7694                 write_fp_sreg(s, rd, tcg_res);
7695             } else {
7696                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7697             }
7698         }
7699         tcg_temp_free_i32(tcg_res);
7700         tcg_temp_free_i32(tcg_op);
7701         if (!is_q && !is_scalar) {
7702             clear_vec_high(s, rd);
7703         }
7704     }
7705     tcg_temp_free_ptr(fpst);
7706 }
7707
7708 static void handle_2misc_narrow(DisasContext *s, bool scalar,
7709                                 int opcode, bool u, bool is_q,
7710                                 int size, int rn, int rd)
7711 {
7712     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
7713      * in the source becomes a size element in the destination).
7714      */
7715     int pass;
7716     TCGv_i32 tcg_res[2];
7717     int destelt = is_q ? 2 : 0;
7718     int passes = scalar ? 1 : 2;
7719
7720     if (scalar) {
7721         tcg_res[1] = tcg_const_i32(0);
7722     }
7723
7724     for (pass = 0; pass < passes; pass++) {
7725         TCGv_i64 tcg_op = tcg_temp_new_i64();
7726         NeonGenNarrowFn *genfn = NULL;
7727         NeonGenNarrowEnvFn *genenvfn = NULL;
7728
7729         if (scalar) {
7730             read_vec_element(s, tcg_op, rn, pass, size + 1);
7731         } else {
7732             read_vec_element(s, tcg_op, rn, pass, MO_64);
7733         }
7734         tcg_res[pass] = tcg_temp_new_i32();
7735
7736         switch (opcode) {
7737         case 0x12: /* XTN, SQXTUN */
7738         {
7739             static NeonGenNarrowFn * const xtnfns[3] = {
7740                 gen_helper_neon_narrow_u8,
7741                 gen_helper_neon_narrow_u16,
7742                 tcg_gen_extrl_i64_i32,
7743             };
7744             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
7745                 gen_helper_neon_unarrow_sat8,
7746                 gen_helper_neon_unarrow_sat16,
7747                 gen_helper_neon_unarrow_sat32,
7748             };
7749             if (u) {
7750                 genenvfn = sqxtunfns[size];
7751             } else {
7752                 genfn = xtnfns[size];
7753             }
7754             break;
7755         }
7756         case 0x14: /* SQXTN, UQXTN */
7757         {
7758             static NeonGenNarrowEnvFn * const fns[3][2] = {
7759                 { gen_helper_neon_narrow_sat_s8,
7760                   gen_helper_neon_narrow_sat_u8 },
7761                 { gen_helper_neon_narrow_sat_s16,
7762                   gen_helper_neon_narrow_sat_u16 },
7763                 { gen_helper_neon_narrow_sat_s32,
7764                   gen_helper_neon_narrow_sat_u32 },
7765             };
7766             genenvfn = fns[size][u];
7767             break;
7768         }
7769         case 0x16: /* FCVTN, FCVTN2 */
7770             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
7771             if (size == 2) {
7772                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
7773             } else {
7774                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
7775                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
7776                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
7777                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
7778                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
7779                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
7780                 tcg_temp_free_i32(tcg_lo);
7781                 tcg_temp_free_i32(tcg_hi);
7782             }
7783             break;
7784         case 0x56:  /* FCVTXN, FCVTXN2 */
7785             /* 64 bit to 32 bit float conversion
7786              * with von Neumann rounding (round to odd)
7787              */
7788             assert(size == 2);
7789             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
7790             break;
7791         default:
7792             g_assert_not_reached();
7793         }
7794
7795         if (genfn) {
7796             genfn(tcg_res[pass], tcg_op);
7797         } else if (genenvfn) {
7798             genenvfn(tcg_res[pass], cpu_env, tcg_op);
7799         }
7800
7801         tcg_temp_free_i64(tcg_op);
7802     }
7803
7804     for (pass = 0; pass < 2; pass++) {
7805         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
7806         tcg_temp_free_i32(tcg_res[pass]);
7807     }
7808     if (!is_q) {
7809         clear_vec_high(s, rd);
7810     }
7811 }
7812
7813 /* Remaining saturating accumulating ops */
7814 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
7815                                 bool is_q, int size, int rn, int rd)
7816 {
7817     bool is_double = (size == 3);
7818
7819     if (is_double) {
7820         TCGv_i64 tcg_rn = tcg_temp_new_i64();
7821         TCGv_i64 tcg_rd = tcg_temp_new_i64();
7822         int pass;
7823
7824         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7825             read_vec_element(s, tcg_rn, rn, pass, MO_64);
7826             read_vec_element(s, tcg_rd, rd, pass, MO_64);
7827
7828             if (is_u) { /* USQADD */
7829                 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7830             } else { /* SUQADD */
7831                 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7832             }
7833             write_vec_element(s, tcg_rd, rd, pass, MO_64);
7834         }
7835         if (is_scalar) {
7836             clear_vec_high(s, rd);
7837         }
7838
7839         tcg_temp_free_i64(tcg_rd);
7840         tcg_temp_free_i64(tcg_rn);
7841     } else {
7842         TCGv_i32 tcg_rn = tcg_temp_new_i32();
7843         TCGv_i32 tcg_rd = tcg_temp_new_i32();
7844         int pass, maxpasses;
7845
7846         if (is_scalar) {
7847             maxpasses = 1;
7848         } else {
7849             maxpasses = is_q ? 4 : 2;
7850         }
7851
7852         for (pass = 0; pass < maxpasses; pass++) {
7853             if (is_scalar) {
7854                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
7855                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
7856             } else {
7857                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
7858                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
7859             }
7860
7861             if (is_u) { /* USQADD */
7862                 switch (size) {
7863                 case 0:
7864                     gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7865                     break;
7866                 case 1:
7867                     gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7868                     break;
7869                 case 2:
7870                     gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7871                     break;
7872                 default:
7873                     g_assert_not_reached();
7874                 }
7875             } else { /* SUQADD */
7876                 switch (size) {
7877                 case 0:
7878                     gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7879                     break;
7880                 case 1:
7881                     gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7882                     break;
7883                 case 2:
7884                     gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7885                     break;
7886                 default:
7887                     g_assert_not_reached();
7888                 }
7889             }
7890
7891             if (is_scalar) {
7892                 TCGv_i64 tcg_zero = tcg_const_i64(0);
7893                 write_vec_element(s, tcg_zero, rd, 0, MO_64);
7894                 tcg_temp_free_i64(tcg_zero);
7895             }
7896             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
7897         }
7898
7899         if (!is_q) {
7900             clear_vec_high(s, rd);
7901         }
7902
7903         tcg_temp_free_i32(tcg_rd);
7904         tcg_temp_free_i32(tcg_rn);
7905     }
7906 }
7907
7908 /* C3.6.12 AdvSIMD scalar two reg misc
7909  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7910  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7911  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7912  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7913  */
7914 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
7915 {
7916     int rd = extract32(insn, 0, 5);
7917     int rn = extract32(insn, 5, 5);
7918     int opcode = extract32(insn, 12, 5);
7919     int size = extract32(insn, 22, 2);
7920     bool u = extract32(insn, 29, 1);
7921     bool is_fcvt = false;
7922     int rmode;
7923     TCGv_i32 tcg_rmode;
7924     TCGv_ptr tcg_fpstatus;
7925
7926     switch (opcode) {
7927     case 0x3: /* USQADD / SUQADD*/
7928         if (!fp_access_check(s)) {
7929             return;
7930         }
7931         handle_2misc_satacc(s, true, u, false, size, rn, rd);
7932         return;
7933     case 0x7: /* SQABS / SQNEG */
7934         break;
7935     case 0xa: /* CMLT */
7936         if (u) {
7937             unallocated_encoding(s);
7938             return;
7939         }
7940         /* fall through */
7941     case 0x8: /* CMGT, CMGE */
7942     case 0x9: /* CMEQ, CMLE */
7943     case 0xb: /* ABS, NEG */
7944         if (size != 3) {
7945             unallocated_encoding(s);
7946             return;
7947         }
7948         break;
7949     case 0x12: /* SQXTUN */
7950         if (!u) {
7951             unallocated_encoding(s);
7952             return;
7953         }
7954         /* fall through */
7955     case 0x14: /* SQXTN, UQXTN */
7956         if (size == 3) {
7957             unallocated_encoding(s);
7958             return;
7959         }
7960         if (!fp_access_check(s)) {
7961             return;
7962         }
7963         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
7964         return;
7965     case 0xc ... 0xf:
7966     case 0x16 ... 0x1d:
7967     case 0x1f:
7968         /* Floating point: U, size[1] and opcode indicate operation;
7969          * size[0] indicates single or double precision.
7970          */
7971         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
7972         size = extract32(size, 0, 1) ? 3 : 2;
7973         switch (opcode) {
7974         case 0x2c: /* FCMGT (zero) */
7975         case 0x2d: /* FCMEQ (zero) */
7976         case 0x2e: /* FCMLT (zero) */
7977         case 0x6c: /* FCMGE (zero) */
7978         case 0x6d: /* FCMLE (zero) */
7979             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
7980             return;
7981         case 0x1d: /* SCVTF */
7982         case 0x5d: /* UCVTF */
7983         {
7984             bool is_signed = (opcode == 0x1d);
7985             if (!fp_access_check(s)) {
7986                 return;
7987             }
7988             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
7989             return;
7990         }
7991         case 0x3d: /* FRECPE */
7992         case 0x3f: /* FRECPX */
7993         case 0x7d: /* FRSQRTE */
7994             if (!fp_access_check(s)) {
7995                 return;
7996             }
7997             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
7998             return;
7999         case 0x1a: /* FCVTNS */
8000         case 0x1b: /* FCVTMS */
8001         case 0x3a: /* FCVTPS */
8002         case 0x3b: /* FCVTZS */
8003         case 0x5a: /* FCVTNU */
8004         case 0x5b: /* FCVTMU */
8005         case 0x7a: /* FCVTPU */
8006         case 0x7b: /* FCVTZU */
8007             is_fcvt = true;
8008             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
8009             break;
8010         case 0x1c: /* FCVTAS */
8011         case 0x5c: /* FCVTAU */
8012             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
8013             is_fcvt = true;
8014             rmode = FPROUNDING_TIEAWAY;
8015             break;
8016         case 0x56: /* FCVTXN, FCVTXN2 */
8017             if (size == 2) {
8018                 unallocated_encoding(s);
8019                 return;
8020             }
8021             if (!fp_access_check(s)) {
8022                 return;
8023             }
8024             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
8025             return;
8026         default:
8027             unallocated_encoding(s);
8028             return;
8029         }
8030         break;
8031     default:
8032         unallocated_encoding(s);
8033         return;
8034     }
8035
8036     if (!fp_access_check(s)) {
8037         return;
8038     }
8039
8040     if (is_fcvt) {
8041         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
8042         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8043         tcg_fpstatus = get_fpstatus_ptr();
8044     } else {
8045         TCGV_UNUSED_I32(tcg_rmode);
8046         TCGV_UNUSED_PTR(tcg_fpstatus);
8047     }
8048
8049     if (size == 3) {
8050         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8051         TCGv_i64 tcg_rd = tcg_temp_new_i64();
8052
8053         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
8054         write_fp_dreg(s, rd, tcg_rd);
8055         tcg_temp_free_i64(tcg_rd);
8056         tcg_temp_free_i64(tcg_rn);
8057     } else {
8058         TCGv_i32 tcg_rn = tcg_temp_new_i32();
8059         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8060
8061         read_vec_element_i32(s, tcg_rn, rn, 0, size);
8062
8063         switch (opcode) {
8064         case 0x7: /* SQABS, SQNEG */
8065         {
8066             NeonGenOneOpEnvFn *genfn;
8067             static NeonGenOneOpEnvFn * const fns[3][2] = {
8068                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
8069                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
8070                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
8071             };
8072             genfn = fns[size][u];
8073             genfn(tcg_rd, cpu_env, tcg_rn);
8074             break;
8075         }
8076         case 0x1a: /* FCVTNS */
8077         case 0x1b: /* FCVTMS */
8078         case 0x1c: /* FCVTAS */
8079         case 0x3a: /* FCVTPS */
8080         case 0x3b: /* FCVTZS */
8081         {
8082             TCGv_i32 tcg_shift = tcg_const_i32(0);
8083             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8084             tcg_temp_free_i32(tcg_shift);
8085             break;
8086         }
8087         case 0x5a: /* FCVTNU */
8088         case 0x5b: /* FCVTMU */
8089         case 0x5c: /* FCVTAU */
8090         case 0x7a: /* FCVTPU */
8091         case 0x7b: /* FCVTZU */
8092         {
8093             TCGv_i32 tcg_shift = tcg_const_i32(0);
8094             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8095             tcg_temp_free_i32(tcg_shift);
8096             break;
8097         }
8098         default:
8099             g_assert_not_reached();
8100         }
8101
8102         write_fp_sreg(s, rd, tcg_rd);
8103         tcg_temp_free_i32(tcg_rd);
8104         tcg_temp_free_i32(tcg_rn);
8105     }
8106
8107     if (is_fcvt) {
8108         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8109         tcg_temp_free_i32(tcg_rmode);
8110         tcg_temp_free_ptr(tcg_fpstatus);
8111     }
8112 }
8113
8114 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
8115 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
8116                                  int immh, int immb, int opcode, int rn, int rd)
8117 {
8118     int size = 32 - clz32(immh) - 1;
8119     int immhb = immh << 3 | immb;
8120     int shift = 2 * (8 << size) - immhb;
8121     bool accumulate = false;
8122     bool round = false;
8123     bool insert = false;
8124     int dsize = is_q ? 128 : 64;
8125     int esize = 8 << size;
8126     int elements = dsize/esize;
8127     TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
8128     TCGv_i64 tcg_rn = new_tmp_a64(s);
8129     TCGv_i64 tcg_rd = new_tmp_a64(s);
8130     TCGv_i64 tcg_round;
8131     int i;
8132
8133     if (extract32(immh, 3, 1) && !is_q) {
8134         unallocated_encoding(s);
8135         return;
8136     }
8137
8138     if (size > 3 && !is_q) {
8139         unallocated_encoding(s);
8140         return;
8141     }
8142
8143     if (!fp_access_check(s)) {
8144         return;
8145     }
8146
8147     switch (opcode) {
8148     case 0x02: /* SSRA / USRA (accumulate) */
8149         accumulate = true;
8150         break;
8151     case 0x04: /* SRSHR / URSHR (rounding) */
8152         round = true;
8153         break;
8154     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8155         accumulate = round = true;
8156         break;
8157     case 0x08: /* SRI */
8158         insert = true;
8159         break;
8160     }
8161
8162     if (round) {
8163         uint64_t round_const = 1ULL << (shift - 1);
8164         tcg_round = tcg_const_i64(round_const);
8165     } else {
8166         TCGV_UNUSED_I64(tcg_round);
8167     }
8168
8169     for (i = 0; i < elements; i++) {
8170         read_vec_element(s, tcg_rn, rn, i, memop);
8171         if (accumulate || insert) {
8172             read_vec_element(s, tcg_rd, rd, i, memop);
8173         }
8174
8175         if (insert) {
8176             handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
8177         } else {
8178             handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8179                                     accumulate, is_u, size, shift);
8180         }
8181
8182         write_vec_element(s, tcg_rd, rd, i, size);
8183     }
8184
8185     if (!is_q) {
8186         clear_vec_high(s, rd);
8187     }
8188
8189     if (round) {
8190         tcg_temp_free_i64(tcg_round);
8191     }
8192 }
8193
8194 /* SHL/SLI - Vector shift left */
8195 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
8196                                 int immh, int immb, int opcode, int rn, int rd)
8197 {
8198     int size = 32 - clz32(immh) - 1;
8199     int immhb = immh << 3 | immb;
8200     int shift = immhb - (8 << size);
8201     int dsize = is_q ? 128 : 64;
8202     int esize = 8 << size;
8203     int elements = dsize/esize;
8204     TCGv_i64 tcg_rn = new_tmp_a64(s);
8205     TCGv_i64 tcg_rd = new_tmp_a64(s);
8206     int i;
8207
8208     if (extract32(immh, 3, 1) && !is_q) {
8209         unallocated_encoding(s);
8210         return;
8211     }
8212
8213     if (size > 3 && !is_q) {
8214         unallocated_encoding(s);
8215         return;
8216     }
8217
8218     if (!fp_access_check(s)) {
8219         return;
8220     }
8221
8222     for (i = 0; i < elements; i++) {
8223         read_vec_element(s, tcg_rn, rn, i, size);
8224         if (insert) {
8225             read_vec_element(s, tcg_rd, rd, i, size);
8226         }
8227
8228         handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
8229
8230         write_vec_element(s, tcg_rd, rd, i, size);
8231     }
8232
8233     if (!is_q) {
8234         clear_vec_high(s, rd);
8235     }
8236 }
8237
8238 /* USHLL/SHLL - Vector shift left with widening */
8239 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
8240                                  int immh, int immb, int opcode, int rn, int rd)
8241 {
8242     int size = 32 - clz32(immh) - 1;
8243     int immhb = immh << 3 | immb;
8244     int shift = immhb - (8 << size);
8245     int dsize = 64;
8246     int esize = 8 << size;
8247     int elements = dsize/esize;
8248     TCGv_i64 tcg_rn = new_tmp_a64(s);
8249     TCGv_i64 tcg_rd = new_tmp_a64(s);
8250     int i;
8251
8252     if (size >= 3) {
8253         unallocated_encoding(s);
8254         return;
8255     }
8256
8257     if (!fp_access_check(s)) {
8258         return;
8259     }
8260
8261     /* For the LL variants the store is larger than the load,
8262      * so if rd == rn we would overwrite parts of our input.
8263      * So load everything right now and use shifts in the main loop.
8264      */
8265     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
8266
8267     for (i = 0; i < elements; i++) {
8268         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
8269         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
8270         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
8271         write_vec_element(s, tcg_rd, rd, i, size + 1);
8272     }
8273 }
8274
8275 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
8276 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
8277                                  int immh, int immb, int opcode, int rn, int rd)
8278 {
8279     int immhb = immh << 3 | immb;
8280     int size = 32 - clz32(immh) - 1;
8281     int dsize = 64;
8282     int esize = 8 << size;
8283     int elements = dsize/esize;
8284     int shift = (2 * esize) - immhb;
8285     bool round = extract32(opcode, 0, 1);
8286     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
8287     TCGv_i64 tcg_round;
8288     int i;
8289
8290     if (extract32(immh, 3, 1)) {
8291         unallocated_encoding(s);
8292         return;
8293     }
8294
8295     if (!fp_access_check(s)) {
8296         return;
8297     }
8298
8299     tcg_rn = tcg_temp_new_i64();
8300     tcg_rd = tcg_temp_new_i64();
8301     tcg_final = tcg_temp_new_i64();
8302     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
8303
8304     if (round) {
8305         uint64_t round_const = 1ULL << (shift - 1);
8306         tcg_round = tcg_const_i64(round_const);
8307     } else {
8308         TCGV_UNUSED_I64(tcg_round);
8309     }
8310
8311     for (i = 0; i < elements; i++) {
8312         read_vec_element(s, tcg_rn, rn, i, size+1);
8313         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8314                                 false, true, size+1, shift);
8315
8316         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8317     }
8318
8319     if (!is_q) {
8320         clear_vec_high(s, rd);
8321         write_vec_element(s, tcg_final, rd, 0, MO_64);
8322     } else {
8323         write_vec_element(s, tcg_final, rd, 1, MO_64);
8324     }
8325
8326     if (round) {
8327         tcg_temp_free_i64(tcg_round);
8328     }
8329     tcg_temp_free_i64(tcg_rn);
8330     tcg_temp_free_i64(tcg_rd);
8331     tcg_temp_free_i64(tcg_final);
8332     return;
8333 }
8334
8335
8336 /* C3.6.14 AdvSIMD shift by immediate
8337  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
8338  * +---+---+---+-------------+------+------+--------+---+------+------+
8339  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8340  * +---+---+---+-------------+------+------+--------+---+------+------+
8341  */
8342 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
8343 {
8344     int rd = extract32(insn, 0, 5);
8345     int rn = extract32(insn, 5, 5);
8346     int opcode = extract32(insn, 11, 5);
8347     int immb = extract32(insn, 16, 3);
8348     int immh = extract32(insn, 19, 4);
8349     bool is_u = extract32(insn, 29, 1);
8350     bool is_q = extract32(insn, 30, 1);
8351
8352     switch (opcode) {
8353     case 0x08: /* SRI */
8354         if (!is_u) {
8355             unallocated_encoding(s);
8356             return;
8357         }
8358         /* fall through */
8359     case 0x00: /* SSHR / USHR */
8360     case 0x02: /* SSRA / USRA (accumulate) */
8361     case 0x04: /* SRSHR / URSHR (rounding) */
8362     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8363         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
8364         break;
8365     case 0x0a: /* SHL / SLI */
8366         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8367         break;
8368     case 0x10: /* SHRN */
8369     case 0x11: /* RSHRN / SQRSHRUN */
8370         if (is_u) {
8371             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
8372                                    opcode, rn, rd);
8373         } else {
8374             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
8375         }
8376         break;
8377     case 0x12: /* SQSHRN / UQSHRN */
8378     case 0x13: /* SQRSHRN / UQRSHRN */
8379         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
8380                                opcode, rn, rd);
8381         break;
8382     case 0x14: /* SSHLL / USHLL */
8383         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8384         break;
8385     case 0x1c: /* SCVTF / UCVTF */
8386         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
8387                                      opcode, rn, rd);
8388         break;
8389     case 0xc: /* SQSHLU */
8390         if (!is_u) {
8391             unallocated_encoding(s);
8392             return;
8393         }
8394         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
8395         break;
8396     case 0xe: /* SQSHL, UQSHL */
8397         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
8398         break;
8399     case 0x1f: /* FCVTZS/ FCVTZU */
8400         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
8401         return;
8402     default:
8403         unallocated_encoding(s);
8404         return;
8405     }
8406 }
8407
8408 /* Generate code to do a "long" addition or subtraction, ie one done in
8409  * TCGv_i64 on vector lanes twice the width specified by size.
8410  */
8411 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
8412                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
8413 {
8414     static NeonGenTwo64OpFn * const fns[3][2] = {
8415         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
8416         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
8417         { tcg_gen_add_i64, tcg_gen_sub_i64 },
8418     };
8419     NeonGenTwo64OpFn *genfn;
8420     assert(size < 3);
8421
8422     genfn = fns[size][is_sub];
8423     genfn(tcg_res, tcg_op1, tcg_op2);
8424 }
8425
/* Three-reg-different widening operations.
 *
 * Two passes are made; each produces one 64-bit half of the 128-bit
 * result in rd from 32 bits' worth of elements of rn and rm. is_q picks
 * the upper rather than the lower source elements (the "2" forms of the
 * instructions), is_u picks the unsigned variants, and opcode picks the
 * operation (see the case labels below). For the accumulating opcodes
 * the old contents of rd are read first and combined with the result.
 *
 * No decode or FP access checks are done here.
 */
static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
                                int opcode, int rd, int rn, int rm)
{
    /* 3-reg-different widening insns: 64 x 64 -> 128 */
    TCGv_i64 tcg_res[2];
    int pass, accop;

    tcg_res[0] = tcg_temp_new_i64();
    tcg_res[1] = tcg_temp_new_i64();

    /* Does this op do an adding accumulate, a subtracting accumulate,
     * or no accumulate at all?
     */
    switch (opcode) {
    case 5:
    case 8:
    case 9:
        accop = 1;
        break;
    case 10:
    case 11:
        accop = -1;
        break;
    default:
        accop = 0;
        break;
    }

    if (accop != 0) {
        /* Accumulating ops start from the current destination value */
        read_vec_element(s, tcg_res[0], rd, 0, MO_64);
        read_vec_element(s, tcg_res[1], rd, 1, MO_64);
    }

    /* size == 2 means two 32x32->64 operations; this is worth special
     * casing because we can generally handle it inline.
     */
    if (size == 2) {
        for (pass = 0; pass < 2; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_passres;
            TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);

            /* 32-bit element index: 0/1 for the low half, 2/3 if is_q */
            int elt = pass + is_q * 2;

            read_vec_element(s, tcg_op1, rn, elt, memop);
            read_vec_element(s, tcg_op2, rm, elt, memop);

            /* Without accumulation the op can write the result directly;
             * otherwise it goes via a scratch temp that is folded into
             * tcg_res[pass] further down.
             */
            if (accop == 0) {
                tcg_passres = tcg_res[pass];
            } else {
                tcg_passres = tcg_temp_new_i64();
            }

            switch (opcode) {
            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
                tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
                tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
            {
                TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
                TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();

                /* Compute both differences, then select op1 - op2 when
                 * op1 >= op2 and op2 - op1 otherwise.
                 */
                tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
                tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
                                    tcg_passres,
                                    tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
                tcg_temp_free_i64(tcg_tmp1);
                tcg_temp_free_i64(tcg_tmp2);
                break;
            }
            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 9: /* SQDMLAL, SQDMLAL2 */
            case 11: /* SQDMLSL, SQDMLSL2 */
            case 13: /* SQDMULL, SQDMULL2 */
                /* The doubling is done by a saturating add of the
                 * product to itself.
                 */
                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
                gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
                                                  tcg_passres, tcg_passres);
                break;
            default:
                g_assert_not_reached();
            }

            if (opcode == 9 || opcode == 11) {
                /* saturating accumulate ops */
                if (accop < 0) {
                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
                }
                gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
                                                  tcg_res[pass], tcg_passres);
            } else if (accop > 0) {
                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
            } else if (accop < 0) {
                tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
            }

            /* tcg_passres is only a separate temp when accumulating */
            if (accop != 0) {
                tcg_temp_free_i64(tcg_passres);
            }

            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        }
    } else {
        /* size 0 or 1, generally helper functions */
        for (pass = 0; pass < 2; pass++) {
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i64 tcg_passres;
            /* 32-bit chunk index: 0/1 for the low half, 2/3 if is_q */
            int elt = pass + is_q * 2;

            read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);

            if (accop == 0) {
                tcg_passres = tcg_res[pass];
            } else {
                tcg_passres = tcg_temp_new_i64();
            }

            switch (opcode) {
            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
            {
                TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
                /* Indexed by [size][is_u] */
                static NeonGenWidenFn * const widenfns[2][2] = {
                    { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
                    { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
                };
                NeonGenWidenFn *widenfn = widenfns[size][is_u];

                /* Widen both operands, then do the long add/subtract */
                widenfn(tcg_op2_64, tcg_op2);
                widenfn(tcg_passres, tcg_op1);
                gen_neon_addl(size, (opcode == 2), tcg_passres,
                              tcg_passres, tcg_op2_64);
                tcg_temp_free_i64(tcg_op2_64);
                break;
            }
            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
                if (size == 0) {
                    if (is_u) {
                        gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
                    }
                } else {
                    if (is_u) {
                        gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
                    }
                }
                break;
            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
                if (size == 0) {
                    if (is_u) {
                        gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
                    }
                } else {
                    if (is_u) {
                        gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
                    }
                }
                break;
            case 9: /* SQDMLAL, SQDMLAL2 */
            case 11: /* SQDMLSL, SQDMLSL2 */
            case 13: /* SQDMULL, SQDMULL2 */
                assert(size == 1);
                /* As above: double the product with a saturating self-add */
                gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
                gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
                                                  tcg_passres, tcg_passres);
                break;
            case 14: /* PMULL */
                assert(size == 0);
                gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
                break;
            default:
                g_assert_not_reached();
            }
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);

            if (accop != 0) {
                if (opcode == 9 || opcode == 11) {
                    /* saturating accumulate ops */
                    if (accop < 0) {
                        gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
                    }
                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
                                                      tcg_res[pass],
                                                      tcg_passres);
                } else {
                    gen_neon_addl(size, (accop < 0), tcg_res[pass],
                                  tcg_res[pass], tcg_passres);
                }
                tcg_temp_free_i64(tcg_passres);
            }
        }
    }

    /* Both halves are written back only once all sources have been read */
    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
    tcg_temp_free_i64(tcg_res[0]);
    tcg_temp_free_i64(tcg_res[1]);
}
8647
8648 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
8649                             int opcode, int rd, int rn, int rm)
8650 {
8651     TCGv_i64 tcg_res[2];
8652     int part = is_q ? 2 : 0;
8653     int pass;
8654
8655     for (pass = 0; pass < 2; pass++) {
8656         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8657         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8658         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
8659         static NeonGenWidenFn * const widenfns[3][2] = {
8660             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8661             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8662             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
8663         };
8664         NeonGenWidenFn *widenfn = widenfns[size][is_u];
8665
8666         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8667         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
8668         widenfn(tcg_op2_wide, tcg_op2);
8669         tcg_temp_free_i32(tcg_op2);
8670         tcg_res[pass] = tcg_temp_new_i64();
8671         gen_neon_addl(size, (opcode == 3),
8672                       tcg_res[pass], tcg_op1, tcg_op2_wide);
8673         tcg_temp_free_i64(tcg_op1);
8674         tcg_temp_free_i64(tcg_op2_wide);
8675     }
8676
8677     for (pass = 0; pass < 2; pass++) {
8678         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8679         tcg_temp_free_i64(tcg_res[pass]);
8680     }
8681 }
8682
8683 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
8684 {
8685     tcg_gen_addi_i64(in, in, 1U << 31);
8686     tcg_gen_extrh_i64_i32(res, in);
8687 }
8688
8689 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
8690                                  int opcode, int rd, int rn, int rm)
8691 {
8692     TCGv_i32 tcg_res[2];
8693     int part = is_q ? 2 : 0;
8694     int pass;
8695
8696     for (pass = 0; pass < 2; pass++) {
8697         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8698         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8699         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
8700         static NeonGenNarrowFn * const narrowfns[3][2] = {
8701             { gen_helper_neon_narrow_high_u8,
8702               gen_helper_neon_narrow_round_high_u8 },
8703             { gen_helper_neon_narrow_high_u16,
8704               gen_helper_neon_narrow_round_high_u16 },
8705             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
8706         };
8707         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
8708
8709         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8710         read_vec_element(s, tcg_op2, rm, pass, MO_64);
8711
8712         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
8713
8714         tcg_temp_free_i64(tcg_op1);
8715         tcg_temp_free_i64(tcg_op2);
8716
8717         tcg_res[pass] = tcg_temp_new_i32();
8718         gennarrow(tcg_res[pass], tcg_wideres);
8719         tcg_temp_free_i64(tcg_wideres);
8720     }
8721
8722     for (pass = 0; pass < 2; pass++) {
8723         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
8724         tcg_temp_free_i32(tcg_res[pass]);
8725     }
8726     if (!is_q) {
8727         clear_vec_high(s, rd);
8728     }
8729 }
8730
8731 static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
8732 {
8733     /* PMULL of 64 x 64 -> 128 is an odd special case because it
8734      * is the only three-reg-diff instruction which produces a
8735      * 128-bit wide result from a single operation. However since
8736      * it's possible to calculate the two halves more or less
8737      * separately we just use two helper calls.
8738      */
8739     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8740     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8741     TCGv_i64 tcg_res = tcg_temp_new_i64();
8742
8743     read_vec_element(s, tcg_op1, rn, is_q, MO_64);
8744     read_vec_element(s, tcg_op2, rm, is_q, MO_64);
8745     gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
8746     write_vec_element(s, tcg_res, rd, 0, MO_64);
8747     gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
8748     write_vec_element(s, tcg_res, rd, 1, MO_64);
8749
8750     tcg_temp_free_i64(tcg_op1);
8751     tcg_temp_free_i64(tcg_op2);
8752     tcg_temp_free_i64(tcg_res);
8753 }
8754
8755 /* C3.6.15 AdvSIMD three different
8756  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8757  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8758  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8759  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8760  */
8761 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
8762 {
8763     /* Instructions in this group fall into three basic classes
8764      * (in each case with the operation working on each element in
8765      * the input vectors):
8766      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
8767      *     128 bit input)
8768      * (2) wide 64 x 128 -> 128
8769      * (3) narrowing 128 x 128 -> 64
8770      * Here we do initial decode, catch unallocated cases and
8771      * dispatch to separate functions for each class.
8772      */
8773     int is_q = extract32(insn, 30, 1);
8774     int is_u = extract32(insn, 29, 1);
8775     int size = extract32(insn, 22, 2);
8776     int opcode = extract32(insn, 12, 4);
8777     int rm = extract32(insn, 16, 5);
8778     int rn = extract32(insn, 5, 5);
8779     int rd = extract32(insn, 0, 5);
8780
8781     switch (opcode) {
8782     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
8783     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
8784         /* 64 x 128 -> 128 */
8785         if (size == 3) {
8786             unallocated_encoding(s);
8787             return;
8788         }
8789         if (!fp_access_check(s)) {
8790             return;
8791         }
8792         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
8793         break;
8794     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
8795     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
8796         /* 128 x 128 -> 64 */
8797         if (size == 3) {
8798             unallocated_encoding(s);
8799             return;
8800         }
8801         if (!fp_access_check(s)) {
8802             return;
8803         }
8804         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
8805         break;
8806     case 14: /* PMULL, PMULL2 */
8807         if (is_u || size == 1 || size == 2) {
8808             unallocated_encoding(s);
8809             return;
8810         }
8811         if (size == 3) {
8812             if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
8813                 unallocated_encoding(s);
8814                 return;
8815             }
8816             if (!fp_access_check(s)) {
8817                 return;
8818             }
8819             handle_pmull_64(s, is_q, rd, rn, rm);
8820             return;
8821         }
8822         goto is_widening;
8823     case 9: /* SQDMLAL, SQDMLAL2 */
8824     case 11: /* SQDMLSL, SQDMLSL2 */
8825     case 13: /* SQDMULL, SQDMULL2 */
8826         if (is_u || size == 0) {
8827             unallocated_encoding(s);
8828             return;
8829         }
8830         /* fall through */
8831     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8832     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8833     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8834     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8835     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8836     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8837     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
8838         /* 64 x 64 -> 128 */
8839         if (size == 3) {
8840             unallocated_encoding(s);
8841             return;
8842         }
8843     is_widening:
8844         if (!fp_access_check(s)) {
8845             return;
8846         }
8847
8848         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
8849         break;
8850     default:
8851         /* opcode 15 not allocated */
8852         unallocated_encoding(s);
8853         break;
8854     }
8855 }
8856
8857 /* Logic op (opcode == 3) subgroup of C3.6.16. */
8858 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
8859 {
8860     int rd = extract32(insn, 0, 5);
8861     int rn = extract32(insn, 5, 5);
8862     int rm = extract32(insn, 16, 5);
8863     int size = extract32(insn, 22, 2);
8864     bool is_u = extract32(insn, 29, 1);
8865     bool is_q = extract32(insn, 30, 1);
8866     TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
8867     int pass;
8868
8869     if (!fp_access_check(s)) {
8870         return;
8871     }
8872
8873     tcg_op1 = tcg_temp_new_i64();
8874     tcg_op2 = tcg_temp_new_i64();
8875     tcg_res[0] = tcg_temp_new_i64();
8876     tcg_res[1] = tcg_temp_new_i64();
8877
8878     for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
8879         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8880         read_vec_element(s, tcg_op2, rm, pass, MO_64);
8881
8882         if (!is_u) {
8883             switch (size) {
8884             case 0: /* AND */
8885                 tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
8886                 break;
8887             case 1: /* BIC */
8888                 tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
8889                 break;
8890             case 2: /* ORR */
8891                 tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
8892                 break;
8893             case 3: /* ORN */
8894                 tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
8895                 break;
8896             }
8897         } else {
8898             if (size != 0) {
8899                 /* B* ops need res loaded to operate on */
8900                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8901             }
8902
8903             switch (size) {
8904             case 0: /* EOR */
8905                 tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
8906                 break;
8907             case 1: /* BSL bitwise select */
8908                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2);
8909                 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8910                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1);
8911                 break;
8912             case 2: /* BIT, bitwise insert if true */
8913                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8914                 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2);
8915                 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
8916                 break;
8917             case 3: /* BIF, bitwise insert if false */
8918                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8919                 tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2);
8920                 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
8921                 break;
8922             }
8923         }
8924     }
8925
8926     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8927     if (!is_q) {
8928         tcg_gen_movi_i64(tcg_res[1], 0);
8929     }
8930     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8931
8932     tcg_temp_free_i64(tcg_op1);
8933     tcg_temp_free_i64(tcg_op2);
8934     tcg_temp_free_i64(tcg_res[0]);
8935     tcg_temp_free_i64(tcg_res[1]);
8936 }
8937
8938 /* Helper functions for 32 bit comparisons */
8939 static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8940 {
8941     tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
8942 }
8943
8944 static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8945 {
8946     tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
8947 }
8948
8949 static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8950 {
8951     tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
8952 }
8953
8954 static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8955 {
8956     tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
8957 }
8958
8959 /* Pairwise op subgroup of C3.6.16.
8960  *
8961  * This is called directly or via the handle_3same_float for float pairwise
8962  * operations where the opcode and size are calculated differently.
8963  */
8964 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
8965                                    int size, int rn, int rm, int rd)
8966 {
8967     TCGv_ptr fpst;
8968     int pass;
8969
8970     /* Floating point operations need fpst */
8971     if (opcode >= 0x58) {
8972         fpst = get_fpstatus_ptr();
8973     } else {
8974         TCGV_UNUSED_PTR(fpst);
8975     }
8976
8977     if (!fp_access_check(s)) {
8978         return;
8979     }
8980
8981     /* These operations work on the concatenated rm:rn, with each pair of
8982      * adjacent elements being operated on to produce an element in the result.
8983      */
8984     if (size == 3) {
8985         TCGv_i64 tcg_res[2];
8986
8987         for (pass = 0; pass < 2; pass++) {
8988             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8989             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8990             int passreg = (pass == 0) ? rn : rm;
8991
8992             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
8993             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
8994             tcg_res[pass] = tcg_temp_new_i64();
8995
8996             switch (opcode) {
8997             case 0x17: /* ADDP */
8998                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
8999                 break;
9000             case 0x58: /* FMAXNMP */
9001                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9002                 break;
9003             case 0x5a: /* FADDP */
9004                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9005                 break;
9006             case 0x5e: /* FMAXP */
9007                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9008                 break;
9009             case 0x78: /* FMINNMP */
9010                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9011                 break;
9012             case 0x7e: /* FMINP */
9013                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9014                 break;
9015             default:
9016                 g_assert_not_reached();
9017             }
9018
9019             tcg_temp_free_i64(tcg_op1);
9020             tcg_temp_free_i64(tcg_op2);
9021         }
9022
9023         for (pass = 0; pass < 2; pass++) {
9024             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9025             tcg_temp_free_i64(tcg_res[pass]);
9026         }
9027     } else {
9028         int maxpass = is_q ? 4 : 2;
9029         TCGv_i32 tcg_res[4];
9030
9031         for (pass = 0; pass < maxpass; pass++) {
9032             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9033             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9034             NeonGenTwoOpFn *genfn = NULL;
9035             int passreg = pass < (maxpass / 2) ? rn : rm;
9036             int passelt = (is_q && (pass & 1)) ? 2 : 0;
9037
9038             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
9039             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
9040             tcg_res[pass] = tcg_temp_new_i32();
9041
9042             switch (opcode) {
9043             case 0x17: /* ADDP */
9044             {
9045                 static NeonGenTwoOpFn * const fns[3] = {
9046                     gen_helper_neon_padd_u8,
9047                     gen_helper_neon_padd_u16,
9048                     tcg_gen_add_i32,
9049                 };
9050                 genfn = fns[size];
9051                 break;
9052             }
9053             case 0x14: /* SMAXP, UMAXP */
9054             {
9055                 static NeonGenTwoOpFn * const fns[3][2] = {
9056                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
9057                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
9058                     { gen_max_s32, gen_max_u32 },
9059                 };
9060                 genfn = fns[size][u];
9061                 break;
9062             }
9063             case 0x15: /* SMINP, UMINP */
9064             {
9065                 static NeonGenTwoOpFn * const fns[3][2] = {
9066                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
9067                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
9068                     { gen_min_s32, gen_min_u32 },
9069                 };
9070                 genfn = fns[size][u];
9071                 break;
9072             }
9073             /* The FP operations are all on single floats (32 bit) */
9074             case 0x58: /* FMAXNMP */
9075                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9076                 break;
9077             case 0x5a: /* FADDP */
9078                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9079                 break;
9080             case 0x5e: /* FMAXP */
9081                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9082                 break;
9083             case 0x78: /* FMINNMP */
9084                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9085                 break;
9086             case 0x7e: /* FMINP */
9087                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9088                 break;
9089             default:
9090                 g_assert_not_reached();
9091             }
9092
9093             /* FP ops called directly, otherwise call now */
9094             if (genfn) {
9095                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
9096             }
9097
9098             tcg_temp_free_i32(tcg_op1);
9099             tcg_temp_free_i32(tcg_op2);
9100         }
9101
9102         for (pass = 0; pass < maxpass; pass++) {
9103             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9104             tcg_temp_free_i32(tcg_res[pass]);
9105         }
9106         if (!is_q) {
9107             clear_vec_high(s, rd);
9108         }
9109     }
9110
9111     if (!TCGV_IS_UNUSED_PTR(fpst)) {
9112         tcg_temp_free_ptr(fpst);
9113     }
9114 }
9115
9116 /* Floating point op subgroup of C3.6.16. */
9117 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
9118 {
9119     /* For floating point ops, the U, size[1] and opcode bits
9120      * together indicate the operation. size[0] indicates single
9121      * or double.
9122      */
9123     int fpopcode = extract32(insn, 11, 5)
9124         | (extract32(insn, 23, 1) << 5)
9125         | (extract32(insn, 29, 1) << 6);
9126     int is_q = extract32(insn, 30, 1);
9127     int size = extract32(insn, 22, 1);
9128     int rm = extract32(insn, 16, 5);
9129     int rn = extract32(insn, 5, 5);
9130     int rd = extract32(insn, 0, 5);
9131
9132     int datasize = is_q ? 128 : 64;
9133     int esize = 32 << size;
9134     int elements = datasize / esize;
9135
9136     if (size == 1 && !is_q) {
9137         unallocated_encoding(s);
9138         return;
9139     }
9140
9141     switch (fpopcode) {
9142     case 0x58: /* FMAXNMP */
9143     case 0x5a: /* FADDP */
9144     case 0x5e: /* FMAXP */
9145     case 0x78: /* FMINNMP */
9146     case 0x7e: /* FMINP */
9147         if (size && !is_q) {
9148             unallocated_encoding(s);
9149             return;
9150         }
9151         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
9152                                rn, rm, rd);
9153         return;
9154     case 0x1b: /* FMULX */
9155     case 0x1f: /* FRECPS */
9156     case 0x3f: /* FRSQRTS */
9157     case 0x5d: /* FACGE */
9158     case 0x7d: /* FACGT */
9159     case 0x19: /* FMLA */
9160     case 0x39: /* FMLS */
9161     case 0x18: /* FMAXNM */
9162     case 0x1a: /* FADD */
9163     case 0x1c: /* FCMEQ */
9164     case 0x1e: /* FMAX */
9165     case 0x38: /* FMINNM */
9166     case 0x3a: /* FSUB */
9167     case 0x3e: /* FMIN */
9168     case 0x5b: /* FMUL */
9169     case 0x5c: /* FCMGE */
9170     case 0x5f: /* FDIV */
9171     case 0x7a: /* FABD */
9172     case 0x7c: /* FCMGT */
9173         if (!fp_access_check(s)) {
9174             return;
9175         }
9176
9177         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
9178         return;
9179     default:
9180         unallocated_encoding(s);
9181         return;
9182     }
9183 }
9184
9185 /* Integer op subgroup of C3.6.16. */
9186 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
9187 {
9188     int is_q = extract32(insn, 30, 1);
9189     int u = extract32(insn, 29, 1);
9190     int size = extract32(insn, 22, 2);
9191     int opcode = extract32(insn, 11, 5);
9192     int rm = extract32(insn, 16, 5);
9193     int rn = extract32(insn, 5, 5);
9194     int rd = extract32(insn, 0, 5);
9195     int pass;
9196
9197     switch (opcode) {
9198     case 0x13: /* MUL, PMUL */
9199         if (u && size != 0) {
9200             unallocated_encoding(s);
9201             return;
9202         }
9203         /* fall through */
9204     case 0x0: /* SHADD, UHADD */
9205     case 0x2: /* SRHADD, URHADD */
9206     case 0x4: /* SHSUB, UHSUB */
9207     case 0xc: /* SMAX, UMAX */
9208     case 0xd: /* SMIN, UMIN */
9209     case 0xe: /* SABD, UABD */
9210     case 0xf: /* SABA, UABA */
9211     case 0x12: /* MLA, MLS */
9212         if (size == 3) {
9213             unallocated_encoding(s);
9214             return;
9215         }
9216         break;
9217     case 0x16: /* SQDMULH, SQRDMULH */
9218         if (size == 0 || size == 3) {
9219             unallocated_encoding(s);
9220             return;
9221         }
9222         break;
9223     default:
9224         if (size == 3 && !is_q) {
9225             unallocated_encoding(s);
9226             return;
9227         }
9228         break;
9229     }
9230
9231     if (!fp_access_check(s)) {
9232         return;
9233     }
9234
9235     if (size == 3) {
9236         assert(is_q);
9237         for (pass = 0; pass < 2; pass++) {
9238             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9239             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9240             TCGv_i64 tcg_res = tcg_temp_new_i64();
9241
9242             read_vec_element(s, tcg_op1, rn, pass, MO_64);
9243             read_vec_element(s, tcg_op2, rm, pass, MO_64);
9244
9245             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
9246
9247             write_vec_element(s, tcg_res, rd, pass, MO_64);
9248
9249             tcg_temp_free_i64(tcg_res);
9250             tcg_temp_free_i64(tcg_op1);
9251             tcg_temp_free_i64(tcg_op2);
9252         }
9253     } else {
9254         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
9255             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9256             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9257             TCGv_i32 tcg_res = tcg_temp_new_i32();
9258             NeonGenTwoOpFn *genfn = NULL;
9259             NeonGenTwoOpEnvFn *genenvfn = NULL;
9260
9261             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9262             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9263
9264             switch (opcode) {
9265             case 0x0: /* SHADD, UHADD */
9266             {
9267                 static NeonGenTwoOpFn * const fns[3][2] = {
9268                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
9269                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
9270                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
9271                 };
9272                 genfn = fns[size][u];
9273                 break;
9274             }
9275             case 0x1: /* SQADD, UQADD */
9276             {
9277                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9278                     { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9279                     { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9280                     { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9281                 };
9282                 genenvfn = fns[size][u];
9283                 break;
9284             }
9285             case 0x2: /* SRHADD, URHADD */
9286             {
9287                 static NeonGenTwoOpFn * const fns[3][2] = {
9288                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
9289                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
9290                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
9291                 };
9292                 genfn = fns[size][u];
9293                 break;
9294             }
9295             case 0x4: /* SHSUB, UHSUB */
9296             {
9297                 static NeonGenTwoOpFn * const fns[3][2] = {
9298                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
9299                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
9300                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
9301                 };
9302                 genfn = fns[size][u];
9303                 break;
9304             }
9305             case 0x5: /* SQSUB, UQSUB */
9306             {
9307                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9308                     { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9309                     { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9310                     { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9311                 };
9312                 genenvfn = fns[size][u];
9313                 break;
9314             }
9315             case 0x6: /* CMGT, CMHI */
9316             {
9317                 static NeonGenTwoOpFn * const fns[3][2] = {
9318                     { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
9319                     { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
9320                     { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
9321                 };
9322                 genfn = fns[size][u];
9323                 break;
9324             }
9325             case 0x7: /* CMGE, CMHS */
9326             {
9327                 static NeonGenTwoOpFn * const fns[3][2] = {
9328                     { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
9329                     { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
9330                     { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
9331                 };
9332                 genfn = fns[size][u];
9333                 break;
9334             }
9335             case 0x8: /* SSHL, USHL */
9336             {
9337                 static NeonGenTwoOpFn * const fns[3][2] = {
9338                     { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
9339                     { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
9340                     { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
9341                 };
9342                 genfn = fns[size][u];
9343                 break;
9344             }
9345             case 0x9: /* SQSHL, UQSHL */
9346             {
9347                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9348                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9349                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9350                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9351                 };
9352                 genenvfn = fns[size][u];
9353                 break;
9354             }
9355             case 0xa: /* SRSHL, URSHL */
9356             {
9357                 static NeonGenTwoOpFn * const fns[3][2] = {
9358                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
9359                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
9360                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
9361                 };
9362                 genfn = fns[size][u];
9363                 break;
9364             }
9365             case 0xb: /* SQRSHL, UQRSHL */
9366             {
9367                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9368                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9369                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9370                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9371                 };
9372                 genenvfn = fns[size][u];
9373                 break;
9374             }
9375             case 0xc: /* SMAX, UMAX */
9376             {
9377                 static NeonGenTwoOpFn * const fns[3][2] = {
9378                     { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
9379                     { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
9380                     { gen_max_s32, gen_max_u32 },
9381                 };
9382                 genfn = fns[size][u];
9383                 break;
9384             }
9385
9386             case 0xd: /* SMIN, UMIN */
9387             {
9388                 static NeonGenTwoOpFn * const fns[3][2] = {
9389                     { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
9390                     { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
9391                     { gen_min_s32, gen_min_u32 },
9392                 };
9393                 genfn = fns[size][u];
9394                 break;
9395             }
9396             case 0xe: /* SABD, UABD */
9397             case 0xf: /* SABA, UABA */
9398             {
9399                 static NeonGenTwoOpFn * const fns[3][2] = {
9400                     { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
9401                     { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
9402                     { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
9403                 };
9404                 genfn = fns[size][u];
9405                 break;
9406             }
9407             case 0x10: /* ADD, SUB */
9408             {
9409                 static NeonGenTwoOpFn * const fns[3][2] = {
9410                     { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9411                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9412                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
9413                 };
9414                 genfn = fns[size][u];
9415                 break;
9416             }
9417             case 0x11: /* CMTST, CMEQ */
9418             {
9419                 static NeonGenTwoOpFn * const fns[3][2] = {
9420                     { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
9421                     { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
9422                     { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
9423                 };
9424                 genfn = fns[size][u];
9425                 break;
9426             }
9427             case 0x13: /* MUL, PMUL */
9428                 if (u) {
9429                     /* PMUL */
9430                     assert(size == 0);
9431                     genfn = gen_helper_neon_mul_p8;
9432                     break;
9433                 }
9434                 /* fall through : MUL */
9435             case 0x12: /* MLA, MLS */
9436             {
9437                 static NeonGenTwoOpFn * const fns[3] = {
9438                     gen_helper_neon_mul_u8,
9439                     gen_helper_neon_mul_u16,
9440                     tcg_gen_mul_i32,
9441                 };
9442                 genfn = fns[size];
9443                 break;
9444             }
9445             case 0x16: /* SQDMULH, SQRDMULH */
9446             {
9447                 static NeonGenTwoOpEnvFn * const fns[2][2] = {
9448                     { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9449                     { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9450                 };
9451                 assert(size == 1 || size == 2);
9452                 genenvfn = fns[size - 1][u];
9453                 break;
9454             }
9455             default:
9456                 g_assert_not_reached();
9457             }
9458
9459             if (genenvfn) {
9460                 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
9461             } else {
9462                 genfn(tcg_res, tcg_op1, tcg_op2);
9463             }
9464
9465             if (opcode == 0xf || opcode == 0x12) {
9466                 /* SABA, UABA, MLA, MLS: accumulating ops */
9467                 static NeonGenTwoOpFn * const fns[3][2] = {
9468                     { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9469                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9470                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
9471                 };
9472                 bool is_sub = (opcode == 0x12 && u); /* MLS */
9473
9474                 genfn = fns[size][is_sub];
9475                 read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
9476                 genfn(tcg_res, tcg_op1, tcg_res);
9477             }
9478
9479             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9480
9481             tcg_temp_free_i32(tcg_res);
9482             tcg_temp_free_i32(tcg_op1);
9483             tcg_temp_free_i32(tcg_op2);
9484         }
9485     }
9486
9487     if (!is_q) {
9488         clear_vec_high(s, rd);
9489     }
9490 }
9491
9492 /* C3.6.16 AdvSIMD three same
9493  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
9494  * +---+---+---+-----------+------+---+------+--------+---+------+------+
9495  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9496  * +---+---+---+-----------+------+---+------+--------+---+------+------+
9497  */
9498 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
9499 {
9500     int opcode = extract32(insn, 11, 5);
9501
9502     switch (opcode) {
9503     case 0x3: /* logic ops */
9504         disas_simd_3same_logic(s, insn);
9505         break;
9506     case 0x17: /* ADDP */
9507     case 0x14: /* SMAXP, UMAXP */
9508     case 0x15: /* SMINP, UMINP */
9509     {
9510         /* Pairwise operations */
9511         int is_q = extract32(insn, 30, 1);
9512         int u = extract32(insn, 29, 1);
9513         int size = extract32(insn, 22, 2);
9514         int rm = extract32(insn, 16, 5);
9515         int rn = extract32(insn, 5, 5);
9516         int rd = extract32(insn, 0, 5);
9517         if (opcode == 0x17) {
9518             if (u || (size == 3 && !is_q)) {
9519                 unallocated_encoding(s);
9520                 return;
9521             }
9522         } else {
9523             if (size == 3) {
9524                 unallocated_encoding(s);
9525                 return;
9526             }
9527         }
9528         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
9529         break;
9530     }
9531     case 0x18 ... 0x31:
9532         /* floating point ops, sz[1] and U are part of opcode */
9533         disas_simd_3same_float(s, insn);
9534         break;
9535     default:
9536         disas_simd_3same_int(s, insn);
9537         break;
9538     }
9539 }
9540
9541 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
9542                                   int size, int rn, int rd)
9543 {
9544     /* Handle 2-reg-misc ops which are widening (so each size element
9545      * in the source becomes a 2*size element in the destination.
9546      * The only instruction like this is FCVTL.
9547      */
9548     int pass;
9549
9550     if (size == 3) {
9551         /* 32 -> 64 bit fp conversion */
9552         TCGv_i64 tcg_res[2];
9553         int srcelt = is_q ? 2 : 0;
9554
9555         for (pass = 0; pass < 2; pass++) {
9556             TCGv_i32 tcg_op = tcg_temp_new_i32();
9557             tcg_res[pass] = tcg_temp_new_i64();
9558
9559             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
9560             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
9561             tcg_temp_free_i32(tcg_op);
9562         }
9563         for (pass = 0; pass < 2; pass++) {
9564             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9565             tcg_temp_free_i64(tcg_res[pass]);
9566         }
9567     } else {
9568         /* 16 -> 32 bit fp conversion */
9569         int srcelt = is_q ? 4 : 0;
9570         TCGv_i32 tcg_res[4];
9571
9572         for (pass = 0; pass < 4; pass++) {
9573             tcg_res[pass] = tcg_temp_new_i32();
9574
9575             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
9576             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
9577                                            cpu_env);
9578         }
9579         for (pass = 0; pass < 4; pass++) {
9580             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9581             tcg_temp_free_i32(tcg_res[pass]);
9582         }
9583     }
9584 }
9585
9586 static void handle_rev(DisasContext *s, int opcode, bool u,
9587                        bool is_q, int size, int rn, int rd)
9588 {
9589     int op = (opcode << 1) | u;
9590     int opsz = op + size;
9591     int grp_size = 3 - opsz;
9592     int dsize = is_q ? 128 : 64;
9593     int i;
9594
9595     if (opsz >= 3) {
9596         unallocated_encoding(s);
9597         return;
9598     }
9599
9600     if (!fp_access_check(s)) {
9601         return;
9602     }
9603
9604     if (size == 0) {
9605         /* Special case bytes, use bswap op on each group of elements */
9606         int groups = dsize / (8 << grp_size);
9607
9608         for (i = 0; i < groups; i++) {
9609             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9610
9611             read_vec_element(s, tcg_tmp, rn, i, grp_size);
9612             switch (grp_size) {
9613             case MO_16:
9614                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
9615                 break;
9616             case MO_32:
9617                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
9618                 break;
9619             case MO_64:
9620                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
9621                 break;
9622             default:
9623                 g_assert_not_reached();
9624             }
9625             write_vec_element(s, tcg_tmp, rd, i, grp_size);
9626             tcg_temp_free_i64(tcg_tmp);
9627         }
9628         if (!is_q) {
9629             clear_vec_high(s, rd);
9630         }
9631     } else {
9632         int revmask = (1 << grp_size) - 1;
9633         int esize = 8 << size;
9634         int elements = dsize / esize;
9635         TCGv_i64 tcg_rn = tcg_temp_new_i64();
9636         TCGv_i64 tcg_rd = tcg_const_i64(0);
9637         TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
9638
9639         for (i = 0; i < elements; i++) {
9640             int e_rev = (i & 0xf) ^ revmask;
9641             int off = e_rev * esize;
9642             read_vec_element(s, tcg_rn, rn, i, size);
9643             if (off >= 64) {
9644                 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
9645                                     tcg_rn, off - 64, esize);
9646             } else {
9647                 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
9648             }
9649         }
9650         write_vec_element(s, tcg_rd, rd, 0, MO_64);
9651         write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
9652
9653         tcg_temp_free_i64(tcg_rd_hi);
9654         tcg_temp_free_i64(tcg_rd);
9655         tcg_temp_free_i64(tcg_rn);
9656     }
9657 }
9658
9659 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
9660                                   bool is_q, int size, int rn, int rd)
9661 {
9662     /* Implement the pairwise operations from 2-misc:
9663      * SADDLP, UADDLP, SADALP, UADALP.
9664      * These all add pairs of elements in the input to produce a
9665      * double-width result element in the output (possibly accumulating).
9666      */
9667     bool accum = (opcode == 0x6);
9668     int maxpass = is_q ? 2 : 1;
9669     int pass;
9670     TCGv_i64 tcg_res[2];
9671
9672     if (size == 2) {
9673         /* 32 + 32 -> 64 op */
9674         TCGMemOp memop = size + (u ? 0 : MO_SIGN);
9675
9676         for (pass = 0; pass < maxpass; pass++) {
9677             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9678             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9679
9680             tcg_res[pass] = tcg_temp_new_i64();
9681
9682             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
9683             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
9684             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9685             if (accum) {
9686                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
9687                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9688             }
9689
9690             tcg_temp_free_i64(tcg_op1);
9691             tcg_temp_free_i64(tcg_op2);
9692         }
9693     } else {
9694         for (pass = 0; pass < maxpass; pass++) {
9695             TCGv_i64 tcg_op = tcg_temp_new_i64();
9696             NeonGenOneOpFn *genfn;
9697             static NeonGenOneOpFn * const fns[2][2] = {
9698                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
9699                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
9700             };
9701
9702             genfn = fns[size][u];
9703
9704             tcg_res[pass] = tcg_temp_new_i64();
9705
9706             read_vec_element(s, tcg_op, rn, pass, MO_64);
9707             genfn(tcg_res[pass], tcg_op);
9708
9709             if (accum) {
9710                 read_vec_element(s, tcg_op, rd, pass, MO_64);
9711                 if (size == 0) {
9712                     gen_helper_neon_addl_u16(tcg_res[pass],
9713                                              tcg_res[pass], tcg_op);
9714                 } else {
9715                     gen_helper_neon_addl_u32(tcg_res[pass],
9716                                              tcg_res[pass], tcg_op);
9717                 }
9718             }
9719             tcg_temp_free_i64(tcg_op);
9720         }
9721     }
9722     if (!is_q) {
9723         tcg_res[1] = tcg_const_i64(0);
9724     }
9725     for (pass = 0; pass < 2; pass++) {
9726         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9727         tcg_temp_free_i64(tcg_res[pass]);
9728     }
9729 }
9730
9731 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
9732 {
9733     /* Implement SHLL and SHLL2 */
9734     int pass;
9735     int part = is_q ? 2 : 0;
9736     TCGv_i64 tcg_res[2];
9737
9738     for (pass = 0; pass < 2; pass++) {
9739         static NeonGenWidenFn * const widenfns[3] = {
9740             gen_helper_neon_widen_u8,
9741             gen_helper_neon_widen_u16,
9742             tcg_gen_extu_i32_i64,
9743         };
9744         NeonGenWidenFn *widenfn = widenfns[size];
9745         TCGv_i32 tcg_op = tcg_temp_new_i32();
9746
9747         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
9748         tcg_res[pass] = tcg_temp_new_i64();
9749         widenfn(tcg_res[pass], tcg_op);
9750         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
9751
9752         tcg_temp_free_i32(tcg_op);
9753     }
9754
9755     for (pass = 0; pass < 2; pass++) {
9756         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9757         tcg_temp_free_i64(tcg_res[pass]);
9758     }
9759 }
9760
9761 /* C3.6.17 AdvSIMD two reg misc
9762  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9763  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9764  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9765  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9766  */
9767 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
9768 {
9769     int size = extract32(insn, 22, 2);
9770     int opcode = extract32(insn, 12, 5);
9771     bool u = extract32(insn, 29, 1);
9772     bool is_q = extract32(insn, 30, 1);
9773     int rn = extract32(insn, 5, 5);
9774     int rd = extract32(insn, 0, 5);
9775     bool need_fpstatus = false;
9776     bool need_rmode = false;
9777     int rmode = -1;
9778     TCGv_i32 tcg_rmode;
9779     TCGv_ptr tcg_fpstatus;
9780
9781     switch (opcode) {
9782     case 0x0: /* REV64, REV32 */
9783     case 0x1: /* REV16 */
9784         handle_rev(s, opcode, u, is_q, size, rn, rd);
9785         return;
9786     case 0x5: /* CNT, NOT, RBIT */
9787         if (u && size == 0) {
9788             /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
9789             size = 3;
9790             break;
9791         } else if (u && size == 1) {
9792             /* RBIT */
9793             break;
9794         } else if (!u && size == 0) {
9795             /* CNT */
9796             break;
9797         }
9798         unallocated_encoding(s);
9799         return;
9800     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
9801     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
9802         if (size == 3) {
9803             unallocated_encoding(s);
9804             return;
9805         }
9806         if (!fp_access_check(s)) {
9807             return;
9808         }
9809
9810         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
9811         return;
9812     case 0x4: /* CLS, CLZ */
9813         if (size == 3) {
9814             unallocated_encoding(s);
9815             return;
9816         }
9817         break;
9818     case 0x2: /* SADDLP, UADDLP */
9819     case 0x6: /* SADALP, UADALP */
9820         if (size == 3) {
9821             unallocated_encoding(s);
9822             return;
9823         }
9824         if (!fp_access_check(s)) {
9825             return;
9826         }
9827         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
9828         return;
9829     case 0x13: /* SHLL, SHLL2 */
9830         if (u == 0 || size == 3) {
9831             unallocated_encoding(s);
9832             return;
9833         }
9834         if (!fp_access_check(s)) {
9835             return;
9836         }
9837         handle_shll(s, is_q, size, rn, rd);
9838         return;
9839     case 0xa: /* CMLT */
9840         if (u == 1) {
9841             unallocated_encoding(s);
9842             return;
9843         }
9844         /* fall through */
9845     case 0x8: /* CMGT, CMGE */
9846     case 0x9: /* CMEQ, CMLE */
9847     case 0xb: /* ABS, NEG */
9848         if (size == 3 && !is_q) {
9849             unallocated_encoding(s);
9850             return;
9851         }
9852         break;
9853     case 0x3: /* SUQADD, USQADD */
9854         if (size == 3 && !is_q) {
9855             unallocated_encoding(s);
9856             return;
9857         }
9858         if (!fp_access_check(s)) {
9859             return;
9860         }
9861         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
9862         return;
9863     case 0x7: /* SQABS, SQNEG */
9864         if (size == 3 && !is_q) {
9865             unallocated_encoding(s);
9866             return;
9867         }
9868         break;
9869     case 0xc ... 0xf:
9870     case 0x16 ... 0x1d:
9871     case 0x1f:
9872     {
9873         /* Floating point: U, size[1] and opcode indicate operation;
9874          * size[0] indicates single or double precision.
9875          */
9876         int is_double = extract32(size, 0, 1);
9877         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
9878         size = is_double ? 3 : 2;
9879         switch (opcode) {
9880         case 0x2f: /* FABS */
9881         case 0x6f: /* FNEG */
9882             if (size == 3 && !is_q) {
9883                 unallocated_encoding(s);
9884                 return;
9885             }
9886             break;
9887         case 0x1d: /* SCVTF */
9888         case 0x5d: /* UCVTF */
9889         {
9890             bool is_signed = (opcode == 0x1d) ? true : false;
9891             int elements = is_double ? 2 : is_q ? 4 : 2;
9892             if (is_double && !is_q) {
9893                 unallocated_encoding(s);
9894                 return;
9895             }
9896             if (!fp_access_check(s)) {
9897                 return;
9898             }
9899             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
9900             return;
9901         }
9902         case 0x2c: /* FCMGT (zero) */
9903         case 0x2d: /* FCMEQ (zero) */
9904         case 0x2e: /* FCMLT (zero) */
9905         case 0x6c: /* FCMGE (zero) */
9906         case 0x6d: /* FCMLE (zero) */
9907             if (size == 3 && !is_q) {
9908                 unallocated_encoding(s);
9909                 return;
9910             }
9911             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
9912             return;
9913         case 0x7f: /* FSQRT */
9914             if (size == 3 && !is_q) {
9915                 unallocated_encoding(s);
9916                 return;
9917             }
9918             break;
9919         case 0x1a: /* FCVTNS */
9920         case 0x1b: /* FCVTMS */
9921         case 0x3a: /* FCVTPS */
9922         case 0x3b: /* FCVTZS */
9923         case 0x5a: /* FCVTNU */
9924         case 0x5b: /* FCVTMU */
9925         case 0x7a: /* FCVTPU */
9926         case 0x7b: /* FCVTZU */
9927             need_fpstatus = true;
9928             need_rmode = true;
9929             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9930             if (size == 3 && !is_q) {
9931                 unallocated_encoding(s);
9932                 return;
9933             }
9934             break;
9935         case 0x5c: /* FCVTAU */
9936         case 0x1c: /* FCVTAS */
9937             need_fpstatus = true;
9938             need_rmode = true;
9939             rmode = FPROUNDING_TIEAWAY;
9940             if (size == 3 && !is_q) {
9941                 unallocated_encoding(s);
9942                 return;
9943             }
9944             break;
9945         case 0x3c: /* URECPE */
9946             if (size == 3) {
9947                 unallocated_encoding(s);
9948                 return;
9949             }
9950             /* fall through */
9951         case 0x3d: /* FRECPE */
9952         case 0x7d: /* FRSQRTE */
9953             if (size == 3 && !is_q) {
9954                 unallocated_encoding(s);
9955                 return;
9956             }
9957             if (!fp_access_check(s)) {
9958                 return;
9959             }
9960             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
9961             return;
9962         case 0x56: /* FCVTXN, FCVTXN2 */
9963             if (size == 2) {
9964                 unallocated_encoding(s);
9965                 return;
9966             }
9967             /* fall through */
9968         case 0x16: /* FCVTN, FCVTN2 */
9969             /* handle_2misc_narrow does a 2*size -> size operation, but these
9970              * instructions encode the source size rather than dest size.
9971              */
9972             if (!fp_access_check(s)) {
9973                 return;
9974             }
9975             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
9976             return;
9977         case 0x17: /* FCVTL, FCVTL2 */
9978             if (!fp_access_check(s)) {
9979                 return;
9980             }
9981             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
9982             return;
9983         case 0x18: /* FRINTN */
9984         case 0x19: /* FRINTM */
9985         case 0x38: /* FRINTP */
9986         case 0x39: /* FRINTZ */
9987             need_rmode = true;
9988             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9989             /* fall through */
9990         case 0x59: /* FRINTX */
9991         case 0x79: /* FRINTI */
9992             need_fpstatus = true;
9993             if (size == 3 && !is_q) {
9994                 unallocated_encoding(s);
9995                 return;
9996             }
9997             break;
9998         case 0x58: /* FRINTA */
9999             need_rmode = true;
10000             rmode = FPROUNDING_TIEAWAY;
10001             need_fpstatus = true;
10002             if (size == 3 && !is_q) {
10003                 unallocated_encoding(s);
10004                 return;
10005             }
10006             break;
10007         case 0x7c: /* URSQRTE */
10008             if (size == 3) {
10009                 unallocated_encoding(s);
10010                 return;
10011             }
10012             need_fpstatus = true;
10013             break;
10014         default:
10015             unallocated_encoding(s);
10016             return;
10017         }
10018         break;
10019     }
10020     default:
10021         unallocated_encoding(s);
10022         return;
10023     }
10024
10025     if (!fp_access_check(s)) {
10026         return;
10027     }
10028
10029     if (need_fpstatus) {
10030         tcg_fpstatus = get_fpstatus_ptr();
10031     } else {
10032         TCGV_UNUSED_PTR(tcg_fpstatus);
10033     }
10034     if (need_rmode) {
10035         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
10036         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10037     } else {
10038         TCGV_UNUSED_I32(tcg_rmode);
10039     }
10040
10041     if (size == 3) {
10042         /* All 64-bit element operations can be shared with scalar 2misc */
10043         int pass;
10044
10045         for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
10046             TCGv_i64 tcg_op = tcg_temp_new_i64();
10047             TCGv_i64 tcg_res = tcg_temp_new_i64();
10048
10049             read_vec_element(s, tcg_op, rn, pass, MO_64);
10050
10051             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
10052                             tcg_rmode, tcg_fpstatus);
10053
10054             write_vec_element(s, tcg_res, rd, pass, MO_64);
10055
10056             tcg_temp_free_i64(tcg_res);
10057             tcg_temp_free_i64(tcg_op);
10058         }
10059     } else {
10060         int pass;
10061
10062         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
10063             TCGv_i32 tcg_op = tcg_temp_new_i32();
10064             TCGv_i32 tcg_res = tcg_temp_new_i32();
10065             TCGCond cond;
10066
10067             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
10068
10069             if (size == 2) {
10070                 /* Special cases for 32 bit elements */
10071                 switch (opcode) {
10072                 case 0xa: /* CMLT */
10073                     /* 32 bit integer comparison against zero, result is
10074                      * test ? (2^32 - 1) : 0. We implement via setcond(test)
10075                      * and inverting.
10076                      */
10077                     cond = TCG_COND_LT;
10078                 do_cmop:
10079                     tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
10080                     tcg_gen_neg_i32(tcg_res, tcg_res);
10081                     break;
10082                 case 0x8: /* CMGT, CMGE */
10083                     cond = u ? TCG_COND_GE : TCG_COND_GT;
10084                     goto do_cmop;
10085                 case 0x9: /* CMEQ, CMLE */
10086                     cond = u ? TCG_COND_LE : TCG_COND_EQ;
10087                     goto do_cmop;
10088                 case 0x4: /* CLS */
10089                     if (u) {
10090                         gen_helper_clz32(tcg_res, tcg_op);
10091                     } else {
10092                         gen_helper_cls32(tcg_res, tcg_op);
10093                     }
10094                     break;
10095                 case 0x7: /* SQABS, SQNEG */
10096                     if (u) {
10097                         gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
10098                     } else {
10099                         gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
10100                     }
10101                     break;
10102                 case 0xb: /* ABS, NEG */
10103                     if (u) {
10104                         tcg_gen_neg_i32(tcg_res, tcg_op);
10105                     } else {
10106                         TCGv_i32 tcg_zero = tcg_const_i32(0);
10107                         tcg_gen_neg_i32(tcg_res, tcg_op);
10108                         tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
10109                                             tcg_zero, tcg_op, tcg_res);
10110                         tcg_temp_free_i32(tcg_zero);
10111                     }
10112                     break;
10113                 case 0x2f: /* FABS */
10114                     gen_helper_vfp_abss(tcg_res, tcg_op);
10115                     break;
10116                 case 0x6f: /* FNEG */
10117                     gen_helper_vfp_negs(tcg_res, tcg_op);
10118                     break;
10119                 case 0x7f: /* FSQRT */
10120                     gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
10121                     break;
10122                 case 0x1a: /* FCVTNS */
10123                 case 0x1b: /* FCVTMS */
10124                 case 0x1c: /* FCVTAS */
10125                 case 0x3a: /* FCVTPS */
10126                 case 0x3b: /* FCVTZS */
10127                 {
10128                     TCGv_i32 tcg_shift = tcg_const_i32(0);
10129                     gen_helper_vfp_tosls(tcg_res, tcg_op,
10130                                          tcg_shift, tcg_fpstatus);
10131                     tcg_temp_free_i32(tcg_shift);
10132                     break;
10133                 }
10134                 case 0x5a: /* FCVTNU */
10135                 case 0x5b: /* FCVTMU */
10136                 case 0x5c: /* FCVTAU */
10137                 case 0x7a: /* FCVTPU */
10138                 case 0x7b: /* FCVTZU */
10139                 {
10140                     TCGv_i32 tcg_shift = tcg_const_i32(0);
10141                     gen_helper_vfp_touls(tcg_res, tcg_op,
10142                                          tcg_shift, tcg_fpstatus);
10143                     tcg_temp_free_i32(tcg_shift);
10144                     break;
10145                 }
10146                 case 0x18: /* FRINTN */
10147                 case 0x19: /* FRINTM */
10148                 case 0x38: /* FRINTP */
10149                 case 0x39: /* FRINTZ */
10150                 case 0x58: /* FRINTA */
10151                 case 0x79: /* FRINTI */
10152                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
10153                     break;
10154                 case 0x59: /* FRINTX */
10155                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
10156                     break;
10157                 case 0x7c: /* URSQRTE */
10158                     gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
10159                     break;
10160                 default:
10161                     g_assert_not_reached();
10162                 }
10163             } else {
10164                 /* Use helpers for 8 and 16 bit elements */
10165                 switch (opcode) {
10166                 case 0x5: /* CNT, RBIT */
10167                     /* For these two insns size is part of the opcode specifier
10168                      * (handled earlier); they always operate on byte elements.
10169                      */
10170                     if (u) {
10171                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
10172                     } else {
10173                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
10174                     }
10175                     break;
10176                 case 0x7: /* SQABS, SQNEG */
10177                 {
10178                     NeonGenOneOpEnvFn *genfn;
10179                     static NeonGenOneOpEnvFn * const fns[2][2] = {
10180                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10181                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10182                     };
10183                     genfn = fns[size][u];
10184                     genfn(tcg_res, cpu_env, tcg_op);
10185                     break;
10186                 }
10187                 case 0x8: /* CMGT, CMGE */
10188                 case 0x9: /* CMEQ, CMLE */
10189                 case 0xa: /* CMLT */
10190                 {
10191                     static NeonGenTwoOpFn * const fns[3][2] = {
10192                         { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
10193                         { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
10194                         { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
10195                     };
10196                     NeonGenTwoOpFn *genfn;
10197                     int comp;
10198                     bool reverse;
10199                     TCGv_i32 tcg_zero = tcg_const_i32(0);
10200
10201                     /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
10202                     comp = (opcode - 0x8) * 2 + u;
10203                     /* ...but LE, LT are implemented as reverse GE, GT */
10204                     reverse = (comp > 2);
10205                     if (reverse) {
10206                         comp = 4 - comp;
10207                     }
10208                     genfn = fns[comp][size];
10209                     if (reverse) {
10210                         genfn(tcg_res, tcg_zero, tcg_op);
10211                     } else {
10212                         genfn(tcg_res, tcg_op, tcg_zero);
10213                     }
10214                     tcg_temp_free_i32(tcg_zero);
10215                     break;
10216                 }
10217                 case 0xb: /* ABS, NEG */
10218                     if (u) {
10219                         TCGv_i32 tcg_zero = tcg_const_i32(0);
10220                         if (size) {
10221                             gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
10222                         } else {
10223                             gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
10224                         }
10225                         tcg_temp_free_i32(tcg_zero);
10226                     } else {
10227                         if (size) {
10228                             gen_helper_neon_abs_s16(tcg_res, tcg_op);
10229                         } else {
10230                             gen_helper_neon_abs_s8(tcg_res, tcg_op);
10231                         }
10232                     }
10233                     break;
10234                 case 0x4: /* CLS, CLZ */
10235                     if (u) {
10236                         if (size == 0) {
10237                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
10238                         } else {
10239                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
10240                         }
10241                     } else {
10242                         if (size == 0) {
10243                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
10244                         } else {
10245                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
10246                         }
10247                     }
10248                     break;
10249                 default:
10250                     g_assert_not_reached();
10251                 }
10252             }
10253
10254             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10255
10256             tcg_temp_free_i32(tcg_res);
10257             tcg_temp_free_i32(tcg_op);
10258         }
10259     }
10260     if (!is_q) {
10261         clear_vec_high(s, rd);
10262     }
10263
10264     if (need_rmode) {
10265         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10266         tcg_temp_free_i32(tcg_rmode);
10267     }
10268     if (need_fpstatus) {
10269         tcg_temp_free_ptr(tcg_fpstatus);
10270     }
10271 }
10272
10273 /* C3.6.13 AdvSIMD scalar x indexed element
10274  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10275  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10276  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10277  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10278  * C3.6.18 AdvSIMD vector x indexed element
10279  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10280  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10281  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10282  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10283  */
10284 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
10285 {
10286     /* This encoding has two kinds of instruction:
10287      *  normal, where we perform elt x idxelt => elt for each
10288      *     element in the vector
10289      *  long, where we perform elt x idxelt and generate a result of
10290      *     double the width of the input element
10291      * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
10292      */
10293     bool is_scalar = extract32(insn, 28, 1);
10294     bool is_q = extract32(insn, 30, 1);
10295     bool u = extract32(insn, 29, 1);
10296     int size = extract32(insn, 22, 2);
10297     int l = extract32(insn, 21, 1);
10298     int m = extract32(insn, 20, 1);
10299     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
10300     int rm = extract32(insn, 16, 4);
10301     int opcode = extract32(insn, 12, 4);
10302     int h = extract32(insn, 11, 1);
10303     int rn = extract32(insn, 5, 5);
10304     int rd = extract32(insn, 0, 5);
10305     bool is_long = false;
10306     bool is_fp = false;
10307     int index;
10308     TCGv_ptr fpst;
10309
10310     switch (opcode) {
10311     case 0x0: /* MLA */
10312     case 0x4: /* MLS */
10313         if (!u || is_scalar) {
10314             unallocated_encoding(s);
10315             return;
10316         }
10317         break;
10318     case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10319     case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10320     case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
10321         if (is_scalar) {
10322             unallocated_encoding(s);
10323             return;
10324         }
10325         is_long = true;
10326         break;
10327     case 0x3: /* SQDMLAL, SQDMLAL2 */
10328     case 0x7: /* SQDMLSL, SQDMLSL2 */
10329     case 0xb: /* SQDMULL, SQDMULL2 */
10330         is_long = true;
10331         /* fall through */
10332     case 0xc: /* SQDMULH */
10333     case 0xd: /* SQRDMULH */
10334         if (u) {
10335             unallocated_encoding(s);
10336             return;
10337         }
10338         break;
10339     case 0x8: /* MUL */
10340         if (u || is_scalar) {
10341             unallocated_encoding(s);
10342             return;
10343         }
10344         break;
10345     case 0x1: /* FMLA */
10346     case 0x5: /* FMLS */
10347         if (u) {
10348             unallocated_encoding(s);
10349             return;
10350         }
10351         /* fall through */
10352     case 0x9: /* FMUL, FMULX */
10353         if (!extract32(size, 1, 1)) {
10354             unallocated_encoding(s);
10355             return;
10356         }
10357         is_fp = true;
10358         break;
10359     default:
10360         unallocated_encoding(s);
10361         return;
10362     }
10363
10364     if (is_fp) {
10365         /* low bit of size indicates single/double */
10366         size = extract32(size, 0, 1) ? 3 : 2;
10367         if (size == 2) {
10368             index = h << 1 | l;
10369         } else {
10370             if (l || !is_q) {
10371                 unallocated_encoding(s);
10372                 return;
10373             }
10374             index = h;
10375         }
10376         rm |= (m << 4);
10377     } else {
10378         switch (size) {
10379         case 1:
10380             index = h << 2 | l << 1 | m;
10381             break;
10382         case 2:
10383             index = h << 1 | l;
10384             rm |= (m << 4);
10385             break;
10386         default:
10387             unallocated_encoding(s);
10388             return;
10389         }
10390     }
10391
10392     if (!fp_access_check(s)) {
10393         return;
10394     }
10395
10396     if (is_fp) {
10397         fpst = get_fpstatus_ptr();
10398     } else {
10399         TCGV_UNUSED_PTR(fpst);
10400     }
10401
10402     if (size == 3) {
10403         TCGv_i64 tcg_idx = tcg_temp_new_i64();
10404         int pass;
10405
10406         assert(is_fp && is_q && !is_long);
10407
10408         read_vec_element(s, tcg_idx, rm, index, MO_64);
10409
10410         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10411             TCGv_i64 tcg_op = tcg_temp_new_i64();
10412             TCGv_i64 tcg_res = tcg_temp_new_i64();
10413
10414             read_vec_element(s, tcg_op, rn, pass, MO_64);
10415
10416             switch (opcode) {
10417             case 0x5: /* FMLS */
10418                 /* As usual for ARM, separate negation for fused multiply-add */
10419                 gen_helper_vfp_negd(tcg_op, tcg_op);
10420                 /* fall through */
10421             case 0x1: /* FMLA */
10422                 read_vec_element(s, tcg_res, rd, pass, MO_64);
10423                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10424                 break;
10425             case 0x9: /* FMUL, FMULX */
10426                 if (u) {
10427                     gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
10428                 } else {
10429                     gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
10430                 }
10431                 break;
10432             default:
10433                 g_assert_not_reached();
10434             }
10435
10436             write_vec_element(s, tcg_res, rd, pass, MO_64);
10437             tcg_temp_free_i64(tcg_op);
10438             tcg_temp_free_i64(tcg_res);
10439         }
10440
10441         if (is_scalar) {
10442             clear_vec_high(s, rd);
10443         }
10444
10445         tcg_temp_free_i64(tcg_idx);
10446     } else if (!is_long) {
10447         /* 32 bit floating point, or 16 or 32 bit integer.
10448          * For the 16 bit scalar case we use the usual Neon helpers and
10449          * rely on the fact that 0 op 0 == 0 with no side effects.
10450          */
10451         TCGv_i32 tcg_idx = tcg_temp_new_i32();
10452         int pass, maxpasses;
10453
10454         if (is_scalar) {
10455             maxpasses = 1;
10456         } else {
10457             maxpasses = is_q ? 4 : 2;
10458         }
10459
10460         read_vec_element_i32(s, tcg_idx, rm, index, size);
10461
10462         if (size == 1 && !is_scalar) {
10463             /* The simplest way to handle the 16x16 indexed ops is to duplicate
10464              * the index into both halves of the 32 bit tcg_idx and then use
10465              * the usual Neon helpers.
10466              */
10467             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10468         }
10469
10470         for (pass = 0; pass < maxpasses; pass++) {
10471             TCGv_i32 tcg_op = tcg_temp_new_i32();
10472             TCGv_i32 tcg_res = tcg_temp_new_i32();
10473
10474             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
10475
10476             switch (opcode) {
10477             case 0x0: /* MLA */
10478             case 0x4: /* MLS */
10479             case 0x8: /* MUL */
10480             {
10481                 static NeonGenTwoOpFn * const fns[2][2] = {
10482                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
10483                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
10484                 };
10485                 NeonGenTwoOpFn *genfn;
10486                 bool is_sub = opcode == 0x4;
10487
10488                 if (size == 1) {
10489                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
10490                 } else {
10491                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
10492                 }
10493                 if (opcode == 0x8) {
10494                     break;
10495                 }
10496                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
10497                 genfn = fns[size - 1][is_sub];
10498                 genfn(tcg_res, tcg_op, tcg_res);
10499                 break;
10500             }
10501             case 0x5: /* FMLS */
10502                 /* As usual for ARM, separate negation for fused multiply-add */
10503                 gen_helper_vfp_negs(tcg_op, tcg_op);
10504                 /* fall through */
10505             case 0x1: /* FMLA */
10506                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10507                 gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10508                 break;
10509             case 0x9: /* FMUL, FMULX */
10510                 if (u) {
10511                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
10512                 } else {
10513                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
10514                 }
10515                 break;
10516             case 0xc: /* SQDMULH */
10517                 if (size == 1) {
10518                     gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
10519                                                tcg_op, tcg_idx);
10520                 } else {
10521                     gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
10522                                                tcg_op, tcg_idx);
10523                 }
10524                 break;
10525             case 0xd: /* SQRDMULH */
10526                 if (size == 1) {
10527                     gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
10528                                                 tcg_op, tcg_idx);
10529                 } else {
10530                     gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
10531                                                 tcg_op, tcg_idx);
10532                 }
10533                 break;
10534             default:
10535                 g_assert_not_reached();
10536             }
10537
10538             if (is_scalar) {
10539                 write_fp_sreg(s, rd, tcg_res);
10540             } else {
10541                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10542             }
10543
10544             tcg_temp_free_i32(tcg_op);
10545             tcg_temp_free_i32(tcg_res);
10546         }
10547
10548         tcg_temp_free_i32(tcg_idx);
10549
10550         if (!is_q) {
10551             clear_vec_high(s, rd);
10552         }
10553     } else {
10554         /* long ops: 16x16->32 or 32x32->64 */
10555         TCGv_i64 tcg_res[2];
10556         int pass;
10557         bool satop = extract32(opcode, 0, 1);
10558         TCGMemOp memop = MO_32;
10559
10560         if (satop || !u) {
10561             memop |= MO_SIGN;
10562         }
10563
10564         if (size == 2) {
10565             TCGv_i64 tcg_idx = tcg_temp_new_i64();
10566
10567             read_vec_element(s, tcg_idx, rm, index, memop);
10568
10569             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10570                 TCGv_i64 tcg_op = tcg_temp_new_i64();
10571                 TCGv_i64 tcg_passres;
10572                 int passelt;
10573
10574                 if (is_scalar) {
10575                     passelt = 0;
10576                 } else {
10577                     passelt = pass + (is_q * 2);
10578                 }
10579
10580                 read_vec_element(s, tcg_op, rn, passelt, memop);
10581
10582                 tcg_res[pass] = tcg_temp_new_i64();
10583
10584                 if (opcode == 0xa || opcode == 0xb) {
10585                     /* Non-accumulating ops */
10586                     tcg_passres = tcg_res[pass];
10587                 } else {
10588                     tcg_passres = tcg_temp_new_i64();
10589                 }
10590
10591                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
10592                 tcg_temp_free_i64(tcg_op);
10593
10594                 if (satop) {
10595                     /* saturating, doubling */
10596                     gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10597                                                       tcg_passres, tcg_passres);
10598                 }
10599
10600                 if (opcode == 0xa || opcode == 0xb) {
10601                     continue;
10602                 }
10603
10604                 /* Accumulating op: handle accumulate step */
10605                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10606
10607                 switch (opcode) {
10608                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10609                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10610                     break;
10611                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10612                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10613                     break;
10614                 case 0x7: /* SQDMLSL, SQDMLSL2 */
10615                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
10616                     /* fall through */
10617                 case 0x3: /* SQDMLAL, SQDMLAL2 */
10618                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10619                                                       tcg_res[pass],
10620                                                       tcg_passres);
10621                     break;
10622                 default:
10623                     g_assert_not_reached();
10624                 }
10625                 tcg_temp_free_i64(tcg_passres);
10626             }
10627             tcg_temp_free_i64(tcg_idx);
10628
10629             if (is_scalar) {
10630                 clear_vec_high(s, rd);
10631             }
10632         } else {
10633             TCGv_i32 tcg_idx = tcg_temp_new_i32();
10634
10635             assert(size == 1);
10636             read_vec_element_i32(s, tcg_idx, rm, index, size);
10637
10638             if (!is_scalar) {
10639                 /* The simplest way to handle the 16x16 indexed ops is to
10640                  * duplicate the index into both halves of the 32 bit tcg_idx
10641                  * and then use the usual Neon helpers.
10642                  */
10643                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10644             }
10645
10646             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10647                 TCGv_i32 tcg_op = tcg_temp_new_i32();
10648                 TCGv_i64 tcg_passres;
10649
10650                 if (is_scalar) {
10651                     read_vec_element_i32(s, tcg_op, rn, pass, size);
10652                 } else {
10653                     read_vec_element_i32(s, tcg_op, rn,
10654                                          pass + (is_q * 2), MO_32);
10655                 }
10656
10657                 tcg_res[pass] = tcg_temp_new_i64();
10658
10659                 if (opcode == 0xa || opcode == 0xb) {
10660                     /* Non-accumulating ops */
10661                     tcg_passres = tcg_res[pass];
10662                 } else {
10663                     tcg_passres = tcg_temp_new_i64();
10664                 }
10665
10666                 if (memop & MO_SIGN) {
10667                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
10668                 } else {
10669                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
10670                 }
10671                 if (satop) {
10672                     gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10673                                                       tcg_passres, tcg_passres);
10674                 }
10675                 tcg_temp_free_i32(tcg_op);
10676
10677                 if (opcode == 0xa || opcode == 0xb) {
10678                     continue;
10679                 }
10680
10681                 /* Accumulating op: handle accumulate step */
10682                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10683
10684                 switch (opcode) {
10685                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10686                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
10687                                              tcg_passres);
10688                     break;
10689                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10690                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
10691                                              tcg_passres);
10692                     break;
10693                 case 0x7: /* SQDMLSL, SQDMLSL2 */
10694                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10695                     /* fall through */
10696                 case 0x3: /* SQDMLAL, SQDMLAL2 */
10697                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10698                                                       tcg_res[pass],
10699                                                       tcg_passres);
10700                     break;
10701                 default:
10702                     g_assert_not_reached();
10703                 }
10704                 tcg_temp_free_i64(tcg_passres);
10705             }
10706             tcg_temp_free_i32(tcg_idx);
10707
10708             if (is_scalar) {
10709                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
10710             }
10711         }
10712
10713         if (is_scalar) {
10714             tcg_res[1] = tcg_const_i64(0);
10715         }
10716
10717         for (pass = 0; pass < 2; pass++) {
10718             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10719             tcg_temp_free_i64(tcg_res[pass]);
10720         }
10721     }
10722
10723     if (!TCGV_IS_UNUSED_PTR(fpst)) {
10724         tcg_temp_free_ptr(fpst);
10725     }
10726 }
10727
10728 /* C3.6.19 Crypto AES
10729  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10730  * +-----------------+------+-----------+--------+-----+------+------+
10731  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10732  * +-----------------+------+-----------+--------+-----+------+------+
10733  */
10734 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
10735 {
10736     int size = extract32(insn, 22, 2);
10737     int opcode = extract32(insn, 12, 5);
10738     int rn = extract32(insn, 5, 5);
10739     int rd = extract32(insn, 0, 5);
10740     int decrypt;
10741     TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt;
10742     CryptoThreeOpEnvFn *genfn;
10743
10744     if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
10745         || size != 0) {
10746         unallocated_encoding(s);
10747         return;
10748     }
10749
10750     switch (opcode) {
10751     case 0x4: /* AESE */
10752         decrypt = 0;
10753         genfn = gen_helper_crypto_aese;
10754         break;
10755     case 0x6: /* AESMC */
10756         decrypt = 0;
10757         genfn = gen_helper_crypto_aesmc;
10758         break;
10759     case 0x5: /* AESD */
10760         decrypt = 1;
10761         genfn = gen_helper_crypto_aese;
10762         break;
10763     case 0x7: /* AESIMC */
10764         decrypt = 1;
10765         genfn = gen_helper_crypto_aesmc;
10766         break;
10767     default:
10768         unallocated_encoding(s);
10769         return;
10770     }
10771
10772     /* Note that we convert the Vx register indexes into the
10773      * index within the vfp.regs[] array, so we can share the
10774      * helper with the AArch32 instructions.
10775      */
10776     tcg_rd_regno = tcg_const_i32(rd << 1);
10777     tcg_rn_regno = tcg_const_i32(rn << 1);
10778     tcg_decrypt = tcg_const_i32(decrypt);
10779
10780     genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt);
10781
10782     tcg_temp_free_i32(tcg_rd_regno);
10783     tcg_temp_free_i32(tcg_rn_regno);
10784     tcg_temp_free_i32(tcg_decrypt);
10785 }
10786
10787 /* C3.6.20 Crypto three-reg SHA
10788  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
10789  * +-----------------+------+---+------+---+--------+-----+------+------+
10790  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
10791  * +-----------------+------+---+------+---+--------+-----+------+------+
10792  */
10793 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
10794 {
10795     int size = extract32(insn, 22, 2);
10796     int opcode = extract32(insn, 12, 3);
10797     int rm = extract32(insn, 16, 5);
10798     int rn = extract32(insn, 5, 5);
10799     int rd = extract32(insn, 0, 5);
10800     CryptoThreeOpEnvFn *genfn;
10801     TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno;
10802     int feature = ARM_FEATURE_V8_SHA256;
10803
10804     if (size != 0) {
10805         unallocated_encoding(s);
10806         return;
10807     }
10808
10809     switch (opcode) {
10810     case 0: /* SHA1C */
10811     case 1: /* SHA1P */
10812     case 2: /* SHA1M */
10813     case 3: /* SHA1SU0 */
10814         genfn = NULL;
10815         feature = ARM_FEATURE_V8_SHA1;
10816         break;
10817     case 4: /* SHA256H */
10818         genfn = gen_helper_crypto_sha256h;
10819         break;
10820     case 5: /* SHA256H2 */
10821         genfn = gen_helper_crypto_sha256h2;
10822         break;
10823     case 6: /* SHA256SU1 */
10824         genfn = gen_helper_crypto_sha256su1;
10825         break;
10826     default:
10827         unallocated_encoding(s);
10828         return;
10829     }
10830
10831     if (!arm_dc_feature(s, feature)) {
10832         unallocated_encoding(s);
10833         return;
10834     }
10835
10836     tcg_rd_regno = tcg_const_i32(rd << 1);
10837     tcg_rn_regno = tcg_const_i32(rn << 1);
10838     tcg_rm_regno = tcg_const_i32(rm << 1);
10839
10840     if (genfn) {
10841         genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno);
10842     } else {
10843         TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
10844
10845         gen_helper_crypto_sha1_3reg(cpu_env, tcg_rd_regno,
10846                                     tcg_rn_regno, tcg_rm_regno, tcg_opcode);
10847         tcg_temp_free_i32(tcg_opcode);
10848     }
10849
10850     tcg_temp_free_i32(tcg_rd_regno);
10851     tcg_temp_free_i32(tcg_rn_regno);
10852     tcg_temp_free_i32(tcg_rm_regno);
10853 }
10854
10855 /* C3.6.21 Crypto two-reg SHA
10856  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10857  * +-----------------+------+-----------+--------+-----+------+------+
10858  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10859  * +-----------------+------+-----------+--------+-----+------+------+
10860  */
10861 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
10862 {
10863     int size = extract32(insn, 22, 2);
10864     int opcode = extract32(insn, 12, 5);
10865     int rn = extract32(insn, 5, 5);
10866     int rd = extract32(insn, 0, 5);
10867     CryptoTwoOpEnvFn *genfn;
10868     int feature;
10869     TCGv_i32 tcg_rd_regno, tcg_rn_regno;
10870
10871     if (size != 0) {
10872         unallocated_encoding(s);
10873         return;
10874     }
10875
10876     switch (opcode) {
10877     case 0: /* SHA1H */
10878         feature = ARM_FEATURE_V8_SHA1;
10879         genfn = gen_helper_crypto_sha1h;
10880         break;
10881     case 1: /* SHA1SU1 */
10882         feature = ARM_FEATURE_V8_SHA1;
10883         genfn = gen_helper_crypto_sha1su1;
10884         break;
10885     case 2: /* SHA256SU0 */
10886         feature = ARM_FEATURE_V8_SHA256;
10887         genfn = gen_helper_crypto_sha256su0;
10888         break;
10889     default:
10890         unallocated_encoding(s);
10891         return;
10892     }
10893
10894     if (!arm_dc_feature(s, feature)) {
10895         unallocated_encoding(s);
10896         return;
10897     }
10898
10899     tcg_rd_regno = tcg_const_i32(rd << 1);
10900     tcg_rn_regno = tcg_const_i32(rn << 1);
10901
10902     genfn(cpu_env, tcg_rd_regno, tcg_rn_regno);
10903
10904     tcg_temp_free_i32(tcg_rd_regno);
10905     tcg_temp_free_i32(tcg_rn_regno);
10906 }
10907
10908 /* C3.6 Data processing - SIMD, inc Crypto
10909  *
10910  * As the decode gets a little complex we are using a table based
10911  * approach for this part of the decode.
10912  */
10913 static const AArch64DecodeTable data_proc_simd[] = {
10914     /* pattern  ,  mask     ,  fn                        */
10915     { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
10916     { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
10917     { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
10918     { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
10919     { 0x0e000400, 0x9fe08400, disas_simd_copy },
10920     { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
10921     /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
10922     { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
10923     { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
10924     { 0x0e000000, 0xbf208c00, disas_simd_tb },
10925     { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
10926     { 0x2e000000, 0xbf208400, disas_simd_ext },
10927     { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
10928     { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
10929     { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
10930     { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
10931     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
10932     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
10933     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
10934     { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
10935     { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
10936     { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
10937     { 0x00000000, 0x00000000, NULL }
10938 };
10939
10940 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
10941 {
10942     /* Note that this is called with all non-FP cases from
10943      * table C3-6 so it must UNDEF for entries not specifically
10944      * allocated to instructions in that table.
10945      */
10946     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
10947     if (fn) {
10948         fn(s, insn);
10949     } else {
10950         unallocated_encoding(s);
10951     }
10952 }
10953
10954 /* C3.6 Data processing - SIMD and floating point */
10955 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
10956 {
10957     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
10958         disas_data_proc_fp(s, insn);
10959     } else {
10960         /* SIMD, including crypto */
10961         disas_data_proc_simd(s, insn);
10962     }
10963 }
10964
10965 /* C3.1 A64 instruction index by encoding */
10966 static void disas_a64_insn(CPUARMState *env, DisasContext *s)
10967 {
10968     uint32_t insn;
10969
10970     insn = arm_ldl_code(env, s->pc, s->bswap_code);
10971     s->insn = insn;
10972     s->pc += 4;
10973
10974     s->fp_access_checked = false;
10975
10976     switch (extract32(insn, 25, 4)) {
10977     case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
10978         unallocated_encoding(s);
10979         break;
10980     case 0x8: case 0x9: /* Data processing - immediate */
10981         disas_data_proc_imm(s, insn);
10982         break;
10983     case 0xa: case 0xb: /* Branch, exception generation and system insns */
10984         disas_b_exc_sys(s, insn);
10985         break;
10986     case 0x4:
10987     case 0x6:
10988     case 0xc:
10989     case 0xe:      /* Loads and stores */
10990         disas_ldst(s, insn);
10991         break;
10992     case 0x5:
10993     case 0xd:      /* Data processing - register */
10994         disas_data_proc_reg(s, insn);
10995         break;
10996     case 0x7:
10997     case 0xf:      /* Data processing - SIMD and floating point */
10998         disas_data_proc_simd_fp(s, insn);
10999         break;
11000     default:
11001         assert(FALSE); /* all 15 cases should be handled above */
11002         break;
11003     }
11004
11005     /* if we allocated any temporaries, free them here */
11006     free_tmp_a64(s);
11007 }
11008
11009 void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
11010 {
11011     CPUState *cs = CPU(cpu);
11012     CPUARMState *env = &cpu->env;
11013     DisasContext dc1, *dc = &dc1;
11014     target_ulong pc_start;
11015     target_ulong next_page_start;
11016     int num_insns;
11017     int max_insns;
11018
11019     pc_start = tb->pc;
11020
11021     dc->tb = tb;
11022
11023     dc->is_jmp = DISAS_NEXT;
11024     dc->pc = pc_start;
11025     dc->singlestep_enabled = cs->singlestep_enabled;
11026     dc->condjmp = 0;
11027
11028     dc->aarch64 = 1;
11029     /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
11030      * there is no secure EL1, so we route exceptions to EL3.
11031      */
11032     dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
11033                                !arm_el_is_aa64(env, 3);
11034     dc->thumb = 0;
11035     dc->bswap_code = 0;
11036     dc->condexec_mask = 0;
11037     dc->condexec_cond = 0;
11038     dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags);
11039     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
11040 #if !defined(CONFIG_USER_ONLY)
11041     dc->user = (dc->current_el == 0);
11042 #endif
11043     dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(tb->flags);
11044     dc->vec_len = 0;
11045     dc->vec_stride = 0;
11046     dc->cp_regs = cpu->cp_regs;
11047     dc->features = env->features;
11048
11049     /* Single step state. The code-generation logic here is:
11050      *  SS_ACTIVE == 0:
11051      *   generate code with no special handling for single-stepping (except
11052      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
11053      *   this happens anyway because those changes are all system register or
11054      *   PSTATE writes).
11055      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
11056      *   emit code for one insn
11057      *   emit code to clear PSTATE.SS
11058      *   emit code to generate software step exception for completed step
11059      *   end TB (as usual for having generated an exception)
11060      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
11061      *   emit code to generate a software step exception
11062      *   end the TB
11063      */
11064     dc->ss_active = ARM_TBFLAG_SS_ACTIVE(tb->flags);
11065     dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(tb->flags);
11066     dc->is_ldex = false;
11067     dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);
11068
11069     init_tmp_a64_array(dc);
11070
11071     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
11072     num_insns = 0;
11073     max_insns = tb->cflags & CF_COUNT_MASK;
11074     if (max_insns == 0) {
11075         max_insns = CF_COUNT_MASK;
11076     }
11077     if (max_insns > TCG_MAX_INSNS) {
11078         max_insns = TCG_MAX_INSNS;
11079     }
11080
11081     gen_tb_start(tb);
11082
11083     tcg_clear_temp_count();
11084
11085     do {
11086         tcg_gen_insn_start(dc->pc, 0);
11087         num_insns++;
11088
11089         if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
11090             CPUBreakpoint *bp;
11091             QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
11092                 if (bp->pc == dc->pc) {
11093                     if (bp->flags & BP_CPU) {
11094                         gen_a64_set_pc_im(dc->pc);
11095                         gen_helper_check_breakpoints(cpu_env);
11096                         /* End the TB early; it likely won't be executed */
11097                         dc->is_jmp = DISAS_UPDATE;
11098                     } else {
11099                         gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
11100                         /* The address covered by the breakpoint must be
11101                            included in [tb->pc, tb->pc + tb->size) in order
11102                            to for it to be properly cleared -- thus we
11103                            increment the PC here so that the logic setting
11104                            tb->size below does the right thing.  */
11105                         dc->pc += 4;
11106                         goto done_generating;
11107                     }
11108                     break;
11109                 }
11110             }
11111         }
11112
11113         if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
11114             gen_io_start();
11115         }
11116
11117         if (dc->ss_active && !dc->pstate_ss) {
11118             /* Singlestep state is Active-pending.
11119              * If we're in this state at the start of a TB then either
11120              *  a) we just took an exception to an EL which is being debugged
11121              *     and this is the first insn in the exception handler
11122              *  b) debug exceptions were masked and we just unmasked them
11123              *     without changing EL (eg by clearing PSTATE.D)
11124              * In either case we're going to take a swstep exception in the
11125              * "did not step an insn" case, and so the syndrome ISV and EX
11126              * bits should be zero.
11127              */
11128             assert(num_insns == 1);
11129             gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
11130                           default_exception_el(dc));
11131             dc->is_jmp = DISAS_EXC;
11132             break;
11133         }
11134
11135         disas_a64_insn(env, dc);
11136
11137         if (tcg_check_temp_count()) {
11138             fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
11139                     dc->pc);
11140         }
11141
11142         /* Translation stops when a conditional branch is encountered.
11143          * Otherwise the subsequent code could get translated several times.
11144          * Also stop translation when a page boundary is reached.  This
11145          * ensures prefetch aborts occur at the right place.
11146          */
11147     } while (!dc->is_jmp && !tcg_op_buf_full() &&
11148              !cs->singlestep_enabled &&
11149              !singlestep &&
11150              !dc->ss_active &&
11151              dc->pc < next_page_start &&
11152              num_insns < max_insns);
11153
11154     if (tb->cflags & CF_LAST_IO) {
11155         gen_io_end();
11156     }
11157
11158     if (unlikely(cs->singlestep_enabled || dc->ss_active)
11159         && dc->is_jmp != DISAS_EXC) {
11160         /* Note that this means single stepping WFI doesn't halt the CPU.
11161          * For conditional branch insns this is harmless unreachable code as
11162          * gen_goto_tb() has already handled emitting the debug exception
11163          * (and thus a tb-jump is not possible when singlestepping).
11164          */
11165         assert(dc->is_jmp != DISAS_TB_JUMP);
11166         if (dc->is_jmp != DISAS_JUMP) {
11167             gen_a64_set_pc_im(dc->pc);
11168         }
11169         if (cs->singlestep_enabled) {
11170             gen_exception_internal(EXCP_DEBUG);
11171         } else {
11172             gen_step_complete_exception(dc);
11173         }
11174     } else {
11175         switch (dc->is_jmp) {
11176         case DISAS_NEXT:
11177             gen_goto_tb(dc, 1, dc->pc);
11178             break;
11179         default:
11180         case DISAS_UPDATE:
11181             gen_a64_set_pc_im(dc->pc);
11182             /* fall through */
11183         case DISAS_JUMP:
11184             /* indicate that the hash table must be used to find the next TB */
11185             tcg_gen_exit_tb(0);
11186             break;
11187         case DISAS_TB_JUMP:
11188         case DISAS_EXC:
11189         case DISAS_SWI:
11190             break;
11191         case DISAS_WFE:
11192             gen_a64_set_pc_im(dc->pc);
11193             gen_helper_wfe(cpu_env);
11194             break;
11195         case DISAS_YIELD:
11196             gen_a64_set_pc_im(dc->pc);
11197             gen_helper_yield(cpu_env);
11198             break;
11199         case DISAS_WFI:
11200             /* This is a special case because we don't want to just halt the CPU
11201              * if trying to debug across a WFI.
11202              */
11203             gen_a64_set_pc_im(dc->pc);
11204             gen_helper_wfi(cpu_env);
11205             /* The helper doesn't necessarily throw an exception, but we
11206              * must go back to the main loop to check for interrupts anyway.
11207              */
11208             tcg_gen_exit_tb(0);
11209             break;
11210         }
11211     }
11212
11213 done_generating:
11214     gen_tb_end(tb, num_insns);
11215
11216 #ifdef DEBUG_DISAS
11217     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
11218         qemu_log("----------------\n");
11219         qemu_log("IN: %s\n", lookup_symbol(pc_start));
11220         log_target_disas(cs, pc_start, dc->pc - pc_start,
11221                          4 | (dc->bswap_code << 1));
11222         qemu_log("\n");
11223     }
11224 #endif
11225     tb->size = dc->pc - pc_start;
11226     tb->icount = num_insns;
11227 }