target-arm/translate-a64.c

   1 /*
   2  *  AArch64 translation
   3  *
   4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include "qemu/osdep.h"
  20
  21 #include "cpu.h"
  22 #include "tcg-op.h"
  23 #include "qemu/log.h"
  24 #include "arm_ldst.h"
  25 #include "translate.h"
  26 #include "internals.h"
  27 #include "qemu/host-utils.h"
  28
  29 #include "exec/semihost.h"
  30 #include "exec/gen-icount.h"
  31
  32 #include "exec/helper-proto.h"
  33 #include "exec/helper-gen.h"
  34 #include "exec/log.h"
  35
  36 #include "trace-tcg.h"
  37
/* TCG globals for the A64 integer state.  They are created in
 * a64_translate_init(): cpu_X[i] is backed by CPUARMState.xregs[i] and
 * cpu_pc by CPUARMState.pc.
 */
static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
/* TCG global backed by CPUARMState.exclusive_high. */
static TCGv_i64 cpu_exclusive_high;
  43
  44 static const char *regnames[] = {
  45     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
  46     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
  47     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
  48     "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
  49 };
  50
  51 enum a64_shift_type {
  52     A64_SHIFT_TYPE_LSL = 0,
  53     A64_SHIFT_TYPE_LSR = 1,
  54     A64_SHIFT_TYPE_ASR = 2,
  55     A64_SHIFT_TYPE_ROR = 3
  56 };
  57
/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
/* Signature of a decode handler: receives the translation context and
 * the raw 32-bit instruction word.
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

/* One entry of a decode table.
 * NOTE(review): an entry presumably matches when
 * (insn & mask) == pattern; confirm against the table lookup code.
 */
typedef struct AArch64DecodeTable {
    uint32_t pattern;           /* bit pattern to match */
    uint32_t mask;              /* bits of the instruction that are compared */
    AArch64DecodeFn *disas_fn;  /* handler for a matching instruction */
} AArch64DecodeTable;
  68
/* Function prototypes for gen_ functions for calling Neon helpers.
 * These are pure signature typedefs so that a gen_helper_* function can
 * be selected at translate time and called through a pointer.  The
 * "Env" variants take a TCGv_ptr argument in addition to the data
 * operands; the *OPFn variants take a trailing TCGv_ptr.
 */
typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
/* Prototypes for the crypto helper generators. */
typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
  83
  84 /* initialize TCG globals.  */
  85 void a64_translate_init(void)
  86 {
  87     int i;
  88
  89     cpu_pc = tcg_global_mem_new_i64(TCG_AREG0,
  90                                     offsetof(CPUARMState, pc),
  91                                     "pc");
  92     for (i = 0; i < 32; i++) {
  93         cpu_X[i] = tcg_global_mem_new_i64(TCG_AREG0,
  94                                           offsetof(CPUARMState, xregs[i]),
  95                                           regnames[i]);
  96     }
  97
  98     cpu_exclusive_high = tcg_global_mem_new_i64(TCG_AREG0,
  99         offsetof(CPUARMState, exclusive_high), "exclusive_high");
 100 }
 101
 102 static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s)
 103 {
 104     /* Return the mmu_idx to use for A64 "unprivileged load/store" insns:
 105      *  if EL1, access as if EL0; otherwise access at current EL
 106      */
 107     switch (s->mmu_idx) {
 108     case ARMMMUIdx_S12NSE1:
 109         return ARMMMUIdx_S12NSE0;
 110     case ARMMMUIdx_S1SE1:
 111         return ARMMMUIdx_S1SE0;
 112     case ARMMMUIdx_S2NS:
 113         g_assert_not_reached();
 114     default:
 115         return s->mmu_idx;
 116     }
 117 }
 118
 119 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
 120                             fprintf_function cpu_fprintf, int flags)
 121 {
 122     ARMCPU *cpu = ARM_CPU(cs);
 123     CPUARMState *env = &cpu->env;
 124     uint32_t psr = pstate_read(env);
 125     int i;
 126     int el = arm_current_el(env);
 127     const char *ns_status;
 128
 129     cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
 130             env->pc, env->xregs[31]);
 131     for (i = 0; i < 31; i++) {
 132         cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
 133         if ((i % 4) == 3) {
 134             cpu_fprintf(f, "\n");
 135         } else {
 136             cpu_fprintf(f, " ");
 137         }
 138     }
 139
 140     if (arm_feature(env, ARM_FEATURE_EL3) && el != 3) {
 141         ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
 142     } else {
 143         ns_status = "";
 144     }
 145
 146     cpu_fprintf(f, "\nPSTATE=%08x %c%c%c%c %sEL%d%c\n",
 147                 psr,
 148                 psr & PSTATE_N ? 'N' : '-',
 149                 psr & PSTATE_Z ? 'Z' : '-',
 150                 psr & PSTATE_C ? 'C' : '-',
 151                 psr & PSTATE_V ? 'V' : '-',
 152                 ns_status,
 153                 el,
 154                 psr & PSTATE_SP ? 'h' : 't');
 155
 156     if (flags & CPU_DUMP_FPU) {
 157         int numvfpregs = 32;
 158         for (i = 0; i < numvfpregs; i += 2) {
 159             uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
 160             uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
 161             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
 162                         i, vhi, vlo);
 163             vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
 164             vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
 165             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
 166                         i + 1, vhi, vlo);
 167         }
 168         cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
 169                     vfp_get_fpcr(env), vfp_get_fpsr(env));
 170     }
 171 }
 172
/* Emit a TCG op that sets the cpu_pc global to the immediate 'val'. */
void gen_a64_set_pc_im(uint64_t val)
{
    tcg_gen_movi_i64(cpu_pc, val);
}
 177
/* 64-bit form of a condition test, filled in by a64_test_cc(): 'cond'
 * is the TCG condition and 'value' the 64-bit temporary it applies to.
 * 'value' is released by a64_free_cc().
 */
typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;
 182
 183 static void a64_test_cc(DisasCompare64 *c64, int cc)
 184 {
 185     DisasCompare c32;
 186
 187     arm_test_cc(&c32, cc);
 188
 189     /* Sign-extend the 32-bit value so that the GE/LT comparisons work
 190        * properly.  The NE/EQ comparisons are also fine with this choice.  */
 191     c64->cond = c32.cond;
 192     c64->value = tcg_temp_new_i64();
 193     tcg_gen_ext_i32_i64(c64->value, c32.value);
 194
 195     arm_free_cc(&c32);
 196 }
 197
/* Release the 64-bit temporary allocated by a64_test_cc(). */
static void a64_free_cc(DisasCompare64 *c64)
{
    tcg_temp_free_i64(c64->value);
}
 202
 203 static void gen_exception_internal(int excp)
 204 {
 205     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 206
 207     assert(excp_is_internal(excp));
 208     gen_helper_exception_internal(cpu_env, tcg_excp);
 209     tcg_temp_free_i32(tcg_excp);
 210 }
 211
 212 static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
 213 {
 214     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 215     TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
 216     TCGv_i32 tcg_el = tcg_const_i32(target_el);
 217
 218     gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
 219                                        tcg_syn, tcg_el);
 220     tcg_temp_free_i32(tcg_el);
 221     tcg_temp_free_i32(tcg_syn);
 222     tcg_temp_free_i32(tcg_excp);
 223 }
 224
/* Raise the internal exception 'excp' for the current instruction:
 * set the PC to s->pc - offset, emit the exception and mark the
 * translation as ended by an exception (DISAS_EXC).
 */
static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
{
    gen_a64_set_pc_im(s->pc - offset);
    gen_exception_internal(excp);
    s->is_jmp = DISAS_EXC;
}
 231
/* Raise exception 'excp' with the given syndrome and target EL for the
 * current instruction: set the PC to s->pc - offset, emit the
 * exception and mark the translation as ended by an exception
 * (DISAS_EXC).
 */
static void gen_exception_insn(DisasContext *s, int offset, int excp,
                               uint32_t syndrome, uint32_t target_el)
{
    gen_a64_set_pc_im(s->pc - offset);
    gen_exception(excp, syndrome, target_el);
    s->is_jmp = DISAS_EXC;
}
 239
 240 static void gen_ss_advance(DisasContext *s)
 241 {
 242     /* If the singlestep state is Active-not-pending, advance to
 243      * Active-pending.
 244      */
 245     if (s->ss_active) {
 246         s->pstate_ss = 0;
 247         gen_helper_clear_pstate_ss(cpu_env);
 248     }
 249 }
 250
 251 static void gen_step_complete_exception(DisasContext *s)
 252 {
 253     /* We just completed step of an insn. Move from Active-not-pending
 254      * to Active-pending, and then also take the swstep exception.
 255      * This corresponds to making the (IMPDEF) choice to prioritize
 256      * swstep exceptions over asynchronous exceptions taken to an exception
 257      * level where debug is disabled. This choice has the advantage that
 258      * we do not need to maintain internal state corresponding to the
 259      * ISV/EX syndrome bits between completion of the step and generation
 260      * of the exception, and our syndrome information is always correct.
 261      */
 262     gen_ss_advance(s);
 263     gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
 264                   default_exception_el(s));
 265     s->is_jmp = DISAS_EXC;
 266 }
 267
 268 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
 269 {
 270     /* No direct tb linking with singlestep (either QEMU's or the ARM
 271      * debug architecture kind) or deterministic io
 272      */
 273     if (s->singlestep_enabled || s->ss_active || (s->tb->cflags & CF_LAST_IO)) {
 274         return false;
 275     }
 276
 277     /* Only link tbs from inside the same guest page */
 278     if ((s->tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
 279         return false;
 280     }
 281
 282     return true;
 283 }
 284
 285 static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
 286 {
 287     TranslationBlock *tb;
 288
 289     tb = s->tb;
 290     if (use_goto_tb(s, n, dest)) {
 291         tcg_gen_goto_tb(n);
 292         gen_a64_set_pc_im(dest);
 293         tcg_gen_exit_tb((intptr_t)tb + n);
 294         s->is_jmp = DISAS_TB_JUMP;
 295     } else {
 296         gen_a64_set_pc_im(dest);
 297         if (s->ss_active) {
 298             gen_step_complete_exception(s);
 299         } else if (s->singlestep_enabled) {
 300             gen_exception_internal(EXCP_DEBUG);
 301         } else {
 302             tcg_gen_exit_tb(0);
 303             s->is_jmp = DISAS_TB_JUMP;
 304         }
 305     }
 306 }
 307
/* Generate an EXCP_UDEF exception with the "uncategorized" syndrome
 * for the current instruction.  The offset of 4 passed to
 * gen_exception_insn() makes the reported PC s->pc - 4.
 */
static void unallocated_encoding(DisasContext *s)
{
    /* Unallocated and reserved encodings are uncategorized */
    gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
                       default_exception_el(s));
}
 314
 315 #define unsupported_encoding(s, insn)                                    \
 316     do {                                                                 \
 317         qemu_log_mask(LOG_UNIMP,                                         \
 318                       "%s:%d: unsupported instruction encoding 0x%08x "  \
 319                       "at pc=%016" PRIx64 "\n",                          \
 320                       __FILE__, __LINE__, insn, s->pc - 4);              \
 321         unallocated_encoding(s);                                         \
 322     } while (0);
 323
 324 static void init_tmp_a64_array(DisasContext *s)
 325 {
 326 #ifdef CONFIG_DEBUG_TCG
 327     int i;
 328     for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
 329         TCGV_UNUSED_I64(s->tmp_a64[i]);
 330     }
 331 #endif
 332     s->tmp_a64_count = 0;
 333 }
 334
 335 static void free_tmp_a64(DisasContext *s)
 336 {
 337     int i;
 338     for (i = 0; i < s->tmp_a64_count; i++) {
 339         tcg_temp_free_i64(s->tmp_a64[i]);
 340     }
 341     init_tmp_a64_array(s);
 342 }
 343
 344 static TCGv_i64 new_tmp_a64(DisasContext *s)
 345 {
 346     assert(s->tmp_a64_count < TMP_A64_MAX);
 347     return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
 348 }
 349
/* Like new_tmp_a64(), but the returned auto-freed temporary is
 * initialised to zero.
 */
static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
{
    TCGv_i64 t = new_tmp_a64(s);
    tcg_gen_movi_i64(t, 0);
    return t;
}
 356
 357 /*
 358  * Register access functions
 359  *
 360  * These functions are used for directly accessing a register in where
 361  * changes to the final register value are likely to be made. If you
 362  * need to use a register for temporary calculation (e.g. index type
 363  * operations) use the read_* form.
 364  *
 365  * B1.2.1 Register mappings
 366  *
 367  * In instruction register encoding 31 can refer to ZR (zero register) or
 368  * the SP (stack pointer) depending on context. In QEMU's case we map SP
 369  * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 370  * This is the point of the _sp forms.
 371  */
 372 static TCGv_i64 cpu_reg(DisasContext *s, int reg)
 373 {
 374     if (reg == 31) {
 375         return new_tmp_a64_zero(s);
 376     } else {
 377         return cpu_X[reg];
 378     }
 379 }
 380
/* register access for when 31 == SP.
 * Always returns the TCG global cpu_X[reg]; 's' is not used.
 */
static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}
 386
 387 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 388  * representing the register contents. This TCGv is an auto-freed
 389  * temporary so it need not be explicitly freed, and may be modified.
 390  */
 391 static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
 392 {
 393     TCGv_i64 v = new_tmp_a64(s);
 394     if (reg != 31) {
 395         if (sf) {
 396             tcg_gen_mov_i64(v, cpu_X[reg]);
 397         } else {
 398             tcg_gen_ext32u_i64(v, cpu_X[reg]);
 399         }
 400     } else {
 401         tcg_gen_movi_i64(v, 0);
 402     }
 403     return v;
 404 }
 405
 406 static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
 407 {
 408     TCGv_i64 v = new_tmp_a64(s);
 409     if (sf) {
 410         tcg_gen_mov_i64(v, cpu_X[reg]);
 411     } else {
 412         tcg_gen_ext32u_i64(v, cpu_X[reg]);
 413     }
 414     return v;
 415 }
 416
 417 /* We should have at some point before trying to access an FP register
 418  * done the necessary access check, so assert that
 419  * (a) we did the check and
 420  * (b) we didn't then just plough ahead anyway if it failed.
 421  * Print the instruction pattern in the abort message so we can figure
 422  * out what we need to fix if a user encounters this problem in the wild.
 423  */
 424 static inline void assert_fp_access_checked(DisasContext *s)
 425 {
 426 #ifdef CONFIG_DEBUG_TCG
 427     if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
 428         fprintf(stderr, "target-arm: FP access check missing for "
 429                 "instruction 0x%08x\n", s->insn);
 430         abort();
 431     }
 432 #endif
 433 }
 434
 435 /* Return the offset into CPUARMState of an element of specified
 436  * size, 'element' places in from the least significant end of
 437  * the FP/vector register Qn.
 438  */
 439 static inline int vec_reg_offset(DisasContext *s, int regno,
 440                                  int element, TCGMemOp size)
 441 {
 442     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 443 #ifdef HOST_WORDS_BIGENDIAN
 444     /* This is complicated slightly because vfp.regs[2n] is
 445      * still the low half and  vfp.regs[2n+1] the high half
 446      * of the 128 bit vector, even on big endian systems.
 447      * Calculate the offset assuming a fully bigendian 128 bits,
 448      * then XOR to account for the order of the two 64 bit halves.
 449      */
 450     offs += (16 - ((element + 1) * (1 << size)));
 451     offs ^= 8;
 452 #else
 453     offs += element * (1 << size);
 454 #endif
 455     assert_fp_access_checked(s);
 456     return offs;
 457 }
 458
 459 /* Return the offset into CPUARMState of a slice (from
 460  * the least significant end) of FP register Qn (ie
 461  * Dn, Sn, Hn or Bn).
 462  * (Note that this is not the same mapping as for A32; see cpu.h)
 463  */
 464 static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
 465 {
 466     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 467 #ifdef HOST_WORDS_BIGENDIAN
 468     offs += (8 - (1 << size));
 469 #endif
 470     assert_fp_access_checked(s);
 471     return offs;
 472 }
 473
/* Offset of the high half of the 128 bit vector Qn, i.e. the offset of
 * vfp.regs[2 * regno + 1] within CPUARMState.  Also runs the debug
 * check that FP access was validated for this instruction.
 */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    assert_fp_access_checked(s);
    return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
}
 480
 481 /* Convenience accessors for reading and writing single and double
 482  * FP registers. Writing clears the upper parts of the associated
 483  * 128 bit vector register, as required by the architecture.
 484  * Note that unlike the GP register accessors, the values returned
 485  * by the read functions must be manually freed.
 486  */
 487 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
 488 {
 489     TCGv_i64 v = tcg_temp_new_i64();
 490
 491     tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 492     return v;
 493 }
 494
 495 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
 496 {
 497     TCGv_i32 v = tcg_temp_new_i32();
 498
 499     tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
 500     return v;
 501 }
 502
 503 static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
 504 {
 505     TCGv_i64 tcg_zero = tcg_const_i64(0);
 506
 507     tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 508     tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg));
 509     tcg_temp_free_i64(tcg_zero);
 510 }
 511
 512 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
 513 {
 514     TCGv_i64 tmp = tcg_temp_new_i64();
 515
 516     tcg_gen_extu_i32_i64(tmp, v);
 517     write_fp_dreg(s, reg, tmp);
 518     tcg_temp_free_i64(tmp);
 519 }
 520
 521 static TCGv_ptr get_fpstatus_ptr(void)
 522 {
 523     TCGv_ptr statusptr = tcg_temp_new_ptr();
 524     int offset;
 525
 526     /* In A64 all instructions (both FP and Neon) use the FPCR;
 527      * there is no equivalent of the A32 Neon "standard FPSCR value"
 528      * and all operations use vfp.fp_status.
 529      */
 530     offset = offsetof(CPUARMState, vfp.fp_status);
 531     tcg_gen_addi_ptr(statusptr, cpu_env, offset);
 532     return statusptr;
 533 }
 534
/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 * The result is split into two 32-bit halves (cpu_ZF and cpu_NF, as
 * passed to tcg_gen_extr_i64_i32), and cpu_ZF is then ORed with cpu_NF
 * so that cpu_ZF is zero only when the whole 64-bit result is zero.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
 543
 544 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
 545 static inline void gen_logic_CC(int sf, TCGv_i64 result)
 546 {
 547     if (sf) {
 548         gen_set_NZ64(result);
 549     } else {
 550         tcg_gen_extrl_i64_i32(cpu_ZF, result);
 551         tcg_gen_mov_i32(cpu_NF, cpu_ZF);
 552     }
 553     tcg_gen_movi_i32(cpu_CF, 0);
 554     tcg_gen_movi_i32(cpu_VF, 0);
 555 }
 556
 557 /* dest = T0 + T1; compute C, N, V and Z flags */
 558 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 559 {
 560     if (sf) {
 561         TCGv_i64 result, flag, tmp;
 562         result = tcg_temp_new_i64();
 563         flag = tcg_temp_new_i64();
 564         tmp = tcg_temp_new_i64();
 565
 566         tcg_gen_movi_i64(tmp, 0);
 567         tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
 568
 569         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 570
 571         gen_set_NZ64(result);
 572
 573         tcg_gen_xor_i64(flag, result, t0);
 574         tcg_gen_xor_i64(tmp, t0, t1);
 575         tcg_gen_andc_i64(flag, flag, tmp);
 576         tcg_temp_free_i64(tmp);
 577         tcg_gen_extrh_i64_i32(cpu_VF, flag);
 578
 579         tcg_gen_mov_i64(dest, result);
 580         tcg_temp_free_i64(result);
 581         tcg_temp_free_i64(flag);
 582     } else {
 583         /* 32 bit arithmetic */
 584         TCGv_i32 t0_32 = tcg_temp_new_i32();
 585         TCGv_i32 t1_32 = tcg_temp_new_i32();
 586         TCGv_i32 tmp = tcg_temp_new_i32();
 587
 588         tcg_gen_movi_i32(tmp, 0);
 589         tcg_gen_extrl_i64_i32(t0_32, t0);
 590         tcg_gen_extrl_i64_i32(t1_32, t1);
 591         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
 592         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 593         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 594         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 595         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 596         tcg_gen_extu_i32_i64(dest, cpu_NF);
 597
 598         tcg_temp_free_i32(tmp);
 599         tcg_temp_free_i32(t0_32);
 600         tcg_temp_free_i32(t1_32);
 601     }
 602 }
 603
 604 /* dest = T0 - T1; compute C, N, V and Z flags */
 605 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 606 {
 607     if (sf) {
 608         /* 64 bit arithmetic */
 609         TCGv_i64 result, flag, tmp;
 610
 611         result = tcg_temp_new_i64();
 612         flag = tcg_temp_new_i64();
 613         tcg_gen_sub_i64(result, t0, t1);
 614
 615         gen_set_NZ64(result);
 616
 617         tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
 618         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 619
 620         tcg_gen_xor_i64(flag, result, t0);
 621         tmp = tcg_temp_new_i64();
 622         tcg_gen_xor_i64(tmp, t0, t1);
 623         tcg_gen_and_i64(flag, flag, tmp);
 624         tcg_temp_free_i64(tmp);
 625         tcg_gen_extrh_i64_i32(cpu_VF, flag);
 626         tcg_gen_mov_i64(dest, result);
 627         tcg_temp_free_i64(flag);
 628         tcg_temp_free_i64(result);
 629     } else {
 630         /* 32 bit arithmetic */
 631         TCGv_i32 t0_32 = tcg_temp_new_i32();
 632         TCGv_i32 t1_32 = tcg_temp_new_i32();
 633         TCGv_i32 tmp;
 634
 635         tcg_gen_extrl_i64_i32(t0_32, t0);
 636         tcg_gen_extrl_i64_i32(t1_32, t1);
 637         tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
 638         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 639         tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
 640         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 641         tmp = tcg_temp_new_i32();
 642         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 643         tcg_temp_free_i32(t0_32);
 644         tcg_temp_free_i32(t1_32);
 645         tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 646         tcg_temp_free_i32(tmp);
 647         tcg_gen_extu_i32_i64(dest, cpu_NF);
 648     }
 649 }
 650
 651 /* dest = T0 + T1 + CF; do not compute flags. */
 652 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 653 {
 654     TCGv_i64 flag = tcg_temp_new_i64();
 655     tcg_gen_extu_i32_i64(flag, cpu_CF);
 656     tcg_gen_add_i64(dest, t0, t1);
 657     tcg_gen_add_i64(dest, dest, flag);
 658     tcg_temp_free_i64(flag);
 659
 660     if (!sf) {
 661         tcg_gen_ext32u_i64(dest, dest);
 662     }
 663 }
 664
 665 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
 666 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 667 {
 668     if (sf) {
 669         TCGv_i64 result, cf_64, vf_64, tmp;
 670         result = tcg_temp_new_i64();
 671         cf_64 = tcg_temp_new_i64();
 672         vf_64 = tcg_temp_new_i64();
 673         tmp = tcg_const_i64(0);
 674
 675         tcg_gen_extu_i32_i64(cf_64, cpu_CF);
 676         tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
 677         tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
 678         tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
 679         gen_set_NZ64(result);
 680
 681         tcg_gen_xor_i64(vf_64, result, t0);
 682         tcg_gen_xor_i64(tmp, t0, t1);
 683         tcg_gen_andc_i64(vf_64, vf_64, tmp);
 684         tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
 685
 686         tcg_gen_mov_i64(dest, result);
 687
 688         tcg_temp_free_i64(tmp);
 689         tcg_temp_free_i64(vf_64);
 690         tcg_temp_free_i64(cf_64);
 691         tcg_temp_free_i64(result);
 692     } else {
 693         TCGv_i32 t0_32, t1_32, tmp;
 694         t0_32 = tcg_temp_new_i32();
 695         t1_32 = tcg_temp_new_i32();
 696         tmp = tcg_const_i32(0);
 697
 698         tcg_gen_extrl_i64_i32(t0_32, t0);
 699         tcg_gen_extrl_i64_i32(t1_32, t1);
 700         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
 701         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);
 702
 703         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 704         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 705         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 706         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 707         tcg_gen_extu_i32_i64(dest, cpu_NF);
 708
 709         tcg_temp_free_i32(tmp);
 710         tcg_temp_free_i32(t1_32);
 711         tcg_temp_free_i32(t0_32);
 712     }
 713 }
 714
 715 /*
 716  * Load/Store generators
 717  */
 718
/*
 * Store from GPR register to memory.
 *
 * 'size' must be at most 3 and is added to MO_TE to form the memop
 * (NOTE(review): i.e. presumably log2 of the access size in bytes);
 * 'memidx' is the MMU index used for the access.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, int size, int memidx)
{
    g_assert(size <= 3);
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, MO_TE + size);
}
 728
/* Store from GPR register to memory using the MMU index returned by
 * get_mem_index(s); see do_gpr_st_memidx().
 */
static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, int size)
{
    do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s));
}
 734
 735 /*
 736  * Load from memory to GPR register
 737  */
 738 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
 739                              int size, bool is_signed, bool extend, int memidx)
 740 {
 741     TCGMemOp memop = MO_TE + size;
 742
 743     g_assert(size <= 3);
 744
 745     if (is_signed) {
 746         memop += MO_SIGN;
 747     }
 748
 749     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
 750
 751     if (extend && is_signed) {
 752         g_assert(size < 3);
 753         tcg_gen_ext32u_i64(dest, dest);
 754     }
 755 }
 756
/* Load from memory to GPR register using the MMU index returned by
 * get_mem_index(s); see do_gpr_ld_memidx().
 */
static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      int size, bool is_signed, bool extend)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
                     get_mem_index(s));
}
 763
 764 /*
 765  * Store from FP register to memory
 766  */
 767 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
 768 {
 769     /* This writes the bottom N bits of a 128 bit wide vector to memory */
 770     TCGv_i64 tmp = tcg_temp_new_i64();
 771     tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
 772     if (size < 4) {
 773         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TE + size);
 774     } else {
 775         TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
 776         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TEQ);
 777         tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
 778         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 779         tcg_gen_qemu_st_i64(tmp, tcg_hiaddr, get_mem_index(s), MO_TEQ);
 780         tcg_temp_free_i64(tcg_hiaddr);
 781     }
 782
 783     tcg_temp_free_i64(tmp);
 784 }
 785
 786 /*
 787  * Load from memory to FP register
 788  */
 789 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
 790 {
 791     /* This always zero-extends and writes to a full 128 bit wide vector */
 792     TCGv_i64 tmplo = tcg_temp_new_i64();
 793     TCGv_i64 tmphi;
 794
 795     if (size < 4) {
 796         TCGMemOp memop = MO_TE + size;
 797         tmphi = tcg_const_i64(0);
 798         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
 799     } else {
 800         TCGv_i64 tcg_hiaddr;
 801         tmphi = tcg_temp_new_i64();
 802         tcg_hiaddr = tcg_temp_new_i64();
 803
 804         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), MO_TEQ);
 805         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 806         tcg_gen_qemu_ld_i64(tmphi, tcg_hiaddr, get_mem_index(s), MO_TEQ);
 807         tcg_temp_free_i64(tcg_hiaddr);
 808     }
 809
 810     tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
 811     tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
 812
 813     tcg_temp_free_i64(tmplo);
 814     tcg_temp_free_i64(tmphi);
 815 }
 816
 817 /*
 818  * Vector load/store helpers.
 819  *
 820  * The principal difference between this and a FP load is that we don't
 821  * zero extend as we are filling a partial chunk of the vector register.
 822  * These functions don't support 128 bit loads/stores, which would be
 823  * normal load/store operations.
 824  *
 825  * The _i32 versions are useful when operating on 32 bit quantities
 826  * (eg for floating point single or using Neon helper functions).
 827  */
 828
 829 /* Get value of an element within a vector register */
 830 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
 831                              int element, TCGMemOp memop)
 832 {
 833     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
 834     switch (memop) {
 835     case MO_8:
 836         tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
 837         break;
 838     case MO_16:
 839         tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
 840         break;
 841     case MO_32:
 842         tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
 843         break;
 844     case MO_8|MO_SIGN:
 845         tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
 846         break;
 847     case MO_16|MO_SIGN:
 848         tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
 849         break;
 850     case MO_32|MO_SIGN:
 851         tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
 852         break;
 853     case MO_64:
 854     case MO_64|MO_SIGN:
 855         tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
 856         break;
 857     default:
 858         g_assert_not_reached();
 859     }
 860 }
 861
 862 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
 863                                  int element, TCGMemOp memop)
 864 {
 865     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
 866     switch (memop) {
 867     case MO_8:
 868         tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
 869         break;
 870     case MO_16:
 871         tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
 872         break;
 873     case MO_8|MO_SIGN:
 874         tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
 875         break;
 876     case MO_16|MO_SIGN:
 877         tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
 878         break;
 879     case MO_32:
 880     case MO_32|MO_SIGN:
 881         tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
 882         break;
 883     default:
 884         g_assert_not_reached();
 885     }
 886 }
 887
 888 /* Set value of an element within a vector register */
 889 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
 890                               int element, TCGMemOp memop)
 891 {
 892     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
 893     switch (memop) {
 894     case MO_8:
 895         tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
 896         break;
 897     case MO_16:
 898         tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
 899         break;
 900     case MO_32:
 901         tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
 902         break;
 903     case MO_64:
 904         tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
 905         break;
 906     default:
 907         g_assert_not_reached();
 908     }
 909 }
 910
 911 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
 912                                   int destidx, int element, TCGMemOp memop)
 913 {
 914     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
 915     switch (memop) {
 916     case MO_8:
 917         tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
 918         break;
 919     case MO_16:
 920         tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
 921         break;
 922     case MO_32:
 923         tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
 924         break;
 925     default:
 926         g_assert_not_reached();
 927     }
 928 }
 929
 930 /* Clear the high 64 bits of a 128 bit vector (in general non-quad
 931  * vector ops all need to do this).
 932  */
 933 static void clear_vec_high(DisasContext *s, int rd)
 934 {
 935     TCGv_i64 tcg_zero = tcg_const_i64(0);
 936
 937     write_vec_element(s, tcg_zero, rd, 1, MO_64);
 938     tcg_temp_free_i64(tcg_zero);
 939 }
 940
 941 /* Store from vector register to memory */
 942 static void do_vec_st(DisasContext *s, int srcidx, int element,
 943                       TCGv_i64 tcg_addr, int size)
 944 {
 945     TCGMemOp memop = MO_TE + size;
 946     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
 947
 948     read_vec_element(s, tcg_tmp, srcidx, element, size);
 949     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
 950
 951     tcg_temp_free_i64(tcg_tmp);
 952 }
 953
 954 /* Load from memory to vector register */
 955 static void do_vec_ld(DisasContext *s, int destidx, int element,
 956                       TCGv_i64 tcg_addr, int size)
 957 {
 958     TCGMemOp memop = MO_TE + size;
 959     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
 960
 961     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
 962     write_vec_element(s, tcg_tmp, destidx, element, size);
 963
 964     tcg_temp_free_i64(tcg_tmp);
 965 }
 966
 967 /* Check that FP/Neon access is enabled. If it is, return
 968  * true. If not, emit code to generate an appropriate exception,
 969  * and return false; the caller should not emit any code for
 970  * the instruction. Note that this check must happen after all
 971  * unallocated-encoding checks (otherwise the syndrome information
 972  * for the resulting exception will be incorrect).
 973  */
 974 static inline bool fp_access_check(DisasContext *s)
 975 {
 976     assert(!s->fp_access_checked);
 977     s->fp_access_checked = true;
 978
 979     if (!s->fp_excp_el) {
 980         return true;
 981     }
 982
 983     gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
 984                        s->fp_excp_el);
 985     return false;
 986 }
 987
 988 /*
 989  * This utility function is for doing register extension with an
 990  * optional shift. You will likely want to pass a temporary for the
 991  * destination register. See DecodeRegExtend() in the ARM ARM.
 992  */
 993 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
 994                               int option, unsigned int shift)
 995 {
 996     int extsize = extract32(option, 0, 2);
 997     bool is_signed = extract32(option, 2, 1);
 998
 999     if (is_signed) {
1000         switch (extsize) {
1001         case 0:
1002             tcg_gen_ext8s_i64(tcg_out, tcg_in);
1003             break;
1004         case 1:
1005             tcg_gen_ext16s_i64(tcg_out, tcg_in);
1006             break;
1007         case 2:
1008             tcg_gen_ext32s_i64(tcg_out, tcg_in);
1009             break;
1010         case 3:
1011             tcg_gen_mov_i64(tcg_out, tcg_in);
1012             break;
1013         }
1014     } else {
1015         switch (extsize) {
1016         case 0:
1017             tcg_gen_ext8u_i64(tcg_out, tcg_in);
1018             break;
1019         case 1:
1020             tcg_gen_ext16u_i64(tcg_out, tcg_in);
1021             break;
1022         case 2:
1023             tcg_gen_ext32u_i64(tcg_out, tcg_in);
1024             break;
1025         case 3:
1026             tcg_gen_mov_i64(tcg_out, tcg_in);
1027             break;
1028         }
1029     }
1030
1031     if (shift) {
1032         tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1033     }
1034 }
1035
/* Placeholder hook for the SP alignment check; currently emits no code. */
static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}
1048
1049 /*
1050  * This provides a simple table based table lookup decoder. It is
1051  * intended to be used when the relevant bits for decode are too
1052  * awkwardly placed and switch/if based logic would be confusing and
1053  * deeply nested. Since it's a linear search through the table, tables
1054  * should be kept small.
1055  *
1056  * It returns the first handler where insn & mask == pattern, or
1057  * NULL if there is no match.
1058  * The table is terminated by an empty mask (i.e. 0)
1059  */
1060 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1061                                                uint32_t insn)
1062 {
1063     const AArch64DecodeTable *tptr = table;
1064
1065     while (tptr->mask) {
1066         if ((insn & tptr->mask) == tptr->pattern) {
1067             return tptr->disas_fn;
1068         }
1069         tptr++;
1070     }
1071     return NULL;
1072 }
1073
1074 /*
1075  * the instruction disassembly implemented here matches
1076  * the instruction encoding classifications in chapter 3 (C3)
1077  * of the ARM Architecture Reference Manual (DDI0487A_a)
1078  */
1079
1080 /* C3.2.7 Unconditional branch (immediate)
1081  *   31  30       26 25                                  0
1082  * +----+-----------+-------------------------------------+
1083  * | op | 0 0 1 0 1 |                 imm26               |
1084  * +----+-----------+-------------------------------------+
1085  */
1086 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1087 {
1088     uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
1089
1090     if (insn & (1U << 31)) {
1091         /* C5.6.26 BL Branch with link */
1092         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1093     }
1094
1095     /* C5.6.20 B Branch / C5.6.26 BL Branch with link */
1096     gen_goto_tb(s, 0, addr);
1097 }
1098
1099 /* C3.2.1 Compare & branch (immediate)
1100  *   31  30         25  24  23                  5 4      0
1101  * +----+-------------+----+---------------------+--------+
1102  * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
1103  * +----+-------------+----+---------------------+--------+
1104  */
1105 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1106 {
1107     unsigned int sf, op, rt;
1108     uint64_t addr;
1109     TCGLabel *label_match;
1110     TCGv_i64 tcg_cmp;
1111
1112     sf = extract32(insn, 31, 1);
1113     op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1114     rt = extract32(insn, 0, 5);
1115     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1116
1117     tcg_cmp = read_cpu_reg(s, rt, sf);
1118     label_match = gen_new_label();
1119
1120     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1121                         tcg_cmp, 0, label_match);
1122
1123     gen_goto_tb(s, 0, s->pc);
1124     gen_set_label(label_match);
1125     gen_goto_tb(s, 1, addr);
1126 }
1127
1128 /* C3.2.5 Test & branch (immediate)
1129  *   31  30         25  24  23   19 18          5 4    0
1130  * +----+-------------+----+-------+-------------+------+
1131  * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
1132  * +----+-------------+----+-------+-------------+------+
1133  */
1134 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1135 {
1136     unsigned int bit_pos, op, rt;
1137     uint64_t addr;
1138     TCGLabel *label_match;
1139     TCGv_i64 tcg_cmp;
1140
1141     bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1142     op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1143     addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
1144     rt = extract32(insn, 0, 5);
1145
1146     tcg_cmp = tcg_temp_new_i64();
1147     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1148     label_match = gen_new_label();
1149     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1150                         tcg_cmp, 0, label_match);
1151     tcg_temp_free_i64(tcg_cmp);
1152     gen_goto_tb(s, 0, s->pc);
1153     gen_set_label(label_match);
1154     gen_goto_tb(s, 1, addr);
1155 }
1156
1157 /* C3.2.2 / C5.6.19 Conditional branch (immediate)
1158  *  31           25  24  23                  5   4  3    0
1159  * +---------------+----+---------------------+----+------+
1160  * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
1161  * +---------------+----+---------------------+----+------+
1162  */
1163 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1164 {
1165     unsigned int cond;
1166     uint64_t addr;
1167
1168     if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1169         unallocated_encoding(s);
1170         return;
1171     }
1172     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1173     cond = extract32(insn, 0, 4);
1174
1175     if (cond < 0x0e) {
1176         /* genuinely conditional branches */
1177         TCGLabel *label_match = gen_new_label();
1178         arm_gen_test_cc(cond, label_match);
1179         gen_goto_tb(s, 0, s->pc);
1180         gen_set_label(label_match);
1181         gen_goto_tb(s, 1, addr);
1182     } else {
1183         /* 0xe and 0xf are both "always" conditions */
1184         gen_goto_tb(s, 0, addr);
1185     }
1186 }
1187
1188 /* C5.6.68 HINT */
1189 static void handle_hint(DisasContext *s, uint32_t insn,
1190                         unsigned int op1, unsigned int op2, unsigned int crm)
1191 {
1192     unsigned int selector = crm << 3 | op2;
1193
1194     if (op1 != 3) {
1195         unallocated_encoding(s);
1196         return;
1197     }
1198
1199     switch (selector) {
1200     case 0: /* NOP */
1201         return;
1202     case 3: /* WFI */
1203         s->is_jmp = DISAS_WFI;
1204         return;
1205     case 1: /* YIELD */
1206         s->is_jmp = DISAS_YIELD;
1207         return;
1208     case 2: /* WFE */
1209         s->is_jmp = DISAS_WFE;
1210         return;
1211     case 4: /* SEV */
1212     case 5: /* SEVL */
1213         /* we treat all as NOP at least for now */
1214         return;
1215     default:
1216         /* default specified as NOP equivalent */
1217         return;
1218     }
1219 }
1220
/* CLREX: set cpu_exclusive_addr to -1. The store-exclusive code in this
 * file uses the same value when it resets the exclusive address.
 * @insn is unused.
 */
static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
1225
1226 /* CLREX, DSB, DMB, ISB */
1227 static void handle_sync(DisasContext *s, uint32_t insn,
1228                         unsigned int op1, unsigned int op2, unsigned int crm)
1229 {
1230     if (op1 != 3) {
1231         unallocated_encoding(s);
1232         return;
1233     }
1234
1235     switch (op2) {
1236     case 2: /* CLREX */
1237         gen_clrex(s, insn);
1238         return;
1239     case 4: /* DSB */
1240     case 5: /* DMB */
1241         /* We don't emulate caches so barriers are no-ops */
1242         return;
1243     case 6: /* ISB */
1244         /* We need to break the TB after this insn to execute
1245          * a self-modified code correctly and also to take
1246          * any pending interrupts immediately.
1247          */
1248         s->is_jmp = DISAS_UPDATE;
1249         return;
1250     default:
1251         unallocated_encoding(s);
1252         return;
1253     }
1254 }
1255
1256 /* C5.6.130 MSR (immediate) - move immediate to processor state field */
1257 static void handle_msr_i(DisasContext *s, uint32_t insn,
1258                          unsigned int op1, unsigned int op2, unsigned int crm)
1259 {
1260     int op = op1 << 3 | op2;
1261     switch (op) {
1262     case 0x05: /* SPSel */
1263         if (s->current_el == 0) {
1264             unallocated_encoding(s);
1265             return;
1266         }
1267         /* fall through */
1268     case 0x1e: /* DAIFSet */
1269     case 0x1f: /* DAIFClear */
1270     {
1271         TCGv_i32 tcg_imm = tcg_const_i32(crm);
1272         TCGv_i32 tcg_op = tcg_const_i32(op);
1273         gen_a64_set_pc_im(s->pc - 4);
1274         gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
1275         tcg_temp_free_i32(tcg_imm);
1276         tcg_temp_free_i32(tcg_op);
1277         s->is_jmp = DISAS_UPDATE;
1278         break;
1279     }
1280     default:
1281         unallocated_encoding(s);
1282         return;
1283     }
1284 }
1285
1286 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1287 {
1288     TCGv_i32 tmp = tcg_temp_new_i32();
1289     TCGv_i32 nzcv = tcg_temp_new_i32();
1290
1291     /* build bit 31, N */
1292     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1293     /* build bit 30, Z */
1294     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1295     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1296     /* build bit 29, C */
1297     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1298     /* build bit 28, V */
1299     tcg_gen_shri_i32(tmp, cpu_VF, 31);
1300     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1301     /* generate result */
1302     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1303
1304     tcg_temp_free_i32(nzcv);
1305     tcg_temp_free_i32(tmp);
1306 }
1307
1308 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1309
1310 {
1311     TCGv_i32 nzcv = tcg_temp_new_i32();
1312
1313     /* take NZCV from R[t] */
1314     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1315
1316     /* bit 31, N */
1317     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1318     /* bit 30, Z */
1319     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1320     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1321     /* bit 29, C */
1322     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1323     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1324     /* bit 28, V */
1325     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1326     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1327     tcg_temp_free_i32(nzcv);
1328 }
1329
/* C5.6.129 MRS - move from system register
 * C5.6.131 MSR (register) - move to system register
 * C5.6.204 SYS
 * C5.6.205 SYSL
 * These are all essentially the same insn in 'read' and 'write'
 * versions, with varying op0 fields.
 *
 * @isread is true for the 'read' forms and false for the 'write' forms.
 * @op0, @op1, @op2, @crn and @crm identify the register, which is looked
 * up in s->cp_regs; @rt is the general purpose register transferred.
 *
 * An unknown register, or one that fails the static cp_access_ok()
 * check, is treated as an unallocated encoding.
 */
static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    const ARMCPRegInfo *ri;
    TCGv_i64 tcg_rt;

    ri = get_arm_cp_reginfo(s->cp_regs,
                            ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                               crn, crm, op0, op1, op2));

    if (!ri) {
        /* Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
                      isread ? "read" : "write", op0, op1, crn, crm, op2);
        unallocated_encoding(s);
        return;
    }

    /* Check access permissions */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        unallocated_encoding(s);
        return;
    }

    if (ri->accessfn) {
        /* Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         * Note that this is emitted before the special-case handling
         * below, so it applies to those registers as well.
         */
        TCGv_ptr tmpptr;
        TCGv_i32 tcg_syn;
        uint32_t syndrome;

        /* Set the PC to this insn's address before calling the helper */
        gen_a64_set_pc_im(s->pc - 4);
        tmpptr = tcg_const_ptr(ri);
        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
        tcg_syn = tcg_const_i32(syndrome);
        gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn);
        tcg_temp_free_ptr(tmpptr);
        tcg_temp_free_i32(tcg_syn);
    }

    /* Handle special cases first */
    switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
    case ARM_CP_NOP:
        /* Nothing to generate */
        return;
    case ARM_CP_NZCV:
        /* Flags are held in separate TCG variables, not in a field */
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_rt);
        } else {
            gen_set_nzcv(tcg_rt);
        }
        return;
    case ARM_CP_CURRENTEL:
        /* Reads as current EL value from pstate, which is
         * guaranteed to be constant by the tb flags.
         */
        tcg_rt = cpu_reg(s, rt);
        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
        return;
    case ARM_CP_DC_ZVA:
        /* Writes clear the aligned block of memory which rt points into. */
        tcg_rt = cpu_reg(s, rt);
        gen_helper_dc_zva(cpu_env, tcg_rt);
        return;
    default:
        break;
    }

    /* With icount in use, accesses to ARM_CP_IO registers are bracketed
     * by gen_io_start()/gen_io_end().
     */
    if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        gen_io_start();
    }

    tcg_rt = cpu_reg(s, rt);

    if (isread) {
        /* Read: constant, via the read helper, or straight from the
         * CPU state field at ri->fieldoffset.
         */
        if (ri->type & ARM_CP_CONST) {
            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
        } else if (ri->readfn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
            tcg_temp_free_ptr(tmpptr);
        } else {
            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    } else {
        /* Write: ignored for constants, otherwise via the write helper
         * or straight to the CPU state field at ri->fieldoffset.
         */
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            return;
        } else if (ri->writefn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
            tcg_temp_free_ptr(tmpptr);
        } else {
            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    }

    if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        /* I/O operations must end the TB here (whether read or write) */
        gen_io_end();
        s->is_jmp = DISAS_UPDATE;
    } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /* We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        s->is_jmp = DISAS_UPDATE;
    }
}
1453
1454 /* C3.2.4 System
1455  *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1456  * +---------------------+---+-----+-----+-------+-------+-----+------+
1457  * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1458  * +---------------------+---+-----+-----+-------+-------+-----+------+
1459  */
1460 static void disas_system(DisasContext *s, uint32_t insn)
1461 {
1462     unsigned int l, op0, op1, crn, crm, op2, rt;
1463     l = extract32(insn, 21, 1);
1464     op0 = extract32(insn, 19, 2);
1465     op1 = extract32(insn, 16, 3);
1466     crn = extract32(insn, 12, 4);
1467     crm = extract32(insn, 8, 4);
1468     op2 = extract32(insn, 5, 3);
1469     rt = extract32(insn, 0, 5);
1470
1471     if (op0 == 0) {
1472         if (l || rt != 31) {
1473             unallocated_encoding(s);
1474             return;
1475         }
1476         switch (crn) {
1477         case 2: /* C5.6.68 HINT */
1478             handle_hint(s, insn, op1, op2, crm);
1479             break;
1480         case 3: /* CLREX, DSB, DMB, ISB */
1481             handle_sync(s, insn, op1, op2, crm);
1482             break;
1483         case 4: /* C5.6.130 MSR (immediate) */
1484             handle_msr_i(s, insn, op1, op2, crm);
1485             break;
1486         default:
1487             unallocated_encoding(s);
1488             break;
1489         }
1490         return;
1491     }
1492     handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1493 }
1494
1495 /* C3.2.3 Exception generation
1496  *
1497  *  31             24 23 21 20                     5 4   2 1  0
1498  * +-----------------+-----+------------------------+-----+----+
1499  * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
1500  * +-----------------------+------------------------+----------+
1501  */
1502 static void disas_exc(DisasContext *s, uint32_t insn)
1503 {
1504     int opc = extract32(insn, 21, 3);
1505     int op2_ll = extract32(insn, 0, 5);
1506     int imm16 = extract32(insn, 5, 16);
1507     TCGv_i32 tmp;
1508
1509     switch (opc) {
1510     case 0:
1511         /* For SVC, HVC and SMC we advance the single-step state
1512          * machine before taking the exception. This is architecturally
1513          * mandated, to ensure that single-stepping a system call
1514          * instruction works properly.
1515          */
1516         switch (op2_ll) {
1517         case 1:
1518             gen_ss_advance(s);
1519             gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
1520                                default_exception_el(s));
1521             break;
1522         case 2:
1523             if (s->current_el == 0) {
1524                 unallocated_encoding(s);
1525                 break;
1526             }
1527             /* The pre HVC helper handles cases when HVC gets trapped
1528              * as an undefined insn by runtime configuration.
1529              */
1530             gen_a64_set_pc_im(s->pc - 4);
1531             gen_helper_pre_hvc(cpu_env);
1532             gen_ss_advance(s);
1533             gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
1534             break;
1535         case 3:
1536             if (s->current_el == 0) {
1537                 unallocated_encoding(s);
1538                 break;
1539             }
1540             gen_a64_set_pc_im(s->pc - 4);
1541             tmp = tcg_const_i32(syn_aa64_smc(imm16));
1542             gen_helper_pre_smc(cpu_env, tmp);
1543             tcg_temp_free_i32(tmp);
1544             gen_ss_advance(s);
1545             gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
1546             break;
1547         default:
1548             unallocated_encoding(s);
1549             break;
1550         }
1551         break;
1552     case 1:
1553         if (op2_ll != 0) {
1554             unallocated_encoding(s);
1555             break;
1556         }
1557         /* BRK */
1558         gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16),
1559                            default_exception_el(s));
1560         break;
1561     case 2:
1562         if (op2_ll != 0) {
1563             unallocated_encoding(s);
1564             break;
1565         }
1566         /* HLT. This has two purposes.
1567          * Architecturally, it is an external halting debug instruction.
1568          * Since QEMU doesn't implement external debug, we treat this as
1569          * it is required for halting debug disabled: it will UNDEF.
1570          * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
1571          */
1572         if (semihosting_enabled() && imm16 == 0xf000) {
1573 #ifndef CONFIG_USER_ONLY
1574             /* In system mode, don't allow userspace access to semihosting,
1575              * to provide some semblance of security (and for consistency
1576              * with our 32-bit semihosting).
1577              */
1578             if (s->current_el == 0) {
1579                 unsupported_encoding(s, insn);
1580                 break;
1581             }
1582 #endif
1583             gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
1584         } else {
1585             unsupported_encoding(s, insn);
1586         }
1587         break;
1588     case 5:
1589         if (op2_ll < 1 || op2_ll > 3) {
1590             unallocated_encoding(s);
1591             break;
1592         }
1593         /* DCPS1, DCPS2, DCPS3 */
1594         unsupported_encoding(s, insn);
1595         break;
1596     default:
1597         unallocated_encoding(s);
1598         break;
1599     }
1600 }
1601
1602 /* C3.2.7 Unconditional branch (register)
1603  *  31           25 24   21 20   16 15   10 9    5 4     0
1604  * +---------------+-------+-------+-------+------+-------+
1605  * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1606  * +---------------+-------+-------+-------+------+-------+
1607  */
1608 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1609 {
1610     unsigned int opc, op2, op3, rn, op4;
1611
1612     opc = extract32(insn, 21, 4);
1613     op2 = extract32(insn, 16, 5);
1614     op3 = extract32(insn, 10, 6);
1615     rn = extract32(insn, 5, 5);
1616     op4 = extract32(insn, 0, 5);
1617
1618     if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1619         unallocated_encoding(s);
1620         return;
1621     }
1622
1623     switch (opc) {
1624     case 0: /* BR */
1625     case 2: /* RET */
1626         tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1627         break;
1628     case 1: /* BLR */
1629         tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1630         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1631         break;
1632     case 4: /* ERET */
1633         if (s->current_el == 0) {
1634             unallocated_encoding(s);
1635             return;
1636         }
1637         gen_helper_exception_return(cpu_env);
1638         s->is_jmp = DISAS_JUMP;
1639         return;
1640     case 5: /* DRPS */
1641         if (rn != 0x1f) {
1642             unallocated_encoding(s);
1643         } else {
1644             unsupported_encoding(s, insn);
1645         }
1646         return;
1647     default:
1648         unallocated_encoding(s);
1649         return;
1650     }
1651
1652     s->is_jmp = DISAS_JUMP;
1653 }
1654
1655 /* C3.2 Branches, exception generating and system instructions */
1656 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1657 {
1658     switch (extract32(insn, 25, 7)) {
1659     case 0x0a: case 0x0b:
1660     case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1661         disas_uncond_b_imm(s, insn);
1662         break;
1663     case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1664         disas_comp_b_imm(s, insn);
1665         break;
1666     case 0x1b: case 0x5b: /* Test & branch (immediate) */
1667         disas_test_b_imm(s, insn);
1668         break;
1669     case 0x2a: /* Conditional branch (immediate) */
1670         disas_cond_b_imm(s, insn);
1671         break;
1672     case 0x6a: /* Exception generation / System */
1673         if (insn & (1 << 24)) {
1674             disas_system(s, insn);
1675         } else {
1676             disas_exc(s, insn);
1677         }
1678         break;
1679     case 0x6b: /* Unconditional branch (register) */
1680         disas_uncond_b_reg(s, insn);
1681         break;
1682     default:
1683         unallocated_encoding(s);
1684         break;
1685     }
1686 }
1687
1688 /*
1689  * Load/Store exclusive instructions are implemented by remembering
1690  * the value/address loaded, and seeing if these are the same
1691  * when the store is performed. This is not actually the architecturally
1692  * mandated semantics, but it works for typical guest code sequences
1693  * and avoids having to monitor regular stores.
1694  *
1695  * In system emulation mode only one CPU will be running at once, so
1696  * this sequence is effectively atomic.  In user emulation mode we
1697  * throw an exception and handle the atomic operation elsewhere.
1698  */
1699 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
1700                                TCGv_i64 addr, int size, bool is_pair)
1701 {
1702     TCGv_i64 tmp = tcg_temp_new_i64();
1703     TCGMemOp memop = MO_TE + size;
1704
1705     g_assert(size <= 3);
1706     tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);
1707
1708     if (is_pair) {
1709         TCGv_i64 addr2 = tcg_temp_new_i64();
1710         TCGv_i64 hitmp = tcg_temp_new_i64();
1711
1712         g_assert(size >= 2);
1713         tcg_gen_addi_i64(addr2, addr, 1 << size);
1714         tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
1715         tcg_temp_free_i64(addr2);
1716         tcg_gen_mov_i64(cpu_exclusive_high, hitmp);
1717         tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
1718         tcg_temp_free_i64(hitmp);
1719     }
1720
1721     tcg_gen_mov_i64(cpu_exclusive_val, tmp);
1722     tcg_gen_mov_i64(cpu_reg(s, rt), tmp);
1723
1724     tcg_temp_free_i64(tmp);
1725     tcg_gen_mov_i64(cpu_exclusive_addr, addr);
1726 }
1727
1728 #ifdef CONFIG_USER_ONLY
1729 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
1730                                 TCGv_i64 addr, int size, int is_pair)
1731 {
1732     tcg_gen_mov_i64(cpu_exclusive_test, addr);
1733     tcg_gen_movi_i32(cpu_exclusive_info,
1734                      size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14));
1735     gen_exception_internal_insn(s, 4, EXCP_STREX);
1736 }
1737 #else
/* System-mode store-exclusive.
 *
 * Emits code that re-checks the address and value(s) recorded by the
 * preceding load-exclusive and only performs the store if they all
 * still match.
 *
 * @rd receives the status result (0 on success, 1 on failure), @rt and
 * @rt2 are the source registers (@rt2 only used when @is_pair is set),
 * @inaddr is the target address and @size selects the access size.
 */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 inaddr, int size, int is_pair)
{
    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
     *     [addr] = {Rt};
     *     if (is_pair) {
     *         [addr + datasize] = {Rt2};
     *     }
     *     {Rd} = 0;
     * } else {
     *     {Rd} = 1;
     * }
     * env->exclusive_addr = -1;
     */
    TCGLabel *fail_label = gen_new_label();
    TCGLabel *done_label = gen_new_label();
    TCGv_i64 addr = tcg_temp_local_new_i64();
    TCGv_i64 tmp;

    /* Copy input into a local temp so it is not trashed when the
     * basic block ends at the branch insn.
     */
    tcg_gen_mov_i64(addr, inaddr);
    /* Check 1: the address must match the recorded exclusive address */
    tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);

    /* Check 2: memory must still hold the recorded value */
    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), MO_TE + size);
    tcg_gen_brcond_i64(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label);
    tcg_temp_free_i64(tmp);

    if (is_pair) {
        /* Check 3 (pairs only): the second element must match too */
        TCGv_i64 addrhi = tcg_temp_new_i64();
        TCGv_i64 tmphi = tcg_temp_new_i64();

        tcg_gen_addi_i64(addrhi, addr, 1 << size);
        tcg_gen_qemu_ld_i64(tmphi, addrhi, get_mem_index(s), MO_TE + size);
        tcg_gen_brcond_i64(TCG_COND_NE, tmphi, cpu_exclusive_high, fail_label);

        tcg_temp_free_i64(tmphi);
        tcg_temp_free_i64(addrhi);
    }

    /* We seem to still have the exclusive monitor, so do the store */
    tcg_gen_qemu_st_i64(cpu_reg(s, rt), addr, get_mem_index(s), MO_TE + size);
    if (is_pair) {
        /* addrhi is recomputed here rather than reused from the check */
        TCGv_i64 addrhi = tcg_temp_new_i64();

        tcg_gen_addi_i64(addrhi, addr, 1 << size);
        tcg_gen_qemu_st_i64(cpu_reg(s, rt2), addrhi,
                            get_mem_index(s), MO_TE + size);
        tcg_temp_free_i64(addrhi);
    }

    tcg_temp_free_i64(addr);

    /* Success path: Rd = 0, then skip over the failure path */
    tcg_gen_movi_i64(cpu_reg(s, rd), 0);
    tcg_gen_br(done_label);
    /* Failure path: Rd = 1 */
    gen_set_label(fail_label);
    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
    gen_set_label(done_label);
    /* Both paths reset the recorded exclusive address */
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);

}
1802 #endif
1803
1804 /* C3.3.6 Load/store exclusive
1805  *
1806  *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
1807  * +-----+-------------+----+---+----+------+----+-------+------+------+
1808  * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
1809  * +-----+-------------+----+---+----+------+----+-------+------+------+
1810  *
1811  *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
1812  *   L: 0 -> store, 1 -> load
1813  *  o2: 0 -> exclusive, 1 -> not
1814  *  o1: 0 -> single register, 1 -> register pair
1815  *  o0: 1 -> load-acquire/store-release, 0 -> not
1816  */
1817 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
1818 {
1819     int rt = extract32(insn, 0, 5);
1820     int rn = extract32(insn, 5, 5);
1821     int rt2 = extract32(insn, 10, 5);
1822     int is_lasr = extract32(insn, 15, 1);
1823     int rs = extract32(insn, 16, 5);
1824     int is_pair = extract32(insn, 21, 1);
1825     int is_store = !extract32(insn, 22, 1);
1826     int is_excl = !extract32(insn, 23, 1);
1827     int size = extract32(insn, 30, 2);
1828     TCGv_i64 tcg_addr;
1829
1830     if ((!is_excl && !is_pair && !is_lasr) ||
1831         (!is_excl && is_pair) ||
1832         (is_pair && size < 2)) {
1833         unallocated_encoding(s);
1834         return;
1835     }
1836
1837     if (rn == 31) {
1838         gen_check_sp_alignment(s);
1839     }
1840     tcg_addr = read_cpu_reg_sp(s, rn, 1);
1841
1842     /* Note that since TCG is single threaded load-acquire/store-release
1843      * semantics require no extra if (is_lasr) { ... } handling.
1844      */
1845
1846     if (is_excl) {
1847         if (!is_store) {
1848             s->is_ldex = true;
1849             gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
1850         } else {
1851             gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
1852         }
1853     } else {
1854         TCGv_i64 tcg_rt = cpu_reg(s, rt);
1855         if (is_store) {
1856             do_gpr_st(s, tcg_rt, tcg_addr, size);
1857         } else {
1858             do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false);
1859         }
1860     }
1861 }
1862
1863 /*
1864  * C3.3.5 Load register (literal)
1865  *
1866  *  31 30 29   27  26 25 24 23                5 4     0
1867  * +-----+-------+---+-----+-------------------+-------+
1868  * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
1869  * +-----+-------+---+-----+-------------------+-------+
1870  *
1871  * V: 1 -> vector (simd/fp)
1872  * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
1873  *                   10-> 32 bit signed, 11 -> prefetch
1874  * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
1875  */
1876 static void disas_ld_lit(DisasContext *s, uint32_t insn)
1877 {
1878     int rt = extract32(insn, 0, 5);
1879     int64_t imm = sextract32(insn, 5, 19) << 2;
1880     bool is_vector = extract32(insn, 26, 1);
1881     int opc = extract32(insn, 30, 2);
1882     bool is_signed = false;
1883     int size = 2;
1884     TCGv_i64 tcg_rt, tcg_addr;
1885
1886     if (is_vector) {
1887         if (opc == 3) {
1888             unallocated_encoding(s);
1889             return;
1890         }
1891         size = 2 + opc;
1892         if (!fp_access_check(s)) {
1893             return;
1894         }
1895     } else {
1896         if (opc == 3) {
1897             /* PRFM (literal) : prefetch */
1898             return;
1899         }
1900         size = 2 + extract32(opc, 0, 1);
1901         is_signed = extract32(opc, 1, 1);
1902     }
1903
1904     tcg_rt = cpu_reg(s, rt);
1905
1906     tcg_addr = tcg_const_i64((s->pc - 4) + imm);
1907     if (is_vector) {
1908         do_fp_ld(s, rt, tcg_addr, size);
1909     } else {
1910         do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
1911     }
1912     tcg_temp_free_i64(tcg_addr);
1913 }
1914
1915 /*
1916  * C5.6.80 LDNP (Load Pair - non-temporal hint)
1917  * C5.6.81 LDP (Load Pair - non vector)
1918  * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
1919  * C5.6.176 STNP (Store Pair - non-temporal hint)
1920  * C5.6.177 STP (Store Pair - non vector)
1921  * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
1922  * C6.3.165 LDP (Load Pair of SIMD&FP)
1923  * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
1924  * C6.3.284 STP (Store Pair of SIMD&FP)
1925  *
1926  *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
1927  * +-----+-------+---+---+-------+---+-----------------------------+
1928  * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
1929  * +-----+-------+---+---+-------+---+-------+-------+------+------+
1930  *
1931  * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
1932  *      LDPSW                    01
1933  *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
1934  *   V: 0 -> GPR, 1 -> Vector
1935  * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
1936  *      10 -> signed offset, 11 -> pre-index
1937  *   L: 0 -> Store 1 -> Load
1938  *
1939  * Rt, Rt2 = GPR or SIMD registers to be stored
1940  * Rn = general purpose register containing address
1941  * imm7 = signed offset (multiple of 4 or 8 depending on size)
1942  */
1943 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
1944 {
1945     int rt = extract32(insn, 0, 5);
1946     int rn = extract32(insn, 5, 5);
1947     int rt2 = extract32(insn, 10, 5);
1948     uint64_t offset = sextract64(insn, 15, 7);
1949     int index = extract32(insn, 23, 2);
1950     bool is_vector = extract32(insn, 26, 1);
1951     bool is_load = extract32(insn, 22, 1);
1952     int opc = extract32(insn, 30, 2);
1953
1954     bool is_signed = false;
1955     bool postindex = false;
1956     bool wback = false;
1957
1958     TCGv_i64 tcg_addr; /* calculated address */
1959     int size;
1960
1961     if (opc == 3) {
1962         unallocated_encoding(s);
1963         return;
1964     }
1965
1966     if (is_vector) {
1967         size = 2 + opc;
1968     } else {
1969         size = 2 + extract32(opc, 1, 1);
1970         is_signed = extract32(opc, 0, 1);
1971         if (!is_load && is_signed) {
1972             unallocated_encoding(s);
1973             return;
1974         }
1975     }
1976
1977     switch (index) {
1978     case 1: /* post-index */
1979         postindex = true;
1980         wback = true;
1981         break;
1982     case 0:
1983         /* signed offset with "non-temporal" hint. Since we don't emulate
1984          * caches we don't care about hints to the cache system about
1985          * data access patterns, and handle this identically to plain
1986          * signed offset.
1987          */
1988         if (is_signed) {
1989             /* There is no non-temporal-hint version of LDPSW */
1990             unallocated_encoding(s);
1991             return;
1992         }
1993         postindex = false;
1994         break;
1995     case 2: /* signed offset, rn not updated */
1996         postindex = false;
1997         break;
1998     case 3: /* pre-index */
1999         postindex = false;
2000         wback = true;
2001         break;
2002     }
2003
2004     if (is_vector && !fp_access_check(s)) {
2005         return;
2006     }
2007
2008     offset <<= size;
2009
2010     if (rn == 31) {
2011         gen_check_sp_alignment(s);
2012     }
2013
2014     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2015
2016     if (!postindex) {
2017         tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2018     }
2019
2020     if (is_vector) {
2021         if (is_load) {
2022             do_fp_ld(s, rt, tcg_addr, size);
2023         } else {
2024             do_fp_st(s, rt, tcg_addr, size);
2025         }
2026     } else {
2027         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2028         if (is_load) {
2029             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
2030         } else {
2031             do_gpr_st(s, tcg_rt, tcg_addr, size);
2032         }
2033     }
2034     tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2035     if (is_vector) {
2036         if (is_load) {
2037             do_fp_ld(s, rt2, tcg_addr, size);
2038         } else {
2039             do_fp_st(s, rt2, tcg_addr, size);
2040         }
2041     } else {
2042         TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2043         if (is_load) {
2044             do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false);
2045         } else {
2046             do_gpr_st(s, tcg_rt2, tcg_addr, size);
2047         }
2048     }
2049
2050     if (wback) {
2051         if (postindex) {
2052             tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
2053         } else {
2054             tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
2055         }
2056         tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
2057     }
2058 }
2059
2060 /*
2061  * C3.3.8 Load/store (immediate post-indexed)
2062  * C3.3.9 Load/store (immediate pre-indexed)
2063  * C3.3.12 Load/store (unscaled immediate)
2064  *
2065  * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2066  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2067  * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2068  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2069  *
2070  * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
2071          10 -> unprivileged
2072  * V = 0 -> non-vector
2073  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2074  * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2075  */
2076 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
2077 {
2078     int rt = extract32(insn, 0, 5);
2079     int rn = extract32(insn, 5, 5);
2080     int imm9 = sextract32(insn, 12, 9);
2081     int opc = extract32(insn, 22, 2);
2082     int size = extract32(insn, 30, 2);
2083     int idx = extract32(insn, 10, 2);
2084     bool is_signed = false;
2085     bool is_store = false;
2086     bool is_extended = false;
2087     bool is_unpriv = (idx == 2);
2088     bool is_vector = extract32(insn, 26, 1);
2089     bool post_index;
2090     bool writeback;
2091
2092     TCGv_i64 tcg_addr;
2093
2094     if (is_vector) {
2095         size |= (opc & 2) << 1;
2096         if (size > 4 || is_unpriv) {
2097             unallocated_encoding(s);
2098             return;
2099         }
2100         is_store = ((opc & 1) == 0);
2101         if (!fp_access_check(s)) {
2102             return;
2103         }
2104     } else {
2105         if (size == 3 && opc == 2) {
2106             /* PRFM - prefetch */
2107             if (is_unpriv) {
2108                 unallocated_encoding(s);
2109                 return;
2110             }
2111             return;
2112         }
2113         if (opc == 3 && size > 1) {
2114             unallocated_encoding(s);
2115             return;
2116         }
2117         is_store = (opc == 0);
2118         is_signed = opc & (1<<1);
2119         is_extended = (size < 3) && (opc & 1);
2120     }
2121
2122     switch (idx) {
2123     case 0:
2124     case 2:
2125         post_index = false;
2126         writeback = false;
2127         break;
2128     case 1:
2129         post_index = true;
2130         writeback = true;
2131         break;
2132     case 3:
2133         post_index = false;
2134         writeback = true;
2135         break;
2136     }
2137
2138     if (rn == 31) {
2139         gen_check_sp_alignment(s);
2140     }
2141     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2142
2143     if (!post_index) {
2144         tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2145     }
2146
2147     if (is_vector) {
2148         if (is_store) {
2149             do_fp_st(s, rt, tcg_addr, size);
2150         } else {
2151             do_fp_ld(s, rt, tcg_addr, size);
2152         }
2153     } else {
2154         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2155         int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2156
2157         if (is_store) {
2158             do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx);
2159         } else {
2160             do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
2161                              is_signed, is_extended, memidx);
2162         }
2163     }
2164
2165     if (writeback) {
2166         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2167         if (post_index) {
2168             tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2169         }
2170         tcg_gen_mov_i64(tcg_rn, tcg_addr);
2171     }
2172 }
2173
2174 /*
2175  * C3.3.10 Load/store (register offset)
2176  *
2177  * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2178  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2179  * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2180  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2181  *
2182  * For non-vector:
2183  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2184  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2185  * For vector:
2186  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2187  *   opc<0>: 0 -> store, 1 -> load
2188  * V: 1 -> vector/simd
2189  * opt: extend encoding (see DecodeRegExtend)
2190  * S: if S=1 then scale (essentially index by sizeof(size))
2191  * Rt: register to transfer into/out of
2192  * Rn: address register or SP for base
2193  * Rm: offset register or ZR for offset
2194  */
2195 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn)
2196 {
2197     int rt = extract32(insn, 0, 5);
2198     int rn = extract32(insn, 5, 5);
2199     int shift = extract32(insn, 12, 1);
2200     int rm = extract32(insn, 16, 5);
2201     int opc = extract32(insn, 22, 2);
2202     int opt = extract32(insn, 13, 3);
2203     int size = extract32(insn, 30, 2);
2204     bool is_signed = false;
2205     bool is_store = false;
2206     bool is_extended = false;
2207     bool is_vector = extract32(insn, 26, 1);
2208
2209     TCGv_i64 tcg_rm;
2210     TCGv_i64 tcg_addr;
2211
2212     if (extract32(opt, 1, 1) == 0) {
2213         unallocated_encoding(s);
2214         return;
2215     }
2216
2217     if (is_vector) {
2218         size |= (opc & 2) << 1;
2219         if (size > 4) {
2220             unallocated_encoding(s);
2221             return;
2222         }
2223         is_store = !extract32(opc, 0, 1);
2224         if (!fp_access_check(s)) {
2225             return;
2226         }
2227     } else {
2228         if (size == 3 && opc == 2) {
2229             /* PRFM - prefetch */
2230             return;
2231         }
2232         if (opc == 3 && size > 1) {
2233             unallocated_encoding(s);
2234             return;
2235         }
2236         is_store = (opc == 0);
2237         is_signed = extract32(opc, 1, 1);
2238         is_extended = (size < 3) && extract32(opc, 0, 1);
2239     }
2240
2241     if (rn == 31) {
2242         gen_check_sp_alignment(s);
2243     }
2244     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2245
2246     tcg_rm = read_cpu_reg(s, rm, 1);
2247     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2248
2249     tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
2250
2251     if (is_vector) {
2252         if (is_store) {
2253             do_fp_st(s, rt, tcg_addr, size);
2254         } else {
2255             do_fp_ld(s, rt, tcg_addr, size);
2256         }
2257     } else {
2258         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2259         if (is_store) {
2260             do_gpr_st(s, tcg_rt, tcg_addr, size);
2261         } else {
2262             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
2263         }
2264     }
2265 }
2266
2267 /*
2268  * C3.3.13 Load/store (unsigned immediate)
2269  *
2270  * 31 30 29   27  26 25 24 23 22 21        10 9     5
2271  * +----+-------+---+-----+-----+------------+-------+------+
2272  * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
2273  * +----+-------+---+-----+-----+------------+-------+------+
2274  *
2275  * For non-vector:
2276  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2277  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2278  * For vector:
2279  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2280  *   opc<0>: 0 -> store, 1 -> load
2281  * Rn: base address register (inc SP)
2282  * Rt: target register
2283  */
2284 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn)
2285 {
2286     int rt = extract32(insn, 0, 5);
2287     int rn = extract32(insn, 5, 5);
2288     unsigned int imm12 = extract32(insn, 10, 12);
2289     bool is_vector = extract32(insn, 26, 1);
2290     int size = extract32(insn, 30, 2);
2291     int opc = extract32(insn, 22, 2);
2292     unsigned int offset;
2293
2294     TCGv_i64 tcg_addr;
2295
2296     bool is_store;
2297     bool is_signed = false;
2298     bool is_extended = false;
2299
2300     if (is_vector) {
2301         size |= (opc & 2) << 1;
2302         if (size > 4) {
2303             unallocated_encoding(s);
2304             return;
2305         }
2306         is_store = !extract32(opc, 0, 1);
2307         if (!fp_access_check(s)) {
2308             return;
2309         }
2310     } else {
2311         if (size == 3 && opc == 2) {
2312             /* PRFM - prefetch */
2313             return;
2314         }
2315         if (opc == 3 && size > 1) {
2316             unallocated_encoding(s);
2317             return;
2318         }
2319         is_store = (opc == 0);
2320         is_signed = extract32(opc, 1, 1);
2321         is_extended = (size < 3) && extract32(opc, 0, 1);
2322     }
2323
2324     if (rn == 31) {
2325         gen_check_sp_alignment(s);
2326     }
2327     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2328     offset = imm12 << size;
2329     tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2330
2331     if (is_vector) {
2332         if (is_store) {
2333             do_fp_st(s, rt, tcg_addr, size);
2334         } else {
2335             do_fp_ld(s, rt, tcg_addr, size);
2336         }
2337     } else {
2338         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2339         if (is_store) {
2340             do_gpr_st(s, tcg_rt, tcg_addr, size);
2341         } else {
2342             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
2343         }
2344     }
2345 }
2346
2347 /* Load/store register (all forms) */
2348 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2349 {
2350     switch (extract32(insn, 24, 2)) {
2351     case 0:
2352         if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2353             disas_ldst_reg_roffset(s, insn);
2354         } else {
2355             /* Load/store register (unscaled immediate)
2356              * Load/store immediate pre/post-indexed
2357              * Load/store register unprivileged
2358              */
2359             disas_ldst_reg_imm9(s, insn);
2360         }
2361         break;
2362     case 1:
2363         disas_ldst_reg_unsigned_imm(s, insn);
2364         break;
2365     default:
2366         unallocated_encoding(s);
2367         break;
2368     }
2369 }
2370
2371 /* C3.3.1 AdvSIMD load/store multiple structures
2372  *
2373  *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
2374  * +---+---+---------------+---+-------------+--------+------+------+------+
2375  * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
2376  * +---+---+---------------+---+-------------+--------+------+------+------+
2377  *
2378  * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
2379  *
2380  *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
2381  * +---+---+---------------+---+---+---------+--------+------+------+------+
2382  * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
2383  * +---+---+---------------+---+---+---------+--------+------+------+------+
2384  *
2385  * Rt: first (or only) SIMD&FP register to be transferred
2386  * Rn: base address or SP
2387  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2388  */
2389 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
2390 {
2391     int rt = extract32(insn, 0, 5);
2392     int rn = extract32(insn, 5, 5);
2393     int size = extract32(insn, 10, 2);
2394     int opcode = extract32(insn, 12, 4);
2395     bool is_store = !extract32(insn, 22, 1);
2396     bool is_postidx = extract32(insn, 23, 1);
2397     bool is_q = extract32(insn, 30, 1);
2398     TCGv_i64 tcg_addr, tcg_rn;
2399
2400     int ebytes = 1 << size;
2401     int elements = (is_q ? 128 : 64) / (8 << size);
2402     int rpt;    /* num iterations */
2403     int selem;  /* structure elements */
2404     int r;
2405
2406     if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
2407         unallocated_encoding(s);
2408         return;
2409     }
2410
2411     /* From the shared decode logic */
2412     switch (opcode) {
2413     case 0x0:
2414         rpt = 1;
2415         selem = 4;
2416         break;
2417     case 0x2:
2418         rpt = 4;
2419         selem = 1;
2420         break;
2421     case 0x4:
2422         rpt = 1;
2423         selem = 3;
2424         break;
2425     case 0x6:
2426         rpt = 3;
2427         selem = 1;
2428         break;
2429     case 0x7:
2430         rpt = 1;
2431         selem = 1;
2432         break;
2433     case 0x8:
2434         rpt = 1;
2435         selem = 2;
2436         break;
2437     case 0xa:
2438         rpt = 2;
2439         selem = 1;
2440         break;
2441     default:
2442         unallocated_encoding(s);
2443         return;
2444     }
2445
2446     if (size == 3 && !is_q && selem != 1) {
2447         /* reserved */
2448         unallocated_encoding(s);
2449         return;
2450     }
2451
2452     if (!fp_access_check(s)) {
2453         return;
2454     }
2455
2456     if (rn == 31) {
2457         gen_check_sp_alignment(s);
2458     }
2459
2460     tcg_rn = cpu_reg_sp(s, rn);
2461     tcg_addr = tcg_temp_new_i64();
2462     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2463
2464     for (r = 0; r < rpt; r++) {
2465         int e;
2466         for (e = 0; e < elements; e++) {
2467             int tt = (rt + r) % 32;
2468             int xs;
2469             for (xs = 0; xs < selem; xs++) {
2470                 if (is_store) {
2471                     do_vec_st(s, tt, e, tcg_addr, size);
2472                 } else {
2473                     do_vec_ld(s, tt, e, tcg_addr, size);
2474
2475                     /* For non-quad operations, setting a slice of the low
2476                      * 64 bits of the register clears the high 64 bits (in
2477                      * the ARM ARM pseudocode this is implicit in the fact
2478                      * that 'rval' is a 64 bit wide variable). We optimize
2479                      * by noticing that we only need to do this the first
2480                      * time we touch a register.
2481                      */
2482                     if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
2483                         clear_vec_high(s, tt);
2484                     }
2485                 }
2486                 tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2487                 tt = (tt + 1) % 32;
2488             }
2489         }
2490     }
2491
2492     if (is_postidx) {
2493         int rm = extract32(insn, 16, 5);
2494         if (rm == 31) {
2495             tcg_gen_mov_i64(tcg_rn, tcg_addr);
2496         } else {
2497             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2498         }
2499     }
2500     tcg_temp_free_i64(tcg_addr);
2501 }
2502
2503 /* C3.3.3 AdvSIMD load/store single structure
2504  *
2505  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2506  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2507  * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
2508  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2509  *
2510  * C3.3.4 AdvSIMD load/store single structure (post-indexed)
2511  *
2512  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2513  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2514  * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
2515  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2516  *
2517  * Rt: first (or only) SIMD&FP register to be transferred
2518  * Rn: base address or SP
2519  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2520  * index = encoded in Q:S:size dependent on size
2521  *
2522  * lane_size = encoded in R, opc
2523  * transfer width = encoded in opc, S, size
2524  */
2525 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
2526 {
2527     int rt = extract32(insn, 0, 5);
2528     int rn = extract32(insn, 5, 5);
2529     int size = extract32(insn, 10, 2);
2530     int S = extract32(insn, 12, 1);
2531     int opc = extract32(insn, 13, 3);
2532     int R = extract32(insn, 21, 1);
2533     int is_load = extract32(insn, 22, 1);
2534     int is_postidx = extract32(insn, 23, 1);
2535     int is_q = extract32(insn, 30, 1);
2536
2537     int scale = extract32(opc, 1, 2);
2538     int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
2539     bool replicate = false;
2540     int index = is_q << 3 | S << 2 | size;
2541     int ebytes, xs;
2542     TCGv_i64 tcg_addr, tcg_rn;
2543
2544     switch (scale) {
2545     case 3:
2546         if (!is_load || S) {
2547             unallocated_encoding(s);
2548             return;
2549         }
2550         scale = size;
2551         replicate = true;
2552         break;
2553     case 0:
2554         break;
2555     case 1:
2556         if (extract32(size, 0, 1)) {
2557             unallocated_encoding(s);
2558             return;
2559         }
2560         index >>= 1;
2561         break;
2562     case 2:
2563         if (extract32(size, 1, 1)) {
2564             unallocated_encoding(s);
2565             return;
2566         }
2567         if (!extract32(size, 0, 1)) {
2568             index >>= 2;
2569         } else {
2570             if (S) {
2571                 unallocated_encoding(s);
2572                 return;
2573             }
2574             index >>= 3;
2575             scale = 3;
2576         }
2577         break;
2578     default:
2579         g_assert_not_reached();
2580     }
2581
2582     if (!fp_access_check(s)) {
2583         return;
2584     }
2585
2586     ebytes = 1 << scale;
2587
2588     if (rn == 31) {
2589         gen_check_sp_alignment(s);
2590     }
2591
2592     tcg_rn = cpu_reg_sp(s, rn);
2593     tcg_addr = tcg_temp_new_i64();
2594     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2595
2596     for (xs = 0; xs < selem; xs++) {
2597         if (replicate) {
2598             /* Load and replicate to all elements */
2599             uint64_t mulconst;
2600             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
2601
2602             tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
2603                                 get_mem_index(s), MO_TE + scale);
2604             switch (scale) {
2605             case 0:
2606                 mulconst = 0x0101010101010101ULL;
2607                 break;
2608             case 1:
2609                 mulconst = 0x0001000100010001ULL;
2610                 break;
2611             case 2:
2612                 mulconst = 0x0000000100000001ULL;
2613                 break;
2614             case 3:
2615                 mulconst = 0;
2616                 break;
2617             default:
2618                 g_assert_not_reached();
2619             }
2620             if (mulconst) {
2621                 tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
2622             }
2623             write_vec_element(s, tcg_tmp, rt, 0, MO_64);
2624             if (is_q) {
2625                 write_vec_element(s, tcg_tmp, rt, 1, MO_64);
2626             } else {
2627                 clear_vec_high(s, rt);
2628             }
2629             tcg_temp_free_i64(tcg_tmp);
2630         } else {
2631             /* Load/store one element per register */
2632             if (is_load) {
2633                 do_vec_ld(s, rt, index, tcg_addr, MO_TE + scale);
2634             } else {
2635                 do_vec_st(s, rt, index, tcg_addr, MO_TE + scale);
2636             }
2637         }
2638         tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2639         rt = (rt + 1) % 32;
2640     }
2641
2642     if (is_postidx) {
2643         int rm = extract32(insn, 16, 5);
2644         if (rm == 31) {
2645             tcg_gen_mov_i64(tcg_rn, tcg_addr);
2646         } else {
2647             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2648         }
2649     }
2650     tcg_temp_free_i64(tcg_addr);
2651 }
2652
2653 /* C3.3 Loads and stores */
2654 static void disas_ldst(DisasContext *s, uint32_t insn)
2655 {
2656     switch (extract32(insn, 24, 6)) {
2657     case 0x08: /* Load/store exclusive */
2658         disas_ldst_excl(s, insn);
2659         break;
2660     case 0x18: case 0x1c: /* Load register (literal) */
2661         disas_ld_lit(s, insn);
2662         break;
2663     case 0x28: case 0x29:
2664     case 0x2c: case 0x2d: /* Load/store pair (all forms) */
2665         disas_ldst_pair(s, insn);
2666         break;
2667     case 0x38: case 0x39:
2668     case 0x3c: case 0x3d: /* Load/store register (all forms) */
2669         disas_ldst_reg(s, insn);
2670         break;
2671     case 0x0c: /* AdvSIMD load/store multiple structures */
2672         disas_ldst_multiple_struct(s, insn);
2673         break;
2674     case 0x0d: /* AdvSIMD load/store single structure */
2675         disas_ldst_single_struct(s, insn);
2676         break;
2677     default:
2678         unallocated_encoding(s);
2679         break;
2680     }
2681 }
2682
2683 /* C3.4.6 PC-rel. addressing
2684  *   31  30   29 28       24 23                5 4    0
2685  * +----+-------+-----------+-------------------+------+
2686  * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
2687  * +----+-------+-----------+-------------------+------+
2688  */
2689 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
2690 {
2691     unsigned int page, rd;
2692     uint64_t base;
2693     uint64_t offset;
2694
2695     page = extract32(insn, 31, 1);
2696     /* SignExtend(immhi:immlo) -> offset */
2697     offset = sextract64(insn, 5, 19);
2698     offset = offset << 2 | extract32(insn, 29, 2);
2699     rd = extract32(insn, 0, 5);
2700     base = s->pc - 4;
2701
2702     if (page) {
2703         /* ADRP (page based) */
2704         base &= ~0xfff;
2705         offset <<= 12;
2706     }
2707
2708     tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
2709 }
2710
2711 /*
2712  * C3.4.1 Add/subtract (immediate)
2713  *
2714  *  31 30 29 28       24 23 22 21         10 9   5 4   0
2715  * +--+--+--+-----------+-----+-------------+-----+-----+
2716  * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
2717  * +--+--+--+-----------+-----+-------------+-----+-----+
2718  *
2719  *    sf: 0 -> 32bit, 1 -> 64bit
2720  *    op: 0 -> add  , 1 -> sub
2721  *     S: 1 -> set flags
2722  * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
2723  */
2724 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
2725 {
2726     int rd = extract32(insn, 0, 5);
2727     int rn = extract32(insn, 5, 5);
2728     uint64_t imm = extract32(insn, 10, 12);
2729     int shift = extract32(insn, 22, 2);
2730     bool setflags = extract32(insn, 29, 1);
2731     bool sub_op = extract32(insn, 30, 1);
2732     bool is_64bit = extract32(insn, 31, 1);
2733
2734     TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2735     TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
2736     TCGv_i64 tcg_result;
2737
2738     switch (shift) {
2739     case 0x0:
2740         break;
2741     case 0x1:
2742         imm <<= 12;
2743         break;
2744     default:
2745         unallocated_encoding(s);
2746         return;
2747     }
2748
2749     tcg_result = tcg_temp_new_i64();
2750     if (!setflags) {
2751         if (sub_op) {
2752             tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
2753         } else {
2754             tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
2755         }
2756     } else {
2757         TCGv_i64 tcg_imm = tcg_const_i64(imm);
2758         if (sub_op) {
2759             gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2760         } else {
2761             gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2762         }
2763         tcg_temp_free_i64(tcg_imm);
2764     }
2765
2766     if (is_64bit) {
2767         tcg_gen_mov_i64(tcg_rd, tcg_result);
2768     } else {
2769         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
2770     }
2771
2772     tcg_temp_free_i64(tcg_result);
2773 }
2774
/* Replicate an element across a 64 bit integer.
 * 'mask' holds the element value in its bottom 'e' bits (all higher
 * bits must be zero); the return value has that element copied into
 * every e-bit wide slot.  'e' must be non-zero.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int width;

    assert(e != 0);
    /* Each pass doubles the populated width until 64 bits are covered */
    for (width = e; width < 64; width *= 2) {
        mask |= mask << width;
    }
    return mask;
}
2788
/* Build a mask with the low 'length' bits set; length must satisfy
 * 0 < length <= 64 (checked by assertion).
 */
static inline uint64_t bitmask64(unsigned int length)
{
    const uint64_t all_ones = ~(uint64_t)0;

    assert(length > 0 && length <= 64);
    /* Shifting right drops the (64 - length) bits we do not want */
    return all_ones >> (64 - length);
}
2795
/* Cut-down version of the pseudocode DecodeBitMasks(), computing only
 * the wmask.
 *
 * Returns true and stores the decoded 64 bit pattern in *result when
 * immn/imms/immr form a valid encoding; returns false (leaving *result
 * untouched) when they are a reserved combination, in which case the
 * caller should make the guest instruction UNDEF.
 */
static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                                   unsigned int imms, unsigned int immr)
{
    uint64_t pattern;
    unsigned int esize, emask, run, rot;
    int top_bit;

    assert(immn < 2 && imms < 64 && immr < 64);

    /* Every pattern produced here is a 64 bit vector of identical
     * elements, each e = 2, 4, 8, 16, 32 or 64 bits wide.  An element
     * is a run of between 1 and e-1 set bits, rotated inside the
     * element by between 0 and e-1 bit positions.
     *
     * immn (1 bit) and imms (6 bits) together encode the element size
     * and the run length:
     * 64 bit elements: immn = 1, imms = <length of run - 1>
     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
     * The one combination not listed, immn = 0 with imms = 11111x,
     * is reserved.  An all-ones <length of run - 1> is reserved too.
     *
     * The rotation is always immr % e (immr being a 6 bit field).
     */

    /* Element size: position of the highest set bit of immn:NOT(imms) */
    top_bit = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (top_bit < 1) {
        /* immn == 0 with imms == 11111x (binary) */
        return false;
    }
    esize = 1 << top_bit;
    emask = esize - 1;

    run = imms & emask;
    rot = immr & emask;

    if (run == emask) {
        /* an all-ones <length of run - 1> is not permitted */
        return false;
    }

    /* One element: run+1 set bits, rotated right by rot within esize bits */
    pattern = bitmask64(run + 1);
    if (rot != 0) {
        pattern = ((pattern >> rot) | (pattern << (esize - rot)))
                  & bitmask64(esize);
    }

    /* Fill the whole 64 bit value with copies of that element */
    *result = bitfield_replicate(pattern, esize);
    return true;
}
2861
2862 /* C3.4.4 Logical (immediate)
2863  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
2864  * +----+-----+-------------+---+------+------+------+------+
2865  * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
2866  * +----+-----+-------------+---+------+------+------+------+
2867  */
2868 static void disas_logic_imm(DisasContext *s, uint32_t insn)
2869 {
2870     unsigned int sf, opc, is_n, immr, imms, rn, rd;
2871     TCGv_i64 tcg_rd, tcg_rn;
2872     uint64_t wmask;
2873     bool is_and = false;
2874
2875     sf = extract32(insn, 31, 1);
2876     opc = extract32(insn, 29, 2);
2877     is_n = extract32(insn, 22, 1);
2878     immr = extract32(insn, 16, 6);
2879     imms = extract32(insn, 10, 6);
2880     rn = extract32(insn, 5, 5);
2881     rd = extract32(insn, 0, 5);
2882
2883     if (!sf && is_n) {
2884         unallocated_encoding(s);
2885         return;
2886     }
2887
2888     if (opc == 0x3) { /* ANDS */
2889         tcg_rd = cpu_reg(s, rd);
2890     } else {
2891         tcg_rd = cpu_reg_sp(s, rd);
2892     }
2893     tcg_rn = cpu_reg(s, rn);
2894
2895     if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
2896         /* some immediate field values are reserved */
2897         unallocated_encoding(s);
2898         return;
2899     }
2900
2901     if (!sf) {
2902         wmask &= 0xffffffff;
2903     }
2904
2905     switch (opc) {
2906     case 0x3: /* ANDS */
2907     case 0x0: /* AND */
2908         tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
2909         is_and = true;
2910         break;
2911     case 0x1: /* ORR */
2912         tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
2913         break;
2914     case 0x2: /* EOR */
2915         tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
2916         break;
2917     default:
2918         assert(FALSE); /* must handle all above */
2919         break;
2920     }
2921
2922     if (!sf && !is_and) {
2923         /* zero extend final result; we know we can skip this for AND
2924          * since the immediate had the high 32 bits clear.
2925          */
2926         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2927     }
2928
2929     if (opc == 3) { /* ANDS */
2930         gen_logic_CC(sf, tcg_rd);
2931     }
2932 }
2933
2934 /*
2935  * C3.4.5 Move wide (immediate)
2936  *
2937  *  31 30 29 28         23 22 21 20             5 4    0
2938  * +--+-----+-------------+-----+----------------+------+
2939  * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
2940  * +--+-----+-------------+-----+----------------+------+
2941  *
2942  * sf: 0 -> 32 bit, 1 -> 64 bit
2943  * opc: 00 -> N, 10 -> Z, 11 -> K
2944  * hw: shift/16 (0,16, and sf only 32, 48)
2945  */
2946 static void disas_movw_imm(DisasContext *s, uint32_t insn)
2947 {
2948     int rd = extract32(insn, 0, 5);
2949     uint64_t imm = extract32(insn, 5, 16);
2950     int sf = extract32(insn, 31, 1);
2951     int opc = extract32(insn, 29, 2);
2952     int pos = extract32(insn, 21, 2) << 4;
2953     TCGv_i64 tcg_rd = cpu_reg(s, rd);
2954     TCGv_i64 tcg_imm;
2955
2956     if (!sf && (pos >= 32)) {
2957         unallocated_encoding(s);
2958         return;
2959     }
2960
2961     switch (opc) {
2962     case 0: /* MOVN */
2963     case 2: /* MOVZ */
2964         imm <<= pos;
2965         if (opc == 0) {
2966             imm = ~imm;
2967         }
2968         if (!sf) {
2969             imm &= 0xffffffffu;
2970         }
2971         tcg_gen_movi_i64(tcg_rd, imm);
2972         break;
2973     case 3: /* MOVK */
2974         tcg_imm = tcg_const_i64(imm);
2975         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
2976         tcg_temp_free_i64(tcg_imm);
2977         if (!sf) {
2978             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2979         }
2980         break;
2981     default:
2982         unallocated_encoding(s);
2983         break;
2984     }
2985 }
2986
2987 /* C3.4.2 Bitfield
2988  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
2989  * +----+-----+-------------+---+------+------+------+------+
2990  * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
2991  * +----+-----+-------------+---+------+------+------+------+
2992  */
2993 static void disas_bitfield(DisasContext *s, uint32_t insn)
2994 {
2995     unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
2996     TCGv_i64 tcg_rd, tcg_tmp;
2997
2998     sf = extract32(insn, 31, 1);
2999     opc = extract32(insn, 29, 2);
3000     n = extract32(insn, 22, 1);
3001     ri = extract32(insn, 16, 6);
3002     si = extract32(insn, 10, 6);
3003     rn = extract32(insn, 5, 5);
3004     rd = extract32(insn, 0, 5);
3005     bitsize = sf ? 64 : 32;
3006
3007     if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3008         unallocated_encoding(s);
3009         return;
3010     }
3011
3012     tcg_rd = cpu_reg(s, rd);
3013
3014     /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
3015        to be smaller than bitsize, we'll never reference data outside the
3016        low 32-bits anyway.  */
3017     tcg_tmp = read_cpu_reg(s, rn, 1);
3018
3019     /* Recognize the common aliases.  */
3020     if (opc == 0) { /* SBFM */
3021         if (ri == 0) {
3022             if (si == 7) { /* SXTB */
3023                 tcg_gen_ext8s_i64(tcg_rd, tcg_tmp);
3024                 goto done;
3025             } else if (si == 15) { /* SXTH */
3026                 tcg_gen_ext16s_i64(tcg_rd, tcg_tmp);
3027                 goto done;
3028             } else if (si == 31) { /* SXTW */
3029                 tcg_gen_ext32s_i64(tcg_rd, tcg_tmp);
3030                 goto done;
3031             }
3032         }
3033         if (si == 63 || (si == 31 && ri <= si)) { /* ASR */
3034             if (si == 31) {
3035                 tcg_gen_ext32s_i64(tcg_tmp, tcg_tmp);
3036             }
3037             tcg_gen_sari_i64(tcg_rd, tcg_tmp, ri);
3038             goto done;
3039         }
3040     } else if (opc == 2) { /* UBFM */
3041         if (ri == 0) { /* UXTB, UXTH, plus non-canonical AND */
3042             tcg_gen_andi_i64(tcg_rd, tcg_tmp, bitmask64(si + 1));
3043             return;
3044         }
3045         if (si == 63 || (si == 31 && ri <= si)) { /* LSR */
3046             if (si == 31) {
3047                 tcg_gen_ext32u_i64(tcg_tmp, tcg_tmp);
3048             }
3049             tcg_gen_shri_i64(tcg_rd, tcg_tmp, ri);
3050             return;
3051         }
3052         if (si + 1 == ri && si != bitsize - 1) { /* LSL */
3053             int shift = bitsize - 1 - si;
3054             tcg_gen_shli_i64(tcg_rd, tcg_tmp, shift);
3055             goto done;
3056         }
3057     }
3058
3059     if (opc != 1) { /* SBFM or UBFM */
3060         tcg_gen_movi_i64(tcg_rd, 0);
3061     }
3062
3063     /* do the bit move operation */
3064     if (si >= ri) {
3065         /* Wd<s-r:0> = Wn<s:r> */
3066         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
3067         pos = 0;
3068         len = (si - ri) + 1;
3069     } else {
3070         /* Wd<32+s-r,32-r> = Wn<s:0> */
3071         pos = bitsize - ri;
3072         len = si + 1;
3073     }
3074
3075     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
3076
3077     if (opc == 0) { /* SBFM - sign extend the destination field */
3078         tcg_gen_shli_i64(tcg_rd, tcg_rd, 64 - (pos + len));
3079         tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len));
3080     }
3081
3082  done:
3083     if (!sf) { /* zero extend final result */
3084         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3085     }
3086 }
3087
3088 /* C3.4.3 Extract
3089  *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3090  * +----+------+-------------+---+----+------+--------+------+------+
3091  * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3092  * +----+------+-------------+---+----+------+--------+------+------+
3093  */
3094 static void disas_extract(DisasContext *s, uint32_t insn)
3095 {
3096     unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3097
3098     sf = extract32(insn, 31, 1);
3099     n = extract32(insn, 22, 1);
3100     rm = extract32(insn, 16, 5);
3101     imm = extract32(insn, 10, 6);
3102     rn = extract32(insn, 5, 5);
3103     rd = extract32(insn, 0, 5);
3104     op21 = extract32(insn, 29, 2);
3105     op0 = extract32(insn, 21, 1);
3106     bitsize = sf ? 64 : 32;
3107
3108     if (sf != n || op21 || op0 || imm >= bitsize) {
3109         unallocated_encoding(s);
3110     } else {
3111         TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3112
3113         tcg_rd = cpu_reg(s, rd);
3114
3115         if (unlikely(imm == 0)) {
3116             /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3117              * so an extract from bit 0 is a special case.
3118              */
3119             if (sf) {
3120                 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3121             } else {
3122                 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3123             }
3124         } else if (rm == rn) { /* ROR */
3125             tcg_rm = cpu_reg(s, rm);
3126             if (sf) {
3127                 tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
3128             } else {
3129                 TCGv_i32 tmp = tcg_temp_new_i32();
3130                 tcg_gen_extrl_i64_i32(tmp, tcg_rm);
3131                 tcg_gen_rotri_i32(tmp, tmp, imm);
3132                 tcg_gen_extu_i32_i64(tcg_rd, tmp);
3133                 tcg_temp_free_i32(tmp);
3134             }
3135         } else {
3136             tcg_rm = read_cpu_reg(s, rm, sf);
3137             tcg_rn = read_cpu_reg(s, rn, sf);
3138             tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
3139             tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
3140             tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
3141             if (!sf) {
3142                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3143             }
3144         }
3145     }
3146 }
3147
3148 /* C3.4 Data processing - immediate */
3149 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3150 {
3151     switch (extract32(insn, 23, 6)) {
3152     case 0x20: case 0x21: /* PC-rel. addressing */
3153         disas_pc_rel_adr(s, insn);
3154         break;
3155     case 0x22: case 0x23: /* Add/subtract (immediate) */
3156         disas_add_sub_imm(s, insn);
3157         break;
3158     case 0x24: /* Logical (immediate) */
3159         disas_logic_imm(s, insn);
3160         break;
3161     case 0x25: /* Move wide (immediate) */
3162         disas_movw_imm(s, insn);
3163         break;
3164     case 0x26: /* Bitfield */
3165         disas_bitfield(s, insn);
3166         break;
3167     case 0x27: /* Extract */
3168         disas_extract(s, insn);
3169         break;
3170     default:
3171         unallocated_encoding(s);
3172         break;
3173     }
3174 }
3175
3176 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
3177  * Note that it is the caller's responsibility to ensure that the
3178  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
3179  * mandated semantics for out of range shifts.
3180  */
3181 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
3182                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3183 {
3184     switch (shift_type) {
3185     case A64_SHIFT_TYPE_LSL:
3186         tcg_gen_shl_i64(dst, src, shift_amount);
3187         break;
3188     case A64_SHIFT_TYPE_LSR:
3189         tcg_gen_shr_i64(dst, src, shift_amount);
3190         break;
3191     case A64_SHIFT_TYPE_ASR:
3192         if (!sf) {
3193             tcg_gen_ext32s_i64(dst, src);
3194         }
3195         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
3196         break;
3197     case A64_SHIFT_TYPE_ROR:
3198         if (sf) {
3199             tcg_gen_rotr_i64(dst, src, shift_amount);
3200         } else {
3201             TCGv_i32 t0, t1;
3202             t0 = tcg_temp_new_i32();
3203             t1 = tcg_temp_new_i32();
3204             tcg_gen_extrl_i64_i32(t0, src);
3205             tcg_gen_extrl_i64_i32(t1, shift_amount);
3206             tcg_gen_rotr_i32(t0, t0, t1);
3207             tcg_gen_extu_i32_i64(dst, t0);
3208             tcg_temp_free_i32(t0);
3209             tcg_temp_free_i32(t1);
3210         }
3211         break;
3212     default:
3213         assert(FALSE); /* all shift types should be handled */
3214         break;
3215     }
3216
3217     if (!sf) { /* zero extend final result */
3218         tcg_gen_ext32u_i64(dst, dst);
3219     }
3220 }
3221
3222 /* Shift a TCGv src by immediate, put result in dst.
3223  * The shift amount must be in range (this should always be true as the
3224  * relevant instructions will UNDEF on bad shift immediates).
3225  */
3226 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
3227                           enum a64_shift_type shift_type, unsigned int shift_i)
3228 {
3229     assert(shift_i < (sf ? 64 : 32));
3230
3231     if (shift_i == 0) {
3232         tcg_gen_mov_i64(dst, src);
3233     } else {
3234         TCGv_i64 shift_const;
3235
3236         shift_const = tcg_const_i64(shift_i);
3237         shift_reg(dst, src, sf, shift_type, shift_const);
3238         tcg_temp_free_i64(shift_const);
3239     }
3240 }
3241
3242 /* C3.5.10 Logical (shifted register)
3243  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
3244  * +----+-----+-----------+-------+---+------+--------+------+------+
3245  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
3246  * +----+-----+-----------+-------+---+------+--------+------+------+
3247  */
3248 static void disas_logic_reg(DisasContext *s, uint32_t insn)
3249 {
3250     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3251     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3252
3253     sf = extract32(insn, 31, 1);
3254     opc = extract32(insn, 29, 2);
3255     shift_type = extract32(insn, 22, 2);
3256     invert = extract32(insn, 21, 1);
3257     rm = extract32(insn, 16, 5);
3258     shift_amount = extract32(insn, 10, 6);
3259     rn = extract32(insn, 5, 5);
3260     rd = extract32(insn, 0, 5);
3261
3262     if (!sf && (shift_amount & (1 << 5))) {
3263         unallocated_encoding(s);
3264         return;
3265     }
3266
3267     tcg_rd = cpu_reg(s, rd);
3268
3269     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3270         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3271          * register-register MOV and MVN, so it is worth special casing.
3272          */
3273         tcg_rm = cpu_reg(s, rm);
3274         if (invert) {
3275             tcg_gen_not_i64(tcg_rd, tcg_rm);
3276             if (!sf) {
3277                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3278             }
3279         } else {
3280             if (sf) {
3281                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
3282             } else {
3283                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
3284             }
3285         }
3286         return;
3287     }
3288
3289     tcg_rm = read_cpu_reg(s, rm, sf);
3290
3291     if (shift_amount) {
3292         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3293     }
3294
3295     tcg_rn = cpu_reg(s, rn);
3296
3297     switch (opc | (invert << 2)) {
3298     case 0: /* AND */
3299     case 3: /* ANDS */
3300         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3301         break;
3302     case 1: /* ORR */
3303         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3304         break;
3305     case 2: /* EOR */
3306         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3307         break;
3308     case 4: /* BIC */
3309     case 7: /* BICS */
3310         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3311         break;
3312     case 5: /* ORN */
3313         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3314         break;
3315     case 6: /* EON */
3316         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3317         break;
3318     default:
3319         assert(FALSE);
3320         break;
3321     }
3322
3323     if (!sf) {
3324         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3325     }
3326
3327     if (opc == 3) {
3328         gen_logic_CC(sf, tcg_rd);
3329     }
3330 }
3331
3332 /*
3333  * C3.5.1 Add/subtract (extended register)
3334  *
3335  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
3336  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3337  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
3338  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3339  *
3340  *  sf: 0 -> 32bit, 1 -> 64bit
3341  *  op: 0 -> add  , 1 -> sub
3342  *   S: 1 -> set flags
3343  * opt: 00
3344  * option: extension type (see DecodeRegExtend)
3345  * imm3: optional shift to Rm
3346  *
3347  * Rd = Rn + LSL(extend(Rm), amount)
3348  */
3349 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3350 {
3351     int rd = extract32(insn, 0, 5);
3352     int rn = extract32(insn, 5, 5);
3353     int imm3 = extract32(insn, 10, 3);
3354     int option = extract32(insn, 13, 3);
3355     int rm = extract32(insn, 16, 5);
3356     bool setflags = extract32(insn, 29, 1);
3357     bool sub_op = extract32(insn, 30, 1);
3358     bool sf = extract32(insn, 31, 1);
3359
3360     TCGv_i64 tcg_rm, tcg_rn; /* temps */
3361     TCGv_i64 tcg_rd;
3362     TCGv_i64 tcg_result;
3363
3364     if (imm3 > 4) {
3365         unallocated_encoding(s);
3366         return;
3367     }
3368
3369     /* non-flag setting ops may use SP */
3370     if (!setflags) {
3371         tcg_rd = cpu_reg_sp(s, rd);
3372     } else {
3373         tcg_rd = cpu_reg(s, rd);
3374     }
3375     tcg_rn = read_cpu_reg_sp(s, rn, sf);
3376
3377     tcg_rm = read_cpu_reg(s, rm, sf);
3378     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3379
3380     tcg_result = tcg_temp_new_i64();
3381
3382     if (!setflags) {
3383         if (sub_op) {
3384             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3385         } else {
3386             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3387         }
3388     } else {
3389         if (sub_op) {
3390             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3391         } else {
3392             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3393         }
3394     }
3395
3396     if (sf) {
3397         tcg_gen_mov_i64(tcg_rd, tcg_result);
3398     } else {
3399         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3400     }
3401
3402     tcg_temp_free_i64(tcg_result);
3403 }
3404
3405 /*
3406  * C3.5.2 Add/subtract (shifted register)
3407  *
3408  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
3409  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3410  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
3411  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3412  *
3413  *    sf: 0 -> 32bit, 1 -> 64bit
3414  *    op: 0 -> add  , 1 -> sub
3415  *     S: 1 -> set flags
3416  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3417  *  imm6: Shift amount to apply to Rm before the add/sub
3418  */
3419 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3420 {
3421     int rd = extract32(insn, 0, 5);
3422     int rn = extract32(insn, 5, 5);
3423     int imm6 = extract32(insn, 10, 6);
3424     int rm = extract32(insn, 16, 5);
3425     int shift_type = extract32(insn, 22, 2);
3426     bool setflags = extract32(insn, 29, 1);
3427     bool sub_op = extract32(insn, 30, 1);
3428     bool sf = extract32(insn, 31, 1);
3429
3430     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3431     TCGv_i64 tcg_rn, tcg_rm;
3432     TCGv_i64 tcg_result;
3433
3434     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3435         unallocated_encoding(s);
3436         return;
3437     }
3438
3439     tcg_rn = read_cpu_reg(s, rn, sf);
3440     tcg_rm = read_cpu_reg(s, rm, sf);
3441
3442     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
3443
3444     tcg_result = tcg_temp_new_i64();
3445
3446     if (!setflags) {
3447         if (sub_op) {
3448             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3449         } else {
3450             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3451         }
3452     } else {
3453         if (sub_op) {
3454             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3455         } else {
3456             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3457         }
3458     }
3459
3460     if (sf) {
3461         tcg_gen_mov_i64(tcg_rd, tcg_result);
3462     } else {
3463         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3464     }
3465
3466     tcg_temp_free_i64(tcg_result);
3467 }
3468
3469 /* C3.5.9 Data-processing (3 source)
3470
3471    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
3472   +--+------+-----------+------+------+----+------+------+------+
3473   |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
3474   +--+------+-----------+------+------+----+------+------+------+
3475
3476  */
3477 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
3478 {
3479     int rd = extract32(insn, 0, 5);
3480     int rn = extract32(insn, 5, 5);
3481     int ra = extract32(insn, 10, 5);
3482     int rm = extract32(insn, 16, 5);
3483     int op_id = (extract32(insn, 29, 3) << 4) |
3484         (extract32(insn, 21, 3) << 1) |
3485         extract32(insn, 15, 1);
3486     bool sf = extract32(insn, 31, 1);
3487     bool is_sub = extract32(op_id, 0, 1);
3488     bool is_high = extract32(op_id, 2, 1);
3489     bool is_signed = false;
3490     TCGv_i64 tcg_op1;
3491     TCGv_i64 tcg_op2;
3492     TCGv_i64 tcg_tmp;
3493
3494     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
3495     switch (op_id) {
3496     case 0x42: /* SMADDL */
3497     case 0x43: /* SMSUBL */
3498     case 0x44: /* SMULH */
3499         is_signed = true;
3500         break;
3501     case 0x0: /* MADD (32bit) */
3502     case 0x1: /* MSUB (32bit) */
3503     case 0x40: /* MADD (64bit) */
3504     case 0x41: /* MSUB (64bit) */
3505     case 0x4a: /* UMADDL */
3506     case 0x4b: /* UMSUBL */
3507     case 0x4c: /* UMULH */
3508         break;
3509     default:
3510         unallocated_encoding(s);
3511         return;
3512     }
3513
3514     if (is_high) {
3515         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
3516         TCGv_i64 tcg_rd = cpu_reg(s, rd);
3517         TCGv_i64 tcg_rn = cpu_reg(s, rn);
3518         TCGv_i64 tcg_rm = cpu_reg(s, rm);
3519
3520         if (is_signed) {
3521             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3522         } else {
3523             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3524         }
3525
3526         tcg_temp_free_i64(low_bits);
3527         return;
3528     }
3529
3530     tcg_op1 = tcg_temp_new_i64();
3531     tcg_op2 = tcg_temp_new_i64();
3532     tcg_tmp = tcg_temp_new_i64();
3533
3534     if (op_id < 0x42) {
3535         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
3536         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
3537     } else {
3538         if (is_signed) {
3539             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
3540             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
3541         } else {
3542             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
3543             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
3544         }
3545     }
3546
3547     if (ra == 31 && !is_sub) {
3548         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
3549         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
3550     } else {
3551         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
3552         if (is_sub) {
3553             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3554         } else {
3555             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3556         }
3557     }
3558
3559     if (!sf) {
3560         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
3561     }
3562
3563     tcg_temp_free_i64(tcg_op1);
3564     tcg_temp_free_i64(tcg_op2);
3565     tcg_temp_free_i64(tcg_tmp);
3566 }
3567
3568 /* C3.5.3 - Add/subtract (with carry)
3569  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
3570  * +--+--+--+------------------------+------+---------+------+-----+
3571  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
3572  * +--+--+--+------------------------+------+---------+------+-----+
3573  *                                            [000000]
3574  */
3575
3576 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
3577 {
3578     unsigned int sf, op, setflags, rm, rn, rd;
3579     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
3580
3581     if (extract32(insn, 10, 6) != 0) {
3582         unallocated_encoding(s);
3583         return;
3584     }
3585
3586     sf = extract32(insn, 31, 1);
3587     op = extract32(insn, 30, 1);
3588     setflags = extract32(insn, 29, 1);
3589     rm = extract32(insn, 16, 5);
3590     rn = extract32(insn, 5, 5);
3591     rd = extract32(insn, 0, 5);
3592
3593     tcg_rd = cpu_reg(s, rd);
3594     tcg_rn = cpu_reg(s, rn);
3595
3596     if (op) {
3597         tcg_y = new_tmp_a64(s);
3598         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
3599     } else {
3600         tcg_y = cpu_reg(s, rm);
3601     }
3602
3603     if (setflags) {
3604         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
3605     } else {
3606         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
3607     }
3608 }
3609
3610 /* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
3611  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
3612  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3613  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
3614  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3615  *        [1]                             y                [0]       [0]
3616  */
3617 static void disas_cc(DisasContext *s, uint32_t insn)
3618 {
3619     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
3620     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
3621     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
3622     DisasCompare c;
3623
3624     if (!extract32(insn, 29, 1)) {
3625         unallocated_encoding(s);
3626         return;
3627     }
3628     if (insn & (1 << 10 | 1 << 4)) {
3629         unallocated_encoding(s);
3630         return;
3631     }
3632     sf = extract32(insn, 31, 1);
3633     op = extract32(insn, 30, 1);
3634     is_imm = extract32(insn, 11, 1);
3635     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
3636     cond = extract32(insn, 12, 4);
3637     rn = extract32(insn, 5, 5);
3638     nzcv = extract32(insn, 0, 4);
3639
3640     /* Set T0 = !COND.  */
3641     tcg_t0 = tcg_temp_new_i32();
3642     arm_test_cc(&c, cond);
3643     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
3644     arm_free_cc(&c);
3645
3646     /* Load the arguments for the new comparison.  */
3647     if (is_imm) {
3648         tcg_y = new_tmp_a64(s);
3649         tcg_gen_movi_i64(tcg_y, y);
3650     } else {
3651         tcg_y = cpu_reg(s, y);
3652     }
3653     tcg_rn = cpu_reg(s, rn);
3654
3655     /* Set the flags for the new comparison.  */
3656     tcg_tmp = tcg_temp_new_i64();
3657     if (op) {
3658         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3659     } else {
3660         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3661     }
3662     tcg_temp_free_i64(tcg_tmp);
3663
3664     /* If COND was false, force the flags to #nzcv.  Compute two masks
3665      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
3666      * For tcg hosts that support ANDC, we can make do with just T1.
3667      * In either case, allow the tcg optimizer to delete any unused mask.
3668      */
3669     tcg_t1 = tcg_temp_new_i32();
3670     tcg_t2 = tcg_temp_new_i32();
3671     tcg_gen_neg_i32(tcg_t1, tcg_t0);
3672     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
3673
3674     if (nzcv & 8) { /* N */
3675         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
3676     } else {
3677         if (TCG_TARGET_HAS_andc_i32) {
3678             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
3679         } else {
3680             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
3681         }
3682     }
3683     if (nzcv & 4) { /* Z */
3684         if (TCG_TARGET_HAS_andc_i32) {
3685             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
3686         } else {
3687             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
3688         }
3689     } else {
3690         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
3691     }
3692     if (nzcv & 2) { /* C */
3693         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
3694     } else {
3695         if (TCG_TARGET_HAS_andc_i32) {
3696             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
3697         } else {
3698             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
3699         }
3700     }
3701     if (nzcv & 1) { /* V */
3702         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
3703     } else {
3704         if (TCG_TARGET_HAS_andc_i32) {
3705             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
3706         } else {
3707             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
3708         }
3709     }
3710     tcg_temp_free_i32(tcg_t0);
3711     tcg_temp_free_i32(tcg_t1);
3712     tcg_temp_free_i32(tcg_t2);
3713 }
3714
3715 /* C3.5.6 Conditional select
3716  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
3717  * +----+----+---+-----------------+------+------+-----+------+------+
3718  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
3719  * +----+----+---+-----------------+------+------+-----+------+------+
3720  */
3721 static void disas_cond_select(DisasContext *s, uint32_t insn)
3722 {
3723     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
3724     TCGv_i64 tcg_rd, zero;
3725     DisasCompare64 c;
3726
3727     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
3728         /* S == 1 or op2<1> == 1 */
3729         unallocated_encoding(s);
3730         return;
3731     }
3732     sf = extract32(insn, 31, 1);
3733     else_inv = extract32(insn, 30, 1);
3734     rm = extract32(insn, 16, 5);
3735     cond = extract32(insn, 12, 4);
3736     else_inc = extract32(insn, 10, 1);
3737     rn = extract32(insn, 5, 5);
3738     rd = extract32(insn, 0, 5);
3739
3740     tcg_rd = cpu_reg(s, rd);
3741
3742     a64_test_cc(&c, cond);
3743     zero = tcg_const_i64(0);
3744
3745     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
3746         /* CSET & CSETM.  */
3747         tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
3748         if (else_inv) {
3749             tcg_gen_neg_i64(tcg_rd, tcg_rd);
3750         }
3751     } else {
3752         TCGv_i64 t_true = cpu_reg(s, rn);
3753         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
3754         if (else_inv && else_inc) {
3755             tcg_gen_neg_i64(t_false, t_false);
3756         } else if (else_inv) {
3757             tcg_gen_not_i64(t_false, t_false);
3758         } else if (else_inc) {
3759             tcg_gen_addi_i64(t_false, t_false, 1);
3760         }
3761         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
3762     }
3763
3764     tcg_temp_free_i64(zero);
3765     a64_free_cc(&c);
3766
3767     if (!sf) {
3768         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3769     }
3770 }
3771
3772 static void handle_clz(DisasContext *s, unsigned int sf,
3773                        unsigned int rn, unsigned int rd)
3774 {
3775     TCGv_i64 tcg_rd, tcg_rn;
3776     tcg_rd = cpu_reg(s, rd);
3777     tcg_rn = cpu_reg(s, rn);
3778
3779     if (sf) {
3780         gen_helper_clz64(tcg_rd, tcg_rn);
3781     } else {
3782         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3783         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3784         gen_helper_clz(tcg_tmp32, tcg_tmp32);
3785         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3786         tcg_temp_free_i32(tcg_tmp32);
3787     }
3788 }
3789
3790 static void handle_cls(DisasContext *s, unsigned int sf,
3791                        unsigned int rn, unsigned int rd)
3792 {
3793     TCGv_i64 tcg_rd, tcg_rn;
3794     tcg_rd = cpu_reg(s, rd);
3795     tcg_rn = cpu_reg(s, rn);
3796
3797     if (sf) {
3798         gen_helper_cls64(tcg_rd, tcg_rn);
3799     } else {
3800         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3801         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3802         gen_helper_cls32(tcg_tmp32, tcg_tmp32);
3803         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3804         tcg_temp_free_i32(tcg_tmp32);
3805     }
3806 }
3807
3808 static void handle_rbit(DisasContext *s, unsigned int sf,
3809                         unsigned int rn, unsigned int rd)
3810 {
3811     TCGv_i64 tcg_rd, tcg_rn;
3812     tcg_rd = cpu_reg(s, rd);
3813     tcg_rn = cpu_reg(s, rn);
3814
3815     if (sf) {
3816         gen_helper_rbit64(tcg_rd, tcg_rn);
3817     } else {
3818         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3819         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3820         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
3821         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3822         tcg_temp_free_i32(tcg_tmp32);
3823     }
3824 }
3825
3826 /* C5.6.149 REV with sf==1, opcode==3 ("REV64") */
3827 static void handle_rev64(DisasContext *s, unsigned int sf,
3828                          unsigned int rn, unsigned int rd)
3829 {
3830     if (!sf) {
3831         unallocated_encoding(s);
3832         return;
3833     }
3834     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
3835 }
3836
3837 /* C5.6.149 REV with sf==0, opcode==2
3838  * C5.6.151 REV32 (sf==1, opcode==2)
3839  */
3840 static void handle_rev32(DisasContext *s, unsigned int sf,
3841                          unsigned int rn, unsigned int rd)
3842 {
3843     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3844
3845     if (sf) {
3846         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3847         TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3848
3849         /* bswap32_i64 requires zero high word */
3850         tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
3851         tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
3852         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3853         tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
3854         tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
3855
3856         tcg_temp_free_i64(tcg_tmp);
3857     } else {
3858         tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
3859         tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
3860     }
3861 }
3862
3863 /* C5.6.150 REV16 (opcode==1) */
3864 static void handle_rev16(DisasContext *s, unsigned int sf,
3865                          unsigned int rn, unsigned int rd)
3866 {
3867     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3868     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3869     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3870
3871     tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff);
3872     tcg_gen_bswap16_i64(tcg_rd, tcg_tmp);
3873
3874     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16);
3875     tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
3876     tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3877     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16);
3878
3879     if (sf) {
3880         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3881         tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
3882         tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3883         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16);
3884
3885         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48);
3886         tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3887         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16);
3888     }
3889
3890     tcg_temp_free_i64(tcg_tmp);
3891 }
3892
3893 /* C3.5.7 Data-processing (1 source)
3894  *   31  30  29  28             21 20     16 15    10 9    5 4    0
3895  * +----+---+---+-----------------+---------+--------+------+------+
3896  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
3897  * +----+---+---+-----------------+---------+--------+------+------+
3898  */
3899 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
3900 {
3901     unsigned int sf, opcode, rn, rd;
3902
3903     if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
3904         unallocated_encoding(s);
3905         return;
3906     }
3907
3908     sf = extract32(insn, 31, 1);
3909     opcode = extract32(insn, 10, 6);
3910     rn = extract32(insn, 5, 5);
3911     rd = extract32(insn, 0, 5);
3912
3913     switch (opcode) {
3914     case 0: /* RBIT */
3915         handle_rbit(s, sf, rn, rd);
3916         break;
3917     case 1: /* REV16 */
3918         handle_rev16(s, sf, rn, rd);
3919         break;
3920     case 2: /* REV32 */
3921         handle_rev32(s, sf, rn, rd);
3922         break;
3923     case 3: /* REV64 */
3924         handle_rev64(s, sf, rn, rd);
3925         break;
3926     case 4: /* CLZ */
3927         handle_clz(s, sf, rn, rd);
3928         break;
3929     case 5: /* CLS */
3930         handle_cls(s, sf, rn, rd);
3931         break;
3932     }
3933 }
3934
3935 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
3936                        unsigned int rm, unsigned int rn, unsigned int rd)
3937 {
3938     TCGv_i64 tcg_n, tcg_m, tcg_rd;
3939     tcg_rd = cpu_reg(s, rd);
3940
3941     if (!sf && is_signed) {
3942         tcg_n = new_tmp_a64(s);
3943         tcg_m = new_tmp_a64(s);
3944         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
3945         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
3946     } else {
3947         tcg_n = read_cpu_reg(s, rn, sf);
3948         tcg_m = read_cpu_reg(s, rm, sf);
3949     }
3950
3951     if (is_signed) {
3952         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
3953     } else {
3954         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
3955     }
3956
3957     if (!sf) { /* zero extend final result */
3958         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3959     }
3960 }
3961
3962 /* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */
3963 static void handle_shift_reg(DisasContext *s,
3964                              enum a64_shift_type shift_type, unsigned int sf,
3965                              unsigned int rm, unsigned int rn, unsigned int rd)
3966 {
3967     TCGv_i64 tcg_shift = tcg_temp_new_i64();
3968     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3969     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3970
3971     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
3972     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
3973     tcg_temp_free_i64(tcg_shift);
3974 }
3975
3976 /* CRC32[BHWX], CRC32C[BHWX] */
3977 static void handle_crc32(DisasContext *s,
3978                          unsigned int sf, unsigned int sz, bool crc32c,
3979                          unsigned int rm, unsigned int rn, unsigned int rd)
3980 {
3981     TCGv_i64 tcg_acc, tcg_val;
3982     TCGv_i32 tcg_bytes;
3983
3984     if (!arm_dc_feature(s, ARM_FEATURE_CRC)
3985         || (sf == 1 && sz != 3)
3986         || (sf == 0 && sz == 3)) {
3987         unallocated_encoding(s);
3988         return;
3989     }
3990
3991     if (sz == 3) {
3992         tcg_val = cpu_reg(s, rm);
3993     } else {
3994         uint64_t mask;
3995         switch (sz) {
3996         case 0:
3997             mask = 0xFF;
3998             break;
3999         case 1:
4000             mask = 0xFFFF;
4001             break;
4002         case 2:
4003             mask = 0xFFFFFFFF;
4004             break;
4005         default:
4006             g_assert_not_reached();
4007         }
4008         tcg_val = new_tmp_a64(s);
4009         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
4010     }
4011
4012     tcg_acc = cpu_reg(s, rn);
4013     tcg_bytes = tcg_const_i32(1 << sz);
4014
4015     if (crc32c) {
4016         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4017     } else {
4018         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4019     }
4020
4021     tcg_temp_free_i32(tcg_bytes);
4022 }
4023
4024 /* C3.5.8 Data-processing (2 source)
4025  *   31   30  29 28             21 20  16 15    10 9    5 4    0
4026  * +----+---+---+-----------------+------+--------+------+------+
4027  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
4028  * +----+---+---+-----------------+------+--------+------+------+
4029  */
4030 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
4031 {
4032     unsigned int sf, rm, opcode, rn, rd;
4033     sf = extract32(insn, 31, 1);
4034     rm = extract32(insn, 16, 5);
4035     opcode = extract32(insn, 10, 6);
4036     rn = extract32(insn, 5, 5);
4037     rd = extract32(insn, 0, 5);
4038
4039     if (extract32(insn, 29, 1)) {
4040         unallocated_encoding(s);
4041         return;
4042     }
4043
4044     switch (opcode) {
4045     case 2: /* UDIV */
4046         handle_div(s, false, sf, rm, rn, rd);
4047         break;
4048     case 3: /* SDIV */
4049         handle_div(s, true, sf, rm, rn, rd);
4050         break;
4051     case 8: /* LSLV */
4052         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
4053         break;
4054     case 9: /* LSRV */
4055         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
4056         break;
4057     case 10: /* ASRV */
4058         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
4059         break;
4060     case 11: /* RORV */
4061         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
4062         break;
4063     case 16:
4064     case 17:
4065     case 18:
4066     case 19:
4067     case 20:
4068     case 21:
4069     case 22:
4070     case 23: /* CRC32 */
4071     {
4072         int sz = extract32(opcode, 0, 2);
4073         bool crc32c = extract32(opcode, 2, 1);
4074         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
4075         break;
4076     }
4077     default:
4078         unallocated_encoding(s);
4079         break;
4080     }
4081 }
4082
4083 /* C3.5 Data processing - register */
4084 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
4085 {
4086     switch (extract32(insn, 24, 5)) {
4087     case 0x0a: /* Logical (shifted register) */
4088         disas_logic_reg(s, insn);
4089         break;
4090     case 0x0b: /* Add/subtract */
4091         if (insn & (1 << 21)) { /* (extended register) */
4092             disas_add_sub_ext_reg(s, insn);
4093         } else {
4094             disas_add_sub_reg(s, insn);
4095         }
4096         break;
4097     case 0x1b: /* Data-processing (3 source) */
4098         disas_data_proc_3src(s, insn);
4099         break;
4100     case 0x1a:
4101         switch (extract32(insn, 21, 3)) {
4102         case 0x0: /* Add/subtract (with carry) */
4103             disas_adc_sbc(s, insn);
4104             break;
4105         case 0x2: /* Conditional compare */
4106             disas_cc(s, insn); /* both imm and reg forms */
4107             break;
4108         case 0x4: /* Conditional select */
4109             disas_cond_select(s, insn);
4110             break;
4111         case 0x6: /* Data-processing */
4112             if (insn & (1 << 30)) { /* (1 source) */
4113                 disas_data_proc_1src(s, insn);
4114             } else {            /* (2 source) */
4115                 disas_data_proc_2src(s, insn);
4116             }
4117             break;
4118         default:
4119             unallocated_encoding(s);
4120             break;
4121         }
4122         break;
4123     default:
4124         unallocated_encoding(s);
4125         break;
4126     }
4127 }
4128
4129 static void handle_fp_compare(DisasContext *s, bool is_double,
4130                               unsigned int rn, unsigned int rm,
4131                               bool cmp_with_zero, bool signal_all_nans)
4132 {
4133     TCGv_i64 tcg_flags = tcg_temp_new_i64();
4134     TCGv_ptr fpst = get_fpstatus_ptr();
4135
4136     if (is_double) {
4137         TCGv_i64 tcg_vn, tcg_vm;
4138
4139         tcg_vn = read_fp_dreg(s, rn);
4140         if (cmp_with_zero) {
4141             tcg_vm = tcg_const_i64(0);
4142         } else {
4143             tcg_vm = read_fp_dreg(s, rm);
4144         }
4145         if (signal_all_nans) {
4146             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4147         } else {
4148             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4149         }
4150         tcg_temp_free_i64(tcg_vn);
4151         tcg_temp_free_i64(tcg_vm);
4152     } else {
4153         TCGv_i32 tcg_vn, tcg_vm;
4154
4155         tcg_vn = read_fp_sreg(s, rn);
4156         if (cmp_with_zero) {
4157             tcg_vm = tcg_const_i32(0);
4158         } else {
4159             tcg_vm = read_fp_sreg(s, rm);
4160         }
4161         if (signal_all_nans) {
4162             gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4163         } else {
4164             gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4165         }
4166         tcg_temp_free_i32(tcg_vn);
4167         tcg_temp_free_i32(tcg_vm);
4168     }
4169
4170     tcg_temp_free_ptr(fpst);
4171
4172     gen_set_nzcv(tcg_flags);
4173
4174     tcg_temp_free_i64(tcg_flags);
4175 }
4176
4177 /* C3.6.22 Floating point compare
4178  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
4179  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4180  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
4181  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4182  */
4183 static void disas_fp_compare(DisasContext *s, uint32_t insn)
4184 {
4185     unsigned int mos, type, rm, op, rn, opc, op2r;
4186
4187     mos = extract32(insn, 29, 3);
4188     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4189     rm = extract32(insn, 16, 5);
4190     op = extract32(insn, 14, 2);
4191     rn = extract32(insn, 5, 5);
4192     opc = extract32(insn, 3, 2);
4193     op2r = extract32(insn, 0, 3);
4194
4195     if (mos || op || op2r || type > 1) {
4196         unallocated_encoding(s);
4197         return;
4198     }
4199
4200     if (!fp_access_check(s)) {
4201         return;
4202     }
4203
4204     handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
4205 }
4206
4207 /* C3.6.23 Floating point conditional compare
4208  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
4209  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4210  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
4211  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4212  */
4213 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
4214 {
4215     unsigned int mos, type, rm, cond, rn, op, nzcv;
4216     TCGv_i64 tcg_flags;
4217     TCGLabel *label_continue = NULL;
4218
4219     mos = extract32(insn, 29, 3);
4220     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4221     rm = extract32(insn, 16, 5);
4222     cond = extract32(insn, 12, 4);
4223     rn = extract32(insn, 5, 5);
4224     op = extract32(insn, 4, 1);
4225     nzcv = extract32(insn, 0, 4);
4226
4227     if (mos || type > 1) {
4228         unallocated_encoding(s);
4229         return;
4230     }
4231
4232     if (!fp_access_check(s)) {
4233         return;
4234     }
4235
4236     if (cond < 0x0e) { /* not always */
4237         TCGLabel *label_match = gen_new_label();
4238         label_continue = gen_new_label();
4239         arm_gen_test_cc(cond, label_match);
4240         /* nomatch: */
4241         tcg_flags = tcg_const_i64(nzcv << 28);
4242         gen_set_nzcv(tcg_flags);
4243         tcg_temp_free_i64(tcg_flags);
4244         tcg_gen_br(label_continue);
4245         gen_set_label(label_match);
4246     }
4247
4248     handle_fp_compare(s, type, rn, rm, false, op);
4249
4250     if (cond < 0x0e) {
4251         gen_set_label(label_continue);
4252     }
4253 }
4254
4255 /* C3.6.24 Floating point conditional select
4256  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
4257  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4258  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
4259  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4260  */
4261 static void disas_fp_csel(DisasContext *s, uint32_t insn)
4262 {
4263     unsigned int mos, type, rm, cond, rn, rd;
4264     TCGv_i64 t_true, t_false, t_zero;
4265     DisasCompare64 c;
4266
4267     mos = extract32(insn, 29, 3);
4268     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4269     rm = extract32(insn, 16, 5);
4270     cond = extract32(insn, 12, 4);
4271     rn = extract32(insn, 5, 5);
4272     rd = extract32(insn, 0, 5);
4273
4274     if (mos || type > 1) {
4275         unallocated_encoding(s);
4276         return;
4277     }
4278
4279     if (!fp_access_check(s)) {
4280         return;
4281     }
4282
4283     /* Zero extend sreg inputs to 64 bits now.  */
4284     t_true = tcg_temp_new_i64();
4285     t_false = tcg_temp_new_i64();
4286     read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
4287     read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);
4288
4289     a64_test_cc(&c, cond);
4290     t_zero = tcg_const_i64(0);
4291     tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
4292     tcg_temp_free_i64(t_zero);
4293     tcg_temp_free_i64(t_false);
4294     a64_free_cc(&c);
4295
4296     /* Note that sregs write back zeros to the high bits,
4297        and we've already done the zero-extension.  */
4298     write_fp_dreg(s, rd, t_true);
4299     tcg_temp_free_i64(t_true);
4300 }
4301
4302 /* C3.6.25 Floating-point data-processing (1 source) - single precision */
4303 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
4304 {
4305     TCGv_ptr fpst;
4306     TCGv_i32 tcg_op;
4307     TCGv_i32 tcg_res;
4308
4309     fpst = get_fpstatus_ptr();
4310     tcg_op = read_fp_sreg(s, rn);
4311     tcg_res = tcg_temp_new_i32();
4312
4313     switch (opcode) {
4314     case 0x0: /* FMOV */
4315         tcg_gen_mov_i32(tcg_res, tcg_op);
4316         break;
4317     case 0x1: /* FABS */
4318         gen_helper_vfp_abss(tcg_res, tcg_op);
4319         break;
4320     case 0x2: /* FNEG */
4321         gen_helper_vfp_negs(tcg_res, tcg_op);
4322         break;
4323     case 0x3: /* FSQRT */
4324         gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
4325         break;
4326     case 0x8: /* FRINTN */
4327     case 0x9: /* FRINTP */
4328     case 0xa: /* FRINTM */
4329     case 0xb: /* FRINTZ */
4330     case 0xc: /* FRINTA */
4331     {
4332         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4333
4334         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4335         gen_helper_rints(tcg_res, tcg_op, fpst);
4336
4337         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4338         tcg_temp_free_i32(tcg_rmode);
4339         break;
4340     }
4341     case 0xe: /* FRINTX */
4342         gen_helper_rints_exact(tcg_res, tcg_op, fpst);
4343         break;
4344     case 0xf: /* FRINTI */
4345         gen_helper_rints(tcg_res, tcg_op, fpst);
4346         break;
4347     default:
4348         abort();
4349     }
4350
4351     write_fp_sreg(s, rd, tcg_res);
4352
4353     tcg_temp_free_ptr(fpst);
4354     tcg_temp_free_i32(tcg_op);
4355     tcg_temp_free_i32(tcg_res);
4356 }
4357
4358 /* C3.6.25 Floating-point data-processing (1 source) - double precision */
4359 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
4360 {
4361     TCGv_ptr fpst;
4362     TCGv_i64 tcg_op;
4363     TCGv_i64 tcg_res;
4364
4365     fpst = get_fpstatus_ptr();
4366     tcg_op = read_fp_dreg(s, rn);
4367     tcg_res = tcg_temp_new_i64();
4368
4369     switch (opcode) {
4370     case 0x0: /* FMOV */
4371         tcg_gen_mov_i64(tcg_res, tcg_op);
4372         break;
4373     case 0x1: /* FABS */
4374         gen_helper_vfp_absd(tcg_res, tcg_op);
4375         break;
4376     case 0x2: /* FNEG */
4377         gen_helper_vfp_negd(tcg_res, tcg_op);
4378         break;
4379     case 0x3: /* FSQRT */
4380         gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
4381         break;
4382     case 0x8: /* FRINTN */
4383     case 0x9: /* FRINTP */
4384     case 0xa: /* FRINTM */
4385     case 0xb: /* FRINTZ */
4386     case 0xc: /* FRINTA */
4387     {
4388         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4389
4390         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4391         gen_helper_rintd(tcg_res, tcg_op, fpst);
4392
4393         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4394         tcg_temp_free_i32(tcg_rmode);
4395         break;
4396     }
4397     case 0xe: /* FRINTX */
4398         gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
4399         break;
4400     case 0xf: /* FRINTI */
4401         gen_helper_rintd(tcg_res, tcg_op, fpst);
4402         break;
4403     default:
4404         abort();
4405     }
4406
4407     write_fp_dreg(s, rd, tcg_res);
4408
4409     tcg_temp_free_ptr(fpst);
4410     tcg_temp_free_i64(tcg_op);
4411     tcg_temp_free_i64(tcg_res);
4412 }
4413
4414 static void handle_fp_fcvt(DisasContext *s, int opcode,
4415                            int rd, int rn, int dtype, int ntype)
4416 {
4417     switch (ntype) {
4418     case 0x0:
4419     {
4420         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4421         if (dtype == 1) {
4422             /* Single to double */
4423             TCGv_i64 tcg_rd = tcg_temp_new_i64();
4424             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
4425             write_fp_dreg(s, rd, tcg_rd);
4426             tcg_temp_free_i64(tcg_rd);
4427         } else {
4428             /* Single to half */
4429             TCGv_i32 tcg_rd = tcg_temp_new_i32();
4430             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
4431             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4432             write_fp_sreg(s, rd, tcg_rd);
4433             tcg_temp_free_i32(tcg_rd);
4434         }
4435         tcg_temp_free_i32(tcg_rn);
4436         break;
4437     }
4438     case 0x1:
4439     {
4440         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
4441         TCGv_i32 tcg_rd = tcg_temp_new_i32();
4442         if (dtype == 0) {
4443             /* Double to single */
4444             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
4445         } else {
4446             /* Double to half */
4447             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
4448             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4449         }
4450         write_fp_sreg(s, rd, tcg_rd);
4451         tcg_temp_free_i32(tcg_rd);
4452         tcg_temp_free_i64(tcg_rn);
4453         break;
4454     }
4455     case 0x3:
4456     {
4457         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4458         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
4459         if (dtype == 0) {
4460             /* Half to single */
4461             TCGv_i32 tcg_rd = tcg_temp_new_i32();
4462             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
4463             write_fp_sreg(s, rd, tcg_rd);
4464             tcg_temp_free_i32(tcg_rd);
4465         } else {
4466             /* Half to double */
4467             TCGv_i64 tcg_rd = tcg_temp_new_i64();
4468             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
4469             write_fp_dreg(s, rd, tcg_rd);
4470             tcg_temp_free_i64(tcg_rd);
4471         }
4472         tcg_temp_free_i32(tcg_rn);
4473         break;
4474     }
4475     default:
4476         abort();
4477     }
4478 }
4479
4480 /* C3.6.25 Floating point data-processing (1 source)
4481  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
4482  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4483  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
4484  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4485  */
4486 static void disas_fp_1src(DisasContext *s, uint32_t insn)
4487 {
4488     int type = extract32(insn, 22, 2);
4489     int opcode = extract32(insn, 15, 6);
4490     int rn = extract32(insn, 5, 5);
4491     int rd = extract32(insn, 0, 5);
4492
4493     switch (opcode) {
4494     case 0x4: case 0x5: case 0x7:
4495     {
4496         /* FCVT between half, single and double precision */
4497         int dtype = extract32(opcode, 0, 2);
4498         if (type == 2 || dtype == type) {
4499             unallocated_encoding(s);
4500             return;
4501         }
4502         if (!fp_access_check(s)) {
4503             return;
4504         }
4505
4506         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
4507         break;
4508     }
4509     case 0x0 ... 0x3:
4510     case 0x8 ... 0xc:
4511     case 0xe ... 0xf:
4512         /* 32-to-32 and 64-to-64 ops */
4513         switch (type) {
4514         case 0:
4515             if (!fp_access_check(s)) {
4516                 return;
4517             }
4518
4519             handle_fp_1src_single(s, opcode, rd, rn);
4520             break;
4521         case 1:
4522             if (!fp_access_check(s)) {
4523                 return;
4524             }
4525
4526             handle_fp_1src_double(s, opcode, rd, rn);
4527             break;
4528         default:
4529             unallocated_encoding(s);
4530         }
4531         break;
4532     default:
4533         unallocated_encoding(s);
4534         break;
4535     }
4536 }
4537
4538 /* C3.6.26 Floating-point data-processing (2 source) - single precision */
4539 static void handle_fp_2src_single(DisasContext *s, int opcode,
4540                                   int rd, int rn, int rm)
4541 {
4542     TCGv_i32 tcg_op1;
4543     TCGv_i32 tcg_op2;
4544     TCGv_i32 tcg_res;
4545     TCGv_ptr fpst;
4546
4547     tcg_res = tcg_temp_new_i32();
4548     fpst = get_fpstatus_ptr();
4549     tcg_op1 = read_fp_sreg(s, rn);
4550     tcg_op2 = read_fp_sreg(s, rm);
4551
4552     switch (opcode) {
4553     case 0x0: /* FMUL */
4554         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4555         break;
4556     case 0x1: /* FDIV */
4557         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
4558         break;
4559     case 0x2: /* FADD */
4560         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
4561         break;
4562     case 0x3: /* FSUB */
4563         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
4564         break;
4565     case 0x4: /* FMAX */
4566         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
4567         break;
4568     case 0x5: /* FMIN */
4569         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
4570         break;
4571     case 0x6: /* FMAXNM */
4572         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
4573         break;
4574     case 0x7: /* FMINNM */
4575         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
4576         break;
4577     case 0x8: /* FNMUL */
4578         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4579         gen_helper_vfp_negs(tcg_res, tcg_res);
4580         break;
4581     }
4582
4583     write_fp_sreg(s, rd, tcg_res);
4584
4585     tcg_temp_free_ptr(fpst);
4586     tcg_temp_free_i32(tcg_op1);
4587     tcg_temp_free_i32(tcg_op2);
4588     tcg_temp_free_i32(tcg_res);
4589 }
4590
4591 /* C3.6.26 Floating-point data-processing (2 source) - double precision */
4592 static void handle_fp_2src_double(DisasContext *s, int opcode,
4593                                   int rd, int rn, int rm)
4594 {
4595     TCGv_i64 tcg_op1;
4596     TCGv_i64 tcg_op2;
4597     TCGv_i64 tcg_res;
4598     TCGv_ptr fpst;
4599
4600     tcg_res = tcg_temp_new_i64();
4601     fpst = get_fpstatus_ptr();
4602     tcg_op1 = read_fp_dreg(s, rn);
4603     tcg_op2 = read_fp_dreg(s, rm);
4604
4605     switch (opcode) {
4606     case 0x0: /* FMUL */
4607         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4608         break;
4609     case 0x1: /* FDIV */
4610         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
4611         break;
4612     case 0x2: /* FADD */
4613         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
4614         break;
4615     case 0x3: /* FSUB */
4616         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
4617         break;
4618     case 0x4: /* FMAX */
4619         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
4620         break;
4621     case 0x5: /* FMIN */
4622         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
4623         break;
4624     case 0x6: /* FMAXNM */
4625         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4626         break;
4627     case 0x7: /* FMINNM */
4628         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4629         break;
4630     case 0x8: /* FNMUL */
4631         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4632         gen_helper_vfp_negd(tcg_res, tcg_res);
4633         break;
4634     }
4635
4636     write_fp_dreg(s, rd, tcg_res);
4637
4638     tcg_temp_free_ptr(fpst);
4639     tcg_temp_free_i64(tcg_op1);
4640     tcg_temp_free_i64(tcg_op2);
4641     tcg_temp_free_i64(tcg_res);
4642 }
4643
4644 /* C3.6.26 Floating point data-processing (2 source)
4645  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
4646  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4647  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
4648  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4649  */
4650 static void disas_fp_2src(DisasContext *s, uint32_t insn)
4651 {
4652     int type = extract32(insn, 22, 2);
4653     int rd = extract32(insn, 0, 5);
4654     int rn = extract32(insn, 5, 5);
4655     int rm = extract32(insn, 16, 5);
4656     int opcode = extract32(insn, 12, 4);
4657
4658     if (opcode > 8) {
4659         unallocated_encoding(s);
4660         return;
4661     }
4662
4663     switch (type) {
4664     case 0:
4665         if (!fp_access_check(s)) {
4666             return;
4667         }
4668         handle_fp_2src_single(s, opcode, rd, rn, rm);
4669         break;
4670     case 1:
4671         if (!fp_access_check(s)) {
4672             return;
4673         }
4674         handle_fp_2src_double(s, opcode, rd, rn, rm);
4675         break;
4676     default:
4677         unallocated_encoding(s);
4678     }
4679 }
4680
4681 /* C3.6.27 Floating-point data-processing (3 source) - single precision */
4682 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
4683                                   int rd, int rn, int rm, int ra)
4684 {
4685     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
4686     TCGv_i32 tcg_res = tcg_temp_new_i32();
4687     TCGv_ptr fpst = get_fpstatus_ptr();
4688
4689     tcg_op1 = read_fp_sreg(s, rn);
4690     tcg_op2 = read_fp_sreg(s, rm);
4691     tcg_op3 = read_fp_sreg(s, ra);
4692
4693     /* These are fused multiply-add, and must be done as one
4694      * floating point operation with no rounding between the
4695      * multiplication and addition steps.
4696      * NB that doing the negations here as separate steps is
4697      * correct : an input NaN should come out with its sign bit
4698      * flipped if it is a negated-input.
4699      */
4700     if (o1 == true) {
4701         gen_helper_vfp_negs(tcg_op3, tcg_op3);
4702     }
4703
4704     if (o0 != o1) {
4705         gen_helper_vfp_negs(tcg_op1, tcg_op1);
4706     }
4707
4708     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4709
4710     write_fp_sreg(s, rd, tcg_res);
4711
4712     tcg_temp_free_ptr(fpst);
4713     tcg_temp_free_i32(tcg_op1);
4714     tcg_temp_free_i32(tcg_op2);
4715     tcg_temp_free_i32(tcg_op3);
4716     tcg_temp_free_i32(tcg_res);
4717 }
4718
4719 /* C3.6.27 Floating-point data-processing (3 source) - double precision */
4720 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
4721                                   int rd, int rn, int rm, int ra)
4722 {
4723     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
4724     TCGv_i64 tcg_res = tcg_temp_new_i64();
4725     TCGv_ptr fpst = get_fpstatus_ptr();
4726
4727     tcg_op1 = read_fp_dreg(s, rn);
4728     tcg_op2 = read_fp_dreg(s, rm);
4729     tcg_op3 = read_fp_dreg(s, ra);
4730
4731     /* These are fused multiply-add, and must be done as one
4732      * floating point operation with no rounding between the
4733      * multiplication and addition steps.
4734      * NB that doing the negations here as separate steps is
4735      * correct : an input NaN should come out with its sign bit
4736      * flipped if it is a negated-input.
4737      */
4738     if (o1 == true) {
4739         gen_helper_vfp_negd(tcg_op3, tcg_op3);
4740     }
4741
4742     if (o0 != o1) {
4743         gen_helper_vfp_negd(tcg_op1, tcg_op1);
4744     }
4745
4746     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4747
4748     write_fp_dreg(s, rd, tcg_res);
4749
4750     tcg_temp_free_ptr(fpst);
4751     tcg_temp_free_i64(tcg_op1);
4752     tcg_temp_free_i64(tcg_op2);
4753     tcg_temp_free_i64(tcg_op3);
4754     tcg_temp_free_i64(tcg_res);
4755 }
4756
4757 /* C3.6.27 Floating point data-processing (3 source)
4758  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
4759  * +---+---+---+-----------+------+----+------+----+------+------+------+
4760  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4761  * +---+---+---+-----------+------+----+------+----+------+------+------+
4762  */
4763 static void disas_fp_3src(DisasContext *s, uint32_t insn)
4764 {
4765     int type = extract32(insn, 22, 2);
4766     int rd = extract32(insn, 0, 5);
4767     int rn = extract32(insn, 5, 5);
4768     int ra = extract32(insn, 10, 5);
4769     int rm = extract32(insn, 16, 5);
4770     bool o0 = extract32(insn, 15, 1);
4771     bool o1 = extract32(insn, 21, 1);
4772
4773     switch (type) {
4774     case 0:
4775         if (!fp_access_check(s)) {
4776             return;
4777         }
4778         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
4779         break;
4780     case 1:
4781         if (!fp_access_check(s)) {
4782             return;
4783         }
4784         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
4785         break;
4786     default:
4787         unallocated_encoding(s);
4788     }
4789 }
4790
4791 /* C3.6.28 Floating point immediate
4792  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
4793  * +---+---+---+-----------+------+---+------------+-------+------+------+
4794  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
4795  * +---+---+---+-----------+------+---+------------+-------+------+------+
4796  */
4797 static void disas_fp_imm(DisasContext *s, uint32_t insn)
4798 {
4799     int rd = extract32(insn, 0, 5);
4800     int imm8 = extract32(insn, 13, 8);
4801     int is_double = extract32(insn, 22, 2);
4802     uint64_t imm;
4803     TCGv_i64 tcg_res;
4804
4805     if (is_double > 1) {
4806         unallocated_encoding(s);
4807         return;
4808     }
4809
4810     if (!fp_access_check(s)) {
4811         return;
4812     }
4813
4814     /* The imm8 encodes the sign bit, enough bits to represent
4815      * an exponent in the range 01....1xx to 10....0xx,
4816      * and the most significant 4 bits of the mantissa; see
4817      * VFPExpandImm() in the v8 ARM ARM.
4818      */
4819     if (is_double) {
4820         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4821             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
4822             extract32(imm8, 0, 6);
4823         imm <<= 48;
4824     } else {
4825         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4826             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
4827             (extract32(imm8, 0, 6) << 3);
4828         imm <<= 16;
4829     }
4830
4831     tcg_res = tcg_const_i64(imm);
4832     write_fp_dreg(s, rd, tcg_res);
4833     tcg_temp_free_i64(tcg_res);
4834 }
4835
4836 /* Handle floating point <=> fixed point conversions. Note that we can
4837  * also deal with fp <=> integer conversions as a special case (scale == 64)
4838  * OPTME: consider handling that special case specially or at least skipping
4839  * the call to scalbn in the helpers for zero shifts.
4840  */
4841 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
4842                            bool itof, int rmode, int scale, int sf, int type)
4843 {
4844     bool is_signed = !(opcode & 1);
4845     bool is_double = type;
4846     TCGv_ptr tcg_fpstatus;
4847     TCGv_i32 tcg_shift;
4848
4849     tcg_fpstatus = get_fpstatus_ptr();
4850
4851     tcg_shift = tcg_const_i32(64 - scale);
4852
4853     if (itof) {
4854         TCGv_i64 tcg_int = cpu_reg(s, rn);
4855         if (!sf) {
4856             TCGv_i64 tcg_extend = new_tmp_a64(s);
4857
4858             if (is_signed) {
4859                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
4860             } else {
4861                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
4862             }
4863
4864             tcg_int = tcg_extend;
4865         }
4866
4867         if (is_double) {
4868             TCGv_i64 tcg_double = tcg_temp_new_i64();
4869             if (is_signed) {
4870                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
4871                                      tcg_shift, tcg_fpstatus);
4872             } else {
4873                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
4874                                      tcg_shift, tcg_fpstatus);
4875             }
4876             write_fp_dreg(s, rd, tcg_double);
4877             tcg_temp_free_i64(tcg_double);
4878         } else {
4879             TCGv_i32 tcg_single = tcg_temp_new_i32();
4880             if (is_signed) {
4881                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
4882                                      tcg_shift, tcg_fpstatus);
4883             } else {
4884                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
4885                                      tcg_shift, tcg_fpstatus);
4886             }
4887             write_fp_sreg(s, rd, tcg_single);
4888             tcg_temp_free_i32(tcg_single);
4889         }
4890     } else {
4891         TCGv_i64 tcg_int = cpu_reg(s, rd);
4892         TCGv_i32 tcg_rmode;
4893
4894         if (extract32(opcode, 2, 1)) {
4895             /* There are too many rounding modes to all fit into rmode,
4896              * so FCVTA[US] is a special case.
4897              */
4898             rmode = FPROUNDING_TIEAWAY;
4899         }
4900
4901         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
4902
4903         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4904
4905         if (is_double) {
4906             TCGv_i64 tcg_double = read_fp_dreg(s, rn);
4907             if (is_signed) {
4908                 if (!sf) {
4909                     gen_helper_vfp_tosld(tcg_int, tcg_double,
4910                                          tcg_shift, tcg_fpstatus);
4911                 } else {
4912                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
4913                                          tcg_shift, tcg_fpstatus);
4914                 }
4915             } else {
4916                 if (!sf) {
4917                     gen_helper_vfp_tould(tcg_int, tcg_double,
4918                                          tcg_shift, tcg_fpstatus);
4919                 } else {
4920                     gen_helper_vfp_touqd(tcg_int, tcg_double,
4921                                          tcg_shift, tcg_fpstatus);
4922                 }
4923             }
4924             tcg_temp_free_i64(tcg_double);
4925         } else {
4926             TCGv_i32 tcg_single = read_fp_sreg(s, rn);
4927             if (sf) {
4928                 if (is_signed) {
4929                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
4930                                          tcg_shift, tcg_fpstatus);
4931                 } else {
4932                     gen_helper_vfp_touqs(tcg_int, tcg_single,
4933                                          tcg_shift, tcg_fpstatus);
4934                 }
4935             } else {
4936                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
4937                 if (is_signed) {
4938                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
4939                                          tcg_shift, tcg_fpstatus);
4940                 } else {
4941                     gen_helper_vfp_touls(tcg_dest, tcg_single,
4942                                          tcg_shift, tcg_fpstatus);
4943                 }
4944                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
4945                 tcg_temp_free_i32(tcg_dest);
4946             }
4947             tcg_temp_free_i32(tcg_single);
4948         }
4949
4950         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4951         tcg_temp_free_i32(tcg_rmode);
4952
4953         if (!sf) {
4954             tcg_gen_ext32u_i64(tcg_int, tcg_int);
4955         }
4956     }
4957
4958     tcg_temp_free_ptr(tcg_fpstatus);
4959     tcg_temp_free_i32(tcg_shift);
4960 }
4961
4962 /* C3.6.29 Floating point <-> fixed point conversions
4963  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
4964  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4965  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
4966  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4967  */
4968 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
4969 {
4970     int rd = extract32(insn, 0, 5);
4971     int rn = extract32(insn, 5, 5);
4972     int scale = extract32(insn, 10, 6);
4973     int opcode = extract32(insn, 16, 3);
4974     int rmode = extract32(insn, 19, 2);
4975     int type = extract32(insn, 22, 2);
4976     bool sbit = extract32(insn, 29, 1);
4977     bool sf = extract32(insn, 31, 1);
4978     bool itof;
4979
4980     if (sbit || (type > 1)
4981         || (!sf && scale < 32)) {
4982         unallocated_encoding(s);
4983         return;
4984     }
4985
4986     switch ((rmode << 3) | opcode) {
4987     case 0x2: /* SCVTF */
4988     case 0x3: /* UCVTF */
4989         itof = true;
4990         break;
4991     case 0x18: /* FCVTZS */
4992     case 0x19: /* FCVTZU */
4993         itof = false;
4994         break;
4995     default:
4996         unallocated_encoding(s);
4997         return;
4998     }
4999
5000     if (!fp_access_check(s)) {
5001         return;
5002     }
5003
5004     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
5005 }
5006
5007 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
5008 {
5009     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
5010      * without conversion.
5011      */
5012
5013     if (itof) {
5014         TCGv_i64 tcg_rn = cpu_reg(s, rn);
5015
5016         switch (type) {
5017         case 0:
5018         {
5019             /* 32 bit */
5020             TCGv_i64 tmp = tcg_temp_new_i64();
5021             tcg_gen_ext32u_i64(tmp, tcg_rn);
5022             tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
5023             tcg_gen_movi_i64(tmp, 0);
5024             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5025             tcg_temp_free_i64(tmp);
5026             break;
5027         }
5028         case 1:
5029         {
5030             /* 64 bit */
5031             TCGv_i64 tmp = tcg_const_i64(0);
5032             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
5033             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5034             tcg_temp_free_i64(tmp);
5035             break;
5036         }
5037         case 2:
5038             /* 64 bit to top half. */
5039             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
5040             break;
5041         }
5042     } else {
5043         TCGv_i64 tcg_rd = cpu_reg(s, rd);
5044
5045         switch (type) {
5046         case 0:
5047             /* 32 bit */
5048             tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
5049             break;
5050         case 1:
5051             /* 64 bit */
5052             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
5053             break;
5054         case 2:
5055             /* 64 bits from top half */
5056             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
5057             break;
5058         }
5059     }
5060 }
5061
5062 /* C3.6.30 Floating point <-> integer conversions
5063  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
5064  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5065  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
5066  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5067  */
5068 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
5069 {
5070     int rd = extract32(insn, 0, 5);
5071     int rn = extract32(insn, 5, 5);
5072     int opcode = extract32(insn, 16, 3);
5073     int rmode = extract32(insn, 19, 2);
5074     int type = extract32(insn, 22, 2);
5075     bool sbit = extract32(insn, 29, 1);
5076     bool sf = extract32(insn, 31, 1);
5077
5078     if (sbit) {
5079         unallocated_encoding(s);
5080         return;
5081     }
5082
5083     if (opcode > 5) {
5084         /* FMOV */
5085         bool itof = opcode & 1;
5086
5087         if (rmode >= 2) {
5088             unallocated_encoding(s);
5089             return;
5090         }
5091
5092         switch (sf << 3 | type << 1 | rmode) {
5093         case 0x0: /* 32 bit */
5094         case 0xa: /* 64 bit */
5095         case 0xd: /* 64 bit to top half of quad */
5096             break;
5097         default:
5098             /* all other sf/type/rmode combinations are invalid */
5099             unallocated_encoding(s);
5100             break;
5101         }
5102
5103         if (!fp_access_check(s)) {
5104             return;
5105         }
5106         handle_fmov(s, rd, rn, type, itof);
5107     } else {
5108         /* actual FP conversions */
5109         bool itof = extract32(opcode, 1, 1);
5110
5111         if (type > 1 || (rmode != 0 && opcode > 1)) {
5112             unallocated_encoding(s);
5113             return;
5114         }
5115
5116         if (!fp_access_check(s)) {
5117             return;
5118         }
5119         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
5120     }
5121 }
5122
5123 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
5124  *   31  30  29 28     25 24                          0
5125  * +---+---+---+---------+-----------------------------+
5126  * |   | 0 |   | 1 1 1 1 |                             |
5127  * +---+---+---+---------+-----------------------------+
5128  */
5129 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
5130 {
5131     if (extract32(insn, 24, 1)) {
5132         /* Floating point data-processing (3 source) */
5133         disas_fp_3src(s, insn);
5134     } else if (extract32(insn, 21, 1) == 0) {
5135         /* Floating point to fixed point conversions */
5136         disas_fp_fixed_conv(s, insn);
5137     } else {
5138         switch (extract32(insn, 10, 2)) {
5139         case 1:
5140             /* Floating point conditional compare */
5141             disas_fp_ccomp(s, insn);
5142             break;
5143         case 2:
5144             /* Floating point data-processing (2 source) */
5145             disas_fp_2src(s, insn);
5146             break;
5147         case 3:
5148             /* Floating point conditional select */
5149             disas_fp_csel(s, insn);
5150             break;
5151         case 0:
5152             switch (ctz32(extract32(insn, 12, 4))) {
5153             case 0: /* [15:12] == xxx1 */
5154                 /* Floating point immediate */
5155                 disas_fp_imm(s, insn);
5156                 break;
5157             case 1: /* [15:12] == xx10 */
5158                 /* Floating point compare */
5159                 disas_fp_compare(s, insn);
5160                 break;
5161             case 2: /* [15:12] == x100 */
5162                 /* Floating point data-processing (1 source) */
5163                 disas_fp_1src(s, insn);
5164                 break;
5165             case 3: /* [15:12] == 1000 */
5166                 unallocated_encoding(s);
5167                 break;
5168             default: /* [15:12] == 0000 */
5169                 /* Floating point <-> integer conversions */
5170                 disas_fp_int_conv(s, insn);
5171                 break;
5172             }
5173             break;
5174         }
5175     }
5176 }
5177
5178 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
5179                      int pos)
5180 {
5181     /* Extract 64 bits from the middle of two concatenated 64 bit
5182      * vector register slices left:right. The extracted bits start
5183      * at 'pos' bits into the right (least significant) side.
5184      * We return the result in tcg_right, and guarantee not to
5185      * trash tcg_left.
5186      */
5187     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5188     assert(pos > 0 && pos < 64);
5189
5190     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
5191     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
5192     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
5193
5194     tcg_temp_free_i64(tcg_tmp);
5195 }
5196
5197 /* C3.6.1 EXT
5198  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
5199  * +---+---+-------------+-----+---+------+---+------+---+------+------+
5200  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
5201  * +---+---+-------------+-----+---+------+---+------+---+------+------+
5202  */
5203 static void disas_simd_ext(DisasContext *s, uint32_t insn)
5204 {
5205     int is_q = extract32(insn, 30, 1);
5206     int op2 = extract32(insn, 22, 2);
5207     int imm4 = extract32(insn, 11, 4);
5208     int rm = extract32(insn, 16, 5);
5209     int rn = extract32(insn, 5, 5);
5210     int rd = extract32(insn, 0, 5);
5211     int pos = imm4 << 3;
5212     TCGv_i64 tcg_resl, tcg_resh;
5213
5214     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
5215         unallocated_encoding(s);
5216         return;
5217     }
5218
5219     if (!fp_access_check(s)) {
5220         return;
5221     }
5222
5223     tcg_resh = tcg_temp_new_i64();
5224     tcg_resl = tcg_temp_new_i64();
5225
5226     /* Vd gets bits starting at pos bits into Vm:Vn. This is
5227      * either extracting 128 bits from a 128:128 concatenation, or
5228      * extracting 64 bits from a 64:64 concatenation.
5229      */
5230     if (!is_q) {
5231         read_vec_element(s, tcg_resl, rn, 0, MO_64);
5232         if (pos != 0) {
5233             read_vec_element(s, tcg_resh, rm, 0, MO_64);
5234             do_ext64(s, tcg_resh, tcg_resl, pos);
5235         }
5236         tcg_gen_movi_i64(tcg_resh, 0);
5237     } else {
5238         TCGv_i64 tcg_hh;
5239         typedef struct {
5240             int reg;
5241             int elt;
5242         } EltPosns;
5243         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
5244         EltPosns *elt = eltposns;
5245
5246         if (pos >= 64) {
5247             elt++;
5248             pos -= 64;
5249         }
5250
5251         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
5252         elt++;
5253         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
5254         elt++;
5255         if (pos != 0) {
5256             do_ext64(s, tcg_resh, tcg_resl, pos);
5257             tcg_hh = tcg_temp_new_i64();
5258             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
5259             do_ext64(s, tcg_hh, tcg_resh, pos);
5260             tcg_temp_free_i64(tcg_hh);
5261         }
5262     }
5263
5264     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5265     tcg_temp_free_i64(tcg_resl);
5266     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5267     tcg_temp_free_i64(tcg_resh);
5268 }
5269
5270 /* C3.6.2 TBL/TBX
5271  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
5272  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5273  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
5274  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5275  */
5276 static void disas_simd_tb(DisasContext *s, uint32_t insn)
5277 {
5278     int op2 = extract32(insn, 22, 2);
5279     int is_q = extract32(insn, 30, 1);
5280     int rm = extract32(insn, 16, 5);
5281     int rn = extract32(insn, 5, 5);
5282     int rd = extract32(insn, 0, 5);
5283     int is_tblx = extract32(insn, 12, 1);
5284     int len = extract32(insn, 13, 2);
5285     TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
5286     TCGv_i32 tcg_regno, tcg_numregs;
5287
5288     if (op2 != 0) {
5289         unallocated_encoding(s);
5290         return;
5291     }
5292
5293     if (!fp_access_check(s)) {
5294         return;
5295     }
5296
5297     /* This does a table lookup: for every byte element in the input
5298      * we index into a table formed from up to four vector registers,
5299      * and then the output is the result of the lookups. Our helper
5300      * function does the lookup operation for a single 64 bit part of
5301      * the input.
5302      */
5303     tcg_resl = tcg_temp_new_i64();
5304     tcg_resh = tcg_temp_new_i64();
5305
5306     if (is_tblx) {
5307         read_vec_element(s, tcg_resl, rd, 0, MO_64);
5308     } else {
5309         tcg_gen_movi_i64(tcg_resl, 0);
5310     }
5311     if (is_tblx && is_q) {
5312         read_vec_element(s, tcg_resh, rd, 1, MO_64);
5313     } else {
5314         tcg_gen_movi_i64(tcg_resh, 0);
5315     }
5316
5317     tcg_idx = tcg_temp_new_i64();
5318     tcg_regno = tcg_const_i32(rn);
5319     tcg_numregs = tcg_const_i32(len + 1);
5320     read_vec_element(s, tcg_idx, rm, 0, MO_64);
5321     gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
5322                         tcg_regno, tcg_numregs);
5323     if (is_q) {
5324         read_vec_element(s, tcg_idx, rm, 1, MO_64);
5325         gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
5326                             tcg_regno, tcg_numregs);
5327     }
5328     tcg_temp_free_i64(tcg_idx);
5329     tcg_temp_free_i32(tcg_regno);
5330     tcg_temp_free_i32(tcg_numregs);
5331
5332     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5333     tcg_temp_free_i64(tcg_resl);
5334     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5335     tcg_temp_free_i64(tcg_resh);
5336 }
5337
5338 /* C3.6.3 ZIP/UZP/TRN
5339  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
5340  * +---+---+-------------+------+---+------+---+------------------+------+
5341  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
5342  * +---+---+-------------+------+---+------+---+------------------+------+
5343  */
5344 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
5345 {
5346     int rd = extract32(insn, 0, 5);
5347     int rn = extract32(insn, 5, 5);
5348     int rm = extract32(insn, 16, 5);
5349     int size = extract32(insn, 22, 2);
5350     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
5351      * bit 2 indicates 1 vs 2 variant of the insn.
5352      */
5353     int opcode = extract32(insn, 12, 2);
5354     bool part = extract32(insn, 14, 1);
5355     bool is_q = extract32(insn, 30, 1);
5356     int esize = 8 << size;
5357     int i, ofs;
5358     int datasize = is_q ? 128 : 64;
5359     int elements = datasize / esize;
5360     TCGv_i64 tcg_res, tcg_resl, tcg_resh;
5361
5362     if (opcode == 0 || (size == 3 && !is_q)) {
5363         unallocated_encoding(s);
5364         return;
5365     }
5366
5367     if (!fp_access_check(s)) {
5368         return;
5369     }
5370
5371     tcg_resl = tcg_const_i64(0);
5372     tcg_resh = tcg_const_i64(0);
5373     tcg_res = tcg_temp_new_i64();
5374
5375     for (i = 0; i < elements; i++) {
5376         switch (opcode) {
5377         case 1: /* UZP1/2 */
5378         {
5379             int midpoint = elements / 2;
5380             if (i < midpoint) {
5381                 read_vec_element(s, tcg_res, rn, 2 * i + part, size);
5382             } else {
5383                 read_vec_element(s, tcg_res, rm,
5384                                  2 * (i - midpoint) + part, size);
5385             }
5386             break;
5387         }
5388         case 2: /* TRN1/2 */
5389             if (i & 1) {
5390                 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
5391             } else {
5392                 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
5393             }
5394             break;
5395         case 3: /* ZIP1/2 */
5396         {
5397             int base = part * elements / 2;
5398             if (i & 1) {
5399                 read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
5400             } else {
5401                 read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
5402             }
5403             break;
5404         }
5405         default:
5406             g_assert_not_reached();
5407         }
5408
5409         ofs = i * esize;
5410         if (ofs < 64) {
5411             tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
5412             tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
5413         } else {
5414             tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
5415             tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
5416         }
5417     }
5418
5419     tcg_temp_free_i64(tcg_res);
5420
5421     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5422     tcg_temp_free_i64(tcg_resl);
5423     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5424     tcg_temp_free_i64(tcg_resh);
5425 }
5426
5427 static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
5428                         int opc, bool is_min, TCGv_ptr fpst)
5429 {
5430     /* Helper function for disas_simd_across_lanes: do a single precision
5431      * min/max operation on the specified two inputs,
5432      * and return the result in tcg_elt1.
5433      */
5434     if (opc == 0xc) {
5435         if (is_min) {
5436             gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5437         } else {
5438             gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5439         }
5440     } else {
5441         assert(opc == 0xf);
5442         if (is_min) {
5443             gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5444         } else {
5445             gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5446         }
5447     }
5448 }
5449
5450 /* C3.6.4 AdvSIMD across lanes
5451  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
5452  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5453  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
5454  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5455  */
5456 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
5457 {
5458     int rd = extract32(insn, 0, 5);
5459     int rn = extract32(insn, 5, 5);
5460     int size = extract32(insn, 22, 2);
5461     int opcode = extract32(insn, 12, 5);
5462     bool is_q = extract32(insn, 30, 1);
5463     bool is_u = extract32(insn, 29, 1);
5464     bool is_fp = false;
5465     bool is_min = false;
5466     int esize;
5467     int elements;
5468     int i;
5469     TCGv_i64 tcg_res, tcg_elt;
5470
5471     switch (opcode) {
5472     case 0x1b: /* ADDV */
5473         if (is_u) {
5474             unallocated_encoding(s);
5475             return;
5476         }
5477         /* fall through */
5478     case 0x3: /* SADDLV, UADDLV */
5479     case 0xa: /* SMAXV, UMAXV */
5480     case 0x1a: /* SMINV, UMINV */
5481         if (size == 3 || (size == 2 && !is_q)) {
5482             unallocated_encoding(s);
5483             return;
5484         }
5485         break;
5486     case 0xc: /* FMAXNMV, FMINNMV */
5487     case 0xf: /* FMAXV, FMINV */
5488         if (!is_u || !is_q || extract32(size, 0, 1)) {
5489             unallocated_encoding(s);
5490             return;
5491         }
5492         /* Bit 1 of size field encodes min vs max, and actual size is always
5493          * 32 bits: adjust the size variable so following code can rely on it
5494          */
5495         is_min = extract32(size, 1, 1);
5496         is_fp = true;
5497         size = 2;
5498         break;
5499     default:
5500         unallocated_encoding(s);
5501         return;
5502     }
5503
5504     if (!fp_access_check(s)) {
5505         return;
5506     }
5507
5508     esize = 8 << size;
5509     elements = (is_q ? 128 : 64) / esize;
5510
5511     tcg_res = tcg_temp_new_i64();
5512     tcg_elt = tcg_temp_new_i64();
5513
5514     /* These instructions operate across all lanes of a vector
5515      * to produce a single result. We can guarantee that a 64
5516      * bit intermediate is sufficient:
5517      *  + for [US]ADDLV the maximum element size is 32 bits, and
5518      *    the result type is 64 bits
5519      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
5520      *    same as the element size, which is 32 bits at most
5521      * For the integer operations we can choose to work at 64
5522      * or 32 bits and truncate at the end; for simplicity
5523      * we use 64 bits always. The floating point
5524      * ops do require 32 bit intermediates, though.
5525      */
5526     if (!is_fp) {
5527         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
5528
5529         for (i = 1; i < elements; i++) {
5530             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
5531
5532             switch (opcode) {
5533             case 0x03: /* SADDLV / UADDLV */
5534             case 0x1b: /* ADDV */
5535                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
5536                 break;
5537             case 0x0a: /* SMAXV / UMAXV */
5538                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
5539                                     tcg_res,
5540                                     tcg_res, tcg_elt, tcg_res, tcg_elt);
5541                 break;
5542             case 0x1a: /* SMINV / UMINV */
5543                 tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
5544                                     tcg_res,
5545                                     tcg_res, tcg_elt, tcg_res, tcg_elt);
5546                 break;
5547                 break;
5548             default:
5549                 g_assert_not_reached();
5550             }
5551
5552         }
5553     } else {
5554         /* Floating point ops which work on 32 bit (single) intermediates.
5555          * Note that correct NaN propagation requires that we do these
5556          * operations in exactly the order specified by the pseudocode.
5557          */
5558         TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
5559         TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
5560         TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
5561         TCGv_ptr fpst = get_fpstatus_ptr();
5562
5563         assert(esize == 32);
5564         assert(elements == 4);
5565
5566         read_vec_element(s, tcg_elt, rn, 0, MO_32);
5567         tcg_gen_extrl_i64_i32(tcg_elt1, tcg_elt);
5568         read_vec_element(s, tcg_elt, rn, 1, MO_32);
5569         tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5570
5571         do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5572
5573         read_vec_element(s, tcg_elt, rn, 2, MO_32);
5574         tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5575         read_vec_element(s, tcg_elt, rn, 3, MO_32);
5576         tcg_gen_extrl_i64_i32(tcg_elt3, tcg_elt);
5577
5578         do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
5579
5580         do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5581
5582         tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
5583         tcg_temp_free_i32(tcg_elt1);
5584         tcg_temp_free_i32(tcg_elt2);
5585         tcg_temp_free_i32(tcg_elt3);
5586         tcg_temp_free_ptr(fpst);
5587     }
5588
5589     tcg_temp_free_i64(tcg_elt);
5590
5591     /* Now truncate the result to the width required for the final output */
5592     if (opcode == 0x03) {
5593         /* SADDLV, UADDLV: result is 2*esize */
5594         size++;
5595     }
5596
5597     switch (size) {
5598     case 0:
5599         tcg_gen_ext8u_i64(tcg_res, tcg_res);
5600         break;
5601     case 1:
5602         tcg_gen_ext16u_i64(tcg_res, tcg_res);
5603         break;
5604     case 2:
5605         tcg_gen_ext32u_i64(tcg_res, tcg_res);
5606         break;
5607     case 3:
5608         break;
5609     default:
5610         g_assert_not_reached();
5611     }
5612
5613     write_fp_dreg(s, rd, tcg_res);
5614     tcg_temp_free_i64(tcg_res);
5615 }
5616
5617 /* C6.3.31 DUP (Element, Vector)
5618  *
5619  *  31  30   29              21 20    16 15        10  9    5 4    0
5620  * +---+---+-------------------+--------+-------------+------+------+
5621  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5622  * +---+---+-------------------+--------+-------------+------+------+
5623  *
5624  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5625  */
5626 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
5627                              int imm5)
5628 {
5629     int size = ctz32(imm5);
5630     int esize = 8 << size;
5631     int elements = (is_q ? 128 : 64) / esize;
5632     int index, i;
5633     TCGv_i64 tmp;
5634
5635     if (size > 3 || (size == 3 && !is_q)) {
5636         unallocated_encoding(s);
5637         return;
5638     }
5639
5640     if (!fp_access_check(s)) {
5641         return;
5642     }
5643
5644     index = imm5 >> (size + 1);
5645
5646     tmp = tcg_temp_new_i64();
5647     read_vec_element(s, tmp, rn, index, size);
5648
5649     for (i = 0; i < elements; i++) {
5650         write_vec_element(s, tmp, rd, i, size);
5651     }
5652
5653     if (!is_q) {
5654         clear_vec_high(s, rd);
5655     }
5656
5657     tcg_temp_free_i64(tmp);
5658 }
5659
5660 /* C6.3.31 DUP (element, scalar)
5661  *  31                   21 20    16 15        10  9    5 4    0
5662  * +-----------------------+--------+-------------+------+------+
5663  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5664  * +-----------------------+--------+-------------+------+------+
5665  */
5666 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
5667                               int imm5)
5668 {
5669     int size = ctz32(imm5);
5670     int index;
5671     TCGv_i64 tmp;
5672
5673     if (size > 3) {
5674         unallocated_encoding(s);
5675         return;
5676     }
5677
5678     if (!fp_access_check(s)) {
5679         return;
5680     }
5681
5682     index = imm5 >> (size + 1);
5683
5684     /* This instruction just extracts the specified element and
5685      * zero-extends it into the bottom of the destination register.
5686      */
5687     tmp = tcg_temp_new_i64();
5688     read_vec_element(s, tmp, rn, index, size);
5689     write_fp_dreg(s, rd, tmp);
5690     tcg_temp_free_i64(tmp);
5691 }
5692
5693 /* C6.3.32 DUP (General)
5694  *
5695  *  31  30   29              21 20    16 15        10  9    5 4    0
5696  * +---+---+-------------------+--------+-------------+------+------+
5697  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
5698  * +---+---+-------------------+--------+-------------+------+------+
5699  *
5700  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5701  */
5702 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
5703                              int imm5)
5704 {
5705     int size = ctz32(imm5);
5706     int esize = 8 << size;
5707     int elements = (is_q ? 128 : 64)/esize;
5708     int i = 0;
5709
5710     if (size > 3 || ((size == 3) && !is_q)) {
5711         unallocated_encoding(s);
5712         return;
5713     }
5714
5715     if (!fp_access_check(s)) {
5716         return;
5717     }
5718
5719     for (i = 0; i < elements; i++) {
5720         write_vec_element(s, cpu_reg(s, rn), rd, i, size);
5721     }
5722     if (!is_q) {
5723         clear_vec_high(s, rd);
5724     }
5725 }
5726
5727 /* C6.3.150 INS (Element)
5728  *
5729  *  31                   21 20    16 15  14    11  10 9    5 4    0
5730  * +-----------------------+--------+------------+---+------+------+
5731  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
5732  * +-----------------------+--------+------------+---+------+------+
5733  *
5734  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5735  * index: encoded in imm5<4:size+1>
5736  */
5737 static void handle_simd_inse(DisasContext *s, int rd, int rn,
5738                              int imm4, int imm5)
5739 {
5740     int size = ctz32(imm5);
5741     int src_index, dst_index;
5742     TCGv_i64 tmp;
5743
5744     if (size > 3) {
5745         unallocated_encoding(s);
5746         return;
5747     }
5748
5749     if (!fp_access_check(s)) {
5750         return;
5751     }
5752
5753     dst_index = extract32(imm5, 1+size, 5);
5754     src_index = extract32(imm4, size, 4);
5755
5756     tmp = tcg_temp_new_i64();
5757
5758     read_vec_element(s, tmp, rn, src_index, size);
5759     write_vec_element(s, tmp, rd, dst_index, size);
5760
5761     tcg_temp_free_i64(tmp);
5762 }
5763
5764
5765 /* C6.3.151 INS (General)
5766  *
5767  *  31                   21 20    16 15        10  9    5 4    0
5768  * +-----------------------+--------+-------------+------+------+
5769  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
5770  * +-----------------------+--------+-------------+------+------+
5771  *
5772  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5773  * index: encoded in imm5<4:size+1>
5774  */
5775 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
5776 {
5777     int size = ctz32(imm5);
5778     int idx;
5779
5780     if (size > 3) {
5781         unallocated_encoding(s);
5782         return;
5783     }
5784
5785     if (!fp_access_check(s)) {
5786         return;
5787     }
5788
5789     idx = extract32(imm5, 1 + size, 4 - size);
5790     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
5791 }
5792
5793 /*
5794  * C6.3.321 UMOV (General)
5795  * C6.3.237 SMOV (General)
5796  *
5797  *  31  30   29              21 20    16 15    12   10 9    5 4    0
5798  * +---+---+-------------------+--------+-------------+------+------+
5799  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
5800  * +---+---+-------------------+--------+-------------+------+------+
5801  *
5802  * U: unsigned when set
5803  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5804  */
5805 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
5806                                   int rn, int rd, int imm5)
5807 {
5808     int size = ctz32(imm5);
5809     int element;
5810     TCGv_i64 tcg_rd;
5811
5812     /* Check for UnallocatedEncodings */
5813     if (is_signed) {
5814         if (size > 2 || (size == 2 && !is_q)) {
5815             unallocated_encoding(s);
5816             return;
5817         }
5818     } else {
5819         if (size > 3
5820             || (size < 3 && is_q)
5821             || (size == 3 && !is_q)) {
5822             unallocated_encoding(s);
5823             return;
5824         }
5825     }
5826
5827     if (!fp_access_check(s)) {
5828         return;
5829     }
5830
5831     element = extract32(imm5, 1+size, 4);
5832
5833     tcg_rd = cpu_reg(s, rd);
5834     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
5835     if (is_signed && !is_q) {
5836         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5837     }
5838 }
5839
5840 /* C3.6.5 AdvSIMD copy
5841  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
5842  * +---+---+----+-----------------+------+---+------+---+------+------+
5843  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
5844  * +---+---+----+-----------------+------+---+------+---+------+------+
5845  */
5846 static void disas_simd_copy(DisasContext *s, uint32_t insn)
5847 {
5848     int rd = extract32(insn, 0, 5);
5849     int rn = extract32(insn, 5, 5);
5850     int imm4 = extract32(insn, 11, 4);
5851     int op = extract32(insn, 29, 1);
5852     int is_q = extract32(insn, 30, 1);
5853     int imm5 = extract32(insn, 16, 5);
5854
5855     if (op) {
5856         if (is_q) {
5857             /* INS (element) */
5858             handle_simd_inse(s, rd, rn, imm4, imm5);
5859         } else {
5860             unallocated_encoding(s);
5861         }
5862     } else {
5863         switch (imm4) {
5864         case 0:
5865             /* DUP (element - vector) */
5866             handle_simd_dupe(s, is_q, rd, rn, imm5);
5867             break;
5868         case 1:
5869             /* DUP (general) */
5870             handle_simd_dupg(s, is_q, rd, rn, imm5);
5871             break;
5872         case 3:
5873             if (is_q) {
5874                 /* INS (general) */
5875                 handle_simd_insg(s, rd, rn, imm5);
5876             } else {
5877                 unallocated_encoding(s);
5878             }
5879             break;
5880         case 5:
5881         case 7:
5882             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
5883             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
5884             break;
5885         default:
5886             unallocated_encoding(s);
5887             break;
5888         }
5889     }
5890 }
5891
5892 /* C3.6.6 AdvSIMD modified immediate
5893  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
5894  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
5895  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
5896  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
5897  *
5898  * There are a number of operations that can be carried out here:
5899  *   MOVI - move (shifted) imm into register
5900  *   MVNI - move inverted (shifted) imm into register
5901  *   ORR  - bitwise OR of (shifted) imm with register
5902  *   BIC  - bitwise clear of (shifted) imm with register
5903  */
5904 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
5905 {
5906     int rd = extract32(insn, 0, 5);
5907     int cmode = extract32(insn, 12, 4);
5908     int cmode_3_1 = extract32(cmode, 1, 3);
5909     int cmode_0 = extract32(cmode, 0, 1);
5910     int o2 = extract32(insn, 11, 1);
5911     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
5912     bool is_neg = extract32(insn, 29, 1);
5913     bool is_q = extract32(insn, 30, 1);
5914     uint64_t imm = 0;
5915     TCGv_i64 tcg_rd, tcg_imm;
5916     int i;
5917
5918     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
5919         unallocated_encoding(s);
5920         return;
5921     }
5922
5923     if (!fp_access_check(s)) {
5924         return;
5925     }
5926
5927     /* See AdvSIMDExpandImm() in ARM ARM */
5928     switch (cmode_3_1) {
5929     case 0: /* Replicate(Zeros(24):imm8, 2) */
5930     case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
5931     case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
5932     case 3: /* Replicate(imm8:Zeros(24), 2) */
5933     {
5934         int shift = cmode_3_1 * 8;
5935         imm = bitfield_replicate(abcdefgh << shift, 32);
5936         break;
5937     }
5938     case 4: /* Replicate(Zeros(8):imm8, 4) */
5939     case 5: /* Replicate(imm8:Zeros(8), 4) */
5940     {
5941         int shift = (cmode_3_1 & 0x1) * 8;
5942         imm = bitfield_replicate(abcdefgh << shift, 16);
5943         break;
5944     }
5945     case 6:
5946         if (cmode_0) {
5947             /* Replicate(Zeros(8):imm8:Ones(16), 2) */
5948             imm = (abcdefgh << 16) | 0xffff;
5949         } else {
5950             /* Replicate(Zeros(16):imm8:Ones(8), 2) */
5951             imm = (abcdefgh << 8) | 0xff;
5952         }
5953         imm = bitfield_replicate(imm, 32);
5954         break;
5955     case 7:
5956         if (!cmode_0 && !is_neg) {
5957             imm = bitfield_replicate(abcdefgh, 8);
5958         } else if (!cmode_0 && is_neg) {
5959             int i;
5960             imm = 0;
5961             for (i = 0; i < 8; i++) {
5962                 if ((abcdefgh) & (1 << i)) {
5963                     imm |= 0xffULL << (i * 8);
5964                 }
5965             }
5966         } else if (cmode_0) {
5967             if (is_neg) {
5968                 imm = (abcdefgh & 0x3f) << 48;
5969                 if (abcdefgh & 0x80) {
5970                     imm |= 0x8000000000000000ULL;
5971                 }
5972                 if (abcdefgh & 0x40) {
5973                     imm |= 0x3fc0000000000000ULL;
5974                 } else {
5975                     imm |= 0x4000000000000000ULL;
5976                 }
5977             } else {
5978                 imm = (abcdefgh & 0x3f) << 19;
5979                 if (abcdefgh & 0x80) {
5980                     imm |= 0x80000000;
5981                 }
5982                 if (abcdefgh & 0x40) {
5983                     imm |= 0x3e000000;
5984                 } else {
5985                     imm |= 0x40000000;
5986                 }
5987                 imm |= (imm << 32);
5988             }
5989         }
5990         break;
5991     }
5992
5993     if (cmode_3_1 != 7 && is_neg) {
5994         imm = ~imm;
5995     }
5996
5997     tcg_imm = tcg_const_i64(imm);
5998     tcg_rd = new_tmp_a64(s);
5999
6000     for (i = 0; i < 2; i++) {
6001         int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64);
6002
6003         if (i == 1 && !is_q) {
6004             /* non-quad ops clear high half of vector */
6005             tcg_gen_movi_i64(tcg_rd, 0);
6006         } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
6007             tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
6008             if (is_neg) {
6009                 /* AND (BIC) */
6010                 tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
6011             } else {
6012                 /* ORR */
6013                 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
6014             }
6015         } else {
6016             /* MOVI */
6017             tcg_gen_mov_i64(tcg_rd, tcg_imm);
6018         }
6019         tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
6020     }
6021
6022     tcg_temp_free_i64(tcg_imm);
6023 }
6024
6025 /* C3.6.7 AdvSIMD scalar copy
6026  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
6027  * +-----+----+-----------------+------+---+------+---+------+------+
6028  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6029  * +-----+----+-----------------+------+---+------+---+------+------+
6030  */
6031 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
6032 {
6033     int rd = extract32(insn, 0, 5);
6034     int rn = extract32(insn, 5, 5);
6035     int imm4 = extract32(insn, 11, 4);
6036     int imm5 = extract32(insn, 16, 5);
6037     int op = extract32(insn, 29, 1);
6038
6039     if (op != 0 || imm4 != 0) {
6040         unallocated_encoding(s);
6041         return;
6042     }
6043
6044     /* DUP (element, scalar) */
6045     handle_simd_dupes(s, rd, rn, imm5);
6046 }
6047
6048 /* C3.6.8 AdvSIMD scalar pairwise
6049  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
6050  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6051  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6052  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6053  */
6054 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
6055 {
6056     int u = extract32(insn, 29, 1);
6057     int size = extract32(insn, 22, 2);
6058     int opcode = extract32(insn, 12, 5);
6059     int rn = extract32(insn, 5, 5);
6060     int rd = extract32(insn, 0, 5);
6061     TCGv_ptr fpst;
6062
6063     /* For some ops (the FP ones), size[1] is part of the encoding.
6064      * For ADDP strictly it is not but size[1] is always 1 for valid
6065      * encodings.
6066      */
6067     opcode |= (extract32(size, 1, 1) << 5);
6068
6069     switch (opcode) {
6070     case 0x3b: /* ADDP */
6071         if (u || size != 3) {
6072             unallocated_encoding(s);
6073             return;
6074         }
6075         if (!fp_access_check(s)) {
6076             return;
6077         }
6078
6079         TCGV_UNUSED_PTR(fpst);
6080         break;
6081     case 0xc: /* FMAXNMP */
6082     case 0xd: /* FADDP */
6083     case 0xf: /* FMAXP */
6084     case 0x2c: /* FMINNMP */
6085     case 0x2f: /* FMINP */
6086         /* FP op, size[0] is 32 or 64 bit */
6087         if (!u) {
6088             unallocated_encoding(s);
6089             return;
6090         }
6091         if (!fp_access_check(s)) {
6092             return;
6093         }
6094
6095         size = extract32(size, 0, 1) ? 3 : 2;
6096         fpst = get_fpstatus_ptr();
6097         break;
6098     default:
6099         unallocated_encoding(s);
6100         return;
6101     }
6102
6103     if (size == 3) {
6104         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6105         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6106         TCGv_i64 tcg_res = tcg_temp_new_i64();
6107
6108         read_vec_element(s, tcg_op1, rn, 0, MO_64);
6109         read_vec_element(s, tcg_op2, rn, 1, MO_64);
6110
6111         switch (opcode) {
6112         case 0x3b: /* ADDP */
6113             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
6114             break;
6115         case 0xc: /* FMAXNMP */
6116             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6117             break;
6118         case 0xd: /* FADDP */
6119             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6120             break;
6121         case 0xf: /* FMAXP */
6122             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6123             break;
6124         case 0x2c: /* FMINNMP */
6125             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6126             break;
6127         case 0x2f: /* FMINP */
6128             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6129             break;
6130         default:
6131             g_assert_not_reached();
6132         }
6133
6134         write_fp_dreg(s, rd, tcg_res);
6135
6136         tcg_temp_free_i64(tcg_op1);
6137         tcg_temp_free_i64(tcg_op2);
6138         tcg_temp_free_i64(tcg_res);
6139     } else {
6140         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6141         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6142         TCGv_i32 tcg_res = tcg_temp_new_i32();
6143
6144         read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
6145         read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);
6146
6147         switch (opcode) {
6148         case 0xc: /* FMAXNMP */
6149             gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6150             break;
6151         case 0xd: /* FADDP */
6152             gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6153             break;
6154         case 0xf: /* FMAXP */
6155             gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6156             break;
6157         case 0x2c: /* FMINNMP */
6158             gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6159             break;
6160         case 0x2f: /* FMINP */
6161             gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6162             break;
6163         default:
6164             g_assert_not_reached();
6165         }
6166
6167         write_fp_sreg(s, rd, tcg_res);
6168
6169         tcg_temp_free_i32(tcg_op1);
6170         tcg_temp_free_i32(tcg_op2);
6171         tcg_temp_free_i32(tcg_res);
6172     }
6173
6174     if (!TCGV_IS_UNUSED_PTR(fpst)) {
6175         tcg_temp_free_ptr(fpst);
6176     }
6177 }
6178
6179 /*
6180  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
6181  *
 * This handles the common shifting logic and is used by both
 * the vector and scalar code paths.
6184  */
6185 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6186                                     TCGv_i64 tcg_rnd, bool accumulate,
6187                                     bool is_u, int size, int shift)
6188 {
6189     bool extended_result = false;
6190     bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
6191     int ext_lshift = 0;
6192     TCGv_i64 tcg_src_hi;
6193
6194     if (round && size == 3) {
6195         extended_result = true;
6196         ext_lshift = 64 - shift;
6197         tcg_src_hi = tcg_temp_new_i64();
6198     } else if (shift == 64) {
6199         if (!accumulate && is_u) {
6200             /* result is zero */
6201             tcg_gen_movi_i64(tcg_res, 0);
6202             return;
6203         }
6204     }
6205
6206     /* Deal with the rounding step */
6207     if (round) {
6208         if (extended_result) {
6209             TCGv_i64 tcg_zero = tcg_const_i64(0);
6210             if (!is_u) {
6211                 /* take care of sign extending tcg_res */
6212                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
6213                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6214                                  tcg_src, tcg_src_hi,
6215                                  tcg_rnd, tcg_zero);
6216             } else {
6217                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6218                                  tcg_src, tcg_zero,
6219                                  tcg_rnd, tcg_zero);
6220             }
6221             tcg_temp_free_i64(tcg_zero);
6222         } else {
6223             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
6224         }
6225     }
6226
6227     /* Now do the shift right */
6228     if (round && extended_result) {
6229         /* extended case, >64 bit precision required */
6230         if (ext_lshift == 0) {
6231             /* special case, only high bits matter */
6232             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
6233         } else {
6234             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6235             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
6236             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
6237         }
6238     } else {
6239         if (is_u) {
6240             if (shift == 64) {
6241                 /* essentially shifting in 64 zeros */
6242                 tcg_gen_movi_i64(tcg_src, 0);
6243             } else {
6244                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6245             }
6246         } else {
6247             if (shift == 64) {
6248                 /* effectively extending the sign-bit */
6249                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
6250             } else {
6251                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
6252             }
6253         }
6254     }
6255
6256     if (accumulate) {
6257         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
6258     } else {
6259         tcg_gen_mov_i64(tcg_res, tcg_src);
6260     }
6261
6262     if (extended_result) {
6263         tcg_temp_free_i64(tcg_src_hi);
6264     }
6265 }
6266
6267 /* Common SHL/SLI - Shift left with an optional insert */
6268 static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6269                                  bool insert, int shift)
6270 {
6271     if (insert) { /* SLI */
6272         tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
6273     } else { /* SHL */
6274         tcg_gen_shli_i64(tcg_res, tcg_src, shift);
6275     }
6276 }
6277
6278 /* SRI: shift right with insert */
6279 static void handle_shri_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6280                                  int size, int shift)
6281 {
6282     int esize = 8 << size;
6283
6284     /* shift count same as element size is valid but does nothing;
6285      * special case to avoid potential shift by 64.
6286      */
6287     if (shift != esize) {
6288         tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6289         tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, 0, esize - shift);
6290     }
6291 }
6292
6293 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
6294 static void handle_scalar_simd_shri(DisasContext *s,
6295                                     bool is_u, int immh, int immb,
6296                                     int opcode, int rn, int rd)
6297 {
6298     const int size = 3;
6299     int immhb = immh << 3 | immb;
6300     int shift = 2 * (8 << size) - immhb;
6301     bool accumulate = false;
6302     bool round = false;
6303     bool insert = false;
6304     TCGv_i64 tcg_rn;
6305     TCGv_i64 tcg_rd;
6306     TCGv_i64 tcg_round;
6307
6308     if (!extract32(immh, 3, 1)) {
6309         unallocated_encoding(s);
6310         return;
6311     }
6312
6313     if (!fp_access_check(s)) {
6314         return;
6315     }
6316
6317     switch (opcode) {
6318     case 0x02: /* SSRA / USRA (accumulate) */
6319         accumulate = true;
6320         break;
6321     case 0x04: /* SRSHR / URSHR (rounding) */
6322         round = true;
6323         break;
6324     case 0x06: /* SRSRA / URSRA (accum + rounding) */
6325         accumulate = round = true;
6326         break;
6327     case 0x08: /* SRI */
6328         insert = true;
6329         break;
6330     }
6331
6332     if (round) {
6333         uint64_t round_const = 1ULL << (shift - 1);
6334         tcg_round = tcg_const_i64(round_const);
6335     } else {
6336         TCGV_UNUSED_I64(tcg_round);
6337     }
6338
6339     tcg_rn = read_fp_dreg(s, rn);
6340     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6341
6342     if (insert) {
6343         handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
6344     } else {
6345         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6346                                 accumulate, is_u, size, shift);
6347     }
6348
6349     write_fp_dreg(s, rd, tcg_rd);
6350
6351     tcg_temp_free_i64(tcg_rn);
6352     tcg_temp_free_i64(tcg_rd);
6353     if (round) {
6354         tcg_temp_free_i64(tcg_round);
6355     }
6356 }
6357
6358 /* SHL/SLI - Scalar shift left */
6359 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
6360                                     int immh, int immb, int opcode,
6361                                     int rn, int rd)
6362 {
6363     int size = 32 - clz32(immh) - 1;
6364     int immhb = immh << 3 | immb;
6365     int shift = immhb - (8 << size);
6366     TCGv_i64 tcg_rn = new_tmp_a64(s);
6367     TCGv_i64 tcg_rd = new_tmp_a64(s);
6368
6369     if (!extract32(immh, 3, 1)) {
6370         unallocated_encoding(s);
6371         return;
6372     }
6373
6374     if (!fp_access_check(s)) {
6375         return;
6376     }
6377
6378     tcg_rn = read_fp_dreg(s, rn);
6379     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6380
6381     handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
6382
6383     write_fp_dreg(s, rd, tcg_rd);
6384
6385     tcg_temp_free_i64(tcg_rn);
6386     tcg_temp_free_i64(tcg_rd);
6387 }
6388
6389 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
6390  * (signed/unsigned) narrowing */
6391 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
6392                                    bool is_u_shift, bool is_u_narrow,
6393                                    int immh, int immb, int opcode,
6394                                    int rn, int rd)
6395 {
6396     int immhb = immh << 3 | immb;
6397     int size = 32 - clz32(immh) - 1;
6398     int esize = 8 << size;
6399     int shift = (2 * esize) - immhb;
6400     int elements = is_scalar ? 1 : (64 / esize);
6401     bool round = extract32(opcode, 0, 1);
6402     TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
6403     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
6404     TCGv_i32 tcg_rd_narrowed;
6405     TCGv_i64 tcg_final;
6406
6407     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
6408         { gen_helper_neon_narrow_sat_s8,
6409           gen_helper_neon_unarrow_sat8 },
6410         { gen_helper_neon_narrow_sat_s16,
6411           gen_helper_neon_unarrow_sat16 },
6412         { gen_helper_neon_narrow_sat_s32,
6413           gen_helper_neon_unarrow_sat32 },
6414         { NULL, NULL },
6415     };
6416     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
6417         gen_helper_neon_narrow_sat_u8,
6418         gen_helper_neon_narrow_sat_u16,
6419         gen_helper_neon_narrow_sat_u32,
6420         NULL
6421     };
6422     NeonGenNarrowEnvFn *narrowfn;
6423
6424     int i;
6425
6426     assert(size < 4);
6427
6428     if (extract32(immh, 3, 1)) {
6429         unallocated_encoding(s);
6430         return;
6431     }
6432
6433     if (!fp_access_check(s)) {
6434         return;
6435     }
6436
6437     if (is_u_shift) {
6438         narrowfn = unsigned_narrow_fns[size];
6439     } else {
6440         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
6441     }
6442
6443     tcg_rn = tcg_temp_new_i64();
6444     tcg_rd = tcg_temp_new_i64();
6445     tcg_rd_narrowed = tcg_temp_new_i32();
6446     tcg_final = tcg_const_i64(0);
6447
6448     if (round) {
6449         uint64_t round_const = 1ULL << (shift - 1);
6450         tcg_round = tcg_const_i64(round_const);
6451     } else {
6452         TCGV_UNUSED_I64(tcg_round);
6453     }
6454
6455     for (i = 0; i < elements; i++) {
6456         read_vec_element(s, tcg_rn, rn, i, ldop);
6457         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6458                                 false, is_u_shift, size+1, shift);
6459         narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
6460         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
6461         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
6462     }
6463
6464     if (!is_q) {
6465         clear_vec_high(s, rd);
6466         write_vec_element(s, tcg_final, rd, 0, MO_64);
6467     } else {
6468         write_vec_element(s, tcg_final, rd, 1, MO_64);
6469     }
6470
6471     if (round) {
6472         tcg_temp_free_i64(tcg_round);
6473     }
6474     tcg_temp_free_i64(tcg_rn);
6475     tcg_temp_free_i64(tcg_rd);
6476     tcg_temp_free_i32(tcg_rd_narrowed);
6477     tcg_temp_free_i64(tcg_final);
6478     return;
6479 }
6480
6481 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
6482 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
6483                              bool src_unsigned, bool dst_unsigned,
6484                              int immh, int immb, int rn, int rd)
6485 {
6486     int immhb = immh << 3 | immb;
6487     int size = 32 - clz32(immh) - 1;
6488     int shift = immhb - (8 << size);
6489     int pass;
6490
6491     assert(immh != 0);
6492     assert(!(scalar && is_q));
6493
6494     if (!scalar) {
6495         if (!is_q && extract32(immh, 3, 1)) {
6496             unallocated_encoding(s);
6497             return;
6498         }
6499
6500         /* Since we use the variable-shift helpers we must
6501          * replicate the shift count into each element of
6502          * the tcg_shift value.
6503          */
6504         switch (size) {
6505         case 0:
6506             shift |= shift << 8;
6507             /* fall through */
6508         case 1:
6509             shift |= shift << 16;
6510             break;
6511         case 2:
6512         case 3:
6513             break;
6514         default:
6515             g_assert_not_reached();
6516         }
6517     }
6518
6519     if (!fp_access_check(s)) {
6520         return;
6521     }
6522
6523     if (size == 3) {
6524         TCGv_i64 tcg_shift = tcg_const_i64(shift);
6525         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
6526             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
6527             { NULL, gen_helper_neon_qshl_u64 },
6528         };
6529         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
6530         int maxpass = is_q ? 2 : 1;
6531
6532         for (pass = 0; pass < maxpass; pass++) {
6533             TCGv_i64 tcg_op = tcg_temp_new_i64();
6534
6535             read_vec_element(s, tcg_op, rn, pass, MO_64);
6536             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6537             write_vec_element(s, tcg_op, rd, pass, MO_64);
6538
6539             tcg_temp_free_i64(tcg_op);
6540         }
6541         tcg_temp_free_i64(tcg_shift);
6542
6543         if (!is_q) {
6544             clear_vec_high(s, rd);
6545         }
6546     } else {
6547         TCGv_i32 tcg_shift = tcg_const_i32(shift);
6548         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
6549             {
6550                 { gen_helper_neon_qshl_s8,
6551                   gen_helper_neon_qshl_s16,
6552                   gen_helper_neon_qshl_s32 },
6553                 { gen_helper_neon_qshlu_s8,
6554                   gen_helper_neon_qshlu_s16,
6555                   gen_helper_neon_qshlu_s32 }
6556             }, {
6557                 { NULL, NULL, NULL },
6558                 { gen_helper_neon_qshl_u8,
6559                   gen_helper_neon_qshl_u16,
6560                   gen_helper_neon_qshl_u32 }
6561             }
6562         };
6563         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
6564         TCGMemOp memop = scalar ? size : MO_32;
6565         int maxpass = scalar ? 1 : is_q ? 4 : 2;
6566
6567         for (pass = 0; pass < maxpass; pass++) {
6568             TCGv_i32 tcg_op = tcg_temp_new_i32();
6569
6570             read_vec_element_i32(s, tcg_op, rn, pass, memop);
6571             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6572             if (scalar) {
6573                 switch (size) {
6574                 case 0:
6575                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
6576                     break;
6577                 case 1:
6578                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
6579                     break;
6580                 case 2:
6581                     break;
6582                 default:
6583                     g_assert_not_reached();
6584                 }
6585                 write_fp_sreg(s, rd, tcg_op);
6586             } else {
6587                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6588             }
6589
6590             tcg_temp_free_i32(tcg_op);
6591         }
6592         tcg_temp_free_i32(tcg_shift);
6593
6594         if (!is_q && !scalar) {
6595             clear_vec_high(s, rd);
6596         }
6597     }
6598 }
6599
6600 /* Common vector code for handling integer to FP conversion */
6601 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
6602                                    int elements, int is_signed,
6603                                    int fracbits, int size)
6604 {
6605     bool is_double = size == 3 ? true : false;
6606     TCGv_ptr tcg_fpst = get_fpstatus_ptr();
6607     TCGv_i32 tcg_shift = tcg_const_i32(fracbits);
6608     TCGv_i64 tcg_int = tcg_temp_new_i64();
6609     TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
6610     int pass;
6611
6612     for (pass = 0; pass < elements; pass++) {
6613         read_vec_element(s, tcg_int, rn, pass, mop);
6614
6615         if (is_double) {
6616             TCGv_i64 tcg_double = tcg_temp_new_i64();
6617             if (is_signed) {
6618                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6619                                      tcg_shift, tcg_fpst);
6620             } else {
6621                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6622                                      tcg_shift, tcg_fpst);
6623             }
6624             if (elements == 1) {
6625                 write_fp_dreg(s, rd, tcg_double);
6626             } else {
6627                 write_vec_element(s, tcg_double, rd, pass, MO_64);
6628             }
6629             tcg_temp_free_i64(tcg_double);
6630         } else {
6631             TCGv_i32 tcg_single = tcg_temp_new_i32();
6632             if (is_signed) {
6633                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6634                                      tcg_shift, tcg_fpst);
6635             } else {
6636                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6637                                      tcg_shift, tcg_fpst);
6638             }
6639             if (elements == 1) {
6640                 write_fp_sreg(s, rd, tcg_single);
6641             } else {
6642                 write_vec_element_i32(s, tcg_single, rd, pass, MO_32);
6643             }
6644             tcg_temp_free_i32(tcg_single);
6645         }
6646     }
6647
6648     if (!is_double && elements == 2) {
6649         clear_vec_high(s, rd);
6650     }
6651
6652     tcg_temp_free_i64(tcg_int);
6653     tcg_temp_free_ptr(tcg_fpst);
6654     tcg_temp_free_i32(tcg_shift);
6655 }
6656
6657 /* UCVTF/SCVTF - Integer to FP conversion */
6658 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
6659                                          bool is_q, bool is_u,
6660                                          int immh, int immb, int opcode,
6661                                          int rn, int rd)
6662 {
6663     bool is_double = extract32(immh, 3, 1);
6664     int size = is_double ? MO_64 : MO_32;
6665     int elements;
6666     int immhb = immh << 3 | immb;
6667     int fracbits = (is_double ? 128 : 64) - immhb;
6668
6669     if (!extract32(immh, 2, 2)) {
6670         unallocated_encoding(s);
6671         return;
6672     }
6673
6674     if (is_scalar) {
6675         elements = 1;
6676     } else {
6677         elements = is_double ? 2 : is_q ? 4 : 2;
6678         if (is_double && !is_q) {
6679             unallocated_encoding(s);
6680             return;
6681         }
6682     }
6683
6684     if (!fp_access_check(s)) {
6685         return;
6686     }
6687
6688     /* immh == 0 would be a failure of the decode logic */
6689     g_assert(immh);
6690
6691     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
6692 }
6693
6694 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */
6695 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
6696                                          bool is_q, bool is_u,
6697                                          int immh, int immb, int rn, int rd)
6698 {
6699     bool is_double = extract32(immh, 3, 1);
6700     int immhb = immh << 3 | immb;
6701     int fracbits = (is_double ? 128 : 64) - immhb;
6702     int pass;
6703     TCGv_ptr tcg_fpstatus;
6704     TCGv_i32 tcg_rmode, tcg_shift;
6705
6706     if (!extract32(immh, 2, 2)) {
6707         unallocated_encoding(s);
6708         return;
6709     }
6710
6711     if (!is_scalar && !is_q && is_double) {
6712         unallocated_encoding(s);
6713         return;
6714     }
6715
6716     if (!fp_access_check(s)) {
6717         return;
6718     }
6719
6720     assert(!(is_scalar && is_q));
6721
6722     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
6723     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6724     tcg_fpstatus = get_fpstatus_ptr();
6725     tcg_shift = tcg_const_i32(fracbits);
6726
6727     if (is_double) {
6728         int maxpass = is_scalar ? 1 : 2;
6729
6730         for (pass = 0; pass < maxpass; pass++) {
6731             TCGv_i64 tcg_op = tcg_temp_new_i64();
6732
6733             read_vec_element(s, tcg_op, rn, pass, MO_64);
6734             if (is_u) {
6735                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6736             } else {
6737                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6738             }
6739             write_vec_element(s, tcg_op, rd, pass, MO_64);
6740             tcg_temp_free_i64(tcg_op);
6741         }
6742         if (!is_q) {
6743             clear_vec_high(s, rd);
6744         }
6745     } else {
6746         int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
6747         for (pass = 0; pass < maxpass; pass++) {
6748             TCGv_i32 tcg_op = tcg_temp_new_i32();
6749
6750             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
6751             if (is_u) {
6752                 gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6753             } else {
6754                 gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6755             }
6756             if (is_scalar) {
6757                 write_fp_sreg(s, rd, tcg_op);
6758             } else {
6759                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6760             }
6761             tcg_temp_free_i32(tcg_op);
6762         }
6763         if (!is_q && !is_scalar) {
6764             clear_vec_high(s, rd);
6765         }
6766     }
6767
6768     tcg_temp_free_ptr(tcg_fpstatus);
6769     tcg_temp_free_i32(tcg_shift);
6770     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6771     tcg_temp_free_i32(tcg_rmode);
6772 }
6773
6774 /* C3.6.9 AdvSIMD scalar shift by immediate
6775  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
6776  * +-----+---+-------------+------+------+--------+---+------+------+
6777  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
6778  * +-----+---+-------------+------+------+--------+---+------+------+
6779  *
6780  * This is the scalar version so it works on a fixed sized registers
6781  */
6782 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
6783 {
6784     int rd = extract32(insn, 0, 5);
6785     int rn = extract32(insn, 5, 5);
6786     int opcode = extract32(insn, 11, 5);
6787     int immb = extract32(insn, 16, 3);
6788     int immh = extract32(insn, 19, 4);
6789     bool is_u = extract32(insn, 29, 1);
6790
6791     if (immh == 0) {
6792         unallocated_encoding(s);
6793         return;
6794     }
6795
6796     switch (opcode) {
6797     case 0x08: /* SRI */
6798         if (!is_u) {
6799             unallocated_encoding(s);
6800             return;
6801         }
6802         /* fall through */
6803     case 0x00: /* SSHR / USHR */
6804     case 0x02: /* SSRA / USRA */
6805     case 0x04: /* SRSHR / URSHR */
6806     case 0x06: /* SRSRA / URSRA */
6807         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
6808         break;
6809     case 0x0a: /* SHL / SLI */
6810         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
6811         break;
6812     case 0x1c: /* SCVTF, UCVTF */
6813         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
6814                                      opcode, rn, rd);
6815         break;
6816     case 0x10: /* SQSHRUN, SQSHRUN2 */
6817     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
6818         if (!is_u) {
6819             unallocated_encoding(s);
6820             return;
6821         }
6822         handle_vec_simd_sqshrn(s, true, false, false, true,
6823                                immh, immb, opcode, rn, rd);
6824         break;
6825     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
6826     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
6827         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
6828                                immh, immb, opcode, rn, rd);
6829         break;
6830     case 0xc: /* SQSHLU */
6831         if (!is_u) {
6832             unallocated_encoding(s);
6833             return;
6834         }
6835         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
6836         break;
6837     case 0xe: /* SQSHL, UQSHL */
6838         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
6839         break;
6840     case 0x1f: /* FCVTZS, FCVTZU */
6841         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
6842         break;
6843     default:
6844         unallocated_encoding(s);
6845         break;
6846     }
6847 }
6848
6849 /* C3.6.10 AdvSIMD scalar three different
6850  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6851  * +-----+---+-----------+------+---+------+--------+-----+------+------+
6852  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
6853  * +-----+---+-----------+------+---+------+--------+-----+------+------+
6854  */
6855 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
6856 {
6857     bool is_u = extract32(insn, 29, 1);
6858     int size = extract32(insn, 22, 2);
6859     int opcode = extract32(insn, 12, 4);
6860     int rm = extract32(insn, 16, 5);
6861     int rn = extract32(insn, 5, 5);
6862     int rd = extract32(insn, 0, 5);
6863
6864     if (is_u) {
6865         unallocated_encoding(s);
6866         return;
6867     }
6868
6869     switch (opcode) {
6870     case 0x9: /* SQDMLAL, SQDMLAL2 */
6871     case 0xb: /* SQDMLSL, SQDMLSL2 */
6872     case 0xd: /* SQDMULL, SQDMULL2 */
6873         if (size == 0 || size == 3) {
6874             unallocated_encoding(s);
6875             return;
6876         }
6877         break;
6878     default:
6879         unallocated_encoding(s);
6880         return;
6881     }
6882
6883     if (!fp_access_check(s)) {
6884         return;
6885     }
6886
6887     if (size == 2) {
6888         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6889         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6890         TCGv_i64 tcg_res = tcg_temp_new_i64();
6891
6892         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
6893         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
6894
6895         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
6896         gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
6897
6898         switch (opcode) {
6899         case 0xd: /* SQDMULL, SQDMULL2 */
6900             break;
6901         case 0xb: /* SQDMLSL, SQDMLSL2 */
6902             tcg_gen_neg_i64(tcg_res, tcg_res);
6903             /* fall through */
6904         case 0x9: /* SQDMLAL, SQDMLAL2 */
6905             read_vec_element(s, tcg_op1, rd, 0, MO_64);
6906             gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
6907                                               tcg_res, tcg_op1);
6908             break;
6909         default:
6910             g_assert_not_reached();
6911         }
6912
6913         write_fp_dreg(s, rd, tcg_res);
6914
6915         tcg_temp_free_i64(tcg_op1);
6916         tcg_temp_free_i64(tcg_op2);
6917         tcg_temp_free_i64(tcg_res);
6918     } else {
6919         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6920         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6921         TCGv_i64 tcg_res = tcg_temp_new_i64();
6922
6923         read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
6924         read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
6925
6926         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
6927         gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
6928
6929         switch (opcode) {
6930         case 0xd: /* SQDMULL, SQDMULL2 */
6931             break;
6932         case 0xb: /* SQDMLSL, SQDMLSL2 */
6933             gen_helper_neon_negl_u32(tcg_res, tcg_res);
6934             /* fall through */
6935         case 0x9: /* SQDMLAL, SQDMLAL2 */
6936         {
6937             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
6938             read_vec_element(s, tcg_op3, rd, 0, MO_32);
6939             gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
6940                                               tcg_res, tcg_op3);
6941             tcg_temp_free_i64(tcg_op3);
6942             break;
6943         }
6944         default:
6945             g_assert_not_reached();
6946         }
6947
6948         tcg_gen_ext32u_i64(tcg_res, tcg_res);
6949         write_fp_dreg(s, rd, tcg_res);
6950
6951         tcg_temp_free_i32(tcg_op1);
6952         tcg_temp_free_i32(tcg_op2);
6953         tcg_temp_free_i64(tcg_res);
6954     }
6955 }
6956
6957 static void handle_3same_64(DisasContext *s, int opcode, bool u,
6958                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
6959 {
6960     /* Handle 64x64->64 opcodes which are shared between the scalar
6961      * and vector 3-same groups. We cover every opcode where size == 3
6962      * is valid in either the three-reg-same (integer, not pairwise)
6963      * or scalar-three-reg-same groups. (Some opcodes are not yet
6964      * implemented.)
6965      */
6966     TCGCond cond;
6967
6968     switch (opcode) {
6969     case 0x1: /* SQADD */
6970         if (u) {
6971             gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6972         } else {
6973             gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6974         }
6975         break;
6976     case 0x5: /* SQSUB */
6977         if (u) {
6978             gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6979         } else {
6980             gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6981         }
6982         break;
6983     case 0x6: /* CMGT, CMHI */
6984         /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
6985          * We implement this using setcond (test) and then negating.
6986          */
6987         cond = u ? TCG_COND_GTU : TCG_COND_GT;
6988     do_cmop:
6989         tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
6990         tcg_gen_neg_i64(tcg_rd, tcg_rd);
6991         break;
6992     case 0x7: /* CMGE, CMHS */
6993         cond = u ? TCG_COND_GEU : TCG_COND_GE;
6994         goto do_cmop;
6995     case 0x11: /* CMTST, CMEQ */
6996         if (u) {
6997             cond = TCG_COND_EQ;
6998             goto do_cmop;
6999         }
7000         /* CMTST : test is "if (X & Y != 0)". */
7001         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
7002         tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
7003         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7004         break;
7005     case 0x8: /* SSHL, USHL */
7006         if (u) {
7007             gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
7008         } else {
7009             gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
7010         }
7011         break;
7012     case 0x9: /* SQSHL, UQSHL */
7013         if (u) {
7014             gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7015         } else {
7016             gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7017         }
7018         break;
7019     case 0xa: /* SRSHL, URSHL */
7020         if (u) {
7021             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
7022         } else {
7023             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
7024         }
7025         break;
7026     case 0xb: /* SQRSHL, UQRSHL */
7027         if (u) {
7028             gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7029         } else {
7030             gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7031         }
7032         break;
7033     case 0x10: /* ADD, SUB */
7034         if (u) {
7035             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
7036         } else {
7037             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
7038         }
7039         break;
7040     default:
7041         g_assert_not_reached();
7042     }
7043 }
7044
/* Handle the 3-same-operands float operations; shared by the scalar
 * and vector encodings. The caller must filter out any encodings
 * not allocated for the encoding it is dealing with.
 *
 * size:     non-zero selects the double-precision path, zero the
 *           single-precision path
 * elements: number of elements processed, one per loop pass
 * fpopcode: operation selector; values not listed in the switches
 *           below hit g_assert_not_reached()
 * rd/rn/rm: destination and source vector register numbers
 */
static void handle_3same_float(DisasContext *s, int size, int elements,
                               int fpopcode, int rd, int rn, int rm)
{
    int pass;
    TCGv_ptr fpst = get_fpstatus_ptr();

    for (pass = 0; pass < elements; pass++) {
        if (size) {
            /* Double */
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_res = tcg_temp_new_i64();

            read_vec_element(s, tcg_op1, rn, pass, MO_64);
            read_vec_element(s, tcg_op2, rm, pass, MO_64);

            switch (fpopcode) {
            case 0x39: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negd(tcg_op1, tcg_op1);
                /* fall through */
            case 0x19: /* FMLA */
                /* The accumulator is the current value of the destination */
                read_vec_element(s, tcg_res, rd, pass, MO_64);
                gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
                                       tcg_res, fpst);
                break;
            case 0x18: /* FMAXNM */
                gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1a: /* FADD */
                gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1b: /* FMULX */
                gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1c: /* FCMEQ */
                gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1e: /* FMAX */
                gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1f: /* FRECPS */
                gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x38: /* FMINNM */
                gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3a: /* FSUB */
                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3e: /* FMIN */
                gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3f: /* FRSQRTS */
                gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5b: /* FMUL */
                gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5c: /* FCMGE */
                gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5d: /* FACGE */
                gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5f: /* FDIV */
                gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7a: /* FABD */
                /* Absolute difference: subtract, then take the absolute value */
                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
                gen_helper_vfp_absd(tcg_res, tcg_res);
                break;
            case 0x7c: /* FCMGT */
                gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7d: /* FACGT */
                gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            write_vec_element(s, tcg_res, rd, pass, MO_64);

            tcg_temp_free_i64(tcg_res);
            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        } else {
            /* Single */
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);

            switch (fpopcode) {
            case 0x39: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negs(tcg_op1, tcg_op1);
                /* fall through */
            case 0x19: /* FMLA */
                /* The accumulator is the current value of the destination */
                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
                gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
                                       tcg_res, fpst);
                break;
            case 0x1a: /* FADD */
                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1b: /* FMULX */
                gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1c: /* FCMEQ */
                gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1e: /* FMAX */
                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1f: /* FRECPS */
                gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x18: /* FMAXNM */
                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x38: /* FMINNM */
                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3a: /* FSUB */
                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3e: /* FMIN */
                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3f: /* FRSQRTS */
                gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5b: /* FMUL */
                gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5c: /* FCMGE */
                gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5d: /* FACGE */
                gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5f: /* FDIV */
                gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7a: /* FABD */
                /* Absolute difference: subtract, then take the absolute value */
                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
                gen_helper_vfp_abss(tcg_res, tcg_res);
                break;
            case 0x7c: /* FCMGT */
                gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7d: /* FACGT */
                gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            if (elements == 1) {
                /* scalar single so clear high part */
                TCGv_i64 tcg_tmp = tcg_temp_new_i64();

                /* Zero-extend to 64 bits and store as one 64-bit element */
                tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
                write_vec_element(s, tcg_tmp, rd, pass, MO_64);
                tcg_temp_free_i64(tcg_tmp);
            } else {
                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
            }

            tcg_temp_free_i32(tcg_res);
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);
        }
    }

    tcg_temp_free_ptr(fpst);

    /* (elements << size) is below 4 for every case except four singles
     * or two doubles.
     */
    if ((elements << size) < 4) {
        /* scalar, or non-quad vector op */
        clear_vec_high(s, rd);
    }
}
7235
7236 /* C3.6.11 AdvSIMD scalar three same
7237  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
7238  * +-----+---+-----------+------+---+------+--------+---+------+------+
7239  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
7240  * +-----+---+-----------+------+---+------+--------+---+------+------+
7241  */
7242 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
7243 {
7244     int rd = extract32(insn, 0, 5);
7245     int rn = extract32(insn, 5, 5);
7246     int opcode = extract32(insn, 11, 5);
7247     int rm = extract32(insn, 16, 5);
7248     int size = extract32(insn, 22, 2);
7249     bool u = extract32(insn, 29, 1);
7250     TCGv_i64 tcg_rd;
7251
7252     if (opcode >= 0x18) {
7253         /* Floating point: U, size[1] and opcode indicate operation */
7254         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
7255         switch (fpopcode) {
7256         case 0x1b: /* FMULX */
7257         case 0x1f: /* FRECPS */
7258         case 0x3f: /* FRSQRTS */
7259         case 0x5d: /* FACGE */
7260         case 0x7d: /* FACGT */
7261         case 0x1c: /* FCMEQ */
7262         case 0x5c: /* FCMGE */
7263         case 0x7c: /* FCMGT */
7264         case 0x7a: /* FABD */
7265             break;
7266         default:
7267             unallocated_encoding(s);
7268             return;
7269         }
7270
7271         if (!fp_access_check(s)) {
7272             return;
7273         }
7274
7275         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
7276         return;
7277     }
7278
7279     switch (opcode) {
7280     case 0x1: /* SQADD, UQADD */
7281     case 0x5: /* SQSUB, UQSUB */
7282     case 0x9: /* SQSHL, UQSHL */
7283     case 0xb: /* SQRSHL, UQRSHL */
7284         break;
7285     case 0x8: /* SSHL, USHL */
7286     case 0xa: /* SRSHL, URSHL */
7287     case 0x6: /* CMGT, CMHI */
7288     case 0x7: /* CMGE, CMHS */
7289     case 0x11: /* CMTST, CMEQ */
7290     case 0x10: /* ADD, SUB (vector) */
7291         if (size != 3) {
7292             unallocated_encoding(s);
7293             return;
7294         }
7295         break;
7296     case 0x16: /* SQDMULH, SQRDMULH (vector) */
7297         if (size != 1 && size != 2) {
7298             unallocated_encoding(s);
7299             return;
7300         }
7301         break;
7302     default:
7303         unallocated_encoding(s);
7304         return;
7305     }
7306
7307     if (!fp_access_check(s)) {
7308         return;
7309     }
7310
7311     tcg_rd = tcg_temp_new_i64();
7312
7313     if (size == 3) {
7314         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
7315         TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
7316
7317         handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
7318         tcg_temp_free_i64(tcg_rn);
7319         tcg_temp_free_i64(tcg_rm);
7320     } else {
7321         /* Do a single operation on the lowest element in the vector.
7322          * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
7323          * no side effects for all these operations.
7324          * OPTME: special-purpose helpers would avoid doing some
7325          * unnecessary work in the helper for the 8 and 16 bit cases.
7326          */
7327         NeonGenTwoOpEnvFn *genenvfn;
7328         TCGv_i32 tcg_rn = tcg_temp_new_i32();
7329         TCGv_i32 tcg_rm = tcg_temp_new_i32();
7330         TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
7331
7332         read_vec_element_i32(s, tcg_rn, rn, 0, size);
7333         read_vec_element_i32(s, tcg_rm, rm, 0, size);
7334
7335         switch (opcode) {
7336         case 0x1: /* SQADD, UQADD */
7337         {
7338             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7339                 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
7340                 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
7341                 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
7342             };
7343             genenvfn = fns[size][u];
7344             break;
7345         }
7346         case 0x5: /* SQSUB, UQSUB */
7347         {
7348             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7349                 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
7350                 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
7351                 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
7352             };
7353             genenvfn = fns[size][u];
7354             break;
7355         }
7356         case 0x9: /* SQSHL, UQSHL */
7357         {
7358             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7359                 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
7360                 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
7361                 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
7362             };
7363             genenvfn = fns[size][u];
7364             break;
7365         }
7366         case 0xb: /* SQRSHL, UQRSHL */
7367         {
7368             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7369                 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
7370                 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
7371                 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
7372             };
7373             genenvfn = fns[size][u];
7374             break;
7375         }
7376         case 0x16: /* SQDMULH, SQRDMULH */
7377         {
7378             static NeonGenTwoOpEnvFn * const fns[2][2] = {
7379                 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
7380                 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
7381             };
7382             assert(size == 1 || size == 2);
7383             genenvfn = fns[size - 1][u];
7384             break;
7385         }
7386         default:
7387             g_assert_not_reached();
7388         }
7389
7390         genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
7391         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
7392         tcg_temp_free_i32(tcg_rd32);
7393         tcg_temp_free_i32(tcg_rn);
7394         tcg_temp_free_i32(tcg_rm);
7395     }
7396
7397     write_fp_dreg(s, rd, tcg_rd);
7398
7399     tcg_temp_free_i64(tcg_rd);
7400 }
7401
7402 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
7403                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
7404                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
7405 {
7406     /* Handle 64->64 opcodes which are shared between the scalar and
7407      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
7408      * is valid in either group and also the double-precision fp ops.
7409      * The caller only need provide tcg_rmode and tcg_fpstatus if the op
7410      * requires them.
7411      */
7412     TCGCond cond;
7413
7414     switch (opcode) {
7415     case 0x4: /* CLS, CLZ */
7416         if (u) {
7417             gen_helper_clz64(tcg_rd, tcg_rn);
7418         } else {
7419             gen_helper_cls64(tcg_rd, tcg_rn);
7420         }
7421         break;
7422     case 0x5: /* NOT */
7423         /* This opcode is shared with CNT and RBIT but we have earlier
7424          * enforced that size == 3 if and only if this is the NOT insn.
7425          */
7426         tcg_gen_not_i64(tcg_rd, tcg_rn);
7427         break;
7428     case 0x7: /* SQABS, SQNEG */
7429         if (u) {
7430             gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
7431         } else {
7432             gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
7433         }
7434         break;
7435     case 0xa: /* CMLT */
7436         /* 64 bit integer comparison against zero, result is
7437          * test ? (2^64 - 1) : 0. We implement via setcond(!test) and
7438          * subtracting 1.
7439          */
7440         cond = TCG_COND_LT;
7441     do_cmop:
7442         tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
7443         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7444         break;
7445     case 0x8: /* CMGT, CMGE */
7446         cond = u ? TCG_COND_GE : TCG_COND_GT;
7447         goto do_cmop;
7448     case 0x9: /* CMEQ, CMLE */
7449         cond = u ? TCG_COND_LE : TCG_COND_EQ;
7450         goto do_cmop;
7451     case 0xb: /* ABS, NEG */
7452         if (u) {
7453             tcg_gen_neg_i64(tcg_rd, tcg_rn);
7454         } else {
7455             TCGv_i64 tcg_zero = tcg_const_i64(0);
7456             tcg_gen_neg_i64(tcg_rd, tcg_rn);
7457             tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
7458                                 tcg_rn, tcg_rd);
7459             tcg_temp_free_i64(tcg_zero);
7460         }
7461         break;
7462     case 0x2f: /* FABS */
7463         gen_helper_vfp_absd(tcg_rd, tcg_rn);
7464         break;
7465     case 0x6f: /* FNEG */
7466         gen_helper_vfp_negd(tcg_rd, tcg_rn);
7467         break;
7468     case 0x7f: /* FSQRT */
7469         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
7470         break;
7471     case 0x1a: /* FCVTNS */
7472     case 0x1b: /* FCVTMS */
7473     case 0x1c: /* FCVTAS */
7474     case 0x3a: /* FCVTPS */
7475     case 0x3b: /* FCVTZS */
7476     {
7477         TCGv_i32 tcg_shift = tcg_const_i32(0);
7478         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7479         tcg_temp_free_i32(tcg_shift);
7480         break;
7481     }
7482     case 0x5a: /* FCVTNU */
7483     case 0x5b: /* FCVTMU */
7484     case 0x5c: /* FCVTAU */
7485     case 0x7a: /* FCVTPU */
7486     case 0x7b: /* FCVTZU */
7487     {
7488         TCGv_i32 tcg_shift = tcg_const_i32(0);
7489         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7490         tcg_temp_free_i32(tcg_shift);
7491         break;
7492     }
7493     case 0x18: /* FRINTN */
7494     case 0x19: /* FRINTM */
7495     case 0x38: /* FRINTP */
7496     case 0x39: /* FRINTZ */
7497     case 0x58: /* FRINTA */
7498     case 0x79: /* FRINTI */
7499         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
7500         break;
7501     case 0x59: /* FRINTX */
7502         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
7503         break;
7504     default:
7505         g_assert_not_reached();
7506     }
7507 }
7508
7509 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
7510                                    bool is_scalar, bool is_u, bool is_q,
7511                                    int size, int rn, int rd)
7512 {
7513     bool is_double = (size == 3);
7514     TCGv_ptr fpst;
7515
7516     if (!fp_access_check(s)) {
7517         return;
7518     }
7519
7520     fpst = get_fpstatus_ptr();
7521
7522     if (is_double) {
7523         TCGv_i64 tcg_op = tcg_temp_new_i64();
7524         TCGv_i64 tcg_zero = tcg_const_i64(0);
7525         TCGv_i64 tcg_res = tcg_temp_new_i64();
7526         NeonGenTwoDoubleOPFn *genfn;
7527         bool swap = false;
7528         int pass;
7529
7530         switch (opcode) {
7531         case 0x2e: /* FCMLT (zero) */
7532             swap = true;
7533             /* fallthrough */
7534         case 0x2c: /* FCMGT (zero) */
7535             genfn = gen_helper_neon_cgt_f64;
7536             break;
7537         case 0x2d: /* FCMEQ (zero) */
7538             genfn = gen_helper_neon_ceq_f64;
7539             break;
7540         case 0x6d: /* FCMLE (zero) */
7541             swap = true;
7542             /* fall through */
7543         case 0x6c: /* FCMGE (zero) */
7544             genfn = gen_helper_neon_cge_f64;
7545             break;
7546         default:
7547             g_assert_not_reached();
7548         }
7549
7550         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7551             read_vec_element(s, tcg_op, rn, pass, MO_64);
7552             if (swap) {
7553                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7554             } else {
7555                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7556             }
7557             write_vec_element(s, tcg_res, rd, pass, MO_64);
7558         }
7559         if (is_scalar) {
7560             clear_vec_high(s, rd);
7561         }
7562
7563         tcg_temp_free_i64(tcg_res);
7564         tcg_temp_free_i64(tcg_zero);
7565         tcg_temp_free_i64(tcg_op);
7566     } else {
7567         TCGv_i32 tcg_op = tcg_temp_new_i32();
7568         TCGv_i32 tcg_zero = tcg_const_i32(0);
7569         TCGv_i32 tcg_res = tcg_temp_new_i32();
7570         NeonGenTwoSingleOPFn *genfn;
7571         bool swap = false;
7572         int pass, maxpasses;
7573
7574         switch (opcode) {
7575         case 0x2e: /* FCMLT (zero) */
7576             swap = true;
7577             /* fall through */
7578         case 0x2c: /* FCMGT (zero) */
7579             genfn = gen_helper_neon_cgt_f32;
7580             break;
7581         case 0x2d: /* FCMEQ (zero) */
7582             genfn = gen_helper_neon_ceq_f32;
7583             break;
7584         case 0x6d: /* FCMLE (zero) */
7585             swap = true;
7586             /* fall through */
7587         case 0x6c: /* FCMGE (zero) */
7588             genfn = gen_helper_neon_cge_f32;
7589             break;
7590         default:
7591             g_assert_not_reached();
7592         }
7593
7594         if (is_scalar) {
7595             maxpasses = 1;
7596         } else {
7597             maxpasses = is_q ? 4 : 2;
7598         }
7599
7600         for (pass = 0; pass < maxpasses; pass++) {
7601             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7602             if (swap) {
7603                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7604             } else {
7605                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7606             }
7607             if (is_scalar) {
7608                 write_fp_sreg(s, rd, tcg_res);
7609             } else {
7610                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7611             }
7612         }
7613         tcg_temp_free_i32(tcg_res);
7614         tcg_temp_free_i32(tcg_zero);
7615         tcg_temp_free_i32(tcg_op);
7616         if (!is_q && !is_scalar) {
7617             clear_vec_high(s, rd);
7618         }
7619     }
7620
7621     tcg_temp_free_ptr(fpst);
7622 }
7623
7624 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
7625                                     bool is_scalar, bool is_u, bool is_q,
7626                                     int size, int rn, int rd)
7627 {
7628     bool is_double = (size == 3);
7629     TCGv_ptr fpst = get_fpstatus_ptr();
7630
7631     if (is_double) {
7632         TCGv_i64 tcg_op = tcg_temp_new_i64();
7633         TCGv_i64 tcg_res = tcg_temp_new_i64();
7634         int pass;
7635
7636         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7637             read_vec_element(s, tcg_op, rn, pass, MO_64);
7638             switch (opcode) {
7639             case 0x3d: /* FRECPE */
7640                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
7641                 break;
7642             case 0x3f: /* FRECPX */
7643                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
7644                 break;
7645             case 0x7d: /* FRSQRTE */
7646                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
7647                 break;
7648             default:
7649                 g_assert_not_reached();
7650             }
7651             write_vec_element(s, tcg_res, rd, pass, MO_64);
7652         }
7653         if (is_scalar) {
7654             clear_vec_high(s, rd);
7655         }
7656
7657         tcg_temp_free_i64(tcg_res);
7658         tcg_temp_free_i64(tcg_op);
7659     } else {
7660         TCGv_i32 tcg_op = tcg_temp_new_i32();
7661         TCGv_i32 tcg_res = tcg_temp_new_i32();
7662         int pass, maxpasses;
7663
7664         if (is_scalar) {
7665             maxpasses = 1;
7666         } else {
7667             maxpasses = is_q ? 4 : 2;
7668         }
7669
7670         for (pass = 0; pass < maxpasses; pass++) {
7671             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7672
7673             switch (opcode) {
7674             case 0x3c: /* URECPE */
7675                 gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
7676                 break;
7677             case 0x3d: /* FRECPE */
7678                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
7679                 break;
7680             case 0x3f: /* FRECPX */
7681                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
7682                 break;
7683             case 0x7d: /* FRSQRTE */
7684                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
7685                 break;
7686             default:
7687                 g_assert_not_reached();
7688             }
7689
7690             if (is_scalar) {
7691                 write_fp_sreg(s, rd, tcg_res);
7692             } else {
7693                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7694             }
7695         }
7696         tcg_temp_free_i32(tcg_res);
7697         tcg_temp_free_i32(tcg_op);
7698         if (!is_q && !is_scalar) {
7699             clear_vec_high(s, rd);
7700         }
7701     }
7702     tcg_temp_free_ptr(fpst);
7703 }
7704
7705 static void handle_2misc_narrow(DisasContext *s, bool scalar,
7706                                 int opcode, bool u, bool is_q,
7707                                 int size, int rn, int rd)
7708 {
7709     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
7710      * in the source becomes a size element in the destination).
7711      */
7712     int pass;
7713     TCGv_i32 tcg_res[2];
7714     int destelt = is_q ? 2 : 0;
7715     int passes = scalar ? 1 : 2;
7716
7717     if (scalar) {
7718         tcg_res[1] = tcg_const_i32(0);
7719     }
7720
7721     for (pass = 0; pass < passes; pass++) {
7722         TCGv_i64 tcg_op = tcg_temp_new_i64();
7723         NeonGenNarrowFn *genfn = NULL;
7724         NeonGenNarrowEnvFn *genenvfn = NULL;
7725
7726         if (scalar) {
7727             read_vec_element(s, tcg_op, rn, pass, size + 1);
7728         } else {
7729             read_vec_element(s, tcg_op, rn, pass, MO_64);
7730         }
7731         tcg_res[pass] = tcg_temp_new_i32();
7732
7733         switch (opcode) {
7734         case 0x12: /* XTN, SQXTUN */
7735         {
7736             static NeonGenNarrowFn * const xtnfns[3] = {
7737                 gen_helper_neon_narrow_u8,
7738                 gen_helper_neon_narrow_u16,
7739                 tcg_gen_extrl_i64_i32,
7740             };
7741             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
7742                 gen_helper_neon_unarrow_sat8,
7743                 gen_helper_neon_unarrow_sat16,
7744                 gen_helper_neon_unarrow_sat32,
7745             };
7746             if (u) {
7747                 genenvfn = sqxtunfns[size];
7748             } else {
7749                 genfn = xtnfns[size];
7750             }
7751             break;
7752         }
7753         case 0x14: /* SQXTN, UQXTN */
7754         {
7755             static NeonGenNarrowEnvFn * const fns[3][2] = {
7756                 { gen_helper_neon_narrow_sat_s8,
7757                   gen_helper_neon_narrow_sat_u8 },
7758                 { gen_helper_neon_narrow_sat_s16,
7759                   gen_helper_neon_narrow_sat_u16 },
7760                 { gen_helper_neon_narrow_sat_s32,
7761                   gen_helper_neon_narrow_sat_u32 },
7762             };
7763             genenvfn = fns[size][u];
7764             break;
7765         }
7766         case 0x16: /* FCVTN, FCVTN2 */
7767             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
7768             if (size == 2) {
7769                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
7770             } else {
7771                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
7772                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
7773                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
7774                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
7775                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
7776                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
7777                 tcg_temp_free_i32(tcg_lo);
7778                 tcg_temp_free_i32(tcg_hi);
7779             }
7780             break;
7781         case 0x56:  /* FCVTXN, FCVTXN2 */
7782             /* 64 bit to 32 bit float conversion
7783              * with von Neumann rounding (round to odd)
7784              */
7785             assert(size == 2);
7786             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
7787             break;
7788         default:
7789             g_assert_not_reached();
7790         }
7791
7792         if (genfn) {
7793             genfn(tcg_res[pass], tcg_op);
7794         } else if (genenvfn) {
7795             genenvfn(tcg_res[pass], cpu_env, tcg_op);
7796         }
7797
7798         tcg_temp_free_i64(tcg_op);
7799     }
7800
7801     for (pass = 0; pass < 2; pass++) {
7802         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
7803         tcg_temp_free_i32(tcg_res[pass]);
7804     }
7805     if (!is_q) {
7806         clear_vec_high(s, rd);
7807     }
7808 }
7809
7810 /* Remaining saturating accumulating ops */
7811 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
7812                                 bool is_q, int size, int rn, int rd)
7813 {
7814     bool is_double = (size == 3);
7815
7816     if (is_double) {
7817         TCGv_i64 tcg_rn = tcg_temp_new_i64();
7818         TCGv_i64 tcg_rd = tcg_temp_new_i64();
7819         int pass;
7820
7821         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7822             read_vec_element(s, tcg_rn, rn, pass, MO_64);
7823             read_vec_element(s, tcg_rd, rd, pass, MO_64);
7824
7825             if (is_u) { /* USQADD */
7826                 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7827             } else { /* SUQADD */
7828                 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7829             }
7830             write_vec_element(s, tcg_rd, rd, pass, MO_64);
7831         }
7832         if (is_scalar) {
7833             clear_vec_high(s, rd);
7834         }
7835
7836         tcg_temp_free_i64(tcg_rd);
7837         tcg_temp_free_i64(tcg_rn);
7838     } else {
7839         TCGv_i32 tcg_rn = tcg_temp_new_i32();
7840         TCGv_i32 tcg_rd = tcg_temp_new_i32();
7841         int pass, maxpasses;
7842
7843         if (is_scalar) {
7844             maxpasses = 1;
7845         } else {
7846             maxpasses = is_q ? 4 : 2;
7847         }
7848
7849         for (pass = 0; pass < maxpasses; pass++) {
7850             if (is_scalar) {
7851                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
7852                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
7853             } else {
7854                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
7855                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
7856             }
7857
7858             if (is_u) { /* USQADD */
7859                 switch (size) {
7860                 case 0:
7861                     gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7862                     break;
7863                 case 1:
7864                     gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7865                     break;
7866                 case 2:
7867                     gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7868                     break;
7869                 default:
7870                     g_assert_not_reached();
7871                 }
7872             } else { /* SUQADD */
7873                 switch (size) {
7874                 case 0:
7875                     gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7876                     break;
7877                 case 1:
7878                     gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7879                     break;
7880                 case 2:
7881                     gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7882                     break;
7883                 default:
7884                     g_assert_not_reached();
7885                 }
7886             }
7887
7888             if (is_scalar) {
7889                 TCGv_i64 tcg_zero = tcg_const_i64(0);
7890                 write_vec_element(s, tcg_zero, rd, 0, MO_64);
7891                 tcg_temp_free_i64(tcg_zero);
7892             }
7893             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
7894         }
7895
7896         if (!is_q) {
7897             clear_vec_high(s, rd);
7898         }
7899
7900         tcg_temp_free_i32(tcg_rd);
7901         tcg_temp_free_i32(tcg_rn);
7902     }
7903 }
7904
7905 /* C3.6.12 AdvSIMD scalar two reg misc
7906  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7907  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7908  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7909  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7910  */
7911 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
7912 {
7913     int rd = extract32(insn, 0, 5);
7914     int rn = extract32(insn, 5, 5);
7915     int opcode = extract32(insn, 12, 5);
7916     int size = extract32(insn, 22, 2);
7917     bool u = extract32(insn, 29, 1);
7918     bool is_fcvt = false;
7919     int rmode;
7920     TCGv_i32 tcg_rmode;
7921     TCGv_ptr tcg_fpstatus;
7922
7923     switch (opcode) {
7924     case 0x3: /* USQADD / SUQADD*/
7925         if (!fp_access_check(s)) {
7926             return;
7927         }
7928         handle_2misc_satacc(s, true, u, false, size, rn, rd);
7929         return;
7930     case 0x7: /* SQABS / SQNEG */
7931         break;
7932     case 0xa: /* CMLT */
7933         if (u) {
7934             unallocated_encoding(s);
7935             return;
7936         }
7937         /* fall through */
7938     case 0x8: /* CMGT, CMGE */
7939     case 0x9: /* CMEQ, CMLE */
7940     case 0xb: /* ABS, NEG */
7941         if (size != 3) {
7942             unallocated_encoding(s);
7943             return;
7944         }
7945         break;
7946     case 0x12: /* SQXTUN */
7947         if (!u) {
7948             unallocated_encoding(s);
7949             return;
7950         }
7951         /* fall through */
7952     case 0x14: /* SQXTN, UQXTN */
7953         if (size == 3) {
7954             unallocated_encoding(s);
7955             return;
7956         }
7957         if (!fp_access_check(s)) {
7958             return;
7959         }
7960         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
7961         return;
7962     case 0xc ... 0xf:
7963     case 0x16 ... 0x1d:
7964     case 0x1f:
7965         /* Floating point: U, size[1] and opcode indicate operation;
7966          * size[0] indicates single or double precision.
7967          */
7968         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
7969         size = extract32(size, 0, 1) ? 3 : 2;
7970         switch (opcode) {
7971         case 0x2c: /* FCMGT (zero) */
7972         case 0x2d: /* FCMEQ (zero) */
7973         case 0x2e: /* FCMLT (zero) */
7974         case 0x6c: /* FCMGE (zero) */
7975         case 0x6d: /* FCMLE (zero) */
7976             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
7977             return;
7978         case 0x1d: /* SCVTF */
7979         case 0x5d: /* UCVTF */
7980         {
7981             bool is_signed = (opcode == 0x1d);
7982             if (!fp_access_check(s)) {
7983                 return;
7984             }
7985             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
7986             return;
7987         }
7988         case 0x3d: /* FRECPE */
7989         case 0x3f: /* FRECPX */
7990         case 0x7d: /* FRSQRTE */
7991             if (!fp_access_check(s)) {
7992                 return;
7993             }
7994             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
7995             return;
7996         case 0x1a: /* FCVTNS */
7997         case 0x1b: /* FCVTMS */
7998         case 0x3a: /* FCVTPS */
7999         case 0x3b: /* FCVTZS */
8000         case 0x5a: /* FCVTNU */
8001         case 0x5b: /* FCVTMU */
8002         case 0x7a: /* FCVTPU */
8003         case 0x7b: /* FCVTZU */
8004             is_fcvt = true;
8005             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
8006             break;
8007         case 0x1c: /* FCVTAS */
8008         case 0x5c: /* FCVTAU */
8009             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
8010             is_fcvt = true;
8011             rmode = FPROUNDING_TIEAWAY;
8012             break;
8013         case 0x56: /* FCVTXN, FCVTXN2 */
8014             if (size == 2) {
8015                 unallocated_encoding(s);
8016                 return;
8017             }
8018             if (!fp_access_check(s)) {
8019                 return;
8020             }
8021             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
8022             return;
8023         default:
8024             unallocated_encoding(s);
8025             return;
8026         }
8027         break;
8028     default:
8029         unallocated_encoding(s);
8030         return;
8031     }
8032
8033     if (!fp_access_check(s)) {
8034         return;
8035     }
8036
8037     if (is_fcvt) {
8038         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
8039         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8040         tcg_fpstatus = get_fpstatus_ptr();
8041     } else {
8042         TCGV_UNUSED_I32(tcg_rmode);
8043         TCGV_UNUSED_PTR(tcg_fpstatus);
8044     }
8045
8046     if (size == 3) {
8047         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8048         TCGv_i64 tcg_rd = tcg_temp_new_i64();
8049
8050         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
8051         write_fp_dreg(s, rd, tcg_rd);
8052         tcg_temp_free_i64(tcg_rd);
8053         tcg_temp_free_i64(tcg_rn);
8054     } else {
8055         TCGv_i32 tcg_rn = tcg_temp_new_i32();
8056         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8057
8058         read_vec_element_i32(s, tcg_rn, rn, 0, size);
8059
8060         switch (opcode) {
8061         case 0x7: /* SQABS, SQNEG */
8062         {
8063             NeonGenOneOpEnvFn *genfn;
8064             static NeonGenOneOpEnvFn * const fns[3][2] = {
8065                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
8066                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
8067                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
8068             };
8069             genfn = fns[size][u];
8070             genfn(tcg_rd, cpu_env, tcg_rn);
8071             break;
8072         }
8073         case 0x1a: /* FCVTNS */
8074         case 0x1b: /* FCVTMS */
8075         case 0x1c: /* FCVTAS */
8076         case 0x3a: /* FCVTPS */
8077         case 0x3b: /* FCVTZS */
8078         {
8079             TCGv_i32 tcg_shift = tcg_const_i32(0);
8080             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8081             tcg_temp_free_i32(tcg_shift);
8082             break;
8083         }
8084         case 0x5a: /* FCVTNU */
8085         case 0x5b: /* FCVTMU */
8086         case 0x5c: /* FCVTAU */
8087         case 0x7a: /* FCVTPU */
8088         case 0x7b: /* FCVTZU */
8089         {
8090             TCGv_i32 tcg_shift = tcg_const_i32(0);
8091             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8092             tcg_temp_free_i32(tcg_shift);
8093             break;
8094         }
8095         default:
8096             g_assert_not_reached();
8097         }
8098
8099         write_fp_sreg(s, rd, tcg_rd);
8100         tcg_temp_free_i32(tcg_rd);
8101         tcg_temp_free_i32(tcg_rn);
8102     }
8103
8104     if (is_fcvt) {
8105         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8106         tcg_temp_free_i32(tcg_rmode);
8107         tcg_temp_free_ptr(tcg_fpstatus);
8108     }
8109 }
8110
8111 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
8112 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
8113                                  int immh, int immb, int opcode, int rn, int rd)
8114 {
8115     int size = 32 - clz32(immh) - 1;
8116     int immhb = immh << 3 | immb;
8117     int shift = 2 * (8 << size) - immhb;
8118     bool accumulate = false;
8119     bool round = false;
8120     bool insert = false;
8121     int dsize = is_q ? 128 : 64;
8122     int esize = 8 << size;
8123     int elements = dsize/esize;
8124     TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
8125     TCGv_i64 tcg_rn = new_tmp_a64(s);
8126     TCGv_i64 tcg_rd = new_tmp_a64(s);
8127     TCGv_i64 tcg_round;
8128     int i;
8129
8130     if (extract32(immh, 3, 1) && !is_q) {
8131         unallocated_encoding(s);
8132         return;
8133     }
8134
8135     if (size > 3 && !is_q) {
8136         unallocated_encoding(s);
8137         return;
8138     }
8139
8140     if (!fp_access_check(s)) {
8141         return;
8142     }
8143
8144     switch (opcode) {
8145     case 0x02: /* SSRA / USRA (accumulate) */
8146         accumulate = true;
8147         break;
8148     case 0x04: /* SRSHR / URSHR (rounding) */
8149         round = true;
8150         break;
8151     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8152         accumulate = round = true;
8153         break;
8154     case 0x08: /* SRI */
8155         insert = true;
8156         break;
8157     }
8158
8159     if (round) {
8160         uint64_t round_const = 1ULL << (shift - 1);
8161         tcg_round = tcg_const_i64(round_const);
8162     } else {
8163         TCGV_UNUSED_I64(tcg_round);
8164     }
8165
8166     for (i = 0; i < elements; i++) {
8167         read_vec_element(s, tcg_rn, rn, i, memop);
8168         if (accumulate || insert) {
8169             read_vec_element(s, tcg_rd, rd, i, memop);
8170         }
8171
8172         if (insert) {
8173             handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
8174         } else {
8175             handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8176                                     accumulate, is_u, size, shift);
8177         }
8178
8179         write_vec_element(s, tcg_rd, rd, i, size);
8180     }
8181
8182     if (!is_q) {
8183         clear_vec_high(s, rd);
8184     }
8185
8186     if (round) {
8187         tcg_temp_free_i64(tcg_round);
8188     }
8189 }
8190
8191 /* SHL/SLI - Vector shift left */
8192 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
8193                                 int immh, int immb, int opcode, int rn, int rd)
8194 {
8195     int size = 32 - clz32(immh) - 1;
8196     int immhb = immh << 3 | immb;
8197     int shift = immhb - (8 << size);
8198     int dsize = is_q ? 128 : 64;
8199     int esize = 8 << size;
8200     int elements = dsize/esize;
8201     TCGv_i64 tcg_rn = new_tmp_a64(s);
8202     TCGv_i64 tcg_rd = new_tmp_a64(s);
8203     int i;
8204
8205     if (extract32(immh, 3, 1) && !is_q) {
8206         unallocated_encoding(s);
8207         return;
8208     }
8209
8210     if (size > 3 && !is_q) {
8211         unallocated_encoding(s);
8212         return;
8213     }
8214
8215     if (!fp_access_check(s)) {
8216         return;
8217     }
8218
8219     for (i = 0; i < elements; i++) {
8220         read_vec_element(s, tcg_rn, rn, i, size);
8221         if (insert) {
8222             read_vec_element(s, tcg_rd, rd, i, size);
8223         }
8224
8225         handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
8226
8227         write_vec_element(s, tcg_rd, rd, i, size);
8228     }
8229
8230     if (!is_q) {
8231         clear_vec_high(s, rd);
8232     }
8233 }
8234
8235 /* USHLL/SHLL - Vector shift left with widening */
8236 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
8237                                  int immh, int immb, int opcode, int rn, int rd)
8238 {
8239     int size = 32 - clz32(immh) - 1;
8240     int immhb = immh << 3 | immb;
8241     int shift = immhb - (8 << size);
8242     int dsize = 64;
8243     int esize = 8 << size;
8244     int elements = dsize/esize;
8245     TCGv_i64 tcg_rn = new_tmp_a64(s);
8246     TCGv_i64 tcg_rd = new_tmp_a64(s);
8247     int i;
8248
8249     if (size >= 3) {
8250         unallocated_encoding(s);
8251         return;
8252     }
8253
8254     if (!fp_access_check(s)) {
8255         return;
8256     }
8257
8258     /* For the LL variants the store is larger than the load,
8259      * so if rd == rn we would overwrite parts of our input.
8260      * So load everything right now and use shifts in the main loop.
8261      */
8262     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
8263
8264     for (i = 0; i < elements; i++) {
8265         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
8266         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
8267         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
8268         write_vec_element(s, tcg_rd, rd, i, size + 1);
8269     }
8270 }
8271
8272 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
8273 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
8274                                  int immh, int immb, int opcode, int rn, int rd)
8275 {
8276     int immhb = immh << 3 | immb;
8277     int size = 32 - clz32(immh) - 1;
8278     int dsize = 64;
8279     int esize = 8 << size;
8280     int elements = dsize/esize;
8281     int shift = (2 * esize) - immhb;
8282     bool round = extract32(opcode, 0, 1);
8283     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
8284     TCGv_i64 tcg_round;
8285     int i;
8286
8287     if (extract32(immh, 3, 1)) {
8288         unallocated_encoding(s);
8289         return;
8290     }
8291
8292     if (!fp_access_check(s)) {
8293         return;
8294     }
8295
8296     tcg_rn = tcg_temp_new_i64();
8297     tcg_rd = tcg_temp_new_i64();
8298     tcg_final = tcg_temp_new_i64();
8299     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
8300
8301     if (round) {
8302         uint64_t round_const = 1ULL << (shift - 1);
8303         tcg_round = tcg_const_i64(round_const);
8304     } else {
8305         TCGV_UNUSED_I64(tcg_round);
8306     }
8307
8308     for (i = 0; i < elements; i++) {
8309         read_vec_element(s, tcg_rn, rn, i, size+1);
8310         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8311                                 false, true, size+1, shift);
8312
8313         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8314     }
8315
8316     if (!is_q) {
8317         clear_vec_high(s, rd);
8318         write_vec_element(s, tcg_final, rd, 0, MO_64);
8319     } else {
8320         write_vec_element(s, tcg_final, rd, 1, MO_64);
8321     }
8322
8323     if (round) {
8324         tcg_temp_free_i64(tcg_round);
8325     }
8326     tcg_temp_free_i64(tcg_rn);
8327     tcg_temp_free_i64(tcg_rd);
8328     tcg_temp_free_i64(tcg_final);
8329     return;
8330 }
8331
8332
8333 /* C3.6.14 AdvSIMD shift by immediate
8334  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
8335  * +---+---+---+-------------+------+------+--------+---+------+------+
8336  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8337  * +---+---+---+-------------+------+------+--------+---+------+------+
8338  */
8339 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
8340 {
8341     int rd = extract32(insn, 0, 5);
8342     int rn = extract32(insn, 5, 5);
8343     int opcode = extract32(insn, 11, 5);
8344     int immb = extract32(insn, 16, 3);
8345     int immh = extract32(insn, 19, 4);
8346     bool is_u = extract32(insn, 29, 1);
8347     bool is_q = extract32(insn, 30, 1);
8348
8349     switch (opcode) {
8350     case 0x08: /* SRI */
8351         if (!is_u) {
8352             unallocated_encoding(s);
8353             return;
8354         }
8355         /* fall through */
8356     case 0x00: /* SSHR / USHR */
8357     case 0x02: /* SSRA / USRA (accumulate) */
8358     case 0x04: /* SRSHR / URSHR (rounding) */
8359     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8360         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
8361         break;
8362     case 0x0a: /* SHL / SLI */
8363         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8364         break;
8365     case 0x10: /* SHRN */
8366     case 0x11: /* RSHRN / SQRSHRUN */
8367         if (is_u) {
8368             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
8369                                    opcode, rn, rd);
8370         } else {
8371             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
8372         }
8373         break;
8374     case 0x12: /* SQSHRN / UQSHRN */
8375     case 0x13: /* SQRSHRN / UQRSHRN */
8376         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
8377                                opcode, rn, rd);
8378         break;
8379     case 0x14: /* SSHLL / USHLL */
8380         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8381         break;
8382     case 0x1c: /* SCVTF / UCVTF */
8383         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
8384                                      opcode, rn, rd);
8385         break;
8386     case 0xc: /* SQSHLU */
8387         if (!is_u) {
8388             unallocated_encoding(s);
8389             return;
8390         }
8391         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
8392         break;
8393     case 0xe: /* SQSHL, UQSHL */
8394         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
8395         break;
8396     case 0x1f: /* FCVTZS/ FCVTZU */
8397         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
8398         return;
8399     default:
8400         unallocated_encoding(s);
8401         return;
8402     }
8403 }
8404
8405 /* Generate code to do a "long" addition or subtraction, ie one done in
8406  * TCGv_i64 on vector lanes twice the width specified by size.
8407  */
8408 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
8409                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
8410 {
8411     static NeonGenTwo64OpFn * const fns[3][2] = {
8412         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
8413         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
8414         { tcg_gen_add_i64, tcg_gen_sub_i64 },
8415     };
8416     NeonGenTwo64OpFn *genfn;
8417     assert(size < 3);
8418
8419     genfn = fns[size][is_sub];
8420     genfn(tcg_res, tcg_op1, tcg_op2);
8421 }
8422
/* Emit code for the widening "three different" insns (64 x 64 -> 128).
 *
 * Two passes each produce one 64-bit half of the 128-bit result, which is
 * written to Vd at the end.  For the accumulating opcodes (5, 8, 9, 10, 11)
 * the old value of Vd is loaded first and the per-pass result is added to
 * or subtracted from it.  The source elements are taken starting at 32-bit
 * element (is_q * 2), i.e. the upper 64 bits of Vn/Vm for the "2" variants.
 *
 * is_u selects the unsigned form of the operation; size is the source
 * element size (0, 1 or 2); opcode is the 4-bit opcode field of the insn.
 * Unsupported opcode/size combinations hit g_assert_not_reached()/assert(),
 * so the caller is expected to have rejected them already.
 */
static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
                                int opcode, int rd, int rn, int rm)
{
    /* 3-reg-different widening insns: 64 x 64 -> 128 */
    TCGv_i64 tcg_res[2];
    int pass, accop;

    tcg_res[0] = tcg_temp_new_i64();
    tcg_res[1] = tcg_temp_new_i64();

    /* Does this op do an adding accumulate, a subtracting accumulate,
     * or no accumulate at all?
     */
    switch (opcode) {
    case 5:
    case 8:
    case 9:
        accop = 1;
        break;
    case 10:
    case 11:
        accop = -1;
        break;
    default:
        accop = 0;
        break;
    }

    /* Accumulating ops start from the current contents of Vd */
    if (accop != 0) {
        read_vec_element(s, tcg_res[0], rd, 0, MO_64);
        read_vec_element(s, tcg_res[1], rd, 1, MO_64);
    }

    /* size == 2 means two 32x32->64 operations; this is worth special
     * casing because we can generally handle it inline.
     */
    if (size == 2) {
        for (pass = 0; pass < 2; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_passres;
            /* Load each 32-bit element zero- or sign-extended to 64 bits */
            TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);

            /* The "2" variants (is_q) use the upper pair of elements */
            int elt = pass + is_q * 2;

            read_vec_element(s, tcg_op1, rn, elt, memop);
            read_vec_element(s, tcg_op2, rm, elt, memop);

            /* Without accumulation the result can be produced in place;
             * otherwise compute into a scratch temp and combine below.
             */
            if (accop == 0) {
                tcg_passres = tcg_res[pass];
            } else {
                tcg_passres = tcg_temp_new_i64();
            }

            switch (opcode) {
            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
                tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
                tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
            {
                TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
                TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();

                /* Absolute difference: compute both op1 - op2 and
                 * op2 - op1 and select the one matching op1 >= op2.
                 */
                tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
                tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
                                    tcg_passres,
                                    tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
                tcg_temp_free_i64(tcg_tmp1);
                tcg_temp_free_i64(tcg_tmp2);
                break;
            }
            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 9: /* SQDMLAL, SQDMLAL2 */
            case 11: /* SQDMLSL, SQDMLSL2 */
            case 13: /* SQDMULL, SQDMULL2 */
                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
                /* Double the product by saturating-adding it to itself */
                gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
                                                  tcg_passres, tcg_passres);
                break;
            default:
                g_assert_not_reached();
            }

            if (opcode == 9 || opcode == 11) {
                /* saturating accumulate ops */
                if (accop < 0) {
                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
                }
                gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
                                                  tcg_res[pass], tcg_passres);
            } else if (accop > 0) {
                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
            } else if (accop < 0) {
                tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
            }

            /* tcg_passres is only a separate temp when accumulating */
            if (accop != 0) {
                tcg_temp_free_i64(tcg_passres);
            }

            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        }
    } else {
        /* size 0 or 1, generally helper functions */
        for (pass = 0; pass < 2; pass++) {
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i64 tcg_passres;
            /* The "2" variants (is_q) use the upper pair of 32-bit chunks */
            int elt = pass + is_q * 2;

            /* Each pass reads one raw 32-bit chunk of each source; the
             * helpers below presumably treat it as packed 8- or 16-bit
             * lanes (NOTE(review): confirm against the helper definitions).
             */
            read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);

            if (accop == 0) {
                tcg_passres = tcg_res[pass];
            } else {
                tcg_passres = tcg_temp_new_i64();
            }

            switch (opcode) {
            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
            {
                TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
                static NeonGenWidenFn * const widenfns[2][2] = {
                    { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
                    { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
                };
                NeonGenWidenFn *widenfn = widenfns[size][is_u];

                /* Widen both operands, then do the long add/subtract */
                widenfn(tcg_op2_64, tcg_op2);
                widenfn(tcg_passres, tcg_op1);
                gen_neon_addl(size, (opcode == 2), tcg_passres,
                              tcg_passres, tcg_op2_64);
                tcg_temp_free_i64(tcg_op2_64);
                break;
            }
            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
                if (size == 0) {
                    if (is_u) {
                        gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
                    }
                } else {
                    if (is_u) {
                        gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
                    }
                }
                break;
            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
                if (size == 0) {
                    if (is_u) {
                        gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
                    }
                } else {
                    if (is_u) {
                        gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
                    }
                }
                break;
            case 9: /* SQDMLAL, SQDMLAL2 */
            case 11: /* SQDMLSL, SQDMLSL2 */
            case 13: /* SQDMULL, SQDMULL2 */
                /* Only the 16-bit source size reaches this path */
                assert(size == 1);
                gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
                /* Double the product by saturating-adding it to itself */
                gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
                                                  tcg_passres, tcg_passres);
                break;
            case 14: /* PMULL */
                assert(size == 0);
                gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
                break;
            default:
                g_assert_not_reached();
            }
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);

            if (accop != 0) {
                if (opcode == 9 || opcode == 11) {
                    /* saturating accumulate ops */
                    if (accop < 0) {
                        gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
                    }
                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
                                                      tcg_res[pass],
                                                      tcg_passres);
                } else {
                    gen_neon_addl(size, (accop < 0), tcg_res[pass],
                                  tcg_res[pass], tcg_passres);
                }
                tcg_temp_free_i64(tcg_passres);
            }
        }
    }

    /* Only write Vd once both halves are computed, so that rd may be the
     * same register as rn or rm.
     */
    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
    tcg_temp_free_i64(tcg_res[0]);
    tcg_temp_free_i64(tcg_res[1]);
}
8644
8645 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
8646                             int opcode, int rd, int rn, int rm)
8647 {
8648     TCGv_i64 tcg_res[2];
8649     int part = is_q ? 2 : 0;
8650     int pass;
8651
8652     for (pass = 0; pass < 2; pass++) {
8653         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8654         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8655         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
8656         static NeonGenWidenFn * const widenfns[3][2] = {
8657             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8658             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8659             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
8660         };
8661         NeonGenWidenFn *widenfn = widenfns[size][is_u];
8662
8663         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8664         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
8665         widenfn(tcg_op2_wide, tcg_op2);
8666         tcg_temp_free_i32(tcg_op2);
8667         tcg_res[pass] = tcg_temp_new_i64();
8668         gen_neon_addl(size, (opcode == 3),
8669                       tcg_res[pass], tcg_op1, tcg_op2_wide);
8670         tcg_temp_free_i64(tcg_op1);
8671         tcg_temp_free_i64(tcg_op2_wide);
8672     }
8673
8674     for (pass = 0; pass < 2; pass++) {
8675         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8676         tcg_temp_free_i64(tcg_res[pass]);
8677     }
8678 }
8679
8680 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
8681 {
8682     tcg_gen_addi_i64(in, in, 1U << 31);
8683     tcg_gen_extrh_i64_i32(res, in);
8684 }
8685
8686 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
8687                                  int opcode, int rd, int rn, int rm)
8688 {
8689     TCGv_i32 tcg_res[2];
8690     int part = is_q ? 2 : 0;
8691     int pass;
8692
8693     for (pass = 0; pass < 2; pass++) {
8694         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8695         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8696         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
8697         static NeonGenNarrowFn * const narrowfns[3][2] = {
8698             { gen_helper_neon_narrow_high_u8,
8699               gen_helper_neon_narrow_round_high_u8 },
8700             { gen_helper_neon_narrow_high_u16,
8701               gen_helper_neon_narrow_round_high_u16 },
8702             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
8703         };
8704         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
8705
8706         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8707         read_vec_element(s, tcg_op2, rm, pass, MO_64);
8708
8709         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
8710
8711         tcg_temp_free_i64(tcg_op1);
8712         tcg_temp_free_i64(tcg_op2);
8713
8714         tcg_res[pass] = tcg_temp_new_i32();
8715         gennarrow(tcg_res[pass], tcg_wideres);
8716         tcg_temp_free_i64(tcg_wideres);
8717     }
8718
8719     for (pass = 0; pass < 2; pass++) {
8720         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
8721         tcg_temp_free_i32(tcg_res[pass]);
8722     }
8723     if (!is_q) {
8724         clear_vec_high(s, rd);
8725     }
8726 }
8727
8728 static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
8729 {
8730     /* PMULL of 64 x 64 -> 128 is an odd special case because it
8731      * is the only three-reg-diff instruction which produces a
8732      * 128-bit wide result from a single operation. However since
8733      * it's possible to calculate the two halves more or less
8734      * separately we just use two helper calls.
8735      */
8736     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8737     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8738     TCGv_i64 tcg_res = tcg_temp_new_i64();
8739
8740     read_vec_element(s, tcg_op1, rn, is_q, MO_64);
8741     read_vec_element(s, tcg_op2, rm, is_q, MO_64);
8742     gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
8743     write_vec_element(s, tcg_res, rd, 0, MO_64);
8744     gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
8745     write_vec_element(s, tcg_res, rd, 1, MO_64);
8746
8747     tcg_temp_free_i64(tcg_op1);
8748     tcg_temp_free_i64(tcg_op2);
8749     tcg_temp_free_i64(tcg_res);
8750 }
8751
8752 /* C3.6.15 AdvSIMD three different
8753  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8754  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8755  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8756  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8757  */
8758 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
8759 {
8760     /* Instructions in this group fall into three basic classes
8761      * (in each case with the operation working on each element in
8762      * the input vectors):
8763      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
8764      *     128 bit input)
8765      * (2) wide 64 x 128 -> 128
8766      * (3) narrowing 128 x 128 -> 64
8767      * Here we do initial decode, catch unallocated cases and
8768      * dispatch to separate functions for each class.
8769      */
8770     int is_q = extract32(insn, 30, 1);
8771     int is_u = extract32(insn, 29, 1);
8772     int size = extract32(insn, 22, 2);
8773     int opcode = extract32(insn, 12, 4);
8774     int rm = extract32(insn, 16, 5);
8775     int rn = extract32(insn, 5, 5);
8776     int rd = extract32(insn, 0, 5);
8777
8778     switch (opcode) {
8779     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
8780     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
8781         /* 64 x 128 -> 128 */
8782         if (size == 3) {
8783             unallocated_encoding(s);
8784             return;
8785         }
8786         if (!fp_access_check(s)) {
8787             return;
8788         }
8789         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
8790         break;
8791     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
8792     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
8793         /* 128 x 128 -> 64 */
8794         if (size == 3) {
8795             unallocated_encoding(s);
8796             return;
8797         }
8798         if (!fp_access_check(s)) {
8799             return;
8800         }
8801         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
8802         break;
8803     case 14: /* PMULL, PMULL2 */
8804         if (is_u || size == 1 || size == 2) {
8805             unallocated_encoding(s);
8806             return;
8807         }
8808         if (size == 3) {
8809             if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
8810                 unallocated_encoding(s);
8811                 return;
8812             }
8813             if (!fp_access_check(s)) {
8814                 return;
8815             }
8816             handle_pmull_64(s, is_q, rd, rn, rm);
8817             return;
8818         }
8819         goto is_widening;
8820     case 9: /* SQDMLAL, SQDMLAL2 */
8821     case 11: /* SQDMLSL, SQDMLSL2 */
8822     case 13: /* SQDMULL, SQDMULL2 */
8823         if (is_u || size == 0) {
8824             unallocated_encoding(s);
8825             return;
8826         }
8827         /* fall through */
8828     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8829     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8830     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8831     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8832     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8833     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8834     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
8835         /* 64 x 64 -> 128 */
8836         if (size == 3) {
8837             unallocated_encoding(s);
8838             return;
8839         }
8840     is_widening:
8841         if (!fp_access_check(s)) {
8842             return;
8843         }
8844
8845         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
8846         break;
8847     default:
8848         /* opcode 15 not allocated */
8849         unallocated_encoding(s);
8850         break;
8851     }
8852 }
8853
8854 /* Logic op (opcode == 3) subgroup of C3.6.16. */
8855 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
8856 {
8857     int rd = extract32(insn, 0, 5);
8858     int rn = extract32(insn, 5, 5);
8859     int rm = extract32(insn, 16, 5);
8860     int size = extract32(insn, 22, 2);
8861     bool is_u = extract32(insn, 29, 1);
8862     bool is_q = extract32(insn, 30, 1);
8863     TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
8864     int pass;
8865
8866     if (!fp_access_check(s)) {
8867         return;
8868     }
8869
8870     tcg_op1 = tcg_temp_new_i64();
8871     tcg_op2 = tcg_temp_new_i64();
8872     tcg_res[0] = tcg_temp_new_i64();
8873     tcg_res[1] = tcg_temp_new_i64();
8874
8875     for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
8876         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8877         read_vec_element(s, tcg_op2, rm, pass, MO_64);
8878
8879         if (!is_u) {
8880             switch (size) {
8881             case 0: /* AND */
8882                 tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
8883                 break;
8884             case 1: /* BIC */
8885                 tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
8886                 break;
8887             case 2: /* ORR */
8888                 tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
8889                 break;
8890             case 3: /* ORN */
8891                 tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
8892                 break;
8893             }
8894         } else {
8895             if (size != 0) {
8896                 /* B* ops need res loaded to operate on */
8897                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8898             }
8899
8900             switch (size) {
8901             case 0: /* EOR */
8902                 tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
8903                 break;
8904             case 1: /* BSL bitwise select */
8905                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2);
8906                 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8907                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1);
8908                 break;
8909             case 2: /* BIT, bitwise insert if true */
8910                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8911                 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2);
8912                 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
8913                 break;
8914             case 3: /* BIF, bitwise insert if false */
8915                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8916                 tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2);
8917                 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
8918                 break;
8919             }
8920         }
8921     }
8922
8923     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8924     if (!is_q) {
8925         tcg_gen_movi_i64(tcg_res[1], 0);
8926     }
8927     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8928
8929     tcg_temp_free_i64(tcg_op1);
8930     tcg_temp_free_i64(tcg_op2);
8931     tcg_temp_free_i64(tcg_res[0]);
8932     tcg_temp_free_i64(tcg_res[1]);
8933 }
8934
8935 /* Helper functions for 32 bit comparisons */
8936 static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8937 {
8938     tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
8939 }
8940
8941 static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8942 {
8943     tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
8944 }
8945
8946 static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8947 {
8948     tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
8949 }
8950
8951 static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8952 {
8953     tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
8954 }
8955
8956 /* Pairwise op subgroup of C3.6.16.
8957  *
8958  * This is called directly or via the handle_3same_float for float pairwise
8959  * operations where the opcode and size are calculated differently.
8960  */
8961 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
8962                                    int size, int rn, int rm, int rd)
8963 {
8964     TCGv_ptr fpst;
8965     int pass;
8966
8967     /* Floating point operations need fpst */
8968     if (opcode >= 0x58) {
8969         fpst = get_fpstatus_ptr();
8970     } else {
8971         TCGV_UNUSED_PTR(fpst);
8972     }
8973
8974     if (!fp_access_check(s)) {
8975         return;
8976     }
8977
8978     /* These operations work on the concatenated rm:rn, with each pair of
8979      * adjacent elements being operated on to produce an element in the result.
8980      */
8981     if (size == 3) {
8982         TCGv_i64 tcg_res[2];
8983
8984         for (pass = 0; pass < 2; pass++) {
8985             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8986             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8987             int passreg = (pass == 0) ? rn : rm;
8988
8989             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
8990             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
8991             tcg_res[pass] = tcg_temp_new_i64();
8992
8993             switch (opcode) {
8994             case 0x17: /* ADDP */
8995                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
8996                 break;
8997             case 0x58: /* FMAXNMP */
8998                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
8999                 break;
9000             case 0x5a: /* FADDP */
9001                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9002                 break;
9003             case 0x5e: /* FMAXP */
9004                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9005                 break;
9006             case 0x78: /* FMINNMP */
9007                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9008                 break;
9009             case 0x7e: /* FMINP */
9010                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9011                 break;
9012             default:
9013                 g_assert_not_reached();
9014             }
9015
9016             tcg_temp_free_i64(tcg_op1);
9017             tcg_temp_free_i64(tcg_op2);
9018         }
9019
9020         for (pass = 0; pass < 2; pass++) {
9021             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9022             tcg_temp_free_i64(tcg_res[pass]);
9023         }
9024     } else {
9025         int maxpass = is_q ? 4 : 2;
9026         TCGv_i32 tcg_res[4];
9027
9028         for (pass = 0; pass < maxpass; pass++) {
9029             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9030             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9031             NeonGenTwoOpFn *genfn = NULL;
9032             int passreg = pass < (maxpass / 2) ? rn : rm;
9033             int passelt = (is_q && (pass & 1)) ? 2 : 0;
9034
9035             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
9036             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
9037             tcg_res[pass] = tcg_temp_new_i32();
9038
9039             switch (opcode) {
9040             case 0x17: /* ADDP */
9041             {
9042                 static NeonGenTwoOpFn * const fns[3] = {
9043                     gen_helper_neon_padd_u8,
9044                     gen_helper_neon_padd_u16,
9045                     tcg_gen_add_i32,
9046                 };
9047                 genfn = fns[size];
9048                 break;
9049             }
9050             case 0x14: /* SMAXP, UMAXP */
9051             {
9052                 static NeonGenTwoOpFn * const fns[3][2] = {
9053                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
9054                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
9055                     { gen_max_s32, gen_max_u32 },
9056                 };
9057                 genfn = fns[size][u];
9058                 break;
9059             }
9060             case 0x15: /* SMINP, UMINP */
9061             {
9062                 static NeonGenTwoOpFn * const fns[3][2] = {
9063                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
9064                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
9065                     { gen_min_s32, gen_min_u32 },
9066                 };
9067                 genfn = fns[size][u];
9068                 break;
9069             }
9070             /* The FP operations are all on single floats (32 bit) */
9071             case 0x58: /* FMAXNMP */
9072                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9073                 break;
9074             case 0x5a: /* FADDP */
9075                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9076                 break;
9077             case 0x5e: /* FMAXP */
9078                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9079                 break;
9080             case 0x78: /* FMINNMP */
9081                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9082                 break;
9083             case 0x7e: /* FMINP */
9084                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9085                 break;
9086             default:
9087                 g_assert_not_reached();
9088             }
9089
9090             /* FP ops called directly, otherwise call now */
9091             if (genfn) {
9092                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
9093             }
9094
9095             tcg_temp_free_i32(tcg_op1);
9096             tcg_temp_free_i32(tcg_op2);
9097         }
9098
9099         for (pass = 0; pass < maxpass; pass++) {
9100             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9101             tcg_temp_free_i32(tcg_res[pass]);
9102         }
9103         if (!is_q) {
9104             clear_vec_high(s, rd);
9105         }
9106     }
9107
9108     if (!TCGV_IS_UNUSED_PTR(fpst)) {
9109         tcg_temp_free_ptr(fpst);
9110     }
9111 }
9112
9113 /* Floating point op subgroup of C3.6.16. */
9114 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
9115 {
9116     /* For floating point ops, the U, size[1] and opcode bits
9117      * together indicate the operation. size[0] indicates single
9118      * or double.
9119      */
9120     int fpopcode = extract32(insn, 11, 5)
9121         | (extract32(insn, 23, 1) << 5)
9122         | (extract32(insn, 29, 1) << 6);
9123     int is_q = extract32(insn, 30, 1);
9124     int size = extract32(insn, 22, 1);
9125     int rm = extract32(insn, 16, 5);
9126     int rn = extract32(insn, 5, 5);
9127     int rd = extract32(insn, 0, 5);
9128
9129     int datasize = is_q ? 128 : 64;
9130     int esize = 32 << size;
9131     int elements = datasize / esize;
9132
9133     if (size == 1 && !is_q) {
9134         unallocated_encoding(s);
9135         return;
9136     }
9137
9138     switch (fpopcode) {
9139     case 0x58: /* FMAXNMP */
9140     case 0x5a: /* FADDP */
9141     case 0x5e: /* FMAXP */
9142     case 0x78: /* FMINNMP */
9143     case 0x7e: /* FMINP */
9144         if (size && !is_q) {
9145             unallocated_encoding(s);
9146             return;
9147         }
9148         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
9149                                rn, rm, rd);
9150         return;
9151     case 0x1b: /* FMULX */
9152     case 0x1f: /* FRECPS */
9153     case 0x3f: /* FRSQRTS */
9154     case 0x5d: /* FACGE */
9155     case 0x7d: /* FACGT */
9156     case 0x19: /* FMLA */
9157     case 0x39: /* FMLS */
9158     case 0x18: /* FMAXNM */
9159     case 0x1a: /* FADD */
9160     case 0x1c: /* FCMEQ */
9161     case 0x1e: /* FMAX */
9162     case 0x38: /* FMINNM */
9163     case 0x3a: /* FSUB */
9164     case 0x3e: /* FMIN */
9165     case 0x5b: /* FMUL */
9166     case 0x5c: /* FCMGE */
9167     case 0x5f: /* FDIV */
9168     case 0x7a: /* FABD */
9169     case 0x7c: /* FCMGT */
9170         if (!fp_access_check(s)) {
9171             return;
9172         }
9173
9174         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
9175         return;
9176     default:
9177         unallocated_encoding(s);
9178         return;
9179     }
9180 }
9181
9182 /* Integer op subgroup of C3.6.16. */
9183 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
9184 {
9185     int is_q = extract32(insn, 30, 1);
9186     int u = extract32(insn, 29, 1);
9187     int size = extract32(insn, 22, 2);
9188     int opcode = extract32(insn, 11, 5);
9189     int rm = extract32(insn, 16, 5);
9190     int rn = extract32(insn, 5, 5);
9191     int rd = extract32(insn, 0, 5);
9192     int pass;
9193
9194     switch (opcode) {
9195     case 0x13: /* MUL, PMUL */
9196         if (u && size != 0) {
9197             unallocated_encoding(s);
9198             return;
9199         }
9200         /* fall through */
9201     case 0x0: /* SHADD, UHADD */
9202     case 0x2: /* SRHADD, URHADD */
9203     case 0x4: /* SHSUB, UHSUB */
9204     case 0xc: /* SMAX, UMAX */
9205     case 0xd: /* SMIN, UMIN */
9206     case 0xe: /* SABD, UABD */
9207     case 0xf: /* SABA, UABA */
9208     case 0x12: /* MLA, MLS */
9209         if (size == 3) {
9210             unallocated_encoding(s);
9211             return;
9212         }
9213         break;
9214     case 0x16: /* SQDMULH, SQRDMULH */
9215         if (size == 0 || size == 3) {
9216             unallocated_encoding(s);
9217             return;
9218         }
9219         break;
9220     default:
9221         if (size == 3 && !is_q) {
9222             unallocated_encoding(s);
9223             return;
9224         }
9225         break;
9226     }
9227
9228     if (!fp_access_check(s)) {
9229         return;
9230     }
9231
9232     if (size == 3) {
9233         assert(is_q);
9234         for (pass = 0; pass < 2; pass++) {
9235             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9236             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9237             TCGv_i64 tcg_res = tcg_temp_new_i64();
9238
9239             read_vec_element(s, tcg_op1, rn, pass, MO_64);
9240             read_vec_element(s, tcg_op2, rm, pass, MO_64);
9241
9242             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
9243
9244             write_vec_element(s, tcg_res, rd, pass, MO_64);
9245
9246             tcg_temp_free_i64(tcg_res);
9247             tcg_temp_free_i64(tcg_op1);
9248             tcg_temp_free_i64(tcg_op2);
9249         }
9250     } else {
9251         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
9252             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9253             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9254             TCGv_i32 tcg_res = tcg_temp_new_i32();
9255             NeonGenTwoOpFn *genfn = NULL;
9256             NeonGenTwoOpEnvFn *genenvfn = NULL;
9257
9258             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9259             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9260
9261             switch (opcode) {
9262             case 0x0: /* SHADD, UHADD */
9263             {
9264                 static NeonGenTwoOpFn * const fns[3][2] = {
9265                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
9266                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
9267                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
9268                 };
9269                 genfn = fns[size][u];
9270                 break;
9271             }
9272             case 0x1: /* SQADD, UQADD */
9273             {
9274                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9275                     { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9276                     { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9277                     { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9278                 };
9279                 genenvfn = fns[size][u];
9280                 break;
9281             }
9282             case 0x2: /* SRHADD, URHADD */
9283             {
9284                 static NeonGenTwoOpFn * const fns[3][2] = {
9285                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
9286                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
9287                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
9288                 };
9289                 genfn = fns[size][u];
9290                 break;
9291             }
9292             case 0x4: /* SHSUB, UHSUB */
9293             {
9294                 static NeonGenTwoOpFn * const fns[3][2] = {
9295                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
9296                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
9297                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
9298                 };
9299                 genfn = fns[size][u];
9300                 break;
9301             }
9302             case 0x5: /* SQSUB, UQSUB */
9303             {
9304                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9305                     { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9306                     { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9307                     { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9308                 };
9309                 genenvfn = fns[size][u];
9310                 break;
9311             }
9312             case 0x6: /* CMGT, CMHI */
9313             {
9314                 static NeonGenTwoOpFn * const fns[3][2] = {
9315                     { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
9316                     { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
9317                     { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
9318                 };
9319                 genfn = fns[size][u];
9320                 break;
9321             }
9322             case 0x7: /* CMGE, CMHS */
9323             {
9324                 static NeonGenTwoOpFn * const fns[3][2] = {
9325                     { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
9326                     { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
9327                     { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
9328                 };
9329                 genfn = fns[size][u];
9330                 break;
9331             }
9332             case 0x8: /* SSHL, USHL */
9333             {
9334                 static NeonGenTwoOpFn * const fns[3][2] = {
9335                     { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
9336                     { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
9337                     { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
9338                 };
9339                 genfn = fns[size][u];
9340                 break;
9341             }
9342             case 0x9: /* SQSHL, UQSHL */
9343             {
9344                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9345                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9346                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9347                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9348                 };
9349                 genenvfn = fns[size][u];
9350                 break;
9351             }
9352             case 0xa: /* SRSHL, URSHL */
9353             {
9354                 static NeonGenTwoOpFn * const fns[3][2] = {
9355                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
9356                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
9357                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
9358                 };
9359                 genfn = fns[size][u];
9360                 break;
9361             }
9362             case 0xb: /* SQRSHL, UQRSHL */
9363             {
9364                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9365                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9366                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9367                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9368                 };
9369                 genenvfn = fns[size][u];
9370                 break;
9371             }
9372             case 0xc: /* SMAX, UMAX */
9373             {
9374                 static NeonGenTwoOpFn * const fns[3][2] = {
9375                     { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
9376                     { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
9377                     { gen_max_s32, gen_max_u32 },
9378                 };
9379                 genfn = fns[size][u];
9380                 break;
9381             }
9382
9383             case 0xd: /* SMIN, UMIN */
9384             {
9385                 static NeonGenTwoOpFn * const fns[3][2] = {
9386                     { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
9387                     { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
9388                     { gen_min_s32, gen_min_u32 },
9389                 };
9390                 genfn = fns[size][u];
9391                 break;
9392             }
9393             case 0xe: /* SABD, UABD */
9394             case 0xf: /* SABA, UABA */
9395             {
9396                 static NeonGenTwoOpFn * const fns[3][2] = {
9397                     { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
9398                     { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
9399                     { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
9400                 };
9401                 genfn = fns[size][u];
9402                 break;
9403             }
9404             case 0x10: /* ADD, SUB */
9405             {
9406                 static NeonGenTwoOpFn * const fns[3][2] = {
9407                     { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9408                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9409                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
9410                 };
9411                 genfn = fns[size][u];
9412                 break;
9413             }
9414             case 0x11: /* CMTST, CMEQ */
9415             {
9416                 static NeonGenTwoOpFn * const fns[3][2] = {
9417                     { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
9418                     { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
9419                     { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
9420                 };
9421                 genfn = fns[size][u];
9422                 break;
9423             }
9424             case 0x13: /* MUL, PMUL */
9425                 if (u) {
9426                     /* PMUL */
9427                     assert(size == 0);
9428                     genfn = gen_helper_neon_mul_p8;
9429                     break;
9430                 }
9431                 /* fall through : MUL */
9432             case 0x12: /* MLA, MLS */
9433             {
9434                 static NeonGenTwoOpFn * const fns[3] = {
9435                     gen_helper_neon_mul_u8,
9436                     gen_helper_neon_mul_u16,
9437                     tcg_gen_mul_i32,
9438                 };
9439                 genfn = fns[size];
9440                 break;
9441             }
9442             case 0x16: /* SQDMULH, SQRDMULH */
9443             {
9444                 static NeonGenTwoOpEnvFn * const fns[2][2] = {
9445                     { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9446                     { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9447                 };
9448                 assert(size == 1 || size == 2);
9449                 genenvfn = fns[size - 1][u];
9450                 break;
9451             }
9452             default:
9453                 g_assert_not_reached();
9454             }
9455
9456             if (genenvfn) {
9457                 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
9458             } else {
9459                 genfn(tcg_res, tcg_op1, tcg_op2);
9460             }
9461
9462             if (opcode == 0xf || opcode == 0x12) {
9463                 /* SABA, UABA, MLA, MLS: accumulating ops */
9464                 static NeonGenTwoOpFn * const fns[3][2] = {
9465                     { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9466                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9467                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
9468                 };
9469                 bool is_sub = (opcode == 0x12 && u); /* MLS */
9470
9471                 genfn = fns[size][is_sub];
9472                 read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
9473                 genfn(tcg_res, tcg_op1, tcg_res);
9474             }
9475
9476             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9477
9478             tcg_temp_free_i32(tcg_res);
9479             tcg_temp_free_i32(tcg_op1);
9480             tcg_temp_free_i32(tcg_op2);
9481         }
9482     }
9483
9484     if (!is_q) {
9485         clear_vec_high(s, rd);
9486     }
9487 }
9488
9489 /* C3.6.16 AdvSIMD three same
9490  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
9491  * +---+---+---+-----------+------+---+------+--------+---+------+------+
9492  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9493  * +---+---+---+-----------+------+---+------+--------+---+------+------+
9494  */
9495 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
9496 {
9497     int opcode = extract32(insn, 11, 5);
9498
9499     switch (opcode) {
9500     case 0x3: /* logic ops */
9501         disas_simd_3same_logic(s, insn);
9502         break;
9503     case 0x17: /* ADDP */
9504     case 0x14: /* SMAXP, UMAXP */
9505     case 0x15: /* SMINP, UMINP */
9506     {
9507         /* Pairwise operations */
9508         int is_q = extract32(insn, 30, 1);
9509         int u = extract32(insn, 29, 1);
9510         int size = extract32(insn, 22, 2);
9511         int rm = extract32(insn, 16, 5);
9512         int rn = extract32(insn, 5, 5);
9513         int rd = extract32(insn, 0, 5);
9514         if (opcode == 0x17) {
9515             if (u || (size == 3 && !is_q)) {
9516                 unallocated_encoding(s);
9517                 return;
9518             }
9519         } else {
9520             if (size == 3) {
9521                 unallocated_encoding(s);
9522                 return;
9523             }
9524         }
9525         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
9526         break;
9527     }
9528     case 0x18 ... 0x31:
9529         /* floating point ops, sz[1] and U are part of opcode */
9530         disas_simd_3same_float(s, insn);
9531         break;
9532     default:
9533         disas_simd_3same_int(s, insn);
9534         break;
9535     }
9536 }
9537
9538 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
9539                                   int size, int rn, int rd)
9540 {
9541     /* Handle 2-reg-misc ops which are widening (so each size element
9542      * in the source becomes a 2*size element in the destination.
9543      * The only instruction like this is FCVTL.
9544      */
9545     int pass;
9546
9547     if (size == 3) {
9548         /* 32 -> 64 bit fp conversion */
9549         TCGv_i64 tcg_res[2];
9550         int srcelt = is_q ? 2 : 0;
9551
9552         for (pass = 0; pass < 2; pass++) {
9553             TCGv_i32 tcg_op = tcg_temp_new_i32();
9554             tcg_res[pass] = tcg_temp_new_i64();
9555
9556             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
9557             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
9558             tcg_temp_free_i32(tcg_op);
9559         }
9560         for (pass = 0; pass < 2; pass++) {
9561             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9562             tcg_temp_free_i64(tcg_res[pass]);
9563         }
9564     } else {
9565         /* 16 -> 32 bit fp conversion */
9566         int srcelt = is_q ? 4 : 0;
9567         TCGv_i32 tcg_res[4];
9568
9569         for (pass = 0; pass < 4; pass++) {
9570             tcg_res[pass] = tcg_temp_new_i32();
9571
9572             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
9573             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
9574                                            cpu_env);
9575         }
9576         for (pass = 0; pass < 4; pass++) {
9577             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9578             tcg_temp_free_i32(tcg_res[pass]);
9579         }
9580     }
9581 }
9582
9583 static void handle_rev(DisasContext *s, int opcode, bool u,
9584                        bool is_q, int size, int rn, int rd)
9585 {
9586     int op = (opcode << 1) | u;
9587     int opsz = op + size;
9588     int grp_size = 3 - opsz;
9589     int dsize = is_q ? 128 : 64;
9590     int i;
9591
9592     if (opsz >= 3) {
9593         unallocated_encoding(s);
9594         return;
9595     }
9596
9597     if (!fp_access_check(s)) {
9598         return;
9599     }
9600
9601     if (size == 0) {
9602         /* Special case bytes, use bswap op on each group of elements */
9603         int groups = dsize / (8 << grp_size);
9604
9605         for (i = 0; i < groups; i++) {
9606             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9607
9608             read_vec_element(s, tcg_tmp, rn, i, grp_size);
9609             switch (grp_size) {
9610             case MO_16:
9611                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
9612                 break;
9613             case MO_32:
9614                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
9615                 break;
9616             case MO_64:
9617                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
9618                 break;
9619             default:
9620                 g_assert_not_reached();
9621             }
9622             write_vec_element(s, tcg_tmp, rd, i, grp_size);
9623             tcg_temp_free_i64(tcg_tmp);
9624         }
9625         if (!is_q) {
9626             clear_vec_high(s, rd);
9627         }
9628     } else {
9629         int revmask = (1 << grp_size) - 1;
9630         int esize = 8 << size;
9631         int elements = dsize / esize;
9632         TCGv_i64 tcg_rn = tcg_temp_new_i64();
9633         TCGv_i64 tcg_rd = tcg_const_i64(0);
9634         TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
9635
9636         for (i = 0; i < elements; i++) {
9637             int e_rev = (i & 0xf) ^ revmask;
9638             int off = e_rev * esize;
9639             read_vec_element(s, tcg_rn, rn, i, size);
9640             if (off >= 64) {
9641                 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
9642                                     tcg_rn, off - 64, esize);
9643             } else {
9644                 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
9645             }
9646         }
9647         write_vec_element(s, tcg_rd, rd, 0, MO_64);
9648         write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
9649
9650         tcg_temp_free_i64(tcg_rd_hi);
9651         tcg_temp_free_i64(tcg_rd);
9652         tcg_temp_free_i64(tcg_rn);
9653     }
9654 }
9655
9656 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
9657                                   bool is_q, int size, int rn, int rd)
9658 {
9659     /* Implement the pairwise operations from 2-misc:
9660      * SADDLP, UADDLP, SADALP, UADALP.
9661      * These all add pairs of elements in the input to produce a
9662      * double-width result element in the output (possibly accumulating).
9663      */
9664     bool accum = (opcode == 0x6);
9665     int maxpass = is_q ? 2 : 1;
9666     int pass;
9667     TCGv_i64 tcg_res[2];
9668
9669     if (size == 2) {
9670         /* 32 + 32 -> 64 op */
9671         TCGMemOp memop = size + (u ? 0 : MO_SIGN);
9672
9673         for (pass = 0; pass < maxpass; pass++) {
9674             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9675             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9676
9677             tcg_res[pass] = tcg_temp_new_i64();
9678
9679             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
9680             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
9681             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9682             if (accum) {
9683                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
9684                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9685             }
9686
9687             tcg_temp_free_i64(tcg_op1);
9688             tcg_temp_free_i64(tcg_op2);
9689         }
9690     } else {
9691         for (pass = 0; pass < maxpass; pass++) {
9692             TCGv_i64 tcg_op = tcg_temp_new_i64();
9693             NeonGenOneOpFn *genfn;
9694             static NeonGenOneOpFn * const fns[2][2] = {
9695                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
9696                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
9697             };
9698
9699             genfn = fns[size][u];
9700
9701             tcg_res[pass] = tcg_temp_new_i64();
9702
9703             read_vec_element(s, tcg_op, rn, pass, MO_64);
9704             genfn(tcg_res[pass], tcg_op);
9705
9706             if (accum) {
9707                 read_vec_element(s, tcg_op, rd, pass, MO_64);
9708                 if (size == 0) {
9709                     gen_helper_neon_addl_u16(tcg_res[pass],
9710                                              tcg_res[pass], tcg_op);
9711                 } else {
9712                     gen_helper_neon_addl_u32(tcg_res[pass],
9713                                              tcg_res[pass], tcg_op);
9714                 }
9715             }
9716             tcg_temp_free_i64(tcg_op);
9717         }
9718     }
9719     if (!is_q) {
9720         tcg_res[1] = tcg_const_i64(0);
9721     }
9722     for (pass = 0; pass < 2; pass++) {
9723         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9724         tcg_temp_free_i64(tcg_res[pass]);
9725     }
9726 }
9727
9728 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
9729 {
9730     /* SHLL, SHLL2: widen two 32-bit chunks of the source register and shift
9731      * each widened result left by the source element width (8 << size). */
9732     static NeonGenWidenFn * const widen_by_size[3] = {
9733         gen_helper_neon_widen_u8,
9734         gen_helper_neon_widen_u16,
9735         tcg_gen_extu_i32_i64,
9736     };
9737     NeonGenWidenFn *widen = widen_by_size[size];
9738     const int first_chunk = is_q ? 2 : 0; /* 32-bit chunks 2..3 when is_q */
9739     const int shift = 8 << size;
9740     TCGv_i64 wide[2];
9741     int i;
9742
9743     for (i = 0; i < 2; i++) {
9744         TCGv_i32 narrow = tcg_temp_new_i32();
9745         read_vec_element_i32(s, narrow, rn, first_chunk + i, MO_32);
9746         wide[i] = tcg_temp_new_i64();
9747         widen(wide[i], narrow);
9748         tcg_gen_shli_i64(wide[i], wide[i], shift);
9749         tcg_temp_free_i32(narrow);
9750     }
9751     /* Both source chunks are read by now; store the two 64-bit results. */
9752     for (i = 0; i < 2; i++) {
9753         write_vec_element(s, wide[i], rd, i, MO_64);
9754         tcg_temp_free_i64(wide[i]);
9755     }
9756 }
9757
9758 /* C3.6.17 AdvSIMD two reg misc
9759  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9760  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9761  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9762  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
      *
      * Translate one instruction of this encoding group.
      *
      * The function works in two stages:
      *  1. A decode switch on opcode (extended with size[1] and U for the
      *     floating point group) that rejects unallocated encodings and
      *     hands some instructions to dedicated handle_*() routines, in
      *     which case it returns immediately.
      *  2. For every case that leaves the switch with 'break', a generic
      *     per-pass loop: one 64-bit element per pass when size == 3
      *     (via handle_2misc_64), otherwise one 32-bit chunk per pass.
      *
      * @s: the DisasContext passed through to every helper called here
      * @insn: the 32-bit instruction word; size, opcode, U, Q, Rn and Rd
      *        are extracted from it as shown in the diagram above
9763  */
9764 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
9765 {
9766     int size = extract32(insn, 22, 2);
9767     int opcode = extract32(insn, 12, 5);
9768     bool u = extract32(insn, 29, 1);
9769     bool is_q = extract32(insn, 30, 1);
9770     int rn = extract32(insn, 5, 5);
9771     int rd = extract32(insn, 0, 5);
9772     bool need_fpstatus = false;
9773     bool need_rmode = false;
9774     int rmode = -1;
9775     TCGv_i32 tcg_rmode;
9776     TCGv_ptr tcg_fpstatus;
9777
          /* Stage 1: decode. A case that ends in 'return' has either raised
           * unallocated_encoding() or been fully handled by a helper; a case
           * that ends in 'break' continues to the generic loops below.
           */
9778     switch (opcode) {
9779     case 0x0: /* REV64, REV32 */
9780     case 0x1: /* REV16 */
9781         handle_rev(s, opcode, u, is_q, size, rn, rd);
9782         return;
9783     case 0x5: /* CNT, NOT, RBIT */
              /* Here U and size select the instruction rather than an
               * element size; any other U/size combination is unallocated.
               */
9784         if (u && size == 0) {
9785             /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
9786             size = 3;
9787             break;
9788         } else if (u && size == 1) {
9789             /* RBIT */
9790             break;
9791         } else if (!u && size == 0) {
9792             /* CNT */
9793             break;
9794         }
9795         unallocated_encoding(s);
9796         return;
9797     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
9798     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
9799         if (size == 3) {
9800             unallocated_encoding(s);
9801             return;
9802         }
9803         if (!fp_access_check(s)) {
9804             return;
9805         }
9806
9807         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
9808         return;
9809     case 0x4: /* CLS, CLZ */
9810         if (size == 3) {
9811             unallocated_encoding(s);
9812             return;
9813         }
9814         break;
9815     case 0x2: /* SADDLP, UADDLP */
9816     case 0x6: /* SADALP, UADALP */
9817         if (size == 3) {
9818             unallocated_encoding(s);
9819             return;
9820         }
9821         if (!fp_access_check(s)) {
9822             return;
9823         }
9824         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
9825         return;
9826     case 0x13: /* SHLL, SHLL2 */
9827         if (u == 0 || size == 3) {
9828             unallocated_encoding(s);
9829             return;
9830         }
9831         if (!fp_access_check(s)) {
9832             return;
9833         }
9834         handle_shll(s, is_q, size, rn, rd);
9835         return;
9836     case 0xa: /* CMLT */
9837         if (u == 1) {
9838             unallocated_encoding(s);
9839             return;
9840         }
9841         /* fall through */
9842     case 0x8: /* CMGT, CMGE */
9843     case 0x9: /* CMEQ, CMLE */
9844     case 0xb: /* ABS, NEG */
              /* 64-bit elements are only accepted in the Q (128-bit) form */
9845         if (size == 3 && !is_q) {
9846             unallocated_encoding(s);
9847             return;
9848         }
9849         break;
9850     case 0x3: /* SUQADD, USQADD */
9851         if (size == 3 && !is_q) {
9852             unallocated_encoding(s);
9853             return;
9854         }
9855         if (!fp_access_check(s)) {
9856             return;
9857         }
9858         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
9859         return;
9860     case 0x7: /* SQABS, SQNEG */
9861         if (size == 3 && !is_q) {
9862             unallocated_encoding(s);
9863             return;
9864         }
9865         break;
9866     case 0xc ... 0xf:
9867     case 0x16 ... 0x1d:
9868     case 0x1f:
9869     {
9870         /* Floating point: U, size[1] and opcode indicate operation;
9871          * size[0] indicates single or double precision.
              *
              * The opcode is widened to 7 bits for the inner switch:
              * bit 6 = U, bit 5 = size[1], bits [4:0] = the original opcode.
              * size is then rewritten to the element size (3 for double,
              * 2 for single) used by the generic loops below.
9872          */
9873         int is_double = extract32(size, 0, 1);
9874         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
9875         size = is_double ? 3 : 2;
9876         switch (opcode) {
9877         case 0x2f: /* FABS */
9878         case 0x6f: /* FNEG */
9879             if (size == 3 && !is_q) {
9880                 unallocated_encoding(s);
9881                 return;
9882             }
9883             break;
9884         case 0x1d: /* SCVTF */
9885         case 0x5d: /* UCVTF */
9886         {
9887             bool is_signed = (opcode == 0x1d) ? true : false;
                  /* 2 elements for double, else 4 (Q) or 2 (non-Q) singles */
9888             int elements = is_double ? 2 : is_q ? 4 : 2;
9889             if (is_double && !is_q) {
9890                 unallocated_encoding(s);
9891                 return;
9892             }
9893             if (!fp_access_check(s)) {
9894                 return;
9895             }
9896             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
9897             return;
9898         }
9899         case 0x2c: /* FCMGT (zero) */
9900         case 0x2d: /* FCMEQ (zero) */
9901         case 0x2e: /* FCMLT (zero) */
9902         case 0x6c: /* FCMGE (zero) */
9903         case 0x6d: /* FCMLE (zero) */
9904             if (size == 3 && !is_q) {
9905                 unallocated_encoding(s);
9906                 return;
9907             }
                  /* NOTE(review): unlike the other helper dispatches in this
                   * function there is no fp_access_check() here; presumably
                   * handle_2misc_fcmp_zero() performs it itself - confirm.
                   */
9908             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
9909             return;
9910         case 0x7f: /* FSQRT */
9911             if (size == 3 && !is_q) {
9912                 unallocated_encoding(s);
9913                 return;
9914             }
9915             break;
9916         case 0x1a: /* FCVTNS */
9917         case 0x1b: /* FCVTMS */
9918         case 0x3a: /* FCVTPS */
9919         case 0x3b: /* FCVTZS */
9920         case 0x5a: /* FCVTNU */
9921         case 0x5b: /* FCVTMU */
9922         case 0x7a: /* FCVTPU */
9923         case 0x7b: /* FCVTZU */
9924             need_fpstatus = true;
9925             need_rmode = true;
                  /* rmode bit 0 = opcode bit 5, rmode bit 1 = opcode bit 0 */
9926             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9927             if (size == 3 && !is_q) {
9928                 unallocated_encoding(s);
9929                 return;
9930             }
9931             break;
9932         case 0x5c: /* FCVTAU */
9933         case 0x1c: /* FCVTAS */
9934             need_fpstatus = true;
9935             need_rmode = true;
9936             rmode = FPROUNDING_TIEAWAY;
9937             if (size == 3 && !is_q) {
9938                 unallocated_encoding(s);
9939                 return;
9940             }
9941             break;
9942         case 0x3c: /* URECPE */
9943             if (size == 3) {
9944                 unallocated_encoding(s);
9945                 return;
9946             }
9947             /* fall through */
9948         case 0x3d: /* FRECPE */
9949         case 0x7d: /* FRSQRTE */
9950             if (size == 3 && !is_q) {
9951                 unallocated_encoding(s);
9952                 return;
9953             }
9954             if (!fp_access_check(s)) {
9955                 return;
9956             }
9957             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
9958             return;
9959         case 0x56: /* FCVTXN, FCVTXN2 */
9960             if (size == 2) {
9961                 unallocated_encoding(s);
9962                 return;
9963             }
9964             /* fall through */
9965         case 0x16: /* FCVTN, FCVTN2 */
9966             /* handle_2misc_narrow does a 2*size -> size operation, but these
9967              * instructions encode the source size rather than dest size.
9968              */
9969             if (!fp_access_check(s)) {
9970                 return;
9971             }
9972             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
9973             return;
9974         case 0x17: /* FCVTL, FCVTL2 */
9975             if (!fp_access_check(s)) {
9976                 return;
9977             }
9978             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
9979             return;
9980         case 0x18: /* FRINTN */
9981         case 0x19: /* FRINTM */
9982         case 0x38: /* FRINTP */
9983         case 0x39: /* FRINTZ */
9984             need_rmode = true;
                  /* rmode bit 0 = opcode bit 5, rmode bit 1 = opcode bit 0 */
9985             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9986             /* fall through */
9987         case 0x59: /* FRINTX */
9988         case 0x79: /* FRINTI */
                  /* FRINTX and FRINTI set no explicit rounding mode here */
9989             need_fpstatus = true;
9990             if (size == 3 && !is_q) {
9991                 unallocated_encoding(s);
9992                 return;
9993             }
9994             break;
9995         case 0x58: /* FRINTA */
9996             need_rmode = true;
9997             rmode = FPROUNDING_TIEAWAY;
9998             need_fpstatus = true;
9999             if (size == 3 && !is_q) {
10000                 unallocated_encoding(s);
10001                 return;
10002             }
10003             break;
10004         case 0x7c: /* URSQRTE */
10005             if (size == 3) {
10006                 unallocated_encoding(s);
10007                 return;
10008             }
10009             need_fpstatus = true;
10010             break;
10011         default:
10012             unallocated_encoding(s);
10013             return;
10014         }
10015         break;
10016     }
10017     default:
10018         unallocated_encoding(s);
10019         return;
10020     }
10021
           /* Stage 2: generic code generation for the cases that did 'break' */
10022     if (!fp_access_check(s)) {
10023         return;
10024     }
10025
10026     if (need_fpstatus) {
10027         tcg_fpstatus = get_fpstatus_ptr();
10028     } else {
10029         TCGV_UNUSED_PTR(tcg_fpstatus);
10030     }
10031     if (need_rmode) {
10032         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
               /* tcg_rmode is passed as both the output and the input of
                * set_rmode, and the identical call is repeated after the
                * loops; presumably the helper returns the previous mode so
                * that the second call restores it - TODO confirm.
                */
10033         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10034     } else {
10035         TCGV_UNUSED_I32(tcg_rmode);
10036     }
10037
10038     if (size == 3) {
10039         /* All 64-bit element operations can be shared with scalar 2misc */
10040         int pass;
10041
               /* One 64-bit element per pass: two for Q, otherwise one */
10042         for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
10043             TCGv_i64 tcg_op = tcg_temp_new_i64();
10044             TCGv_i64 tcg_res = tcg_temp_new_i64();
10045
10046             read_vec_element(s, tcg_op, rn, pass, MO_64);
10047
10048             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
10049                             tcg_rmode, tcg_fpstatus);
10050
10051             write_vec_element(s, tcg_res, rd, pass, MO_64);
10052
10053             tcg_temp_free_i64(tcg_res);
10054             tcg_temp_free_i64(tcg_op);
10055         }
10056     } else {
10057         int pass;
10058
               /* One 32-bit chunk per pass: four for Q, otherwise two. For
                * size < 2 each chunk is handed whole to the Neon helpers.
                */
10059         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
10060             TCGv_i32 tcg_op = tcg_temp_new_i32();
10061             TCGv_i32 tcg_res = tcg_temp_new_i32();
10062             TCGCond cond;
10063
10064             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
10065
10066             if (size == 2) {
10067                 /* Special cases for 32 bit elements */
10068                 switch (opcode) {
10069                 case 0xa: /* CMLT */
10070                     /* 32 bit integer comparison against zero, result is
10071                      * test ? (2^32 - 1) : 0. We implement via setcond(test)
10072                      * and inverting.
10073                      */
10074                     cond = TCG_COND_LT;
                       /* The CMGT/CMGE and CMEQ/CMLE cases below jump here
                        * after choosing their own condition.
                        */
10075                 do_cmop:
10076                     tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
10077                     tcg_gen_neg_i32(tcg_res, tcg_res);
10078                     break;
10079                 case 0x8: /* CMGT, CMGE */
10080                     cond = u ? TCG_COND_GE : TCG_COND_GT;
10081                     goto do_cmop;
10082                 case 0x9: /* CMEQ, CMLE */
10083                     cond = u ? TCG_COND_LE : TCG_COND_EQ;
10084                     goto do_cmop;
10085                 case 0x4: /* CLS, CLZ */
10086                     if (u) {
10087                         gen_helper_clz32(tcg_res, tcg_op);
10088                     } else {
10089                         gen_helper_cls32(tcg_res, tcg_op);
10090                     }
10091                     break;
10092                 case 0x7: /* SQABS, SQNEG */
10093                     if (u) {
10094                         gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
10095                     } else {
10096                         gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
10097                     }
10098                     break;
10099                 case 0xb: /* ABS, NEG */
10100                     if (u) {
10101                         tcg_gen_neg_i32(tcg_res, tcg_op);
10102                     } else {
                               /* ABS: negate, then keep the original value
                                * instead when it compares greater than zero.
                                */
10103                         TCGv_i32 tcg_zero = tcg_const_i32(0);
10104                         tcg_gen_neg_i32(tcg_res, tcg_op);
10105                         tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
10106                                             tcg_zero, tcg_op, tcg_res);
10107                         tcg_temp_free_i32(tcg_zero);
10108                     }
10109                     break;
10110                 case 0x2f: /* FABS */
10111                     gen_helper_vfp_abss(tcg_res, tcg_op);
10112                     break;
10113                 case 0x6f: /* FNEG */
10114                     gen_helper_vfp_negs(tcg_res, tcg_op);
10115                     break;
10116                 case 0x7f: /* FSQRT */
10117                     gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
10118                     break;
10119                 case 0x1a: /* FCVTNS */
10120                 case 0x1b: /* FCVTMS */
10121                 case 0x1c: /* FCVTAS */
10122                 case 0x3a: /* FCVTPS */
10123                 case 0x3b: /* FCVTZS */
10124                 {
                           /* All signed conversions share one helper call
                            * with a zero shift operand; the variants differ
                            * only in the rmode chosen during decode.
                            */
10125                     TCGv_i32 tcg_shift = tcg_const_i32(0);
10126                     gen_helper_vfp_tosls(tcg_res, tcg_op,
10127                                          tcg_shift, tcg_fpstatus);
10128                     tcg_temp_free_i32(tcg_shift);
10129                     break;
10130                 }
10131                 case 0x5a: /* FCVTNU */
10132                 case 0x5b: /* FCVTMU */
10133                 case 0x5c: /* FCVTAU */
10134                 case 0x7a: /* FCVTPU */
10135                 case 0x7b: /* FCVTZU */
10136                 {
                           /* Unsigned counterpart of the case above */
10137                     TCGv_i32 tcg_shift = tcg_const_i32(0);
10138                     gen_helper_vfp_touls(tcg_res, tcg_op,
10139                                          tcg_shift, tcg_fpstatus);
10140                     tcg_temp_free_i32(tcg_shift);
10141                     break;
10142                 }
10143                 case 0x18: /* FRINTN */
10144                 case 0x19: /* FRINTM */
10145                 case 0x38: /* FRINTP */
10146                 case 0x39: /* FRINTZ */
10147                 case 0x58: /* FRINTA */
10148                 case 0x79: /* FRINTI */
10149                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
10150                     break;
10151                 case 0x59: /* FRINTX */
10152                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
10153                     break;
10154                 case 0x7c: /* URSQRTE */
10155                     gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
10156                     break;
10157                 default:
10158                     g_assert_not_reached();
10159                 }
10160             } else {
10161                 /* Use helpers for 8 and 16 bit elements */
10162                 switch (opcode) {
10163                 case 0x5: /* CNT, RBIT */
10164                     /* For these two insns size is part of the opcode specifier
10165                      * (handled earlier); they always operate on byte elements.
10166                      */
10167                     if (u) {
10168                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
10169                     } else {
10170                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
10171                     }
10172                     break;
10173                 case 0x7: /* SQABS, SQNEG */
10174                 {
                           /* Table indexed as fns[size][u]: size picks the
                            * 8- or 16-bit helper, u picks qabs (0) or qneg (1).
                            */
10175                     NeonGenOneOpEnvFn *genfn;
10176                     static NeonGenOneOpEnvFn * const fns[2][2] = {
10177                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10178                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10179                     };
10180                     genfn = fns[size][u];
10181                     genfn(tcg_res, cpu_env, tcg_op);
10182                     break;
10183                 }
10184                 case 0x8: /* CMGT, CMGE */
10185                 case 0x9: /* CMEQ, CMLE */
10186                 case 0xa: /* CMLT */
10187                 {
                           /* Table indexed as fns[comp][size] */
10188                     static NeonGenTwoOpFn * const fns[3][2] = {
10189                         { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
10190                         { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
10191                         { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
10192                     };
10193                     NeonGenTwoOpFn *genfn;
10194                     int comp;
10195                     bool reverse;
10196                     TCGv_i32 tcg_zero = tcg_const_i32(0);
10197
10198                     /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
10199                     comp = (opcode - 0x8) * 2 + u;
10200                     /* ...but LE, LT are implemented as reverse GE, GT */
                           /* i.e. comp 3 (CMLE) maps to row 1 (cge) and comp 4
                            * (CMLT) to row 0 (cgt), with the zero operand and
                            * the element operand swapped in the call below.
                            */
10201                     reverse = (comp > 2);
10202                     if (reverse) {
10203                         comp = 4 - comp;
10204                     }
10205                     genfn = fns[comp][size];
10206                     if (reverse) {
10207                         genfn(tcg_res, tcg_zero, tcg_op);
10208                     } else {
10209                         genfn(tcg_res, tcg_op, tcg_zero);
10210                     }
10211                     tcg_temp_free_i32(tcg_zero);
10212                     break;
10213                 }
10214                 case 0xb: /* ABS, NEG */
10215                     if (u) {
                               /* NEG is generated as (0 - op) */
10216                         TCGv_i32 tcg_zero = tcg_const_i32(0);
10217                         if (size) {
10218                             gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
10219                         } else {
10220                             gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
10221                         }
10222                         tcg_temp_free_i32(tcg_zero);
10223                     } else {
10224                         if (size) {
10225                             gen_helper_neon_abs_s16(tcg_res, tcg_op);
10226                         } else {
10227                             gen_helper_neon_abs_s8(tcg_res, tcg_op);
10228                         }
10229                     }
10230                     break;
10231                 case 0x4: /* CLS, CLZ */
10232                     if (u) {
10233                         if (size == 0) {
10234                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
10235                         } else {
10236                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
10237                         }
10238                     } else {
10239                         if (size == 0) {
10240                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
10241                         } else {
10242                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
10243                         }
10244                     }
10245                     break;
10246                 default:
10247                     g_assert_not_reached();
10248                 }
10249             }
10250
10251             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10252
10253             tcg_temp_free_i32(tcg_res);
10254             tcg_temp_free_i32(tcg_op);
10255         }
10256     }
           /* Non-Q form: the loops above only wrote the low passes of rd, so
            * clear_vec_high() is called for rd (presumably zeroing the upper
            * half - confirm against its definition).
            */
10257     if (!is_q) {
10258         clear_vec_high(s, rd);
10259     }
10260
10261     if (need_rmode) {
               /* Second set_rmode call with the value left in tcg_rmode by
                * the first one (see the note where it is set up above).
                */
10262         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10263         tcg_temp_free_i32(tcg_rmode);
10264     }
10265     if (need_fpstatus) {
10266         tcg_temp_free_ptr(tcg_fpstatus);
10267     }
10268 }
10269
10270 /* C3.6.13 AdvSIMD scalar x indexed element
10271  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10272  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10273  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10274  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10275  * C3.6.18 AdvSIMD vector x indexed element
10276  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10277  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10278  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10279  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10280  */
10281 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
10282 {
10283     /* This encoding has two kinds of instruction:
10284      *  normal, where we perform elt x idxelt => elt for each
10285      *     element in the vector
10286      *  long, where we perform elt x idxelt and generate a result of
10287      *     double the width of the input element
10288      * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
10289      */
10290     bool is_scalar = extract32(insn, 28, 1);
10291     bool is_q = extract32(insn, 30, 1);
10292     bool u = extract32(insn, 29, 1);
10293     int size = extract32(insn, 22, 2);
10294     int l = extract32(insn, 21, 1);
10295     int m = extract32(insn, 20, 1);
10296     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
10297     int rm = extract32(insn, 16, 4);
10298     int opcode = extract32(insn, 12, 4);
10299     int h = extract32(insn, 11, 1);
10300     int rn = extract32(insn, 5, 5);
10301     int rd = extract32(insn, 0, 5);
10302     bool is_long = false;
10303     bool is_fp = false;
10304     int index;
10305     TCGv_ptr fpst;
10306
10307     switch (opcode) {
10308     case 0x0: /* MLA */
10309     case 0x4: /* MLS */
10310         if (!u || is_scalar) {
10311             unallocated_encoding(s);
10312             return;
10313         }
10314         break;
10315     case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10316     case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10317     case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
10318         if (is_scalar) {
10319             unallocated_encoding(s);
10320             return;
10321         }
10322         is_long = true;
10323         break;
10324     case 0x3: /* SQDMLAL, SQDMLAL2 */
10325     case 0x7: /* SQDMLSL, SQDMLSL2 */
10326     case 0xb: /* SQDMULL, SQDMULL2 */
10327         is_long = true;
10328         /* fall through */
10329     case 0xc: /* SQDMULH */
10330     case 0xd: /* SQRDMULH */
10331         if (u) {
10332             unallocated_encoding(s);
10333             return;
10334         }
10335         break;
10336     case 0x8: /* MUL */
10337         if (u || is_scalar) {
10338             unallocated_encoding(s);
10339             return;
10340         }
10341         break;
10342     case 0x1: /* FMLA */
10343     case 0x5: /* FMLS */
10344         if (u) {
10345             unallocated_encoding(s);
10346             return;
10347         }
10348         /* fall through */
10349     case 0x9: /* FMUL, FMULX */
10350         if (!extract32(size, 1, 1)) {
10351             unallocated_encoding(s);
10352             return;
10353         }
10354         is_fp = true;
10355         break;
10356     default:
10357         unallocated_encoding(s);
10358         return;
10359     }
10360
10361     if (is_fp) {
10362         /* low bit of size indicates single/double */
10363         size = extract32(size, 0, 1) ? 3 : 2;
10364         if (size == 2) {
10365             index = h << 1 | l;
10366         } else {
10367             if (l || !is_q) {
10368                 unallocated_encoding(s);
10369                 return;
10370             }
10371             index = h;
10372         }
10373         rm |= (m << 4);
10374     } else {
10375         switch (size) {
10376         case 1:
10377             index = h << 2 | l << 1 | m;
10378             break;
10379         case 2:
10380             index = h << 1 | l;
10381             rm |= (m << 4);
10382             break;
10383         default:
10384             unallocated_encoding(s);
10385             return;
10386         }
10387     }
10388
10389     if (!fp_access_check(s)) {
10390         return;
10391     }
10392
10393     if (is_fp) {
10394         fpst = get_fpstatus_ptr();
10395     } else {
10396         TCGV_UNUSED_PTR(fpst);
10397     }
10398
10399     if (size == 3) {
10400         TCGv_i64 tcg_idx = tcg_temp_new_i64();
10401         int pass;
10402
10403         assert(is_fp && is_q && !is_long);
10404
10405         read_vec_element(s, tcg_idx, rm, index, MO_64);
10406
10407         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10408             TCGv_i64 tcg_op = tcg_temp_new_i64();
10409             TCGv_i64 tcg_res = tcg_temp_new_i64();
10410
10411             read_vec_element(s, tcg_op, rn, pass, MO_64);
10412
10413             switch (opcode) {
10414             case 0x5: /* FMLS */
10415                 /* As usual for ARM, separate negation for fused multiply-add */
10416                 gen_helper_vfp_negd(tcg_op, tcg_op);
10417                 /* fall through */
10418             case 0x1: /* FMLA */
10419                 read_vec_element(s, tcg_res, rd, pass, MO_64);
10420                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10421                 break;
10422             case 0x9: /* FMUL, FMULX */
10423                 if (u) {
10424                     gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
10425                 } else {
10426                     gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
10427                 }
10428                 break;
10429             default:
10430                 g_assert_not_reached();
10431             }
10432
10433             write_vec_element(s, tcg_res, rd, pass, MO_64);
10434             tcg_temp_free_i64(tcg_op);
10435             tcg_temp_free_i64(tcg_res);
10436         }
10437
10438         if (is_scalar) {
10439             clear_vec_high(s, rd);
10440         }
10441
10442         tcg_temp_free_i64(tcg_idx);
10443     } else if (!is_long) {
10444         /* 32 bit floating point, or 16 or 32 bit integer.
10445          * For the 16 bit scalar case we use the usual Neon helpers and
10446          * rely on the fact that 0 op 0 == 0 with no side effects.
10447          */
10448         TCGv_i32 tcg_idx = tcg_temp_new_i32();
10449         int pass, maxpasses;
10450
10451         if (is_scalar) {
10452             maxpasses = 1;
10453         } else {
10454             maxpasses = is_q ? 4 : 2;
10455         }
10456
10457         read_vec_element_i32(s, tcg_idx, rm, index, size);
10458
10459         if (size == 1 && !is_scalar) {
10460             /* The simplest way to handle the 16x16 indexed ops is to duplicate
10461              * the index into both halves of the 32 bit tcg_idx and then use
10462              * the usual Neon helpers.
10463              */
10464             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10465         }
10466
10467         for (pass = 0; pass < maxpasses; pass++) {
10468             TCGv_i32 tcg_op = tcg_temp_new_i32();
10469             TCGv_i32 tcg_res = tcg_temp_new_i32();
10470
10471             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
10472
10473             switch (opcode) {
10474             case 0x0: /* MLA */
10475             case 0x4: /* MLS */
10476             case 0x8: /* MUL */
10477             {
10478                 static NeonGenTwoOpFn * const fns[2][2] = {
10479                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
10480                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
10481                 };
10482                 NeonGenTwoOpFn *genfn;
10483                 bool is_sub = opcode == 0x4;
10484
10485                 if (size == 1) {
10486                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
10487                 } else {
10488                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
10489                 }
10490                 if (opcode == 0x8) {
10491                     break;
10492                 }
10493                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
10494                 genfn = fns[size - 1][is_sub];
10495                 genfn(tcg_res, tcg_op, tcg_res);
10496                 break;
10497             }
10498             case 0x5: /* FMLS */
10499                 /* As usual for ARM, separate negation for fused multiply-add */
10500                 gen_helper_vfp_negs(tcg_op, tcg_op);
10501                 /* fall through */
10502             case 0x1: /* FMLA */
10503                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10504                 gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10505                 break;
10506             case 0x9: /* FMUL, FMULX */
10507                 if (u) {
10508                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
10509                 } else {
10510                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
10511                 }
10512                 break;
10513             case 0xc: /* SQDMULH */
10514                 if (size == 1) {
10515                     gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
10516                                                tcg_op, tcg_idx);
10517                 } else {
10518                     gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
10519                                                tcg_op, tcg_idx);
10520                 }
10521                 break;
10522             case 0xd: /* SQRDMULH */
10523                 if (size == 1) {
10524                     gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
10525                                                 tcg_op, tcg_idx);
10526                 } else {
10527                     gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
10528                                                 tcg_op, tcg_idx);
10529                 }
10530                 break;
10531             default:
10532                 g_assert_not_reached();
10533             }
10534
10535             if (is_scalar) {
10536                 write_fp_sreg(s, rd, tcg_res);
10537             } else {
10538                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10539             }
10540
10541             tcg_temp_free_i32(tcg_op);
10542             tcg_temp_free_i32(tcg_res);
10543         }
10544
10545         tcg_temp_free_i32(tcg_idx);
10546
10547         if (!is_q) {
10548             clear_vec_high(s, rd);
10549         }
10550     } else {
10551         /* long ops: 16x16->32 or 32x32->64 */
10552         TCGv_i64 tcg_res[2];
10553         int pass;
10554         bool satop = extract32(opcode, 0, 1);
10555         TCGMemOp memop = MO_32;
10556
10557         if (satop || !u) {
10558             memop |= MO_SIGN;
10559         }
10560
10561         if (size == 2) {
10562             TCGv_i64 tcg_idx = tcg_temp_new_i64();
10563
10564             read_vec_element(s, tcg_idx, rm, index, memop);
10565
10566             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10567                 TCGv_i64 tcg_op = tcg_temp_new_i64();
10568                 TCGv_i64 tcg_passres;
10569                 int passelt;
10570
10571                 if (is_scalar) {
10572                     passelt = 0;
10573                 } else {
10574                     passelt = pass + (is_q * 2);
10575                 }
10576
10577                 read_vec_element(s, tcg_op, rn, passelt, memop);
10578
10579                 tcg_res[pass] = tcg_temp_new_i64();
10580
10581                 if (opcode == 0xa || opcode == 0xb) {
10582                     /* Non-accumulating ops */
10583                     tcg_passres = tcg_res[pass];
10584                 } else {
10585                     tcg_passres = tcg_temp_new_i64();
10586                 }
10587
10588                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
10589                 tcg_temp_free_i64(tcg_op);
10590
10591                 if (satop) {
10592                     /* saturating, doubling */
10593                     gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10594                                                       tcg_passres, tcg_passres);
10595                 }
10596
10597                 if (opcode == 0xa || opcode == 0xb) {
10598                     continue;
10599                 }
10600
10601                 /* Accumulating op: handle accumulate step */
10602                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10603
10604                 switch (opcode) {
10605                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10606                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10607                     break;
10608                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10609                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10610                     break;
10611                 case 0x7: /* SQDMLSL, SQDMLSL2 */
10612                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
10613                     /* fall through */
10614                 case 0x3: /* SQDMLAL, SQDMLAL2 */
10615                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10616                                                       tcg_res[pass],
10617                                                       tcg_passres);
10618                     break;
10619                 default:
10620                     g_assert_not_reached();
10621                 }
10622                 tcg_temp_free_i64(tcg_passres);
10623             }
10624             tcg_temp_free_i64(tcg_idx);
10625
10626             if (is_scalar) {
10627                 clear_vec_high(s, rd);
10628             }
10629         } else {
10630             TCGv_i32 tcg_idx = tcg_temp_new_i32();
10631
10632             assert(size == 1);
10633             read_vec_element_i32(s, tcg_idx, rm, index, size);
10634
10635             if (!is_scalar) {
10636                 /* The simplest way to handle the 16x16 indexed ops is to
10637                  * duplicate the index into both halves of the 32 bit tcg_idx
10638                  * and then use the usual Neon helpers.
10639                  */
10640                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10641             }
10642
10643             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10644                 TCGv_i32 tcg_op = tcg_temp_new_i32();
10645                 TCGv_i64 tcg_passres;
10646
10647                 if (is_scalar) {
10648                     read_vec_element_i32(s, tcg_op, rn, pass, size);
10649                 } else {
10650                     read_vec_element_i32(s, tcg_op, rn,
10651                                          pass + (is_q * 2), MO_32);
10652                 }
10653
10654                 tcg_res[pass] = tcg_temp_new_i64();
10655
10656                 if (opcode == 0xa || opcode == 0xb) {
10657                     /* Non-accumulating ops */
10658                     tcg_passres = tcg_res[pass];
10659                 } else {
10660                     tcg_passres = tcg_temp_new_i64();
10661                 }
10662
10663                 if (memop & MO_SIGN) {
10664                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
10665                 } else {
10666                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
10667                 }
10668                 if (satop) {
10669                     gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10670                                                       tcg_passres, tcg_passres);
10671                 }
10672                 tcg_temp_free_i32(tcg_op);
10673
10674                 if (opcode == 0xa || opcode == 0xb) {
10675                     continue;
10676                 }
10677
10678                 /* Accumulating op: handle accumulate step */
10679                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10680
10681                 switch (opcode) {
10682                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10683                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
10684                                              tcg_passres);
10685                     break;
10686                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10687                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
10688                                              tcg_passres);
10689                     break;
10690                 case 0x7: /* SQDMLSL, SQDMLSL2 */
10691                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10692                     /* fall through */
10693                 case 0x3: /* SQDMLAL, SQDMLAL2 */
10694                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10695                                                       tcg_res[pass],
10696                                                       tcg_passres);
10697                     break;
10698                 default:
10699                     g_assert_not_reached();
10700                 }
10701                 tcg_temp_free_i64(tcg_passres);
10702             }
10703             tcg_temp_free_i32(tcg_idx);
10704
10705             if (is_scalar) {
10706                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
10707             }
10708         }
10709
10710         if (is_scalar) {
10711             tcg_res[1] = tcg_const_i64(0);
10712         }
10713
10714         for (pass = 0; pass < 2; pass++) {
10715             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10716             tcg_temp_free_i64(tcg_res[pass]);
10717         }
10718     }
10719
10720     if (!TCGV_IS_UNUSED_PTR(fpst)) {
10721         tcg_temp_free_ptr(fpst);
10722     }
10723 }
10724
10725 /* C3.6.19 Crypto AES
10726  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10727  * +-----------------+------+-----------+--------+-----+------+------+
10728  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10729  * +-----------------+------+-----------+--------+-----+------+------+
10730  */
10731 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
10732 {
10733     int size = extract32(insn, 22, 2);
10734     int opcode = extract32(insn, 12, 5);
10735     int rn = extract32(insn, 5, 5);
10736     int rd = extract32(insn, 0, 5);
10737     int decrypt;
10738     TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt;
10739     CryptoThreeOpEnvFn *genfn;
10740
10741     if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
10742         || size != 0) {
10743         unallocated_encoding(s);
10744         return;
10745     }
10746
10747     switch (opcode) {
10748     case 0x4: /* AESE */
10749         decrypt = 0;
10750         genfn = gen_helper_crypto_aese;
10751         break;
10752     case 0x6: /* AESMC */
10753         decrypt = 0;
10754         genfn = gen_helper_crypto_aesmc;
10755         break;
10756     case 0x5: /* AESD */
10757         decrypt = 1;
10758         genfn = gen_helper_crypto_aese;
10759         break;
10760     case 0x7: /* AESIMC */
10761         decrypt = 1;
10762         genfn = gen_helper_crypto_aesmc;
10763         break;
10764     default:
10765         unallocated_encoding(s);
10766         return;
10767     }
10768
10769     /* Note that we convert the Vx register indexes into the
10770      * index within the vfp.regs[] array, so we can share the
10771      * helper with the AArch32 instructions.
10772      */
10773     tcg_rd_regno = tcg_const_i32(rd << 1);
10774     tcg_rn_regno = tcg_const_i32(rn << 1);
10775     tcg_decrypt = tcg_const_i32(decrypt);
10776
10777     genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt);
10778
10779     tcg_temp_free_i32(tcg_rd_regno);
10780     tcg_temp_free_i32(tcg_rn_regno);
10781     tcg_temp_free_i32(tcg_decrypt);
10782 }
10783
10784 /* C3.6.20 Crypto three-reg SHA
10785  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
10786  * +-----------------+------+---+------+---+--------+-----+------+------+
10787  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
10788  * +-----------------+------+---+------+---+--------+-----+------+------+
10789  */
10790 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
10791 {
10792     int size = extract32(insn, 22, 2);
10793     int opcode = extract32(insn, 12, 3);
10794     int rm = extract32(insn, 16, 5);
10795     int rn = extract32(insn, 5, 5);
10796     int rd = extract32(insn, 0, 5);
10797     CryptoThreeOpEnvFn *genfn;
10798     TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno;
10799     int feature = ARM_FEATURE_V8_SHA256;
10800
10801     if (size != 0) {
10802         unallocated_encoding(s);
10803         return;
10804     }
10805
10806     switch (opcode) {
10807     case 0: /* SHA1C */
10808     case 1: /* SHA1P */
10809     case 2: /* SHA1M */
10810     case 3: /* SHA1SU0 */
10811         genfn = NULL;
10812         feature = ARM_FEATURE_V8_SHA1;
10813         break;
10814     case 4: /* SHA256H */
10815         genfn = gen_helper_crypto_sha256h;
10816         break;
10817     case 5: /* SHA256H2 */
10818         genfn = gen_helper_crypto_sha256h2;
10819         break;
10820     case 6: /* SHA256SU1 */
10821         genfn = gen_helper_crypto_sha256su1;
10822         break;
10823     default:
10824         unallocated_encoding(s);
10825         return;
10826     }
10827
10828     if (!arm_dc_feature(s, feature)) {
10829         unallocated_encoding(s);
10830         return;
10831     }
10832
10833     tcg_rd_regno = tcg_const_i32(rd << 1);
10834     tcg_rn_regno = tcg_const_i32(rn << 1);
10835     tcg_rm_regno = tcg_const_i32(rm << 1);
10836
10837     if (genfn) {
10838         genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno);
10839     } else {
10840         TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
10841
10842         gen_helper_crypto_sha1_3reg(cpu_env, tcg_rd_regno,
10843                                     tcg_rn_regno, tcg_rm_regno, tcg_opcode);
10844         tcg_temp_free_i32(tcg_opcode);
10845     }
10846
10847     tcg_temp_free_i32(tcg_rd_regno);
10848     tcg_temp_free_i32(tcg_rn_regno);
10849     tcg_temp_free_i32(tcg_rm_regno);
10850 }
10851
10852 /* C3.6.21 Crypto two-reg SHA
10853  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10854  * +-----------------+------+-----------+--------+-----+------+------+
10855  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10856  * +-----------------+------+-----------+--------+-----+------+------+
10857  */
10858 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
10859 {
10860     int size = extract32(insn, 22, 2);
10861     int opcode = extract32(insn, 12, 5);
10862     int rn = extract32(insn, 5, 5);
10863     int rd = extract32(insn, 0, 5);
10864     CryptoTwoOpEnvFn *genfn;
10865     int feature;
10866     TCGv_i32 tcg_rd_regno, tcg_rn_regno;
10867
10868     if (size != 0) {
10869         unallocated_encoding(s);
10870         return;
10871     }
10872
10873     switch (opcode) {
10874     case 0: /* SHA1H */
10875         feature = ARM_FEATURE_V8_SHA1;
10876         genfn = gen_helper_crypto_sha1h;
10877         break;
10878     case 1: /* SHA1SU1 */
10879         feature = ARM_FEATURE_V8_SHA1;
10880         genfn = gen_helper_crypto_sha1su1;
10881         break;
10882     case 2: /* SHA256SU0 */
10883         feature = ARM_FEATURE_V8_SHA256;
10884         genfn = gen_helper_crypto_sha256su0;
10885         break;
10886     default:
10887         unallocated_encoding(s);
10888         return;
10889     }
10890
10891     if (!arm_dc_feature(s, feature)) {
10892         unallocated_encoding(s);
10893         return;
10894     }
10895
10896     tcg_rd_regno = tcg_const_i32(rd << 1);
10897     tcg_rn_regno = tcg_const_i32(rn << 1);
10898
10899     genfn(cpu_env, tcg_rd_regno, tcg_rn_regno);
10900
10901     tcg_temp_free_i32(tcg_rd_regno);
10902     tcg_temp_free_i32(tcg_rn_regno);
10903 }
10904
10905 /* C3.6 Data processing - SIMD, inc Crypto
10906  *
10907  * As the decode gets a little complex we are using a table based
10908  * approach for this part of the decode.
10909  */
10910 static const AArch64DecodeTable data_proc_simd[] = {
10911     /* pattern  ,  mask     ,  fn                        */
10912     { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
10913     { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
10914     { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
10915     { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
10916     { 0x0e000400, 0x9fe08400, disas_simd_copy },
10917     { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
10918     /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
10919     { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
10920     { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
10921     { 0x0e000000, 0xbf208c00, disas_simd_tb },
10922     { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
10923     { 0x2e000000, 0xbf208400, disas_simd_ext },
10924     { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
10925     { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
10926     { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
10927     { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
10928     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
10929     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
10930     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
10931     { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
10932     { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
10933     { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
10934     { 0x00000000, 0x00000000, NULL }
10935 };
10936
10937 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
10938 {
10939     /* Note that this is called with all non-FP cases from
10940      * table C3-6 so it must UNDEF for entries not specifically
10941      * allocated to instructions in that table.
10942      */
10943     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
10944     if (fn) {
10945         fn(s, insn);
10946     } else {
10947         unallocated_encoding(s);
10948     }
10949 }
10950
10951 /* C3.6 Data processing - SIMD and floating point */
10952 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
10953 {
10954     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
10955         disas_data_proc_fp(s, insn);
10956     } else {
10957         /* SIMD, including crypto */
10958         disas_data_proc_simd(s, insn);
10959     }
10960 }
10961
10962 /* C3.1 A64 instruction index by encoding */
10963 static void disas_a64_insn(CPUARMState *env, DisasContext *s)
10964 {
10965     uint32_t insn;
10966
10967     insn = arm_ldl_code(env, s->pc, s->bswap_code);
10968     s->insn = insn;
10969     s->pc += 4;
10970
10971     s->fp_access_checked = false;
10972
10973     switch (extract32(insn, 25, 4)) {
10974     case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
10975         unallocated_encoding(s);
10976         break;
10977     case 0x8: case 0x9: /* Data processing - immediate */
10978         disas_data_proc_imm(s, insn);
10979         break;
10980     case 0xa: case 0xb: /* Branch, exception generation and system insns */
10981         disas_b_exc_sys(s, insn);
10982         break;
10983     case 0x4:
10984     case 0x6:
10985     case 0xc:
10986     case 0xe:      /* Loads and stores */
10987         disas_ldst(s, insn);
10988         break;
10989     case 0x5:
10990     case 0xd:      /* Data processing - register */
10991         disas_data_proc_reg(s, insn);
10992         break;
10993     case 0x7:
10994     case 0xf:      /* Data processing - SIMD and floating point */
10995         disas_data_proc_simd_fp(s, insn);
10996         break;
10997     default:
10998         assert(FALSE); /* all 15 cases should be handled above */
10999         break;
11000     }
11001
11002     /* if we allocated any temporaries, free them here */
11003     free_tmp_a64(s);
11004 }
11005
11006 void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
11007 {
11008     CPUState *cs = CPU(cpu);
11009     CPUARMState *env = &cpu->env;
11010     DisasContext dc1, *dc = &dc1;
11011     target_ulong pc_start;
11012     target_ulong next_page_start;
11013     int num_insns;
11014     int max_insns;
11015
11016     pc_start = tb->pc;
11017
11018     dc->tb = tb;
11019
11020     dc->is_jmp = DISAS_NEXT;
11021     dc->pc = pc_start;
11022     dc->singlestep_enabled = cs->singlestep_enabled;
11023     dc->condjmp = 0;
11024
11025     dc->aarch64 = 1;
11026     /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
11027      * there is no secure EL1, so we route exceptions to EL3.
11028      */
11029     dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
11030                                !arm_el_is_aa64(env, 3);
11031     dc->thumb = 0;
11032     dc->bswap_code = 0;
11033     dc->condexec_mask = 0;
11034     dc->condexec_cond = 0;
11035     dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags);
11036     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
11037 #if !defined(CONFIG_USER_ONLY)
11038     dc->user = (dc->current_el == 0);
11039 #endif
11040     dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(tb->flags);
11041     dc->vec_len = 0;
11042     dc->vec_stride = 0;
11043     dc->cp_regs = cpu->cp_regs;
11044     dc->features = env->features;
11045
11046     /* Single step state. The code-generation logic here is:
11047      *  SS_ACTIVE == 0:
11048      *   generate code with no special handling for single-stepping (except
11049      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
11050      *   this happens anyway because those changes are all system register or
11051      *   PSTATE writes).
11052      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
11053      *   emit code for one insn
11054      *   emit code to clear PSTATE.SS
11055      *   emit code to generate software step exception for completed step
11056      *   end TB (as usual for having generated an exception)
11057      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
11058      *   emit code to generate a software step exception
11059      *   end the TB
11060      */
11061     dc->ss_active = ARM_TBFLAG_SS_ACTIVE(tb->flags);
11062     dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(tb->flags);
11063     dc->is_ldex = false;
11064     dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);
11065
11066     init_tmp_a64_array(dc);
11067
11068     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
11069     num_insns = 0;
11070     max_insns = tb->cflags & CF_COUNT_MASK;
11071     if (max_insns == 0) {
11072         max_insns = CF_COUNT_MASK;
11073     }
11074     if (max_insns > TCG_MAX_INSNS) {
11075         max_insns = TCG_MAX_INSNS;
11076     }
11077
11078     gen_tb_start(tb);
11079
11080     tcg_clear_temp_count();
11081
11082     do {
11083         tcg_gen_insn_start(dc->pc, 0);
11084         num_insns++;
11085
11086         if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
11087             CPUBreakpoint *bp;
11088             QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
11089                 if (bp->pc == dc->pc) {
11090                     if (bp->flags & BP_CPU) {
11091                         gen_a64_set_pc_im(dc->pc);
11092                         gen_helper_check_breakpoints(cpu_env);
11093                         /* End the TB early; it likely won't be executed */
11094                         dc->is_jmp = DISAS_UPDATE;
11095                     } else {
11096                         gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
11097                         /* The address covered by the breakpoint must be
11098                            included in [tb->pc, tb->pc + tb->size) in order
11099                            to for it to be properly cleared -- thus we
11100                            increment the PC here so that the logic setting
11101                            tb->size below does the right thing.  */
11102                         dc->pc += 4;
11103                         goto done_generating;
11104                     }
11105                     break;
11106                 }
11107             }
11108         }
11109
11110         if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
11111             gen_io_start();
11112         }
11113
11114         if (dc->ss_active && !dc->pstate_ss) {
11115             /* Singlestep state is Active-pending.
11116              * If we're in this state at the start of a TB then either
11117              *  a) we just took an exception to an EL which is being debugged
11118              *     and this is the first insn in the exception handler
11119              *  b) debug exceptions were masked and we just unmasked them
11120              *     without changing EL (eg by clearing PSTATE.D)
11121              * In either case we're going to take a swstep exception in the
11122              * "did not step an insn" case, and so the syndrome ISV and EX
11123              * bits should be zero.
11124              */
11125             assert(num_insns == 1);
11126             gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
11127                           default_exception_el(dc));
11128             dc->is_jmp = DISAS_EXC;
11129             break;
11130         }
11131
11132         disas_a64_insn(env, dc);
11133
11134         if (tcg_check_temp_count()) {
11135             fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
11136                     dc->pc);
11137         }
11138
11139         /* Translation stops when a conditional branch is encountered.
11140          * Otherwise the subsequent code could get translated several times.
11141          * Also stop translation when a page boundary is reached.  This
11142          * ensures prefetch aborts occur at the right place.
11143          */
11144     } while (!dc->is_jmp && !tcg_op_buf_full() &&
11145              !cs->singlestep_enabled &&
11146              !singlestep &&
11147              !dc->ss_active &&
11148              dc->pc < next_page_start &&
11149              num_insns < max_insns);
11150
11151     if (tb->cflags & CF_LAST_IO) {
11152         gen_io_end();
11153     }
11154
11155     if (unlikely(cs->singlestep_enabled || dc->ss_active)
11156         && dc->is_jmp != DISAS_EXC) {
11157         /* Note that this means single stepping WFI doesn't halt the CPU.
11158          * For conditional branch insns this is harmless unreachable code as
11159          * gen_goto_tb() has already handled emitting the debug exception
11160          * (and thus a tb-jump is not possible when singlestepping).
11161          */
11162         assert(dc->is_jmp != DISAS_TB_JUMP);
11163         if (dc->is_jmp != DISAS_JUMP) {
11164             gen_a64_set_pc_im(dc->pc);
11165         }
11166         if (cs->singlestep_enabled) {
11167             gen_exception_internal(EXCP_DEBUG);
11168         } else {
11169             gen_step_complete_exception(dc);
11170         }
11171     } else {
11172         switch (dc->is_jmp) {
11173         case DISAS_NEXT:
11174             gen_goto_tb(dc, 1, dc->pc);
11175             break;
11176         default:
11177         case DISAS_UPDATE:
11178             gen_a64_set_pc_im(dc->pc);
11179             /* fall through */
11180         case DISAS_JUMP:
11181             /* indicate that the hash table must be used to find the next TB */
11182             tcg_gen_exit_tb(0);
11183             break;
11184         case DISAS_TB_JUMP:
11185         case DISAS_EXC:
11186         case DISAS_SWI:
11187             break;
11188         case DISAS_WFE:
11189             gen_a64_set_pc_im(dc->pc);
11190             gen_helper_wfe(cpu_env);
11191             break;
11192         case DISAS_YIELD:
11193             gen_a64_set_pc_im(dc->pc);
11194             gen_helper_yield(cpu_env);
11195             break;
11196         case DISAS_WFI:
11197             /* This is a special case because we don't want to just halt the CPU
11198              * if trying to debug across a WFI.
11199              */
11200             gen_a64_set_pc_im(dc->pc);
11201             gen_helper_wfi(cpu_env);
11202             /* The helper doesn't necessarily throw an exception, but we
11203              * must go back to the main loop to check for interrupts anyway.
11204              */
11205             tcg_gen_exit_tb(0);
11206             break;
11207         }
11208     }
11209
11210 done_generating:
11211     gen_tb_end(tb, num_insns);
11212
11213 #ifdef DEBUG_DISAS
11214     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
11215         qemu_log("----------------\n");
11216         qemu_log("IN: %s\n", lookup_symbol(pc_start));
11217         log_target_disas(cs, pc_start, dc->pc - pc_start,
11218                          4 | (dc->bswap_code << 1));
11219         qemu_log("\n");
11220     }
11221 #endif
11222     tb->size = dc->pc - pc_start;
11223     tb->icount = num_insns;
11224 }