target-arm/translate-a64.c

   1 /*
   2  *  AArch64 translation
   3  *
   4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include <stdarg.h>
  20 #include <stdlib.h>
  21 #include <stdio.h>
  22 #include <string.h>
  23 #include <inttypes.h>
  24
  25 #include "cpu.h"
  26 #include "tcg-op.h"
  27 #include "qemu/log.h"
  28 #include "arm_ldst.h"
  29 #include "translate.h"
  30 #include "internals.h"
  31 #include "qemu/host-utils.h"
  32
  33 #include "exec/semihost.h"
  34 #include "exec/gen-icount.h"
  35
  36 #include "exec/helper-proto.h"
  37 #include "exec/helper-gen.h"
  38
  39 #include "trace-tcg.h"
  40
/* TCG globals for the general purpose registers; entry i is bound to
 * CPUARMState.xregs[i] by a64_translate_init() (index 31 is SP).
 */
static TCGv_i64 cpu_X[32];
/* TCG global bound to CPUARMState.pc */
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling: TCG global bound to
 * CPUARMState.exclusive_high.
 */
static TCGv_i64 cpu_exclusive_high;
  46
  47 static const char *regnames[] = {
  48     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
  49     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
  50     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
  51     "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
  52 };
  53
/* Symbolic names for the four shift kinds.  The values are fixed
 * explicitly; presumably they match the 2-bit shift-type field of the
 * A64 instruction encodings -- confirm against the decoders using them.
 */
enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};
  60
/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
/* Decode handler: receives the translation context and the 32-bit insn. */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

/* One decoder table entry.
 * NOTE(review): the lookup code is not visible here; presumably an entry
 * matches when (insn & mask) == pattern -- confirm at the lookup site.
 */
typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;
  71
/* Function prototypes for the gen_ functions used to call Neon and
 * crypto helpers.  The *EnvFn variants carry a TCGv_ptr argument
 * (presumably cpu_env) and the *OPFn variants take a trailing TCGv_ptr
 * (presumably a float status pointer) -- confirm against the callers.
 */
typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
  86
  87 /* initialize TCG globals.  */
  88 void a64_translate_init(void)
  89 {
  90     int i;
  91
  92     cpu_pc = tcg_global_mem_new_i64(TCG_AREG0,
  93                                     offsetof(CPUARMState, pc),
  94                                     "pc");
  95     for (i = 0; i < 32; i++) {
  96         cpu_X[i] = tcg_global_mem_new_i64(TCG_AREG0,
  97                                           offsetof(CPUARMState, xregs[i]),
  98                                           regnames[i]);
  99     }
 100
 101     cpu_exclusive_high = tcg_global_mem_new_i64(TCG_AREG0,
 102         offsetof(CPUARMState, exclusive_high), "exclusive_high");
 103 }
 104
 105 static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s)
 106 {
 107     /* Return the mmu_idx to use for A64 "unprivileged load/store" insns:
 108      *  if EL1, access as if EL0; otherwise access at current EL
 109      */
 110     switch (s->mmu_idx) {
 111     case ARMMMUIdx_S12NSE1:
 112         return ARMMMUIdx_S12NSE0;
 113     case ARMMMUIdx_S1SE1:
 114         return ARMMMUIdx_S1SE0;
 115     case ARMMMUIdx_S2NS:
 116         g_assert_not_reached();
 117     default:
 118         return s->mmu_idx;
 119     }
 120 }
 121
 122 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
 123                             fprintf_function cpu_fprintf, int flags)
 124 {
 125     ARMCPU *cpu = ARM_CPU(cs);
 126     CPUARMState *env = &cpu->env;
 127     uint32_t psr = pstate_read(env);
 128     int i;
 129
 130     cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
 131             env->pc, env->xregs[31]);
 132     for (i = 0; i < 31; i++) {
 133         cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
 134         if ((i % 4) == 3) {
 135             cpu_fprintf(f, "\n");
 136         } else {
 137             cpu_fprintf(f, " ");
 138         }
 139     }
 140     cpu_fprintf(f, "PSTATE=%08x (flags %c%c%c%c)\n",
 141                 psr,
 142                 psr & PSTATE_N ? 'N' : '-',
 143                 psr & PSTATE_Z ? 'Z' : '-',
 144                 psr & PSTATE_C ? 'C' : '-',
 145                 psr & PSTATE_V ? 'V' : '-');
 146     cpu_fprintf(f, "\n");
 147
 148     if (flags & CPU_DUMP_FPU) {
 149         int numvfpregs = 32;
 150         for (i = 0; i < numvfpregs; i += 2) {
 151             uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
 152             uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
 153             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
 154                         i, vhi, vlo);
 155             vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
 156             vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
 157             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
 158                         i + 1, vhi, vlo);
 159         }
 160         cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
 161                     vfp_get_fpcr(env), vfp_get_fpsr(env));
 162     }
 163 }
 164
/* Emit a TCG op that loads the translation-time constant @val into the
 * guest program counter global (cpu_pc).
 */
void gen_a64_set_pc_im(uint64_t val)
{
    tcg_gen_movi_i64(cpu_pc, val);
}
 169
/* 64-bit counterpart of DisasCompare, filled in by a64_test_cc() and
 * released with a64_free_cc().
 */
typedef struct DisasCompare64 {
    TCGCond cond;      /* condition copied from the 32-bit comparison */
    TCGv_i64 value;    /* sign-extended copy of the 32-bit comparison value */
} DisasCompare64;
 174
 175 static void a64_test_cc(DisasCompare64 *c64, int cc)
 176 {
 177     DisasCompare c32;
 178
 179     arm_test_cc(&c32, cc);
 180
 181     /* Sign-extend the 32-bit value so that the GE/LT comparisons work
 182        * properly.  The NE/EQ comparisons are also fine with this choice.  */
 183     c64->cond = c32.cond;
 184     c64->value = tcg_temp_new_i64();
 185     tcg_gen_ext_i32_i64(c64->value, c32.value);
 186
 187     arm_free_cc(&c32);
 188 }
 189
/* Release the 64-bit temporary that a64_test_cc() stored in @c64. */
static void a64_free_cc(DisasCompare64 *c64)
{
    tcg_temp_free_i64(c64->value);
}
 194
 195 static void gen_exception_internal(int excp)
 196 {
 197     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 198
 199     assert(excp_is_internal(excp));
 200     gen_helper_exception_internal(cpu_env, tcg_excp);
 201     tcg_temp_free_i32(tcg_excp);
 202 }
 203
 204 static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
 205 {
 206     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 207     TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
 208     TCGv_i32 tcg_el = tcg_const_i32(target_el);
 209
 210     gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
 211                                        tcg_syn, tcg_el);
 212     tcg_temp_free_i32(tcg_el);
 213     tcg_temp_free_i32(tcg_syn);
 214     tcg_temp_free_i32(tcg_excp);
 215 }
 216
 217 static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
 218 {
 219     gen_a64_set_pc_im(s->pc - offset);
 220     gen_exception_internal(excp);
 221     s->is_jmp = DISAS_EXC;
 222 }
 223
 224 static void gen_exception_insn(DisasContext *s, int offset, int excp,
 225                                uint32_t syndrome, uint32_t target_el)
 226 {
 227     gen_a64_set_pc_im(s->pc - offset);
 228     gen_exception(excp, syndrome, target_el);
 229     s->is_jmp = DISAS_EXC;
 230 }
 231
 232 static void gen_ss_advance(DisasContext *s)
 233 {
 234     /* If the singlestep state is Active-not-pending, advance to
 235      * Active-pending.
 236      */
 237     if (s->ss_active) {
 238         s->pstate_ss = 0;
 239         gen_helper_clear_pstate_ss(cpu_env);
 240     }
 241 }
 242
 243 static void gen_step_complete_exception(DisasContext *s)
 244 {
 245     /* We just completed step of an insn. Move from Active-not-pending
 246      * to Active-pending, and then also take the swstep exception.
 247      * This corresponds to making the (IMPDEF) choice to prioritize
 248      * swstep exceptions over asynchronous exceptions taken to an exception
 249      * level where debug is disabled. This choice has the advantage that
 250      * we do not need to maintain internal state corresponding to the
 251      * ISV/EX syndrome bits between completion of the step and generation
 252      * of the exception, and our syndrome information is always correct.
 253      */
 254     gen_ss_advance(s);
 255     gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
 256                   default_exception_el(s));
 257     s->is_jmp = DISAS_EXC;
 258 }
 259
 260 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
 261 {
 262     /* No direct tb linking with singlestep (either QEMU's or the ARM
 263      * debug architecture kind) or deterministic io
 264      */
 265     if (s->singlestep_enabled || s->ss_active || (s->tb->cflags & CF_LAST_IO)) {
 266         return false;
 267     }
 268
 269     /* Only link tbs from inside the same guest page */
 270     if ((s->tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
 271         return false;
 272     }
 273
 274     return true;
 275 }
 276
 277 static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
 278 {
 279     TranslationBlock *tb;
 280
 281     tb = s->tb;
 282     if (use_goto_tb(s, n, dest)) {
 283         tcg_gen_goto_tb(n);
 284         gen_a64_set_pc_im(dest);
 285         tcg_gen_exit_tb((intptr_t)tb + n);
 286         s->is_jmp = DISAS_TB_JUMP;
 287     } else {
 288         gen_a64_set_pc_im(dest);
 289         if (s->ss_active) {
 290             gen_step_complete_exception(s);
 291         } else if (s->singlestep_enabled) {
 292             gen_exception_internal(EXCP_DEBUG);
 293         } else {
 294             tcg_gen_exit_tb(0);
 295             s->is_jmp = DISAS_TB_JUMP;
 296         }
 297     }
 298 }
 299
 300 static void unallocated_encoding(DisasContext *s)
 301 {
 302     /* Unallocated and reserved encodings are uncategorized */
 303     gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
 304                        default_exception_el(s));
 305 }
 306
 307 #define unsupported_encoding(s, insn)                                    \
 308     do {                                                                 \
 309         qemu_log_mask(LOG_UNIMP,                                         \
 310                       "%s:%d: unsupported instruction encoding 0x%08x "  \
 311                       "at pc=%016" PRIx64 "\n",                          \
 312                       __FILE__, __LINE__, insn, s->pc - 4);              \
 313         unallocated_encoding(s);                                         \
 314     } while (0);
 315
 316 static void init_tmp_a64_array(DisasContext *s)
 317 {
 318 #ifdef CONFIG_DEBUG_TCG
 319     int i;
 320     for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
 321         TCGV_UNUSED_I64(s->tmp_a64[i]);
 322     }
 323 #endif
 324     s->tmp_a64_count = 0;
 325 }
 326
 327 static void free_tmp_a64(DisasContext *s)
 328 {
 329     int i;
 330     for (i = 0; i < s->tmp_a64_count; i++) {
 331         tcg_temp_free_i64(s->tmp_a64[i]);
 332     }
 333     init_tmp_a64_array(s);
 334 }
 335
 336 static TCGv_i64 new_tmp_a64(DisasContext *s)
 337 {
 338     assert(s->tmp_a64_count < TMP_A64_MAX);
 339     return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
 340 }
 341
 342 static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
 343 {
 344     TCGv_i64 t = new_tmp_a64(s);
 345     tcg_gen_movi_i64(t, 0);
 346     return t;
 347 }
 348
/*
 * Register access functions
 *
 * These functions are used for directly accessing a register in cases
 * where changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
 364 static TCGv_i64 cpu_reg(DisasContext *s, int reg)
 365 {
 366     if (reg == 31) {
 367         return new_tmp_a64_zero(s);
 368     } else {
 369         return cpu_X[reg];
 370     }
 371 }
 372
/* register access for when 31 == SP: always returns the TCG global
 * cpu_X[reg]; @s is not used.
 */
static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}
 378
 379 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 380  * representing the register contents. This TCGv is an auto-freed
 381  * temporary so it need not be explicitly freed, and may be modified.
 382  */
 383 static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
 384 {
 385     TCGv_i64 v = new_tmp_a64(s);
 386     if (reg != 31) {
 387         if (sf) {
 388             tcg_gen_mov_i64(v, cpu_X[reg]);
 389         } else {
 390             tcg_gen_ext32u_i64(v, cpu_X[reg]);
 391         }
 392     } else {
 393         tcg_gen_movi_i64(v, 0);
 394     }
 395     return v;
 396 }
 397
 398 static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
 399 {
 400     TCGv_i64 v = new_tmp_a64(s);
 401     if (sf) {
 402         tcg_gen_mov_i64(v, cpu_X[reg]);
 403     } else {
 404         tcg_gen_ext32u_i64(v, cpu_X[reg]);
 405     }
 406     return v;
 407 }
 408
 409 /* We should have at some point before trying to access an FP register
 410  * done the necessary access check, so assert that
 411  * (a) we did the check and
 412  * (b) we didn't then just plough ahead anyway if it failed.
 413  * Print the instruction pattern in the abort message so we can figure
 414  * out what we need to fix if a user encounters this problem in the wild.
 415  */
 416 static inline void assert_fp_access_checked(DisasContext *s)
 417 {
 418 #ifdef CONFIG_DEBUG_TCG
 419     if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
 420         fprintf(stderr, "target-arm: FP access check missing for "
 421                 "instruction 0x%08x\n", s->insn);
 422         abort();
 423     }
 424 #endif
 425 }
 426
 427 /* Return the offset into CPUARMState of an element of specified
 428  * size, 'element' places in from the least significant end of
 429  * the FP/vector register Qn.
 430  */
 431 static inline int vec_reg_offset(DisasContext *s, int regno,
 432                                  int element, TCGMemOp size)
 433 {
 434     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 435 #ifdef HOST_WORDS_BIGENDIAN
 436     /* This is complicated slightly because vfp.regs[2n] is
 437      * still the low half and  vfp.regs[2n+1] the high half
 438      * of the 128 bit vector, even on big endian systems.
 439      * Calculate the offset assuming a fully bigendian 128 bits,
 440      * then XOR to account for the order of the two 64 bit halves.
 441      */
 442     offs += (16 - ((element + 1) * (1 << size)));
 443     offs ^= 8;
 444 #else
 445     offs += element * (1 << size);
 446 #endif
 447     assert_fp_access_checked(s);
 448     return offs;
 449 }
 450
 451 /* Return the offset into CPUARMState of a slice (from
 452  * the least significant end) of FP register Qn (ie
 453  * Dn, Sn, Hn or Bn).
 454  * (Note that this is not the same mapping as for A32; see cpu.h)
 455  */
 456 static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
 457 {
 458     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 459 #ifdef HOST_WORDS_BIGENDIAN
 460     offs += (8 - (1 << size));
 461 #endif
 462     assert_fp_access_checked(s);
 463     return offs;
 464 }
 465
 466 /* Offset of the high half of the 128 bit vector Qn */
 467 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
 468 {
 469     assert_fp_access_checked(s);
 470     return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
 471 }
 472
 473 /* Convenience accessors for reading and writing single and double
 474  * FP registers. Writing clears the upper parts of the associated
 475  * 128 bit vector register, as required by the architecture.
 476  * Note that unlike the GP register accessors, the values returned
 477  * by the read functions must be manually freed.
 478  */
 479 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
 480 {
 481     TCGv_i64 v = tcg_temp_new_i64();
 482
 483     tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 484     return v;
 485 }
 486
 487 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
 488 {
 489     TCGv_i32 v = tcg_temp_new_i32();
 490
 491     tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
 492     return v;
 493 }
 494
 495 static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
 496 {
 497     TCGv_i64 tcg_zero = tcg_const_i64(0);
 498
 499     tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 500     tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg));
 501     tcg_temp_free_i64(tcg_zero);
 502 }
 503
 504 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
 505 {
 506     TCGv_i64 tmp = tcg_temp_new_i64();
 507
 508     tcg_gen_extu_i32_i64(tmp, v);
 509     write_fp_dreg(s, reg, tmp);
 510     tcg_temp_free_i64(tmp);
 511 }
 512
 513 static TCGv_ptr get_fpstatus_ptr(void)
 514 {
 515     TCGv_ptr statusptr = tcg_temp_new_ptr();
 516     int offset;
 517
 518     /* In A64 all instructions (both FP and Neon) use the FPCR;
 519      * there is no equivalent of the A32 Neon "standard FPSCR value"
 520      * and all operations use vfp.fp_status.
 521      */
 522     offset = offsetof(CPUARMState, vfp.fp_status);
 523     tcg_gen_addi_ptr(statusptr, cpu_env, offset);
 524     return statusptr;
 525 }
 526
 527 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 528  * than the 32 bit equivalent.
 529  */
 530 static inline void gen_set_NZ64(TCGv_i64 result)
 531 {
 532     TCGv_i64 flag = tcg_temp_new_i64();
 533
 534     tcg_gen_setcondi_i64(TCG_COND_NE, flag, result, 0);
 535     tcg_gen_extrl_i64_i32(cpu_ZF, flag);
 536     tcg_gen_shri_i64(flag, result, 32);
 537     tcg_gen_extrl_i64_i32(cpu_NF, flag);
 538     tcg_temp_free_i64(flag);
 539 }
 540
 541 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
 542 static inline void gen_logic_CC(int sf, TCGv_i64 result)
 543 {
 544     if (sf) {
 545         gen_set_NZ64(result);
 546     } else {
 547         tcg_gen_extrl_i64_i32(cpu_ZF, result);
 548         tcg_gen_extrl_i64_i32(cpu_NF, result);
 549     }
 550     tcg_gen_movi_i32(cpu_CF, 0);
 551     tcg_gen_movi_i32(cpu_VF, 0);
 552 }
 553
 554 /* dest = T0 + T1; compute C, N, V and Z flags */
 555 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 556 {
 557     if (sf) {
 558         TCGv_i64 result, flag, tmp;
 559         result = tcg_temp_new_i64();
 560         flag = tcg_temp_new_i64();
 561         tmp = tcg_temp_new_i64();
 562
 563         tcg_gen_movi_i64(tmp, 0);
 564         tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
 565
 566         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 567
 568         gen_set_NZ64(result);
 569
 570         tcg_gen_xor_i64(flag, result, t0);
 571         tcg_gen_xor_i64(tmp, t0, t1);
 572         tcg_gen_andc_i64(flag, flag, tmp);
 573         tcg_temp_free_i64(tmp);
 574         tcg_gen_shri_i64(flag, flag, 32);
 575         tcg_gen_extrl_i64_i32(cpu_VF, flag);
 576
 577         tcg_gen_mov_i64(dest, result);
 578         tcg_temp_free_i64(result);
 579         tcg_temp_free_i64(flag);
 580     } else {
 581         /* 32 bit arithmetic */
 582         TCGv_i32 t0_32 = tcg_temp_new_i32();
 583         TCGv_i32 t1_32 = tcg_temp_new_i32();
 584         TCGv_i32 tmp = tcg_temp_new_i32();
 585
 586         tcg_gen_movi_i32(tmp, 0);
 587         tcg_gen_extrl_i64_i32(t0_32, t0);
 588         tcg_gen_extrl_i64_i32(t1_32, t1);
 589         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
 590         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 591         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 592         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 593         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 594         tcg_gen_extu_i32_i64(dest, cpu_NF);
 595
 596         tcg_temp_free_i32(tmp);
 597         tcg_temp_free_i32(t0_32);
 598         tcg_temp_free_i32(t1_32);
 599     }
 600 }
 601
 602 /* dest = T0 - T1; compute C, N, V and Z flags */
 603 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 604 {
 605     if (sf) {
 606         /* 64 bit arithmetic */
 607         TCGv_i64 result, flag, tmp;
 608
 609         result = tcg_temp_new_i64();
 610         flag = tcg_temp_new_i64();
 611         tcg_gen_sub_i64(result, t0, t1);
 612
 613         gen_set_NZ64(result);
 614
 615         tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
 616         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 617
 618         tcg_gen_xor_i64(flag, result, t0);
 619         tmp = tcg_temp_new_i64();
 620         tcg_gen_xor_i64(tmp, t0, t1);
 621         tcg_gen_and_i64(flag, flag, tmp);
 622         tcg_temp_free_i64(tmp);
 623         tcg_gen_shri_i64(flag, flag, 32);
 624         tcg_gen_extrl_i64_i32(cpu_VF, flag);
 625         tcg_gen_mov_i64(dest, result);
 626         tcg_temp_free_i64(flag);
 627         tcg_temp_free_i64(result);
 628     } else {
 629         /* 32 bit arithmetic */
 630         TCGv_i32 t0_32 = tcg_temp_new_i32();
 631         TCGv_i32 t1_32 = tcg_temp_new_i32();
 632         TCGv_i32 tmp;
 633
 634         tcg_gen_extrl_i64_i32(t0_32, t0);
 635         tcg_gen_extrl_i64_i32(t1_32, t1);
 636         tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
 637         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 638         tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
 639         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 640         tmp = tcg_temp_new_i32();
 641         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 642         tcg_temp_free_i32(t0_32);
 643         tcg_temp_free_i32(t1_32);
 644         tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 645         tcg_temp_free_i32(tmp);
 646         tcg_gen_extu_i32_i64(dest, cpu_NF);
 647     }
 648 }
 649
 650 /* dest = T0 + T1 + CF; do not compute flags. */
 651 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 652 {
 653     TCGv_i64 flag = tcg_temp_new_i64();
 654     tcg_gen_extu_i32_i64(flag, cpu_CF);
 655     tcg_gen_add_i64(dest, t0, t1);
 656     tcg_gen_add_i64(dest, dest, flag);
 657     tcg_temp_free_i64(flag);
 658
 659     if (!sf) {
 660         tcg_gen_ext32u_i64(dest, dest);
 661     }
 662 }
 663
 664 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
 665 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 666 {
 667     if (sf) {
 668         TCGv_i64 result, cf_64, vf_64, tmp;
 669         result = tcg_temp_new_i64();
 670         cf_64 = tcg_temp_new_i64();
 671         vf_64 = tcg_temp_new_i64();
 672         tmp = tcg_const_i64(0);
 673
 674         tcg_gen_extu_i32_i64(cf_64, cpu_CF);
 675         tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
 676         tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
 677         tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
 678         gen_set_NZ64(result);
 679
 680         tcg_gen_xor_i64(vf_64, result, t0);
 681         tcg_gen_xor_i64(tmp, t0, t1);
 682         tcg_gen_andc_i64(vf_64, vf_64, tmp);
 683         tcg_gen_shri_i64(vf_64, vf_64, 32);
 684         tcg_gen_extrl_i64_i32(cpu_VF, vf_64);
 685
 686         tcg_gen_mov_i64(dest, result);
 687
 688         tcg_temp_free_i64(tmp);
 689         tcg_temp_free_i64(vf_64);
 690         tcg_temp_free_i64(cf_64);
 691         tcg_temp_free_i64(result);
 692     } else {
 693         TCGv_i32 t0_32, t1_32, tmp;
 694         t0_32 = tcg_temp_new_i32();
 695         t1_32 = tcg_temp_new_i32();
 696         tmp = tcg_const_i32(0);
 697
 698         tcg_gen_extrl_i64_i32(t0_32, t0);
 699         tcg_gen_extrl_i64_i32(t1_32, t1);
 700         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
 701         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);
 702
 703         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 704         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 705         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 706         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 707         tcg_gen_extu_i32_i64(dest, cpu_NF);
 708
 709         tcg_temp_free_i32(tmp);
 710         tcg_temp_free_i32(t1_32);
 711         tcg_temp_free_i32(t0_32);
 712     }
 713 }
 714
 715 /*
 716  * Load/Store generators
 717  */
 718
 719 /*
 720  * Store from GPR register to memory.
 721  */
 722 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
 723                              TCGv_i64 tcg_addr, int size, int memidx)
 724 {
 725     g_assert(size <= 3);
 726     tcg_gen_qemu_st_i64(source, tcg_addr, memidx, MO_TE + size);
 727 }
 728
 729 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
 730                       TCGv_i64 tcg_addr, int size)
 731 {
 732     do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s));
 733 }
 734
 735 /*
 736  * Load from memory to GPR register
 737  */
 738 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
 739                              int size, bool is_signed, bool extend, int memidx)
 740 {
 741     TCGMemOp memop = MO_TE + size;
 742
 743     g_assert(size <= 3);
 744
 745     if (is_signed) {
 746         memop += MO_SIGN;
 747     }
 748
 749     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
 750
 751     if (extend && is_signed) {
 752         g_assert(size < 3);
 753         tcg_gen_ext32u_i64(dest, dest);
 754     }
 755 }
 756
 757 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
 758                       int size, bool is_signed, bool extend)
 759 {
 760     do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
 761                      get_mem_index(s));
 762 }
 763
 764 /*
 765  * Store from FP register to memory
 766  */
 767 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
 768 {
 769     /* This writes the bottom N bits of a 128 bit wide vector to memory */
 770     TCGv_i64 tmp = tcg_temp_new_i64();
 771     tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
 772     if (size < 4) {
 773         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TE + size);
 774     } else {
 775         TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
 776         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TEQ);
 777         tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
 778         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 779         tcg_gen_qemu_st_i64(tmp, tcg_hiaddr, get_mem_index(s), MO_TEQ);
 780         tcg_temp_free_i64(tcg_hiaddr);
 781     }
 782
 783     tcg_temp_free_i64(tmp);
 784 }
 785
 786 /*
 787  * Load from memory to FP register
 788  */
 789 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
 790 {
 791     /* This always zero-extends and writes to a full 128 bit wide vector */
 792     TCGv_i64 tmplo = tcg_temp_new_i64();
 793     TCGv_i64 tmphi;
 794
 795     if (size < 4) {
 796         TCGMemOp memop = MO_TE + size;
 797         tmphi = tcg_const_i64(0);
 798         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
 799     } else {
 800         TCGv_i64 tcg_hiaddr;
 801         tmphi = tcg_temp_new_i64();
 802         tcg_hiaddr = tcg_temp_new_i64();
 803
 804         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), MO_TEQ);
 805         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 806         tcg_gen_qemu_ld_i64(tmphi, tcg_hiaddr, get_mem_index(s), MO_TEQ);
 807         tcg_temp_free_i64(tcg_hiaddr);
 808     }
 809
 810     tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
 811     tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
 812
 813     tcg_temp_free_i64(tmplo);
 814     tcg_temp_free_i64(tmphi);
 815 }
 816
 817 /*
 818  * Vector load/store helpers.
 819  *
 820  * The principal difference between this and a FP load is that we don't
 821  * zero extend as we are filling a partial chunk of the vector register.
 822  * These functions don't support 128 bit loads/stores, which would be
 823  * normal load/store operations.
 824  *
 825  * The _i32 versions are useful when operating on 32 bit quantities
 826  * (eg for floating point single or using Neon helper functions).
 827  */
 828
 829 /* Get value of an element within a vector register */
 830 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
 831                              int element, TCGMemOp memop)
 832 {
 833     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
 834     switch (memop) {
 835     case MO_8:
 836         tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
 837         break;
 838     case MO_16:
 839         tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
 840         break;
 841     case MO_32:
 842         tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
 843         break;
 844     case MO_8|MO_SIGN:
 845         tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
 846         break;
 847     case MO_16|MO_SIGN:
 848         tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
 849         break;
 850     case MO_32|MO_SIGN:
 851         tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
 852         break;
 853     case MO_64:
 854     case MO_64|MO_SIGN:
 855         tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
 856         break;
 857     default:
 858         g_assert_not_reached();
 859     }
 860 }
 861
 862 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
 863                                  int element, TCGMemOp memop)
 864 {
 865     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
 866     switch (memop) {
 867     case MO_8:
 868         tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
 869         break;
 870     case MO_16:
 871         tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
 872         break;
 873     case MO_8|MO_SIGN:
 874         tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
 875         break;
 876     case MO_16|MO_SIGN:
 877         tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
 878         break;
 879     case MO_32:
 880     case MO_32|MO_SIGN:
 881         tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
 882         break;
 883     default:
 884         g_assert_not_reached();
 885     }
 886 }
 887
 888 /* Set value of an element within a vector register */
 889 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
 890                               int element, TCGMemOp memop)
 891 {
 892     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
 893     switch (memop) {
 894     case MO_8:
 895         tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
 896         break;
 897     case MO_16:
 898         tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
 899         break;
 900     case MO_32:
 901         tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
 902         break;
 903     case MO_64:
 904         tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
 905         break;
 906     default:
 907         g_assert_not_reached();
 908     }
 909 }
 910
 911 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
 912                                   int destidx, int element, TCGMemOp memop)
 913 {
 914     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
 915     switch (memop) {
 916     case MO_8:
 917         tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
 918         break;
 919     case MO_16:
 920         tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
 921         break;
 922     case MO_32:
 923         tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
 924         break;
 925     default:
 926         g_assert_not_reached();
 927     }
 928 }
 929
 930 /* Clear the high 64 bits of a 128 bit vector (in general non-quad
 931  * vector ops all need to do this).
 932  */
 933 static void clear_vec_high(DisasContext *s, int rd)
 934 {
 935     TCGv_i64 tcg_zero = tcg_const_i64(0);
 936
 937     write_vec_element(s, tcg_zero, rd, 1, MO_64);
 938     tcg_temp_free_i64(tcg_zero);
 939 }
 940
 941 /* Store from vector register to memory */
 942 static void do_vec_st(DisasContext *s, int srcidx, int element,
 943                       TCGv_i64 tcg_addr, int size)
 944 {
 945     TCGMemOp memop = MO_TE + size;
 946     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
 947
 948     read_vec_element(s, tcg_tmp, srcidx, element, size);
 949     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
 950
 951     tcg_temp_free_i64(tcg_tmp);
 952 }
 953
 954 /* Load from memory to vector register */
 955 static void do_vec_ld(DisasContext *s, int destidx, int element,
 956                       TCGv_i64 tcg_addr, int size)
 957 {
 958     TCGMemOp memop = MO_TE + size;
 959     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
 960
 961     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
 962     write_vec_element(s, tcg_tmp, destidx, element, size);
 963
 964     tcg_temp_free_i64(tcg_tmp);
 965 }
 966
 967 /* Check that FP/Neon access is enabled. If it is, return
 968  * true. If not, emit code to generate an appropriate exception,
 969  * and return false; the caller should not emit any code for
 970  * the instruction. Note that this check must happen after all
 971  * unallocated-encoding checks (otherwise the syndrome information
 972  * for the resulting exception will be incorrect).
 973  */
 974 static inline bool fp_access_check(DisasContext *s)
 975 {
 976     assert(!s->fp_access_checked);
 977     s->fp_access_checked = true;
 978
 979     if (!s->fp_excp_el) {
 980         return true;
 981     }
 982
 983     gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
 984                        s->fp_excp_el);
 985     return false;
 986 }
 987
 988 /*
 989  * This utility function is for doing register extension with an
 990  * optional shift. You will likely want to pass a temporary for the
 991  * destination register. See DecodeRegExtend() in the ARM ARM.
 992  */
 993 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
 994                               int option, unsigned int shift)
 995 {
 996     int extsize = extract32(option, 0, 2);
 997     bool is_signed = extract32(option, 2, 1);
 998
 999     if (is_signed) {
1000         switch (extsize) {
1001         case 0:
1002             tcg_gen_ext8s_i64(tcg_out, tcg_in);
1003             break;
1004         case 1:
1005             tcg_gen_ext16s_i64(tcg_out, tcg_in);
1006             break;
1007         case 2:
1008             tcg_gen_ext32s_i64(tcg_out, tcg_in);
1009             break;
1010         case 3:
1011             tcg_gen_mov_i64(tcg_out, tcg_in);
1012             break;
1013         }
1014     } else {
1015         switch (extsize) {
1016         case 0:
1017             tcg_gen_ext8u_i64(tcg_out, tcg_in);
1018             break;
1019         case 1:
1020             tcg_gen_ext16u_i64(tcg_out, tcg_in);
1021             break;
1022         case 2:
1023             tcg_gen_ext32u_i64(tcg_out, tcg_in);
1024             break;
1025         case 3:
1026             tcg_gen_mov_i64(tcg_out, tcg_in);
1027             break;
1028         }
1029     }
1030
1031     if (shift) {
1032         tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1033     }
1034 }
1035
/* Placeholder for the architectural SP alignment check; currently emits
 * no code at all. Callers invoke it before SP-relative accesses so that
 * a real check can be added here later without touching them.
 */
static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}
1048
1049 /*
1050  * This provides a simple table based table lookup decoder. It is
1051  * intended to be used when the relevant bits for decode are too
1052  * awkwardly placed and switch/if based logic would be confusing and
1053  * deeply nested. Since it's a linear search through the table, tables
1054  * should be kept small.
1055  *
1056  * It returns the first handler where insn & mask == pattern, or
1057  * NULL if there is no match.
1058  * The table is terminated by an empty mask (i.e. 0)
1059  */
1060 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1061                                                uint32_t insn)
1062 {
1063     const AArch64DecodeTable *tptr = table;
1064
1065     while (tptr->mask) {
1066         if ((insn & tptr->mask) == tptr->pattern) {
1067             return tptr->disas_fn;
1068         }
1069         tptr++;
1070     }
1071     return NULL;
1072 }
1073
1074 /*
1075  * the instruction disassembly implemented here matches
1076  * the instruction encoding classifications in chapter 3 (C3)
1077  * of the ARM Architecture Reference Manual (DDI0487A_a)
1078  */
1079
1080 /* C3.2.7 Unconditional branch (immediate)
1081  *   31  30       26 25                                  0
1082  * +----+-----------+-------------------------------------+
1083  * | op | 0 0 1 0 1 |                 imm26               |
1084  * +----+-----------+-------------------------------------+
1085  */
1086 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1087 {
1088     uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
1089
1090     if (insn & (1U << 31)) {
1091         /* C5.6.26 BL Branch with link */
1092         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1093     }
1094
1095     /* C5.6.20 B Branch / C5.6.26 BL Branch with link */
1096     gen_goto_tb(s, 0, addr);
1097 }
1098
1099 /* C3.2.1 Compare & branch (immediate)
1100  *   31  30         25  24  23                  5 4      0
1101  * +----+-------------+----+---------------------+--------+
1102  * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
1103  * +----+-------------+----+---------------------+--------+
1104  */
1105 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1106 {
1107     unsigned int sf, op, rt;
1108     uint64_t addr;
1109     TCGLabel *label_match;
1110     TCGv_i64 tcg_cmp;
1111
1112     sf = extract32(insn, 31, 1);
1113     op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1114     rt = extract32(insn, 0, 5);
1115     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1116
1117     tcg_cmp = read_cpu_reg(s, rt, sf);
1118     label_match = gen_new_label();
1119
1120     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1121                         tcg_cmp, 0, label_match);
1122
1123     gen_goto_tb(s, 0, s->pc);
1124     gen_set_label(label_match);
1125     gen_goto_tb(s, 1, addr);
1126 }
1127
1128 /* C3.2.5 Test & branch (immediate)
1129  *   31  30         25  24  23   19 18          5 4    0
1130  * +----+-------------+----+-------+-------------+------+
1131  * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
1132  * +----+-------------+----+-------+-------------+------+
1133  */
1134 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1135 {
1136     unsigned int bit_pos, op, rt;
1137     uint64_t addr;
1138     TCGLabel *label_match;
1139     TCGv_i64 tcg_cmp;
1140
1141     bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1142     op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1143     addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
1144     rt = extract32(insn, 0, 5);
1145
1146     tcg_cmp = tcg_temp_new_i64();
1147     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1148     label_match = gen_new_label();
1149     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1150                         tcg_cmp, 0, label_match);
1151     tcg_temp_free_i64(tcg_cmp);
1152     gen_goto_tb(s, 0, s->pc);
1153     gen_set_label(label_match);
1154     gen_goto_tb(s, 1, addr);
1155 }
1156
1157 /* C3.2.2 / C5.6.19 Conditional branch (immediate)
1158  *  31           25  24  23                  5   4  3    0
1159  * +---------------+----+---------------------+----+------+
1160  * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
1161  * +---------------+----+---------------------+----+------+
1162  */
1163 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1164 {
1165     unsigned int cond;
1166     uint64_t addr;
1167
1168     if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1169         unallocated_encoding(s);
1170         return;
1171     }
1172     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1173     cond = extract32(insn, 0, 4);
1174
1175     if (cond < 0x0e) {
1176         /* genuinely conditional branches */
1177         TCGLabel *label_match = gen_new_label();
1178         arm_gen_test_cc(cond, label_match);
1179         gen_goto_tb(s, 0, s->pc);
1180         gen_set_label(label_match);
1181         gen_goto_tb(s, 1, addr);
1182     } else {
1183         /* 0xe and 0xf are both "always" conditions */
1184         gen_goto_tb(s, 0, addr);
1185     }
1186 }
1187
1188 /* C5.6.68 HINT */
1189 static void handle_hint(DisasContext *s, uint32_t insn,
1190                         unsigned int op1, unsigned int op2, unsigned int crm)
1191 {
1192     unsigned int selector = crm << 3 | op2;
1193
1194     if (op1 != 3) {
1195         unallocated_encoding(s);
1196         return;
1197     }
1198
1199     switch (selector) {
1200     case 0: /* NOP */
1201         return;
1202     case 3: /* WFI */
1203         s->is_jmp = DISAS_WFI;
1204         return;
1205     case 1: /* YIELD */
1206         s->is_jmp = DISAS_YIELD;
1207         return;
1208     case 2: /* WFE */
1209         s->is_jmp = DISAS_WFE;
1210         return;
1211     case 4: /* SEV */
1212     case 5: /* SEVL */
1213         /* we treat all as NOP at least for now */
1214         return;
1215     default:
1216         /* default specified as NOP equivalent */
1217         return;
1218     }
1219 }
1220
/* CLREX: emit code that sets cpu_exclusive_addr to -1, the same value
 * the system-mode gen_store_exclusive() leaves behind once a
 * store-exclusive has been handled. Neither @s nor @insn is used.
 */
static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
1225
1226 /* CLREX, DSB, DMB, ISB */
1227 static void handle_sync(DisasContext *s, uint32_t insn,
1228                         unsigned int op1, unsigned int op2, unsigned int crm)
1229 {
1230     if (op1 != 3) {
1231         unallocated_encoding(s);
1232         return;
1233     }
1234
1235     switch (op2) {
1236     case 2: /* CLREX */
1237         gen_clrex(s, insn);
1238         return;
1239     case 4: /* DSB */
1240     case 5: /* DMB */
1241     case 6: /* ISB */
1242         /* We don't emulate caches so barriers are no-ops */
1243         return;
1244     default:
1245         unallocated_encoding(s);
1246         return;
1247     }
1248 }
1249
1250 /* C5.6.130 MSR (immediate) - move immediate to processor state field */
1251 static void handle_msr_i(DisasContext *s, uint32_t insn,
1252                          unsigned int op1, unsigned int op2, unsigned int crm)
1253 {
1254     int op = op1 << 3 | op2;
1255     switch (op) {
1256     case 0x05: /* SPSel */
1257         if (s->current_el == 0) {
1258             unallocated_encoding(s);
1259             return;
1260         }
1261         /* fall through */
1262     case 0x1e: /* DAIFSet */
1263     case 0x1f: /* DAIFClear */
1264     {
1265         TCGv_i32 tcg_imm = tcg_const_i32(crm);
1266         TCGv_i32 tcg_op = tcg_const_i32(op);
1267         gen_a64_set_pc_im(s->pc - 4);
1268         gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
1269         tcg_temp_free_i32(tcg_imm);
1270         tcg_temp_free_i32(tcg_op);
1271         s->is_jmp = DISAS_UPDATE;
1272         break;
1273     }
1274     default:
1275         unallocated_encoding(s);
1276         return;
1277     }
1278 }
1279
1280 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1281 {
1282     TCGv_i32 tmp = tcg_temp_new_i32();
1283     TCGv_i32 nzcv = tcg_temp_new_i32();
1284
1285     /* build bit 31, N */
1286     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1287     /* build bit 30, Z */
1288     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1289     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1290     /* build bit 29, C */
1291     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1292     /* build bit 28, V */
1293     tcg_gen_shri_i32(tmp, cpu_VF, 31);
1294     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1295     /* generate result */
1296     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1297
1298     tcg_temp_free_i32(nzcv);
1299     tcg_temp_free_i32(tmp);
1300 }
1301
1302 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1303
1304 {
1305     TCGv_i32 nzcv = tcg_temp_new_i32();
1306
1307     /* take NZCV from R[t] */
1308     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1309
1310     /* bit 31, N */
1311     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1312     /* bit 30, Z */
1313     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1314     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1315     /* bit 29, C */
1316     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1317     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1318     /* bit 28, V */
1319     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1320     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1321     tcg_temp_free_i32(nzcv);
1322 }
1323
/* C5.6.129 MRS - move from system register
 * C5.6.131 MSR (register) - move to system register
 * C5.6.204 SYS
 * C5.6.205 SYSL
 * These are all essentially the same insn in 'read' and 'write'
 * versions, with varying op0 fields.
 *
 * The register description is looked up in s->cp_regs using the key
 * encoded from (crn, crm, op0, op1, op2). A register that is not found,
 * or that fails cp_access_ok() for the current EL, is treated as an
 * unallocated encoding. @isread selects the direction: when true the
 * register value is placed in Xt, when false Xt is written to the
 * register.
 */
static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    const ARMCPRegInfo *ri;
    TCGv_i64 tcg_rt;

    ri = get_arm_cp_reginfo(s->cp_regs,
                            ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                               crn, crm, op0, op1, op2));

    if (!ri) {
        /* Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
                      isread ? "read" : "write", op0, op1, crn, crm, op2);
        unallocated_encoding(s);
        return;
    }

    /* Check access permissions */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        unallocated_encoding(s);
        return;
    }

    if (ri->accessfn) {
        /* Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         */
        TCGv_ptr tmpptr;
        TCGv_i32 tcg_syn;
        uint32_t syndrome;

        /* Sync the PC to this instruction before calling the helper */
        gen_a64_set_pc_im(s->pc - 4);
        tmpptr = tcg_const_ptr(ri);
        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
        tcg_syn = tcg_const_i32(syndrome);
        gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn);
        tcg_temp_free_ptr(tmpptr);
        tcg_temp_free_i32(tcg_syn);
    }

    /* Handle special cases first */
    /* The mask clears every flag bit except ARM_CP_SPECIAL before the
     * type is compared against the special-case values.
     */
    switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
    case ARM_CP_NOP:
        /* Nothing is emitted for the access itself */
        return;
    case ARM_CP_NZCV:
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_rt);
        } else {
            gen_set_nzcv(tcg_rt);
        }
        return;
    case ARM_CP_CURRENTEL:
        /* Reads as current EL value from pstate, which is
         * guaranteed to be constant by the tb flags.
         */
        tcg_rt = cpu_reg(s, rt);
        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
        return;
    case ARM_CP_DC_ZVA:
        /* Writes clear the aligned block of memory which rt points into. */
        tcg_rt = cpu_reg(s, rt);
        gen_helper_dc_zva(cpu_env, tcg_rt);
        return;
    default:
        break;
    }

    /* With icount in use, accesses to registers marked ARM_CP_IO are
     * bracketed by gen_io_start() here and gen_io_end() below.
     */
    if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        gen_io_start();
    }

    tcg_rt = cpu_reg(s, rt);

    if (isread) {
        if (ri->type & ARM_CP_CONST) {
            /* Constant registers read as their reset value */
            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
        } else if (ri->readfn) {
            /* The register has a read hook: go through the helper */
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
            tcg_temp_free_ptr(tmpptr);
        } else {
            /* Plain state: load straight from the CPU state field */
            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    } else {
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            return;
        } else if (ri->writefn) {
            /* The register has a write hook: go through the helper */
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
            tcg_temp_free_ptr(tmpptr);
        } else {
            /* Plain state: store straight to the CPU state field */
            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    }

    if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        /* I/O operations must end the TB here (whether read or write) */
        gen_io_end();
        s->is_jmp = DISAS_UPDATE;
    } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /* We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        s->is_jmp = DISAS_UPDATE;
    }
}
1447
1448 /* C3.2.4 System
1449  *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1450  * +---------------------+---+-----+-----+-------+-------+-----+------+
1451  * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1452  * +---------------------+---+-----+-----+-------+-------+-----+------+
1453  */
1454 static void disas_system(DisasContext *s, uint32_t insn)
1455 {
1456     unsigned int l, op0, op1, crn, crm, op2, rt;
1457     l = extract32(insn, 21, 1);
1458     op0 = extract32(insn, 19, 2);
1459     op1 = extract32(insn, 16, 3);
1460     crn = extract32(insn, 12, 4);
1461     crm = extract32(insn, 8, 4);
1462     op2 = extract32(insn, 5, 3);
1463     rt = extract32(insn, 0, 5);
1464
1465     if (op0 == 0) {
1466         if (l || rt != 31) {
1467             unallocated_encoding(s);
1468             return;
1469         }
1470         switch (crn) {
1471         case 2: /* C5.6.68 HINT */
1472             handle_hint(s, insn, op1, op2, crm);
1473             break;
1474         case 3: /* CLREX, DSB, DMB, ISB */
1475             handle_sync(s, insn, op1, op2, crm);
1476             break;
1477         case 4: /* C5.6.130 MSR (immediate) */
1478             handle_msr_i(s, insn, op1, op2, crm);
1479             break;
1480         default:
1481             unallocated_encoding(s);
1482             break;
1483         }
1484         return;
1485     }
1486     handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1487 }
1488
1489 /* C3.2.3 Exception generation
1490  *
1491  *  31             24 23 21 20                     5 4   2 1  0
1492  * +-----------------+-----+------------------------+-----+----+
1493  * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
1494  * +-----------------------+------------------------+----------+
1495  */
1496 static void disas_exc(DisasContext *s, uint32_t insn)
1497 {
1498     int opc = extract32(insn, 21, 3);
1499     int op2_ll = extract32(insn, 0, 5);
1500     int imm16 = extract32(insn, 5, 16);
1501     TCGv_i32 tmp;
1502
1503     switch (opc) {
1504     case 0:
1505         /* For SVC, HVC and SMC we advance the single-step state
1506          * machine before taking the exception. This is architecturally
1507          * mandated, to ensure that single-stepping a system call
1508          * instruction works properly.
1509          */
1510         switch (op2_ll) {
1511         case 1:
1512             gen_ss_advance(s);
1513             gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
1514                                default_exception_el(s));
1515             break;
1516         case 2:
1517             if (s->current_el == 0) {
1518                 unallocated_encoding(s);
1519                 break;
1520             }
1521             /* The pre HVC helper handles cases when HVC gets trapped
1522              * as an undefined insn by runtime configuration.
1523              */
1524             gen_a64_set_pc_im(s->pc - 4);
1525             gen_helper_pre_hvc(cpu_env);
1526             gen_ss_advance(s);
1527             gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
1528             break;
1529         case 3:
1530             if (s->current_el == 0) {
1531                 unallocated_encoding(s);
1532                 break;
1533             }
1534             gen_a64_set_pc_im(s->pc - 4);
1535             tmp = tcg_const_i32(syn_aa64_smc(imm16));
1536             gen_helper_pre_smc(cpu_env, tmp);
1537             tcg_temp_free_i32(tmp);
1538             gen_ss_advance(s);
1539             gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
1540             break;
1541         default:
1542             unallocated_encoding(s);
1543             break;
1544         }
1545         break;
1546     case 1:
1547         if (op2_ll != 0) {
1548             unallocated_encoding(s);
1549             break;
1550         }
1551         /* BRK */
1552         gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16),
1553                            default_exception_el(s));
1554         break;
1555     case 2:
1556         if (op2_ll != 0) {
1557             unallocated_encoding(s);
1558             break;
1559         }
1560         /* HLT. This has two purposes.
1561          * Architecturally, it is an external halting debug instruction.
1562          * Since QEMU doesn't implement external debug, we treat this as
1563          * it is required for halting debug disabled: it will UNDEF.
1564          * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
1565          */
1566         if (semihosting_enabled() && imm16 == 0xf000) {
1567 #ifndef CONFIG_USER_ONLY
1568             /* In system mode, don't allow userspace access to semihosting,
1569              * to provide some semblance of security (and for consistency
1570              * with our 32-bit semihosting).
1571              */
1572             if (s->current_el == 0) {
1573                 unsupported_encoding(s, insn);
1574                 break;
1575             }
1576 #endif
1577             gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
1578         } else {
1579             unsupported_encoding(s, insn);
1580         }
1581         break;
1582     case 5:
1583         if (op2_ll < 1 || op2_ll > 3) {
1584             unallocated_encoding(s);
1585             break;
1586         }
1587         /* DCPS1, DCPS2, DCPS3 */
1588         unsupported_encoding(s, insn);
1589         break;
1590     default:
1591         unallocated_encoding(s);
1592         break;
1593     }
1594 }
1595
1596 /* C3.2.7 Unconditional branch (register)
1597  *  31           25 24   21 20   16 15   10 9    5 4     0
1598  * +---------------+-------+-------+-------+------+-------+
1599  * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1600  * +---------------+-------+-------+-------+------+-------+
1601  */
1602 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1603 {
1604     unsigned int opc, op2, op3, rn, op4;
1605
1606     opc = extract32(insn, 21, 4);
1607     op2 = extract32(insn, 16, 5);
1608     op3 = extract32(insn, 10, 6);
1609     rn = extract32(insn, 5, 5);
1610     op4 = extract32(insn, 0, 5);
1611
1612     if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1613         unallocated_encoding(s);
1614         return;
1615     }
1616
1617     switch (opc) {
1618     case 0: /* BR */
1619     case 2: /* RET */
1620         tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1621         break;
1622     case 1: /* BLR */
1623         tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1624         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1625         break;
1626     case 4: /* ERET */
1627         if (s->current_el == 0) {
1628             unallocated_encoding(s);
1629             return;
1630         }
1631         gen_helper_exception_return(cpu_env);
1632         s->is_jmp = DISAS_JUMP;
1633         return;
1634     case 5: /* DRPS */
1635         if (rn != 0x1f) {
1636             unallocated_encoding(s);
1637         } else {
1638             unsupported_encoding(s, insn);
1639         }
1640         return;
1641     default:
1642         unallocated_encoding(s);
1643         return;
1644     }
1645
1646     s->is_jmp = DISAS_JUMP;
1647 }
1648
1649 /* C3.2 Branches, exception generating and system instructions */
1650 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1651 {
1652     switch (extract32(insn, 25, 7)) {
1653     case 0x0a: case 0x0b:
1654     case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1655         disas_uncond_b_imm(s, insn);
1656         break;
1657     case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1658         disas_comp_b_imm(s, insn);
1659         break;
1660     case 0x1b: case 0x5b: /* Test & branch (immediate) */
1661         disas_test_b_imm(s, insn);
1662         break;
1663     case 0x2a: /* Conditional branch (immediate) */
1664         disas_cond_b_imm(s, insn);
1665         break;
1666     case 0x6a: /* Exception generation / System */
1667         if (insn & (1 << 24)) {
1668             disas_system(s, insn);
1669         } else {
1670             disas_exc(s, insn);
1671         }
1672         break;
1673     case 0x6b: /* Unconditional branch (register) */
1674         disas_uncond_b_reg(s, insn);
1675         break;
1676     default:
1677         unallocated_encoding(s);
1678         break;
1679     }
1680 }
1681
1682 /*
1683  * Load/Store exclusive instructions are implemented by remembering
1684  * the value/address loaded, and seeing if these are the same
1685  * when the store is performed. This is not actually the architecturally
1686  * mandated semantics, but it works for typical guest code sequences
1687  * and avoids having to monitor regular stores.
1688  *
1689  * In system emulation mode only one CPU will be running at once, so
1690  * this sequence is effectively atomic.  In user emulation mode we
1691  * throw an exception and handle the atomic operation elsewhere.
1692  */
1693 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
1694                                TCGv_i64 addr, int size, bool is_pair)
1695 {
1696     TCGv_i64 tmp = tcg_temp_new_i64();
1697     TCGMemOp memop = MO_TE + size;
1698
1699     g_assert(size <= 3);
1700     tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);
1701
1702     if (is_pair) {
1703         TCGv_i64 addr2 = tcg_temp_new_i64();
1704         TCGv_i64 hitmp = tcg_temp_new_i64();
1705
1706         g_assert(size >= 2);
1707         tcg_gen_addi_i64(addr2, addr, 1 << size);
1708         tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
1709         tcg_temp_free_i64(addr2);
1710         tcg_gen_mov_i64(cpu_exclusive_high, hitmp);
1711         tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
1712         tcg_temp_free_i64(hitmp);
1713     }
1714
1715     tcg_gen_mov_i64(cpu_exclusive_val, tmp);
1716     tcg_gen_mov_i64(cpu_reg(s, rt), tmp);
1717
1718     tcg_temp_free_i64(tmp);
1719     tcg_gen_mov_i64(cpu_exclusive_addr, addr);
1720 }
1721
#ifdef CONFIG_USER_ONLY
/* User-mode store exclusive: no store is emitted here. The address is
 * saved in cpu_exclusive_test and the operands are packed into
 * cpu_exclusive_info (size in bits [1:0], is_pair at bit 2, rd from
 * bit 4, rt from bit 9, rt2 from bit 14), then an internal EXCP_STREX
 * exception is raised for this instruction.
 */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 addr, int size, int is_pair)
{
    tcg_gen_mov_i64(cpu_exclusive_test, addr);
    tcg_gen_movi_i32(cpu_exclusive_info,
                     size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14));
    gen_exception_internal_insn(s, 4, EXCP_STREX);
}
#else
/* System-mode store exclusive: emit code that stores Rt (and Rt2 when
 * @is_pair is set) only if the address and the current memory contents
 * still match what the preceding load exclusive recorded. Rd receives
 * 0 on success and 1 on failure, and cpu_exclusive_addr is set to -1 on
 * both paths. @size is the log2 access size.
 */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 inaddr, int size, int is_pair)
{
    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
     *     [addr] = {Rt};
     *     if (is_pair) {
     *         [addr + datasize] = {Rt2};
     *     }
     *     {Rd} = 0;
     * } else {
     *     {Rd} = 1;
     * }
     * env->exclusive_addr = -1;
     */
    TCGLabel *fail_label = gen_new_label();
    TCGLabel *done_label = gen_new_label();
    TCGv_i64 addr = tcg_temp_local_new_i64();
    TCGv_i64 tmp;

    /* Copy input into a local temp so it is not trashed when the
     * basic block ends at the branch insn.
     */
    tcg_gen_mov_i64(addr, inaddr);
    /* Check 1: the address must be the one recorded by the load */
    tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);

    /* Check 2: memory must still hold the value that was loaded */
    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), MO_TE + size);
    tcg_gen_brcond_i64(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label);
    tcg_temp_free_i64(tmp);

    if (is_pair) {
        /* Check 3 (pairs only): the same test for the second element */
        TCGv_i64 addrhi = tcg_temp_new_i64();
        TCGv_i64 tmphi = tcg_temp_new_i64();

        tcg_gen_addi_i64(addrhi, addr, 1 << size);
        tcg_gen_qemu_ld_i64(tmphi, addrhi, get_mem_index(s), MO_TE + size);
        tcg_gen_brcond_i64(TCG_COND_NE, tmphi, cpu_exclusive_high, fail_label);

        tcg_temp_free_i64(tmphi);
        tcg_temp_free_i64(addrhi);
    }

    /* We seem to still have the exclusive monitor, so do the store */
    tcg_gen_qemu_st_i64(cpu_reg(s, rt), addr, get_mem_index(s), MO_TE + size);
    if (is_pair) {
        /* The high address is recomputed in a fresh temporary here */
        TCGv_i64 addrhi = tcg_temp_new_i64();

        tcg_gen_addi_i64(addrhi, addr, 1 << size);
        tcg_gen_qemu_st_i64(cpu_reg(s, rt2), addrhi,
                            get_mem_index(s), MO_TE + size);
        tcg_temp_free_i64(addrhi);
    }

    tcg_temp_free_i64(addr);

    /* Success path: Rd = 0, then skip over the failure path */
    tcg_gen_movi_i64(cpu_reg(s, rd), 0);
    tcg_gen_br(done_label);
    /* Failure path: Rd = 1 */
    gen_set_label(fail_label);
    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
    gen_set_label(done_label);
    /* Both paths end by setting the recorded address to -1 */
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);

}
#endif
1797
1798 /* C3.3.6 Load/store exclusive
1799  *
1800  *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
1801  * +-----+-------------+----+---+----+------+----+-------+------+------+
1802  * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
1803  * +-----+-------------+----+---+----+------+----+-------+------+------+
1804  *
1805  *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
1806  *   L: 0 -> store, 1 -> load
1807  *  o2: 0 -> exclusive, 1 -> not
1808  *  o1: 0 -> single register, 1 -> register pair
1809  *  o0: 1 -> load-acquire/store-release, 0 -> not
1810  *
1811  *  o0 == 0 AND o2 == 1 is un-allocated
1812  *  o1 == 1 is un-allocated except for 32 and 64 bit sizes
1813  */
1814 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
1815 {
1816     int rt = extract32(insn, 0, 5);
1817     int rn = extract32(insn, 5, 5);
1818     int rt2 = extract32(insn, 10, 5);
1819     int is_lasr = extract32(insn, 15, 1);
1820     int rs = extract32(insn, 16, 5);
1821     int is_pair = extract32(insn, 21, 1);
1822     int is_store = !extract32(insn, 22, 1);
1823     int is_excl = !extract32(insn, 23, 1);
1824     int size = extract32(insn, 30, 2);
1825     TCGv_i64 tcg_addr;
1826
1827     if ((!is_excl && !is_lasr) ||
1828         (is_pair && size < 2)) {
1829         unallocated_encoding(s);
1830         return;
1831     }
1832
1833     if (rn == 31) {
1834         gen_check_sp_alignment(s);
1835     }
1836     tcg_addr = read_cpu_reg_sp(s, rn, 1);
1837
1838     /* Note that since TCG is single threaded load-acquire/store-release
1839      * semantics require no extra if (is_lasr) { ... } handling.
1840      */
1841
1842     if (is_excl) {
1843         if (!is_store) {
1844             s->is_ldex = true;
1845             gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
1846         } else {
1847             gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
1848         }
1849     } else {
1850         TCGv_i64 tcg_rt = cpu_reg(s, rt);
1851         if (is_store) {
1852             do_gpr_st(s, tcg_rt, tcg_addr, size);
1853         } else {
1854             do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false);
1855         }
1856         if (is_pair) {
1857             TCGv_i64 tcg_rt2 = cpu_reg(s, rt);
1858             tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
1859             if (is_store) {
1860                 do_gpr_st(s, tcg_rt2, tcg_addr, size);
1861             } else {
1862                 do_gpr_ld(s, tcg_rt2, tcg_addr, size, false, false);
1863             }
1864         }
1865     }
1866 }
1867
1868 /*
1869  * C3.3.5 Load register (literal)
1870  *
1871  *  31 30 29   27  26 25 24 23                5 4     0
1872  * +-----+-------+---+-----+-------------------+-------+
1873  * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
1874  * +-----+-------+---+-----+-------------------+-------+
1875  *
1876  * V: 1 -> vector (simd/fp)
1877  * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
1878  *                   10-> 32 bit signed, 11 -> prefetch
1879  * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
1880  */
1881 static void disas_ld_lit(DisasContext *s, uint32_t insn)
1882 {
1883     int rt = extract32(insn, 0, 5);
1884     int64_t imm = sextract32(insn, 5, 19) << 2;
1885     bool is_vector = extract32(insn, 26, 1);
1886     int opc = extract32(insn, 30, 2);
1887     bool is_signed = false;
1888     int size = 2;
1889     TCGv_i64 tcg_rt, tcg_addr;
1890
1891     if (is_vector) {
1892         if (opc == 3) {
1893             unallocated_encoding(s);
1894             return;
1895         }
1896         size = 2 + opc;
1897         if (!fp_access_check(s)) {
1898             return;
1899         }
1900     } else {
1901         if (opc == 3) {
1902             /* PRFM (literal) : prefetch */
1903             return;
1904         }
1905         size = 2 + extract32(opc, 0, 1);
1906         is_signed = extract32(opc, 1, 1);
1907     }
1908
1909     tcg_rt = cpu_reg(s, rt);
1910
1911     tcg_addr = tcg_const_i64((s->pc - 4) + imm);
1912     if (is_vector) {
1913         do_fp_ld(s, rt, tcg_addr, size);
1914     } else {
1915         do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
1916     }
1917     tcg_temp_free_i64(tcg_addr);
1918 }
1919
1920 /*
1921  * C5.6.80 LDNP (Load Pair - non-temporal hint)
1922  * C5.6.81 LDP (Load Pair - non vector)
1923  * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
1924  * C5.6.176 STNP (Store Pair - non-temporal hint)
1925  * C5.6.177 STP (Store Pair - non vector)
1926  * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
1927  * C6.3.165 LDP (Load Pair of SIMD&FP)
1928  * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
1929  * C6.3.284 STP (Store Pair of SIMD&FP)
1930  *
1931  *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
1932  * +-----+-------+---+---+-------+---+-----------------------------+
1933  * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
1934  * +-----+-------+---+---+-------+---+-------+-------+------+------+
1935  *
1936  * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
1937  *      LDPSW                    01
1938  *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
1939  *   V: 0 -> GPR, 1 -> Vector
1940  * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
1941  *      10 -> signed offset, 11 -> pre-index
1942  *   L: 0 -> Store 1 -> Load
1943  *
1944  * Rt, Rt2 = GPR or SIMD registers to be stored
1945  * Rn = general purpose register containing address
1946  * imm7 = signed offset (multiple of 4 or 8 depending on size)
1947  */
1948 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
1949 {
1950     int rt = extract32(insn, 0, 5);
1951     int rn = extract32(insn, 5, 5);
1952     int rt2 = extract32(insn, 10, 5);
1953     uint64_t offset = sextract64(insn, 15, 7);
1954     int index = extract32(insn, 23, 2);
1955     bool is_vector = extract32(insn, 26, 1);
1956     bool is_load = extract32(insn, 22, 1);
1957     int opc = extract32(insn, 30, 2);
1958
1959     bool is_signed = false;
1960     bool postindex = false;
1961     bool wback = false;
1962
1963     TCGv_i64 tcg_addr; /* calculated address */
1964     int size;
1965
1966     if (opc == 3) {
1967         unallocated_encoding(s);
1968         return;
1969     }
1970
1971     if (is_vector) {
1972         size = 2 + opc;
1973     } else {
1974         size = 2 + extract32(opc, 1, 1);
1975         is_signed = extract32(opc, 0, 1);
1976         if (!is_load && is_signed) {
1977             unallocated_encoding(s);
1978             return;
1979         }
1980     }
1981
1982     switch (index) {
1983     case 1: /* post-index */
1984         postindex = true;
1985         wback = true;
1986         break;
1987     case 0:
1988         /* signed offset with "non-temporal" hint. Since we don't emulate
1989          * caches we don't care about hints to the cache system about
1990          * data access patterns, and handle this identically to plain
1991          * signed offset.
1992          */
1993         if (is_signed) {
1994             /* There is no non-temporal-hint version of LDPSW */
1995             unallocated_encoding(s);
1996             return;
1997         }
1998         postindex = false;
1999         break;
2000     case 2: /* signed offset, rn not updated */
2001         postindex = false;
2002         break;
2003     case 3: /* pre-index */
2004         postindex = false;
2005         wback = true;
2006         break;
2007     }
2008
2009     if (is_vector && !fp_access_check(s)) {
2010         return;
2011     }
2012
2013     offset <<= size;
2014
2015     if (rn == 31) {
2016         gen_check_sp_alignment(s);
2017     }
2018
2019     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2020
2021     if (!postindex) {
2022         tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2023     }
2024
2025     if (is_vector) {
2026         if (is_load) {
2027             do_fp_ld(s, rt, tcg_addr, size);
2028         } else {
2029             do_fp_st(s, rt, tcg_addr, size);
2030         }
2031     } else {
2032         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2033         if (is_load) {
2034             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
2035         } else {
2036             do_gpr_st(s, tcg_rt, tcg_addr, size);
2037         }
2038     }
2039     tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2040     if (is_vector) {
2041         if (is_load) {
2042             do_fp_ld(s, rt2, tcg_addr, size);
2043         } else {
2044             do_fp_st(s, rt2, tcg_addr, size);
2045         }
2046     } else {
2047         TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2048         if (is_load) {
2049             do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false);
2050         } else {
2051             do_gpr_st(s, tcg_rt2, tcg_addr, size);
2052         }
2053     }
2054
2055     if (wback) {
2056         if (postindex) {
2057             tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
2058         } else {
2059             tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
2060         }
2061         tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
2062     }
2063 }
2064
2065 /*
2066  * C3.3.8 Load/store (immediate post-indexed)
2067  * C3.3.9 Load/store (immediate pre-indexed)
2068  * C3.3.12 Load/store (unscaled immediate)
2069  *
2070  * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2071  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2072  * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2073  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2074  *
2075  * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
2076          10 -> unprivileged
2077  * V = 0 -> non-vector
2078  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2079  * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2080  */
2081 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
2082 {
2083     int rt = extract32(insn, 0, 5);
2084     int rn = extract32(insn, 5, 5);
2085     int imm9 = sextract32(insn, 12, 9);
2086     int opc = extract32(insn, 22, 2);
2087     int size = extract32(insn, 30, 2);
2088     int idx = extract32(insn, 10, 2);
2089     bool is_signed = false;
2090     bool is_store = false;
2091     bool is_extended = false;
2092     bool is_unpriv = (idx == 2);
2093     bool is_vector = extract32(insn, 26, 1);
2094     bool post_index;
2095     bool writeback;
2096
2097     TCGv_i64 tcg_addr;
2098
2099     if (is_vector) {
2100         size |= (opc & 2) << 1;
2101         if (size > 4 || is_unpriv) {
2102             unallocated_encoding(s);
2103             return;
2104         }
2105         is_store = ((opc & 1) == 0);
2106         if (!fp_access_check(s)) {
2107             return;
2108         }
2109     } else {
2110         if (size == 3 && opc == 2) {
2111             /* PRFM - prefetch */
2112             if (is_unpriv) {
2113                 unallocated_encoding(s);
2114                 return;
2115             }
2116             return;
2117         }
2118         if (opc == 3 && size > 1) {
2119             unallocated_encoding(s);
2120             return;
2121         }
2122         is_store = (opc == 0);
2123         is_signed = opc & (1<<1);
2124         is_extended = (size < 3) && (opc & 1);
2125     }
2126
2127     switch (idx) {
2128     case 0:
2129     case 2:
2130         post_index = false;
2131         writeback = false;
2132         break;
2133     case 1:
2134         post_index = true;
2135         writeback = true;
2136         break;
2137     case 3:
2138         post_index = false;
2139         writeback = true;
2140         break;
2141     }
2142
2143     if (rn == 31) {
2144         gen_check_sp_alignment(s);
2145     }
2146     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2147
2148     if (!post_index) {
2149         tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2150     }
2151
2152     if (is_vector) {
2153         if (is_store) {
2154             do_fp_st(s, rt, tcg_addr, size);
2155         } else {
2156             do_fp_ld(s, rt, tcg_addr, size);
2157         }
2158     } else {
2159         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2160         int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2161
2162         if (is_store) {
2163             do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx);
2164         } else {
2165             do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
2166                              is_signed, is_extended, memidx);
2167         }
2168     }
2169
2170     if (writeback) {
2171         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2172         if (post_index) {
2173             tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2174         }
2175         tcg_gen_mov_i64(tcg_rn, tcg_addr);
2176     }
2177 }
2178
2179 /*
2180  * C3.3.10 Load/store (register offset)
2181  *
2182  * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2183  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2184  * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2185  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2186  *
2187  * For non-vector:
2188  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2189  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2190  * For vector:
2191  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2192  *   opc<0>: 0 -> store, 1 -> load
2193  * V: 1 -> vector/simd
2194  * opt: extend encoding (see DecodeRegExtend)
2195  * S: if S=1 then scale (essentially index by sizeof(size))
2196  * Rt: register to transfer into/out of
2197  * Rn: address register or SP for base
2198  * Rm: offset register or ZR for offset
2199  */
2200 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn)
2201 {
2202     int rt = extract32(insn, 0, 5);
2203     int rn = extract32(insn, 5, 5);
2204     int shift = extract32(insn, 12, 1);
2205     int rm = extract32(insn, 16, 5);
2206     int opc = extract32(insn, 22, 2);
2207     int opt = extract32(insn, 13, 3);
2208     int size = extract32(insn, 30, 2);
2209     bool is_signed = false;
2210     bool is_store = false;
2211     bool is_extended = false;
2212     bool is_vector = extract32(insn, 26, 1);
2213
2214     TCGv_i64 tcg_rm;
2215     TCGv_i64 tcg_addr;
2216
2217     if (extract32(opt, 1, 1) == 0) {
2218         unallocated_encoding(s);
2219         return;
2220     }
2221
2222     if (is_vector) {
2223         size |= (opc & 2) << 1;
2224         if (size > 4) {
2225             unallocated_encoding(s);
2226             return;
2227         }
2228         is_store = !extract32(opc, 0, 1);
2229         if (!fp_access_check(s)) {
2230             return;
2231         }
2232     } else {
2233         if (size == 3 && opc == 2) {
2234             /* PRFM - prefetch */
2235             return;
2236         }
2237         if (opc == 3 && size > 1) {
2238             unallocated_encoding(s);
2239             return;
2240         }
2241         is_store = (opc == 0);
2242         is_signed = extract32(opc, 1, 1);
2243         is_extended = (size < 3) && extract32(opc, 0, 1);
2244     }
2245
2246     if (rn == 31) {
2247         gen_check_sp_alignment(s);
2248     }
2249     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2250
2251     tcg_rm = read_cpu_reg(s, rm, 1);
2252     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2253
2254     tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
2255
2256     if (is_vector) {
2257         if (is_store) {
2258             do_fp_st(s, rt, tcg_addr, size);
2259         } else {
2260             do_fp_ld(s, rt, tcg_addr, size);
2261         }
2262     } else {
2263         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2264         if (is_store) {
2265             do_gpr_st(s, tcg_rt, tcg_addr, size);
2266         } else {
2267             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
2268         }
2269     }
2270 }
2271
2272 /*
2273  * C3.3.13 Load/store (unsigned immediate)
2274  *
2275  * 31 30 29   27  26 25 24 23 22 21        10 9     5
2276  * +----+-------+---+-----+-----+------------+-------+------+
2277  * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
2278  * +----+-------+---+-----+-----+------------+-------+------+
2279  *
2280  * For non-vector:
2281  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2282  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2283  * For vector:
2284  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2285  *   opc<0>: 0 -> store, 1 -> load
2286  * Rn: base address register (inc SP)
2287  * Rt: target register
2288  */
2289 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn)
2290 {
2291     int rt = extract32(insn, 0, 5);
2292     int rn = extract32(insn, 5, 5);
2293     unsigned int imm12 = extract32(insn, 10, 12);
2294     bool is_vector = extract32(insn, 26, 1);
2295     int size = extract32(insn, 30, 2);
2296     int opc = extract32(insn, 22, 2);
2297     unsigned int offset;
2298
2299     TCGv_i64 tcg_addr;
2300
2301     bool is_store;
2302     bool is_signed = false;
2303     bool is_extended = false;
2304
2305     if (is_vector) {
2306         size |= (opc & 2) << 1;
2307         if (size > 4) {
2308             unallocated_encoding(s);
2309             return;
2310         }
2311         is_store = !extract32(opc, 0, 1);
2312         if (!fp_access_check(s)) {
2313             return;
2314         }
2315     } else {
2316         if (size == 3 && opc == 2) {
2317             /* PRFM - prefetch */
2318             return;
2319         }
2320         if (opc == 3 && size > 1) {
2321             unallocated_encoding(s);
2322             return;
2323         }
2324         is_store = (opc == 0);
2325         is_signed = extract32(opc, 1, 1);
2326         is_extended = (size < 3) && extract32(opc, 0, 1);
2327     }
2328
2329     if (rn == 31) {
2330         gen_check_sp_alignment(s);
2331     }
2332     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2333     offset = imm12 << size;
2334     tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2335
2336     if (is_vector) {
2337         if (is_store) {
2338             do_fp_st(s, rt, tcg_addr, size);
2339         } else {
2340             do_fp_ld(s, rt, tcg_addr, size);
2341         }
2342     } else {
2343         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2344         if (is_store) {
2345             do_gpr_st(s, tcg_rt, tcg_addr, size);
2346         } else {
2347             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
2348         }
2349     }
2350 }
2351
2352 /* Load/store register (all forms) */
2353 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2354 {
2355     switch (extract32(insn, 24, 2)) {
2356     case 0:
2357         if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2358             disas_ldst_reg_roffset(s, insn);
2359         } else {
2360             /* Load/store register (unscaled immediate)
2361              * Load/store immediate pre/post-indexed
2362              * Load/store register unprivileged
2363              */
2364             disas_ldst_reg_imm9(s, insn);
2365         }
2366         break;
2367     case 1:
2368         disas_ldst_reg_unsigned_imm(s, insn);
2369         break;
2370     default:
2371         unallocated_encoding(s);
2372         break;
2373     }
2374 }
2375
2376 /* C3.3.1 AdvSIMD load/store multiple structures
2377  *
2378  *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
2379  * +---+---+---------------+---+-------------+--------+------+------+------+
2380  * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
2381  * +---+---+---------------+---+-------------+--------+------+------+------+
2382  *
2383  * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
2384  *
2385  *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
2386  * +---+---+---------------+---+---+---------+--------+------+------+------+
2387  * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
2388  * +---+---+---------------+---+---+---------+--------+------+------+------+
2389  *
2390  * Rt: first (or only) SIMD&FP register to be transferred
2391  * Rn: base address or SP
2392  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2393  */
2394 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
2395 {
2396     int rt = extract32(insn, 0, 5);
2397     int rn = extract32(insn, 5, 5);
2398     int size = extract32(insn, 10, 2);
2399     int opcode = extract32(insn, 12, 4);
2400     bool is_store = !extract32(insn, 22, 1);
2401     bool is_postidx = extract32(insn, 23, 1);
2402     bool is_q = extract32(insn, 30, 1);
2403     TCGv_i64 tcg_addr, tcg_rn;
2404
2405     int ebytes = 1 << size;
2406     int elements = (is_q ? 128 : 64) / (8 << size);
2407     int rpt;    /* num iterations */
2408     int selem;  /* structure elements */
2409     int r;
2410
2411     if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
2412         unallocated_encoding(s);
2413         return;
2414     }
2415
2416     /* From the shared decode logic */
2417     switch (opcode) {
2418     case 0x0:
2419         rpt = 1;
2420         selem = 4;
2421         break;
2422     case 0x2:
2423         rpt = 4;
2424         selem = 1;
2425         break;
2426     case 0x4:
2427         rpt = 1;
2428         selem = 3;
2429         break;
2430     case 0x6:
2431         rpt = 3;
2432         selem = 1;
2433         break;
2434     case 0x7:
2435         rpt = 1;
2436         selem = 1;
2437         break;
2438     case 0x8:
2439         rpt = 1;
2440         selem = 2;
2441         break;
2442     case 0xa:
2443         rpt = 2;
2444         selem = 1;
2445         break;
2446     default:
2447         unallocated_encoding(s);
2448         return;
2449     }
2450
2451     if (size == 3 && !is_q && selem != 1) {
2452         /* reserved */
2453         unallocated_encoding(s);
2454         return;
2455     }
2456
2457     if (!fp_access_check(s)) {
2458         return;
2459     }
2460
2461     if (rn == 31) {
2462         gen_check_sp_alignment(s);
2463     }
2464
2465     tcg_rn = cpu_reg_sp(s, rn);
2466     tcg_addr = tcg_temp_new_i64();
2467     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2468
2469     for (r = 0; r < rpt; r++) {
2470         int e;
2471         for (e = 0; e < elements; e++) {
2472             int tt = (rt + r) % 32;
2473             int xs;
2474             for (xs = 0; xs < selem; xs++) {
2475                 if (is_store) {
2476                     do_vec_st(s, tt, e, tcg_addr, size);
2477                 } else {
2478                     do_vec_ld(s, tt, e, tcg_addr, size);
2479
2480                     /* For non-quad operations, setting a slice of the low
2481                      * 64 bits of the register clears the high 64 bits (in
2482                      * the ARM ARM pseudocode this is implicit in the fact
2483                      * that 'rval' is a 64 bit wide variable). We optimize
2484                      * by noticing that we only need to do this the first
2485                      * time we touch a register.
2486                      */
2487                     if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
2488                         clear_vec_high(s, tt);
2489                     }
2490                 }
2491                 tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2492                 tt = (tt + 1) % 32;
2493             }
2494         }
2495     }
2496
2497     if (is_postidx) {
2498         int rm = extract32(insn, 16, 5);
2499         if (rm == 31) {
2500             tcg_gen_mov_i64(tcg_rn, tcg_addr);
2501         } else {
2502             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2503         }
2504     }
2505     tcg_temp_free_i64(tcg_addr);
2506 }
2507
2508 /* C3.3.3 AdvSIMD load/store single structure
2509  *
2510  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2511  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2512  * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
2513  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2514  *
2515  * C3.3.4 AdvSIMD load/store single structure (post-indexed)
2516  *
2517  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2518  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2519  * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
2520  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2521  *
2522  * Rt: first (or only) SIMD&FP register to be transferred
2523  * Rn: base address or SP
2524  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2525  * index = encoded in Q:S:size dependent on size
2526  *
2527  * lane_size = encoded in R, opc
2528  * transfer width = encoded in opc, S, size
2529  */
2530 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
2531 {
2532     int rt = extract32(insn, 0, 5);
2533     int rn = extract32(insn, 5, 5);
2534     int size = extract32(insn, 10, 2);
2535     int S = extract32(insn, 12, 1);
2536     int opc = extract32(insn, 13, 3);
2537     int R = extract32(insn, 21, 1);
2538     int is_load = extract32(insn, 22, 1);
2539     int is_postidx = extract32(insn, 23, 1);
2540     int is_q = extract32(insn, 30, 1);
2541
2542     int scale = extract32(opc, 1, 2);
2543     int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
2544     bool replicate = false;
2545     int index = is_q << 3 | S << 2 | size;
2546     int ebytes, xs;
2547     TCGv_i64 tcg_addr, tcg_rn;
2548
2549     switch (scale) {
2550     case 3:
2551         if (!is_load || S) {
2552             unallocated_encoding(s);
2553             return;
2554         }
2555         scale = size;
2556         replicate = true;
2557         break;
2558     case 0:
2559         break;
2560     case 1:
2561         if (extract32(size, 0, 1)) {
2562             unallocated_encoding(s);
2563             return;
2564         }
2565         index >>= 1;
2566         break;
2567     case 2:
2568         if (extract32(size, 1, 1)) {
2569             unallocated_encoding(s);
2570             return;
2571         }
2572         if (!extract32(size, 0, 1)) {
2573             index >>= 2;
2574         } else {
2575             if (S) {
2576                 unallocated_encoding(s);
2577                 return;
2578             }
2579             index >>= 3;
2580             scale = 3;
2581         }
2582         break;
2583     default:
2584         g_assert_not_reached();
2585     }
2586
2587     if (!fp_access_check(s)) {
2588         return;
2589     }
2590
2591     ebytes = 1 << scale;
2592
2593     if (rn == 31) {
2594         gen_check_sp_alignment(s);
2595     }
2596
2597     tcg_rn = cpu_reg_sp(s, rn);
2598     tcg_addr = tcg_temp_new_i64();
2599     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2600
2601     for (xs = 0; xs < selem; xs++) {
2602         if (replicate) {
2603             /* Load and replicate to all elements */
2604             uint64_t mulconst;
2605             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
2606
2607             tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
2608                                 get_mem_index(s), MO_TE + scale);
2609             switch (scale) {
2610             case 0:
2611                 mulconst = 0x0101010101010101ULL;
2612                 break;
2613             case 1:
2614                 mulconst = 0x0001000100010001ULL;
2615                 break;
2616             case 2:
2617                 mulconst = 0x0000000100000001ULL;
2618                 break;
2619             case 3:
2620                 mulconst = 0;
2621                 break;
2622             default:
2623                 g_assert_not_reached();
2624             }
2625             if (mulconst) {
2626                 tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
2627             }
2628             write_vec_element(s, tcg_tmp, rt, 0, MO_64);
2629             if (is_q) {
2630                 write_vec_element(s, tcg_tmp, rt, 1, MO_64);
2631             } else {
2632                 clear_vec_high(s, rt);
2633             }
2634             tcg_temp_free_i64(tcg_tmp);
2635         } else {
2636             /* Load/store one element per register */
2637             if (is_load) {
2638                 do_vec_ld(s, rt, index, tcg_addr, MO_TE + scale);
2639             } else {
2640                 do_vec_st(s, rt, index, tcg_addr, MO_TE + scale);
2641             }
2642         }
2643         tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2644         rt = (rt + 1) % 32;
2645     }
2646
2647     if (is_postidx) {
2648         int rm = extract32(insn, 16, 5);
2649         if (rm == 31) {
2650             tcg_gen_mov_i64(tcg_rn, tcg_addr);
2651         } else {
2652             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2653         }
2654     }
2655     tcg_temp_free_i64(tcg_addr);
2656 }
2657
2658 /* C3.3 Loads and stores */
2659 static void disas_ldst(DisasContext *s, uint32_t insn)
2660 {
2661     switch (extract32(insn, 24, 6)) {
2662     case 0x08: /* Load/store exclusive */
2663         disas_ldst_excl(s, insn);
2664         break;
2665     case 0x18: case 0x1c: /* Load register (literal) */
2666         disas_ld_lit(s, insn);
2667         break;
2668     case 0x28: case 0x29:
2669     case 0x2c: case 0x2d: /* Load/store pair (all forms) */
2670         disas_ldst_pair(s, insn);
2671         break;
2672     case 0x38: case 0x39:
2673     case 0x3c: case 0x3d: /* Load/store register (all forms) */
2674         disas_ldst_reg(s, insn);
2675         break;
2676     case 0x0c: /* AdvSIMD load/store multiple structures */
2677         disas_ldst_multiple_struct(s, insn);
2678         break;
2679     case 0x0d: /* AdvSIMD load/store single structure */
2680         disas_ldst_single_struct(s, insn);
2681         break;
2682     default:
2683         unallocated_encoding(s);
2684         break;
2685     }
2686 }
2687
2688 /* C3.4.6 PC-rel. addressing
2689  *   31  30   29 28       24 23                5 4    0
2690  * +----+-------+-----------+-------------------+------+
2691  * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
2692  * +----+-------+-----------+-------------------+------+
2693  */
2694 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
2695 {
2696     unsigned int page, rd;
2697     uint64_t base;
2698     uint64_t offset;
2699
2700     page = extract32(insn, 31, 1);
2701     /* SignExtend(immhi:immlo) -> offset */
2702     offset = sextract64(insn, 5, 19);
2703     offset = offset << 2 | extract32(insn, 29, 2);
2704     rd = extract32(insn, 0, 5);
2705     base = s->pc - 4;
2706
2707     if (page) {
2708         /* ADRP (page based) */
2709         base &= ~0xfff;
2710         offset <<= 12;
2711     }
2712
2713     tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
2714 }
2715
2716 /*
2717  * C3.4.1 Add/subtract (immediate)
2718  *
2719  *  31 30 29 28       24 23 22 21         10 9   5 4   0
2720  * +--+--+--+-----------+-----+-------------+-----+-----+
2721  * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
2722  * +--+--+--+-----------+-----+-------------+-----+-----+
2723  *
2724  *    sf: 0 -> 32bit, 1 -> 64bit
2725  *    op: 0 -> add  , 1 -> sub
2726  *     S: 1 -> set flags
2727  * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
2728  */
2729 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
2730 {
2731     int rd = extract32(insn, 0, 5);
2732     int rn = extract32(insn, 5, 5);
2733     uint64_t imm = extract32(insn, 10, 12);
2734     int shift = extract32(insn, 22, 2);
2735     bool setflags = extract32(insn, 29, 1);
2736     bool sub_op = extract32(insn, 30, 1);
2737     bool is_64bit = extract32(insn, 31, 1);
2738
2739     TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2740     TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
2741     TCGv_i64 tcg_result;
2742
2743     switch (shift) {
2744     case 0x0:
2745         break;
2746     case 0x1:
2747         imm <<= 12;
2748         break;
2749     default:
2750         unallocated_encoding(s);
2751         return;
2752     }
2753
2754     tcg_result = tcg_temp_new_i64();
2755     if (!setflags) {
2756         if (sub_op) {
2757             tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
2758         } else {
2759             tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
2760         }
2761     } else {
2762         TCGv_i64 tcg_imm = tcg_const_i64(imm);
2763         if (sub_op) {
2764             gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2765         } else {
2766             gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2767         }
2768         tcg_temp_free_i64(tcg_imm);
2769     }
2770
2771     if (is_64bit) {
2772         tcg_gen_mov_i64(tcg_rd, tcg_result);
2773     } else {
2774         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
2775     }
2776
2777     tcg_temp_free_i64(tcg_result);
2778 }
2779
/* Tile a small bit pattern across a 64 bit value.
 * @mask holds the pattern in its bottom @e bits (all higher bits must be
 * zero); @e is the element width in bits and must be non-zero.
 * Returns the pattern repeated in every @e-bit element of the result.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int filled;

    assert(e != 0);
    /* Each pass doubles the number of low bits already populated */
    for (filled = e; filled < 64; filled *= 2) {
        mask |= mask << filled;
    }
    return mask;
}
2793
/* Build a mask whose low @length bits are ones and whose remaining bits
 * are zero. @length must be in the range 1..64 (asserted).
 */
static inline uint64_t bitmask64(unsigned int length)
{
    unsigned int discard;

    assert(length > 0 && length <= 64);
    /* Shift an all-ones word right so only @length ones survive */
    discard = 64 - length;
    return ~(uint64_t)0 >> discard;
}
2800
/* Cut-down version of the pseudocode DecodeBitMasks() that produces only
 * the wmask.
 * @result: receives the decoded 64 bit pattern on success
 * @immn, @imms, @immr: the N (1 bit), imms (6 bit) and immr (6 bit)
 *                      instruction fields
 * Returns true and fills in *result for a valid encoding; returns false,
 * leaving *result untouched, for a reserved encoding (which the caller
 * should turn into a guest UNDEF).
 */
static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                                   unsigned int imms, unsigned int immr)
{
    uint64_t elem;
    unsigned esize, emask, run, rot;
    int log2_esize;

    assert(immn < 2 && imms < 64 && immr < 64);

    /* Every valid immediate is a 64 bit vector of identical elements,
     * each e = 2, 4, 8, 16, 32 or 64 bits wide. An element is a run of
     * between 1 and e-1 set bits, rotated inside the element by
     * between 0 and e-1 bit positions.
     *
     * immn (1 bit) and imms (6 bits, shown in binary) select the element
     * size and the run length:
     * 64 bit elements: immn = 1, imms = <length of run - 1>
     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
     * The one combination missing from this list, immn = 0 with
     * imms = 11111x, is reserved. An all-ones <length of run - 1> is
     * reserved as well.
     *
     * The rotation is always immr % e (immr being 6 bits wide).
     */

    /* Element size: position of the highest set bit of immn:NOT(imms) */
    log2_esize = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (log2_esize < 1) {
        /* immn == 0 with imms == 0b11111x: reserved */
        return false;
    }
    esize = 1 << log2_esize;
    emask = esize - 1;

    run = imms & emask;
    rot = immr & emask;
    if (run == emask) {
        /* an all-ones <length of run - 1> is reserved */
        return false;
    }

    /* One element: run+1 ones, rotated right by rot within esize bits */
    elem = bitmask64(run + 1);
    if (rot != 0) {
        elem = ((elem >> rot) | (elem << (esize - rot))) & bitmask64(esize);
    }

    /* Copy that element into every esize-bit lane of the 64 bit result */
    *result = bitfield_replicate(elem, esize);
    return true;
}
2866
2867 /* C3.4.4 Logical (immediate)
2868  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
2869  * +----+-----+-------------+---+------+------+------+------+
2870  * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
2871  * +----+-----+-------------+---+------+------+------+------+
2872  */
2873 static void disas_logic_imm(DisasContext *s, uint32_t insn)
2874 {
2875     unsigned int sf, opc, is_n, immr, imms, rn, rd;
2876     TCGv_i64 tcg_rd, tcg_rn;
2877     uint64_t wmask;
2878     bool is_and = false;
2879
2880     sf = extract32(insn, 31, 1);
2881     opc = extract32(insn, 29, 2);
2882     is_n = extract32(insn, 22, 1);
2883     immr = extract32(insn, 16, 6);
2884     imms = extract32(insn, 10, 6);
2885     rn = extract32(insn, 5, 5);
2886     rd = extract32(insn, 0, 5);
2887
2888     if (!sf && is_n) {
2889         unallocated_encoding(s);
2890         return;
2891     }
2892
2893     if (opc == 0x3) { /* ANDS */
2894         tcg_rd = cpu_reg(s, rd);
2895     } else {
2896         tcg_rd = cpu_reg_sp(s, rd);
2897     }
2898     tcg_rn = cpu_reg(s, rn);
2899
2900     if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
2901         /* some immediate field values are reserved */
2902         unallocated_encoding(s);
2903         return;
2904     }
2905
2906     if (!sf) {
2907         wmask &= 0xffffffff;
2908     }
2909
2910     switch (opc) {
2911     case 0x3: /* ANDS */
2912     case 0x0: /* AND */
2913         tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
2914         is_and = true;
2915         break;
2916     case 0x1: /* ORR */
2917         tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
2918         break;
2919     case 0x2: /* EOR */
2920         tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
2921         break;
2922     default:
2923         assert(FALSE); /* must handle all above */
2924         break;
2925     }
2926
2927     if (!sf && !is_and) {
2928         /* zero extend final result; we know we can skip this for AND
2929          * since the immediate had the high 32 bits clear.
2930          */
2931         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2932     }
2933
2934     if (opc == 3) { /* ANDS */
2935         gen_logic_CC(sf, tcg_rd);
2936     }
2937 }
2938
2939 /*
2940  * C3.4.5 Move wide (immediate)
2941  *
2942  *  31 30 29 28         23 22 21 20             5 4    0
2943  * +--+-----+-------------+-----+----------------+------+
2944  * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
2945  * +--+-----+-------------+-----+----------------+------+
2946  *
2947  * sf: 0 -> 32 bit, 1 -> 64 bit
2948  * opc: 00 -> N, 10 -> Z, 11 -> K
2949  * hw: shift/16 (0,16, and sf only 32, 48)
2950  */
2951 static void disas_movw_imm(DisasContext *s, uint32_t insn)
2952 {
2953     int rd = extract32(insn, 0, 5);
2954     uint64_t imm = extract32(insn, 5, 16);
2955     int sf = extract32(insn, 31, 1);
2956     int opc = extract32(insn, 29, 2);
2957     int pos = extract32(insn, 21, 2) << 4;
2958     TCGv_i64 tcg_rd = cpu_reg(s, rd);
2959     TCGv_i64 tcg_imm;
2960
2961     if (!sf && (pos >= 32)) {
2962         unallocated_encoding(s);
2963         return;
2964     }
2965
2966     switch (opc) {
2967     case 0: /* MOVN */
2968     case 2: /* MOVZ */
2969         imm <<= pos;
2970         if (opc == 0) {
2971             imm = ~imm;
2972         }
2973         if (!sf) {
2974             imm &= 0xffffffffu;
2975         }
2976         tcg_gen_movi_i64(tcg_rd, imm);
2977         break;
2978     case 3: /* MOVK */
2979         tcg_imm = tcg_const_i64(imm);
2980         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
2981         tcg_temp_free_i64(tcg_imm);
2982         if (!sf) {
2983             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2984         }
2985         break;
2986     default:
2987         unallocated_encoding(s);
2988         break;
2989     }
2990 }
2991
2992 /* C3.4.2 Bitfield
2993  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
2994  * +----+-----+-------------+---+------+------+------+------+
2995  * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
2996  * +----+-----+-------------+---+------+------+------+------+
2997  */
2998 static void disas_bitfield(DisasContext *s, uint32_t insn)
2999 {
3000     unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
3001     TCGv_i64 tcg_rd, tcg_tmp;
3002
3003     sf = extract32(insn, 31, 1);
3004     opc = extract32(insn, 29, 2);
3005     n = extract32(insn, 22, 1);
3006     ri = extract32(insn, 16, 6);
3007     si = extract32(insn, 10, 6);
3008     rn = extract32(insn, 5, 5);
3009     rd = extract32(insn, 0, 5);
3010     bitsize = sf ? 64 : 32;
3011
3012     if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3013         unallocated_encoding(s);
3014         return;
3015     }
3016
3017     tcg_rd = cpu_reg(s, rd);
3018     tcg_tmp = read_cpu_reg(s, rn, sf);
3019
3020     /* Recognize the common aliases.  */
3021     if (opc == 0) { /* SBFM */
3022         if (ri == 0) {
3023             if (si == 7) { /* SXTB */
3024                 tcg_gen_ext8s_i64(tcg_rd, tcg_tmp);
3025                 goto done;
3026             } else if (si == 15) { /* SXTH */
3027                 tcg_gen_ext16s_i64(tcg_rd, tcg_tmp);
3028                 goto done;
3029             } else if (si == 31) { /* SXTW */
3030                 tcg_gen_ext32s_i64(tcg_rd, tcg_tmp);
3031                 goto done;
3032             }
3033         }
3034         if (si == 63 || (si == 31 && ri <= si)) { /* ASR */
3035             if (si == 31) {
3036                 tcg_gen_ext32s_i64(tcg_tmp, tcg_tmp);
3037             }
3038             tcg_gen_sari_i64(tcg_rd, tcg_tmp, ri);
3039             goto done;
3040         }
3041     }
3042
3043     if (opc != 1) { /* SBFM or UBFM */
3044         tcg_gen_movi_i64(tcg_rd, 0);
3045     }
3046
3047     /* do the bit move operation */
3048     if (si >= ri) {
3049         /* Wd<s-r:0> = Wn<s:r> */
3050         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
3051         pos = 0;
3052         len = (si - ri) + 1;
3053     } else {
3054         /* Wd<32+s-r,32-r> = Wn<s:0> */
3055         pos = bitsize - ri;
3056         len = si + 1;
3057     }
3058
3059     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
3060
3061     if (opc == 0) { /* SBFM - sign extend the destination field */
3062         tcg_gen_shli_i64(tcg_rd, tcg_rd, 64 - (pos + len));
3063         tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len));
3064     }
3065
3066  done:
3067     if (!sf) { /* zero extend final result */
3068         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3069     }
3070 }
3071
3072 /* C3.4.3 Extract
3073  *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3074  * +----+------+-------------+---+----+------+--------+------+------+
3075  * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3076  * +----+------+-------------+---+----+------+--------+------+------+
3077  */
3078 static void disas_extract(DisasContext *s, uint32_t insn)
3079 {
3080     unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3081
3082     sf = extract32(insn, 31, 1);
3083     n = extract32(insn, 22, 1);
3084     rm = extract32(insn, 16, 5);
3085     imm = extract32(insn, 10, 6);
3086     rn = extract32(insn, 5, 5);
3087     rd = extract32(insn, 0, 5);
3088     op21 = extract32(insn, 29, 2);
3089     op0 = extract32(insn, 21, 1);
3090     bitsize = sf ? 64 : 32;
3091
3092     if (sf != n || op21 || op0 || imm >= bitsize) {
3093         unallocated_encoding(s);
3094     } else {
3095         TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3096
3097         tcg_rd = cpu_reg(s, rd);
3098
3099         if (imm) {
3100             /* OPTME: we can special case rm==rn as a rotate */
3101             tcg_rm = read_cpu_reg(s, rm, sf);
3102             tcg_rn = read_cpu_reg(s, rn, sf);
3103             tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
3104             tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
3105             tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
3106             if (!sf) {
3107                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3108             }
3109         } else {
3110             /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3111              * so an extract from bit 0 is a special case.
3112              */
3113             if (sf) {
3114                 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3115             } else {
3116                 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3117             }
3118         }
3119
3120     }
3121 }
3122
3123 /* C3.4 Data processing - immediate */
3124 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3125 {
3126     switch (extract32(insn, 23, 6)) {
3127     case 0x20: case 0x21: /* PC-rel. addressing */
3128         disas_pc_rel_adr(s, insn);
3129         break;
3130     case 0x22: case 0x23: /* Add/subtract (immediate) */
3131         disas_add_sub_imm(s, insn);
3132         break;
3133     case 0x24: /* Logical (immediate) */
3134         disas_logic_imm(s, insn);
3135         break;
3136     case 0x25: /* Move wide (immediate) */
3137         disas_movw_imm(s, insn);
3138         break;
3139     case 0x26: /* Bitfield */
3140         disas_bitfield(s, insn);
3141         break;
3142     case 0x27: /* Extract */
3143         disas_extract(s, insn);
3144         break;
3145     default:
3146         unallocated_encoding(s);
3147         break;
3148     }
3149 }
3150
3151 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
3152  * Note that it is the caller's responsibility to ensure that the
3153  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
3154  * mandated semantics for out of range shifts.
3155  */
3156 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
3157                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3158 {
3159     switch (shift_type) {
3160     case A64_SHIFT_TYPE_LSL:
3161         tcg_gen_shl_i64(dst, src, shift_amount);
3162         break;
3163     case A64_SHIFT_TYPE_LSR:
3164         tcg_gen_shr_i64(dst, src, shift_amount);
3165         break;
3166     case A64_SHIFT_TYPE_ASR:
3167         if (!sf) {
3168             tcg_gen_ext32s_i64(dst, src);
3169         }
3170         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
3171         break;
3172     case A64_SHIFT_TYPE_ROR:
3173         if (sf) {
3174             tcg_gen_rotr_i64(dst, src, shift_amount);
3175         } else {
3176             TCGv_i32 t0, t1;
3177             t0 = tcg_temp_new_i32();
3178             t1 = tcg_temp_new_i32();
3179             tcg_gen_extrl_i64_i32(t0, src);
3180             tcg_gen_extrl_i64_i32(t1, shift_amount);
3181             tcg_gen_rotr_i32(t0, t0, t1);
3182             tcg_gen_extu_i32_i64(dst, t0);
3183             tcg_temp_free_i32(t0);
3184             tcg_temp_free_i32(t1);
3185         }
3186         break;
3187     default:
3188         assert(FALSE); /* all shift types should be handled */
3189         break;
3190     }
3191
3192     if (!sf) { /* zero extend final result */
3193         tcg_gen_ext32u_i64(dst, dst);
3194     }
3195 }
3196
3197 /* Shift a TCGv src by immediate, put result in dst.
3198  * The shift amount must be in range (this should always be true as the
3199  * relevant instructions will UNDEF on bad shift immediates).
3200  */
3201 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
3202                           enum a64_shift_type shift_type, unsigned int shift_i)
3203 {
3204     assert(shift_i < (sf ? 64 : 32));
3205
3206     if (shift_i == 0) {
3207         tcg_gen_mov_i64(dst, src);
3208     } else {
3209         TCGv_i64 shift_const;
3210
3211         shift_const = tcg_const_i64(shift_i);
3212         shift_reg(dst, src, sf, shift_type, shift_const);
3213         tcg_temp_free_i64(shift_const);
3214     }
3215 }
3216
3217 /* C3.5.10 Logical (shifted register)
3218  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
3219  * +----+-----+-----------+-------+---+------+--------+------+------+
3220  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
3221  * +----+-----+-----------+-------+---+------+--------+------+------+
3222  */
3223 static void disas_logic_reg(DisasContext *s, uint32_t insn)
3224 {
3225     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3226     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3227
3228     sf = extract32(insn, 31, 1);
3229     opc = extract32(insn, 29, 2);
3230     shift_type = extract32(insn, 22, 2);
3231     invert = extract32(insn, 21, 1);
3232     rm = extract32(insn, 16, 5);
3233     shift_amount = extract32(insn, 10, 6);
3234     rn = extract32(insn, 5, 5);
3235     rd = extract32(insn, 0, 5);
3236
3237     if (!sf && (shift_amount & (1 << 5))) {
3238         unallocated_encoding(s);
3239         return;
3240     }
3241
3242     tcg_rd = cpu_reg(s, rd);
3243
3244     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3245         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3246          * register-register MOV and MVN, so it is worth special casing.
3247          */
3248         tcg_rm = cpu_reg(s, rm);
3249         if (invert) {
3250             tcg_gen_not_i64(tcg_rd, tcg_rm);
3251             if (!sf) {
3252                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3253             }
3254         } else {
3255             if (sf) {
3256                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
3257             } else {
3258                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
3259             }
3260         }
3261         return;
3262     }
3263
3264     tcg_rm = read_cpu_reg(s, rm, sf);
3265
3266     if (shift_amount) {
3267         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3268     }
3269
3270     tcg_rn = cpu_reg(s, rn);
3271
3272     switch (opc | (invert << 2)) {
3273     case 0: /* AND */
3274     case 3: /* ANDS */
3275         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3276         break;
3277     case 1: /* ORR */
3278         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3279         break;
3280     case 2: /* EOR */
3281         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3282         break;
3283     case 4: /* BIC */
3284     case 7: /* BICS */
3285         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3286         break;
3287     case 5: /* ORN */
3288         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3289         break;
3290     case 6: /* EON */
3291         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3292         break;
3293     default:
3294         assert(FALSE);
3295         break;
3296     }
3297
3298     if (!sf) {
3299         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3300     }
3301
3302     if (opc == 3) {
3303         gen_logic_CC(sf, tcg_rd);
3304     }
3305 }
3306
3307 /*
3308  * C3.5.1 Add/subtract (extended register)
3309  *
3310  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
3311  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3312  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
3313  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3314  *
3315  *  sf: 0 -> 32bit, 1 -> 64bit
3316  *  op: 0 -> add  , 1 -> sub
3317  *   S: 1 -> set flags
3318  * opt: 00
3319  * option: extension type (see DecodeRegExtend)
3320  * imm3: optional shift to Rm
3321  *
3322  * Rd = Rn + LSL(extend(Rm), amount)
3323  */
3324 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3325 {
3326     int rd = extract32(insn, 0, 5);
3327     int rn = extract32(insn, 5, 5);
3328     int imm3 = extract32(insn, 10, 3);
3329     int option = extract32(insn, 13, 3);
3330     int rm = extract32(insn, 16, 5);
3331     bool setflags = extract32(insn, 29, 1);
3332     bool sub_op = extract32(insn, 30, 1);
3333     bool sf = extract32(insn, 31, 1);
3334
3335     TCGv_i64 tcg_rm, tcg_rn; /* temps */
3336     TCGv_i64 tcg_rd;
3337     TCGv_i64 tcg_result;
3338
3339     if (imm3 > 4) {
3340         unallocated_encoding(s);
3341         return;
3342     }
3343
3344     /* non-flag setting ops may use SP */
3345     if (!setflags) {
3346         tcg_rd = cpu_reg_sp(s, rd);
3347     } else {
3348         tcg_rd = cpu_reg(s, rd);
3349     }
3350     tcg_rn = read_cpu_reg_sp(s, rn, sf);
3351
3352     tcg_rm = read_cpu_reg(s, rm, sf);
3353     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3354
3355     tcg_result = tcg_temp_new_i64();
3356
3357     if (!setflags) {
3358         if (sub_op) {
3359             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3360         } else {
3361             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3362         }
3363     } else {
3364         if (sub_op) {
3365             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3366         } else {
3367             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3368         }
3369     }
3370
3371     if (sf) {
3372         tcg_gen_mov_i64(tcg_rd, tcg_result);
3373     } else {
3374         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3375     }
3376
3377     tcg_temp_free_i64(tcg_result);
3378 }
3379
3380 /*
3381  * C3.5.2 Add/subtract (shifted register)
3382  *
3383  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
3384  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3385  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
3386  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3387  *
3388  *    sf: 0 -> 32bit, 1 -> 64bit
3389  *    op: 0 -> add  , 1 -> sub
3390  *     S: 1 -> set flags
3391  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3392  *  imm6: Shift amount to apply to Rm before the add/sub
3393  */
3394 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3395 {
3396     int rd = extract32(insn, 0, 5);
3397     int rn = extract32(insn, 5, 5);
3398     int imm6 = extract32(insn, 10, 6);
3399     int rm = extract32(insn, 16, 5);
3400     int shift_type = extract32(insn, 22, 2);
3401     bool setflags = extract32(insn, 29, 1);
3402     bool sub_op = extract32(insn, 30, 1);
3403     bool sf = extract32(insn, 31, 1);
3404
3405     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3406     TCGv_i64 tcg_rn, tcg_rm;
3407     TCGv_i64 tcg_result;
3408
3409     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3410         unallocated_encoding(s);
3411         return;
3412     }
3413
3414     tcg_rn = read_cpu_reg(s, rn, sf);
3415     tcg_rm = read_cpu_reg(s, rm, sf);
3416
3417     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
3418
3419     tcg_result = tcg_temp_new_i64();
3420
3421     if (!setflags) {
3422         if (sub_op) {
3423             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3424         } else {
3425             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3426         }
3427     } else {
3428         if (sub_op) {
3429             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3430         } else {
3431             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3432         }
3433     }
3434
3435     if (sf) {
3436         tcg_gen_mov_i64(tcg_rd, tcg_result);
3437     } else {
3438         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3439     }
3440
3441     tcg_temp_free_i64(tcg_result);
3442 }
3443
3444 /* C3.5.9 Data-processing (3 source)
3445
3446    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
3447   +--+------+-----------+------+------+----+------+------+------+
3448   |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
3449   +--+------+-----------+------+------+----+------+------+------+
3450
3451  */
3452 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
3453 {
3454     int rd = extract32(insn, 0, 5);
3455     int rn = extract32(insn, 5, 5);
3456     int ra = extract32(insn, 10, 5);
3457     int rm = extract32(insn, 16, 5);
3458     int op_id = (extract32(insn, 29, 3) << 4) |
3459         (extract32(insn, 21, 3) << 1) |
3460         extract32(insn, 15, 1);
3461     bool sf = extract32(insn, 31, 1);
3462     bool is_sub = extract32(op_id, 0, 1);
3463     bool is_high = extract32(op_id, 2, 1);
3464     bool is_signed = false;
3465     TCGv_i64 tcg_op1;
3466     TCGv_i64 tcg_op2;
3467     TCGv_i64 tcg_tmp;
3468
3469     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
3470     switch (op_id) {
3471     case 0x42: /* SMADDL */
3472     case 0x43: /* SMSUBL */
3473     case 0x44: /* SMULH */
3474         is_signed = true;
3475         break;
3476     case 0x0: /* MADD (32bit) */
3477     case 0x1: /* MSUB (32bit) */
3478     case 0x40: /* MADD (64bit) */
3479     case 0x41: /* MSUB (64bit) */
3480     case 0x4a: /* UMADDL */
3481     case 0x4b: /* UMSUBL */
3482     case 0x4c: /* UMULH */
3483         break;
3484     default:
3485         unallocated_encoding(s);
3486         return;
3487     }
3488
3489     if (is_high) {
3490         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
3491         TCGv_i64 tcg_rd = cpu_reg(s, rd);
3492         TCGv_i64 tcg_rn = cpu_reg(s, rn);
3493         TCGv_i64 tcg_rm = cpu_reg(s, rm);
3494
3495         if (is_signed) {
3496             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3497         } else {
3498             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3499         }
3500
3501         tcg_temp_free_i64(low_bits);
3502         return;
3503     }
3504
3505     tcg_op1 = tcg_temp_new_i64();
3506     tcg_op2 = tcg_temp_new_i64();
3507     tcg_tmp = tcg_temp_new_i64();
3508
3509     if (op_id < 0x42) {
3510         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
3511         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
3512     } else {
3513         if (is_signed) {
3514             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
3515             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
3516         } else {
3517             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
3518             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
3519         }
3520     }
3521
3522     if (ra == 31 && !is_sub) {
3523         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
3524         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
3525     } else {
3526         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
3527         if (is_sub) {
3528             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3529         } else {
3530             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3531         }
3532     }
3533
3534     if (!sf) {
3535         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
3536     }
3537
3538     tcg_temp_free_i64(tcg_op1);
3539     tcg_temp_free_i64(tcg_op2);
3540     tcg_temp_free_i64(tcg_tmp);
3541 }
3542
3543 /* C3.5.3 - Add/subtract (with carry)
3544  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
3545  * +--+--+--+------------------------+------+---------+------+-----+
3546  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
3547  * +--+--+--+------------------------+------+---------+------+-----+
3548  *                                            [000000]
3549  */
3550
3551 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
3552 {
3553     unsigned int sf, op, setflags, rm, rn, rd;
3554     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
3555
3556     if (extract32(insn, 10, 6) != 0) {
3557         unallocated_encoding(s);
3558         return;
3559     }
3560
3561     sf = extract32(insn, 31, 1);
3562     op = extract32(insn, 30, 1);
3563     setflags = extract32(insn, 29, 1);
3564     rm = extract32(insn, 16, 5);
3565     rn = extract32(insn, 5, 5);
3566     rd = extract32(insn, 0, 5);
3567
3568     tcg_rd = cpu_reg(s, rd);
3569     tcg_rn = cpu_reg(s, rn);
3570
3571     if (op) {
3572         tcg_y = new_tmp_a64(s);
3573         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
3574     } else {
3575         tcg_y = cpu_reg(s, rm);
3576     }
3577
3578     if (setflags) {
3579         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
3580     } else {
3581         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
3582     }
3583 }
3584
3585 /* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
3586  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
3587  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3588  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
3589  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3590  *        [1]                             y                [0]       [0]
3591  */
3592 static void disas_cc(DisasContext *s, uint32_t insn)
3593 {
3594     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
3595     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
3596     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
3597     DisasCompare c;
3598
3599     if (!extract32(insn, 29, 1)) {
3600         unallocated_encoding(s);
3601         return;
3602     }
3603     if (insn & (1 << 10 | 1 << 4)) {
3604         unallocated_encoding(s);
3605         return;
3606     }
3607     sf = extract32(insn, 31, 1);
3608     op = extract32(insn, 30, 1);
3609     is_imm = extract32(insn, 11, 1);
3610     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
3611     cond = extract32(insn, 12, 4);
3612     rn = extract32(insn, 5, 5);
3613     nzcv = extract32(insn, 0, 4);
3614
3615     /* Set T0 = !COND.  */
3616     tcg_t0 = tcg_temp_new_i32();
3617     arm_test_cc(&c, cond);
3618     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
3619     arm_free_cc(&c);
3620
3621     /* Load the arguments for the new comparison.  */
3622     if (is_imm) {
3623         tcg_y = new_tmp_a64(s);
3624         tcg_gen_movi_i64(tcg_y, y);
3625     } else {
3626         tcg_y = cpu_reg(s, y);
3627     }
3628     tcg_rn = cpu_reg(s, rn);
3629
3630     /* Set the flags for the new comparison.  */
3631     tcg_tmp = tcg_temp_new_i64();
3632     if (op) {
3633         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3634     } else {
3635         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3636     }
3637     tcg_temp_free_i64(tcg_tmp);
3638
3639     /* If COND was false, force the flags to #nzcv.  Compute two masks
3640      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
3641      * For tcg hosts that support ANDC, we can make do with just T1.
3642      * In either case, allow the tcg optimizer to delete any unused mask.
3643      */
3644     tcg_t1 = tcg_temp_new_i32();
3645     tcg_t2 = tcg_temp_new_i32();
3646     tcg_gen_neg_i32(tcg_t1, tcg_t0);
3647     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
3648
3649     if (nzcv & 8) { /* N */
3650         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
3651     } else {
3652         if (TCG_TARGET_HAS_andc_i32) {
3653             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
3654         } else {
3655             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
3656         }
3657     }
3658     if (nzcv & 4) { /* Z */
3659         if (TCG_TARGET_HAS_andc_i32) {
3660             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
3661         } else {
3662             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
3663         }
3664     } else {
3665         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
3666     }
3667     if (nzcv & 2) { /* C */
3668         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
3669     } else {
3670         if (TCG_TARGET_HAS_andc_i32) {
3671             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
3672         } else {
3673             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
3674         }
3675     }
3676     if (nzcv & 1) { /* V */
3677         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
3678     } else {
3679         if (TCG_TARGET_HAS_andc_i32) {
3680             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
3681         } else {
3682             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
3683         }
3684     }
3685     tcg_temp_free_i32(tcg_t0);
3686     tcg_temp_free_i32(tcg_t1);
3687     tcg_temp_free_i32(tcg_t2);
3688 }
3689
3690 /* C3.5.6 Conditional select
3691  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
3692  * +----+----+---+-----------------+------+------+-----+------+------+
3693  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
3694  * +----+----+---+-----------------+------+------+-----+------+------+
3695  */
3696 static void disas_cond_select(DisasContext *s, uint32_t insn)
3697 {
3698     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
3699     TCGv_i64 tcg_rd, zero;
3700     DisasCompare64 c;
3701
3702     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
3703         /* S == 1 or op2<1> == 1 */
3704         unallocated_encoding(s);
3705         return;
3706     }
3707     sf = extract32(insn, 31, 1);
3708     else_inv = extract32(insn, 30, 1);
3709     rm = extract32(insn, 16, 5);
3710     cond = extract32(insn, 12, 4);
3711     else_inc = extract32(insn, 10, 1);
3712     rn = extract32(insn, 5, 5);
3713     rd = extract32(insn, 0, 5);
3714
3715     tcg_rd = cpu_reg(s, rd);
3716
3717     a64_test_cc(&c, cond);
3718     zero = tcg_const_i64(0);
3719
3720     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
3721         /* CSET & CSETM.  */
3722         tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
3723         if (else_inv) {
3724             tcg_gen_neg_i64(tcg_rd, tcg_rd);
3725         }
3726     } else {
3727         TCGv_i64 t_true = cpu_reg(s, rn);
3728         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
3729         if (else_inv && else_inc) {
3730             tcg_gen_neg_i64(t_false, t_false);
3731         } else if (else_inv) {
3732             tcg_gen_not_i64(t_false, t_false);
3733         } else if (else_inc) {
3734             tcg_gen_addi_i64(t_false, t_false, 1);
3735         }
3736         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
3737     }
3738
3739     tcg_temp_free_i64(zero);
3740     a64_free_cc(&c);
3741
3742     if (!sf) {
3743         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3744     }
3745 }
3746
3747 static void handle_clz(DisasContext *s, unsigned int sf,
3748                        unsigned int rn, unsigned int rd)
3749 {
3750     TCGv_i64 tcg_rd, tcg_rn;
3751     tcg_rd = cpu_reg(s, rd);
3752     tcg_rn = cpu_reg(s, rn);
3753
3754     if (sf) {
3755         gen_helper_clz64(tcg_rd, tcg_rn);
3756     } else {
3757         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3758         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3759         gen_helper_clz(tcg_tmp32, tcg_tmp32);
3760         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3761         tcg_temp_free_i32(tcg_tmp32);
3762     }
3763 }
3764
3765 static void handle_cls(DisasContext *s, unsigned int sf,
3766                        unsigned int rn, unsigned int rd)
3767 {
3768     TCGv_i64 tcg_rd, tcg_rn;
3769     tcg_rd = cpu_reg(s, rd);
3770     tcg_rn = cpu_reg(s, rn);
3771
3772     if (sf) {
3773         gen_helper_cls64(tcg_rd, tcg_rn);
3774     } else {
3775         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3776         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3777         gen_helper_cls32(tcg_tmp32, tcg_tmp32);
3778         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3779         tcg_temp_free_i32(tcg_tmp32);
3780     }
3781 }
3782
3783 static void handle_rbit(DisasContext *s, unsigned int sf,
3784                         unsigned int rn, unsigned int rd)
3785 {
3786     TCGv_i64 tcg_rd, tcg_rn;
3787     tcg_rd = cpu_reg(s, rd);
3788     tcg_rn = cpu_reg(s, rn);
3789
3790     if (sf) {
3791         gen_helper_rbit64(tcg_rd, tcg_rn);
3792     } else {
3793         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3794         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3795         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
3796         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3797         tcg_temp_free_i32(tcg_tmp32);
3798     }
3799 }
3800
3801 /* C5.6.149 REV with sf==1, opcode==3 ("REV64") */
3802 static void handle_rev64(DisasContext *s, unsigned int sf,
3803                          unsigned int rn, unsigned int rd)
3804 {
3805     if (!sf) {
3806         unallocated_encoding(s);
3807         return;
3808     }
3809     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
3810 }
3811
3812 /* C5.6.149 REV with sf==0, opcode==2
3813  * C5.6.151 REV32 (sf==1, opcode==2)
3814  */
3815 static void handle_rev32(DisasContext *s, unsigned int sf,
3816                          unsigned int rn, unsigned int rd)
3817 {
3818     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3819
3820     if (sf) {
3821         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3822         TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3823
3824         /* bswap32_i64 requires zero high word */
3825         tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
3826         tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
3827         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3828         tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
3829         tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
3830
3831         tcg_temp_free_i64(tcg_tmp);
3832     } else {
3833         tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
3834         tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
3835     }
3836 }
3837
3838 /* C5.6.150 REV16 (opcode==1) */
3839 static void handle_rev16(DisasContext *s, unsigned int sf,
3840                          unsigned int rn, unsigned int rd)
3841 {
3842     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3843     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3844     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3845
3846     tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff);
3847     tcg_gen_bswap16_i64(tcg_rd, tcg_tmp);
3848
3849     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16);
3850     tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
3851     tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3852     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16);
3853
3854     if (sf) {
3855         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3856         tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
3857         tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3858         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16);
3859
3860         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48);
3861         tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3862         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16);
3863     }
3864
3865     tcg_temp_free_i64(tcg_tmp);
3866 }
3867
3868 /* C3.5.7 Data-processing (1 source)
3869  *   31  30  29  28             21 20     16 15    10 9    5 4    0
3870  * +----+---+---+-----------------+---------+--------+------+------+
3871  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
3872  * +----+---+---+-----------------+---------+--------+------+------+
3873  */
3874 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
3875 {
3876     unsigned int sf, opcode, rn, rd;
3877
3878     if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
3879         unallocated_encoding(s);
3880         return;
3881     }
3882
3883     sf = extract32(insn, 31, 1);
3884     opcode = extract32(insn, 10, 6);
3885     rn = extract32(insn, 5, 5);
3886     rd = extract32(insn, 0, 5);
3887
3888     switch (opcode) {
3889     case 0: /* RBIT */
3890         handle_rbit(s, sf, rn, rd);
3891         break;
3892     case 1: /* REV16 */
3893         handle_rev16(s, sf, rn, rd);
3894         break;
3895     case 2: /* REV32 */
3896         handle_rev32(s, sf, rn, rd);
3897         break;
3898     case 3: /* REV64 */
3899         handle_rev64(s, sf, rn, rd);
3900         break;
3901     case 4: /* CLZ */
3902         handle_clz(s, sf, rn, rd);
3903         break;
3904     case 5: /* CLS */
3905         handle_cls(s, sf, rn, rd);
3906         break;
3907     }
3908 }
3909
3910 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
3911                        unsigned int rm, unsigned int rn, unsigned int rd)
3912 {
3913     TCGv_i64 tcg_n, tcg_m, tcg_rd;
3914     tcg_rd = cpu_reg(s, rd);
3915
3916     if (!sf && is_signed) {
3917         tcg_n = new_tmp_a64(s);
3918         tcg_m = new_tmp_a64(s);
3919         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
3920         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
3921     } else {
3922         tcg_n = read_cpu_reg(s, rn, sf);
3923         tcg_m = read_cpu_reg(s, rm, sf);
3924     }
3925
3926     if (is_signed) {
3927         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
3928     } else {
3929         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
3930     }
3931
3932     if (!sf) { /* zero extend final result */
3933         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3934     }
3935 }
3936
3937 /* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */
3938 static void handle_shift_reg(DisasContext *s,
3939                              enum a64_shift_type shift_type, unsigned int sf,
3940                              unsigned int rm, unsigned int rn, unsigned int rd)
3941 {
3942     TCGv_i64 tcg_shift = tcg_temp_new_i64();
3943     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3944     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3945
3946     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
3947     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
3948     tcg_temp_free_i64(tcg_shift);
3949 }
3950
3951 /* CRC32[BHWX], CRC32C[BHWX] */
3952 static void handle_crc32(DisasContext *s,
3953                          unsigned int sf, unsigned int sz, bool crc32c,
3954                          unsigned int rm, unsigned int rn, unsigned int rd)
3955 {
3956     TCGv_i64 tcg_acc, tcg_val;
3957     TCGv_i32 tcg_bytes;
3958
3959     if (!arm_dc_feature(s, ARM_FEATURE_CRC)
3960         || (sf == 1 && sz != 3)
3961         || (sf == 0 && sz == 3)) {
3962         unallocated_encoding(s);
3963         return;
3964     }
3965
3966     if (sz == 3) {
3967         tcg_val = cpu_reg(s, rm);
3968     } else {
3969         uint64_t mask;
3970         switch (sz) {
3971         case 0:
3972             mask = 0xFF;
3973             break;
3974         case 1:
3975             mask = 0xFFFF;
3976             break;
3977         case 2:
3978             mask = 0xFFFFFFFF;
3979             break;
3980         default:
3981             g_assert_not_reached();
3982         }
3983         tcg_val = new_tmp_a64(s);
3984         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
3985     }
3986
3987     tcg_acc = cpu_reg(s, rn);
3988     tcg_bytes = tcg_const_i32(1 << sz);
3989
3990     if (crc32c) {
3991         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
3992     } else {
3993         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
3994     }
3995
3996     tcg_temp_free_i32(tcg_bytes);
3997 }
3998
3999 /* C3.5.8 Data-processing (2 source)
4000  *   31   30  29 28             21 20  16 15    10 9    5 4    0
4001  * +----+---+---+-----------------+------+--------+------+------+
4002  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
4003  * +----+---+---+-----------------+------+--------+------+------+
4004  */
4005 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
4006 {
4007     unsigned int sf, rm, opcode, rn, rd;
4008     sf = extract32(insn, 31, 1);
4009     rm = extract32(insn, 16, 5);
4010     opcode = extract32(insn, 10, 6);
4011     rn = extract32(insn, 5, 5);
4012     rd = extract32(insn, 0, 5);
4013
4014     if (extract32(insn, 29, 1)) {
4015         unallocated_encoding(s);
4016         return;
4017     }
4018
4019     switch (opcode) {
4020     case 2: /* UDIV */
4021         handle_div(s, false, sf, rm, rn, rd);
4022         break;
4023     case 3: /* SDIV */
4024         handle_div(s, true, sf, rm, rn, rd);
4025         break;
4026     case 8: /* LSLV */
4027         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
4028         break;
4029     case 9: /* LSRV */
4030         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
4031         break;
4032     case 10: /* ASRV */
4033         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
4034         break;
4035     case 11: /* RORV */
4036         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
4037         break;
4038     case 16:
4039     case 17:
4040     case 18:
4041     case 19:
4042     case 20:
4043     case 21:
4044     case 22:
4045     case 23: /* CRC32 */
4046     {
4047         int sz = extract32(opcode, 0, 2);
4048         bool crc32c = extract32(opcode, 2, 1);
4049         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
4050         break;
4051     }
4052     default:
4053         unallocated_encoding(s);
4054         break;
4055     }
4056 }
4057
4058 /* C3.5 Data processing - register */
4059 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
4060 {
4061     switch (extract32(insn, 24, 5)) {
4062     case 0x0a: /* Logical (shifted register) */
4063         disas_logic_reg(s, insn);
4064         break;
4065     case 0x0b: /* Add/subtract */
4066         if (insn & (1 << 21)) { /* (extended register) */
4067             disas_add_sub_ext_reg(s, insn);
4068         } else {
4069             disas_add_sub_reg(s, insn);
4070         }
4071         break;
4072     case 0x1b: /* Data-processing (3 source) */
4073         disas_data_proc_3src(s, insn);
4074         break;
4075     case 0x1a:
4076         switch (extract32(insn, 21, 3)) {
4077         case 0x0: /* Add/subtract (with carry) */
4078             disas_adc_sbc(s, insn);
4079             break;
4080         case 0x2: /* Conditional compare */
4081             disas_cc(s, insn); /* both imm and reg forms */
4082             break;
4083         case 0x4: /* Conditional select */
4084             disas_cond_select(s, insn);
4085             break;
4086         case 0x6: /* Data-processing */
4087             if (insn & (1 << 30)) { /* (1 source) */
4088                 disas_data_proc_1src(s, insn);
4089             } else {            /* (2 source) */
4090                 disas_data_proc_2src(s, insn);
4091             }
4092             break;
4093         default:
4094             unallocated_encoding(s);
4095             break;
4096         }
4097         break;
4098     default:
4099         unallocated_encoding(s);
4100         break;
4101     }
4102 }
4103
4104 static void handle_fp_compare(DisasContext *s, bool is_double,
4105                               unsigned int rn, unsigned int rm,
4106                               bool cmp_with_zero, bool signal_all_nans)
4107 {
4108     TCGv_i64 tcg_flags = tcg_temp_new_i64();
4109     TCGv_ptr fpst = get_fpstatus_ptr();
4110
4111     if (is_double) {
4112         TCGv_i64 tcg_vn, tcg_vm;
4113
4114         tcg_vn = read_fp_dreg(s, rn);
4115         if (cmp_with_zero) {
4116             tcg_vm = tcg_const_i64(0);
4117         } else {
4118             tcg_vm = read_fp_dreg(s, rm);
4119         }
4120         if (signal_all_nans) {
4121             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4122         } else {
4123             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4124         }
4125         tcg_temp_free_i64(tcg_vn);
4126         tcg_temp_free_i64(tcg_vm);
4127     } else {
4128         TCGv_i32 tcg_vn, tcg_vm;
4129
4130         tcg_vn = read_fp_sreg(s, rn);
4131         if (cmp_with_zero) {
4132             tcg_vm = tcg_const_i32(0);
4133         } else {
4134             tcg_vm = read_fp_sreg(s, rm);
4135         }
4136         if (signal_all_nans) {
4137             gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4138         } else {
4139             gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4140         }
4141         tcg_temp_free_i32(tcg_vn);
4142         tcg_temp_free_i32(tcg_vm);
4143     }
4144
4145     tcg_temp_free_ptr(fpst);
4146
4147     gen_set_nzcv(tcg_flags);
4148
4149     tcg_temp_free_i64(tcg_flags);
4150 }
4151
4152 /* C3.6.22 Floating point compare
4153  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
4154  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4155  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
4156  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4157  */
4158 static void disas_fp_compare(DisasContext *s, uint32_t insn)
4159 {
4160     unsigned int mos, type, rm, op, rn, opc, op2r;
4161
4162     mos = extract32(insn, 29, 3);
4163     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4164     rm = extract32(insn, 16, 5);
4165     op = extract32(insn, 14, 2);
4166     rn = extract32(insn, 5, 5);
4167     opc = extract32(insn, 3, 2);
4168     op2r = extract32(insn, 0, 3);
4169
4170     if (mos || op || op2r || type > 1) {
4171         unallocated_encoding(s);
4172         return;
4173     }
4174
4175     if (!fp_access_check(s)) {
4176         return;
4177     }
4178
4179     handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
4180 }
4181
4182 /* C3.6.23 Floating point conditional compare
4183  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
4184  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4185  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
4186  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4187  */
4188 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
4189 {
4190     unsigned int mos, type, rm, cond, rn, op, nzcv;
4191     TCGv_i64 tcg_flags;
4192     TCGLabel *label_continue = NULL;
4193
4194     mos = extract32(insn, 29, 3);
4195     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4196     rm = extract32(insn, 16, 5);
4197     cond = extract32(insn, 12, 4);
4198     rn = extract32(insn, 5, 5);
4199     op = extract32(insn, 4, 1);
4200     nzcv = extract32(insn, 0, 4);
4201
4202     if (mos || type > 1) {
4203         unallocated_encoding(s);
4204         return;
4205     }
4206
4207     if (!fp_access_check(s)) {
4208         return;
4209     }
4210
4211     if (cond < 0x0e) { /* not always */
4212         TCGLabel *label_match = gen_new_label();
4213         label_continue = gen_new_label();
4214         arm_gen_test_cc(cond, label_match);
4215         /* nomatch: */
4216         tcg_flags = tcg_const_i64(nzcv << 28);
4217         gen_set_nzcv(tcg_flags);
4218         tcg_temp_free_i64(tcg_flags);
4219         tcg_gen_br(label_continue);
4220         gen_set_label(label_match);
4221     }
4222
4223     handle_fp_compare(s, type, rn, rm, false, op);
4224
4225     if (cond < 0x0e) {
4226         gen_set_label(label_continue);
4227     }
4228 }
4229
4230 /* C3.6.24 Floating point conditional select
4231  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
4232  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4233  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
4234  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4235  */
4236 static void disas_fp_csel(DisasContext *s, uint32_t insn)
4237 {
4238     unsigned int mos, type, rm, cond, rn, rd;
4239     TCGv_i64 t_true, t_false, t_zero;
4240     DisasCompare64 c;
4241
4242     mos = extract32(insn, 29, 3);
4243     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4244     rm = extract32(insn, 16, 5);
4245     cond = extract32(insn, 12, 4);
4246     rn = extract32(insn, 5, 5);
4247     rd = extract32(insn, 0, 5);
4248
4249     if (mos || type > 1) {
4250         unallocated_encoding(s);
4251         return;
4252     }
4253
4254     if (!fp_access_check(s)) {
4255         return;
4256     }
4257
4258     /* Zero extend sreg inputs to 64 bits now.  */
4259     t_true = tcg_temp_new_i64();
4260     t_false = tcg_temp_new_i64();
4261     read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
4262     read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);
4263
4264     a64_test_cc(&c, cond);
4265     t_zero = tcg_const_i64(0);
4266     tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
4267     tcg_temp_free_i64(t_zero);
4268     tcg_temp_free_i64(t_false);
4269     a64_free_cc(&c);
4270
4271     /* Note that sregs write back zeros to the high bits,
4272        and we've already done the zero-extension.  */
4273     write_fp_dreg(s, rd, t_true);
4274     tcg_temp_free_i64(t_true);
4275 }
4276
4277 /* C3.6.25 Floating-point data-processing (1 source) - single precision */
4278 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
4279 {
4280     TCGv_ptr fpst;
4281     TCGv_i32 tcg_op;
4282     TCGv_i32 tcg_res;
4283
4284     fpst = get_fpstatus_ptr();
4285     tcg_op = read_fp_sreg(s, rn);
4286     tcg_res = tcg_temp_new_i32();
4287
4288     switch (opcode) {
4289     case 0x0: /* FMOV */
4290         tcg_gen_mov_i32(tcg_res, tcg_op);
4291         break;
4292     case 0x1: /* FABS */
4293         gen_helper_vfp_abss(tcg_res, tcg_op);
4294         break;
4295     case 0x2: /* FNEG */
4296         gen_helper_vfp_negs(tcg_res, tcg_op);
4297         break;
4298     case 0x3: /* FSQRT */
4299         gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
4300         break;
4301     case 0x8: /* FRINTN */
4302     case 0x9: /* FRINTP */
4303     case 0xa: /* FRINTM */
4304     case 0xb: /* FRINTZ */
4305     case 0xc: /* FRINTA */
4306     {
4307         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4308
4309         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4310         gen_helper_rints(tcg_res, tcg_op, fpst);
4311
4312         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4313         tcg_temp_free_i32(tcg_rmode);
4314         break;
4315     }
4316     case 0xe: /* FRINTX */
4317         gen_helper_rints_exact(tcg_res, tcg_op, fpst);
4318         break;
4319     case 0xf: /* FRINTI */
4320         gen_helper_rints(tcg_res, tcg_op, fpst);
4321         break;
4322     default:
4323         abort();
4324     }
4325
4326     write_fp_sreg(s, rd, tcg_res);
4327
4328     tcg_temp_free_ptr(fpst);
4329     tcg_temp_free_i32(tcg_op);
4330     tcg_temp_free_i32(tcg_res);
4331 }
4332
4333 /* C3.6.25 Floating-point data-processing (1 source) - double precision */
4334 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
4335 {
4336     TCGv_ptr fpst;
4337     TCGv_i64 tcg_op;
4338     TCGv_i64 tcg_res;
4339
4340     fpst = get_fpstatus_ptr();
4341     tcg_op = read_fp_dreg(s, rn);
4342     tcg_res = tcg_temp_new_i64();
4343
4344     switch (opcode) {
4345     case 0x0: /* FMOV */
4346         tcg_gen_mov_i64(tcg_res, tcg_op);
4347         break;
4348     case 0x1: /* FABS */
4349         gen_helper_vfp_absd(tcg_res, tcg_op);
4350         break;
4351     case 0x2: /* FNEG */
4352         gen_helper_vfp_negd(tcg_res, tcg_op);
4353         break;
4354     case 0x3: /* FSQRT */
4355         gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
4356         break;
4357     case 0x8: /* FRINTN */
4358     case 0x9: /* FRINTP */
4359     case 0xa: /* FRINTM */
4360     case 0xb: /* FRINTZ */
4361     case 0xc: /* FRINTA */
4362     {
4363         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4364
4365         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4366         gen_helper_rintd(tcg_res, tcg_op, fpst);
4367
4368         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4369         tcg_temp_free_i32(tcg_rmode);
4370         break;
4371     }
4372     case 0xe: /* FRINTX */
4373         gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
4374         break;
4375     case 0xf: /* FRINTI */
4376         gen_helper_rintd(tcg_res, tcg_op, fpst);
4377         break;
4378     default:
4379         abort();
4380     }
4381
4382     write_fp_dreg(s, rd, tcg_res);
4383
4384     tcg_temp_free_ptr(fpst);
4385     tcg_temp_free_i64(tcg_op);
4386     tcg_temp_free_i64(tcg_res);
4387 }
4388
4389 static void handle_fp_fcvt(DisasContext *s, int opcode,
4390                            int rd, int rn, int dtype, int ntype)
4391 {
4392     switch (ntype) {
4393     case 0x0:
4394     {
4395         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4396         if (dtype == 1) {
4397             /* Single to double */
4398             TCGv_i64 tcg_rd = tcg_temp_new_i64();
4399             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
4400             write_fp_dreg(s, rd, tcg_rd);
4401             tcg_temp_free_i64(tcg_rd);
4402         } else {
4403             /* Single to half */
4404             TCGv_i32 tcg_rd = tcg_temp_new_i32();
4405             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
4406             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4407             write_fp_sreg(s, rd, tcg_rd);
4408             tcg_temp_free_i32(tcg_rd);
4409         }
4410         tcg_temp_free_i32(tcg_rn);
4411         break;
4412     }
4413     case 0x1:
4414     {
4415         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
4416         TCGv_i32 tcg_rd = tcg_temp_new_i32();
4417         if (dtype == 0) {
4418             /* Double to single */
4419             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
4420         } else {
4421             /* Double to half */
4422             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
4423             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4424         }
4425         write_fp_sreg(s, rd, tcg_rd);
4426         tcg_temp_free_i32(tcg_rd);
4427         tcg_temp_free_i64(tcg_rn);
4428         break;
4429     }
4430     case 0x3:
4431     {
4432         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4433         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
4434         if (dtype == 0) {
4435             /* Half to single */
4436             TCGv_i32 tcg_rd = tcg_temp_new_i32();
4437             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
4438             write_fp_sreg(s, rd, tcg_rd);
4439             tcg_temp_free_i32(tcg_rd);
4440         } else {
4441             /* Half to double */
4442             TCGv_i64 tcg_rd = tcg_temp_new_i64();
4443             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
4444             write_fp_dreg(s, rd, tcg_rd);
4445             tcg_temp_free_i64(tcg_rd);
4446         }
4447         tcg_temp_free_i32(tcg_rn);
4448         break;
4449     }
4450     default:
4451         abort();
4452     }
4453 }
4454
4455 /* C3.6.25 Floating point data-processing (1 source)
4456  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
4457  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4458  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
4459  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4460  */
4461 static void disas_fp_1src(DisasContext *s, uint32_t insn)
4462 {
4463     int type = extract32(insn, 22, 2);
4464     int opcode = extract32(insn, 15, 6);
4465     int rn = extract32(insn, 5, 5);
4466     int rd = extract32(insn, 0, 5);
4467
4468     switch (opcode) {
4469     case 0x4: case 0x5: case 0x7:
4470     {
4471         /* FCVT between half, single and double precision */
4472         int dtype = extract32(opcode, 0, 2);
4473         if (type == 2 || dtype == type) {
4474             unallocated_encoding(s);
4475             return;
4476         }
4477         if (!fp_access_check(s)) {
4478             return;
4479         }
4480
4481         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
4482         break;
4483     }
4484     case 0x0 ... 0x3:
4485     case 0x8 ... 0xc:
4486     case 0xe ... 0xf:
4487         /* 32-to-32 and 64-to-64 ops */
4488         switch (type) {
4489         case 0:
4490             if (!fp_access_check(s)) {
4491                 return;
4492             }
4493
4494             handle_fp_1src_single(s, opcode, rd, rn);
4495             break;
4496         case 1:
4497             if (!fp_access_check(s)) {
4498                 return;
4499             }
4500
4501             handle_fp_1src_double(s, opcode, rd, rn);
4502             break;
4503         default:
4504             unallocated_encoding(s);
4505         }
4506         break;
4507     default:
4508         unallocated_encoding(s);
4509         break;
4510     }
4511 }
4512
4513 /* C3.6.26 Floating-point data-processing (2 source) - single precision */
4514 static void handle_fp_2src_single(DisasContext *s, int opcode,
4515                                   int rd, int rn, int rm)
4516 {
4517     TCGv_i32 tcg_op1;
4518     TCGv_i32 tcg_op2;
4519     TCGv_i32 tcg_res;
4520     TCGv_ptr fpst;
4521
4522     tcg_res = tcg_temp_new_i32();
4523     fpst = get_fpstatus_ptr();
4524     tcg_op1 = read_fp_sreg(s, rn);
4525     tcg_op2 = read_fp_sreg(s, rm);
4526
4527     switch (opcode) {
4528     case 0x0: /* FMUL */
4529         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4530         break;
4531     case 0x1: /* FDIV */
4532         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
4533         break;
4534     case 0x2: /* FADD */
4535         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
4536         break;
4537     case 0x3: /* FSUB */
4538         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
4539         break;
4540     case 0x4: /* FMAX */
4541         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
4542         break;
4543     case 0x5: /* FMIN */
4544         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
4545         break;
4546     case 0x6: /* FMAXNM */
4547         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
4548         break;
4549     case 0x7: /* FMINNM */
4550         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
4551         break;
4552     case 0x8: /* FNMUL */
4553         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4554         gen_helper_vfp_negs(tcg_res, tcg_res);
4555         break;
4556     }
4557
4558     write_fp_sreg(s, rd, tcg_res);
4559
4560     tcg_temp_free_ptr(fpst);
4561     tcg_temp_free_i32(tcg_op1);
4562     tcg_temp_free_i32(tcg_op2);
4563     tcg_temp_free_i32(tcg_res);
4564 }
4565
4566 /* C3.6.26 Floating-point data-processing (2 source) - double precision */
4567 static void handle_fp_2src_double(DisasContext *s, int opcode,
4568                                   int rd, int rn, int rm)
4569 {
4570     TCGv_i64 tcg_op1;
4571     TCGv_i64 tcg_op2;
4572     TCGv_i64 tcg_res;
4573     TCGv_ptr fpst;
4574
4575     tcg_res = tcg_temp_new_i64();
4576     fpst = get_fpstatus_ptr();
4577     tcg_op1 = read_fp_dreg(s, rn);
4578     tcg_op2 = read_fp_dreg(s, rm);
4579
4580     switch (opcode) {
4581     case 0x0: /* FMUL */
4582         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4583         break;
4584     case 0x1: /* FDIV */
4585         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
4586         break;
4587     case 0x2: /* FADD */
4588         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
4589         break;
4590     case 0x3: /* FSUB */
4591         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
4592         break;
4593     case 0x4: /* FMAX */
4594         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
4595         break;
4596     case 0x5: /* FMIN */
4597         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
4598         break;
4599     case 0x6: /* FMAXNM */
4600         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4601         break;
4602     case 0x7: /* FMINNM */
4603         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4604         break;
4605     case 0x8: /* FNMUL */
4606         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4607         gen_helper_vfp_negd(tcg_res, tcg_res);
4608         break;
4609     }
4610
4611     write_fp_dreg(s, rd, tcg_res);
4612
4613     tcg_temp_free_ptr(fpst);
4614     tcg_temp_free_i64(tcg_op1);
4615     tcg_temp_free_i64(tcg_op2);
4616     tcg_temp_free_i64(tcg_res);
4617 }
4618
4619 /* C3.6.26 Floating point data-processing (2 source)
4620  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
4621  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4622  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
4623  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4624  */
4625 static void disas_fp_2src(DisasContext *s, uint32_t insn)
4626 {
4627     int type = extract32(insn, 22, 2);
4628     int rd = extract32(insn, 0, 5);
4629     int rn = extract32(insn, 5, 5);
4630     int rm = extract32(insn, 16, 5);
4631     int opcode = extract32(insn, 12, 4);
4632
4633     if (opcode > 8) {
4634         unallocated_encoding(s);
4635         return;
4636     }
4637
4638     switch (type) {
4639     case 0:
4640         if (!fp_access_check(s)) {
4641             return;
4642         }
4643         handle_fp_2src_single(s, opcode, rd, rn, rm);
4644         break;
4645     case 1:
4646         if (!fp_access_check(s)) {
4647             return;
4648         }
4649         handle_fp_2src_double(s, opcode, rd, rn, rm);
4650         break;
4651     default:
4652         unallocated_encoding(s);
4653     }
4654 }
4655
4656 /* C3.6.27 Floating-point data-processing (3 source) - single precision */
4657 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
4658                                   int rd, int rn, int rm, int ra)
4659 {
4660     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
4661     TCGv_i32 tcg_res = tcg_temp_new_i32();
4662     TCGv_ptr fpst = get_fpstatus_ptr();
4663
4664     tcg_op1 = read_fp_sreg(s, rn);
4665     tcg_op2 = read_fp_sreg(s, rm);
4666     tcg_op3 = read_fp_sreg(s, ra);
4667
4668     /* These are fused multiply-add, and must be done as one
4669      * floating point operation with no rounding between the
4670      * multiplication and addition steps.
4671      * NB that doing the negations here as separate steps is
4672      * correct : an input NaN should come out with its sign bit
4673      * flipped if it is a negated-input.
4674      */
4675     if (o1 == true) {
4676         gen_helper_vfp_negs(tcg_op3, tcg_op3);
4677     }
4678
4679     if (o0 != o1) {
4680         gen_helper_vfp_negs(tcg_op1, tcg_op1);
4681     }
4682
4683     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4684
4685     write_fp_sreg(s, rd, tcg_res);
4686
4687     tcg_temp_free_ptr(fpst);
4688     tcg_temp_free_i32(tcg_op1);
4689     tcg_temp_free_i32(tcg_op2);
4690     tcg_temp_free_i32(tcg_op3);
4691     tcg_temp_free_i32(tcg_res);
4692 }
4693
4694 /* C3.6.27 Floating-point data-processing (3 source) - double precision */
4695 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
4696                                   int rd, int rn, int rm, int ra)
4697 {
4698     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
4699     TCGv_i64 tcg_res = tcg_temp_new_i64();
4700     TCGv_ptr fpst = get_fpstatus_ptr();
4701
4702     tcg_op1 = read_fp_dreg(s, rn);
4703     tcg_op2 = read_fp_dreg(s, rm);
4704     tcg_op3 = read_fp_dreg(s, ra);
4705
4706     /* These are fused multiply-add, and must be done as one
4707      * floating point operation with no rounding between the
4708      * multiplication and addition steps.
4709      * NB that doing the negations here as separate steps is
4710      * correct : an input NaN should come out with its sign bit
4711      * flipped if it is a negated-input.
4712      */
4713     if (o1 == true) {
4714         gen_helper_vfp_negd(tcg_op3, tcg_op3);
4715     }
4716
4717     if (o0 != o1) {
4718         gen_helper_vfp_negd(tcg_op1, tcg_op1);
4719     }
4720
4721     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4722
4723     write_fp_dreg(s, rd, tcg_res);
4724
4725     tcg_temp_free_ptr(fpst);
4726     tcg_temp_free_i64(tcg_op1);
4727     tcg_temp_free_i64(tcg_op2);
4728     tcg_temp_free_i64(tcg_op3);
4729     tcg_temp_free_i64(tcg_res);
4730 }
4731
4732 /* C3.6.27 Floating point data-processing (3 source)
4733  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
4734  * +---+---+---+-----------+------+----+------+----+------+------+------+
4735  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4736  * +---+---+---+-----------+------+----+------+----+------+------+------+
4737  */
4738 static void disas_fp_3src(DisasContext *s, uint32_t insn)
4739 {
4740     int type = extract32(insn, 22, 2);
4741     int rd = extract32(insn, 0, 5);
4742     int rn = extract32(insn, 5, 5);
4743     int ra = extract32(insn, 10, 5);
4744     int rm = extract32(insn, 16, 5);
4745     bool o0 = extract32(insn, 15, 1);
4746     bool o1 = extract32(insn, 21, 1);
4747
4748     switch (type) {
4749     case 0:
4750         if (!fp_access_check(s)) {
4751             return;
4752         }
4753         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
4754         break;
4755     case 1:
4756         if (!fp_access_check(s)) {
4757             return;
4758         }
4759         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
4760         break;
4761     default:
4762         unallocated_encoding(s);
4763     }
4764 }
4765
4766 /* C3.6.28 Floating point immediate
4767  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
4768  * +---+---+---+-----------+------+---+------------+-------+------+------+
4769  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
4770  * +---+---+---+-----------+------+---+------------+-------+------+------+
4771  */
4772 static void disas_fp_imm(DisasContext *s, uint32_t insn)
4773 {
4774     int rd = extract32(insn, 0, 5);
4775     int imm8 = extract32(insn, 13, 8);
4776     int is_double = extract32(insn, 22, 2);
4777     uint64_t imm;
4778     TCGv_i64 tcg_res;
4779
4780     if (is_double > 1) {
4781         unallocated_encoding(s);
4782         return;
4783     }
4784
4785     if (!fp_access_check(s)) {
4786         return;
4787     }
4788
4789     /* The imm8 encodes the sign bit, enough bits to represent
4790      * an exponent in the range 01....1xx to 10....0xx,
4791      * and the most significant 4 bits of the mantissa; see
4792      * VFPExpandImm() in the v8 ARM ARM.
4793      */
4794     if (is_double) {
4795         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4796             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
4797             extract32(imm8, 0, 6);
4798         imm <<= 48;
4799     } else {
4800         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4801             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
4802             (extract32(imm8, 0, 6) << 3);
4803         imm <<= 16;
4804     }
4805
4806     tcg_res = tcg_const_i64(imm);
4807     write_fp_dreg(s, rd, tcg_res);
4808     tcg_temp_free_i64(tcg_res);
4809 }
4810
4811 /* Handle floating point <=> fixed point conversions. Note that we can
4812  * also deal with fp <=> integer conversions as a special case (scale == 64)
4813  * OPTME: consider handling that special case specially or at least skipping
4814  * the call to scalbn in the helpers for zero shifts.
4815  */
4816 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
4817                            bool itof, int rmode, int scale, int sf, int type)
4818 {
4819     bool is_signed = !(opcode & 1);
4820     bool is_double = type;
4821     TCGv_ptr tcg_fpstatus;
4822     TCGv_i32 tcg_shift;
4823
4824     tcg_fpstatus = get_fpstatus_ptr();
4825
4826     tcg_shift = tcg_const_i32(64 - scale);
4827
4828     if (itof) {
4829         TCGv_i64 tcg_int = cpu_reg(s, rn);
4830         if (!sf) {
4831             TCGv_i64 tcg_extend = new_tmp_a64(s);
4832
4833             if (is_signed) {
4834                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
4835             } else {
4836                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
4837             }
4838
4839             tcg_int = tcg_extend;
4840         }
4841
4842         if (is_double) {
4843             TCGv_i64 tcg_double = tcg_temp_new_i64();
4844             if (is_signed) {
4845                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
4846                                      tcg_shift, tcg_fpstatus);
4847             } else {
4848                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
4849                                      tcg_shift, tcg_fpstatus);
4850             }
4851             write_fp_dreg(s, rd, tcg_double);
4852             tcg_temp_free_i64(tcg_double);
4853         } else {
4854             TCGv_i32 tcg_single = tcg_temp_new_i32();
4855             if (is_signed) {
4856                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
4857                                      tcg_shift, tcg_fpstatus);
4858             } else {
4859                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
4860                                      tcg_shift, tcg_fpstatus);
4861             }
4862             write_fp_sreg(s, rd, tcg_single);
4863             tcg_temp_free_i32(tcg_single);
4864         }
4865     } else {
4866         TCGv_i64 tcg_int = cpu_reg(s, rd);
4867         TCGv_i32 tcg_rmode;
4868
4869         if (extract32(opcode, 2, 1)) {
4870             /* There are too many rounding modes to all fit into rmode,
4871              * so FCVTA[US] is a special case.
4872              */
4873             rmode = FPROUNDING_TIEAWAY;
4874         }
4875
4876         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
4877
4878         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4879
4880         if (is_double) {
4881             TCGv_i64 tcg_double = read_fp_dreg(s, rn);
4882             if (is_signed) {
4883                 if (!sf) {
4884                     gen_helper_vfp_tosld(tcg_int, tcg_double,
4885                                          tcg_shift, tcg_fpstatus);
4886                 } else {
4887                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
4888                                          tcg_shift, tcg_fpstatus);
4889                 }
4890             } else {
4891                 if (!sf) {
4892                     gen_helper_vfp_tould(tcg_int, tcg_double,
4893                                          tcg_shift, tcg_fpstatus);
4894                 } else {
4895                     gen_helper_vfp_touqd(tcg_int, tcg_double,
4896                                          tcg_shift, tcg_fpstatus);
4897                 }
4898             }
4899             tcg_temp_free_i64(tcg_double);
4900         } else {
4901             TCGv_i32 tcg_single = read_fp_sreg(s, rn);
4902             if (sf) {
4903                 if (is_signed) {
4904                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
4905                                          tcg_shift, tcg_fpstatus);
4906                 } else {
4907                     gen_helper_vfp_touqs(tcg_int, tcg_single,
4908                                          tcg_shift, tcg_fpstatus);
4909                 }
4910             } else {
4911                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
4912                 if (is_signed) {
4913                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
4914                                          tcg_shift, tcg_fpstatus);
4915                 } else {
4916                     gen_helper_vfp_touls(tcg_dest, tcg_single,
4917                                          tcg_shift, tcg_fpstatus);
4918                 }
4919                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
4920                 tcg_temp_free_i32(tcg_dest);
4921             }
4922             tcg_temp_free_i32(tcg_single);
4923         }
4924
4925         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4926         tcg_temp_free_i32(tcg_rmode);
4927
4928         if (!sf) {
4929             tcg_gen_ext32u_i64(tcg_int, tcg_int);
4930         }
4931     }
4932
4933     tcg_temp_free_ptr(tcg_fpstatus);
4934     tcg_temp_free_i32(tcg_shift);
4935 }
4936
4937 /* C3.6.29 Floating point <-> fixed point conversions
4938  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
4939  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4940  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
4941  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4942  */
4943 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
4944 {
4945     int rd = extract32(insn, 0, 5);
4946     int rn = extract32(insn, 5, 5);
4947     int scale = extract32(insn, 10, 6);
4948     int opcode = extract32(insn, 16, 3);
4949     int rmode = extract32(insn, 19, 2);
4950     int type = extract32(insn, 22, 2);
4951     bool sbit = extract32(insn, 29, 1);
4952     bool sf = extract32(insn, 31, 1);
4953     bool itof;
4954
4955     if (sbit || (type > 1)
4956         || (!sf && scale < 32)) {
4957         unallocated_encoding(s);
4958         return;
4959     }
4960
4961     switch ((rmode << 3) | opcode) {
4962     case 0x2: /* SCVTF */
4963     case 0x3: /* UCVTF */
4964         itof = true;
4965         break;
4966     case 0x18: /* FCVTZS */
4967     case 0x19: /* FCVTZU */
4968         itof = false;
4969         break;
4970     default:
4971         unallocated_encoding(s);
4972         return;
4973     }
4974
4975     if (!fp_access_check(s)) {
4976         return;
4977     }
4978
4979     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
4980 }
4981
4982 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
4983 {
4984     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
4985      * without conversion.
4986      */
4987
4988     if (itof) {
4989         TCGv_i64 tcg_rn = cpu_reg(s, rn);
4990
4991         switch (type) {
4992         case 0:
4993         {
4994             /* 32 bit */
4995             TCGv_i64 tmp = tcg_temp_new_i64();
4996             tcg_gen_ext32u_i64(tmp, tcg_rn);
4997             tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
4998             tcg_gen_movi_i64(tmp, 0);
4999             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5000             tcg_temp_free_i64(tmp);
5001             break;
5002         }
5003         case 1:
5004         {
5005             /* 64 bit */
5006             TCGv_i64 tmp = tcg_const_i64(0);
5007             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
5008             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5009             tcg_temp_free_i64(tmp);
5010             break;
5011         }
5012         case 2:
5013             /* 64 bit to top half. */
5014             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
5015             break;
5016         }
5017     } else {
5018         TCGv_i64 tcg_rd = cpu_reg(s, rd);
5019
5020         switch (type) {
5021         case 0:
5022             /* 32 bit */
5023             tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
5024             break;
5025         case 1:
5026             /* 64 bit */
5027             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
5028             break;
5029         case 2:
5030             /* 64 bits from top half */
5031             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
5032             break;
5033         }
5034     }
5035 }
5036
5037 /* C3.6.30 Floating point <-> integer conversions
5038  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
5039  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5040  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
5041  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5042  */
5043 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
5044 {
5045     int rd = extract32(insn, 0, 5);
5046     int rn = extract32(insn, 5, 5);
5047     int opcode = extract32(insn, 16, 3);
5048     int rmode = extract32(insn, 19, 2);
5049     int type = extract32(insn, 22, 2);
5050     bool sbit = extract32(insn, 29, 1);
5051     bool sf = extract32(insn, 31, 1);
5052
5053     if (sbit) {
5054         unallocated_encoding(s);
5055         return;
5056     }
5057
5058     if (opcode > 5) {
5059         /* FMOV */
5060         bool itof = opcode & 1;
5061
5062         if (rmode >= 2) {
5063             unallocated_encoding(s);
5064             return;
5065         }
5066
5067         switch (sf << 3 | type << 1 | rmode) {
5068         case 0x0: /* 32 bit */
5069         case 0xa: /* 64 bit */
5070         case 0xd: /* 64 bit to top half of quad */
5071             break;
5072         default:
5073             /* all other sf/type/rmode combinations are invalid */
5074             unallocated_encoding(s);
5075             break;
5076         }
5077
5078         if (!fp_access_check(s)) {
5079             return;
5080         }
5081         handle_fmov(s, rd, rn, type, itof);
5082     } else {
5083         /* actual FP conversions */
5084         bool itof = extract32(opcode, 1, 1);
5085
5086         if (type > 1 || (rmode != 0 && opcode > 1)) {
5087             unallocated_encoding(s);
5088             return;
5089         }
5090
5091         if (!fp_access_check(s)) {
5092             return;
5093         }
5094         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
5095     }
5096 }
5097
5098 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
5099  *   31  30  29 28     25 24                          0
5100  * +---+---+---+---------+-----------------------------+
5101  * |   | 0 |   | 1 1 1 1 |                             |
5102  * +---+---+---+---------+-----------------------------+
5103  */
5104 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
5105 {
5106     if (extract32(insn, 24, 1)) {
5107         /* Floating point data-processing (3 source) */
5108         disas_fp_3src(s, insn);
5109     } else if (extract32(insn, 21, 1) == 0) {
5110         /* Floating point to fixed point conversions */
5111         disas_fp_fixed_conv(s, insn);
5112     } else {
5113         switch (extract32(insn, 10, 2)) {
5114         case 1:
5115             /* Floating point conditional compare */
5116             disas_fp_ccomp(s, insn);
5117             break;
5118         case 2:
5119             /* Floating point data-processing (2 source) */
5120             disas_fp_2src(s, insn);
5121             break;
5122         case 3:
5123             /* Floating point conditional select */
5124             disas_fp_csel(s, insn);
5125             break;
5126         case 0:
5127             switch (ctz32(extract32(insn, 12, 4))) {
5128             case 0: /* [15:12] == xxx1 */
5129                 /* Floating point immediate */
5130                 disas_fp_imm(s, insn);
5131                 break;
5132             case 1: /* [15:12] == xx10 */
5133                 /* Floating point compare */
5134                 disas_fp_compare(s, insn);
5135                 break;
5136             case 2: /* [15:12] == x100 */
5137                 /* Floating point data-processing (1 source) */
5138                 disas_fp_1src(s, insn);
5139                 break;
5140             case 3: /* [15:12] == 1000 */
5141                 unallocated_encoding(s);
5142                 break;
5143             default: /* [15:12] == 0000 */
5144                 /* Floating point <-> integer conversions */
5145                 disas_fp_int_conv(s, insn);
5146                 break;
5147             }
5148             break;
5149         }
5150     }
5151 }
5152
5153 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
5154                      int pos)
5155 {
5156     /* Extract 64 bits from the middle of two concatenated 64 bit
5157      * vector register slices left:right. The extracted bits start
5158      * at 'pos' bits into the right (least significant) side.
5159      * We return the result in tcg_right, and guarantee not to
5160      * trash tcg_left.
5161      */
5162     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5163     assert(pos > 0 && pos < 64);
5164
5165     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
5166     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
5167     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
5168
5169     tcg_temp_free_i64(tcg_tmp);
5170 }
5171
5172 /* C3.6.1 EXT
5173  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
5174  * +---+---+-------------+-----+---+------+---+------+---+------+------+
5175  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
5176  * +---+---+-------------+-----+---+------+---+------+---+------+------+
5177  */
5178 static void disas_simd_ext(DisasContext *s, uint32_t insn)
5179 {
5180     int is_q = extract32(insn, 30, 1);
5181     int op2 = extract32(insn, 22, 2);
5182     int imm4 = extract32(insn, 11, 4);
5183     int rm = extract32(insn, 16, 5);
5184     int rn = extract32(insn, 5, 5);
5185     int rd = extract32(insn, 0, 5);
5186     int pos = imm4 << 3;
5187     TCGv_i64 tcg_resl, tcg_resh;
5188
5189     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
5190         unallocated_encoding(s);
5191         return;
5192     }
5193
5194     if (!fp_access_check(s)) {
5195         return;
5196     }
5197
5198     tcg_resh = tcg_temp_new_i64();
5199     tcg_resl = tcg_temp_new_i64();
5200
5201     /* Vd gets bits starting at pos bits into Vm:Vn. This is
5202      * either extracting 128 bits from a 128:128 concatenation, or
5203      * extracting 64 bits from a 64:64 concatenation.
5204      */
5205     if (!is_q) {
5206         read_vec_element(s, tcg_resl, rn, 0, MO_64);
5207         if (pos != 0) {
5208             read_vec_element(s, tcg_resh, rm, 0, MO_64);
5209             do_ext64(s, tcg_resh, tcg_resl, pos);
5210         }
5211         tcg_gen_movi_i64(tcg_resh, 0);
5212     } else {
5213         TCGv_i64 tcg_hh;
5214         typedef struct {
5215             int reg;
5216             int elt;
5217         } EltPosns;
5218         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
5219         EltPosns *elt = eltposns;
5220
5221         if (pos >= 64) {
5222             elt++;
5223             pos -= 64;
5224         }
5225
5226         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
5227         elt++;
5228         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
5229         elt++;
5230         if (pos != 0) {
5231             do_ext64(s, tcg_resh, tcg_resl, pos);
5232             tcg_hh = tcg_temp_new_i64();
5233             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
5234             do_ext64(s, tcg_hh, tcg_resh, pos);
5235             tcg_temp_free_i64(tcg_hh);
5236         }
5237     }
5238
5239     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5240     tcg_temp_free_i64(tcg_resl);
5241     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5242     tcg_temp_free_i64(tcg_resh);
5243 }
5244
5245 /* C3.6.2 TBL/TBX
5246  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
5247  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5248  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
5249  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5250  */
5251 static void disas_simd_tb(DisasContext *s, uint32_t insn)
5252 {
5253     int op2 = extract32(insn, 22, 2);
5254     int is_q = extract32(insn, 30, 1);
5255     int rm = extract32(insn, 16, 5);
5256     int rn = extract32(insn, 5, 5);
5257     int rd = extract32(insn, 0, 5);
5258     int is_tblx = extract32(insn, 12, 1);
5259     int len = extract32(insn, 13, 2);
5260     TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
5261     TCGv_i32 tcg_regno, tcg_numregs;
5262
5263     if (op2 != 0) {
5264         unallocated_encoding(s);
5265         return;
5266     }
5267
5268     if (!fp_access_check(s)) {
5269         return;
5270     }
5271
5272     /* This does a table lookup: for every byte element in the input
5273      * we index into a table formed from up to four vector registers,
5274      * and then the output is the result of the lookups. Our helper
5275      * function does the lookup operation for a single 64 bit part of
5276      * the input.
5277      */
5278     tcg_resl = tcg_temp_new_i64();
5279     tcg_resh = tcg_temp_new_i64();
5280
5281     if (is_tblx) {
5282         read_vec_element(s, tcg_resl, rd, 0, MO_64);
5283     } else {
5284         tcg_gen_movi_i64(tcg_resl, 0);
5285     }
5286     if (is_tblx && is_q) {
5287         read_vec_element(s, tcg_resh, rd, 1, MO_64);
5288     } else {
5289         tcg_gen_movi_i64(tcg_resh, 0);
5290     }
5291
5292     tcg_idx = tcg_temp_new_i64();
5293     tcg_regno = tcg_const_i32(rn);
5294     tcg_numregs = tcg_const_i32(len + 1);
5295     read_vec_element(s, tcg_idx, rm, 0, MO_64);
5296     gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
5297                         tcg_regno, tcg_numregs);
5298     if (is_q) {
5299         read_vec_element(s, tcg_idx, rm, 1, MO_64);
5300         gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
5301                             tcg_regno, tcg_numregs);
5302     }
5303     tcg_temp_free_i64(tcg_idx);
5304     tcg_temp_free_i32(tcg_regno);
5305     tcg_temp_free_i32(tcg_numregs);
5306
5307     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5308     tcg_temp_free_i64(tcg_resl);
5309     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5310     tcg_temp_free_i64(tcg_resh);
5311 }
5312
5313 /* C3.6.3 ZIP/UZP/TRN
5314  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
5315  * +---+---+-------------+------+---+------+---+------------------+------+
5316  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
5317  * +---+---+-------------+------+---+------+---+------------------+------+
5318  */
5319 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
5320 {
5321     int rd = extract32(insn, 0, 5);
5322     int rn = extract32(insn, 5, 5);
5323     int rm = extract32(insn, 16, 5);
5324     int size = extract32(insn, 22, 2);
5325     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
5326      * bit 2 indicates 1 vs 2 variant of the insn.
5327      */
5328     int opcode = extract32(insn, 12, 2);
5329     bool part = extract32(insn, 14, 1);
5330     bool is_q = extract32(insn, 30, 1);
5331     int esize = 8 << size;
5332     int i, ofs;
5333     int datasize = is_q ? 128 : 64;
5334     int elements = datasize / esize;
5335     TCGv_i64 tcg_res, tcg_resl, tcg_resh;
5336
5337     if (opcode == 0 || (size == 3 && !is_q)) {
5338         unallocated_encoding(s);
5339         return;
5340     }
5341
5342     if (!fp_access_check(s)) {
5343         return;
5344     }
5345
5346     tcg_resl = tcg_const_i64(0);
5347     tcg_resh = tcg_const_i64(0);
5348     tcg_res = tcg_temp_new_i64();
5349
5350     for (i = 0; i < elements; i++) {
5351         switch (opcode) {
5352         case 1: /* UZP1/2 */
5353         {
5354             int midpoint = elements / 2;
5355             if (i < midpoint) {
5356                 read_vec_element(s, tcg_res, rn, 2 * i + part, size);
5357             } else {
5358                 read_vec_element(s, tcg_res, rm,
5359                                  2 * (i - midpoint) + part, size);
5360             }
5361             break;
5362         }
5363         case 2: /* TRN1/2 */
5364             if (i & 1) {
5365                 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
5366             } else {
5367                 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
5368             }
5369             break;
5370         case 3: /* ZIP1/2 */
5371         {
5372             int base = part * elements / 2;
5373             if (i & 1) {
5374                 read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
5375             } else {
5376                 read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
5377             }
5378             break;
5379         }
5380         default:
5381             g_assert_not_reached();
5382         }
5383
5384         ofs = i * esize;
5385         if (ofs < 64) {
5386             tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
5387             tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
5388         } else {
5389             tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
5390             tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
5391         }
5392     }
5393
5394     tcg_temp_free_i64(tcg_res);
5395
5396     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5397     tcg_temp_free_i64(tcg_resl);
5398     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5399     tcg_temp_free_i64(tcg_resh);
5400 }
5401
5402 static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
5403                         int opc, bool is_min, TCGv_ptr fpst)
5404 {
5405     /* Helper function for disas_simd_across_lanes: do a single precision
5406      * min/max operation on the specified two inputs,
5407      * and return the result in tcg_elt1.
5408      */
5409     if (opc == 0xc) {
5410         if (is_min) {
5411             gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5412         } else {
5413             gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5414         }
5415     } else {
5416         assert(opc == 0xf);
5417         if (is_min) {
5418             gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5419         } else {
5420             gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5421         }
5422     }
5423 }
5424
5425 /* C3.6.4 AdvSIMD across lanes
5426  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
5427  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5428  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
5429  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5430  */
5431 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
5432 {
5433     int rd = extract32(insn, 0, 5);
5434     int rn = extract32(insn, 5, 5);
5435     int size = extract32(insn, 22, 2);
5436     int opcode = extract32(insn, 12, 5);
5437     bool is_q = extract32(insn, 30, 1);
5438     bool is_u = extract32(insn, 29, 1);
5439     bool is_fp = false;
5440     bool is_min = false;
5441     int esize;
5442     int elements;
5443     int i;
5444     TCGv_i64 tcg_res, tcg_elt;
5445
5446     switch (opcode) {
5447     case 0x1b: /* ADDV */
5448         if (is_u) {
5449             unallocated_encoding(s);
5450             return;
5451         }
5452         /* fall through */
5453     case 0x3: /* SADDLV, UADDLV */
5454     case 0xa: /* SMAXV, UMAXV */
5455     case 0x1a: /* SMINV, UMINV */
5456         if (size == 3 || (size == 2 && !is_q)) {
5457             unallocated_encoding(s);
5458             return;
5459         }
5460         break;
5461     case 0xc: /* FMAXNMV, FMINNMV */
5462     case 0xf: /* FMAXV, FMINV */
5463         if (!is_u || !is_q || extract32(size, 0, 1)) {
5464             unallocated_encoding(s);
5465             return;
5466         }
5467         /* Bit 1 of size field encodes min vs max, and actual size is always
5468          * 32 bits: adjust the size variable so following code can rely on it
5469          */
5470         is_min = extract32(size, 1, 1);
5471         is_fp = true;
5472         size = 2;
5473         break;
5474     default:
5475         unallocated_encoding(s);
5476         return;
5477     }
5478
5479     if (!fp_access_check(s)) {
5480         return;
5481     }
5482
5483     esize = 8 << size;
5484     elements = (is_q ? 128 : 64) / esize;
5485
5486     tcg_res = tcg_temp_new_i64();
5487     tcg_elt = tcg_temp_new_i64();
5488
5489     /* These instructions operate across all lanes of a vector
5490      * to produce a single result. We can guarantee that a 64
5491      * bit intermediate is sufficient:
5492      *  + for [US]ADDLV the maximum element size is 32 bits, and
5493      *    the result type is 64 bits
5494      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
5495      *    same as the element size, which is 32 bits at most
5496      * For the integer operations we can choose to work at 64
5497      * or 32 bits and truncate at the end; for simplicity
5498      * we use 64 bits always. The floating point
5499      * ops do require 32 bit intermediates, though.
5500      */
5501     if (!is_fp) {
5502         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
5503
5504         for (i = 1; i < elements; i++) {
5505             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
5506
5507             switch (opcode) {
5508             case 0x03: /* SADDLV / UADDLV */
5509             case 0x1b: /* ADDV */
5510                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
5511                 break;
5512             case 0x0a: /* SMAXV / UMAXV */
5513                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
5514                                     tcg_res,
5515                                     tcg_res, tcg_elt, tcg_res, tcg_elt);
5516                 break;
5517             case 0x1a: /* SMINV / UMINV */
5518                 tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
5519                                     tcg_res,
5520                                     tcg_res, tcg_elt, tcg_res, tcg_elt);
5521                 break;
5522                 break;
5523             default:
5524                 g_assert_not_reached();
5525             }
5526
5527         }
5528     } else {
5529         /* Floating point ops which work on 32 bit (single) intermediates.
5530          * Note that correct NaN propagation requires that we do these
5531          * operations in exactly the order specified by the pseudocode.
5532          */
5533         TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
5534         TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
5535         TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
5536         TCGv_ptr fpst = get_fpstatus_ptr();
5537
5538         assert(esize == 32);
5539         assert(elements == 4);
5540
5541         read_vec_element(s, tcg_elt, rn, 0, MO_32);
5542         tcg_gen_extrl_i64_i32(tcg_elt1, tcg_elt);
5543         read_vec_element(s, tcg_elt, rn, 1, MO_32);
5544         tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5545
5546         do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5547
5548         read_vec_element(s, tcg_elt, rn, 2, MO_32);
5549         tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5550         read_vec_element(s, tcg_elt, rn, 3, MO_32);
5551         tcg_gen_extrl_i64_i32(tcg_elt3, tcg_elt);
5552
5553         do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
5554
5555         do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5556
5557         tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
5558         tcg_temp_free_i32(tcg_elt1);
5559         tcg_temp_free_i32(tcg_elt2);
5560         tcg_temp_free_i32(tcg_elt3);
5561         tcg_temp_free_ptr(fpst);
5562     }
5563
5564     tcg_temp_free_i64(tcg_elt);
5565
5566     /* Now truncate the result to the width required for the final output */
5567     if (opcode == 0x03) {
5568         /* SADDLV, UADDLV: result is 2*esize */
5569         size++;
5570     }
5571
5572     switch (size) {
5573     case 0:
5574         tcg_gen_ext8u_i64(tcg_res, tcg_res);
5575         break;
5576     case 1:
5577         tcg_gen_ext16u_i64(tcg_res, tcg_res);
5578         break;
5579     case 2:
5580         tcg_gen_ext32u_i64(tcg_res, tcg_res);
5581         break;
5582     case 3:
5583         break;
5584     default:
5585         g_assert_not_reached();
5586     }
5587
5588     write_fp_dreg(s, rd, tcg_res);
5589     tcg_temp_free_i64(tcg_res);
5590 }
5591
5592 /* C6.3.31 DUP (Element, Vector)
5593  *
5594  *  31  30   29              21 20    16 15        10  9    5 4    0
5595  * +---+---+-------------------+--------+-------------+------+------+
5596  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5597  * +---+---+-------------------+--------+-------------+------+------+
5598  *
5599  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5600  */
5601 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
5602                              int imm5)
5603 {
5604     int size = ctz32(imm5);
5605     int esize = 8 << size;
5606     int elements = (is_q ? 128 : 64) / esize;
5607     int index, i;
5608     TCGv_i64 tmp;
5609
5610     if (size > 3 || (size == 3 && !is_q)) {
5611         unallocated_encoding(s);
5612         return;
5613     }
5614
5615     if (!fp_access_check(s)) {
5616         return;
5617     }
5618
5619     index = imm5 >> (size + 1);
5620
5621     tmp = tcg_temp_new_i64();
5622     read_vec_element(s, tmp, rn, index, size);
5623
5624     for (i = 0; i < elements; i++) {
5625         write_vec_element(s, tmp, rd, i, size);
5626     }
5627
5628     if (!is_q) {
5629         clear_vec_high(s, rd);
5630     }
5631
5632     tcg_temp_free_i64(tmp);
5633 }
5634
5635 /* C6.3.31 DUP (element, scalar)
5636  *  31                   21 20    16 15        10  9    5 4    0
5637  * +-----------------------+--------+-------------+------+------+
5638  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5639  * +-----------------------+--------+-------------+------+------+
5640  */
5641 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
5642                               int imm5)
5643 {
5644     int size = ctz32(imm5);
5645     int index;
5646     TCGv_i64 tmp;
5647
5648     if (size > 3) {
5649         unallocated_encoding(s);
5650         return;
5651     }
5652
5653     if (!fp_access_check(s)) {
5654         return;
5655     }
5656
5657     index = imm5 >> (size + 1);
5658
5659     /* This instruction just extracts the specified element and
5660      * zero-extends it into the bottom of the destination register.
5661      */
5662     tmp = tcg_temp_new_i64();
5663     read_vec_element(s, tmp, rn, index, size);
5664     write_fp_dreg(s, rd, tmp);
5665     tcg_temp_free_i64(tmp);
5666 }
5667
5668 /* C6.3.32 DUP (General)
5669  *
5670  *  31  30   29              21 20    16 15        10  9    5 4    0
5671  * +---+---+-------------------+--------+-------------+------+------+
5672  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
5673  * +---+---+-------------------+--------+-------------+------+------+
5674  *
5675  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5676  */
5677 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
5678                              int imm5)
5679 {
5680     int size = ctz32(imm5);
5681     int esize = 8 << size;
5682     int elements = (is_q ? 128 : 64)/esize;
5683     int i = 0;
5684
5685     if (size > 3 || ((size == 3) && !is_q)) {
5686         unallocated_encoding(s);
5687         return;
5688     }
5689
5690     if (!fp_access_check(s)) {
5691         return;
5692     }
5693
5694     for (i = 0; i < elements; i++) {
5695         write_vec_element(s, cpu_reg(s, rn), rd, i, size);
5696     }
5697     if (!is_q) {
5698         clear_vec_high(s, rd);
5699     }
5700 }
5701
5702 /* C6.3.150 INS (Element)
5703  *
5704  *  31                   21 20    16 15  14    11  10 9    5 4    0
5705  * +-----------------------+--------+------------+---+------+------+
5706  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
5707  * +-----------------------+--------+------------+---+------+------+
5708  *
5709  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5710  * index: encoded in imm5<4:size+1>
5711  */
5712 static void handle_simd_inse(DisasContext *s, int rd, int rn,
5713                              int imm4, int imm5)
5714 {
5715     int size = ctz32(imm5);
5716     int src_index, dst_index;
5717     TCGv_i64 tmp;
5718
5719     if (size > 3) {
5720         unallocated_encoding(s);
5721         return;
5722     }
5723
5724     if (!fp_access_check(s)) {
5725         return;
5726     }
5727
5728     dst_index = extract32(imm5, 1+size, 5);
5729     src_index = extract32(imm4, size, 4);
5730
5731     tmp = tcg_temp_new_i64();
5732
5733     read_vec_element(s, tmp, rn, src_index, size);
5734     write_vec_element(s, tmp, rd, dst_index, size);
5735
5736     tcg_temp_free_i64(tmp);
5737 }
5738
5739
5740 /* C6.3.151 INS (General)
5741  *
5742  *  31                   21 20    16 15        10  9    5 4    0
5743  * +-----------------------+--------+-------------+------+------+
5744  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
5745  * +-----------------------+--------+-------------+------+------+
5746  *
5747  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5748  * index: encoded in imm5<4:size+1>
5749  */
5750 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
5751 {
5752     int size = ctz32(imm5);
5753     int idx;
5754
5755     if (size > 3) {
5756         unallocated_encoding(s);
5757         return;
5758     }
5759
5760     if (!fp_access_check(s)) {
5761         return;
5762     }
5763
5764     idx = extract32(imm5, 1 + size, 4 - size);
5765     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
5766 }
5767
5768 /*
5769  * C6.3.321 UMOV (General)
5770  * C6.3.237 SMOV (General)
5771  *
5772  *  31  30   29              21 20    16 15    12   10 9    5 4    0
5773  * +---+---+-------------------+--------+-------------+------+------+
5774  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
5775  * +---+---+-------------------+--------+-------------+------+------+
5776  *
5777  * U: unsigned when set
5778  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5779  */
5780 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
5781                                   int rn, int rd, int imm5)
5782 {
5783     int size = ctz32(imm5);
5784     int element;
5785     TCGv_i64 tcg_rd;
5786
5787     /* Check for UnallocatedEncodings */
5788     if (is_signed) {
5789         if (size > 2 || (size == 2 && !is_q)) {
5790             unallocated_encoding(s);
5791             return;
5792         }
5793     } else {
5794         if (size > 3
5795             || (size < 3 && is_q)
5796             || (size == 3 && !is_q)) {
5797             unallocated_encoding(s);
5798             return;
5799         }
5800     }
5801
5802     if (!fp_access_check(s)) {
5803         return;
5804     }
5805
5806     element = extract32(imm5, 1+size, 4);
5807
5808     tcg_rd = cpu_reg(s, rd);
5809     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
5810     if (is_signed && !is_q) {
5811         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5812     }
5813 }
5814
5815 /* C3.6.5 AdvSIMD copy
5816  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
5817  * +---+---+----+-----------------+------+---+------+---+------+------+
5818  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
5819  * +---+---+----+-----------------+------+---+------+---+------+------+
5820  */
5821 static void disas_simd_copy(DisasContext *s, uint32_t insn)
5822 {
5823     int rd = extract32(insn, 0, 5);
5824     int rn = extract32(insn, 5, 5);
5825     int imm4 = extract32(insn, 11, 4);
5826     int op = extract32(insn, 29, 1);
5827     int is_q = extract32(insn, 30, 1);
5828     int imm5 = extract32(insn, 16, 5);
5829
5830     if (op) {
5831         if (is_q) {
5832             /* INS (element) */
5833             handle_simd_inse(s, rd, rn, imm4, imm5);
5834         } else {
5835             unallocated_encoding(s);
5836         }
5837     } else {
5838         switch (imm4) {
5839         case 0:
5840             /* DUP (element - vector) */
5841             handle_simd_dupe(s, is_q, rd, rn, imm5);
5842             break;
5843         case 1:
5844             /* DUP (general) */
5845             handle_simd_dupg(s, is_q, rd, rn, imm5);
5846             break;
5847         case 3:
5848             if (is_q) {
5849                 /* INS (general) */
5850                 handle_simd_insg(s, rd, rn, imm5);
5851             } else {
5852                 unallocated_encoding(s);
5853             }
5854             break;
5855         case 5:
5856         case 7:
5857             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
5858             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
5859             break;
5860         default:
5861             unallocated_encoding(s);
5862             break;
5863         }
5864     }
5865 }
5866
/* C3.6.6 AdvSIMD modified immediate
 *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
 *
 * There are a number of operations that can be carried out here:
 *   MOVI - move (shifted) imm into register
 *   MVNI - move inverted (shifted) imm into register
 *   ORR  - bitwise OR of (shifted) imm with register
 *   BIC  - bitwise clear of (shifted) imm with register
 *
 * The 8 bit immediate abc:defgh is expanded to a 64 bit constant
 * according to cmode and op, and that constant is then moved into, ORed
 * into or ANDed into each 64 bit half of Vd.
 */
static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int cmode = extract32(insn, 12, 4);
    int cmode_3_1 = extract32(cmode, 1, 3);
    int cmode_0 = extract32(cmode, 0, 1);
    int o2 = extract32(insn, 11, 1);
    /* Reassemble the 8 bit immediate: defgh in bits 4:0, abc in bits 7:5 */
    uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
    bool is_neg = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);
    uint64_t imm = 0;
    TCGv_i64 tcg_rd, tcg_imm;
    int i;

    /* o2 must be zero, and cmode 0xf with op set requires Q set */
    if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* See AdvSIMDExpandImm() in ARM ARM */
    switch (cmode_3_1) {
    case 0: /* Replicate(Zeros(24):imm8, 2) */
    case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
    case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
    case 3: /* Replicate(imm8:Zeros(24), 2) */
    {
        /* imm8 placed in byte cmode<2:1> of each 32 bit lane */
        int shift = cmode_3_1 * 8;
        imm = bitfield_replicate(abcdefgh << shift, 32);
        break;
    }
    case 4: /* Replicate(Zeros(8):imm8, 4) */
    case 5: /* Replicate(imm8:Zeros(8), 4) */
    {
        /* imm8 placed in the low or high byte of each 16 bit lane */
        int shift = (cmode_3_1 & 0x1) * 8;
        imm = bitfield_replicate(abcdefgh << shift, 16);
        break;
    }
    case 6:
        if (cmode_0) {
            /* Replicate(Zeros(8):imm8:Ones(16), 2) */
            imm = (abcdefgh << 16) | 0xffff;
        } else {
            /* Replicate(Zeros(16):imm8:Ones(8), 2) */
            imm = (abcdefgh << 8) | 0xff;
        }
        imm = bitfield_replicate(imm, 32);
        break;
    case 7:
        if (!cmode_0 && !is_neg) {
            /* imm8 copied into every byte */
            imm = bitfield_replicate(abcdefgh, 8);
        } else if (!cmode_0 && is_neg) {
            /* Each bit of imm8 expands to a whole byte of ones or zeros */
            int i;
            imm = 0;
            for (i = 0; i < 8; i++) {
                if ((abcdefgh) & (1 << i)) {
                    imm |= 0xffULL << (i * 8);
                }
            }
        } else if (cmode_0) {
            /* NOTE(review): these two arms look like the ARM ARM floating
             * point immediate expansion (64 bit pattern when op is set,
             * 32 bit pattern duplicated into both words otherwise) --
             * confirm against AdvSIMDExpandImm().
             */
            if (is_neg) {
                imm = (abcdefgh & 0x3f) << 48;
                if (abcdefgh & 0x80) {
                    imm |= 0x8000000000000000ULL;
                }
                if (abcdefgh & 0x40) {
                    imm |= 0x3fc0000000000000ULL;
                } else {
                    imm |= 0x4000000000000000ULL;
                }
            } else {
                imm = (abcdefgh & 0x3f) << 19;
                if (abcdefgh & 0x80) {
                    imm |= 0x80000000;
                }
                if (abcdefgh & 0x40) {
                    imm |= 0x3e000000;
                } else {
                    imm |= 0x40000000;
                }
                imm |= (imm << 32);
            }
        }
        break;
    }

    /* For every cmode group except 7, op set means the inverted constant */
    if (cmode_3_1 != 7 && is_neg) {
        imm = ~imm;
    }

    tcg_imm = tcg_const_i64(imm);
    tcg_rd = new_tmp_a64(s);

    /* Two passes: i == 0 uses the offset from fp_reg_offset(), i == 1
     * the offset from fp_reg_hi_offset().
     */
    for (i = 0; i < 2; i++) {
        int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64);

        if (i == 1 && !is_q) {
            /* non-quad ops clear high half of vector */
            tcg_gen_movi_i64(tcg_rd, 0);
        } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
            /* Read-modify-write forms: combine with the current contents */
            tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
            if (is_neg) {
                /* AND (BIC) */
                tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
            } else {
                /* ORR */
                tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
            }
        } else {
            /* MOVI */
            tcg_gen_mov_i64(tcg_rd, tcg_imm);
        }
        tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
    }

    tcg_temp_free_i64(tcg_imm);
}
5999
6000 /* C3.6.7 AdvSIMD scalar copy
6001  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
6002  * +-----+----+-----------------+------+---+------+---+------+------+
6003  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6004  * +-----+----+-----------------+------+---+------+---+------+------+
6005  */
6006 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
6007 {
6008     int rd = extract32(insn, 0, 5);
6009     int rn = extract32(insn, 5, 5);
6010     int imm4 = extract32(insn, 11, 4);
6011     int imm5 = extract32(insn, 16, 5);
6012     int op = extract32(insn, 29, 1);
6013
6014     if (op != 0 || imm4 != 0) {
6015         unallocated_encoding(s);
6016         return;
6017     }
6018
6019     /* DUP (element, scalar) */
6020     handle_simd_dupes(s, rd, rn, imm5);
6021 }
6022
/* C3.6.8 AdvSIMD scalar pairwise
 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 *
 * Combine elements 0 and 1 of Vn with an integer add or a floating point
 * add/max/min operation and write the scalar result to Rd.
 */
static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
{
    int u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    TCGv_ptr fpst;

    /* For some ops (the FP ones), size[1] is part of the encoding.
     * For ADDP strictly it is not but size[1] is always 1 for valid
     * encodings.
     */
    opcode |= (extract32(size, 1, 1) << 5);

    switch (opcode) {
    case 0x3b: /* ADDP */
        /* Integer ADDP exists only with U clear and 64 bit elements */
        if (u || size != 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }

        /* No FP status needed; mark fpst unused so it is not freed below */
        TCGV_UNUSED_PTR(fpst);
        break;
    case 0xc: /* FMAXNMP */
    case 0xd: /* FADDP */
    case 0xf: /* FMAXP */
    case 0x2c: /* FMINNMP */
    case 0x2f: /* FMINP */
        /* FP op, size[0] is 32 or 64 bit */
        if (!u) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }

        /* Turn size[0] into the element size used below: 3 or 2 */
        size = extract32(size, 0, 1) ? 3 : 2;
        fpst = get_fpstatus_ptr();
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (size == 3) {
        /* 64 bit elements: integer ADDP and the double precision FP ops */
        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
        TCGv_i64 tcg_res = tcg_temp_new_i64();

        read_vec_element(s, tcg_op1, rn, 0, MO_64);
        read_vec_element(s, tcg_op2, rn, 1, MO_64);

        switch (opcode) {
        case 0x3b: /* ADDP */
            tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
            break;
        case 0xc: /* FMAXNMP */
            gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xd: /* FADDP */
            gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xf: /* FMAXP */
            gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2c: /* FMINNMP */
            gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2f: /* FMINP */
            gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        default:
            g_assert_not_reached();
        }

        write_fp_dreg(s, rd, tcg_res);

        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2);
        tcg_temp_free_i64(tcg_res);
    } else {
        /* 32 bit elements: single precision FP ops only */
        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
        TCGv_i32 tcg_res = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
        read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);

        switch (opcode) {
        case 0xc: /* FMAXNMP */
            gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xd: /* FADDP */
            gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xf: /* FMAXP */
            gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2c: /* FMINNMP */
            gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2f: /* FMINP */
            gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        default:
            g_assert_not_reached();
        }

        write_fp_sreg(s, rd, tcg_res);

        tcg_temp_free_i32(tcg_op1);
        tcg_temp_free_i32(tcg_op2);
        tcg_temp_free_i32(tcg_res);
    }

    /* Only the FP paths obtained a status pointer that needs releasing */
    if (!TCGV_IS_UNUSED_PTR(fpst)) {
        tcg_temp_free_ptr(fpst);
    }
}
6153
/*
 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
 *
 * This code handles the common shifting code and is used by both
 * the vector and scalar code.
 *
 * @tcg_res:    destination; also the accumulator input when @accumulate
 * @tcg_src:    value to shift; NOTE it is modified in place
 * @tcg_rnd:    rounding constant to add before shifting, or an "unused"
 *              TCGv (see TCGV_IS_UNUSED_I64) to disable rounding
 * @accumulate: add the shifted value to @tcg_res instead of replacing it
 * @is_u:       logical (unsigned) rather than arithmetic shift
 * @size:       log2 of the element size in bytes; 3 selects the
 *              extended-precision rounding path
 * @shift:      right shift amount; 64 is handled specially
 */
static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
                                    TCGv_i64 tcg_rnd, bool accumulate,
                                    bool is_u, int size, int shift)
{
    bool extended_result = false;
    bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
    int ext_lshift = 0;
    TCGv_i64 tcg_src_hi;

    if (round && size == 3) {
        /* Adding the rounding constant to a 64 bit element can carry out
         * of bit 63, so keep a second temp for the high 64 bits.
         */
        extended_result = true;
        ext_lshift = 64 - shift;
        tcg_src_hi = tcg_temp_new_i64();
    } else if (shift == 64) {
        if (!accumulate && is_u) {
            /* result is zero */
            tcg_gen_movi_i64(tcg_res, 0);
            return;
        }
    }

    /* Deal with the rounding step */
    if (round) {
        if (extended_result) {
            TCGv_i64 tcg_zero = tcg_const_i64(0);
            if (!is_u) {
                /* take care of sign extending tcg_src into tcg_src_hi */
                tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
                                 tcg_src, tcg_src_hi,
                                 tcg_rnd, tcg_zero);
            } else {
                /* unsigned: the high half starts out as zero */
                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
                                 tcg_src, tcg_zero,
                                 tcg_rnd, tcg_zero);
            }
            tcg_temp_free_i64(tcg_zero);
        } else {
            tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
        }
    }

    /* Now do the shift right */
    if (round && extended_result) {
        /* extended case, >64 bit precision required */
        if (ext_lshift == 0) {
            /* special case, only high bits matter */
            tcg_gen_mov_i64(tcg_src, tcg_src_hi);
        } else {
            /* Shift the low half down and OR in the bits that move
             * across from the high half.
             */
            tcg_gen_shri_i64(tcg_src, tcg_src, shift);
            tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
            tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
        }
    } else {
        if (is_u) {
            if (shift == 64) {
                /* essentially shifting in 64 zeros */
                tcg_gen_movi_i64(tcg_src, 0);
            } else {
                tcg_gen_shri_i64(tcg_src, tcg_src, shift);
            }
        } else {
            if (shift == 64) {
                /* effectively extending the sign-bit */
                tcg_gen_sari_i64(tcg_src, tcg_src, 63);
            } else {
                tcg_gen_sari_i64(tcg_src, tcg_src, shift);
            }
        }
    }

    if (accumulate) {
        tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
    } else {
        tcg_gen_mov_i64(tcg_res, tcg_src);
    }

    /* tcg_src_hi was only allocated on the extended-precision path */
    if (extended_result) {
        tcg_temp_free_i64(tcg_src_hi);
    }
}
6241
6242 /* Common SHL/SLI - Shift left with an optional insert */
6243 static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6244                                  bool insert, int shift)
6245 {
6246     if (insert) { /* SLI */
6247         tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
6248     } else { /* SHL */
6249         tcg_gen_shli_i64(tcg_res, tcg_src, shift);
6250     }
6251 }
6252
6253 /* SRI: shift right with insert */
6254 static void handle_shri_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6255                                  int size, int shift)
6256 {
6257     int esize = 8 << size;
6258
6259     /* shift count same as element size is valid but does nothing;
6260      * special case to avoid potential shift by 64.
6261      */
6262     if (shift != esize) {
6263         tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6264         tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, 0, esize - shift);
6265     }
6266 }
6267
6268 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
6269 static void handle_scalar_simd_shri(DisasContext *s,
6270                                     bool is_u, int immh, int immb,
6271                                     int opcode, int rn, int rd)
6272 {
6273     const int size = 3;
6274     int immhb = immh << 3 | immb;
6275     int shift = 2 * (8 << size) - immhb;
6276     bool accumulate = false;
6277     bool round = false;
6278     bool insert = false;
6279     TCGv_i64 tcg_rn;
6280     TCGv_i64 tcg_rd;
6281     TCGv_i64 tcg_round;
6282
6283     if (!extract32(immh, 3, 1)) {
6284         unallocated_encoding(s);
6285         return;
6286     }
6287
6288     if (!fp_access_check(s)) {
6289         return;
6290     }
6291
6292     switch (opcode) {
6293     case 0x02: /* SSRA / USRA (accumulate) */
6294         accumulate = true;
6295         break;
6296     case 0x04: /* SRSHR / URSHR (rounding) */
6297         round = true;
6298         break;
6299     case 0x06: /* SRSRA / URSRA (accum + rounding) */
6300         accumulate = round = true;
6301         break;
6302     case 0x08: /* SRI */
6303         insert = true;
6304         break;
6305     }
6306
6307     if (round) {
6308         uint64_t round_const = 1ULL << (shift - 1);
6309         tcg_round = tcg_const_i64(round_const);
6310     } else {
6311         TCGV_UNUSED_I64(tcg_round);
6312     }
6313
6314     tcg_rn = read_fp_dreg(s, rn);
6315     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6316
6317     if (insert) {
6318         handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
6319     } else {
6320         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6321                                 accumulate, is_u, size, shift);
6322     }
6323
6324     write_fp_dreg(s, rd, tcg_rd);
6325
6326     tcg_temp_free_i64(tcg_rn);
6327     tcg_temp_free_i64(tcg_rd);
6328     if (round) {
6329         tcg_temp_free_i64(tcg_round);
6330     }
6331 }
6332
6333 /* SHL/SLI - Scalar shift left */
6334 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
6335                                     int immh, int immb, int opcode,
6336                                     int rn, int rd)
6337 {
6338     int size = 32 - clz32(immh) - 1;
6339     int immhb = immh << 3 | immb;
6340     int shift = immhb - (8 << size);
6341     TCGv_i64 tcg_rn = new_tmp_a64(s);
6342     TCGv_i64 tcg_rd = new_tmp_a64(s);
6343
6344     if (!extract32(immh, 3, 1)) {
6345         unallocated_encoding(s);
6346         return;
6347     }
6348
6349     if (!fp_access_check(s)) {
6350         return;
6351     }
6352
6353     tcg_rn = read_fp_dreg(s, rn);
6354     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6355
6356     handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
6357
6358     write_fp_dreg(s, rd, tcg_rd);
6359
6360     tcg_temp_free_i64(tcg_rn);
6361     tcg_temp_free_i64(tcg_rd);
6362 }
6363
6364 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
6365  * (signed/unsigned) narrowing */
6366 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
6367                                    bool is_u_shift, bool is_u_narrow,
6368                                    int immh, int immb, int opcode,
6369                                    int rn, int rd)
6370 {
6371     int immhb = immh << 3 | immb;
6372     int size = 32 - clz32(immh) - 1;
6373     int esize = 8 << size;
6374     int shift = (2 * esize) - immhb;
6375     int elements = is_scalar ? 1 : (64 / esize);
6376     bool round = extract32(opcode, 0, 1);
6377     TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
6378     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
6379     TCGv_i32 tcg_rd_narrowed;
6380     TCGv_i64 tcg_final;
6381
6382     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
6383         { gen_helper_neon_narrow_sat_s8,
6384           gen_helper_neon_unarrow_sat8 },
6385         { gen_helper_neon_narrow_sat_s16,
6386           gen_helper_neon_unarrow_sat16 },
6387         { gen_helper_neon_narrow_sat_s32,
6388           gen_helper_neon_unarrow_sat32 },
6389         { NULL, NULL },
6390     };
6391     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
6392         gen_helper_neon_narrow_sat_u8,
6393         gen_helper_neon_narrow_sat_u16,
6394         gen_helper_neon_narrow_sat_u32,
6395         NULL
6396     };
6397     NeonGenNarrowEnvFn *narrowfn;
6398
6399     int i;
6400
6401     assert(size < 4);
6402
6403     if (extract32(immh, 3, 1)) {
6404         unallocated_encoding(s);
6405         return;
6406     }
6407
6408     if (!fp_access_check(s)) {
6409         return;
6410     }
6411
6412     if (is_u_shift) {
6413         narrowfn = unsigned_narrow_fns[size];
6414     } else {
6415         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
6416     }
6417
6418     tcg_rn = tcg_temp_new_i64();
6419     tcg_rd = tcg_temp_new_i64();
6420     tcg_rd_narrowed = tcg_temp_new_i32();
6421     tcg_final = tcg_const_i64(0);
6422
6423     if (round) {
6424         uint64_t round_const = 1ULL << (shift - 1);
6425         tcg_round = tcg_const_i64(round_const);
6426     } else {
6427         TCGV_UNUSED_I64(tcg_round);
6428     }
6429
6430     for (i = 0; i < elements; i++) {
6431         read_vec_element(s, tcg_rn, rn, i, ldop);
6432         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6433                                 false, is_u_shift, size+1, shift);
6434         narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
6435         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
6436         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
6437     }
6438
6439     if (!is_q) {
6440         clear_vec_high(s, rd);
6441         write_vec_element(s, tcg_final, rd, 0, MO_64);
6442     } else {
6443         write_vec_element(s, tcg_final, rd, 1, MO_64);
6444     }
6445
6446     if (round) {
6447         tcg_temp_free_i64(tcg_round);
6448     }
6449     tcg_temp_free_i64(tcg_rn);
6450     tcg_temp_free_i64(tcg_rd);
6451     tcg_temp_free_i32(tcg_rd_narrowed);
6452     tcg_temp_free_i64(tcg_final);
6453     return;
6454 }
6455
6456 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
6457 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
6458                              bool src_unsigned, bool dst_unsigned,
6459                              int immh, int immb, int rn, int rd)
6460 {
6461     int immhb = immh << 3 | immb;
6462     int size = 32 - clz32(immh) - 1;
6463     int shift = immhb - (8 << size);
6464     int pass;
6465
6466     assert(immh != 0);
6467     assert(!(scalar && is_q));
6468
6469     if (!scalar) {
6470         if (!is_q && extract32(immh, 3, 1)) {
6471             unallocated_encoding(s);
6472             return;
6473         }
6474
6475         /* Since we use the variable-shift helpers we must
6476          * replicate the shift count into each element of
6477          * the tcg_shift value.
6478          */
6479         switch (size) {
6480         case 0:
6481             shift |= shift << 8;
6482             /* fall through */
6483         case 1:
6484             shift |= shift << 16;
6485             break;
6486         case 2:
6487         case 3:
6488             break;
6489         default:
6490             g_assert_not_reached();
6491         }
6492     }
6493
6494     if (!fp_access_check(s)) {
6495         return;
6496     }
6497
6498     if (size == 3) {
6499         TCGv_i64 tcg_shift = tcg_const_i64(shift);
6500         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
6501             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
6502             { NULL, gen_helper_neon_qshl_u64 },
6503         };
6504         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
6505         int maxpass = is_q ? 2 : 1;
6506
6507         for (pass = 0; pass < maxpass; pass++) {
6508             TCGv_i64 tcg_op = tcg_temp_new_i64();
6509
6510             read_vec_element(s, tcg_op, rn, pass, MO_64);
6511             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6512             write_vec_element(s, tcg_op, rd, pass, MO_64);
6513
6514             tcg_temp_free_i64(tcg_op);
6515         }
6516         tcg_temp_free_i64(tcg_shift);
6517
6518         if (!is_q) {
6519             clear_vec_high(s, rd);
6520         }
6521     } else {
6522         TCGv_i32 tcg_shift = tcg_const_i32(shift);
6523         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
6524             {
6525                 { gen_helper_neon_qshl_s8,
6526                   gen_helper_neon_qshl_s16,
6527                   gen_helper_neon_qshl_s32 },
6528                 { gen_helper_neon_qshlu_s8,
6529                   gen_helper_neon_qshlu_s16,
6530                   gen_helper_neon_qshlu_s32 }
6531             }, {
6532                 { NULL, NULL, NULL },
6533                 { gen_helper_neon_qshl_u8,
6534                   gen_helper_neon_qshl_u16,
6535                   gen_helper_neon_qshl_u32 }
6536             }
6537         };
6538         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
6539         TCGMemOp memop = scalar ? size : MO_32;
6540         int maxpass = scalar ? 1 : is_q ? 4 : 2;
6541
6542         for (pass = 0; pass < maxpass; pass++) {
6543             TCGv_i32 tcg_op = tcg_temp_new_i32();
6544
6545             read_vec_element_i32(s, tcg_op, rn, pass, memop);
6546             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6547             if (scalar) {
6548                 switch (size) {
6549                 case 0:
6550                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
6551                     break;
6552                 case 1:
6553                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
6554                     break;
6555                 case 2:
6556                     break;
6557                 default:
6558                     g_assert_not_reached();
6559                 }
6560                 write_fp_sreg(s, rd, tcg_op);
6561             } else {
6562                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6563             }
6564
6565             tcg_temp_free_i32(tcg_op);
6566         }
6567         tcg_temp_free_i32(tcg_shift);
6568
6569         if (!is_q && !scalar) {
6570             clear_vec_high(s, rd);
6571         }
6572     }
6573 }
6574
6575 /* Common vector code for handling integer to FP conversion */
6576 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
6577                                    int elements, int is_signed,
6578                                    int fracbits, int size)
6579 {
6580     bool is_double = size == 3 ? true : false;
6581     TCGv_ptr tcg_fpst = get_fpstatus_ptr();
6582     TCGv_i32 tcg_shift = tcg_const_i32(fracbits);
6583     TCGv_i64 tcg_int = tcg_temp_new_i64();
6584     TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
6585     int pass;
6586
6587     for (pass = 0; pass < elements; pass++) {
6588         read_vec_element(s, tcg_int, rn, pass, mop);
6589
6590         if (is_double) {
6591             TCGv_i64 tcg_double = tcg_temp_new_i64();
6592             if (is_signed) {
6593                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6594                                      tcg_shift, tcg_fpst);
6595             } else {
6596                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6597                                      tcg_shift, tcg_fpst);
6598             }
6599             if (elements == 1) {
6600                 write_fp_dreg(s, rd, tcg_double);
6601             } else {
6602                 write_vec_element(s, tcg_double, rd, pass, MO_64);
6603             }
6604             tcg_temp_free_i64(tcg_double);
6605         } else {
6606             TCGv_i32 tcg_single = tcg_temp_new_i32();
6607             if (is_signed) {
6608                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6609                                      tcg_shift, tcg_fpst);
6610             } else {
6611                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6612                                      tcg_shift, tcg_fpst);
6613             }
6614             if (elements == 1) {
6615                 write_fp_sreg(s, rd, tcg_single);
6616             } else {
6617                 write_vec_element_i32(s, tcg_single, rd, pass, MO_32);
6618             }
6619             tcg_temp_free_i32(tcg_single);
6620         }
6621     }
6622
6623     if (!is_double && elements == 2) {
6624         clear_vec_high(s, rd);
6625     }
6626
6627     tcg_temp_free_i64(tcg_int);
6628     tcg_temp_free_ptr(tcg_fpst);
6629     tcg_temp_free_i32(tcg_shift);
6630 }
6631
6632 /* UCVTF/SCVTF - Integer to FP conversion */
6633 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
6634                                          bool is_q, bool is_u,
6635                                          int immh, int immb, int opcode,
6636                                          int rn, int rd)
6637 {
6638     bool is_double = extract32(immh, 3, 1);
6639     int size = is_double ? MO_64 : MO_32;
6640     int elements;
6641     int immhb = immh << 3 | immb;
6642     int fracbits = (is_double ? 128 : 64) - immhb;
6643
6644     if (!extract32(immh, 2, 2)) {
6645         unallocated_encoding(s);
6646         return;
6647     }
6648
6649     if (is_scalar) {
6650         elements = 1;
6651     } else {
6652         elements = is_double ? 2 : is_q ? 4 : 2;
6653         if (is_double && !is_q) {
6654             unallocated_encoding(s);
6655             return;
6656         }
6657     }
6658
6659     if (!fp_access_check(s)) {
6660         return;
6661     }
6662
6663     /* immh == 0 would be a failure of the decode logic */
6664     g_assert(immh);
6665
6666     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
6667 }
6668
6669 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */
6670 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
6671                                          bool is_q, bool is_u,
6672                                          int immh, int immb, int rn, int rd)
6673 {
6674     bool is_double = extract32(immh, 3, 1);
6675     int immhb = immh << 3 | immb;
6676     int fracbits = (is_double ? 128 : 64) - immhb;
6677     int pass;
6678     TCGv_ptr tcg_fpstatus;
6679     TCGv_i32 tcg_rmode, tcg_shift;
6680
6681     if (!extract32(immh, 2, 2)) {
6682         unallocated_encoding(s);
6683         return;
6684     }
6685
6686     if (!is_scalar && !is_q && is_double) {
6687         unallocated_encoding(s);
6688         return;
6689     }
6690
6691     if (!fp_access_check(s)) {
6692         return;
6693     }
6694
6695     assert(!(is_scalar && is_q));
6696
6697     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
6698     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6699     tcg_fpstatus = get_fpstatus_ptr();
6700     tcg_shift = tcg_const_i32(fracbits);
6701
6702     if (is_double) {
6703         int maxpass = is_scalar ? 1 : 2;
6704
6705         for (pass = 0; pass < maxpass; pass++) {
6706             TCGv_i64 tcg_op = tcg_temp_new_i64();
6707
6708             read_vec_element(s, tcg_op, rn, pass, MO_64);
6709             if (is_u) {
6710                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6711             } else {
6712                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6713             }
6714             write_vec_element(s, tcg_op, rd, pass, MO_64);
6715             tcg_temp_free_i64(tcg_op);
6716         }
6717         if (!is_q) {
6718             clear_vec_high(s, rd);
6719         }
6720     } else {
6721         int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
6722         for (pass = 0; pass < maxpass; pass++) {
6723             TCGv_i32 tcg_op = tcg_temp_new_i32();
6724
6725             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
6726             if (is_u) {
6727                 gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6728             } else {
6729                 gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6730             }
6731             if (is_scalar) {
6732                 write_fp_sreg(s, rd, tcg_op);
6733             } else {
6734                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6735             }
6736             tcg_temp_free_i32(tcg_op);
6737         }
6738         if (!is_q && !is_scalar) {
6739             clear_vec_high(s, rd);
6740         }
6741     }
6742
6743     tcg_temp_free_ptr(tcg_fpstatus);
6744     tcg_temp_free_i32(tcg_shift);
6745     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6746     tcg_temp_free_i32(tcg_rmode);
6747 }
6748
6749 /* C3.6.9 AdvSIMD scalar shift by immediate
6750  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
6751  * +-----+---+-------------+------+------+--------+---+------+------+
6752  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
6753  * +-----+---+-------------+------+------+--------+---+------+------+
6754  *
6755  * This is the scalar version so it works on a fixed sized registers
6756  */
6757 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
6758 {
6759     int rd = extract32(insn, 0, 5);
6760     int rn = extract32(insn, 5, 5);
6761     int opcode = extract32(insn, 11, 5);
6762     int immb = extract32(insn, 16, 3);
6763     int immh = extract32(insn, 19, 4);
6764     bool is_u = extract32(insn, 29, 1);
6765
6766     if (immh == 0) {
6767         unallocated_encoding(s);
6768         return;
6769     }
6770
6771     switch (opcode) {
6772     case 0x08: /* SRI */
6773         if (!is_u) {
6774             unallocated_encoding(s);
6775             return;
6776         }
6777         /* fall through */
6778     case 0x00: /* SSHR / USHR */
6779     case 0x02: /* SSRA / USRA */
6780     case 0x04: /* SRSHR / URSHR */
6781     case 0x06: /* SRSRA / URSRA */
6782         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
6783         break;
6784     case 0x0a: /* SHL / SLI */
6785         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
6786         break;
6787     case 0x1c: /* SCVTF, UCVTF */
6788         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
6789                                      opcode, rn, rd);
6790         break;
6791     case 0x10: /* SQSHRUN, SQSHRUN2 */
6792     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
6793         if (!is_u) {
6794             unallocated_encoding(s);
6795             return;
6796         }
6797         handle_vec_simd_sqshrn(s, true, false, false, true,
6798                                immh, immb, opcode, rn, rd);
6799         break;
6800     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
6801     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
6802         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
6803                                immh, immb, opcode, rn, rd);
6804         break;
6805     case 0xc: /* SQSHLU */
6806         if (!is_u) {
6807             unallocated_encoding(s);
6808             return;
6809         }
6810         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
6811         break;
6812     case 0xe: /* SQSHL, UQSHL */
6813         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
6814         break;
6815     case 0x1f: /* FCVTZS, FCVTZU */
6816         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
6817         break;
6818     default:
6819         unallocated_encoding(s);
6820         break;
6821     }
6822 }
6823
6824 /* C3.6.10 AdvSIMD scalar three different
6825  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6826  * +-----+---+-----------+------+---+------+--------+-----+------+------+
6827  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
6828  * +-----+---+-----------+------+---+------+--------+-----+------+------+
6829  */
6830 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
6831 {
6832     bool is_u = extract32(insn, 29, 1);
6833     int size = extract32(insn, 22, 2);
6834     int opcode = extract32(insn, 12, 4);
6835     int rm = extract32(insn, 16, 5);
6836     int rn = extract32(insn, 5, 5);
6837     int rd = extract32(insn, 0, 5);
6838
6839     if (is_u) {
6840         unallocated_encoding(s);
6841         return;
6842     }
6843
6844     switch (opcode) {
6845     case 0x9: /* SQDMLAL, SQDMLAL2 */
6846     case 0xb: /* SQDMLSL, SQDMLSL2 */
6847     case 0xd: /* SQDMULL, SQDMULL2 */
6848         if (size == 0 || size == 3) {
6849             unallocated_encoding(s);
6850             return;
6851         }
6852         break;
6853     default:
6854         unallocated_encoding(s);
6855         return;
6856     }
6857
6858     if (!fp_access_check(s)) {
6859         return;
6860     }
6861
6862     if (size == 2) {
6863         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6864         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6865         TCGv_i64 tcg_res = tcg_temp_new_i64();
6866
6867         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
6868         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
6869
6870         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
6871         gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
6872
6873         switch (opcode) {
6874         case 0xd: /* SQDMULL, SQDMULL2 */
6875             break;
6876         case 0xb: /* SQDMLSL, SQDMLSL2 */
6877             tcg_gen_neg_i64(tcg_res, tcg_res);
6878             /* fall through */
6879         case 0x9: /* SQDMLAL, SQDMLAL2 */
6880             read_vec_element(s, tcg_op1, rd, 0, MO_64);
6881             gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
6882                                               tcg_res, tcg_op1);
6883             break;
6884         default:
6885             g_assert_not_reached();
6886         }
6887
6888         write_fp_dreg(s, rd, tcg_res);
6889
6890         tcg_temp_free_i64(tcg_op1);
6891         tcg_temp_free_i64(tcg_op2);
6892         tcg_temp_free_i64(tcg_res);
6893     } else {
6894         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6895         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6896         TCGv_i64 tcg_res = tcg_temp_new_i64();
6897
6898         read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
6899         read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
6900
6901         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
6902         gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
6903
6904         switch (opcode) {
6905         case 0xd: /* SQDMULL, SQDMULL2 */
6906             break;
6907         case 0xb: /* SQDMLSL, SQDMLSL2 */
6908             gen_helper_neon_negl_u32(tcg_res, tcg_res);
6909             /* fall through */
6910         case 0x9: /* SQDMLAL, SQDMLAL2 */
6911         {
6912             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
6913             read_vec_element(s, tcg_op3, rd, 0, MO_32);
6914             gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
6915                                               tcg_res, tcg_op3);
6916             tcg_temp_free_i64(tcg_op3);
6917             break;
6918         }
6919         default:
6920             g_assert_not_reached();
6921         }
6922
6923         tcg_gen_ext32u_i64(tcg_res, tcg_res);
6924         write_fp_dreg(s, rd, tcg_res);
6925
6926         tcg_temp_free_i32(tcg_op1);
6927         tcg_temp_free_i32(tcg_op2);
6928         tcg_temp_free_i64(tcg_res);
6929     }
6930 }
6931
6932 static void handle_3same_64(DisasContext *s, int opcode, bool u,
6933                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
6934 {
6935     /* Handle 64x64->64 opcodes which are shared between the scalar
6936      * and vector 3-same groups. We cover every opcode where size == 3
6937      * is valid in either the three-reg-same (integer, not pairwise)
6938      * or scalar-three-reg-same groups. (Some opcodes are not yet
6939      * implemented.)
6940      */
6941     TCGCond cond;
6942
6943     switch (opcode) {
6944     case 0x1: /* SQADD */
6945         if (u) {
6946             gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6947         } else {
6948             gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6949         }
6950         break;
6951     case 0x5: /* SQSUB */
6952         if (u) {
6953             gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6954         } else {
6955             gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6956         }
6957         break;
6958     case 0x6: /* CMGT, CMHI */
6959         /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
6960          * We implement this using setcond (test) and then negating.
6961          */
6962         cond = u ? TCG_COND_GTU : TCG_COND_GT;
6963     do_cmop:
6964         tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
6965         tcg_gen_neg_i64(tcg_rd, tcg_rd);
6966         break;
6967     case 0x7: /* CMGE, CMHS */
6968         cond = u ? TCG_COND_GEU : TCG_COND_GE;
6969         goto do_cmop;
6970     case 0x11: /* CMTST, CMEQ */
6971         if (u) {
6972             cond = TCG_COND_EQ;
6973             goto do_cmop;
6974         }
6975         /* CMTST : test is "if (X & Y != 0)". */
6976         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
6977         tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
6978         tcg_gen_neg_i64(tcg_rd, tcg_rd);
6979         break;
6980     case 0x8: /* SSHL, USHL */
6981         if (u) {
6982             gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
6983         } else {
6984             gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
6985         }
6986         break;
6987     case 0x9: /* SQSHL, UQSHL */
6988         if (u) {
6989             gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6990         } else {
6991             gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6992         }
6993         break;
6994     case 0xa: /* SRSHL, URSHL */
6995         if (u) {
6996             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
6997         } else {
6998             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
6999         }
7000         break;
7001     case 0xb: /* SQRSHL, UQRSHL */
7002         if (u) {
7003             gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7004         } else {
7005             gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7006         }
7007         break;
7008     case 0x10: /* ADD, SUB */
7009         if (u) {
7010             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
7011         } else {
7012             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
7013         }
7014         break;
7015     default:
7016         g_assert_not_reached();
7017     }
7018 }
7019
7020 /* Handle the 3-same-operands float operations; shared by the scalar
7021  * and vector encodings. The caller must filter out any encodings
7022  * not allocated for the encoding it is dealing with.
7023  */
7024 static void handle_3same_float(DisasContext *s, int size, int elements,
7025                                int fpopcode, int rd, int rn, int rm)
7026 {
7027     int pass;
7028     TCGv_ptr fpst = get_fpstatus_ptr();
7029
7030     for (pass = 0; pass < elements; pass++) {
7031         if (size) {
7032             /* Double */
7033             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7034             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7035             TCGv_i64 tcg_res = tcg_temp_new_i64();
7036
7037             read_vec_element(s, tcg_op1, rn, pass, MO_64);
7038             read_vec_element(s, tcg_op2, rm, pass, MO_64);
7039
7040             switch (fpopcode) {
7041             case 0x39: /* FMLS */
7042                 /* As usual for ARM, separate negation for fused multiply-add */
7043                 gen_helper_vfp_negd(tcg_op1, tcg_op1);
7044                 /* fall through */
7045             case 0x19: /* FMLA */
7046                 read_vec_element(s, tcg_res, rd, pass, MO_64);
7047                 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
7048                                        tcg_res, fpst);
7049                 break;
7050             case 0x18: /* FMAXNM */
7051                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7052                 break;
7053             case 0x1a: /* FADD */
7054                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7055                 break;
7056             case 0x1b: /* FMULX */
7057                 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
7058                 break;
7059             case 0x1c: /* FCMEQ */
7060                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7061                 break;
7062             case 0x1e: /* FMAX */
7063                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7064                 break;
7065             case 0x1f: /* FRECPS */
7066                 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7067                 break;
7068             case 0x38: /* FMINNM */
7069                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7070                 break;
7071             case 0x3a: /* FSUB */
7072                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7073                 break;
7074             case 0x3e: /* FMIN */
7075                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7076                 break;
7077             case 0x3f: /* FRSQRTS */
7078                 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7079                 break;
7080             case 0x5b: /* FMUL */
7081                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
7082                 break;
7083             case 0x5c: /* FCMGE */
7084                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7085                 break;
7086             case 0x5d: /* FACGE */
7087                 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7088                 break;
7089             case 0x5f: /* FDIV */
7090                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
7091                 break;
7092             case 0x7a: /* FABD */
7093                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7094                 gen_helper_vfp_absd(tcg_res, tcg_res);
7095                 break;
7096             case 0x7c: /* FCMGT */
7097                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7098                 break;
7099             case 0x7d: /* FACGT */
7100                 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7101                 break;
7102             default:
7103                 g_assert_not_reached();
7104             }
7105
7106             write_vec_element(s, tcg_res, rd, pass, MO_64);
7107
7108             tcg_temp_free_i64(tcg_res);
7109             tcg_temp_free_i64(tcg_op1);
7110             tcg_temp_free_i64(tcg_op2);
7111         } else {
7112             /* Single */
7113             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7114             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7115             TCGv_i32 tcg_res = tcg_temp_new_i32();
7116
7117             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
7118             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
7119
7120             switch (fpopcode) {
7121             case 0x39: /* FMLS */
7122                 /* As usual for ARM, separate negation for fused multiply-add */
7123                 gen_helper_vfp_negs(tcg_op1, tcg_op1);
7124                 /* fall through */
7125             case 0x19: /* FMLA */
7126                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7127                 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
7128                                        tcg_res, fpst);
7129                 break;
7130             case 0x1a: /* FADD */
7131                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7132                 break;
7133             case 0x1b: /* FMULX */
7134                 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
7135                 break;
7136             case 0x1c: /* FCMEQ */
7137                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7138                 break;
7139             case 0x1e: /* FMAX */
7140                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7141                 break;
7142             case 0x1f: /* FRECPS */
7143                 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7144                 break;
7145             case 0x18: /* FMAXNM */
7146                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7147                 break;
7148             case 0x38: /* FMINNM */
7149                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7150                 break;
7151             case 0x3a: /* FSUB */
7152                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7153                 break;
7154             case 0x3e: /* FMIN */
7155                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7156                 break;
7157             case 0x3f: /* FRSQRTS */
7158                 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7159                 break;
7160             case 0x5b: /* FMUL */
7161                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
7162                 break;
7163             case 0x5c: /* FCMGE */
7164                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7165                 break;
7166             case 0x5d: /* FACGE */
7167                 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7168                 break;
7169             case 0x5f: /* FDIV */
7170                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
7171                 break;
7172             case 0x7a: /* FABD */
7173                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7174                 gen_helper_vfp_abss(tcg_res, tcg_res);
7175                 break;
7176             case 0x7c: /* FCMGT */
7177                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7178                 break;
7179             case 0x7d: /* FACGT */
7180                 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7181                 break;
7182             default:
7183                 g_assert_not_reached();
7184             }
7185
7186             if (elements == 1) {
7187                 /* scalar single so clear high part */
7188                 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7189
7190                 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
7191                 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
7192                 tcg_temp_free_i64(tcg_tmp);
7193             } else {
7194                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7195             }
7196
7197             tcg_temp_free_i32(tcg_res);
7198             tcg_temp_free_i32(tcg_op1);
7199             tcg_temp_free_i32(tcg_op2);
7200         }
7201     }
7202
7203     tcg_temp_free_ptr(fpst);
7204
7205     if ((elements << size) < 4) {
7206         /* scalar, or non-quad vector op */
7207         clear_vec_high(s, rd);
7208     }
7209 }
7210
7211 /* C3.6.11 AdvSIMD scalar three same
7212  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
7213  * +-----+---+-----------+------+---+------+--------+---+------+------+
7214  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
7215  * +-----+---+-----------+------+---+------+--------+---+------+------+
7216  */
7217 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
7218 {
7219     int rd = extract32(insn, 0, 5);
7220     int rn = extract32(insn, 5, 5);
7221     int opcode = extract32(insn, 11, 5);
7222     int rm = extract32(insn, 16, 5);
7223     int size = extract32(insn, 22, 2);
7224     bool u = extract32(insn, 29, 1);
7225     TCGv_i64 tcg_rd;
7226
7227     if (opcode >= 0x18) {
7228         /* Floating point: U, size[1] and opcode indicate operation */
7229         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
7230         switch (fpopcode) {
7231         case 0x1b: /* FMULX */
7232         case 0x1f: /* FRECPS */
7233         case 0x3f: /* FRSQRTS */
7234         case 0x5d: /* FACGE */
7235         case 0x7d: /* FACGT */
7236         case 0x1c: /* FCMEQ */
7237         case 0x5c: /* FCMGE */
7238         case 0x7c: /* FCMGT */
7239         case 0x7a: /* FABD */
7240             break;
7241         default:
7242             unallocated_encoding(s);
7243             return;
7244         }
7245
7246         if (!fp_access_check(s)) {
7247             return;
7248         }
7249
7250         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
7251         return;
7252     }
7253
7254     switch (opcode) {
7255     case 0x1: /* SQADD, UQADD */
7256     case 0x5: /* SQSUB, UQSUB */
7257     case 0x9: /* SQSHL, UQSHL */
7258     case 0xb: /* SQRSHL, UQRSHL */
7259         break;
7260     case 0x8: /* SSHL, USHL */
7261     case 0xa: /* SRSHL, URSHL */
7262     case 0x6: /* CMGT, CMHI */
7263     case 0x7: /* CMGE, CMHS */
7264     case 0x11: /* CMTST, CMEQ */
7265     case 0x10: /* ADD, SUB (vector) */
7266         if (size != 3) {
7267             unallocated_encoding(s);
7268             return;
7269         }
7270         break;
7271     case 0x16: /* SQDMULH, SQRDMULH (vector) */
7272         if (size != 1 && size != 2) {
7273             unallocated_encoding(s);
7274             return;
7275         }
7276         break;
7277     default:
7278         unallocated_encoding(s);
7279         return;
7280     }
7281
7282     if (!fp_access_check(s)) {
7283         return;
7284     }
7285
7286     tcg_rd = tcg_temp_new_i64();
7287
7288     if (size == 3) {
7289         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
7290         TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
7291
7292         handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
7293         tcg_temp_free_i64(tcg_rn);
7294         tcg_temp_free_i64(tcg_rm);
7295     } else {
7296         /* Do a single operation on the lowest element in the vector.
7297          * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
7298          * no side effects for all these operations.
7299          * OPTME: special-purpose helpers would avoid doing some
7300          * unnecessary work in the helper for the 8 and 16 bit cases.
7301          */
7302         NeonGenTwoOpEnvFn *genenvfn;
7303         TCGv_i32 tcg_rn = tcg_temp_new_i32();
7304         TCGv_i32 tcg_rm = tcg_temp_new_i32();
7305         TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
7306
7307         read_vec_element_i32(s, tcg_rn, rn, 0, size);
7308         read_vec_element_i32(s, tcg_rm, rm, 0, size);
7309
7310         switch (opcode) {
7311         case 0x1: /* SQADD, UQADD */
7312         {
7313             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7314                 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
7315                 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
7316                 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
7317             };
7318             genenvfn = fns[size][u];
7319             break;
7320         }
7321         case 0x5: /* SQSUB, UQSUB */
7322         {
7323             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7324                 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
7325                 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
7326                 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
7327             };
7328             genenvfn = fns[size][u];
7329             break;
7330         }
7331         case 0x9: /* SQSHL, UQSHL */
7332         {
7333             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7334                 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
7335                 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
7336                 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
7337             };
7338             genenvfn = fns[size][u];
7339             break;
7340         }
7341         case 0xb: /* SQRSHL, UQRSHL */
7342         {
7343             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7344                 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
7345                 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
7346                 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
7347             };
7348             genenvfn = fns[size][u];
7349             break;
7350         }
7351         case 0x16: /* SQDMULH, SQRDMULH */
7352         {
7353             static NeonGenTwoOpEnvFn * const fns[2][2] = {
7354                 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
7355                 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
7356             };
7357             assert(size == 1 || size == 2);
7358             genenvfn = fns[size - 1][u];
7359             break;
7360         }
7361         default:
7362             g_assert_not_reached();
7363         }
7364
7365         genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
7366         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
7367         tcg_temp_free_i32(tcg_rd32);
7368         tcg_temp_free_i32(tcg_rn);
7369         tcg_temp_free_i32(tcg_rm);
7370     }
7371
7372     write_fp_dreg(s, rd, tcg_rd);
7373
7374     tcg_temp_free_i64(tcg_rd);
7375 }
7376
7377 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
7378                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
7379                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
7380 {
7381     /* Handle 64->64 opcodes which are shared between the scalar and
7382      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
7383      * is valid in either group and also the double-precision fp ops.
7384      * The caller only need provide tcg_rmode and tcg_fpstatus if the op
7385      * requires them.
7386      */
7387     TCGCond cond;
7388
7389     switch (opcode) {
7390     case 0x4: /* CLS, CLZ */
7391         if (u) {
7392             gen_helper_clz64(tcg_rd, tcg_rn);
7393         } else {
7394             gen_helper_cls64(tcg_rd, tcg_rn);
7395         }
7396         break;
7397     case 0x5: /* NOT */
7398         /* This opcode is shared with CNT and RBIT but we have earlier
7399          * enforced that size == 3 if and only if this is the NOT insn.
7400          */
7401         tcg_gen_not_i64(tcg_rd, tcg_rn);
7402         break;
7403     case 0x7: /* SQABS, SQNEG */
7404         if (u) {
7405             gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
7406         } else {
7407             gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
7408         }
7409         break;
7410     case 0xa: /* CMLT */
7411         /* 64 bit integer comparison against zero, result is
7412          * test ? (2^64 - 1) : 0. We implement via setcond(!test) and
7413          * subtracting 1.
7414          */
7415         cond = TCG_COND_LT;
7416     do_cmop:
7417         tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
7418         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7419         break;
7420     case 0x8: /* CMGT, CMGE */
7421         cond = u ? TCG_COND_GE : TCG_COND_GT;
7422         goto do_cmop;
7423     case 0x9: /* CMEQ, CMLE */
7424         cond = u ? TCG_COND_LE : TCG_COND_EQ;
7425         goto do_cmop;
7426     case 0xb: /* ABS, NEG */
7427         if (u) {
7428             tcg_gen_neg_i64(tcg_rd, tcg_rn);
7429         } else {
7430             TCGv_i64 tcg_zero = tcg_const_i64(0);
7431             tcg_gen_neg_i64(tcg_rd, tcg_rn);
7432             tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
7433                                 tcg_rn, tcg_rd);
7434             tcg_temp_free_i64(tcg_zero);
7435         }
7436         break;
7437     case 0x2f: /* FABS */
7438         gen_helper_vfp_absd(tcg_rd, tcg_rn);
7439         break;
7440     case 0x6f: /* FNEG */
7441         gen_helper_vfp_negd(tcg_rd, tcg_rn);
7442         break;
7443     case 0x7f: /* FSQRT */
7444         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
7445         break;
7446     case 0x1a: /* FCVTNS */
7447     case 0x1b: /* FCVTMS */
7448     case 0x1c: /* FCVTAS */
7449     case 0x3a: /* FCVTPS */
7450     case 0x3b: /* FCVTZS */
7451     {
7452         TCGv_i32 tcg_shift = tcg_const_i32(0);
7453         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7454         tcg_temp_free_i32(tcg_shift);
7455         break;
7456     }
7457     case 0x5a: /* FCVTNU */
7458     case 0x5b: /* FCVTMU */
7459     case 0x5c: /* FCVTAU */
7460     case 0x7a: /* FCVTPU */
7461     case 0x7b: /* FCVTZU */
7462     {
7463         TCGv_i32 tcg_shift = tcg_const_i32(0);
7464         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7465         tcg_temp_free_i32(tcg_shift);
7466         break;
7467     }
7468     case 0x18: /* FRINTN */
7469     case 0x19: /* FRINTM */
7470     case 0x38: /* FRINTP */
7471     case 0x39: /* FRINTZ */
7472     case 0x58: /* FRINTA */
7473     case 0x79: /* FRINTI */
7474         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
7475         break;
7476     case 0x59: /* FRINTX */
7477         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
7478         break;
7479     default:
7480         g_assert_not_reached();
7481     }
7482 }
7483
7484 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
7485                                    bool is_scalar, bool is_u, bool is_q,
7486                                    int size, int rn, int rd)
7487 {
7488     bool is_double = (size == 3);
7489     TCGv_ptr fpst;
7490
7491     if (!fp_access_check(s)) {
7492         return;
7493     }
7494
7495     fpst = get_fpstatus_ptr();
7496
7497     if (is_double) {
7498         TCGv_i64 tcg_op = tcg_temp_new_i64();
7499         TCGv_i64 tcg_zero = tcg_const_i64(0);
7500         TCGv_i64 tcg_res = tcg_temp_new_i64();
7501         NeonGenTwoDoubleOPFn *genfn;
7502         bool swap = false;
7503         int pass;
7504
7505         switch (opcode) {
7506         case 0x2e: /* FCMLT (zero) */
7507             swap = true;
7508             /* fallthrough */
7509         case 0x2c: /* FCMGT (zero) */
7510             genfn = gen_helper_neon_cgt_f64;
7511             break;
7512         case 0x2d: /* FCMEQ (zero) */
7513             genfn = gen_helper_neon_ceq_f64;
7514             break;
7515         case 0x6d: /* FCMLE (zero) */
7516             swap = true;
7517             /* fall through */
7518         case 0x6c: /* FCMGE (zero) */
7519             genfn = gen_helper_neon_cge_f64;
7520             break;
7521         default:
7522             g_assert_not_reached();
7523         }
7524
7525         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7526             read_vec_element(s, tcg_op, rn, pass, MO_64);
7527             if (swap) {
7528                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7529             } else {
7530                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7531             }
7532             write_vec_element(s, tcg_res, rd, pass, MO_64);
7533         }
7534         if (is_scalar) {
7535             clear_vec_high(s, rd);
7536         }
7537
7538         tcg_temp_free_i64(tcg_res);
7539         tcg_temp_free_i64(tcg_zero);
7540         tcg_temp_free_i64(tcg_op);
7541     } else {
7542         TCGv_i32 tcg_op = tcg_temp_new_i32();
7543         TCGv_i32 tcg_zero = tcg_const_i32(0);
7544         TCGv_i32 tcg_res = tcg_temp_new_i32();
7545         NeonGenTwoSingleOPFn *genfn;
7546         bool swap = false;
7547         int pass, maxpasses;
7548
7549         switch (opcode) {
7550         case 0x2e: /* FCMLT (zero) */
7551             swap = true;
7552             /* fall through */
7553         case 0x2c: /* FCMGT (zero) */
7554             genfn = gen_helper_neon_cgt_f32;
7555             break;
7556         case 0x2d: /* FCMEQ (zero) */
7557             genfn = gen_helper_neon_ceq_f32;
7558             break;
7559         case 0x6d: /* FCMLE (zero) */
7560             swap = true;
7561             /* fall through */
7562         case 0x6c: /* FCMGE (zero) */
7563             genfn = gen_helper_neon_cge_f32;
7564             break;
7565         default:
7566             g_assert_not_reached();
7567         }
7568
7569         if (is_scalar) {
7570             maxpasses = 1;
7571         } else {
7572             maxpasses = is_q ? 4 : 2;
7573         }
7574
7575         for (pass = 0; pass < maxpasses; pass++) {
7576             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7577             if (swap) {
7578                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7579             } else {
7580                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7581             }
7582             if (is_scalar) {
7583                 write_fp_sreg(s, rd, tcg_res);
7584             } else {
7585                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7586             }
7587         }
7588         tcg_temp_free_i32(tcg_res);
7589         tcg_temp_free_i32(tcg_zero);
7590         tcg_temp_free_i32(tcg_op);
7591         if (!is_q && !is_scalar) {
7592             clear_vec_high(s, rd);
7593         }
7594     }
7595
7596     tcg_temp_free_ptr(fpst);
7597 }
7598
7599 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
7600                                     bool is_scalar, bool is_u, bool is_q,
7601                                     int size, int rn, int rd)
7602 {
7603     bool is_double = (size == 3);
7604     TCGv_ptr fpst = get_fpstatus_ptr();
7605
7606     if (is_double) {
7607         TCGv_i64 tcg_op = tcg_temp_new_i64();
7608         TCGv_i64 tcg_res = tcg_temp_new_i64();
7609         int pass;
7610
7611         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7612             read_vec_element(s, tcg_op, rn, pass, MO_64);
7613             switch (opcode) {
7614             case 0x3d: /* FRECPE */
7615                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
7616                 break;
7617             case 0x3f: /* FRECPX */
7618                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
7619                 break;
7620             case 0x7d: /* FRSQRTE */
7621                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
7622                 break;
7623             default:
7624                 g_assert_not_reached();
7625             }
7626             write_vec_element(s, tcg_res, rd, pass, MO_64);
7627         }
7628         if (is_scalar) {
7629             clear_vec_high(s, rd);
7630         }
7631
7632         tcg_temp_free_i64(tcg_res);
7633         tcg_temp_free_i64(tcg_op);
7634     } else {
7635         TCGv_i32 tcg_op = tcg_temp_new_i32();
7636         TCGv_i32 tcg_res = tcg_temp_new_i32();
7637         int pass, maxpasses;
7638
7639         if (is_scalar) {
7640             maxpasses = 1;
7641         } else {
7642             maxpasses = is_q ? 4 : 2;
7643         }
7644
7645         for (pass = 0; pass < maxpasses; pass++) {
7646             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7647
7648             switch (opcode) {
7649             case 0x3c: /* URECPE */
7650                 gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
7651                 break;
7652             case 0x3d: /* FRECPE */
7653                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
7654                 break;
7655             case 0x3f: /* FRECPX */
7656                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
7657                 break;
7658             case 0x7d: /* FRSQRTE */
7659                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
7660                 break;
7661             default:
7662                 g_assert_not_reached();
7663             }
7664
7665             if (is_scalar) {
7666                 write_fp_sreg(s, rd, tcg_res);
7667             } else {
7668                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7669             }
7670         }
7671         tcg_temp_free_i32(tcg_res);
7672         tcg_temp_free_i32(tcg_op);
7673         if (!is_q && !is_scalar) {
7674             clear_vec_high(s, rd);
7675         }
7676     }
7677     tcg_temp_free_ptr(fpst);
7678 }
7679
7680 static void handle_2misc_narrow(DisasContext *s, bool scalar,
7681                                 int opcode, bool u, bool is_q,
7682                                 int size, int rn, int rd)
7683 {
7684     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
7685      * in the source becomes a size element in the destination).
7686      */
7687     int pass;
7688     TCGv_i32 tcg_res[2];
7689     int destelt = is_q ? 2 : 0;
7690     int passes = scalar ? 1 : 2;
7691
7692     if (scalar) {
7693         tcg_res[1] = tcg_const_i32(0);
7694     }
7695
7696     for (pass = 0; pass < passes; pass++) {
7697         TCGv_i64 tcg_op = tcg_temp_new_i64();
7698         NeonGenNarrowFn *genfn = NULL;
7699         NeonGenNarrowEnvFn *genenvfn = NULL;
7700
7701         if (scalar) {
7702             read_vec_element(s, tcg_op, rn, pass, size + 1);
7703         } else {
7704             read_vec_element(s, tcg_op, rn, pass, MO_64);
7705         }
7706         tcg_res[pass] = tcg_temp_new_i32();
7707
7708         switch (opcode) {
7709         case 0x12: /* XTN, SQXTUN */
7710         {
7711             static NeonGenNarrowFn * const xtnfns[3] = {
7712                 gen_helper_neon_narrow_u8,
7713                 gen_helper_neon_narrow_u16,
7714                 tcg_gen_extrl_i64_i32,
7715             };
7716             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
7717                 gen_helper_neon_unarrow_sat8,
7718                 gen_helper_neon_unarrow_sat16,
7719                 gen_helper_neon_unarrow_sat32,
7720             };
7721             if (u) {
7722                 genenvfn = sqxtunfns[size];
7723             } else {
7724                 genfn = xtnfns[size];
7725             }
7726             break;
7727         }
7728         case 0x14: /* SQXTN, UQXTN */
7729         {
7730             static NeonGenNarrowEnvFn * const fns[3][2] = {
7731                 { gen_helper_neon_narrow_sat_s8,
7732                   gen_helper_neon_narrow_sat_u8 },
7733                 { gen_helper_neon_narrow_sat_s16,
7734                   gen_helper_neon_narrow_sat_u16 },
7735                 { gen_helper_neon_narrow_sat_s32,
7736                   gen_helper_neon_narrow_sat_u32 },
7737             };
7738             genenvfn = fns[size][u];
7739             break;
7740         }
7741         case 0x16: /* FCVTN, FCVTN2 */
7742             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
7743             if (size == 2) {
7744                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
7745             } else {
7746                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
7747                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
7748                 tcg_gen_extrl_i64_i32(tcg_lo, tcg_op);
7749                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
7750                 tcg_gen_shri_i64(tcg_op, tcg_op, 32);
7751                 tcg_gen_extrl_i64_i32(tcg_hi, tcg_op);
7752                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
7753                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
7754                 tcg_temp_free_i32(tcg_lo);
7755                 tcg_temp_free_i32(tcg_hi);
7756             }
7757             break;
7758         case 0x56:  /* FCVTXN, FCVTXN2 */
7759             /* 64 bit to 32 bit float conversion
7760              * with von Neumann rounding (round to odd)
7761              */
7762             assert(size == 2);
7763             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
7764             break;
7765         default:
7766             g_assert_not_reached();
7767         }
7768
7769         if (genfn) {
7770             genfn(tcg_res[pass], tcg_op);
7771         } else if (genenvfn) {
7772             genenvfn(tcg_res[pass], cpu_env, tcg_op);
7773         }
7774
7775         tcg_temp_free_i64(tcg_op);
7776     }
7777
7778     for (pass = 0; pass < 2; pass++) {
7779         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
7780         tcg_temp_free_i32(tcg_res[pass]);
7781     }
7782     if (!is_q) {
7783         clear_vec_high(s, rd);
7784     }
7785 }
7786
7787 /* Remaining saturating accumulating ops */
7788 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
7789                                 bool is_q, int size, int rn, int rd)
7790 {
7791     bool is_double = (size == 3);
7792
7793     if (is_double) {
7794         TCGv_i64 tcg_rn = tcg_temp_new_i64();
7795         TCGv_i64 tcg_rd = tcg_temp_new_i64();
7796         int pass;
7797
7798         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7799             read_vec_element(s, tcg_rn, rn, pass, MO_64);
7800             read_vec_element(s, tcg_rd, rd, pass, MO_64);
7801
7802             if (is_u) { /* USQADD */
7803                 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7804             } else { /* SUQADD */
7805                 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7806             }
7807             write_vec_element(s, tcg_rd, rd, pass, MO_64);
7808         }
7809         if (is_scalar) {
7810             clear_vec_high(s, rd);
7811         }
7812
7813         tcg_temp_free_i64(tcg_rd);
7814         tcg_temp_free_i64(tcg_rn);
7815     } else {
7816         TCGv_i32 tcg_rn = tcg_temp_new_i32();
7817         TCGv_i32 tcg_rd = tcg_temp_new_i32();
7818         int pass, maxpasses;
7819
7820         if (is_scalar) {
7821             maxpasses = 1;
7822         } else {
7823             maxpasses = is_q ? 4 : 2;
7824         }
7825
7826         for (pass = 0; pass < maxpasses; pass++) {
7827             if (is_scalar) {
7828                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
7829                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
7830             } else {
7831                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
7832                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
7833             }
7834
7835             if (is_u) { /* USQADD */
7836                 switch (size) {
7837                 case 0:
7838                     gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7839                     break;
7840                 case 1:
7841                     gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7842                     break;
7843                 case 2:
7844                     gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7845                     break;
7846                 default:
7847                     g_assert_not_reached();
7848                 }
7849             } else { /* SUQADD */
7850                 switch (size) {
7851                 case 0:
7852                     gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7853                     break;
7854                 case 1:
7855                     gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7856                     break;
7857                 case 2:
7858                     gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7859                     break;
7860                 default:
7861                     g_assert_not_reached();
7862                 }
7863             }
7864
7865             if (is_scalar) {
7866                 TCGv_i64 tcg_zero = tcg_const_i64(0);
7867                 write_vec_element(s, tcg_zero, rd, 0, MO_64);
7868                 tcg_temp_free_i64(tcg_zero);
7869             }
7870             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
7871         }
7872
7873         if (!is_q) {
7874             clear_vec_high(s, rd);
7875         }
7876
7877         tcg_temp_free_i32(tcg_rd);
7878         tcg_temp_free_i32(tcg_rn);
7879     }
7880 }
7881
7882 /* C3.6.12 AdvSIMD scalar two reg misc
7883  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7884  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7885  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7886  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7887  */
7888 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
7889 {
7890     int rd = extract32(insn, 0, 5);
7891     int rn = extract32(insn, 5, 5);
7892     int opcode = extract32(insn, 12, 5);
7893     int size = extract32(insn, 22, 2);
7894     bool u = extract32(insn, 29, 1);
7895     bool is_fcvt = false;
7896     int rmode;
7897     TCGv_i32 tcg_rmode;
7898     TCGv_ptr tcg_fpstatus;
7899
7900     switch (opcode) {
7901     case 0x3: /* USQADD / SUQADD*/
7902         if (!fp_access_check(s)) {
7903             return;
7904         }
7905         handle_2misc_satacc(s, true, u, false, size, rn, rd);
7906         return;
7907     case 0x7: /* SQABS / SQNEG */
7908         break;
7909     case 0xa: /* CMLT */
7910         if (u) {
7911             unallocated_encoding(s);
7912             return;
7913         }
7914         /* fall through */
7915     case 0x8: /* CMGT, CMGE */
7916     case 0x9: /* CMEQ, CMLE */
7917     case 0xb: /* ABS, NEG */
7918         if (size != 3) {
7919             unallocated_encoding(s);
7920             return;
7921         }
7922         break;
7923     case 0x12: /* SQXTUN */
7924         if (!u) {
7925             unallocated_encoding(s);
7926             return;
7927         }
7928         /* fall through */
7929     case 0x14: /* SQXTN, UQXTN */
7930         if (size == 3) {
7931             unallocated_encoding(s);
7932             return;
7933         }
7934         if (!fp_access_check(s)) {
7935             return;
7936         }
7937         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
7938         return;
7939     case 0xc ... 0xf:
7940     case 0x16 ... 0x1d:
7941     case 0x1f:
7942         /* Floating point: U, size[1] and opcode indicate operation;
7943          * size[0] indicates single or double precision.
7944          */
7945         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
7946         size = extract32(size, 0, 1) ? 3 : 2;
7947         switch (opcode) {
7948         case 0x2c: /* FCMGT (zero) */
7949         case 0x2d: /* FCMEQ (zero) */
7950         case 0x2e: /* FCMLT (zero) */
7951         case 0x6c: /* FCMGE (zero) */
7952         case 0x6d: /* FCMLE (zero) */
7953             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
7954             return;
7955         case 0x1d: /* SCVTF */
7956         case 0x5d: /* UCVTF */
7957         {
7958             bool is_signed = (opcode == 0x1d);
7959             if (!fp_access_check(s)) {
7960                 return;
7961             }
7962             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
7963             return;
7964         }
7965         case 0x3d: /* FRECPE */
7966         case 0x3f: /* FRECPX */
7967         case 0x7d: /* FRSQRTE */
7968             if (!fp_access_check(s)) {
7969                 return;
7970             }
7971             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
7972             return;
7973         case 0x1a: /* FCVTNS */
7974         case 0x1b: /* FCVTMS */
7975         case 0x3a: /* FCVTPS */
7976         case 0x3b: /* FCVTZS */
7977         case 0x5a: /* FCVTNU */
7978         case 0x5b: /* FCVTMU */
7979         case 0x7a: /* FCVTPU */
7980         case 0x7b: /* FCVTZU */
7981             is_fcvt = true;
7982             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
7983             break;
7984         case 0x1c: /* FCVTAS */
7985         case 0x5c: /* FCVTAU */
7986             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
7987             is_fcvt = true;
7988             rmode = FPROUNDING_TIEAWAY;
7989             break;
7990         case 0x56: /* FCVTXN, FCVTXN2 */
7991             if (size == 2) {
7992                 unallocated_encoding(s);
7993                 return;
7994             }
7995             if (!fp_access_check(s)) {
7996                 return;
7997             }
7998             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
7999             return;
8000         default:
8001             unallocated_encoding(s);
8002             return;
8003         }
8004         break;
8005     default:
8006         unallocated_encoding(s);
8007         return;
8008     }
8009
8010     if (!fp_access_check(s)) {
8011         return;
8012     }
8013
8014     if (is_fcvt) {
8015         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
8016         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8017         tcg_fpstatus = get_fpstatus_ptr();
8018     } else {
8019         TCGV_UNUSED_I32(tcg_rmode);
8020         TCGV_UNUSED_PTR(tcg_fpstatus);
8021     }
8022
8023     if (size == 3) {
8024         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8025         TCGv_i64 tcg_rd = tcg_temp_new_i64();
8026
8027         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
8028         write_fp_dreg(s, rd, tcg_rd);
8029         tcg_temp_free_i64(tcg_rd);
8030         tcg_temp_free_i64(tcg_rn);
8031     } else {
8032         TCGv_i32 tcg_rn = tcg_temp_new_i32();
8033         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8034
8035         read_vec_element_i32(s, tcg_rn, rn, 0, size);
8036
8037         switch (opcode) {
8038         case 0x7: /* SQABS, SQNEG */
8039         {
8040             NeonGenOneOpEnvFn *genfn;
8041             static NeonGenOneOpEnvFn * const fns[3][2] = {
8042                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
8043                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
8044                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
8045             };
8046             genfn = fns[size][u];
8047             genfn(tcg_rd, cpu_env, tcg_rn);
8048             break;
8049         }
8050         case 0x1a: /* FCVTNS */
8051         case 0x1b: /* FCVTMS */
8052         case 0x1c: /* FCVTAS */
8053         case 0x3a: /* FCVTPS */
8054         case 0x3b: /* FCVTZS */
8055         {
8056             TCGv_i32 tcg_shift = tcg_const_i32(0);
8057             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8058             tcg_temp_free_i32(tcg_shift);
8059             break;
8060         }
8061         case 0x5a: /* FCVTNU */
8062         case 0x5b: /* FCVTMU */
8063         case 0x5c: /* FCVTAU */
8064         case 0x7a: /* FCVTPU */
8065         case 0x7b: /* FCVTZU */
8066         {
8067             TCGv_i32 tcg_shift = tcg_const_i32(0);
8068             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8069             tcg_temp_free_i32(tcg_shift);
8070             break;
8071         }
8072         default:
8073             g_assert_not_reached();
8074         }
8075
8076         write_fp_sreg(s, rd, tcg_rd);
8077         tcg_temp_free_i32(tcg_rd);
8078         tcg_temp_free_i32(tcg_rn);
8079     }
8080
8081     if (is_fcvt) {
8082         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8083         tcg_temp_free_i32(tcg_rmode);
8084         tcg_temp_free_ptr(tcg_fpstatus);
8085     }
8086 }
8087
8088 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
8089 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
8090                                  int immh, int immb, int opcode, int rn, int rd)
8091 {
8092     int size = 32 - clz32(immh) - 1;
8093     int immhb = immh << 3 | immb;
8094     int shift = 2 * (8 << size) - immhb;
8095     bool accumulate = false;
8096     bool round = false;
8097     bool insert = false;
8098     int dsize = is_q ? 128 : 64;
8099     int esize = 8 << size;
8100     int elements = dsize/esize;
8101     TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
8102     TCGv_i64 tcg_rn = new_tmp_a64(s);
8103     TCGv_i64 tcg_rd = new_tmp_a64(s);
8104     TCGv_i64 tcg_round;
8105     int i;
8106
8107     if (extract32(immh, 3, 1) && !is_q) {
8108         unallocated_encoding(s);
8109         return;
8110     }
8111
8112     if (size > 3 && !is_q) {
8113         unallocated_encoding(s);
8114         return;
8115     }
8116
8117     if (!fp_access_check(s)) {
8118         return;
8119     }
8120
8121     switch (opcode) {
8122     case 0x02: /* SSRA / USRA (accumulate) */
8123         accumulate = true;
8124         break;
8125     case 0x04: /* SRSHR / URSHR (rounding) */
8126         round = true;
8127         break;
8128     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8129         accumulate = round = true;
8130         break;
8131     case 0x08: /* SRI */
8132         insert = true;
8133         break;
8134     }
8135
8136     if (round) {
8137         uint64_t round_const = 1ULL << (shift - 1);
8138         tcg_round = tcg_const_i64(round_const);
8139     } else {
8140         TCGV_UNUSED_I64(tcg_round);
8141     }
8142
8143     for (i = 0; i < elements; i++) {
8144         read_vec_element(s, tcg_rn, rn, i, memop);
8145         if (accumulate || insert) {
8146             read_vec_element(s, tcg_rd, rd, i, memop);
8147         }
8148
8149         if (insert) {
8150             handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
8151         } else {
8152             handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8153                                     accumulate, is_u, size, shift);
8154         }
8155
8156         write_vec_element(s, tcg_rd, rd, i, size);
8157     }
8158
8159     if (!is_q) {
8160         clear_vec_high(s, rd);
8161     }
8162
8163     if (round) {
8164         tcg_temp_free_i64(tcg_round);
8165     }
8166 }
8167
8168 /* SHL/SLI - Vector shift left */
8169 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
8170                                 int immh, int immb, int opcode, int rn, int rd)
8171 {
8172     int size = 32 - clz32(immh) - 1;
8173     int immhb = immh << 3 | immb;
8174     int shift = immhb - (8 << size);
8175     int dsize = is_q ? 128 : 64;
8176     int esize = 8 << size;
8177     int elements = dsize/esize;
8178     TCGv_i64 tcg_rn = new_tmp_a64(s);
8179     TCGv_i64 tcg_rd = new_tmp_a64(s);
8180     int i;
8181
8182     if (extract32(immh, 3, 1) && !is_q) {
8183         unallocated_encoding(s);
8184         return;
8185     }
8186
8187     if (size > 3 && !is_q) {
8188         unallocated_encoding(s);
8189         return;
8190     }
8191
8192     if (!fp_access_check(s)) {
8193         return;
8194     }
8195
8196     for (i = 0; i < elements; i++) {
8197         read_vec_element(s, tcg_rn, rn, i, size);
8198         if (insert) {
8199             read_vec_element(s, tcg_rd, rd, i, size);
8200         }
8201
8202         handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
8203
8204         write_vec_element(s, tcg_rd, rd, i, size);
8205     }
8206
8207     if (!is_q) {
8208         clear_vec_high(s, rd);
8209     }
8210 }
8211
8212 /* USHLL/SHLL - Vector shift left with widening */
8213 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
8214                                  int immh, int immb, int opcode, int rn, int rd)
8215 {
8216     int size = 32 - clz32(immh) - 1;
8217     int immhb = immh << 3 | immb;
8218     int shift = immhb - (8 << size);
8219     int dsize = 64;
8220     int esize = 8 << size;
8221     int elements = dsize/esize;
8222     TCGv_i64 tcg_rn = new_tmp_a64(s);
8223     TCGv_i64 tcg_rd = new_tmp_a64(s);
8224     int i;
8225
8226     if (size >= 3) {
8227         unallocated_encoding(s);
8228         return;
8229     }
8230
8231     if (!fp_access_check(s)) {
8232         return;
8233     }
8234
8235     /* For the LL variants the store is larger than the load,
8236      * so if rd == rn we would overwrite parts of our input.
8237      * So load everything right now and use shifts in the main loop.
8238      */
8239     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
8240
8241     for (i = 0; i < elements; i++) {
8242         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
8243         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
8244         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
8245         write_vec_element(s, tcg_rd, rd, i, size + 1);
8246     }
8247 }
8248
8249 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
8250 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
8251                                  int immh, int immb, int opcode, int rn, int rd)
8252 {
8253     int immhb = immh << 3 | immb;
8254     int size = 32 - clz32(immh) - 1;
8255     int dsize = 64;
8256     int esize = 8 << size;
8257     int elements = dsize/esize;
8258     int shift = (2 * esize) - immhb;
8259     bool round = extract32(opcode, 0, 1);
8260     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
8261     TCGv_i64 tcg_round;
8262     int i;
8263
8264     if (extract32(immh, 3, 1)) {
8265         unallocated_encoding(s);
8266         return;
8267     }
8268
8269     if (!fp_access_check(s)) {
8270         return;
8271     }
8272
8273     tcg_rn = tcg_temp_new_i64();
8274     tcg_rd = tcg_temp_new_i64();
8275     tcg_final = tcg_temp_new_i64();
8276     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
8277
8278     if (round) {
8279         uint64_t round_const = 1ULL << (shift - 1);
8280         tcg_round = tcg_const_i64(round_const);
8281     } else {
8282         TCGV_UNUSED_I64(tcg_round);
8283     }
8284
8285     for (i = 0; i < elements; i++) {
8286         read_vec_element(s, tcg_rn, rn, i, size+1);
8287         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8288                                 false, true, size+1, shift);
8289
8290         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8291     }
8292
8293     if (!is_q) {
8294         clear_vec_high(s, rd);
8295         write_vec_element(s, tcg_final, rd, 0, MO_64);
8296     } else {
8297         write_vec_element(s, tcg_final, rd, 1, MO_64);
8298     }
8299
8300     if (round) {
8301         tcg_temp_free_i64(tcg_round);
8302     }
8303     tcg_temp_free_i64(tcg_rn);
8304     tcg_temp_free_i64(tcg_rd);
8305     tcg_temp_free_i64(tcg_final);
8306     return;
8307 }
8308
8309
8310 /* C3.6.14 AdvSIMD shift by immediate
8311  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
8312  * +---+---+---+-------------+------+------+--------+---+------+------+
8313  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8314  * +---+---+---+-------------+------+------+--------+---+------+------+
8315  */
8316 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
8317 {
8318     int rd = extract32(insn, 0, 5);
8319     int rn = extract32(insn, 5, 5);
8320     int opcode = extract32(insn, 11, 5);
8321     int immb = extract32(insn, 16, 3);
8322     int immh = extract32(insn, 19, 4);
8323     bool is_u = extract32(insn, 29, 1);
8324     bool is_q = extract32(insn, 30, 1);
8325
8326     switch (opcode) {
8327     case 0x08: /* SRI */
8328         if (!is_u) {
8329             unallocated_encoding(s);
8330             return;
8331         }
8332         /* fall through */
8333     case 0x00: /* SSHR / USHR */
8334     case 0x02: /* SSRA / USRA (accumulate) */
8335     case 0x04: /* SRSHR / URSHR (rounding) */
8336     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8337         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
8338         break;
8339     case 0x0a: /* SHL / SLI */
8340         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8341         break;
8342     case 0x10: /* SHRN */
8343     case 0x11: /* RSHRN / SQRSHRUN */
8344         if (is_u) {
8345             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
8346                                    opcode, rn, rd);
8347         } else {
8348             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
8349         }
8350         break;
8351     case 0x12: /* SQSHRN / UQSHRN */
8352     case 0x13: /* SQRSHRN / UQRSHRN */
8353         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
8354                                opcode, rn, rd);
8355         break;
8356     case 0x14: /* SSHLL / USHLL */
8357         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8358         break;
8359     case 0x1c: /* SCVTF / UCVTF */
8360         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
8361                                      opcode, rn, rd);
8362         break;
8363     case 0xc: /* SQSHLU */
8364         if (!is_u) {
8365             unallocated_encoding(s);
8366             return;
8367         }
8368         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
8369         break;
8370     case 0xe: /* SQSHL, UQSHL */
8371         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
8372         break;
8373     case 0x1f: /* FCVTZS/ FCVTZU */
8374         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
8375         return;
8376     default:
8377         unallocated_encoding(s);
8378         return;
8379     }
8380 }
8381
8382 /* Generate code to do a "long" addition or subtraction, ie one done in
8383  * TCGv_i64 on vector lanes twice the width specified by size.
8384  */
8385 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
8386                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
8387 {
8388     static NeonGenTwo64OpFn * const fns[3][2] = {
8389         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
8390         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
8391         { tcg_gen_add_i64, tcg_gen_sub_i64 },
8392     };
8393     NeonGenTwo64OpFn *genfn;
8394     assert(size < 3);
8395
8396     genfn = fns[size][is_sub];
8397     genfn(tcg_res, tcg_op1, tcg_op2);
8398 }
8399
/* Emit code for the widening class of the AdvSIMD "three different"
 * group (add/sub long, absolute difference long, multiply long with
 * optional accumulate, saturating doubling multiply long, PMULL 8x8).
 *
 * Source elements are taken from rn and rm: 32-bit chunks 0-1 when is_q
 * is zero, chunks 2-3 otherwise.  The two 64-bit results are written to
 * both halves of rd.  For the accumulating opcodes the previous contents
 * of rd are read first and combined with the per-pass result.
 *
 * The caller is expected to have rejected unallocated size/opcode
 * combinations; unexpected opcodes hit g_assert_not_reached().
 */
static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
                                int opcode, int rd, int rn, int rm)
{
    /* 3-reg-different widening insns: 64 x 64 -> 128 */
    TCGv_i64 tcg_res[2];
    int pass, accop;

    tcg_res[0] = tcg_temp_new_i64();
    tcg_res[1] = tcg_temp_new_i64();

    /* Does this op do an adding accumulate, a subtracting accumulate,
     * or no accumulate at all?
     */
    switch (opcode) {
    case 5:
    case 8:
    case 9:
        accop = 1;
        break;
    case 10:
    case 11:
        accop = -1;
        break;
    default:
        accop = 0;
        break;
    }

    if (accop != 0) {
        /* Accumulating ops start from the existing destination value */
        read_vec_element(s, tcg_res[0], rd, 0, MO_64);
        read_vec_element(s, tcg_res[1], rd, 1, MO_64);
    }

    /* size == 2 means two 32x32->64 operations; this is worth special
     * casing because we can generally handle it inline.
     */
    if (size == 2) {
        for (pass = 0; pass < 2; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_passres;
            TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);

            /* 32-bit elements 0-1 for the base form, 2-3 when is_q is set */
            int elt = pass + is_q * 2;

            read_vec_element(s, tcg_op1, rn, elt, memop);
            read_vec_element(s, tcg_op2, rm, elt, memop);

            /* Without accumulation the op can write straight into the
             * result; otherwise it goes through a scratch temp that is
             * merged into tcg_res[pass] below.
             */
            if (accop == 0) {
                tcg_passres = tcg_res[pass];
            } else {
                tcg_passres = tcg_temp_new_i64();
            }

            switch (opcode) {
            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
                tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
                tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
            {
                TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
                TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();

                /* Compute both differences and keep op1 - op2 when
                 * op1 >= op2, op2 - op1 otherwise.
                 */
                tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
                tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
                                    tcg_passres,
                                    tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
                tcg_temp_free_i64(tcg_tmp1);
                tcg_temp_free_i64(tcg_tmp2);
                break;
            }
            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 9: /* SQDMLAL, SQDMLAL2 */
            case 11: /* SQDMLSL, SQDMLSL2 */
            case 13: /* SQDMULL, SQDMULL2 */
                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
                /* Double the product by adding it to itself via the
                 * saturating-add helper.
                 */
                gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
                                                  tcg_passres, tcg_passres);
                break;
            default:
                g_assert_not_reached();
            }

            if (opcode == 9 || opcode == 11) {
                /* saturating accumulate ops */
                if (accop < 0) {
                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
                }
                gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
                                                  tcg_res[pass], tcg_passres);
            } else if (accop > 0) {
                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
            } else if (accop < 0) {
                tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
            }

            /* tcg_passres is only a separate temp for accumulating ops */
            if (accop != 0) {
                tcg_temp_free_i64(tcg_passres);
            }

            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        }
    } else {
        /* size 0 or 1, generally helper functions */
        for (pass = 0; pass < 2; pass++) {
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i64 tcg_passres;
            int elt = pass + is_q * 2;

            /* Each pass takes one 32-bit chunk of each source; the
             * helpers operate on the elements packed inside it.
             */
            read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);

            if (accop == 0) {
                tcg_passres = tcg_res[pass];
            } else {
                tcg_passres = tcg_temp_new_i64();
            }

            switch (opcode) {
            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
            {
                TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
                /* Indexed by [size][is_u] */
                static NeonGenWidenFn * const widenfns[2][2] = {
                    { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
                    { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
                };
                NeonGenWidenFn *widenfn = widenfns[size][is_u];

                /* Widen both operands, then do the long add/subtract */
                widenfn(tcg_op2_64, tcg_op2);
                widenfn(tcg_passres, tcg_op1);
                gen_neon_addl(size, (opcode == 2), tcg_passres,
                              tcg_passres, tcg_op2_64);
                tcg_temp_free_i64(tcg_op2_64);
                break;
            }
            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
                if (size == 0) {
                    if (is_u) {
                        gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
                    }
                } else {
                    if (is_u) {
                        gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
                    }
                }
                break;
            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
                if (size == 0) {
                    if (is_u) {
                        gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
                    }
                } else {
                    if (is_u) {
                        gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
                    }
                }
                break;
            case 9: /* SQDMLAL, SQDMLAL2 */
            case 11: /* SQDMLSL, SQDMLSL2 */
            case 13: /* SQDMULL, SQDMULL2 */
                assert(size == 1);
                gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
                /* Double the product by adding it to itself via the
                 * saturating-add helper.
                 */
                gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
                                                  tcg_passres, tcg_passres);
                break;
            case 14: /* PMULL */
                assert(size == 0);
                gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
                break;
            default:
                g_assert_not_reached();
            }
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);

            if (accop != 0) {
                if (opcode == 9 || opcode == 11) {
                    /* saturating accumulate ops */
                    if (accop < 0) {
                        /* NOTE(review): presumably negates each 32-bit
                         * lane of the doubled product - confirm against
                         * the helper definition.
                         */
                        gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
                    }
                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
                                                      tcg_res[pass],
                                                      tcg_passres);
                } else {
                    gen_neon_addl(size, (accop < 0), tcg_res[pass],
                                  tcg_res[pass], tcg_passres);
                }
                tcg_temp_free_i64(tcg_passres);
            }
        }
    }

    /* Results are written only after both passes have read their inputs */
    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
    tcg_temp_free_i64(tcg_res[0]);
    tcg_temp_free_i64(tcg_res[1]);
}
8621
8622 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
8623                             int opcode, int rd, int rn, int rm)
8624 {
8625     TCGv_i64 tcg_res[2];
8626     int part = is_q ? 2 : 0;
8627     int pass;
8628
8629     for (pass = 0; pass < 2; pass++) {
8630         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8631         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8632         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
8633         static NeonGenWidenFn * const widenfns[3][2] = {
8634             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8635             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8636             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
8637         };
8638         NeonGenWidenFn *widenfn = widenfns[size][is_u];
8639
8640         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8641         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
8642         widenfn(tcg_op2_wide, tcg_op2);
8643         tcg_temp_free_i32(tcg_op2);
8644         tcg_res[pass] = tcg_temp_new_i64();
8645         gen_neon_addl(size, (opcode == 3),
8646                       tcg_res[pass], tcg_op1, tcg_op2_wide);
8647         tcg_temp_free_i64(tcg_op1);
8648         tcg_temp_free_i64(tcg_op2_wide);
8649     }
8650
8651     for (pass = 0; pass < 2; pass++) {
8652         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8653         tcg_temp_free_i64(tcg_res[pass]);
8654     }
8655 }
8656
/* Narrow a 64-bit value to its upper 32 bits: res = in >> 32.
 * Note that the shift is done in place, so 'in' is modified.
 */
static void do_narrow_high_u32(TCGv_i32 res, TCGv_i64 in)
{
    tcg_gen_shri_i64(in, in, 32);
    tcg_gen_extrl_i64_i32(res, in);
}
8662
/* Rounding variant of do_narrow_high_u32(): add 1 << 31 (half of the
 * discarded low word) before taking the upper 32 bits.  'in' is modified.
 */
static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
{
    tcg_gen_addi_i64(in, in, 1U << 31);
    do_narrow_high_u32(res, in);
}
8668
8669 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
8670                                  int opcode, int rd, int rn, int rm)
8671 {
8672     TCGv_i32 tcg_res[2];
8673     int part = is_q ? 2 : 0;
8674     int pass;
8675
8676     for (pass = 0; pass < 2; pass++) {
8677         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8678         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8679         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
8680         static NeonGenNarrowFn * const narrowfns[3][2] = {
8681             { gen_helper_neon_narrow_high_u8,
8682               gen_helper_neon_narrow_round_high_u8 },
8683             { gen_helper_neon_narrow_high_u16,
8684               gen_helper_neon_narrow_round_high_u16 },
8685             { do_narrow_high_u32, do_narrow_round_high_u32 },
8686         };
8687         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
8688
8689         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8690         read_vec_element(s, tcg_op2, rm, pass, MO_64);
8691
8692         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
8693
8694         tcg_temp_free_i64(tcg_op1);
8695         tcg_temp_free_i64(tcg_op2);
8696
8697         tcg_res[pass] = tcg_temp_new_i32();
8698         gennarrow(tcg_res[pass], tcg_wideres);
8699         tcg_temp_free_i64(tcg_wideres);
8700     }
8701
8702     for (pass = 0; pass < 2; pass++) {
8703         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
8704         tcg_temp_free_i32(tcg_res[pass]);
8705     }
8706     if (!is_q) {
8707         clear_vec_high(s, rd);
8708     }
8709 }
8710
8711 static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
8712 {
8713     /* PMULL of 64 x 64 -> 128 is an odd special case because it
8714      * is the only three-reg-diff instruction which produces a
8715      * 128-bit wide result from a single operation. However since
8716      * it's possible to calculate the two halves more or less
8717      * separately we just use two helper calls.
8718      */
8719     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8720     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8721     TCGv_i64 tcg_res = tcg_temp_new_i64();
8722
8723     read_vec_element(s, tcg_op1, rn, is_q, MO_64);
8724     read_vec_element(s, tcg_op2, rm, is_q, MO_64);
8725     gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
8726     write_vec_element(s, tcg_res, rd, 0, MO_64);
8727     gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
8728     write_vec_element(s, tcg_res, rd, 1, MO_64);
8729
8730     tcg_temp_free_i64(tcg_op1);
8731     tcg_temp_free_i64(tcg_op2);
8732     tcg_temp_free_i64(tcg_res);
8733 }
8734
8735 /* C3.6.15 AdvSIMD three different
8736  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8737  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8738  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8739  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8740  */
8741 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
8742 {
8743     /* Instructions in this group fall into three basic classes
8744      * (in each case with the operation working on each element in
8745      * the input vectors):
8746      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
8747      *     128 bit input)
8748      * (2) wide 64 x 128 -> 128
8749      * (3) narrowing 128 x 128 -> 64
8750      * Here we do initial decode, catch unallocated cases and
8751      * dispatch to separate functions for each class.
8752      */
8753     int is_q = extract32(insn, 30, 1);
8754     int is_u = extract32(insn, 29, 1);
8755     int size = extract32(insn, 22, 2);
8756     int opcode = extract32(insn, 12, 4);
8757     int rm = extract32(insn, 16, 5);
8758     int rn = extract32(insn, 5, 5);
8759     int rd = extract32(insn, 0, 5);
8760
8761     switch (opcode) {
8762     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
8763     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
8764         /* 64 x 128 -> 128 */
8765         if (size == 3) {
8766             unallocated_encoding(s);
8767             return;
8768         }
8769         if (!fp_access_check(s)) {
8770             return;
8771         }
8772         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
8773         break;
8774     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
8775     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
8776         /* 128 x 128 -> 64 */
8777         if (size == 3) {
8778             unallocated_encoding(s);
8779             return;
8780         }
8781         if (!fp_access_check(s)) {
8782             return;
8783         }
8784         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
8785         break;
8786     case 14: /* PMULL, PMULL2 */
8787         if (is_u || size == 1 || size == 2) {
8788             unallocated_encoding(s);
8789             return;
8790         }
8791         if (size == 3) {
8792             if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
8793                 unallocated_encoding(s);
8794                 return;
8795             }
8796             if (!fp_access_check(s)) {
8797                 return;
8798             }
8799             handle_pmull_64(s, is_q, rd, rn, rm);
8800             return;
8801         }
8802         goto is_widening;
8803     case 9: /* SQDMLAL, SQDMLAL2 */
8804     case 11: /* SQDMLSL, SQDMLSL2 */
8805     case 13: /* SQDMULL, SQDMULL2 */
8806         if (is_u || size == 0) {
8807             unallocated_encoding(s);
8808             return;
8809         }
8810         /* fall through */
8811     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8812     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8813     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8814     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8815     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8816     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8817     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
8818         /* 64 x 64 -> 128 */
8819         if (size == 3) {
8820             unallocated_encoding(s);
8821             return;
8822         }
8823     is_widening:
8824         if (!fp_access_check(s)) {
8825             return;
8826         }
8827
8828         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
8829         break;
8830     default:
8831         /* opcode 15 not allocated */
8832         unallocated_encoding(s);
8833         break;
8834     }
8835 }
8836
8837 /* Logic op (opcode == 3) subgroup of C3.6.16. */
8838 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
8839 {
8840     int rd = extract32(insn, 0, 5);
8841     int rn = extract32(insn, 5, 5);
8842     int rm = extract32(insn, 16, 5);
8843     int size = extract32(insn, 22, 2);
8844     bool is_u = extract32(insn, 29, 1);
8845     bool is_q = extract32(insn, 30, 1);
8846     TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
8847     int pass;
8848
8849     if (!fp_access_check(s)) {
8850         return;
8851     }
8852
8853     tcg_op1 = tcg_temp_new_i64();
8854     tcg_op2 = tcg_temp_new_i64();
8855     tcg_res[0] = tcg_temp_new_i64();
8856     tcg_res[1] = tcg_temp_new_i64();
8857
8858     for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
8859         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8860         read_vec_element(s, tcg_op2, rm, pass, MO_64);
8861
8862         if (!is_u) {
8863             switch (size) {
8864             case 0: /* AND */
8865                 tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
8866                 break;
8867             case 1: /* BIC */
8868                 tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
8869                 break;
8870             case 2: /* ORR */
8871                 tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
8872                 break;
8873             case 3: /* ORN */
8874                 tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
8875                 break;
8876             }
8877         } else {
8878             if (size != 0) {
8879                 /* B* ops need res loaded to operate on */
8880                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8881             }
8882
8883             switch (size) {
8884             case 0: /* EOR */
8885                 tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
8886                 break;
8887             case 1: /* BSL bitwise select */
8888                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2);
8889                 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8890                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1);
8891                 break;
8892             case 2: /* BIT, bitwise insert if true */
8893                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8894                 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2);
8895                 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
8896                 break;
8897             case 3: /* BIF, bitwise insert if false */
8898                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8899                 tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2);
8900                 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
8901                 break;
8902             }
8903         }
8904     }
8905
8906     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8907     if (!is_q) {
8908         tcg_gen_movi_i64(tcg_res[1], 0);
8909     }
8910     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8911
8912     tcg_temp_free_i64(tcg_op1);
8913     tcg_temp_free_i64(tcg_op2);
8914     tcg_temp_free_i64(tcg_res[0]);
8915     tcg_temp_free_i64(tcg_res[1]);
8916 }
8917
/* Helper functions for 32 bit comparisons */

/* res = signed max(op1, op2): selects op1 when op1 >= op2, else op2 */
static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
{
    tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
}
8923
/* res = unsigned max(op1, op2): selects op1 when op1 >= op2, else op2 */
static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
{
    tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
}
8928
/* res = signed min(op1, op2): selects op1 when op1 <= op2, else op2 */
static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
{
    tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
}
8933
/* res = unsigned min(op1, op2): selects op1 when op1 <= op2, else op2 */
static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
{
    tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
}
8938
8939 /* Pairwise op subgroup of C3.6.16.
8940  *
8941  * This is called directly or via the handle_3same_float for float pairwise
8942  * operations where the opcode and size are calculated differently.
8943  */
8944 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
8945                                    int size, int rn, int rm, int rd)
8946 {
8947     TCGv_ptr fpst;
8948     int pass;
8949
8950     /* Floating point operations need fpst */
8951     if (opcode >= 0x58) {
8952         fpst = get_fpstatus_ptr();
8953     } else {
8954         TCGV_UNUSED_PTR(fpst);
8955     }
8956
8957     if (!fp_access_check(s)) {
8958         return;
8959     }
8960
8961     /* These operations work on the concatenated rm:rn, with each pair of
8962      * adjacent elements being operated on to produce an element in the result.
8963      */
8964     if (size == 3) {
8965         TCGv_i64 tcg_res[2];
8966
8967         for (pass = 0; pass < 2; pass++) {
8968             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8969             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8970             int passreg = (pass == 0) ? rn : rm;
8971
8972             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
8973             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
8974             tcg_res[pass] = tcg_temp_new_i64();
8975
8976             switch (opcode) {
8977             case 0x17: /* ADDP */
8978                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
8979                 break;
8980             case 0x58: /* FMAXNMP */
8981                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
8982                 break;
8983             case 0x5a: /* FADDP */
8984                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
8985                 break;
8986             case 0x5e: /* FMAXP */
8987                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
8988                 break;
8989             case 0x78: /* FMINNMP */
8990                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
8991                 break;
8992             case 0x7e: /* FMINP */
8993                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
8994                 break;
8995             default:
8996                 g_assert_not_reached();
8997             }
8998
8999             tcg_temp_free_i64(tcg_op1);
9000             tcg_temp_free_i64(tcg_op2);
9001         }
9002
9003         for (pass = 0; pass < 2; pass++) {
9004             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9005             tcg_temp_free_i64(tcg_res[pass]);
9006         }
9007     } else {
9008         int maxpass = is_q ? 4 : 2;
9009         TCGv_i32 tcg_res[4];
9010
9011         for (pass = 0; pass < maxpass; pass++) {
9012             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9013             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9014             NeonGenTwoOpFn *genfn = NULL;
9015             int passreg = pass < (maxpass / 2) ? rn : rm;
9016             int passelt = (is_q && (pass & 1)) ? 2 : 0;
9017
9018             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
9019             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
9020             tcg_res[pass] = tcg_temp_new_i32();
9021
9022             switch (opcode) {
9023             case 0x17: /* ADDP */
9024             {
9025                 static NeonGenTwoOpFn * const fns[3] = {
9026                     gen_helper_neon_padd_u8,
9027                     gen_helper_neon_padd_u16,
9028                     tcg_gen_add_i32,
9029                 };
9030                 genfn = fns[size];
9031                 break;
9032             }
9033             case 0x14: /* SMAXP, UMAXP */
9034             {
9035                 static NeonGenTwoOpFn * const fns[3][2] = {
9036                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
9037                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
9038                     { gen_max_s32, gen_max_u32 },
9039                 };
9040                 genfn = fns[size][u];
9041                 break;
9042             }
9043             case 0x15: /* SMINP, UMINP */
9044             {
9045                 static NeonGenTwoOpFn * const fns[3][2] = {
9046                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
9047                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
9048                     { gen_min_s32, gen_min_u32 },
9049                 };
9050                 genfn = fns[size][u];
9051                 break;
9052             }
9053             /* The FP operations are all on single floats (32 bit) */
9054             case 0x58: /* FMAXNMP */
9055                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9056                 break;
9057             case 0x5a: /* FADDP */
9058                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9059                 break;
9060             case 0x5e: /* FMAXP */
9061                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9062                 break;
9063             case 0x78: /* FMINNMP */
9064                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9065                 break;
9066             case 0x7e: /* FMINP */
9067                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9068                 break;
9069             default:
9070                 g_assert_not_reached();
9071             }
9072
9073             /* FP ops called directly, otherwise call now */
9074             if (genfn) {
9075                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
9076             }
9077
9078             tcg_temp_free_i32(tcg_op1);
9079             tcg_temp_free_i32(tcg_op2);
9080         }
9081
9082         for (pass = 0; pass < maxpass; pass++) {
9083             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9084             tcg_temp_free_i32(tcg_res[pass]);
9085         }
9086         if (!is_q) {
9087             clear_vec_high(s, rd);
9088         }
9089     }
9090
9091     if (!TCGV_IS_UNUSED_PTR(fpst)) {
9092         tcg_temp_free_ptr(fpst);
9093     }
9094 }
9095
9096 /* Floating point op subgroup of C3.6.16. */
9097 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
9098 {
9099     /* For floating point ops, the U, size[1] and opcode bits
9100      * together indicate the operation. size[0] indicates single
9101      * or double.
9102      */
9103     int fpopcode = extract32(insn, 11, 5)
9104         | (extract32(insn, 23, 1) << 5)
9105         | (extract32(insn, 29, 1) << 6);
9106     int is_q = extract32(insn, 30, 1);
9107     int size = extract32(insn, 22, 1);
9108     int rm = extract32(insn, 16, 5);
9109     int rn = extract32(insn, 5, 5);
9110     int rd = extract32(insn, 0, 5);
9111
9112     int datasize = is_q ? 128 : 64;
9113     int esize = 32 << size;
9114     int elements = datasize / esize;
9115
9116     if (size == 1 && !is_q) {
9117         unallocated_encoding(s);
9118         return;
9119     }
9120
9121     switch (fpopcode) {
9122     case 0x58: /* FMAXNMP */
9123     case 0x5a: /* FADDP */
9124     case 0x5e: /* FMAXP */
9125     case 0x78: /* FMINNMP */
9126     case 0x7e: /* FMINP */
9127         if (size && !is_q) {
9128             unallocated_encoding(s);
9129             return;
9130         }
9131         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
9132                                rn, rm, rd);
9133         return;
9134     case 0x1b: /* FMULX */
9135     case 0x1f: /* FRECPS */
9136     case 0x3f: /* FRSQRTS */
9137     case 0x5d: /* FACGE */
9138     case 0x7d: /* FACGT */
9139     case 0x19: /* FMLA */
9140     case 0x39: /* FMLS */
9141     case 0x18: /* FMAXNM */
9142     case 0x1a: /* FADD */
9143     case 0x1c: /* FCMEQ */
9144     case 0x1e: /* FMAX */
9145     case 0x38: /* FMINNM */
9146     case 0x3a: /* FSUB */
9147     case 0x3e: /* FMIN */
9148     case 0x5b: /* FMUL */
9149     case 0x5c: /* FCMGE */
9150     case 0x5f: /* FDIV */
9151     case 0x7a: /* FABD */
9152     case 0x7c: /* FCMGT */
9153         if (!fp_access_check(s)) {
9154             return;
9155         }
9156
9157         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
9158         return;
9159     default:
9160         unallocated_encoding(s);
9161         return;
9162     }
9163 }
9164
9165 /* Integer op subgroup of C3.6.16. */
9166 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
9167 {
9168     int is_q = extract32(insn, 30, 1);
9169     int u = extract32(insn, 29, 1);
9170     int size = extract32(insn, 22, 2);
9171     int opcode = extract32(insn, 11, 5);
9172     int rm = extract32(insn, 16, 5);
9173     int rn = extract32(insn, 5, 5);
9174     int rd = extract32(insn, 0, 5);
9175     int pass;
9176
9177     switch (opcode) {
9178     case 0x13: /* MUL, PMUL */
9179         if (u && size != 0) {
9180             unallocated_encoding(s);
9181             return;
9182         }
9183         /* fall through */
9184     case 0x0: /* SHADD, UHADD */
9185     case 0x2: /* SRHADD, URHADD */
9186     case 0x4: /* SHSUB, UHSUB */
9187     case 0xc: /* SMAX, UMAX */
9188     case 0xd: /* SMIN, UMIN */
9189     case 0xe: /* SABD, UABD */
9190     case 0xf: /* SABA, UABA */
9191     case 0x12: /* MLA, MLS */
9192         if (size == 3) {
9193             unallocated_encoding(s);
9194             return;
9195         }
9196         break;
9197     case 0x16: /* SQDMULH, SQRDMULH */
9198         if (size == 0 || size == 3) {
9199             unallocated_encoding(s);
9200             return;
9201         }
9202         break;
9203     default:
9204         if (size == 3 && !is_q) {
9205             unallocated_encoding(s);
9206             return;
9207         }
9208         break;
9209     }
9210
9211     if (!fp_access_check(s)) {
9212         return;
9213     }
9214
9215     if (size == 3) {
9216         assert(is_q);
9217         for (pass = 0; pass < 2; pass++) {
9218             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9219             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9220             TCGv_i64 tcg_res = tcg_temp_new_i64();
9221
9222             read_vec_element(s, tcg_op1, rn, pass, MO_64);
9223             read_vec_element(s, tcg_op2, rm, pass, MO_64);
9224
9225             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
9226
9227             write_vec_element(s, tcg_res, rd, pass, MO_64);
9228
9229             tcg_temp_free_i64(tcg_res);
9230             tcg_temp_free_i64(tcg_op1);
9231             tcg_temp_free_i64(tcg_op2);
9232         }
9233     } else {
9234         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
9235             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9236             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9237             TCGv_i32 tcg_res = tcg_temp_new_i32();
9238             NeonGenTwoOpFn *genfn = NULL;
9239             NeonGenTwoOpEnvFn *genenvfn = NULL;
9240
9241             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9242             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9243
9244             switch (opcode) {
9245             case 0x0: /* SHADD, UHADD */
9246             {
9247                 static NeonGenTwoOpFn * const fns[3][2] = {
9248                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
9249                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
9250                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
9251                 };
9252                 genfn = fns[size][u];
9253                 break;
9254             }
9255             case 0x1: /* SQADD, UQADD */
9256             {
9257                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9258                     { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9259                     { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9260                     { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9261                 };
9262                 genenvfn = fns[size][u];
9263                 break;
9264             }
9265             case 0x2: /* SRHADD, URHADD */
9266             {
9267                 static NeonGenTwoOpFn * const fns[3][2] = {
9268                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
9269                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
9270                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
9271                 };
9272                 genfn = fns[size][u];
9273                 break;
9274             }
9275             case 0x4: /* SHSUB, UHSUB */
9276             {
9277                 static NeonGenTwoOpFn * const fns[3][2] = {
9278                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
9279                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
9280                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
9281                 };
9282                 genfn = fns[size][u];
9283                 break;
9284             }
9285             case 0x5: /* SQSUB, UQSUB */
9286             {
9287                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9288                     { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9289                     { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9290                     { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9291                 };
9292                 genenvfn = fns[size][u];
9293                 break;
9294             }
9295             case 0x6: /* CMGT, CMHI */
9296             {
9297                 static NeonGenTwoOpFn * const fns[3][2] = {
9298                     { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
9299                     { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
9300                     { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
9301                 };
9302                 genfn = fns[size][u];
9303                 break;
9304             }
9305             case 0x7: /* CMGE, CMHS */
9306             {
9307                 static NeonGenTwoOpFn * const fns[3][2] = {
9308                     { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
9309                     { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
9310                     { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
9311                 };
9312                 genfn = fns[size][u];
9313                 break;
9314             }
9315             case 0x8: /* SSHL, USHL */
9316             {
9317                 static NeonGenTwoOpFn * const fns[3][2] = {
9318                     { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
9319                     { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
9320                     { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
9321                 };
9322                 genfn = fns[size][u];
9323                 break;
9324             }
9325             case 0x9: /* SQSHL, UQSHL */
9326             {
9327                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9328                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9329                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9330                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9331                 };
9332                 genenvfn = fns[size][u];
9333                 break;
9334             }
9335             case 0xa: /* SRSHL, URSHL */
9336             {
9337                 static NeonGenTwoOpFn * const fns[3][2] = {
9338                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
9339                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
9340                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
9341                 };
9342                 genfn = fns[size][u];
9343                 break;
9344             }
9345             case 0xb: /* SQRSHL, UQRSHL */
9346             {
9347                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9348                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9349                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9350                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9351                 };
9352                 genenvfn = fns[size][u];
9353                 break;
9354             }
9355             case 0xc: /* SMAX, UMAX */
9356             {
9357                 static NeonGenTwoOpFn * const fns[3][2] = {
9358                     { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
9359                     { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
9360                     { gen_max_s32, gen_max_u32 },
9361                 };
9362                 genfn = fns[size][u];
9363                 break;
9364             }
9365
9366             case 0xd: /* SMIN, UMIN */
9367             {
9368                 static NeonGenTwoOpFn * const fns[3][2] = {
9369                     { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
9370                     { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
9371                     { gen_min_s32, gen_min_u32 },
9372                 };
9373                 genfn = fns[size][u];
9374                 break;
9375             }
9376             case 0xe: /* SABD, UABD */
9377             case 0xf: /* SABA, UABA */
9378             {
9379                 static NeonGenTwoOpFn * const fns[3][2] = {
9380                     { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
9381                     { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
9382                     { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
9383                 };
9384                 genfn = fns[size][u];
9385                 break;
9386             }
9387             case 0x10: /* ADD, SUB */
9388             {
9389                 static NeonGenTwoOpFn * const fns[3][2] = {
9390                     { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9391                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9392                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
9393                 };
9394                 genfn = fns[size][u];
9395                 break;
9396             }
9397             case 0x11: /* CMTST, CMEQ */
9398             {
9399                 static NeonGenTwoOpFn * const fns[3][2] = {
9400                     { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
9401                     { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
9402                     { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
9403                 };
9404                 genfn = fns[size][u];
9405                 break;
9406             }
9407             case 0x13: /* MUL, PMUL */
9408                 if (u) {
9409                     /* PMUL */
9410                     assert(size == 0);
9411                     genfn = gen_helper_neon_mul_p8;
9412                     break;
9413                 }
9414                 /* fall through : MUL */
9415             case 0x12: /* MLA, MLS */
9416             {
9417                 static NeonGenTwoOpFn * const fns[3] = {
9418                     gen_helper_neon_mul_u8,
9419                     gen_helper_neon_mul_u16,
9420                     tcg_gen_mul_i32,
9421                 };
9422                 genfn = fns[size];
9423                 break;
9424             }
9425             case 0x16: /* SQDMULH, SQRDMULH */
9426             {
9427                 static NeonGenTwoOpEnvFn * const fns[2][2] = {
9428                     { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9429                     { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9430                 };
9431                 assert(size == 1 || size == 2);
9432                 genenvfn = fns[size - 1][u];
9433                 break;
9434             }
9435             default:
9436                 g_assert_not_reached();
9437             }
9438
9439             if (genenvfn) {
9440                 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
9441             } else {
9442                 genfn(tcg_res, tcg_op1, tcg_op2);
9443             }
9444
9445             if (opcode == 0xf || opcode == 0x12) {
9446                 /* SABA, UABA, MLA, MLS: accumulating ops */
9447                 static NeonGenTwoOpFn * const fns[3][2] = {
9448                     { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9449                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9450                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
9451                 };
9452                 bool is_sub = (opcode == 0x12 && u); /* MLS */
9453
9454                 genfn = fns[size][is_sub];
9455                 read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
9456                 genfn(tcg_res, tcg_op1, tcg_res);
9457             }
9458
9459             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9460
9461             tcg_temp_free_i32(tcg_res);
9462             tcg_temp_free_i32(tcg_op1);
9463             tcg_temp_free_i32(tcg_op2);
9464         }
9465     }
9466
9467     if (!is_q) {
9468         clear_vec_high(s, rd);
9469     }
9470 }
9471
9472 /* C3.6.16 AdvSIMD three same
9473  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
9474  * +---+---+---+-----------+------+---+------+--------+---+------+------+
9475  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9476  * +---+---+---+-----------+------+---+------+--------+---+------+------+
9477  */
9478 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
9479 {
9480     int opcode = extract32(insn, 11, 5);
9481
9482     switch (opcode) {
9483     case 0x3: /* logic ops */
9484         disas_simd_3same_logic(s, insn);
9485         break;
9486     case 0x17: /* ADDP */
9487     case 0x14: /* SMAXP, UMAXP */
9488     case 0x15: /* SMINP, UMINP */
9489     {
9490         /* Pairwise operations */
9491         int is_q = extract32(insn, 30, 1);
9492         int u = extract32(insn, 29, 1);
9493         int size = extract32(insn, 22, 2);
9494         int rm = extract32(insn, 16, 5);
9495         int rn = extract32(insn, 5, 5);
9496         int rd = extract32(insn, 0, 5);
9497         if (opcode == 0x17) {
9498             if (u || (size == 3 && !is_q)) {
9499                 unallocated_encoding(s);
9500                 return;
9501             }
9502         } else {
9503             if (size == 3) {
9504                 unallocated_encoding(s);
9505                 return;
9506             }
9507         }
9508         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
9509         break;
9510     }
9511     case 0x18 ... 0x31:
9512         /* floating point ops, sz[1] and U are part of opcode */
9513         disas_simd_3same_float(s, insn);
9514         break;
9515     default:
9516         disas_simd_3same_int(s, insn);
9517         break;
9518     }
9519 }
9520
9521 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
9522                                   int size, int rn, int rd)
9523 {
9524     /* Handle 2-reg-misc ops which are widening (so each size element
9525      * in the source becomes a 2*size element in the destination.
9526      * The only instruction like this is FCVTL.
9527      */
9528     int pass;
9529
9530     if (size == 3) {
9531         /* 32 -> 64 bit fp conversion */
9532         TCGv_i64 tcg_res[2];
9533         int srcelt = is_q ? 2 : 0;
9534
9535         for (pass = 0; pass < 2; pass++) {
9536             TCGv_i32 tcg_op = tcg_temp_new_i32();
9537             tcg_res[pass] = tcg_temp_new_i64();
9538
9539             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
9540             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
9541             tcg_temp_free_i32(tcg_op);
9542         }
9543         for (pass = 0; pass < 2; pass++) {
9544             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9545             tcg_temp_free_i64(tcg_res[pass]);
9546         }
9547     } else {
9548         /* 16 -> 32 bit fp conversion */
9549         int srcelt = is_q ? 4 : 0;
9550         TCGv_i32 tcg_res[4];
9551
9552         for (pass = 0; pass < 4; pass++) {
9553             tcg_res[pass] = tcg_temp_new_i32();
9554
9555             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
9556             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
9557                                            cpu_env);
9558         }
9559         for (pass = 0; pass < 4; pass++) {
9560             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9561             tcg_temp_free_i32(tcg_res[pass]);
9562         }
9563     }
9564 }
9565
9566 static void handle_rev(DisasContext *s, int opcode, bool u,
9567                        bool is_q, int size, int rn, int rd)
9568 {
9569     int op = (opcode << 1) | u;
9570     int opsz = op + size;
9571     int grp_size = 3 - opsz;
9572     int dsize = is_q ? 128 : 64;
9573     int i;
9574
9575     if (opsz >= 3) {
9576         unallocated_encoding(s);
9577         return;
9578     }
9579
9580     if (!fp_access_check(s)) {
9581         return;
9582     }
9583
9584     if (size == 0) {
9585         /* Special case bytes, use bswap op on each group of elements */
9586         int groups = dsize / (8 << grp_size);
9587
9588         for (i = 0; i < groups; i++) {
9589             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9590
9591             read_vec_element(s, tcg_tmp, rn, i, grp_size);
9592             switch (grp_size) {
9593             case MO_16:
9594                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
9595                 break;
9596             case MO_32:
9597                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
9598                 break;
9599             case MO_64:
9600                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
9601                 break;
9602             default:
9603                 g_assert_not_reached();
9604             }
9605             write_vec_element(s, tcg_tmp, rd, i, grp_size);
9606             tcg_temp_free_i64(tcg_tmp);
9607         }
9608         if (!is_q) {
9609             clear_vec_high(s, rd);
9610         }
9611     } else {
9612         int revmask = (1 << grp_size) - 1;
9613         int esize = 8 << size;
9614         int elements = dsize / esize;
9615         TCGv_i64 tcg_rn = tcg_temp_new_i64();
9616         TCGv_i64 tcg_rd = tcg_const_i64(0);
9617         TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
9618
9619         for (i = 0; i < elements; i++) {
9620             int e_rev = (i & 0xf) ^ revmask;
9621             int off = e_rev * esize;
9622             read_vec_element(s, tcg_rn, rn, i, size);
9623             if (off >= 64) {
9624                 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
9625                                     tcg_rn, off - 64, esize);
9626             } else {
9627                 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
9628             }
9629         }
9630         write_vec_element(s, tcg_rd, rd, 0, MO_64);
9631         write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
9632
9633         tcg_temp_free_i64(tcg_rd_hi);
9634         tcg_temp_free_i64(tcg_rd);
9635         tcg_temp_free_i64(tcg_rn);
9636     }
9637 }
9638
9639 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
9640                                   bool is_q, int size, int rn, int rd)
9641 {
9642     /* Implement the pairwise operations from 2-misc:
9643      * SADDLP, UADDLP, SADALP, UADALP.
9644      * These all add pairs of elements in the input to produce a
9645      * double-width result element in the output (possibly accumulating).
9646      */
9647     bool accum = (opcode == 0x6);
9648     int maxpass = is_q ? 2 : 1;
9649     int pass;
9650     TCGv_i64 tcg_res[2];
9651
9652     if (size == 2) {
9653         /* 32 + 32 -> 64 op */
9654         TCGMemOp memop = size + (u ? 0 : MO_SIGN);
9655
9656         for (pass = 0; pass < maxpass; pass++) {
9657             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9658             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9659
9660             tcg_res[pass] = tcg_temp_new_i64();
9661
9662             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
9663             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
9664             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9665             if (accum) {
9666                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
9667                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9668             }
9669
9670             tcg_temp_free_i64(tcg_op1);
9671             tcg_temp_free_i64(tcg_op2);
9672         }
9673     } else {
9674         for (pass = 0; pass < maxpass; pass++) {
9675             TCGv_i64 tcg_op = tcg_temp_new_i64();
9676             NeonGenOneOpFn *genfn;
9677             static NeonGenOneOpFn * const fns[2][2] = {
9678                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
9679                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
9680             };
9681
9682             genfn = fns[size][u];
9683
9684             tcg_res[pass] = tcg_temp_new_i64();
9685
9686             read_vec_element(s, tcg_op, rn, pass, MO_64);
9687             genfn(tcg_res[pass], tcg_op);
9688
9689             if (accum) {
9690                 read_vec_element(s, tcg_op, rd, pass, MO_64);
9691                 if (size == 0) {
9692                     gen_helper_neon_addl_u16(tcg_res[pass],
9693                                              tcg_res[pass], tcg_op);
9694                 } else {
9695                     gen_helper_neon_addl_u32(tcg_res[pass],
9696                                              tcg_res[pass], tcg_op);
9697                 }
9698             }
9699             tcg_temp_free_i64(tcg_op);
9700         }
9701     }
9702     if (!is_q) {
9703         tcg_res[1] = tcg_const_i64(0);
9704     }
9705     for (pass = 0; pass < 2; pass++) {
9706         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9707         tcg_temp_free_i64(tcg_res[pass]);
9708     }
9709 }
9710
9711 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
9712 {
9713     /* Implement SHLL and SHLL2 */
9714     int pass;
9715     int part = is_q ? 2 : 0;
9716     TCGv_i64 tcg_res[2];
9717
9718     for (pass = 0; pass < 2; pass++) {
9719         static NeonGenWidenFn * const widenfns[3] = {
9720             gen_helper_neon_widen_u8,
9721             gen_helper_neon_widen_u16,
9722             tcg_gen_extu_i32_i64,
9723         };
9724         NeonGenWidenFn *widenfn = widenfns[size];
9725         TCGv_i32 tcg_op = tcg_temp_new_i32();
9726
9727         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
9728         tcg_res[pass] = tcg_temp_new_i64();
9729         widenfn(tcg_res[pass], tcg_op);
9730         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
9731
9732         tcg_temp_free_i32(tcg_op);
9733     }
9734
9735     for (pass = 0; pass < 2; pass++) {
9736         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9737         tcg_temp_free_i64(tcg_res[pass]);
9738     }
9739 }
9740
9741 /* C3.6.17 AdvSIMD two reg misc
9742  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9743  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9744  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9745  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
      *
      * Decode one instruction of this group and emit TCG code for it.
      *
      * The function works in two stages:
      *  1. A switch on opcode validates the encoding.  Invalid combinations
      *     call unallocated_encoding() and return.  Several opcodes are
      *     handed to dedicated handlers (handle_rev, handle_2misc_narrow,
      *     handle_2misc_pairwise, handle_shll, handle_2misc_satacc,
      *     handle_simd_intfp_conv, handle_2misc_fcmp_zero,
      *     handle_2misc_reciprocal, handle_2misc_widening) and return
      *     immediately afterwards.
      *  2. Everything that leaves the switch via "break" is generated here:
      *     64-bit elements (size == 3) go through handle_2misc_64() one
      *     element at a time; all other sizes are processed in 32-bit chunks.
      *
      * s:    translation context
      * insn: the 32-bit instruction word being decoded
9746  */
9747 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
9748 {
9749     int size = extract32(insn, 22, 2);
9750     int opcode = extract32(insn, 12, 5);
9751     bool u = extract32(insn, 29, 1);
9752     bool is_q = extract32(insn, 30, 1);
9753     int rn = extract32(insn, 5, 5);
9754     int rd = extract32(insn, 0, 5);
         /* Set by the FP cases below to request an fpstatus pointer and/or a
          * temporary rounding mode around the generated operation.
          */
9755     bool need_fpstatus = false;
9756     bool need_rmode = false;
9757     int rmode = -1;
9758     TCGv_i32 tcg_rmode;
9759     TCGv_ptr tcg_fpstatus;
9760
         /* Stage 1: validate the encoding.  Cases ending in "return" have
          * either rejected the insn or passed it to a dedicated handler;
          * cases ending in "break" continue with the shared code after the
          * switch.
          */
9761     switch (opcode) {
9762     case 0x0: /* REV64, REV32 */
9763     case 0x1: /* REV16 */
9764         handle_rev(s, opcode, u, is_q, size, rn, rd);
9765         return;
9766     case 0x5: /* CNT, NOT, RBIT */
9767         if (u && size == 0) {
9768             /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
9769             size = 3;
9770             break;
9771         } else if (u && size == 1) {
9772             /* RBIT */
9773             break;
9774         } else if (!u && size == 0) {
9775             /* CNT */
9776             break;
9777         }
9778         unallocated_encoding(s);
9779         return;
9780     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
9781     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
9782         if (size == 3) {
9783             unallocated_encoding(s);
9784             return;
9785         }
9786         if (!fp_access_check(s)) {
9787             return;
9788         }
9789
9790         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
9791         return;
9792     case 0x4: /* CLS, CLZ */
9793         if (size == 3) {
9794             unallocated_encoding(s);
9795             return;
9796         }
9797         break;
9798     case 0x2: /* SADDLP, UADDLP */
9799     case 0x6: /* SADALP, UADALP */
9800         if (size == 3) {
9801             unallocated_encoding(s);
9802             return;
9803         }
9804         if (!fp_access_check(s)) {
9805             return;
9806         }
9807         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
9808         return;
9809     case 0x13: /* SHLL, SHLL2 */
9810         if (u == 0 || size == 3) {
9811             unallocated_encoding(s);
9812             return;
9813         }
9814         if (!fp_access_check(s)) {
9815             return;
9816         }
9817         handle_shll(s, is_q, size, rn, rd);
9818         return;
9819     case 0xa: /* CMLT */
9820         if (u == 1) {
9821             unallocated_encoding(s);
9822             return;
9823         }
9824         /* fall through */
9825     case 0x8: /* CMGT, CMGE */
9826     case 0x9: /* CMEQ, CMLE */
9827     case 0xb: /* ABS, NEG */
             /* 64-bit elements are only accepted in the Q (128-bit) form */
9828         if (size == 3 && !is_q) {
9829             unallocated_encoding(s);
9830             return;
9831         }
9832         break;
9833     case 0x3: /* SUQADD, USQADD */
9834         if (size == 3 && !is_q) {
9835             unallocated_encoding(s);
9836             return;
9837         }
9838         if (!fp_access_check(s)) {
9839             return;
9840         }
9841         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
9842         return;
9843     case 0x7: /* SQABS, SQNEG */
9844         if (size == 3 && !is_q) {
9845             unallocated_encoding(s);
9846             return;
9847         }
9848         break;
9849     case 0xc ... 0xf:
9850     case 0x16 ... 0x1d:
9851     case 0x1f:
9852     {
9853         /* Floating point: U, size[1] and opcode indicate operation;
9854          * size[0] indicates single or double precision.
              * From here on opcode is the 7-bit value U:size[1]:opcode[4:0]
              * and size is rewritten to the element size used by the code
              * below (2 for 32-bit elements, 3 for 64-bit elements).
9855          */
9856         int is_double = extract32(size, 0, 1);
9857         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
9858         size = is_double ? 3 : 2;
9859         switch (opcode) {
9860         case 0x2f: /* FABS */
9861         case 0x6f: /* FNEG */
9862             if (size == 3 && !is_q) {
9863                 unallocated_encoding(s);
9864                 return;
9865             }
9866             break;
9867         case 0x1d: /* SCVTF */
9868         case 0x5d: /* UCVTF */
9869         {
9870             bool is_signed = (opcode == 0x1d) ? true : false;
                 /* Element count: 2 doubles, or 4/2 singles for Q/non-Q */
9871             int elements = is_double ? 2 : is_q ? 4 : 2;
9872             if (is_double && !is_q) {
9873                 unallocated_encoding(s);
9874                 return;
9875             }
9876             if (!fp_access_check(s)) {
9877                 return;
9878             }
9879             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
9880             return;
9881         }
9882         case 0x2c: /* FCMGT (zero) */
9883         case 0x2d: /* FCMEQ (zero) */
9884         case 0x2e: /* FCMLT (zero) */
9885         case 0x6c: /* FCMGE (zero) */
9886         case 0x6d: /* FCMLE (zero) */
9887             if (size == 3 && !is_q) {
9888                 unallocated_encoding(s);
9889                 return;
9890             }
                 /* NOTE(review): no fp_access_check() on this path here;
                  * presumably handle_2misc_fcmp_zero() performs it -- confirm.
                  */
9891             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
9892             return;
9893         case 0x7f: /* FSQRT */
9894             if (size == 3 && !is_q) {
9895                 unallocated_encoding(s);
9896                 return;
9897             }
9898             break;
9899         case 0x1a: /* FCVTNS */
9900         case 0x1b: /* FCVTMS */
9901         case 0x3a: /* FCVTPS */
9902         case 0x3b: /* FCVTZS */
9903         case 0x5a: /* FCVTNU */
9904         case 0x5b: /* FCVTMU */
9905         case 0x7a: /* FCVTPU */
9906         case 0x7b: /* FCVTZU */
9907             need_fpstatus = true;
9908             need_rmode = true;
                 /* Rounding mode index assembled from opcode bits 5 and 0;
                  * it is converted with arm_rmode_to_sf() before use.
                  */
9909             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9910             if (size == 3 && !is_q) {
9911                 unallocated_encoding(s);
9912                 return;
9913             }
9914             break;
9915         case 0x5c: /* FCVTAU */
9916         case 0x1c: /* FCVTAS */
9917             need_fpstatus = true;
9918             need_rmode = true;
9919             rmode = FPROUNDING_TIEAWAY;
9920             if (size == 3 && !is_q) {
9921                 unallocated_encoding(s);
9922                 return;
9923             }
9924             break;
9925         case 0x3c: /* URECPE */
9926             if (size == 3) {
9927                 unallocated_encoding(s);
9928                 return;
9929             }
9930             /* fall through */
9931         case 0x3d: /* FRECPE */
9932         case 0x7d: /* FRSQRTE */
9933             if (size == 3 && !is_q) {
9934                 unallocated_encoding(s);
9935                 return;
9936             }
9937             if (!fp_access_check(s)) {
9938                 return;
9939             }
9940             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
9941             return;
9942         case 0x56: /* FCVTXN, FCVTXN2 */
9943             if (size == 2) {
9944                 unallocated_encoding(s);
9945                 return;
9946             }
9947             /* fall through */
9948         case 0x16: /* FCVTN, FCVTN2 */
9949             /* handle_2misc_narrow does a 2*size -> size operation, but these
9950              * instructions encode the source size rather than dest size.
9951              */
9952             if (!fp_access_check(s)) {
9953                 return;
9954             }
9955             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
9956             return;
9957         case 0x17: /* FCVTL, FCVTL2 */
9958             if (!fp_access_check(s)) {
9959                 return;
9960             }
9961             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
9962             return;
9963         case 0x18: /* FRINTN */
9964         case 0x19: /* FRINTM */
9965         case 0x38: /* FRINTP */
9966         case 0x39: /* FRINTZ */
                 /* These four carry an explicit rounding mode in the opcode */
9967             need_rmode = true;
9968             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9969             /* fall through */
9970         case 0x59: /* FRINTX */
9971         case 0x79: /* FRINTI */
                 /* FRINTX and FRINTI do not override the rounding mode */
9972             need_fpstatus = true;
9973             if (size == 3 && !is_q) {
9974                 unallocated_encoding(s);
9975                 return;
9976             }
9977             break;
9978         case 0x58: /* FRINTA */
9979             need_rmode = true;
9980             rmode = FPROUNDING_TIEAWAY;
9981             need_fpstatus = true;
9982             if (size == 3 && !is_q) {
9983                 unallocated_encoding(s);
9984                 return;
9985             }
9986             break;
9987         case 0x7c: /* URSQRTE */
9988             if (size == 3) {
9989                 unallocated_encoding(s);
9990                 return;
9991             }
9992             need_fpstatus = true;
9993             break;
9994         default:
9995             unallocated_encoding(s);
9996             return;
9997         }
9998         break;
9999     }
10000     default:
10001         unallocated_encoding(s);
10002         return;
10003     }
10004
          /* Stage 2: shared code generation for every case that did not
           * return above.
           */
10005     if (!fp_access_check(s)) {
10006         return;
10007     }
10008
10009     if (need_fpstatus) {
10010         tcg_fpstatus = get_fpstatus_ptr();
10011     } else {
10012         TCGV_UNUSED_PTR(tcg_fpstatus);
10013     }
10014     if (need_rmode) {
              /* tcg_rmode is both the input and the output of the helper, and
               * the same call is issued again at the end of this function --
               * presumably the helper returns the previous mode so that the
               * second call restores it (TODO confirm against the helper).
               */
10015         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
10016         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10017     } else {
10018         TCGV_UNUSED_I32(tcg_rmode);
10019     }
10020
10021     if (size == 3) {
10022         /* All 64-bit element operations can be shared with scalar 2misc */
10023         int pass;
10024
              /* One pass per 64-bit element: two for Q, otherwise one */
10025         for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
10026             TCGv_i64 tcg_op = tcg_temp_new_i64();
10027             TCGv_i64 tcg_res = tcg_temp_new_i64();
10028
10029             read_vec_element(s, tcg_op, rn, pass, MO_64);
10030
10031             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
10032                             tcg_rmode, tcg_fpstatus);
10033
10034             write_vec_element(s, tcg_res, rd, pass, MO_64);
10035
10036             tcg_temp_free_i64(tcg_res);
10037             tcg_temp_free_i64(tcg_op);
10038         }
10039     } else {
10040         int pass;
10041
              /* Process the vector in 32-bit chunks: four for Q, otherwise
               * two.  Each chunk is read, transformed and written back before
               * the next one is read.
               */
10042         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
10043             TCGv_i32 tcg_op = tcg_temp_new_i32();
10044             TCGv_i32 tcg_res = tcg_temp_new_i32();
10045             TCGCond cond;
10046
10047             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
10048
10049             if (size == 2) {
10050                 /* Special cases for 32 bit elements */
10051                 switch (opcode) {
10052                 case 0xa: /* CMLT */
10053                     /* 32 bit integer comparison against zero, result is
10054                      * test ? (2^32 - 1) : 0. We implement via setcond(test)
10055                      * and inverting.
10056                      */
10057                     cond = TCG_COND_LT;
                      /* Cases 0x8 and 0x9 jump here with their own cond */
10058                 do_cmop:
10059                     tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
10060                     tcg_gen_neg_i32(tcg_res, tcg_res);
10061                     break;
10062                 case 0x8: /* CMGT, CMGE */
10063                     cond = u ? TCG_COND_GE : TCG_COND_GT;
10064                     goto do_cmop;
10065                 case 0x9: /* CMEQ, CMLE */
10066                     cond = u ? TCG_COND_LE : TCG_COND_EQ;
10067                     goto do_cmop;
10068                 case 0x4: /* CLS, CLZ */
10069                     if (u) {
10070                         gen_helper_clz32(tcg_res, tcg_op);
10071                     } else {
10072                         gen_helper_cls32(tcg_res, tcg_op);
10073                     }
10074                     break;
10075                 case 0x7: /* SQABS, SQNEG */
10076                     if (u) {
10077                         gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
10078                     } else {
10079                         gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
10080                     }
10081                     break;
10082                 case 0xb: /* ABS, NEG */
10083                     if (u) {
10084                         tcg_gen_neg_i32(tcg_res, tcg_op);
10085                     } else {
                              /* ABS: res = (op > 0) ? op : -op */
10086                         TCGv_i32 tcg_zero = tcg_const_i32(0);
10087                         tcg_gen_neg_i32(tcg_res, tcg_op);
10088                         tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
10089                                             tcg_zero, tcg_op, tcg_res);
10090                         tcg_temp_free_i32(tcg_zero);
10091                     }
10092                     break;
10093                 case 0x2f: /* FABS */
10094                     gen_helper_vfp_abss(tcg_res, tcg_op);
10095                     break;
10096                 case 0x6f: /* FNEG */
10097                     gen_helper_vfp_negs(tcg_res, tcg_op);
10098                     break;
10099                 case 0x7f: /* FSQRT */
10100                     gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
10101                     break;
                      /* The FCVT* variants share one helper per signedness
                       * and pass a shift of zero; they differ only in the
                       * rounding mode chosen during decode above.
                       */
10102                 case 0x1a: /* FCVTNS */
10103                 case 0x1b: /* FCVTMS */
10104                 case 0x1c: /* FCVTAS */
10105                 case 0x3a: /* FCVTPS */
10106                 case 0x3b: /* FCVTZS */
10107                 {
10108                     TCGv_i32 tcg_shift = tcg_const_i32(0);
10109                     gen_helper_vfp_tosls(tcg_res, tcg_op,
10110                                          tcg_shift, tcg_fpstatus);
10111                     tcg_temp_free_i32(tcg_shift);
10112                     break;
10113                 }
10114                 case 0x5a: /* FCVTNU */
10115                 case 0x5b: /* FCVTMU */
10116                 case 0x5c: /* FCVTAU */
10117                 case 0x7a: /* FCVTPU */
10118                 case 0x7b: /* FCVTZU */
10119                 {
10120                     TCGv_i32 tcg_shift = tcg_const_i32(0);
10121                     gen_helper_vfp_touls(tcg_res, tcg_op,
10122                                          tcg_shift, tcg_fpstatus);
10123                     tcg_temp_free_i32(tcg_shift);
10124                     break;
10125                 }
10126                 case 0x18: /* FRINTN */
10127                 case 0x19: /* FRINTM */
10128                 case 0x38: /* FRINTP */
10129                 case 0x39: /* FRINTZ */
10130                 case 0x58: /* FRINTA */
10131                 case 0x79: /* FRINTI */
10132                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
10133                     break;
10134                 case 0x59: /* FRINTX */
10135                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
10136                     break;
10137                 case 0x7c: /* URSQRTE */
10138                     gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
10139                     break;
10140                 default:
10141                     g_assert_not_reached();
10142                 }
10143             } else {
10144                 /* Use helpers for 8 and 16 bit elements */
10145                 switch (opcode) {
10146                 case 0x5: /* CNT, RBIT */
10147                     /* For these two insns size is part of the opcode specifier
10148                      * (handled earlier); they always operate on byte elements.
10149                      */
10150                     if (u) {
10151                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
10152                     } else {
10153                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
10154                     }
10155                     break;
10156                 case 0x7: /* SQABS, SQNEG */
10157                 {
                          /* Table indexed by [size][u]: u selects QNEG over QABS */
10158                     NeonGenOneOpEnvFn *genfn;
10159                     static NeonGenOneOpEnvFn * const fns[2][2] = {
10160                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10161                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10162                     };
10163                     genfn = fns[size][u];
10164                     genfn(tcg_res, cpu_env, tcg_op);
10165                     break;
10166                 }
10167                 case 0x8: /* CMGT, CMGE */
10168                 case 0x9: /* CMEQ, CMLE */
10169                 case 0xa: /* CMLT */
10170                 {
                          /* Table indexed by [comparison][size]; rows are
                           * GT, GE, EQ.
                           */
10171                     static NeonGenTwoOpFn * const fns[3][2] = {
10172                         { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
10173                         { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
10174                         { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
10175                     };
10176                     NeonGenTwoOpFn *genfn;
10177                     int comp;
10178                     bool reverse;
10179                     TCGv_i32 tcg_zero = tcg_const_i32(0);
10180
10181                     /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
10182                     comp = (opcode - 0x8) * 2 + u;
10183                     /* ...but LE, LT are implemented as reverse GE, GT */
10184                     reverse = (comp > 2);
10185                     if (reverse) {
                              /* 3 (CMLE) -> 1 (GE), 4 (CMLT) -> 0 (GT) */
10186                         comp = 4 - comp;
10187                     }
10188                     genfn = fns[comp][size];
10189                     if (reverse) {
                              /* operands swapped: compare zero against op */
10190                         genfn(tcg_res, tcg_zero, tcg_op);
10191                     } else {
10192                         genfn(tcg_res, tcg_op, tcg_zero);
10193                     }
10194                     tcg_temp_free_i32(tcg_zero);
10195                     break;
10196                 }
10197                 case 0xb: /* ABS, NEG */
10198                     if (u) {
                              /* NEG is emitted as 0 - op via the sub helpers */
10199                         TCGv_i32 tcg_zero = tcg_const_i32(0);
10200                         if (size) {
10201                             gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
10202                         } else {
10203                             gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
10204                         }
10205                         tcg_temp_free_i32(tcg_zero);
10206                     } else {
10207                         if (size) {
10208                             gen_helper_neon_abs_s16(tcg_res, tcg_op);
10209                         } else {
10210                             gen_helper_neon_abs_s8(tcg_res, tcg_op);
10211                         }
10212                     }
10213                     break;
10214                 case 0x4: /* CLS, CLZ */
10215                     if (u) {
10216                         if (size == 0) {
10217                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
10218                         } else {
10219                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
10220                         }
10221                     } else {
10222                         if (size == 0) {
10223                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
10224                         } else {
10225                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
10226                         }
10227                     }
10228                     break;
10229                 default:
10230                     g_assert_not_reached();
10231                 }
10232             }
10233
10234             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10235
10236             tcg_temp_free_i32(tcg_res);
10237             tcg_temp_free_i32(tcg_op);
10238         }
10239     }
          /* The non-Q loops above only wrote the low 64 bits of rd */
10240     if (!is_q) {
10241         clear_vec_high(s, rd);
10242     }
10243
          /* Second set_rmode call, pairing with the one issued before the
           * operation, then release the temporaries allocated above.
           */
10244     if (need_rmode) {
10245         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10246         tcg_temp_free_i32(tcg_rmode);
10247     }
10248     if (need_fpstatus) {
10249         tcg_temp_free_ptr(tcg_fpstatus);
10250     }
10251 }
10252
10253 /* C3.6.13 AdvSIMD scalar x indexed element
10254  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10255  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10256  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10257  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10258  * C3.6.18 AdvSIMD vector x indexed element
10259  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10260  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10261  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10262  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10263  */
10264 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
10265 {
10266     /* This encoding has two kinds of instruction:
10267      *  normal, where we perform elt x idxelt => elt for each
10268      *     element in the vector
10269      *  long, where we perform elt x idxelt and generate a result of
10270      *     double the width of the input element
10271      * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
10272      */
10273     bool is_scalar = extract32(insn, 28, 1);
10274     bool is_q = extract32(insn, 30, 1);
10275     bool u = extract32(insn, 29, 1);
10276     int size = extract32(insn, 22, 2);
10277     int l = extract32(insn, 21, 1);
10278     int m = extract32(insn, 20, 1);
10279     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
10280     int rm = extract32(insn, 16, 4);
10281     int opcode = extract32(insn, 12, 4);
10282     int h = extract32(insn, 11, 1);
10283     int rn = extract32(insn, 5, 5);
10284     int rd = extract32(insn, 0, 5);
10285     bool is_long = false;
10286     bool is_fp = false;
10287     int index;
10288     TCGv_ptr fpst;
10289
10290     switch (opcode) {
10291     case 0x0: /* MLA */
10292     case 0x4: /* MLS */
10293         if (!u || is_scalar) {
10294             unallocated_encoding(s);
10295             return;
10296         }
10297         break;
10298     case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10299     case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10300     case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
10301         if (is_scalar) {
10302             unallocated_encoding(s);
10303             return;
10304         }
10305         is_long = true;
10306         break;
10307     case 0x3: /* SQDMLAL, SQDMLAL2 */
10308     case 0x7: /* SQDMLSL, SQDMLSL2 */
10309     case 0xb: /* SQDMULL, SQDMULL2 */
10310         is_long = true;
10311         /* fall through */
10312     case 0xc: /* SQDMULH */
10313     case 0xd: /* SQRDMULH */
10314         if (u) {
10315             unallocated_encoding(s);
10316             return;
10317         }
10318         break;
10319     case 0x8: /* MUL */
10320         if (u || is_scalar) {
10321             unallocated_encoding(s);
10322             return;
10323         }
10324         break;
10325     case 0x1: /* FMLA */
10326     case 0x5: /* FMLS */
10327         if (u) {
10328             unallocated_encoding(s);
10329             return;
10330         }
10331         /* fall through */
10332     case 0x9: /* FMUL, FMULX */
10333         if (!extract32(size, 1, 1)) {
10334             unallocated_encoding(s);
10335             return;
10336         }
10337         is_fp = true;
10338         break;
10339     default:
10340         unallocated_encoding(s);
10341         return;
10342     }
10343
10344     if (is_fp) {
10345         /* low bit of size indicates single/double */
10346         size = extract32(size, 0, 1) ? 3 : 2;
10347         if (size == 2) {
10348             index = h << 1 | l;
10349         } else {
10350             if (l || !is_q) {
10351                 unallocated_encoding(s);
10352                 return;
10353             }
10354             index = h;
10355         }
10356         rm |= (m << 4);
10357     } else {
10358         switch (size) {
10359         case 1:
10360             index = h << 2 | l << 1 | m;
10361             break;
10362         case 2:
10363             index = h << 1 | l;
10364             rm |= (m << 4);
10365             break;
10366         default:
10367             unallocated_encoding(s);
10368             return;
10369         }
10370     }
10371
10372     if (!fp_access_check(s)) {
10373         return;
10374     }
10375
10376     if (is_fp) {
10377         fpst = get_fpstatus_ptr();
10378     } else {
10379         TCGV_UNUSED_PTR(fpst);
10380     }
10381
10382     if (size == 3) {
10383         TCGv_i64 tcg_idx = tcg_temp_new_i64();
10384         int pass;
10385
10386         assert(is_fp && is_q && !is_long);
10387
10388         read_vec_element(s, tcg_idx, rm, index, MO_64);
10389
10390         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10391             TCGv_i64 tcg_op = tcg_temp_new_i64();
10392             TCGv_i64 tcg_res = tcg_temp_new_i64();
10393
10394             read_vec_element(s, tcg_op, rn, pass, MO_64);
10395
10396             switch (opcode) {
10397             case 0x5: /* FMLS */
10398                 /* As usual for ARM, separate negation for fused multiply-add */
10399                 gen_helper_vfp_negd(tcg_op, tcg_op);
10400                 /* fall through */
10401             case 0x1: /* FMLA */
10402                 read_vec_element(s, tcg_res, rd, pass, MO_64);
10403                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10404                 break;
10405             case 0x9: /* FMUL, FMULX */
10406                 if (u) {
10407                     gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
10408                 } else {
10409                     gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
10410                 }
10411                 break;
10412             default:
10413                 g_assert_not_reached();
10414             }
10415
10416             write_vec_element(s, tcg_res, rd, pass, MO_64);
10417             tcg_temp_free_i64(tcg_op);
10418             tcg_temp_free_i64(tcg_res);
10419         }
10420
10421         if (is_scalar) {
10422             clear_vec_high(s, rd);
10423         }
10424
10425         tcg_temp_free_i64(tcg_idx);
10426     } else if (!is_long) {
10427         /* 32 bit floating point, or 16 or 32 bit integer.
10428          * For the 16 bit scalar case we use the usual Neon helpers and
10429          * rely on the fact that 0 op 0 == 0 with no side effects.
10430          */
10431         TCGv_i32 tcg_idx = tcg_temp_new_i32();
10432         int pass, maxpasses;
10433
10434         if (is_scalar) {
10435             maxpasses = 1;
10436         } else {
10437             maxpasses = is_q ? 4 : 2;
10438         }
10439
10440         read_vec_element_i32(s, tcg_idx, rm, index, size);
10441
10442         if (size == 1 && !is_scalar) {
10443             /* The simplest way to handle the 16x16 indexed ops is to duplicate
10444              * the index into both halves of the 32 bit tcg_idx and then use
10445              * the usual Neon helpers.
10446              */
10447             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10448         }
10449
10450         for (pass = 0; pass < maxpasses; pass++) {
10451             TCGv_i32 tcg_op = tcg_temp_new_i32();
10452             TCGv_i32 tcg_res = tcg_temp_new_i32();
10453
10454             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
10455
10456             switch (opcode) {
10457             case 0x0: /* MLA */
10458             case 0x4: /* MLS */
10459             case 0x8: /* MUL */
10460             {
10461                 static NeonGenTwoOpFn * const fns[2][2] = {
10462                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
10463                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
10464                 };
10465                 NeonGenTwoOpFn *genfn;
10466                 bool is_sub = opcode == 0x4;
10467
10468                 if (size == 1) {
10469                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
10470                 } else {
10471                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
10472                 }
10473                 if (opcode == 0x8) {
10474                     break;
10475                 }
10476                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
10477                 genfn = fns[size - 1][is_sub];
10478                 genfn(tcg_res, tcg_op, tcg_res);
10479                 break;
10480             }
10481             case 0x5: /* FMLS */
10482                 /* As usual for ARM, separate negation for fused multiply-add */
10483                 gen_helper_vfp_negs(tcg_op, tcg_op);
10484                 /* fall through */
10485             case 0x1: /* FMLA */
10486                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10487                 gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10488                 break;
10489             case 0x9: /* FMUL, FMULX */
10490                 if (u) {
10491                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
10492                 } else {
10493                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
10494                 }
10495                 break;
10496             case 0xc: /* SQDMULH */
10497                 if (size == 1) {
10498                     gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
10499                                                tcg_op, tcg_idx);
10500                 } else {
10501                     gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
10502                                                tcg_op, tcg_idx);
10503                 }
10504                 break;
10505             case 0xd: /* SQRDMULH */
10506                 if (size == 1) {
10507                     gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
10508                                                 tcg_op, tcg_idx);
10509                 } else {
10510                     gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
10511                                                 tcg_op, tcg_idx);
10512                 }
10513                 break;
10514             default:
10515                 g_assert_not_reached();
10516             }
10517
10518             if (is_scalar) {
10519                 write_fp_sreg(s, rd, tcg_res);
10520             } else {
10521                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10522             }
10523
10524             tcg_temp_free_i32(tcg_op);
10525             tcg_temp_free_i32(tcg_res);
10526         }
10527
10528         tcg_temp_free_i32(tcg_idx);
10529
10530         if (!is_q) {
10531             clear_vec_high(s, rd);
10532         }
10533     } else {
10534         /* long ops: 16x16->32 or 32x32->64 */
10535         TCGv_i64 tcg_res[2];
10536         int pass;
10537         bool satop = extract32(opcode, 0, 1);
10538         TCGMemOp memop = MO_32;
10539
10540         if (satop || !u) {
10541             memop |= MO_SIGN;
10542         }
10543
10544         if (size == 2) {
10545             TCGv_i64 tcg_idx = tcg_temp_new_i64();
10546
10547             read_vec_element(s, tcg_idx, rm, index, memop);
10548
10549             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10550                 TCGv_i64 tcg_op = tcg_temp_new_i64();
10551                 TCGv_i64 tcg_passres;
10552                 int passelt;
10553
10554                 if (is_scalar) {
10555                     passelt = 0;
10556                 } else {
10557                     passelt = pass + (is_q * 2);
10558                 }
10559
10560                 read_vec_element(s, tcg_op, rn, passelt, memop);
10561
10562                 tcg_res[pass] = tcg_temp_new_i64();
10563
10564                 if (opcode == 0xa || opcode == 0xb) {
10565                     /* Non-accumulating ops */
10566                     tcg_passres = tcg_res[pass];
10567                 } else {
10568                     tcg_passres = tcg_temp_new_i64();
10569                 }
10570
10571                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
10572                 tcg_temp_free_i64(tcg_op);
10573
10574                 if (satop) {
10575                     /* saturating, doubling */
10576                     gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10577                                                       tcg_passres, tcg_passres);
10578                 }
10579
10580                 if (opcode == 0xa || opcode == 0xb) {
10581                     continue;
10582                 }
10583
10584                 /* Accumulating op: handle accumulate step */
10585                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10586
10587                 switch (opcode) {
10588                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10589                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10590                     break;
10591                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10592                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10593                     break;
10594                 case 0x7: /* SQDMLSL, SQDMLSL2 */
10595                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
10596                     /* fall through */
10597                 case 0x3: /* SQDMLAL, SQDMLAL2 */
10598                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10599                                                       tcg_res[pass],
10600                                                       tcg_passres);
10601                     break;
10602                 default:
10603                     g_assert_not_reached();
10604                 }
10605                 tcg_temp_free_i64(tcg_passres);
10606             }
10607             tcg_temp_free_i64(tcg_idx);
10608
10609             if (is_scalar) {
10610                 clear_vec_high(s, rd);
10611             }
10612         } else {
10613             TCGv_i32 tcg_idx = tcg_temp_new_i32();
10614
10615             assert(size == 1);
10616             read_vec_element_i32(s, tcg_idx, rm, index, size);
10617
10618             if (!is_scalar) {
10619                 /* The simplest way to handle the 16x16 indexed ops is to
10620                  * duplicate the index into both halves of the 32 bit tcg_idx
10621                  * and then use the usual Neon helpers.
10622                  */
10623                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10624             }
10625
10626             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10627                 TCGv_i32 tcg_op = tcg_temp_new_i32();
10628                 TCGv_i64 tcg_passres;
10629
10630                 if (is_scalar) {
10631                     read_vec_element_i32(s, tcg_op, rn, pass, size);
10632                 } else {
10633                     read_vec_element_i32(s, tcg_op, rn,
10634                                          pass + (is_q * 2), MO_32);
10635                 }
10636
10637                 tcg_res[pass] = tcg_temp_new_i64();
10638
10639                 if (opcode == 0xa || opcode == 0xb) {
10640                     /* Non-accumulating ops */
10641                     tcg_passres = tcg_res[pass];
10642                 } else {
10643                     tcg_passres = tcg_temp_new_i64();
10644                 }
10645
10646                 if (memop & MO_SIGN) {
10647                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
10648                 } else {
10649                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
10650                 }
10651                 if (satop) {
10652                     gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10653                                                       tcg_passres, tcg_passres);
10654                 }
10655                 tcg_temp_free_i32(tcg_op);
10656
10657                 if (opcode == 0xa || opcode == 0xb) {
10658                     continue;
10659                 }
10660
10661                 /* Accumulating op: handle accumulate step */
10662                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10663
10664                 switch (opcode) {
10665                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10666                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
10667                                              tcg_passres);
10668                     break;
10669                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10670                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
10671                                              tcg_passres);
10672                     break;
10673                 case 0x7: /* SQDMLSL, SQDMLSL2 */
10674                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10675                     /* fall through */
10676                 case 0x3: /* SQDMLAL, SQDMLAL2 */
10677                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10678                                                       tcg_res[pass],
10679                                                       tcg_passres);
10680                     break;
10681                 default:
10682                     g_assert_not_reached();
10683                 }
10684                 tcg_temp_free_i64(tcg_passres);
10685             }
10686             tcg_temp_free_i32(tcg_idx);
10687
10688             if (is_scalar) {
10689                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
10690             }
10691         }
10692
10693         if (is_scalar) {
10694             tcg_res[1] = tcg_const_i64(0);
10695         }
10696
10697         for (pass = 0; pass < 2; pass++) {
10698             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10699             tcg_temp_free_i64(tcg_res[pass]);
10700         }
10701     }
10702
10703     if (!TCGV_IS_UNUSED_PTR(fpst)) {
10704         tcg_temp_free_ptr(fpst);
10705     }
10706 }
10707
10708 /* C3.6.19 Crypto AES
10709  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10710  * +-----------------+------+-----------+--------+-----+------+------+
10711  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10712  * +-----------------+------+-----------+--------+-----+------+------+
10713  */
10714 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
10715 {
10716     int size = extract32(insn, 22, 2);
10717     int opcode = extract32(insn, 12, 5);
10718     int rn = extract32(insn, 5, 5);
10719     int rd = extract32(insn, 0, 5);
10720     int decrypt;
10721     TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt;
10722     CryptoThreeOpEnvFn *genfn;
10723
10724     if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
10725         || size != 0) {
10726         unallocated_encoding(s);
10727         return;
10728     }
10729
10730     switch (opcode) {
10731     case 0x4: /* AESE */
10732         decrypt = 0;
10733         genfn = gen_helper_crypto_aese;
10734         break;
10735     case 0x6: /* AESMC */
10736         decrypt = 0;
10737         genfn = gen_helper_crypto_aesmc;
10738         break;
10739     case 0x5: /* AESD */
10740         decrypt = 1;
10741         genfn = gen_helper_crypto_aese;
10742         break;
10743     case 0x7: /* AESIMC */
10744         decrypt = 1;
10745         genfn = gen_helper_crypto_aesmc;
10746         break;
10747     default:
10748         unallocated_encoding(s);
10749         return;
10750     }
10751
10752     /* Note that we convert the Vx register indexes into the
10753      * index within the vfp.regs[] array, so we can share the
10754      * helper with the AArch32 instructions.
10755      */
10756     tcg_rd_regno = tcg_const_i32(rd << 1);
10757     tcg_rn_regno = tcg_const_i32(rn << 1);
10758     tcg_decrypt = tcg_const_i32(decrypt);
10759
10760     genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt);
10761
10762     tcg_temp_free_i32(tcg_rd_regno);
10763     tcg_temp_free_i32(tcg_rn_regno);
10764     tcg_temp_free_i32(tcg_decrypt);
10765 }
10766
10767 /* C3.6.20 Crypto three-reg SHA
10768  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
10769  * +-----------------+------+---+------+---+--------+-----+------+------+
10770  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
10771  * +-----------------+------+---+------+---+--------+-----+------+------+
10772  */
10773 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
10774 {
10775     int size = extract32(insn, 22, 2);
10776     int opcode = extract32(insn, 12, 3);
10777     int rm = extract32(insn, 16, 5);
10778     int rn = extract32(insn, 5, 5);
10779     int rd = extract32(insn, 0, 5);
10780     CryptoThreeOpEnvFn *genfn;
10781     TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno;
10782     int feature = ARM_FEATURE_V8_SHA256;
10783
10784     if (size != 0) {
10785         unallocated_encoding(s);
10786         return;
10787     }
10788
10789     switch (opcode) {
10790     case 0: /* SHA1C */
10791     case 1: /* SHA1P */
10792     case 2: /* SHA1M */
10793     case 3: /* SHA1SU0 */
10794         genfn = NULL;
10795         feature = ARM_FEATURE_V8_SHA1;
10796         break;
10797     case 4: /* SHA256H */
10798         genfn = gen_helper_crypto_sha256h;
10799         break;
10800     case 5: /* SHA256H2 */
10801         genfn = gen_helper_crypto_sha256h2;
10802         break;
10803     case 6: /* SHA256SU1 */
10804         genfn = gen_helper_crypto_sha256su1;
10805         break;
10806     default:
10807         unallocated_encoding(s);
10808         return;
10809     }
10810
10811     if (!arm_dc_feature(s, feature)) {
10812         unallocated_encoding(s);
10813         return;
10814     }
10815
10816     tcg_rd_regno = tcg_const_i32(rd << 1);
10817     tcg_rn_regno = tcg_const_i32(rn << 1);
10818     tcg_rm_regno = tcg_const_i32(rm << 1);
10819
10820     if (genfn) {
10821         genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno);
10822     } else {
10823         TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
10824
10825         gen_helper_crypto_sha1_3reg(cpu_env, tcg_rd_regno,
10826                                     tcg_rn_regno, tcg_rm_regno, tcg_opcode);
10827         tcg_temp_free_i32(tcg_opcode);
10828     }
10829
10830     tcg_temp_free_i32(tcg_rd_regno);
10831     tcg_temp_free_i32(tcg_rn_regno);
10832     tcg_temp_free_i32(tcg_rm_regno);
10833 }
10834
10835 /* C3.6.21 Crypto two-reg SHA
10836  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10837  * +-----------------+------+-----------+--------+-----+------+------+
10838  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10839  * +-----------------+------+-----------+--------+-----+------+------+
10840  */
10841 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
10842 {
10843     int size = extract32(insn, 22, 2);
10844     int opcode = extract32(insn, 12, 5);
10845     int rn = extract32(insn, 5, 5);
10846     int rd = extract32(insn, 0, 5);
10847     CryptoTwoOpEnvFn *genfn;
10848     int feature;
10849     TCGv_i32 tcg_rd_regno, tcg_rn_regno;
10850
10851     if (size != 0) {
10852         unallocated_encoding(s);
10853         return;
10854     }
10855
10856     switch (opcode) {
10857     case 0: /* SHA1H */
10858         feature = ARM_FEATURE_V8_SHA1;
10859         genfn = gen_helper_crypto_sha1h;
10860         break;
10861     case 1: /* SHA1SU1 */
10862         feature = ARM_FEATURE_V8_SHA1;
10863         genfn = gen_helper_crypto_sha1su1;
10864         break;
10865     case 2: /* SHA256SU0 */
10866         feature = ARM_FEATURE_V8_SHA256;
10867         genfn = gen_helper_crypto_sha256su0;
10868         break;
10869     default:
10870         unallocated_encoding(s);
10871         return;
10872     }
10873
10874     if (!arm_dc_feature(s, feature)) {
10875         unallocated_encoding(s);
10876         return;
10877     }
10878
10879     tcg_rd_regno = tcg_const_i32(rd << 1);
10880     tcg_rn_regno = tcg_const_i32(rn << 1);
10881
10882     genfn(cpu_env, tcg_rd_regno, tcg_rn_regno);
10883
10884     tcg_temp_free_i32(tcg_rd_regno);
10885     tcg_temp_free_i32(tcg_rn_regno);
10886 }
10887
/* C3.6 Data processing - SIMD, inc Crypto
 *
 * As the decode gets a little complex we are using a table based
 * approach for this part of the decode.
 *
 * Each entry is { pattern, mask, fn }.  disas_data_proc_simd() hands this
 * table and the instruction word to lookup_disas_fn() and calls whatever
 * function comes back, or UNDEFs if it gets NULL.
 *
 * Entry order matters where one encoding is a subset of another (see the
 * simd_mod_imm note in the table), so do not sort or regroup the rows.
 * NOTE(review): the final all-zero entry with a NULL fn looks like the
 * end-of-table marker for lookup_disas_fn(), which is defined outside
 * this section - confirm there before changing it.
 */
static const AArch64DecodeTable data_proc_simd[] = {
    /* pattern  ,  mask     ,  fn                        */
    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
    { 0x0e000400, 0x9fe08400, disas_simd_copy },
    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
    { 0x0e000000, 0xbf208c00, disas_simd_tb },
    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
    { 0x2e000000, 0xbf208400, disas_simd_ext },
    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
    { 0x00000000, 0x00000000, NULL }
};
10919
10920 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
10921 {
10922     /* Note that this is called with all non-FP cases from
10923      * table C3-6 so it must UNDEF for entries not specifically
10924      * allocated to instructions in that table.
10925      */
10926     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
10927     if (fn) {
10928         fn(s, insn);
10929     } else {
10930         unallocated_encoding(s);
10931     }
10932 }
10933
10934 /* C3.6 Data processing - SIMD and floating point */
10935 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
10936 {
10937     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
10938         disas_data_proc_fp(s, insn);
10939     } else {
10940         /* SIMD, including crypto */
10941         disas_data_proc_simd(s, insn);
10942     }
10943 }
10944
10945 /* C3.1 A64 instruction index by encoding */
10946 static void disas_a64_insn(CPUARMState *env, DisasContext *s)
10947 {
10948     uint32_t insn;
10949
10950     insn = arm_ldl_code(env, s->pc, s->bswap_code);
10951     s->insn = insn;
10952     s->pc += 4;
10953
10954     s->fp_access_checked = false;
10955
10956     switch (extract32(insn, 25, 4)) {
10957     case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
10958         unallocated_encoding(s);
10959         break;
10960     case 0x8: case 0x9: /* Data processing - immediate */
10961         disas_data_proc_imm(s, insn);
10962         break;
10963     case 0xa: case 0xb: /* Branch, exception generation and system insns */
10964         disas_b_exc_sys(s, insn);
10965         break;
10966     case 0x4:
10967     case 0x6:
10968     case 0xc:
10969     case 0xe:      /* Loads and stores */
10970         disas_ldst(s, insn);
10971         break;
10972     case 0x5:
10973     case 0xd:      /* Data processing - register */
10974         disas_data_proc_reg(s, insn);
10975         break;
10976     case 0x7:
10977     case 0xf:      /* Data processing - SIMD and floating point */
10978         disas_data_proc_simd_fp(s, insn);
10979         break;
10980     default:
10981         assert(FALSE); /* all 15 cases should be handled above */
10982         break;
10983     }
10984
10985     /* if we allocated any temporaries, free them here */
10986     free_tmp_a64(s);
10987 }
10988
/* Translate guest A64 code starting at tb->pc into TCG ops for one
 * translation block.
 *
 * cpu:       CPU being translated for; its env, feature bits, cp_regs,
 *            breakpoint list and singlestep setting are read
 * tb:        block being translated; tb->pc, tb->flags and tb->cflags are
 *            read, and tb->size / tb->icount are written when !search_pc
 * search_pc: when true, record per-op guest PC, instruction-start and
 *            icount information in tcg_ctx.gen_opc_* instead of updating
 *            tb->size and tb->icount
 */
void gen_intermediate_code_internal_a64(ARMCPU *cpu,
                                        TranslationBlock *tb,
                                        bool search_pc)
{
    CPUState *cs = CPU(cpu);
    CPUARMState *env = &cpu->env;
    DisasContext dc1, *dc = &dc1;
    CPUBreakpoint *bp;
    int j, lj;
    target_ulong pc_start;
    target_ulong next_page_start;
    int num_insns;
    int max_insns;

    pc_start = tb->pc;

    /* Fill in the per-TB disassembly context */
    dc->tb = tb;

    dc->is_jmp = DISAS_NEXT;
    dc->pc = pc_start;
    dc->singlestep_enabled = cs->singlestep_enabled;
    dc->condjmp = 0;

    dc->aarch64 = 1;
    /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
     * there is no secure EL1, so we route exceptions to EL3.
     */
    dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
                               !arm_el_is_aa64(env, 3);
    dc->thumb = 0;
    dc->bswap_code = 0;
    dc->condexec_mask = 0;
    dc->condexec_cond = 0;
    /* The MMU index comes from the TB flags; the current EL is derived
     * from it.
     */
    dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(tb->flags);
    dc->vec_len = 0;
    dc->vec_stride = 0;
    dc->cp_regs = cpu->cp_regs;
    dc->features = env->features;

    /* Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = ARM_TBFLAG_SS_ACTIVE(tb->flags);
    dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(tb->flags);
    dc->is_ldex = false;
    dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);

    init_tmp_a64_array(dc);

    /* Translation never continues past the end of the page holding
     * pc_start (see the loop condition below).
     */
    next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
    lj = -1;
    num_insns = 0;
    /* The CF_COUNT_MASK bits of cflags give the instruction budget for
     * this TB; zero means "use the maximum".
     */
    max_insns = tb->cflags & CF_COUNT_MASK;
    if (max_insns == 0) {
        max_insns = CF_COUNT_MASK;
    }

    gen_tb_start(tb);

    tcg_clear_temp_count();

    /* Main loop: one iteration per guest instruction */
    do {
        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
                if (bp->pc == dc->pc) {
                    gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
                    /* Advance PC so that clearing the breakpoint will
                       invalidate this TB.  */
                    /* NOTE(review): A64 instructions are 4 bytes (see
                     * disas_a64_insn) but the advance here is 2.  It still
                     * makes tb->size non-zero when set below - confirm
                     * whether 4 was intended.
                     */
                    dc->pc += 2;
                    goto done_generating;
                }
            }
        }

        if (search_pc) {
            /* Zero the instr_start slots for ops emitted since the last
             * instruction, then record this instruction's PC and icount
             * against its first op.
             */
            j = tcg_op_buf_count();
            if (lj < j) {
                lj++;
                while (lj < j) {
                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
                }
            }
            tcg_ctx.gen_opc_pc[lj] = dc->pc;
            tcg_ctx.gen_opc_instr_start[lj] = 1;
            tcg_ctx.gen_opc_icount[lj] = num_insns;
        }

        /* About to translate the last allowed insn of a CF_LAST_IO block */
        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
            gen_io_start();
        }

        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
            tcg_gen_debug_insn_start(dc->pc);
        }

        if (dc->ss_active && !dc->pstate_ss) {
            /* Singlestep state is Active-pending.
             * If we're in this state at the start of a TB then either
             *  a) we just took an exception to an EL which is being debugged
             *     and this is the first insn in the exception handler
             *  b) debug exceptions were masked and we just unmasked them
             *     without changing EL (eg by clearing PSTATE.D)
             * In either case we're going to take a swstep exception in the
             * "did not step an insn" case, and so the syndrome ISV and EX
             * bits should be zero.
             */
            assert(num_insns == 0);
            gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
                          default_exception_el(dc));
            dc->is_jmp = DISAS_EXC;
            break;
        }

        disas_a64_insn(env, dc);

        /* Diagnostic only: report TCG temporaries the decoder failed to
         * free for the instruction just translated.
         */
        if (tcg_check_temp_count()) {
            fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
                    dc->pc);
        }

        /* Translation stops when a conditional branch is encountered.
         * Otherwise the subsequent code could get translated several times.
         * Also stop translation when a page boundary is reached.  This
         * ensures prefetch aborts occur at the right place.
         */
        num_insns++;
    } while (!dc->is_jmp && !tcg_op_buf_full() &&
             !cs->singlestep_enabled &&
             !singlestep &&
             !dc->ss_active &&
             dc->pc < next_page_start &&
             num_insns < max_insns);

    if (tb->cflags & CF_LAST_IO) {
        gen_io_end();
    }

    /* Emit the code that ends the TB.  Single-stepping (gdbstub or
     * architectural) needs a step exception unless the last insn already
     * generated an exception of its own.
     */
    if (unlikely(cs->singlestep_enabled || dc->ss_active)
        && dc->is_jmp != DISAS_EXC) {
        /* Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        assert(dc->is_jmp != DISAS_TB_JUMP);
        if (dc->is_jmp != DISAS_JUMP) {
            gen_a64_set_pc_im(dc->pc);
        }
        if (cs->singlestep_enabled) {
            gen_exception_internal(EXCP_DEBUG);
        } else {
            gen_step_complete_exception(dc);
        }
    } else {
        switch (dc->is_jmp) {
        case DISAS_NEXT:
            gen_goto_tb(dc, 1, dc->pc);
            break;
        default:
        case DISAS_UPDATE:
            gen_a64_set_pc_im(dc->pc);
            /* fall through */
        case DISAS_JUMP:
            /* indicate that the hash table must be used to find the next TB */
            tcg_gen_exit_tb(0);
            break;
        case DISAS_TB_JUMP:
        case DISAS_EXC:
        case DISAS_SWI:
            /* Nothing further is emitted for these cases */
            break;
        case DISAS_WFE:
            gen_a64_set_pc_im(dc->pc);
            gen_helper_wfe(cpu_env);
            break;
        case DISAS_YIELD:
            gen_a64_set_pc_im(dc->pc);
            gen_helper_yield(cpu_env);
            break;
        case DISAS_WFI:
            /* This is a special case because we don't want to just halt the CPU
             * if trying to debug across a WFI.
             */
            gen_a64_set_pc_im(dc->pc);
            gen_helper_wfi(cpu_env);
            /* The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(0);
            break;
        }
    }

    /* The breakpoint path jumps straight here, skipping the TB-ending code
     * above.
     */
done_generating:
    gen_tb_end(tb, num_insns);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
        qemu_log("----------------\n");
        qemu_log("IN: %s\n", lookup_symbol(pc_start));
        log_target_disas(cs, pc_start, dc->pc - pc_start,
                         4 | (dc->bswap_code << 1));
        qemu_log("\n");
    }
#endif
    if (search_pc) {
        /* Zero the instr_start slots for the ops emitted after the last
         * recorded instruction.
         */
        j = tcg_op_buf_count();
        lj++;
        while (lj <= j) {
            tcg_ctx.gen_opc_instr_start[lj++] = 0;
        }
    } else {
        /* Record the guest bytes and instruction count this TB covers */
        tb->size = dc->pc - pc_start;
        tb->icount = num_insns;
    }
}