target-arm/translate-a64.c

   1 /*
   2  *  AArch64 translation
   3  *
   4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include "qemu/osdep.h"
  20
  21 #include "cpu.h"
  22 #include "tcg-op.h"
  23 #include "qemu/log.h"
  24 #include "arm_ldst.h"
  25 #include "translate.h"
  26 #include "internals.h"
  27 #include "qemu/host-utils.h"
  28
  29 #include "exec/semihost.h"
  30 #include "exec/gen-icount.h"
  31
  32 #include "exec/helper-proto.h"
  33 #include "exec/helper-gen.h"
  34
  35 #include "trace-tcg.h"
  36
/* TCG globals bound by a64_translate_init() to CPUARMState.xregs[0..31]
 * and CPUARMState.pc respectively.
 */
static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling: TCG global bound by a64_translate_init()
 * to CPUARMState.exclusive_high.
 */
static TCGv_i64 cpu_exclusive_high;
  42
  43 static const char *regnames[] = {
  44     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
  45     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
  46     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
  47     "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
  48 };
  49
/* Shift kinds, with explicit numeric values 0..3.
 * NOTE(review): presumably these match the 2-bit shift field of the
 * instruction encoding - confirm against the users of this enum.
 */
enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};
  56
/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */

/* Signature of a decode handler: receives the translation context and
 * the 32-bit instruction word.
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

/* One decode table entry: a pattern/mask pair plus the handler to call.
 * NOTE(review): presumably an entry matches when
 * (insn & mask) == pattern - the lookup code is not in this part of the
 * file, confirm there.
 */
typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;
  67
/* Function prototypes for gen_ functions for calling Neon helpers.
 * The names encode the operand count, the operand width (32 bit unless
 * "64"/"Double" is present) and whether a TCGv_ptr is passed ("Env"
 * variants take it as the second argument, the "OPFn" variants as the
 * last one).
 * NOTE(review): by TCG convention the first TCGv argument is presumably
 * the destination - confirm against the helpers these are used with.
 */
typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
  82
  83 /* initialize TCG globals.  */
  84 void a64_translate_init(void)
  85 {
  86     int i;
  87
  88     cpu_pc = tcg_global_mem_new_i64(TCG_AREG0,
  89                                     offsetof(CPUARMState, pc),
  90                                     "pc");
  91     for (i = 0; i < 32; i++) {
  92         cpu_X[i] = tcg_global_mem_new_i64(TCG_AREG0,
  93                                           offsetof(CPUARMState, xregs[i]),
  94                                           regnames[i]);
  95     }
  96
  97     cpu_exclusive_high = tcg_global_mem_new_i64(TCG_AREG0,
  98         offsetof(CPUARMState, exclusive_high), "exclusive_high");
  99 }
 100
 101 static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s)
 102 {
 103     /* Return the mmu_idx to use for A64 "unprivileged load/store" insns:
 104      *  if EL1, access as if EL0; otherwise access at current EL
 105      */
 106     switch (s->mmu_idx) {
 107     case ARMMMUIdx_S12NSE1:
 108         return ARMMMUIdx_S12NSE0;
 109     case ARMMMUIdx_S1SE1:
 110         return ARMMMUIdx_S1SE0;
 111     case ARMMMUIdx_S2NS:
 112         g_assert_not_reached();
 113     default:
 114         return s->mmu_idx;
 115     }
 116 }
 117
 118 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
 119                             fprintf_function cpu_fprintf, int flags)
 120 {
 121     ARMCPU *cpu = ARM_CPU(cs);
 122     CPUARMState *env = &cpu->env;
 123     uint32_t psr = pstate_read(env);
 124     int i;
 125     int el = arm_current_el(env);
 126     const char *ns_status;
 127
 128     cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
 129             env->pc, env->xregs[31]);
 130     for (i = 0; i < 31; i++) {
 131         cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
 132         if ((i % 4) == 3) {
 133             cpu_fprintf(f, "\n");
 134         } else {
 135             cpu_fprintf(f, " ");
 136         }
 137     }
 138
 139     if (arm_feature(env, ARM_FEATURE_EL3) && el != 3) {
 140         ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
 141     } else {
 142         ns_status = "";
 143     }
 144
 145     cpu_fprintf(f, "\nPSTATE=%08x %c%c%c%c %sEL%d%c\n",
 146                 psr,
 147                 psr & PSTATE_N ? 'N' : '-',
 148                 psr & PSTATE_Z ? 'Z' : '-',
 149                 psr & PSTATE_C ? 'C' : '-',
 150                 psr & PSTATE_V ? 'V' : '-',
 151                 ns_status,
 152                 el,
 153                 psr & PSTATE_SP ? 'h' : 't');
 154
 155     if (flags & CPU_DUMP_FPU) {
 156         int numvfpregs = 32;
 157         for (i = 0; i < numvfpregs; i += 2) {
 158             uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
 159             uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
 160             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
 161                         i, vhi, vlo);
 162             vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
 163             vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
 164             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
 165                         i + 1, vhi, vlo);
 166         }
 167         cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
 168                     vfp_get_fpcr(env), vfp_get_fpsr(env));
 169     }
 170 }
 171
 172 void gen_a64_set_pc_im(uint64_t val)
 173 {
 174     tcg_gen_movi_i64(cpu_pc, val);
 175 }
 176
/* 64-bit form of a condition test, filled in by a64_test_cc() and
 * released with a64_free_cc(): 'cond' is the TCG condition and 'value'
 * a temporary holding the sign-extended 32-bit comparison value.
 */
typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;
 181
 182 static void a64_test_cc(DisasCompare64 *c64, int cc)
 183 {
 184     DisasCompare c32;
 185
 186     arm_test_cc(&c32, cc);
 187
 188     /* Sign-extend the 32-bit value so that the GE/LT comparisons work
 189        * properly.  The NE/EQ comparisons are also fine with this choice.  */
 190     c64->cond = c32.cond;
 191     c64->value = tcg_temp_new_i64();
 192     tcg_gen_ext_i32_i64(c64->value, c32.value);
 193
 194     arm_free_cc(&c32);
 195 }
 196
 197 static void a64_free_cc(DisasCompare64 *c64)
 198 {
 199     tcg_temp_free_i64(c64->value);
 200 }
 201
 202 static void gen_exception_internal(int excp)
 203 {
 204     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 205
 206     assert(excp_is_internal(excp));
 207     gen_helper_exception_internal(cpu_env, tcg_excp);
 208     tcg_temp_free_i32(tcg_excp);
 209 }
 210
 211 static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
 212 {
 213     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 214     TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
 215     TCGv_i32 tcg_el = tcg_const_i32(target_el);
 216
 217     gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
 218                                        tcg_syn, tcg_el);
 219     tcg_temp_free_i32(tcg_el);
 220     tcg_temp_free_i32(tcg_syn);
 221     tcg_temp_free_i32(tcg_excp);
 222 }
 223
 224 static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
 225 {
 226     gen_a64_set_pc_im(s->pc - offset);
 227     gen_exception_internal(excp);
 228     s->is_jmp = DISAS_EXC;
 229 }
 230
 231 static void gen_exception_insn(DisasContext *s, int offset, int excp,
 232                                uint32_t syndrome, uint32_t target_el)
 233 {
 234     gen_a64_set_pc_im(s->pc - offset);
 235     gen_exception(excp, syndrome, target_el);
 236     s->is_jmp = DISAS_EXC;
 237 }
 238
 239 static void gen_ss_advance(DisasContext *s)
 240 {
 241     /* If the singlestep state is Active-not-pending, advance to
 242      * Active-pending.
 243      */
 244     if (s->ss_active) {
 245         s->pstate_ss = 0;
 246         gen_helper_clear_pstate_ss(cpu_env);
 247     }
 248 }
 249
 250 static void gen_step_complete_exception(DisasContext *s)
 251 {
 252     /* We just completed step of an insn. Move from Active-not-pending
 253      * to Active-pending, and then also take the swstep exception.
 254      * This corresponds to making the (IMPDEF) choice to prioritize
 255      * swstep exceptions over asynchronous exceptions taken to an exception
 256      * level where debug is disabled. This choice has the advantage that
 257      * we do not need to maintain internal state corresponding to the
 258      * ISV/EX syndrome bits between completion of the step and generation
 259      * of the exception, and our syndrome information is always correct.
 260      */
 261     gen_ss_advance(s);
 262     gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
 263                   default_exception_el(s));
 264     s->is_jmp = DISAS_EXC;
 265 }
 266
 267 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
 268 {
 269     /* No direct tb linking with singlestep (either QEMU's or the ARM
 270      * debug architecture kind) or deterministic io
 271      */
 272     if (s->singlestep_enabled || s->ss_active || (s->tb->cflags & CF_LAST_IO)) {
 273         return false;
 274     }
 275
 276     /* Only link tbs from inside the same guest page */
 277     if ((s->tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
 278         return false;
 279     }
 280
 281     return true;
 282 }
 283
 284 static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
 285 {
 286     TranslationBlock *tb;
 287
 288     tb = s->tb;
 289     if (use_goto_tb(s, n, dest)) {
 290         tcg_gen_goto_tb(n);
 291         gen_a64_set_pc_im(dest);
 292         tcg_gen_exit_tb((intptr_t)tb + n);
 293         s->is_jmp = DISAS_TB_JUMP;
 294     } else {
 295         gen_a64_set_pc_im(dest);
 296         if (s->ss_active) {
 297             gen_step_complete_exception(s);
 298         } else if (s->singlestep_enabled) {
 299             gen_exception_internal(EXCP_DEBUG);
 300         } else {
 301             tcg_gen_exit_tb(0);
 302             s->is_jmp = DISAS_TB_JUMP;
 303         }
 304     }
 305 }
 306
 307 static void unallocated_encoding(DisasContext *s)
 308 {
 309     /* Unallocated and reserved encodings are uncategorized */
 310     gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
 311                        default_exception_el(s));
 312 }
 313
 314 #define unsupported_encoding(s, insn)                                    \
 315     do {                                                                 \
 316         qemu_log_mask(LOG_UNIMP,                                         \
 317                       "%s:%d: unsupported instruction encoding 0x%08x "  \
 318                       "at pc=%016" PRIx64 "\n",                          \
 319                       __FILE__, __LINE__, insn, s->pc - 4);              \
 320         unallocated_encoding(s);                                         \
 321     } while (0);
 322
 323 static void init_tmp_a64_array(DisasContext *s)
 324 {
 325 #ifdef CONFIG_DEBUG_TCG
 326     int i;
 327     for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
 328         TCGV_UNUSED_I64(s->tmp_a64[i]);
 329     }
 330 #endif
 331     s->tmp_a64_count = 0;
 332 }
 333
 334 static void free_tmp_a64(DisasContext *s)
 335 {
 336     int i;
 337     for (i = 0; i < s->tmp_a64_count; i++) {
 338         tcg_temp_free_i64(s->tmp_a64[i]);
 339     }
 340     init_tmp_a64_array(s);
 341 }
 342
 343 static TCGv_i64 new_tmp_a64(DisasContext *s)
 344 {
 345     assert(s->tmp_a64_count < TMP_A64_MAX);
 346     return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
 347 }
 348
 349 static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
 350 {
 351     TCGv_i64 t = new_tmp_a64(s);
 352     tcg_gen_movi_i64(t, 0);
 353     return t;
 354 }
 355
 356 /*
 357  * Register access functions
 358  *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 363  *
 364  * B1.2.1 Register mappings
 365  *
 366  * In instruction register encoding 31 can refer to ZR (zero register) or
 367  * the SP (stack pointer) depending on context. In QEMU's case we map SP
 368  * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 369  * This is the point of the _sp forms.
 370  */
 371 static TCGv_i64 cpu_reg(DisasContext *s, int reg)
 372 {
 373     if (reg == 31) {
 374         return new_tmp_a64_zero(s);
 375     } else {
 376         return cpu_X[reg];
 377     }
 378 }
 379
 380 /* register access for when 31 == SP */
 381 static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
 382 {
 383     return cpu_X[reg];
 384 }
 385
 386 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 387  * representing the register contents. This TCGv is an auto-freed
 388  * temporary so it need not be explicitly freed, and may be modified.
 389  */
 390 static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
 391 {
 392     TCGv_i64 v = new_tmp_a64(s);
 393     if (reg != 31) {
 394         if (sf) {
 395             tcg_gen_mov_i64(v, cpu_X[reg]);
 396         } else {
 397             tcg_gen_ext32u_i64(v, cpu_X[reg]);
 398         }
 399     } else {
 400         tcg_gen_movi_i64(v, 0);
 401     }
 402     return v;
 403 }
 404
 405 static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
 406 {
 407     TCGv_i64 v = new_tmp_a64(s);
 408     if (sf) {
 409         tcg_gen_mov_i64(v, cpu_X[reg]);
 410     } else {
 411         tcg_gen_ext32u_i64(v, cpu_X[reg]);
 412     }
 413     return v;
 414 }
 415
 416 /* We should have at some point before trying to access an FP register
 417  * done the necessary access check, so assert that
 418  * (a) we did the check and
 419  * (b) we didn't then just plough ahead anyway if it failed.
 420  * Print the instruction pattern in the abort message so we can figure
 421  * out what we need to fix if a user encounters this problem in the wild.
 422  */
 423 static inline void assert_fp_access_checked(DisasContext *s)
 424 {
 425 #ifdef CONFIG_DEBUG_TCG
 426     if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
 427         fprintf(stderr, "target-arm: FP access check missing for "
 428                 "instruction 0x%08x\n", s->insn);
 429         abort();
 430     }
 431 #endif
 432 }
 433
 434 /* Return the offset into CPUARMState of an element of specified
 435  * size, 'element' places in from the least significant end of
 436  * the FP/vector register Qn.
 437  */
 438 static inline int vec_reg_offset(DisasContext *s, int regno,
 439                                  int element, TCGMemOp size)
 440 {
 441     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 442 #ifdef HOST_WORDS_BIGENDIAN
 443     /* This is complicated slightly because vfp.regs[2n] is
 444      * still the low half and  vfp.regs[2n+1] the high half
 445      * of the 128 bit vector, even on big endian systems.
 446      * Calculate the offset assuming a fully bigendian 128 bits,
 447      * then XOR to account for the order of the two 64 bit halves.
 448      */
 449     offs += (16 - ((element + 1) * (1 << size)));
 450     offs ^= 8;
 451 #else
 452     offs += element * (1 << size);
 453 #endif
 454     assert_fp_access_checked(s);
 455     return offs;
 456 }
 457
 458 /* Return the offset into CPUARMState of a slice (from
 459  * the least significant end) of FP register Qn (ie
 460  * Dn, Sn, Hn or Bn).
 461  * (Note that this is not the same mapping as for A32; see cpu.h)
 462  */
 463 static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
 464 {
 465     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 466 #ifdef HOST_WORDS_BIGENDIAN
 467     offs += (8 - (1 << size));
 468 #endif
 469     assert_fp_access_checked(s);
 470     return offs;
 471 }
 472
 473 /* Offset of the high half of the 128 bit vector Qn */
 474 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
 475 {
 476     assert_fp_access_checked(s);
 477     return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
 478 }
 479
 480 /* Convenience accessors for reading and writing single and double
 481  * FP registers. Writing clears the upper parts of the associated
 482  * 128 bit vector register, as required by the architecture.
 483  * Note that unlike the GP register accessors, the values returned
 484  * by the read functions must be manually freed.
 485  */
 486 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
 487 {
 488     TCGv_i64 v = tcg_temp_new_i64();
 489
 490     tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 491     return v;
 492 }
 493
 494 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
 495 {
 496     TCGv_i32 v = tcg_temp_new_i32();
 497
 498     tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
 499     return v;
 500 }
 501
 502 static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
 503 {
 504     TCGv_i64 tcg_zero = tcg_const_i64(0);
 505
 506     tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 507     tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg));
 508     tcg_temp_free_i64(tcg_zero);
 509 }
 510
 511 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
 512 {
 513     TCGv_i64 tmp = tcg_temp_new_i64();
 514
 515     tcg_gen_extu_i32_i64(tmp, v);
 516     write_fp_dreg(s, reg, tmp);
 517     tcg_temp_free_i64(tmp);
 518 }
 519
 520 static TCGv_ptr get_fpstatus_ptr(void)
 521 {
 522     TCGv_ptr statusptr = tcg_temp_new_ptr();
 523     int offset;
 524
 525     /* In A64 all instructions (both FP and Neon) use the FPCR;
 526      * there is no equivalent of the A32 Neon "standard FPSCR value"
 527      * and all operations use vfp.fp_status.
 528      */
 529     offset = offsetof(CPUARMState, vfp.fp_status);
 530     tcg_gen_addi_ptr(statusptr, cpu_env, offset);
 531     return statusptr;
 532 }
 533
 534 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 535  * than the 32 bit equivalent.
 536  */
 537 static inline void gen_set_NZ64(TCGv_i64 result)
 538 {
 539     tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
 540     tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
 541 }
 542
 543 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
 544 static inline void gen_logic_CC(int sf, TCGv_i64 result)
 545 {
 546     if (sf) {
 547         gen_set_NZ64(result);
 548     } else {
 549         tcg_gen_extrl_i64_i32(cpu_ZF, result);
 550         tcg_gen_mov_i32(cpu_NF, cpu_ZF);
 551     }
 552     tcg_gen_movi_i32(cpu_CF, 0);
 553     tcg_gen_movi_i32(cpu_VF, 0);
 554 }
 555
 556 /* dest = T0 + T1; compute C, N, V and Z flags */
 557 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 558 {
 559     if (sf) {
 560         TCGv_i64 result, flag, tmp;
 561         result = tcg_temp_new_i64();
 562         flag = tcg_temp_new_i64();
 563         tmp = tcg_temp_new_i64();
 564
 565         tcg_gen_movi_i64(tmp, 0);
 566         tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
 567
 568         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 569
 570         gen_set_NZ64(result);
 571
 572         tcg_gen_xor_i64(flag, result, t0);
 573         tcg_gen_xor_i64(tmp, t0, t1);
 574         tcg_gen_andc_i64(flag, flag, tmp);
 575         tcg_temp_free_i64(tmp);
 576         tcg_gen_extrh_i64_i32(cpu_VF, flag);
 577
 578         tcg_gen_mov_i64(dest, result);
 579         tcg_temp_free_i64(result);
 580         tcg_temp_free_i64(flag);
 581     } else {
 582         /* 32 bit arithmetic */
 583         TCGv_i32 t0_32 = tcg_temp_new_i32();
 584         TCGv_i32 t1_32 = tcg_temp_new_i32();
 585         TCGv_i32 tmp = tcg_temp_new_i32();
 586
 587         tcg_gen_movi_i32(tmp, 0);
 588         tcg_gen_extrl_i64_i32(t0_32, t0);
 589         tcg_gen_extrl_i64_i32(t1_32, t1);
 590         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
 591         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 592         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 593         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 594         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 595         tcg_gen_extu_i32_i64(dest, cpu_NF);
 596
 597         tcg_temp_free_i32(tmp);
 598         tcg_temp_free_i32(t0_32);
 599         tcg_temp_free_i32(t1_32);
 600     }
 601 }
 602
 603 /* dest = T0 - T1; compute C, N, V and Z flags */
 604 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 605 {
 606     if (sf) {
 607         /* 64 bit arithmetic */
 608         TCGv_i64 result, flag, tmp;
 609
 610         result = tcg_temp_new_i64();
 611         flag = tcg_temp_new_i64();
 612         tcg_gen_sub_i64(result, t0, t1);
 613
 614         gen_set_NZ64(result);
 615
 616         tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
 617         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 618
 619         tcg_gen_xor_i64(flag, result, t0);
 620         tmp = tcg_temp_new_i64();
 621         tcg_gen_xor_i64(tmp, t0, t1);
 622         tcg_gen_and_i64(flag, flag, tmp);
 623         tcg_temp_free_i64(tmp);
 624         tcg_gen_extrh_i64_i32(cpu_VF, flag);
 625         tcg_gen_mov_i64(dest, result);
 626         tcg_temp_free_i64(flag);
 627         tcg_temp_free_i64(result);
 628     } else {
 629         /* 32 bit arithmetic */
 630         TCGv_i32 t0_32 = tcg_temp_new_i32();
 631         TCGv_i32 t1_32 = tcg_temp_new_i32();
 632         TCGv_i32 tmp;
 633
 634         tcg_gen_extrl_i64_i32(t0_32, t0);
 635         tcg_gen_extrl_i64_i32(t1_32, t1);
 636         tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
 637         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 638         tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
 639         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 640         tmp = tcg_temp_new_i32();
 641         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 642         tcg_temp_free_i32(t0_32);
 643         tcg_temp_free_i32(t1_32);
 644         tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 645         tcg_temp_free_i32(tmp);
 646         tcg_gen_extu_i32_i64(dest, cpu_NF);
 647     }
 648 }
 649
 650 /* dest = T0 + T1 + CF; do not compute flags. */
 651 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 652 {
 653     TCGv_i64 flag = tcg_temp_new_i64();
 654     tcg_gen_extu_i32_i64(flag, cpu_CF);
 655     tcg_gen_add_i64(dest, t0, t1);
 656     tcg_gen_add_i64(dest, dest, flag);
 657     tcg_temp_free_i64(flag);
 658
 659     if (!sf) {
 660         tcg_gen_ext32u_i64(dest, dest);
 661     }
 662 }
 663
 664 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
 665 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 666 {
 667     if (sf) {
 668         TCGv_i64 result, cf_64, vf_64, tmp;
 669         result = tcg_temp_new_i64();
 670         cf_64 = tcg_temp_new_i64();
 671         vf_64 = tcg_temp_new_i64();
 672         tmp = tcg_const_i64(0);
 673
 674         tcg_gen_extu_i32_i64(cf_64, cpu_CF);
 675         tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
 676         tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
 677         tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
 678         gen_set_NZ64(result);
 679
 680         tcg_gen_xor_i64(vf_64, result, t0);
 681         tcg_gen_xor_i64(tmp, t0, t1);
 682         tcg_gen_andc_i64(vf_64, vf_64, tmp);
 683         tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
 684
 685         tcg_gen_mov_i64(dest, result);
 686
 687         tcg_temp_free_i64(tmp);
 688         tcg_temp_free_i64(vf_64);
 689         tcg_temp_free_i64(cf_64);
 690         tcg_temp_free_i64(result);
 691     } else {
 692         TCGv_i32 t0_32, t1_32, tmp;
 693         t0_32 = tcg_temp_new_i32();
 694         t1_32 = tcg_temp_new_i32();
 695         tmp = tcg_const_i32(0);
 696
 697         tcg_gen_extrl_i64_i32(t0_32, t0);
 698         tcg_gen_extrl_i64_i32(t1_32, t1);
 699         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
 700         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);
 701
 702         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 703         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 704         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 705         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 706         tcg_gen_extu_i32_i64(dest, cpu_NF);
 707
 708         tcg_temp_free_i32(tmp);
 709         tcg_temp_free_i32(t1_32);
 710         tcg_temp_free_i32(t0_32);
 711     }
 712 }
 713
 714 /*
 715  * Load/Store generators
 716  */
 717
 718 /*
 719  * Store from GPR register to memory.
 720  */
 721 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
 722                              TCGv_i64 tcg_addr, int size, int memidx)
 723 {
 724     g_assert(size <= 3);
 725     tcg_gen_qemu_st_i64(source, tcg_addr, memidx, MO_TE + size);
 726 }
 727
 728 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
 729                       TCGv_i64 tcg_addr, int size)
 730 {
 731     do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s));
 732 }
 733
 734 /*
 735  * Load from memory to GPR register
 736  */
 737 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
 738                              int size, bool is_signed, bool extend, int memidx)
 739 {
 740     TCGMemOp memop = MO_TE + size;
 741
 742     g_assert(size <= 3);
 743
 744     if (is_signed) {
 745         memop += MO_SIGN;
 746     }
 747
 748     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
 749
 750     if (extend && is_signed) {
 751         g_assert(size < 3);
 752         tcg_gen_ext32u_i64(dest, dest);
 753     }
 754 }
 755
 756 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
 757                       int size, bool is_signed, bool extend)
 758 {
 759     do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
 760                      get_mem_index(s));
 761 }
 762
 763 /*
 764  * Store from FP register to memory
 765  */
 766 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
 767 {
 768     /* This writes the bottom N bits of a 128 bit wide vector to memory */
 769     TCGv_i64 tmp = tcg_temp_new_i64();
 770     tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
 771     if (size < 4) {
 772         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TE + size);
 773     } else {
 774         TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
 775         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TEQ);
 776         tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
 777         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 778         tcg_gen_qemu_st_i64(tmp, tcg_hiaddr, get_mem_index(s), MO_TEQ);
 779         tcg_temp_free_i64(tcg_hiaddr);
 780     }
 781
 782     tcg_temp_free_i64(tmp);
 783 }
 784
 785 /*
 786  * Load from memory to FP register
 787  */
 788 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
 789 {
 790     /* This always zero-extends and writes to a full 128 bit wide vector */
 791     TCGv_i64 tmplo = tcg_temp_new_i64();
 792     TCGv_i64 tmphi;
 793
 794     if (size < 4) {
 795         TCGMemOp memop = MO_TE + size;
 796         tmphi = tcg_const_i64(0);
 797         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
 798     } else {
 799         TCGv_i64 tcg_hiaddr;
 800         tmphi = tcg_temp_new_i64();
 801         tcg_hiaddr = tcg_temp_new_i64();
 802
 803         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), MO_TEQ);
 804         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 805         tcg_gen_qemu_ld_i64(tmphi, tcg_hiaddr, get_mem_index(s), MO_TEQ);
 806         tcg_temp_free_i64(tcg_hiaddr);
 807     }
 808
 809     tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
 810     tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
 811
 812     tcg_temp_free_i64(tmplo);
 813     tcg_temp_free_i64(tmphi);
 814 }
 815
 816 /*
 817  * Vector load/store helpers.
 818  *
 819  * The principal difference between this and a FP load is that we don't
 820  * zero extend as we are filling a partial chunk of the vector register.
 821  * These functions don't support 128 bit loads/stores, which would be
 822  * normal load/store operations.
 823  *
 824  * The _i32 versions are useful when operating on 32 bit quantities
 825  * (eg for floating point single or using Neon helper functions).
 826  */
 827
 828 /* Get value of an element within a vector register */
 829 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
 830                              int element, TCGMemOp memop)
 831 {
 832     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
 833     switch (memop) {
 834     case MO_8:
 835         tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
 836         break;
 837     case MO_16:
 838         tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
 839         break;
 840     case MO_32:
 841         tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
 842         break;
 843     case MO_8|MO_SIGN:
 844         tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
 845         break;
 846     case MO_16|MO_SIGN:
 847         tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
 848         break;
 849     case MO_32|MO_SIGN:
 850         tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
 851         break;
 852     case MO_64:
 853     case MO_64|MO_SIGN:
 854         tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
 855         break;
 856     default:
 857         g_assert_not_reached();
 858     }
 859 }
 860
 861 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
 862                                  int element, TCGMemOp memop)
 863 {
 864     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
 865     switch (memop) {
 866     case MO_8:
 867         tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
 868         break;
 869     case MO_16:
 870         tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
 871         break;
 872     case MO_8|MO_SIGN:
 873         tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
 874         break;
 875     case MO_16|MO_SIGN:
 876         tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
 877         break;
 878     case MO_32:
 879     case MO_32|MO_SIGN:
 880         tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
 881         break;
 882     default:
 883         g_assert_not_reached();
 884     }
 885 }
 886
 887 /* Set value of an element within a vector register */
 888 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
 889                               int element, TCGMemOp memop)
 890 {
 891     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
 892     switch (memop) {
 893     case MO_8:
 894         tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
 895         break;
 896     case MO_16:
 897         tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
 898         break;
 899     case MO_32:
 900         tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
 901         break;
 902     case MO_64:
 903         tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
 904         break;
 905     default:
 906         g_assert_not_reached();
 907     }
 908 }
 909
 910 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
 911                                   int destidx, int element, TCGMemOp memop)
 912 {
 913     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
 914     switch (memop) {
 915     case MO_8:
 916         tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
 917         break;
 918     case MO_16:
 919         tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
 920         break;
 921     case MO_32:
 922         tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
 923         break;
 924     default:
 925         g_assert_not_reached();
 926     }
 927 }
 928
 929 /* Clear the high 64 bits of a 128 bit vector (in general non-quad
 930  * vector ops all need to do this).
 931  */
 932 static void clear_vec_high(DisasContext *s, int rd)
 933 {
 934     TCGv_i64 tcg_zero = tcg_const_i64(0);
 935
 936     write_vec_element(s, tcg_zero, rd, 1, MO_64);
 937     tcg_temp_free_i64(tcg_zero);
 938 }
 939
 940 /* Store from vector register to memory */
 941 static void do_vec_st(DisasContext *s, int srcidx, int element,
 942                       TCGv_i64 tcg_addr, int size)
 943 {
 944     TCGMemOp memop = MO_TE + size;
 945     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
 946
 947     read_vec_element(s, tcg_tmp, srcidx, element, size);
 948     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
 949
 950     tcg_temp_free_i64(tcg_tmp);
 951 }
 952
 953 /* Load from memory to vector register */
 954 static void do_vec_ld(DisasContext *s, int destidx, int element,
 955                       TCGv_i64 tcg_addr, int size)
 956 {
 957     TCGMemOp memop = MO_TE + size;
 958     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
 959
 960     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
 961     write_vec_element(s, tcg_tmp, destidx, element, size);
 962
 963     tcg_temp_free_i64(tcg_tmp);
 964 }
 965
 966 /* Check that FP/Neon access is enabled. If it is, return
 967  * true. If not, emit code to generate an appropriate exception,
 968  * and return false; the caller should not emit any code for
 969  * the instruction. Note that this check must happen after all
 970  * unallocated-encoding checks (otherwise the syndrome information
 971  * for the resulting exception will be incorrect).
 972  */
 973 static inline bool fp_access_check(DisasContext *s)
 974 {
 975     assert(!s->fp_access_checked);
 976     s->fp_access_checked = true;
 977
 978     if (!s->fp_excp_el) {
 979         return true;
 980     }
 981
 982     gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
 983                        s->fp_excp_el);
 984     return false;
 985 }
 986
 987 /*
 988  * This utility function is for doing register extension with an
 989  * optional shift. You will likely want to pass a temporary for the
 990  * destination register. See DecodeRegExtend() in the ARM ARM.
 991  */
 992 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
 993                               int option, unsigned int shift)
 994 {
 995     int extsize = extract32(option, 0, 2);
 996     bool is_signed = extract32(option, 2, 1);
 997
 998     if (is_signed) {
 999         switch (extsize) {
1000         case 0:
1001             tcg_gen_ext8s_i64(tcg_out, tcg_in);
1002             break;
1003         case 1:
1004             tcg_gen_ext16s_i64(tcg_out, tcg_in);
1005             break;
1006         case 2:
1007             tcg_gen_ext32s_i64(tcg_out, tcg_in);
1008             break;
1009         case 3:
1010             tcg_gen_mov_i64(tcg_out, tcg_in);
1011             break;
1012         }
1013     } else {
1014         switch (extsize) {
1015         case 0:
1016             tcg_gen_ext8u_i64(tcg_out, tcg_in);
1017             break;
1018         case 1:
1019             tcg_gen_ext16u_i64(tcg_out, tcg_in);
1020             break;
1021         case 2:
1022             tcg_gen_ext32u_i64(tcg_out, tcg_in);
1023             break;
1024         case 3:
1025             tcg_gen_mov_i64(tcg_out, tcg_in);
1026             break;
1027         }
1028     }
1029
1030     if (shift) {
1031         tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1032     }
1033 }
1034
/* Hook for the architectural SP alignment check on SP-relative loads
 * and stores.  Deliberately emits no code at present; see below.
 */
static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}
1047
1048 /*
1049  * This provides a simple table based table lookup decoder. It is
1050  * intended to be used when the relevant bits for decode are too
1051  * awkwardly placed and switch/if based logic would be confusing and
1052  * deeply nested. Since it's a linear search through the table, tables
1053  * should be kept small.
1054  *
1055  * It returns the first handler where insn & mask == pattern, or
1056  * NULL if there is no match.
1057  * The table is terminated by an empty mask (i.e. 0)
1058  */
1059 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1060                                                uint32_t insn)
1061 {
1062     const AArch64DecodeTable *tptr = table;
1063
1064     while (tptr->mask) {
1065         if ((insn & tptr->mask) == tptr->pattern) {
1066             return tptr->disas_fn;
1067         }
1068         tptr++;
1069     }
1070     return NULL;
1071 }
1072
/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter 3 (C3)
 * of the ARM Architecture Reference Manual (DDI0487A_a).
 */
1078
1079 /* C3.2.7 Unconditional branch (immediate)
1080  *   31  30       26 25                                  0
1081  * +----+-----------+-------------------------------------+
1082  * | op | 0 0 1 0 1 |                 imm26               |
1083  * +----+-----------+-------------------------------------+
1084  */
1085 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1086 {
1087     uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
1088
1089     if (insn & (1U << 31)) {
1090         /* C5.6.26 BL Branch with link */
1091         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1092     }
1093
1094     /* C5.6.20 B Branch / C5.6.26 BL Branch with link */
1095     gen_goto_tb(s, 0, addr);
1096 }
1097
1098 /* C3.2.1 Compare & branch (immediate)
1099  *   31  30         25  24  23                  5 4      0
1100  * +----+-------------+----+---------------------+--------+
1101  * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
1102  * +----+-------------+----+---------------------+--------+
1103  */
1104 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1105 {
1106     unsigned int sf, op, rt;
1107     uint64_t addr;
1108     TCGLabel *label_match;
1109     TCGv_i64 tcg_cmp;
1110
1111     sf = extract32(insn, 31, 1);
1112     op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1113     rt = extract32(insn, 0, 5);
1114     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1115
1116     tcg_cmp = read_cpu_reg(s, rt, sf);
1117     label_match = gen_new_label();
1118
1119     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1120                         tcg_cmp, 0, label_match);
1121
1122     gen_goto_tb(s, 0, s->pc);
1123     gen_set_label(label_match);
1124     gen_goto_tb(s, 1, addr);
1125 }
1126
1127 /* C3.2.5 Test & branch (immediate)
1128  *   31  30         25  24  23   19 18          5 4    0
1129  * +----+-------------+----+-------+-------------+------+
1130  * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
1131  * +----+-------------+----+-------+-------------+------+
1132  */
1133 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1134 {
1135     unsigned int bit_pos, op, rt;
1136     uint64_t addr;
1137     TCGLabel *label_match;
1138     TCGv_i64 tcg_cmp;
1139
1140     bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1141     op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1142     addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
1143     rt = extract32(insn, 0, 5);
1144
1145     tcg_cmp = tcg_temp_new_i64();
1146     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1147     label_match = gen_new_label();
1148     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1149                         tcg_cmp, 0, label_match);
1150     tcg_temp_free_i64(tcg_cmp);
1151     gen_goto_tb(s, 0, s->pc);
1152     gen_set_label(label_match);
1153     gen_goto_tb(s, 1, addr);
1154 }
1155
1156 /* C3.2.2 / C5.6.19 Conditional branch (immediate)
1157  *  31           25  24  23                  5   4  3    0
1158  * +---------------+----+---------------------+----+------+
1159  * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
1160  * +---------------+----+---------------------+----+------+
1161  */
1162 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1163 {
1164     unsigned int cond;
1165     uint64_t addr;
1166
1167     if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1168         unallocated_encoding(s);
1169         return;
1170     }
1171     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1172     cond = extract32(insn, 0, 4);
1173
1174     if (cond < 0x0e) {
1175         /* genuinely conditional branches */
1176         TCGLabel *label_match = gen_new_label();
1177         arm_gen_test_cc(cond, label_match);
1178         gen_goto_tb(s, 0, s->pc);
1179         gen_set_label(label_match);
1180         gen_goto_tb(s, 1, addr);
1181     } else {
1182         /* 0xe and 0xf are both "always" conditions */
1183         gen_goto_tb(s, 0, addr);
1184     }
1185 }
1186
1187 /* C5.6.68 HINT */
1188 static void handle_hint(DisasContext *s, uint32_t insn,
1189                         unsigned int op1, unsigned int op2, unsigned int crm)
1190 {
1191     unsigned int selector = crm << 3 | op2;
1192
1193     if (op1 != 3) {
1194         unallocated_encoding(s);
1195         return;
1196     }
1197
1198     switch (selector) {
1199     case 0: /* NOP */
1200         return;
1201     case 3: /* WFI */
1202         s->is_jmp = DISAS_WFI;
1203         return;
1204     case 1: /* YIELD */
1205         s->is_jmp = DISAS_YIELD;
1206         return;
1207     case 2: /* WFE */
1208         s->is_jmp = DISAS_WFE;
1209         return;
1210     case 4: /* SEV */
1211     case 5: /* SEVL */
1212         /* we treat all as NOP at least for now */
1213         return;
1214     default:
1215         /* default specified as NOP equivalent */
1216         return;
1217     }
1218 }
1219
/* CLREX: emit code that sets cpu_exclusive_addr to -1.
 * NOTE(review): -1 is presumably a value no store-exclusive address can
 * match, so the next store-exclusive fails; gen_store_exclusive() resets
 * the address the same way. s and insn are unused here.
 */
static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
1224
1225 /* CLREX, DSB, DMB, ISB */
1226 static void handle_sync(DisasContext *s, uint32_t insn,
1227                         unsigned int op1, unsigned int op2, unsigned int crm)
1228 {
1229     if (op1 != 3) {
1230         unallocated_encoding(s);
1231         return;
1232     }
1233
1234     switch (op2) {
1235     case 2: /* CLREX */
1236         gen_clrex(s, insn);
1237         return;
1238     case 4: /* DSB */
1239     case 5: /* DMB */
1240         /* We don't emulate caches so barriers are no-ops */
1241         return;
1242     case 6: /* ISB */
1243         /* We need to break the TB after this insn to execute
1244          * a self-modified code correctly and also to take
1245          * any pending interrupts immediately.
1246          */
1247         s->is_jmp = DISAS_UPDATE;
1248         return;
1249     default:
1250         unallocated_encoding(s);
1251         return;
1252     }
1253 }
1254
1255 /* C5.6.130 MSR (immediate) - move immediate to processor state field */
1256 static void handle_msr_i(DisasContext *s, uint32_t insn,
1257                          unsigned int op1, unsigned int op2, unsigned int crm)
1258 {
1259     int op = op1 << 3 | op2;
1260     switch (op) {
1261     case 0x05: /* SPSel */
1262         if (s->current_el == 0) {
1263             unallocated_encoding(s);
1264             return;
1265         }
1266         /* fall through */
1267     case 0x1e: /* DAIFSet */
1268     case 0x1f: /* DAIFClear */
1269     {
1270         TCGv_i32 tcg_imm = tcg_const_i32(crm);
1271         TCGv_i32 tcg_op = tcg_const_i32(op);
1272         gen_a64_set_pc_im(s->pc - 4);
1273         gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
1274         tcg_temp_free_i32(tcg_imm);
1275         tcg_temp_free_i32(tcg_op);
1276         s->is_jmp = DISAS_UPDATE;
1277         break;
1278     }
1279     default:
1280         unallocated_encoding(s);
1281         return;
1282     }
1283 }
1284
1285 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1286 {
1287     TCGv_i32 tmp = tcg_temp_new_i32();
1288     TCGv_i32 nzcv = tcg_temp_new_i32();
1289
1290     /* build bit 31, N */
1291     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1292     /* build bit 30, Z */
1293     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1294     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1295     /* build bit 29, C */
1296     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1297     /* build bit 28, V */
1298     tcg_gen_shri_i32(tmp, cpu_VF, 31);
1299     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1300     /* generate result */
1301     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1302
1303     tcg_temp_free_i32(nzcv);
1304     tcg_temp_free_i32(tmp);
1305 }
1306
1307 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1308
1309 {
1310     TCGv_i32 nzcv = tcg_temp_new_i32();
1311
1312     /* take NZCV from R[t] */
1313     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1314
1315     /* bit 31, N */
1316     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1317     /* bit 30, Z */
1318     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1319     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1320     /* bit 29, C */
1321     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1322     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1323     /* bit 28, V */
1324     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1325     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1326     tcg_temp_free_i32(nzcv);
1327 }
1328
1329 /* C5.6.129 MRS - move from system register
1330  * C5.6.131 MSR (register) - move to system register
1331  * C5.6.204 SYS
1332  * C5.6.205 SYSL
1333  * These are all essentially the same insn in 'read' and 'write'
1334  * versions, with varying op0 fields.
1335  */
1336 static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1337                        unsigned int op0, unsigned int op1, unsigned int op2,
1338                        unsigned int crn, unsigned int crm, unsigned int rt)
1339 {
1340     const ARMCPRegInfo *ri;
1341     TCGv_i64 tcg_rt;
1342
1343     ri = get_arm_cp_reginfo(s->cp_regs,
1344                             ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1345                                                crn, crm, op0, op1, op2));
1346
1347     if (!ri) {
1348         /* Unknown register; this might be a guest error or a QEMU
1349          * unimplemented feature.
1350          */
1351         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1352                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1353                       isread ? "read" : "write", op0, op1, crn, crm, op2);
1354         unallocated_encoding(s);
1355         return;
1356     }
1357
1358     /* Check access permissions */
1359     if (!cp_access_ok(s->current_el, ri, isread)) {
1360         unallocated_encoding(s);
1361         return;
1362     }
1363
1364     if (ri->accessfn) {
1365         /* Emit code to perform further access permissions checks at
1366          * runtime; this may result in an exception.
1367          */
1368         TCGv_ptr tmpptr;
1369         TCGv_i32 tcg_syn;
1370         uint32_t syndrome;
1371
1372         gen_a64_set_pc_im(s->pc - 4);
1373         tmpptr = tcg_const_ptr(ri);
1374         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1375         tcg_syn = tcg_const_i32(syndrome);
1376         gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn);
1377         tcg_temp_free_ptr(tmpptr);
1378         tcg_temp_free_i32(tcg_syn);
1379     }
1380
1381     /* Handle special cases first */
1382     switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
1383     case ARM_CP_NOP:
1384         return;
1385     case ARM_CP_NZCV:
1386         tcg_rt = cpu_reg(s, rt);
1387         if (isread) {
1388             gen_get_nzcv(tcg_rt);
1389         } else {
1390             gen_set_nzcv(tcg_rt);
1391         }
1392         return;
1393     case ARM_CP_CURRENTEL:
1394         /* Reads as current EL value from pstate, which is
1395          * guaranteed to be constant by the tb flags.
1396          */
1397         tcg_rt = cpu_reg(s, rt);
1398         tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
1399         return;
1400     case ARM_CP_DC_ZVA:
1401         /* Writes clear the aligned block of memory which rt points into. */
1402         tcg_rt = cpu_reg(s, rt);
1403         gen_helper_dc_zva(cpu_env, tcg_rt);
1404         return;
1405     default:
1406         break;
1407     }
1408
1409     if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1410         gen_io_start();
1411     }
1412
1413     tcg_rt = cpu_reg(s, rt);
1414
1415     if (isread) {
1416         if (ri->type & ARM_CP_CONST) {
1417             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
1418         } else if (ri->readfn) {
1419             TCGv_ptr tmpptr;
1420             tmpptr = tcg_const_ptr(ri);
1421             gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
1422             tcg_temp_free_ptr(tmpptr);
1423         } else {
1424             tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
1425         }
1426     } else {
1427         if (ri->type & ARM_CP_CONST) {
1428             /* If not forbidden by access permissions, treat as WI */
1429             return;
1430         } else if (ri->writefn) {
1431             TCGv_ptr tmpptr;
1432             tmpptr = tcg_const_ptr(ri);
1433             gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
1434             tcg_temp_free_ptr(tmpptr);
1435         } else {
1436             tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
1437         }
1438     }
1439
1440     if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1441         /* I/O operations must end the TB here (whether read or write) */
1442         gen_io_end();
1443         s->is_jmp = DISAS_UPDATE;
1444     } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
1445         /* We default to ending the TB on a coprocessor register write,
1446          * but allow this to be suppressed by the register definition
1447          * (usually only necessary to work around guest bugs).
1448          */
1449         s->is_jmp = DISAS_UPDATE;
1450     }
1451 }
1452
1453 /* C3.2.4 System
1454  *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1455  * +---------------------+---+-----+-----+-------+-------+-----+------+
1456  * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1457  * +---------------------+---+-----+-----+-------+-------+-----+------+
1458  */
1459 static void disas_system(DisasContext *s, uint32_t insn)
1460 {
1461     unsigned int l, op0, op1, crn, crm, op2, rt;
1462     l = extract32(insn, 21, 1);
1463     op0 = extract32(insn, 19, 2);
1464     op1 = extract32(insn, 16, 3);
1465     crn = extract32(insn, 12, 4);
1466     crm = extract32(insn, 8, 4);
1467     op2 = extract32(insn, 5, 3);
1468     rt = extract32(insn, 0, 5);
1469
1470     if (op0 == 0) {
1471         if (l || rt != 31) {
1472             unallocated_encoding(s);
1473             return;
1474         }
1475         switch (crn) {
1476         case 2: /* C5.6.68 HINT */
1477             handle_hint(s, insn, op1, op2, crm);
1478             break;
1479         case 3: /* CLREX, DSB, DMB, ISB */
1480             handle_sync(s, insn, op1, op2, crm);
1481             break;
1482         case 4: /* C5.6.130 MSR (immediate) */
1483             handle_msr_i(s, insn, op1, op2, crm);
1484             break;
1485         default:
1486             unallocated_encoding(s);
1487             break;
1488         }
1489         return;
1490     }
1491     handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1492 }
1493
/* C3.2.3 Exception generation
 *
 *  31             24 23 21 20                     5 4   2 1  0
 * +-----------------+-----+------------------------+-----+----+
 * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
 * +-----------------------+------------------------+----------+
 *
 * Decodes on opc, then on the combined op2:LL field:
 *   opc 0: SVC (op2:LL == 1), HVC (2), SMC (3)
 *   opc 1: BRK
 *   opc 2: HLT (only the semihosting call "HLT 0xf000" is implemented)
 *   opc 5: DCPS1/2/3 (not implemented)
 * Everything else is an unallocated encoding.
 */
static void disas_exc(DisasContext *s, uint32_t insn)
{
    int opc = extract32(insn, 21, 3);
    int op2_ll = extract32(insn, 0, 5);
    int imm16 = extract32(insn, 5, 16);
    TCGv_i32 tmp;

    switch (opc) {
    case 0:
        /* For SVC, HVC and SMC we advance the single-step state
         * machine before taking the exception. This is architecturally
         * mandated, to ensure that single-stepping a system call
         * instruction works properly.
         */
        switch (op2_ll) {
        case 1:
            /* SVC */
            gen_ss_advance(s);
            gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
                               default_exception_el(s));
            break;
        case 2:
            /* HVC: unallocated at EL0 */
            if (s->current_el == 0) {
                unallocated_encoding(s);
                break;
            }
            /* The pre HVC helper handles cases when HVC gets trapped
             * as an undefined insn by runtime configuration.
             */
            gen_a64_set_pc_im(s->pc - 4);
            gen_helper_pre_hvc(cpu_env);
            gen_ss_advance(s);
            gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
            break;
        case 3:
            /* SMC: unallocated at EL0 */
            if (s->current_el == 0) {
                unallocated_encoding(s);
                break;
            }
            /* As for HVC, a pre-helper is called with the PC synced to
             * this insn before the exception itself is generated; here
             * it is also passed the SMC syndrome.
             */
            gen_a64_set_pc_im(s->pc - 4);
            tmp = tcg_const_i32(syn_aa64_smc(imm16));
            gen_helper_pre_smc(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            gen_ss_advance(s);
            gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
            break;
        default:
            unallocated_encoding(s);
            break;
        }
        break;
    case 1:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* BRK */
        gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16),
                           default_exception_el(s));
        break;
    case 2:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* HLT. This has two purposes.
         * Architecturally, it is an external halting debug instruction.
         * Since QEMU doesn't implement external debug, we treat this as
         * it is required for halting debug disabled: it will UNDEF.
         * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
         */
        if (semihosting_enabled() && imm16 == 0xf000) {
#ifndef CONFIG_USER_ONLY
            /* In system mode, don't allow userspace access to semihosting,
             * to provide some semblance of security (and for consistency
             * with our 32-bit semihosting).
             */
            if (s->current_el == 0) {
                unsupported_encoding(s, insn);
                break;
            }
#endif
            gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
        } else {
            unsupported_encoding(s, insn);
        }
        break;
    case 5:
        if (op2_ll < 1 || op2_ll > 3) {
            unallocated_encoding(s);
            break;
        }
        /* DCPS1, DCPS2, DCPS3 */
        unsupported_encoding(s, insn);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}
1600
1601 /* C3.2.7 Unconditional branch (register)
1602  *  31           25 24   21 20   16 15   10 9    5 4     0
1603  * +---------------+-------+-------+-------+------+-------+
1604  * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1605  * +---------------+-------+-------+-------+------+-------+
1606  */
1607 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1608 {
1609     unsigned int opc, op2, op3, rn, op4;
1610
1611     opc = extract32(insn, 21, 4);
1612     op2 = extract32(insn, 16, 5);
1613     op3 = extract32(insn, 10, 6);
1614     rn = extract32(insn, 5, 5);
1615     op4 = extract32(insn, 0, 5);
1616
1617     if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1618         unallocated_encoding(s);
1619         return;
1620     }
1621
1622     switch (opc) {
1623     case 0: /* BR */
1624     case 2: /* RET */
1625         tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1626         break;
1627     case 1: /* BLR */
1628         tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1629         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1630         break;
1631     case 4: /* ERET */
1632         if (s->current_el == 0) {
1633             unallocated_encoding(s);
1634             return;
1635         }
1636         gen_helper_exception_return(cpu_env);
1637         s->is_jmp = DISAS_JUMP;
1638         return;
1639     case 5: /* DRPS */
1640         if (rn != 0x1f) {
1641             unallocated_encoding(s);
1642         } else {
1643             unsupported_encoding(s, insn);
1644         }
1645         return;
1646     default:
1647         unallocated_encoding(s);
1648         return;
1649     }
1650
1651     s->is_jmp = DISAS_JUMP;
1652 }
1653
1654 /* C3.2 Branches, exception generating and system instructions */
1655 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1656 {
1657     switch (extract32(insn, 25, 7)) {
1658     case 0x0a: case 0x0b:
1659     case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1660         disas_uncond_b_imm(s, insn);
1661         break;
1662     case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1663         disas_comp_b_imm(s, insn);
1664         break;
1665     case 0x1b: case 0x5b: /* Test & branch (immediate) */
1666         disas_test_b_imm(s, insn);
1667         break;
1668     case 0x2a: /* Conditional branch (immediate) */
1669         disas_cond_b_imm(s, insn);
1670         break;
1671     case 0x6a: /* Exception generation / System */
1672         if (insn & (1 << 24)) {
1673             disas_system(s, insn);
1674         } else {
1675             disas_exc(s, insn);
1676         }
1677         break;
1678     case 0x6b: /* Unconditional branch (register) */
1679         disas_uncond_b_reg(s, insn);
1680         break;
1681     default:
1682         unallocated_encoding(s);
1683         break;
1684     }
1685 }
1686
1687 /*
1688  * Load/Store exclusive instructions are implemented by remembering
1689  * the value/address loaded, and seeing if these are the same
1690  * when the store is performed. This is not actually the architecturally
1691  * mandated semantics, but it works for typical guest code sequences
1692  * and avoids having to monitor regular stores.
1693  *
1694  * In system emulation mode only one CPU will be running at once, so
1695  * this sequence is effectively atomic.  In user emulation mode we
1696  * throw an exception and handle the atomic operation elsewhere.
1697  */
1698 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
1699                                TCGv_i64 addr, int size, bool is_pair)
1700 {
1701     TCGv_i64 tmp = tcg_temp_new_i64();
1702     TCGMemOp memop = MO_TE + size;
1703
1704     g_assert(size <= 3);
1705     tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);
1706
1707     if (is_pair) {
1708         TCGv_i64 addr2 = tcg_temp_new_i64();
1709         TCGv_i64 hitmp = tcg_temp_new_i64();
1710
1711         g_assert(size >= 2);
1712         tcg_gen_addi_i64(addr2, addr, 1 << size);
1713         tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
1714         tcg_temp_free_i64(addr2);
1715         tcg_gen_mov_i64(cpu_exclusive_high, hitmp);
1716         tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
1717         tcg_temp_free_i64(hitmp);
1718     }
1719
1720     tcg_gen_mov_i64(cpu_exclusive_val, tmp);
1721     tcg_gen_mov_i64(cpu_reg(s, rt), tmp);
1722
1723     tcg_temp_free_i64(tmp);
1724     tcg_gen_mov_i64(cpu_exclusive_addr, addr);
1725 }
1726
#ifdef CONFIG_USER_ONLY
/* Store-exclusive, user emulation variant.
 * No store is emitted here.  The address is saved in cpu_exclusive_test
 * and the operation is described in cpu_exclusive_info, packed as:
 * size in bits [1:0], is_pair at bit 2, rd from bit 4, rt from bit 9
 * and rt2 from bit 14.  An internal EXCP_STREX exception is then raised
 * so that the operation can be completed outside translated code.
 */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 addr, int size, int is_pair)
{
    tcg_gen_mov_i64(cpu_exclusive_test, addr);
    tcg_gen_movi_i32(cpu_exclusive_info,
                     size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14));
    gen_exception_internal_insn(s, 4, EXCP_STREX);
}
#else
/* Store-exclusive, system emulation variant.
 * Emits inline code that performs the store only if the address and the
 * current memory contents still match what the load-exclusive recorded.
 * Rd is set to 0 on success and 1 on failure; in both cases
 * cpu_exclusive_addr is reset to -1 afterwards.
 */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 inaddr, int size, int is_pair)
{
    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
     *     [addr] = {Rt};
     *     if (is_pair) {
     *         [addr + datasize] = {Rt2};
     *     }
     *     {Rd} = 0;
     * } else {
     *     {Rd} = 1;
     * }
     * env->exclusive_addr = -1;
     */
    TCGLabel *fail_label = gen_new_label();
    TCGLabel *done_label = gen_new_label();
    TCGv_i64 addr = tcg_temp_local_new_i64();
    TCGv_i64 tmp;

    /* Copy input into a local temp so it is not trashed when the
     * basic block ends at the branch insn.
     */
    tcg_gen_mov_i64(addr, inaddr);
    /* Check 1: the address must be the one the load-exclusive recorded */
    tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);

    /* Check 2: memory must still hold the value that was loaded */
    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), MO_TE + size);
    tcg_gen_brcond_i64(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label);
    tcg_temp_free_i64(tmp);

    if (is_pair) {
        /* Check 3 (pairs only): likewise for the second element */
        TCGv_i64 addrhi = tcg_temp_new_i64();
        TCGv_i64 tmphi = tcg_temp_new_i64();

        tcg_gen_addi_i64(addrhi, addr, 1 << size);
        tcg_gen_qemu_ld_i64(tmphi, addrhi, get_mem_index(s), MO_TE + size);
        tcg_gen_brcond_i64(TCG_COND_NE, tmphi, cpu_exclusive_high, fail_label);

        tcg_temp_free_i64(tmphi);
        tcg_temp_free_i64(addrhi);
    }

    /* We seem to still have the exclusive monitor, so do the store */
    tcg_gen_qemu_st_i64(cpu_reg(s, rt), addr, get_mem_index(s), MO_TE + size);
    if (is_pair) {
        /* The high address is recomputed from the local temp addr rather
         * than reusing the temp from the check above.
         */
        TCGv_i64 addrhi = tcg_temp_new_i64();

        tcg_gen_addi_i64(addrhi, addr, 1 << size);
        tcg_gen_qemu_st_i64(cpu_reg(s, rt2), addrhi,
                            get_mem_index(s), MO_TE + size);
        tcg_temp_free_i64(addrhi);
    }

    tcg_temp_free_i64(addr);

    /* Success path: Rd = 0, then skip over the failure path */
    tcg_gen_movi_i64(cpu_reg(s, rd), 0);
    tcg_gen_br(done_label);
    /* Failure path: Rd = 1 */
    gen_set_label(fail_label);
    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
    gen_set_label(done_label);
    /* Both paths: reset the recorded exclusive address */
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);

}
#endif
1802
1803 /* C3.3.6 Load/store exclusive
1804  *
1805  *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
1806  * +-----+-------------+----+---+----+------+----+-------+------+------+
1807  * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
1808  * +-----+-------------+----+---+----+------+----+-------+------+------+
1809  *
1810  *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
1811  *   L: 0 -> store, 1 -> load
1812  *  o2: 0 -> exclusive, 1 -> not
1813  *  o1: 0 -> single register, 1 -> register pair
1814  *  o0: 1 -> load-acquire/store-release, 0 -> not
1815  */
1816 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
1817 {
1818     int rt = extract32(insn, 0, 5);
1819     int rn = extract32(insn, 5, 5);
1820     int rt2 = extract32(insn, 10, 5);
1821     int is_lasr = extract32(insn, 15, 1);
1822     int rs = extract32(insn, 16, 5);
1823     int is_pair = extract32(insn, 21, 1);
1824     int is_store = !extract32(insn, 22, 1);
1825     int is_excl = !extract32(insn, 23, 1);
1826     int size = extract32(insn, 30, 2);
1827     TCGv_i64 tcg_addr;
1828
1829     if ((!is_excl && !is_pair && !is_lasr) ||
1830         (!is_excl && is_pair) ||
1831         (is_pair && size < 2)) {
1832         unallocated_encoding(s);
1833         return;
1834     }
1835
1836     if (rn == 31) {
1837         gen_check_sp_alignment(s);
1838     }
1839     tcg_addr = read_cpu_reg_sp(s, rn, 1);
1840
1841     /* Note that since TCG is single threaded load-acquire/store-release
1842      * semantics require no extra if (is_lasr) { ... } handling.
1843      */
1844
1845     if (is_excl) {
1846         if (!is_store) {
1847             s->is_ldex = true;
1848             gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
1849         } else {
1850             gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
1851         }
1852     } else {
1853         TCGv_i64 tcg_rt = cpu_reg(s, rt);
1854         if (is_store) {
1855             do_gpr_st(s, tcg_rt, tcg_addr, size);
1856         } else {
1857             do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false);
1858         }
1859     }
1860 }
1861
1862 /*
1863  * C3.3.5 Load register (literal)
1864  *
1865  *  31 30 29   27  26 25 24 23                5 4     0
1866  * +-----+-------+---+-----+-------------------+-------+
1867  * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
1868  * +-----+-------+---+-----+-------------------+-------+
1869  *
1870  * V: 1 -> vector (simd/fp)
1871  * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
1872  *                   10-> 32 bit signed, 11 -> prefetch
1873  * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
1874  */
1875 static void disas_ld_lit(DisasContext *s, uint32_t insn)
1876 {
1877     int rt = extract32(insn, 0, 5);
1878     int64_t imm = sextract32(insn, 5, 19) << 2;
1879     bool is_vector = extract32(insn, 26, 1);
1880     int opc = extract32(insn, 30, 2);
1881     bool is_signed = false;
1882     int size = 2;
1883     TCGv_i64 tcg_rt, tcg_addr;
1884
1885     if (is_vector) {
1886         if (opc == 3) {
1887             unallocated_encoding(s);
1888             return;
1889         }
1890         size = 2 + opc;
1891         if (!fp_access_check(s)) {
1892             return;
1893         }
1894     } else {
1895         if (opc == 3) {
1896             /* PRFM (literal) : prefetch */
1897             return;
1898         }
1899         size = 2 + extract32(opc, 0, 1);
1900         is_signed = extract32(opc, 1, 1);
1901     }
1902
1903     tcg_rt = cpu_reg(s, rt);
1904
1905     tcg_addr = tcg_const_i64((s->pc - 4) + imm);
1906     if (is_vector) {
1907         do_fp_ld(s, rt, tcg_addr, size);
1908     } else {
1909         do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
1910     }
1911     tcg_temp_free_i64(tcg_addr);
1912 }
1913
1914 /*
1915  * C5.6.80 LDNP (Load Pair - non-temporal hint)
1916  * C5.6.81 LDP (Load Pair - non vector)
1917  * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
1918  * C5.6.176 STNP (Store Pair - non-temporal hint)
1919  * C5.6.177 STP (Store Pair - non vector)
1920  * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
1921  * C6.3.165 LDP (Load Pair of SIMD&FP)
1922  * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
1923  * C6.3.284 STP (Store Pair of SIMD&FP)
1924  *
1925  *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
1926  * +-----+-------+---+---+-------+---+-----------------------------+
1927  * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
1928  * +-----+-------+---+---+-------+---+-------+-------+------+------+
1929  *
1930  * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
1931  *      LDPSW                    01
1932  *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
1933  *   V: 0 -> GPR, 1 -> Vector
1934  * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
1935  *      10 -> signed offset, 11 -> pre-index
1936  *   L: 0 -> Store 1 -> Load
1937  *
1938  * Rt, Rt2 = GPR or SIMD registers to be stored
1939  * Rn = general purpose register containing address
1940  * imm7 = signed offset (multiple of 4 or 8 depending on size)
1941  */
1942 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
1943 {
1944     int rt = extract32(insn, 0, 5);
1945     int rn = extract32(insn, 5, 5);
1946     int rt2 = extract32(insn, 10, 5);
1947     uint64_t offset = sextract64(insn, 15, 7);
1948     int index = extract32(insn, 23, 2);
1949     bool is_vector = extract32(insn, 26, 1);
1950     bool is_load = extract32(insn, 22, 1);
1951     int opc = extract32(insn, 30, 2);
1952
1953     bool is_signed = false;
1954     bool postindex = false;
1955     bool wback = false;
1956
1957     TCGv_i64 tcg_addr; /* calculated address */
1958     int size;
1959
1960     if (opc == 3) {
1961         unallocated_encoding(s);
1962         return;
1963     }
1964
1965     if (is_vector) {
1966         size = 2 + opc;
1967     } else {
1968         size = 2 + extract32(opc, 1, 1);
1969         is_signed = extract32(opc, 0, 1);
1970         if (!is_load && is_signed) {
1971             unallocated_encoding(s);
1972             return;
1973         }
1974     }
1975
1976     switch (index) {
1977     case 1: /* post-index */
1978         postindex = true;
1979         wback = true;
1980         break;
1981     case 0:
1982         /* signed offset with "non-temporal" hint. Since we don't emulate
1983          * caches we don't care about hints to the cache system about
1984          * data access patterns, and handle this identically to plain
1985          * signed offset.
1986          */
1987         if (is_signed) {
1988             /* There is no non-temporal-hint version of LDPSW */
1989             unallocated_encoding(s);
1990             return;
1991         }
1992         postindex = false;
1993         break;
1994     case 2: /* signed offset, rn not updated */
1995         postindex = false;
1996         break;
1997     case 3: /* pre-index */
1998         postindex = false;
1999         wback = true;
2000         break;
2001     }
2002
2003     if (is_vector && !fp_access_check(s)) {
2004         return;
2005     }
2006
2007     offset <<= size;
2008
2009     if (rn == 31) {
2010         gen_check_sp_alignment(s);
2011     }
2012
2013     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2014
2015     if (!postindex) {
2016         tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2017     }
2018
2019     if (is_vector) {
2020         if (is_load) {
2021             do_fp_ld(s, rt, tcg_addr, size);
2022         } else {
2023             do_fp_st(s, rt, tcg_addr, size);
2024         }
2025     } else {
2026         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2027         if (is_load) {
2028             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
2029         } else {
2030             do_gpr_st(s, tcg_rt, tcg_addr, size);
2031         }
2032     }
2033     tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2034     if (is_vector) {
2035         if (is_load) {
2036             do_fp_ld(s, rt2, tcg_addr, size);
2037         } else {
2038             do_fp_st(s, rt2, tcg_addr, size);
2039         }
2040     } else {
2041         TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2042         if (is_load) {
2043             do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false);
2044         } else {
2045             do_gpr_st(s, tcg_rt2, tcg_addr, size);
2046         }
2047     }
2048
2049     if (wback) {
2050         if (postindex) {
2051             tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
2052         } else {
2053             tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
2054         }
2055         tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
2056     }
2057 }
2058
2059 /*
2060  * C3.3.8 Load/store (immediate post-indexed)
2061  * C3.3.9 Load/store (immediate pre-indexed)
2062  * C3.3.12 Load/store (unscaled immediate)
2063  *
2064  * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2065  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2066  * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2067  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2068  *
2069  * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
2070          10 -> unprivileged
2071  * V = 0 -> non-vector
2072  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2073  * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2074  */
2075 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
2076 {
2077     int rt = extract32(insn, 0, 5);
2078     int rn = extract32(insn, 5, 5);
2079     int imm9 = sextract32(insn, 12, 9);
2080     int opc = extract32(insn, 22, 2);
2081     int size = extract32(insn, 30, 2);
2082     int idx = extract32(insn, 10, 2);
2083     bool is_signed = false;
2084     bool is_store = false;
2085     bool is_extended = false;
2086     bool is_unpriv = (idx == 2);
2087     bool is_vector = extract32(insn, 26, 1);
2088     bool post_index;
2089     bool writeback;
2090
2091     TCGv_i64 tcg_addr;
2092
2093     if (is_vector) {
2094         size |= (opc & 2) << 1;
2095         if (size > 4 || is_unpriv) {
2096             unallocated_encoding(s);
2097             return;
2098         }
2099         is_store = ((opc & 1) == 0);
2100         if (!fp_access_check(s)) {
2101             return;
2102         }
2103     } else {
2104         if (size == 3 && opc == 2) {
2105             /* PRFM - prefetch */
2106             if (is_unpriv) {
2107                 unallocated_encoding(s);
2108                 return;
2109             }
2110             return;
2111         }
2112         if (opc == 3 && size > 1) {
2113             unallocated_encoding(s);
2114             return;
2115         }
2116         is_store = (opc == 0);
2117         is_signed = opc & (1<<1);
2118         is_extended = (size < 3) && (opc & 1);
2119     }
2120
2121     switch (idx) {
2122     case 0:
2123     case 2:
2124         post_index = false;
2125         writeback = false;
2126         break;
2127     case 1:
2128         post_index = true;
2129         writeback = true;
2130         break;
2131     case 3:
2132         post_index = false;
2133         writeback = true;
2134         break;
2135     }
2136
2137     if (rn == 31) {
2138         gen_check_sp_alignment(s);
2139     }
2140     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2141
2142     if (!post_index) {
2143         tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2144     }
2145
2146     if (is_vector) {
2147         if (is_store) {
2148             do_fp_st(s, rt, tcg_addr, size);
2149         } else {
2150             do_fp_ld(s, rt, tcg_addr, size);
2151         }
2152     } else {
2153         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2154         int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2155
2156         if (is_store) {
2157             do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx);
2158         } else {
2159             do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
2160                              is_signed, is_extended, memidx);
2161         }
2162     }
2163
2164     if (writeback) {
2165         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2166         if (post_index) {
2167             tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2168         }
2169         tcg_gen_mov_i64(tcg_rn, tcg_addr);
2170     }
2171 }
2172
2173 /*
2174  * C3.3.10 Load/store (register offset)
2175  *
2176  * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2177  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2178  * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2179  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2180  *
2181  * For non-vector:
2182  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2183  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2184  * For vector:
2185  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2186  *   opc<0>: 0 -> store, 1 -> load
2187  * V: 1 -> vector/simd
2188  * opt: extend encoding (see DecodeRegExtend)
2189  * S: if S=1 then scale (essentially index by sizeof(size))
2190  * Rt: register to transfer into/out of
2191  * Rn: address register or SP for base
2192  * Rm: offset register or ZR for offset
2193  */
2194 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn)
2195 {
2196     int rt = extract32(insn, 0, 5);
2197     int rn = extract32(insn, 5, 5);
2198     int shift = extract32(insn, 12, 1);
2199     int rm = extract32(insn, 16, 5);
2200     int opc = extract32(insn, 22, 2);
2201     int opt = extract32(insn, 13, 3);
2202     int size = extract32(insn, 30, 2);
2203     bool is_signed = false;
2204     bool is_store = false;
2205     bool is_extended = false;
2206     bool is_vector = extract32(insn, 26, 1);
2207
2208     TCGv_i64 tcg_rm;
2209     TCGv_i64 tcg_addr;
2210
2211     if (extract32(opt, 1, 1) == 0) {
2212         unallocated_encoding(s);
2213         return;
2214     }
2215
2216     if (is_vector) {
2217         size |= (opc & 2) << 1;
2218         if (size > 4) {
2219             unallocated_encoding(s);
2220             return;
2221         }
2222         is_store = !extract32(opc, 0, 1);
2223         if (!fp_access_check(s)) {
2224             return;
2225         }
2226     } else {
2227         if (size == 3 && opc == 2) {
2228             /* PRFM - prefetch */
2229             return;
2230         }
2231         if (opc == 3 && size > 1) {
2232             unallocated_encoding(s);
2233             return;
2234         }
2235         is_store = (opc == 0);
2236         is_signed = extract32(opc, 1, 1);
2237         is_extended = (size < 3) && extract32(opc, 0, 1);
2238     }
2239
2240     if (rn == 31) {
2241         gen_check_sp_alignment(s);
2242     }
2243     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2244
2245     tcg_rm = read_cpu_reg(s, rm, 1);
2246     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2247
2248     tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
2249
2250     if (is_vector) {
2251         if (is_store) {
2252             do_fp_st(s, rt, tcg_addr, size);
2253         } else {
2254             do_fp_ld(s, rt, tcg_addr, size);
2255         }
2256     } else {
2257         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2258         if (is_store) {
2259             do_gpr_st(s, tcg_rt, tcg_addr, size);
2260         } else {
2261             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
2262         }
2263     }
2264 }
2265
2266 /*
2267  * C3.3.13 Load/store (unsigned immediate)
2268  *
2269  * 31 30 29   27  26 25 24 23 22 21        10 9     5
2270  * +----+-------+---+-----+-----+------------+-------+------+
2271  * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
2272  * +----+-------+---+-----+-----+------------+-------+------+
2273  *
2274  * For non-vector:
2275  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2276  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2277  * For vector:
2278  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2279  *   opc<0>: 0 -> store, 1 -> load
2280  * Rn: base address register (inc SP)
2281  * Rt: target register
2282  */
2283 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn)
2284 {
2285     int rt = extract32(insn, 0, 5);
2286     int rn = extract32(insn, 5, 5);
2287     unsigned int imm12 = extract32(insn, 10, 12);
2288     bool is_vector = extract32(insn, 26, 1);
2289     int size = extract32(insn, 30, 2);
2290     int opc = extract32(insn, 22, 2);
2291     unsigned int offset;
2292
2293     TCGv_i64 tcg_addr;
2294
2295     bool is_store;
2296     bool is_signed = false;
2297     bool is_extended = false;
2298
2299     if (is_vector) {
2300         size |= (opc & 2) << 1;
2301         if (size > 4) {
2302             unallocated_encoding(s);
2303             return;
2304         }
2305         is_store = !extract32(opc, 0, 1);
2306         if (!fp_access_check(s)) {
2307             return;
2308         }
2309     } else {
2310         if (size == 3 && opc == 2) {
2311             /* PRFM - prefetch */
2312             return;
2313         }
2314         if (opc == 3 && size > 1) {
2315             unallocated_encoding(s);
2316             return;
2317         }
2318         is_store = (opc == 0);
2319         is_signed = extract32(opc, 1, 1);
2320         is_extended = (size < 3) && extract32(opc, 0, 1);
2321     }
2322
2323     if (rn == 31) {
2324         gen_check_sp_alignment(s);
2325     }
2326     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2327     offset = imm12 << size;
2328     tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2329
2330     if (is_vector) {
2331         if (is_store) {
2332             do_fp_st(s, rt, tcg_addr, size);
2333         } else {
2334             do_fp_ld(s, rt, tcg_addr, size);
2335         }
2336     } else {
2337         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2338         if (is_store) {
2339             do_gpr_st(s, tcg_rt, tcg_addr, size);
2340         } else {
2341             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
2342         }
2343     }
2344 }
2345
2346 /* Load/store register (all forms) */
2347 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2348 {
2349     switch (extract32(insn, 24, 2)) {
2350     case 0:
2351         if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2352             disas_ldst_reg_roffset(s, insn);
2353         } else {
2354             /* Load/store register (unscaled immediate)
2355              * Load/store immediate pre/post-indexed
2356              * Load/store register unprivileged
2357              */
2358             disas_ldst_reg_imm9(s, insn);
2359         }
2360         break;
2361     case 1:
2362         disas_ldst_reg_unsigned_imm(s, insn);
2363         break;
2364     default:
2365         unallocated_encoding(s);
2366         break;
2367     }
2368 }
2369
2370 /* C3.3.1 AdvSIMD load/store multiple structures
2371  *
2372  *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
2373  * +---+---+---------------+---+-------------+--------+------+------+------+
2374  * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
2375  * +---+---+---------------+---+-------------+--------+------+------+------+
2376  *
2377  * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
2378  *
2379  *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
2380  * +---+---+---------------+---+---+---------+--------+------+------+------+
2381  * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
2382  * +---+---+---------------+---+---+---------+--------+------+------+------+
2383  *
2384  * Rt: first (or only) SIMD&FP register to be transferred
2385  * Rn: base address or SP
2386  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2387  */
2388 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
2389 {
2390     int rt = extract32(insn, 0, 5);
2391     int rn = extract32(insn, 5, 5);
2392     int size = extract32(insn, 10, 2);
2393     int opcode = extract32(insn, 12, 4);
2394     bool is_store = !extract32(insn, 22, 1);
2395     bool is_postidx = extract32(insn, 23, 1);
2396     bool is_q = extract32(insn, 30, 1);
2397     TCGv_i64 tcg_addr, tcg_rn;
2398
2399     int ebytes = 1 << size;
2400     int elements = (is_q ? 128 : 64) / (8 << size);
2401     int rpt;    /* num iterations */
2402     int selem;  /* structure elements */
2403     int r;
2404
2405     if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
2406         unallocated_encoding(s);
2407         return;
2408     }
2409
2410     /* From the shared decode logic */
2411     switch (opcode) {
2412     case 0x0:
2413         rpt = 1;
2414         selem = 4;
2415         break;
2416     case 0x2:
2417         rpt = 4;
2418         selem = 1;
2419         break;
2420     case 0x4:
2421         rpt = 1;
2422         selem = 3;
2423         break;
2424     case 0x6:
2425         rpt = 3;
2426         selem = 1;
2427         break;
2428     case 0x7:
2429         rpt = 1;
2430         selem = 1;
2431         break;
2432     case 0x8:
2433         rpt = 1;
2434         selem = 2;
2435         break;
2436     case 0xa:
2437         rpt = 2;
2438         selem = 1;
2439         break;
2440     default:
2441         unallocated_encoding(s);
2442         return;
2443     }
2444
2445     if (size == 3 && !is_q && selem != 1) {
2446         /* reserved */
2447         unallocated_encoding(s);
2448         return;
2449     }
2450
2451     if (!fp_access_check(s)) {
2452         return;
2453     }
2454
2455     if (rn == 31) {
2456         gen_check_sp_alignment(s);
2457     }
2458
2459     tcg_rn = cpu_reg_sp(s, rn);
2460     tcg_addr = tcg_temp_new_i64();
2461     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2462
2463     for (r = 0; r < rpt; r++) {
2464         int e;
2465         for (e = 0; e < elements; e++) {
2466             int tt = (rt + r) % 32;
2467             int xs;
2468             for (xs = 0; xs < selem; xs++) {
2469                 if (is_store) {
2470                     do_vec_st(s, tt, e, tcg_addr, size);
2471                 } else {
2472                     do_vec_ld(s, tt, e, tcg_addr, size);
2473
2474                     /* For non-quad operations, setting a slice of the low
2475                      * 64 bits of the register clears the high 64 bits (in
2476                      * the ARM ARM pseudocode this is implicit in the fact
2477                      * that 'rval' is a 64 bit wide variable). We optimize
2478                      * by noticing that we only need to do this the first
2479                      * time we touch a register.
2480                      */
2481                     if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
2482                         clear_vec_high(s, tt);
2483                     }
2484                 }
2485                 tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2486                 tt = (tt + 1) % 32;
2487             }
2488         }
2489     }
2490
2491     if (is_postidx) {
2492         int rm = extract32(insn, 16, 5);
2493         if (rm == 31) {
2494             tcg_gen_mov_i64(tcg_rn, tcg_addr);
2495         } else {
2496             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2497         }
2498     }
2499     tcg_temp_free_i64(tcg_addr);
2500 }
2501
2502 /* C3.3.3 AdvSIMD load/store single structure
2503  *
2504  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2505  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2506  * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
2507  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2508  *
2509  * C3.3.4 AdvSIMD load/store single structure (post-indexed)
2510  *
2511  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2512  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2513  * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
2514  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2515  *
2516  * Rt: first (or only) SIMD&FP register to be transferred
2517  * Rn: base address or SP
2518  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2519  * index = encoded in Q:S:size dependent on size
2520  *
2521  * lane_size = encoded in R, opc
2522  * transfer width = encoded in opc, S, size
2523  */
2524 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
2525 {
2526     int rt = extract32(insn, 0, 5);
2527     int rn = extract32(insn, 5, 5);
2528     int size = extract32(insn, 10, 2);
2529     int S = extract32(insn, 12, 1);
2530     int opc = extract32(insn, 13, 3);
2531     int R = extract32(insn, 21, 1);
2532     int is_load = extract32(insn, 22, 1);
2533     int is_postidx = extract32(insn, 23, 1);
2534     int is_q = extract32(insn, 30, 1);
2535
2536     int scale = extract32(opc, 1, 2);
2537     int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
2538     bool replicate = false;
2539     int index = is_q << 3 | S << 2 | size;
2540     int ebytes, xs;
2541     TCGv_i64 tcg_addr, tcg_rn;
2542
2543     switch (scale) {
2544     case 3:
2545         if (!is_load || S) {
2546             unallocated_encoding(s);
2547             return;
2548         }
2549         scale = size;
2550         replicate = true;
2551         break;
2552     case 0:
2553         break;
2554     case 1:
2555         if (extract32(size, 0, 1)) {
2556             unallocated_encoding(s);
2557             return;
2558         }
2559         index >>= 1;
2560         break;
2561     case 2:
2562         if (extract32(size, 1, 1)) {
2563             unallocated_encoding(s);
2564             return;
2565         }
2566         if (!extract32(size, 0, 1)) {
2567             index >>= 2;
2568         } else {
2569             if (S) {
2570                 unallocated_encoding(s);
2571                 return;
2572             }
2573             index >>= 3;
2574             scale = 3;
2575         }
2576         break;
2577     default:
2578         g_assert_not_reached();
2579     }
2580
2581     if (!fp_access_check(s)) {
2582         return;
2583     }
2584
2585     ebytes = 1 << scale;
2586
2587     if (rn == 31) {
2588         gen_check_sp_alignment(s);
2589     }
2590
2591     tcg_rn = cpu_reg_sp(s, rn);
2592     tcg_addr = tcg_temp_new_i64();
2593     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2594
2595     for (xs = 0; xs < selem; xs++) {
2596         if (replicate) {
2597             /* Load and replicate to all elements */
2598             uint64_t mulconst;
2599             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
2600
2601             tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
2602                                 get_mem_index(s), MO_TE + scale);
2603             switch (scale) {
2604             case 0:
2605                 mulconst = 0x0101010101010101ULL;
2606                 break;
2607             case 1:
2608                 mulconst = 0x0001000100010001ULL;
2609                 break;
2610             case 2:
2611                 mulconst = 0x0000000100000001ULL;
2612                 break;
2613             case 3:
2614                 mulconst = 0;
2615                 break;
2616             default:
2617                 g_assert_not_reached();
2618             }
2619             if (mulconst) {
2620                 tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
2621             }
2622             write_vec_element(s, tcg_tmp, rt, 0, MO_64);
2623             if (is_q) {
2624                 write_vec_element(s, tcg_tmp, rt, 1, MO_64);
2625             } else {
2626                 clear_vec_high(s, rt);
2627             }
2628             tcg_temp_free_i64(tcg_tmp);
2629         } else {
2630             /* Load/store one element per register */
2631             if (is_load) {
2632                 do_vec_ld(s, rt, index, tcg_addr, MO_TE + scale);
2633             } else {
2634                 do_vec_st(s, rt, index, tcg_addr, MO_TE + scale);
2635             }
2636         }
2637         tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2638         rt = (rt + 1) % 32;
2639     }
2640
2641     if (is_postidx) {
2642         int rm = extract32(insn, 16, 5);
2643         if (rm == 31) {
2644             tcg_gen_mov_i64(tcg_rn, tcg_addr);
2645         } else {
2646             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2647         }
2648     }
2649     tcg_temp_free_i64(tcg_addr);
2650 }
2651
2652 /* C3.3 Loads and stores */
2653 static void disas_ldst(DisasContext *s, uint32_t insn)
2654 {
2655     switch (extract32(insn, 24, 6)) {
2656     case 0x08: /* Load/store exclusive */
2657         disas_ldst_excl(s, insn);
2658         break;
2659     case 0x18: case 0x1c: /* Load register (literal) */
2660         disas_ld_lit(s, insn);
2661         break;
2662     case 0x28: case 0x29:
2663     case 0x2c: case 0x2d: /* Load/store pair (all forms) */
2664         disas_ldst_pair(s, insn);
2665         break;
2666     case 0x38: case 0x39:
2667     case 0x3c: case 0x3d: /* Load/store register (all forms) */
2668         disas_ldst_reg(s, insn);
2669         break;
2670     case 0x0c: /* AdvSIMD load/store multiple structures */
2671         disas_ldst_multiple_struct(s, insn);
2672         break;
2673     case 0x0d: /* AdvSIMD load/store single structure */
2674         disas_ldst_single_struct(s, insn);
2675         break;
2676     default:
2677         unallocated_encoding(s);
2678         break;
2679     }
2680 }
2681
2682 /* C3.4.6 PC-rel. addressing
2683  *   31  30   29 28       24 23                5 4    0
2684  * +----+-------+-----------+-------------------+------+
2685  * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
2686  * +----+-------+-----------+-------------------+------+
2687  */
2688 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
2689 {
2690     unsigned int page, rd;
2691     uint64_t base;
2692     uint64_t offset;
2693
2694     page = extract32(insn, 31, 1);
2695     /* SignExtend(immhi:immlo) -> offset */
2696     offset = sextract64(insn, 5, 19);
2697     offset = offset << 2 | extract32(insn, 29, 2);
2698     rd = extract32(insn, 0, 5);
2699     base = s->pc - 4;
2700
2701     if (page) {
2702         /* ADRP (page based) */
2703         base &= ~0xfff;
2704         offset <<= 12;
2705     }
2706
2707     tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
2708 }
2709
2710 /*
2711  * C3.4.1 Add/subtract (immediate)
2712  *
2713  *  31 30 29 28       24 23 22 21         10 9   5 4   0
2714  * +--+--+--+-----------+-----+-------------+-----+-----+
2715  * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
2716  * +--+--+--+-----------+-----+-------------+-----+-----+
2717  *
2718  *    sf: 0 -> 32bit, 1 -> 64bit
2719  *    op: 0 -> add  , 1 -> sub
2720  *     S: 1 -> set flags
2721  * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
2722  */
2723 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
2724 {
2725     int rd = extract32(insn, 0, 5);
2726     int rn = extract32(insn, 5, 5);
2727     uint64_t imm = extract32(insn, 10, 12);
2728     int shift = extract32(insn, 22, 2);
2729     bool setflags = extract32(insn, 29, 1);
2730     bool sub_op = extract32(insn, 30, 1);
2731     bool is_64bit = extract32(insn, 31, 1);
2732
2733     TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2734     TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
2735     TCGv_i64 tcg_result;
2736
2737     switch (shift) {
2738     case 0x0:
2739         break;
2740     case 0x1:
2741         imm <<= 12;
2742         break;
2743     default:
2744         unallocated_encoding(s);
2745         return;
2746     }
2747
2748     tcg_result = tcg_temp_new_i64();
2749     if (!setflags) {
2750         if (sub_op) {
2751             tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
2752         } else {
2753             tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
2754         }
2755     } else {
2756         TCGv_i64 tcg_imm = tcg_const_i64(imm);
2757         if (sub_op) {
2758             gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2759         } else {
2760             gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2761         }
2762         tcg_temp_free_i64(tcg_imm);
2763     }
2764
2765     if (is_64bit) {
2766         tcg_gen_mov_i64(tcg_rd, tcg_result);
2767     } else {
2768         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
2769     }
2770
2771     tcg_temp_free_i64(tcg_result);
2772 }
2773
/* Replicate an e-bit element across a 64 bit value.
 *
 * mask must hold the element in its bottom e bits with all higher
 * bits zero; e must be non-zero. The element width is doubled on each
 * step until it reaches 64 bits, so for e = 1, 2, 4, ... 32 every
 * e-bit lane of the result holds a copy of the element, and for
 * e = 64 the input is returned unchanged.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int width;

    assert(e != 0);
    for (width = e; width < 64; width <<= 1) {
        /* Copy the pattern built so far into the lanes just above it */
        mask |= mask << width;
    }
    return mask;
}
2787
/* Return a value with the bottom 'length' bits set and all higher bits
 * clear. length must satisfy 0 < length <= 64 (asserted); length == 64
 * yields an all-ones value.
 */
static inline uint64_t bitmask64(unsigned int length)
{
    unsigned int cleared_high_bits;

    assert(length > 0 && length <= 64);
    /* In 0..63 thanks to the assert, so the shift is always defined */
    cleared_high_bits = 64 - length;
    return ~0ULL >> cleared_high_bits;
}
2794
/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
 * only require the wmask.
 *
 * result: receives the decoded 64 bit pattern; written only on success
 * immn:   the N bit (0 or 1)
 * imms:   6 bit field encoding element size and run length
 * immr:   6 bit rotation field
 *
 * Returns true if the fields are valid, false if they are a reserved
 * combination (ie should cause a guest UNDEF exception).
 */
static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                                   unsigned int imms, unsigned int immr)
{
    uint64_t elem;
    unsigned esize, emask, run, rot;
    int log2_esize;

    assert(immn < 2 && imms < 64 && immr < 64);

    /* The decoded value is a 64 bit vector of identical elements of
     * size e = 2, 4, 8, 16, 32 or 64 bits. Each element is a run of
     * between 1 and e-1 set bits, rotated right within the element by
     * between 0 and e-1 bits.
     *
     * immn (1 bit) and imms (6 bits, shown in binary) select the
     * element size and the run length:
     * 64 bit elements: immn = 1, imms = <length of run - 1>
     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
     * The only combination left over is immn = 0, imms = 11111x, which
     * is reserved. A <length of run - 1> field of all-ones is reserved
     * as well.
     *
     * The rotation is always immr % e.
     */

    /* Element size: log2(e) is the index of the highest set bit
     * of immn:NOT(imms).
     */
    log2_esize = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (log2_esize < 1) {
        /* immn == 0 and imms == 11111x (binary): reserved */
        return false;
    }
    esize = 1 << log2_esize;

    emask = esize - 1;
    run = imms & emask;
    rot = immr & emask;

    if (run == emask) {
        /* <length of run - 1> of all-ones is reserved */
        return false;
    }

    /* Build one element: run + 1 set bits... */
    elem = bitmask64(run + 1);
    if (rot != 0) {
        /* ...rotated right by rot within the esize bit wide element.
         * rot == 0 is skipped so the left shift count stays below
         * esize.
         */
        elem = (elem >> rot) | (elem << (esize - rot));
        elem &= bitmask64(esize);
    }

    /* Finally copy the element into every lane of the 64 bit value */
    *result = bitfield_replicate(elem, esize);
    return true;
}
2860
2861 /* C3.4.4 Logical (immediate)
2862  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
2863  * +----+-----+-------------+---+------+------+------+------+
2864  * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
2865  * +----+-----+-------------+---+------+------+------+------+
2866  */
2867 static void disas_logic_imm(DisasContext *s, uint32_t insn)
2868 {
2869     unsigned int sf, opc, is_n, immr, imms, rn, rd;
2870     TCGv_i64 tcg_rd, tcg_rn;
2871     uint64_t wmask;
2872     bool is_and = false;
2873
2874     sf = extract32(insn, 31, 1);
2875     opc = extract32(insn, 29, 2);
2876     is_n = extract32(insn, 22, 1);
2877     immr = extract32(insn, 16, 6);
2878     imms = extract32(insn, 10, 6);
2879     rn = extract32(insn, 5, 5);
2880     rd = extract32(insn, 0, 5);
2881
2882     if (!sf && is_n) {
2883         unallocated_encoding(s);
2884         return;
2885     }
2886
2887     if (opc == 0x3) { /* ANDS */
2888         tcg_rd = cpu_reg(s, rd);
2889     } else {
2890         tcg_rd = cpu_reg_sp(s, rd);
2891     }
2892     tcg_rn = cpu_reg(s, rn);
2893
2894     if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
2895         /* some immediate field values are reserved */
2896         unallocated_encoding(s);
2897         return;
2898     }
2899
2900     if (!sf) {
2901         wmask &= 0xffffffff;
2902     }
2903
2904     switch (opc) {
2905     case 0x3: /* ANDS */
2906     case 0x0: /* AND */
2907         tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
2908         is_and = true;
2909         break;
2910     case 0x1: /* ORR */
2911         tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
2912         break;
2913     case 0x2: /* EOR */
2914         tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
2915         break;
2916     default:
2917         assert(FALSE); /* must handle all above */
2918         break;
2919     }
2920
2921     if (!sf && !is_and) {
2922         /* zero extend final result; we know we can skip this for AND
2923          * since the immediate had the high 32 bits clear.
2924          */
2925         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2926     }
2927
2928     if (opc == 3) { /* ANDS */
2929         gen_logic_CC(sf, tcg_rd);
2930     }
2931 }
2932
2933 /*
2934  * C3.4.5 Move wide (immediate)
2935  *
2936  *  31 30 29 28         23 22 21 20             5 4    0
2937  * +--+-----+-------------+-----+----------------+------+
2938  * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
2939  * +--+-----+-------------+-----+----------------+------+
2940  *
2941  * sf: 0 -> 32 bit, 1 -> 64 bit
2942  * opc: 00 -> N, 10 -> Z, 11 -> K
2943  * hw: shift/16 (0,16, and sf only 32, 48)
2944  */
2945 static void disas_movw_imm(DisasContext *s, uint32_t insn)
2946 {
2947     int rd = extract32(insn, 0, 5);
2948     uint64_t imm = extract32(insn, 5, 16);
2949     int sf = extract32(insn, 31, 1);
2950     int opc = extract32(insn, 29, 2);
2951     int pos = extract32(insn, 21, 2) << 4;
2952     TCGv_i64 tcg_rd = cpu_reg(s, rd);
2953     TCGv_i64 tcg_imm;
2954
2955     if (!sf && (pos >= 32)) {
2956         unallocated_encoding(s);
2957         return;
2958     }
2959
2960     switch (opc) {
2961     case 0: /* MOVN */
2962     case 2: /* MOVZ */
2963         imm <<= pos;
2964         if (opc == 0) {
2965             imm = ~imm;
2966         }
2967         if (!sf) {
2968             imm &= 0xffffffffu;
2969         }
2970         tcg_gen_movi_i64(tcg_rd, imm);
2971         break;
2972     case 3: /* MOVK */
2973         tcg_imm = tcg_const_i64(imm);
2974         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
2975         tcg_temp_free_i64(tcg_imm);
2976         if (!sf) {
2977             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2978         }
2979         break;
2980     default:
2981         unallocated_encoding(s);
2982         break;
2983     }
2984 }
2985
2986 /* C3.4.2 Bitfield
2987  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
2988  * +----+-----+-------------+---+------+------+------+------+
2989  * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
2990  * +----+-----+-------------+---+------+------+------+------+
2991  */
2992 static void disas_bitfield(DisasContext *s, uint32_t insn)
2993 {
2994     unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
2995     TCGv_i64 tcg_rd, tcg_tmp;
2996
2997     sf = extract32(insn, 31, 1);
2998     opc = extract32(insn, 29, 2);
2999     n = extract32(insn, 22, 1);
3000     ri = extract32(insn, 16, 6);
3001     si = extract32(insn, 10, 6);
3002     rn = extract32(insn, 5, 5);
3003     rd = extract32(insn, 0, 5);
3004     bitsize = sf ? 64 : 32;
3005
3006     if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3007         unallocated_encoding(s);
3008         return;
3009     }
3010
3011     tcg_rd = cpu_reg(s, rd);
3012
3013     /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
3014        to be smaller than bitsize, we'll never reference data outside the
3015        low 32-bits anyway.  */
3016     tcg_tmp = read_cpu_reg(s, rn, 1);
3017
3018     /* Recognize the common aliases.  */
3019     if (opc == 0) { /* SBFM */
3020         if (ri == 0) {
3021             if (si == 7) { /* SXTB */
3022                 tcg_gen_ext8s_i64(tcg_rd, tcg_tmp);
3023                 goto done;
3024             } else if (si == 15) { /* SXTH */
3025                 tcg_gen_ext16s_i64(tcg_rd, tcg_tmp);
3026                 goto done;
3027             } else if (si == 31) { /* SXTW */
3028                 tcg_gen_ext32s_i64(tcg_rd, tcg_tmp);
3029                 goto done;
3030             }
3031         }
3032         if (si == 63 || (si == 31 && ri <= si)) { /* ASR */
3033             if (si == 31) {
3034                 tcg_gen_ext32s_i64(tcg_tmp, tcg_tmp);
3035             }
3036             tcg_gen_sari_i64(tcg_rd, tcg_tmp, ri);
3037             goto done;
3038         }
3039     } else if (opc == 2) { /* UBFM */
3040         if (ri == 0) { /* UXTB, UXTH, plus non-canonical AND */
3041             tcg_gen_andi_i64(tcg_rd, tcg_tmp, bitmask64(si + 1));
3042             return;
3043         }
3044         if (si == 63 || (si == 31 && ri <= si)) { /* LSR */
3045             if (si == 31) {
3046                 tcg_gen_ext32u_i64(tcg_tmp, tcg_tmp);
3047             }
3048             tcg_gen_shri_i64(tcg_rd, tcg_tmp, ri);
3049             return;
3050         }
3051         if (si + 1 == ri && si != bitsize - 1) { /* LSL */
3052             int shift = bitsize - 1 - si;
3053             tcg_gen_shli_i64(tcg_rd, tcg_tmp, shift);
3054             goto done;
3055         }
3056     }
3057
3058     if (opc != 1) { /* SBFM or UBFM */
3059         tcg_gen_movi_i64(tcg_rd, 0);
3060     }
3061
3062     /* do the bit move operation */
3063     if (si >= ri) {
3064         /* Wd<s-r:0> = Wn<s:r> */
3065         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
3066         pos = 0;
3067         len = (si - ri) + 1;
3068     } else {
3069         /* Wd<32+s-r,32-r> = Wn<s:0> */
3070         pos = bitsize - ri;
3071         len = si + 1;
3072     }
3073
3074     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
3075
3076     if (opc == 0) { /* SBFM - sign extend the destination field */
3077         tcg_gen_shli_i64(tcg_rd, tcg_rd, 64 - (pos + len));
3078         tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len));
3079     }
3080
3081  done:
3082     if (!sf) { /* zero extend final result */
3083         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3084     }
3085 }
3086
3087 /* C3.4.3 Extract
3088  *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3089  * +----+------+-------------+---+----+------+--------+------+------+
3090  * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3091  * +----+------+-------------+---+----+------+--------+------+------+
3092  */
3093 static void disas_extract(DisasContext *s, uint32_t insn)
3094 {
3095     unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3096
3097     sf = extract32(insn, 31, 1);
3098     n = extract32(insn, 22, 1);
3099     rm = extract32(insn, 16, 5);
3100     imm = extract32(insn, 10, 6);
3101     rn = extract32(insn, 5, 5);
3102     rd = extract32(insn, 0, 5);
3103     op21 = extract32(insn, 29, 2);
3104     op0 = extract32(insn, 21, 1);
3105     bitsize = sf ? 64 : 32;
3106
3107     if (sf != n || op21 || op0 || imm >= bitsize) {
3108         unallocated_encoding(s);
3109     } else {
3110         TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3111
3112         tcg_rd = cpu_reg(s, rd);
3113
3114         if (unlikely(imm == 0)) {
3115             /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3116              * so an extract from bit 0 is a special case.
3117              */
3118             if (sf) {
3119                 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3120             } else {
3121                 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3122             }
3123         } else if (rm == rn) { /* ROR */
3124             tcg_rm = cpu_reg(s, rm);
3125             if (sf) {
3126                 tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
3127             } else {
3128                 TCGv_i32 tmp = tcg_temp_new_i32();
3129                 tcg_gen_extrl_i64_i32(tmp, tcg_rm);
3130                 tcg_gen_rotri_i32(tmp, tmp, imm);
3131                 tcg_gen_extu_i32_i64(tcg_rd, tmp);
3132                 tcg_temp_free_i32(tmp);
3133             }
3134         } else {
3135             tcg_rm = read_cpu_reg(s, rm, sf);
3136             tcg_rn = read_cpu_reg(s, rn, sf);
3137             tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
3138             tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
3139             tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
3140             if (!sf) {
3141                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3142             }
3143         }
3144     }
3145 }
3146
3147 /* C3.4 Data processing - immediate */
3148 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3149 {
3150     switch (extract32(insn, 23, 6)) {
3151     case 0x20: case 0x21: /* PC-rel. addressing */
3152         disas_pc_rel_adr(s, insn);
3153         break;
3154     case 0x22: case 0x23: /* Add/subtract (immediate) */
3155         disas_add_sub_imm(s, insn);
3156         break;
3157     case 0x24: /* Logical (immediate) */
3158         disas_logic_imm(s, insn);
3159         break;
3160     case 0x25: /* Move wide (immediate) */
3161         disas_movw_imm(s, insn);
3162         break;
3163     case 0x26: /* Bitfield */
3164         disas_bitfield(s, insn);
3165         break;
3166     case 0x27: /* Extract */
3167         disas_extract(s, insn);
3168         break;
3169     default:
3170         unallocated_encoding(s);
3171         break;
3172     }
3173 }
3174
3175 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
3176  * Note that it is the caller's responsibility to ensure that the
3177  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
3178  * mandated semantics for out of range shifts.
3179  */
3180 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
3181                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3182 {
3183     switch (shift_type) {
3184     case A64_SHIFT_TYPE_LSL:
3185         tcg_gen_shl_i64(dst, src, shift_amount);
3186         break;
3187     case A64_SHIFT_TYPE_LSR:
3188         tcg_gen_shr_i64(dst, src, shift_amount);
3189         break;
3190     case A64_SHIFT_TYPE_ASR:
3191         if (!sf) {
3192             tcg_gen_ext32s_i64(dst, src);
3193         }
3194         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
3195         break;
3196     case A64_SHIFT_TYPE_ROR:
3197         if (sf) {
3198             tcg_gen_rotr_i64(dst, src, shift_amount);
3199         } else {
3200             TCGv_i32 t0, t1;
3201             t0 = tcg_temp_new_i32();
3202             t1 = tcg_temp_new_i32();
3203             tcg_gen_extrl_i64_i32(t0, src);
3204             tcg_gen_extrl_i64_i32(t1, shift_amount);
3205             tcg_gen_rotr_i32(t0, t0, t1);
3206             tcg_gen_extu_i32_i64(dst, t0);
3207             tcg_temp_free_i32(t0);
3208             tcg_temp_free_i32(t1);
3209         }
3210         break;
3211     default:
3212         assert(FALSE); /* all shift types should be handled */
3213         break;
3214     }
3215
3216     if (!sf) { /* zero extend final result */
3217         tcg_gen_ext32u_i64(dst, dst);
3218     }
3219 }
3220
3221 /* Shift a TCGv src by immediate, put result in dst.
3222  * The shift amount must be in range (this should always be true as the
3223  * relevant instructions will UNDEF on bad shift immediates).
3224  */
3225 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
3226                           enum a64_shift_type shift_type, unsigned int shift_i)
3227 {
3228     assert(shift_i < (sf ? 64 : 32));
3229
3230     if (shift_i == 0) {
3231         tcg_gen_mov_i64(dst, src);
3232     } else {
3233         TCGv_i64 shift_const;
3234
3235         shift_const = tcg_const_i64(shift_i);
3236         shift_reg(dst, src, sf, shift_type, shift_const);
3237         tcg_temp_free_i64(shift_const);
3238     }
3239 }
3240
3241 /* C3.5.10 Logical (shifted register)
3242  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
3243  * +----+-----+-----------+-------+---+------+--------+------+------+
3244  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
3245  * +----+-----+-----------+-------+---+------+--------+------+------+
3246  */
3247 static void disas_logic_reg(DisasContext *s, uint32_t insn)
3248 {
3249     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3250     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3251
3252     sf = extract32(insn, 31, 1);
3253     opc = extract32(insn, 29, 2);
3254     shift_type = extract32(insn, 22, 2);
3255     invert = extract32(insn, 21, 1);
3256     rm = extract32(insn, 16, 5);
3257     shift_amount = extract32(insn, 10, 6);
3258     rn = extract32(insn, 5, 5);
3259     rd = extract32(insn, 0, 5);
3260
3261     if (!sf && (shift_amount & (1 << 5))) {
3262         unallocated_encoding(s);
3263         return;
3264     }
3265
3266     tcg_rd = cpu_reg(s, rd);
3267
3268     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3269         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3270          * register-register MOV and MVN, so it is worth special casing.
3271          */
3272         tcg_rm = cpu_reg(s, rm);
3273         if (invert) {
3274             tcg_gen_not_i64(tcg_rd, tcg_rm);
3275             if (!sf) {
3276                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3277             }
3278         } else {
3279             if (sf) {
3280                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
3281             } else {
3282                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
3283             }
3284         }
3285         return;
3286     }
3287
3288     tcg_rm = read_cpu_reg(s, rm, sf);
3289
3290     if (shift_amount) {
3291         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3292     }
3293
3294     tcg_rn = cpu_reg(s, rn);
3295
3296     switch (opc | (invert << 2)) {
3297     case 0: /* AND */
3298     case 3: /* ANDS */
3299         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3300         break;
3301     case 1: /* ORR */
3302         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3303         break;
3304     case 2: /* EOR */
3305         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3306         break;
3307     case 4: /* BIC */
3308     case 7: /* BICS */
3309         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3310         break;
3311     case 5: /* ORN */
3312         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3313         break;
3314     case 6: /* EON */
3315         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3316         break;
3317     default:
3318         assert(FALSE);
3319         break;
3320     }
3321
3322     if (!sf) {
3323         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3324     }
3325
3326     if (opc == 3) {
3327         gen_logic_CC(sf, tcg_rd);
3328     }
3329 }
3330
3331 /*
3332  * C3.5.1 Add/subtract (extended register)
3333  *
3334  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
3335  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3336  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
3337  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3338  *
3339  *  sf: 0 -> 32bit, 1 -> 64bit
3340  *  op: 0 -> add  , 1 -> sub
3341  *   S: 1 -> set flags
3342  * opt: 00
3343  * option: extension type (see DecodeRegExtend)
3344  * imm3: optional shift to Rm
3345  *
3346  * Rd = Rn + LSL(extend(Rm), amount)
3347  */
3348 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3349 {
3350     int rd = extract32(insn, 0, 5);
3351     int rn = extract32(insn, 5, 5);
3352     int imm3 = extract32(insn, 10, 3);
3353     int option = extract32(insn, 13, 3);
3354     int rm = extract32(insn, 16, 5);
3355     bool setflags = extract32(insn, 29, 1);
3356     bool sub_op = extract32(insn, 30, 1);
3357     bool sf = extract32(insn, 31, 1);
3358
3359     TCGv_i64 tcg_rm, tcg_rn; /* temps */
3360     TCGv_i64 tcg_rd;
3361     TCGv_i64 tcg_result;
3362
3363     if (imm3 > 4) {
3364         unallocated_encoding(s);
3365         return;
3366     }
3367
3368     /* non-flag setting ops may use SP */
3369     if (!setflags) {
3370         tcg_rd = cpu_reg_sp(s, rd);
3371     } else {
3372         tcg_rd = cpu_reg(s, rd);
3373     }
3374     tcg_rn = read_cpu_reg_sp(s, rn, sf);
3375
3376     tcg_rm = read_cpu_reg(s, rm, sf);
3377     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3378
3379     tcg_result = tcg_temp_new_i64();
3380
3381     if (!setflags) {
3382         if (sub_op) {
3383             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3384         } else {
3385             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3386         }
3387     } else {
3388         if (sub_op) {
3389             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3390         } else {
3391             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3392         }
3393     }
3394
3395     if (sf) {
3396         tcg_gen_mov_i64(tcg_rd, tcg_result);
3397     } else {
3398         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3399     }
3400
3401     tcg_temp_free_i64(tcg_result);
3402 }
3403
3404 /*
3405  * C3.5.2 Add/subtract (shifted register)
3406  *
3407  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
3408  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3409  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
3410  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3411  *
3412  *    sf: 0 -> 32bit, 1 -> 64bit
3413  *    op: 0 -> add  , 1 -> sub
3414  *     S: 1 -> set flags
3415  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3416  *  imm6: Shift amount to apply to Rm before the add/sub
3417  */
3418 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3419 {
3420     int rd = extract32(insn, 0, 5);
3421     int rn = extract32(insn, 5, 5);
3422     int imm6 = extract32(insn, 10, 6);
3423     int rm = extract32(insn, 16, 5);
3424     int shift_type = extract32(insn, 22, 2);
3425     bool setflags = extract32(insn, 29, 1);
3426     bool sub_op = extract32(insn, 30, 1);
3427     bool sf = extract32(insn, 31, 1);
3428
3429     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3430     TCGv_i64 tcg_rn, tcg_rm;
3431     TCGv_i64 tcg_result;
3432
3433     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3434         unallocated_encoding(s);
3435         return;
3436     }
3437
3438     tcg_rn = read_cpu_reg(s, rn, sf);
3439     tcg_rm = read_cpu_reg(s, rm, sf);
3440
3441     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
3442
3443     tcg_result = tcg_temp_new_i64();
3444
3445     if (!setflags) {
3446         if (sub_op) {
3447             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3448         } else {
3449             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3450         }
3451     } else {
3452         if (sub_op) {
3453             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3454         } else {
3455             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3456         }
3457     }
3458
3459     if (sf) {
3460         tcg_gen_mov_i64(tcg_rd, tcg_result);
3461     } else {
3462         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3463     }
3464
3465     tcg_temp_free_i64(tcg_result);
3466 }
3467
3468 /* C3.5.9 Data-processing (3 source)
3469
3470    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
3471   +--+------+-----------+------+------+----+------+------+------+
3472   |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
3473   +--+------+-----------+------+------+----+------+------+------+
3474
3475  */
3476 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
3477 {
3478     int rd = extract32(insn, 0, 5);
3479     int rn = extract32(insn, 5, 5);
3480     int ra = extract32(insn, 10, 5);
3481     int rm = extract32(insn, 16, 5);
3482     int op_id = (extract32(insn, 29, 3) << 4) |
3483         (extract32(insn, 21, 3) << 1) |
3484         extract32(insn, 15, 1);
3485     bool sf = extract32(insn, 31, 1);
3486     bool is_sub = extract32(op_id, 0, 1);
3487     bool is_high = extract32(op_id, 2, 1);
3488     bool is_signed = false;
3489     TCGv_i64 tcg_op1;
3490     TCGv_i64 tcg_op2;
3491     TCGv_i64 tcg_tmp;
3492
3493     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
3494     switch (op_id) {
3495     case 0x42: /* SMADDL */
3496     case 0x43: /* SMSUBL */
3497     case 0x44: /* SMULH */
3498         is_signed = true;
3499         break;
3500     case 0x0: /* MADD (32bit) */
3501     case 0x1: /* MSUB (32bit) */
3502     case 0x40: /* MADD (64bit) */
3503     case 0x41: /* MSUB (64bit) */
3504     case 0x4a: /* UMADDL */
3505     case 0x4b: /* UMSUBL */
3506     case 0x4c: /* UMULH */
3507         break;
3508     default:
3509         unallocated_encoding(s);
3510         return;
3511     }
3512
3513     if (is_high) {
3514         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
3515         TCGv_i64 tcg_rd = cpu_reg(s, rd);
3516         TCGv_i64 tcg_rn = cpu_reg(s, rn);
3517         TCGv_i64 tcg_rm = cpu_reg(s, rm);
3518
3519         if (is_signed) {
3520             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3521         } else {
3522             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3523         }
3524
3525         tcg_temp_free_i64(low_bits);
3526         return;
3527     }
3528
3529     tcg_op1 = tcg_temp_new_i64();
3530     tcg_op2 = tcg_temp_new_i64();
3531     tcg_tmp = tcg_temp_new_i64();
3532
3533     if (op_id < 0x42) {
3534         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
3535         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
3536     } else {
3537         if (is_signed) {
3538             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
3539             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
3540         } else {
3541             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
3542             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
3543         }
3544     }
3545
3546     if (ra == 31 && !is_sub) {
3547         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
3548         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
3549     } else {
3550         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
3551         if (is_sub) {
3552             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3553         } else {
3554             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3555         }
3556     }
3557
3558     if (!sf) {
3559         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
3560     }
3561
3562     tcg_temp_free_i64(tcg_op1);
3563     tcg_temp_free_i64(tcg_op2);
3564     tcg_temp_free_i64(tcg_tmp);
3565 }
3566
3567 /* C3.5.3 - Add/subtract (with carry)
3568  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
3569  * +--+--+--+------------------------+------+---------+------+-----+
3570  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
3571  * +--+--+--+------------------------+------+---------+------+-----+
3572  *                                            [000000]
3573  */
3574
3575 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
3576 {
3577     unsigned int sf, op, setflags, rm, rn, rd;
3578     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
3579
3580     if (extract32(insn, 10, 6) != 0) {
3581         unallocated_encoding(s);
3582         return;
3583     }
3584
3585     sf = extract32(insn, 31, 1);
3586     op = extract32(insn, 30, 1);
3587     setflags = extract32(insn, 29, 1);
3588     rm = extract32(insn, 16, 5);
3589     rn = extract32(insn, 5, 5);
3590     rd = extract32(insn, 0, 5);
3591
3592     tcg_rd = cpu_reg(s, rd);
3593     tcg_rn = cpu_reg(s, rn);
3594
3595     if (op) {
3596         tcg_y = new_tmp_a64(s);
3597         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
3598     } else {
3599         tcg_y = cpu_reg(s, rm);
3600     }
3601
3602     if (setflags) {
3603         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
3604     } else {
3605         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
3606     }
3607 }
3608
/* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
 *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
 * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
 *        [1]                             y                [0]       [0]
 *
 * Emits TCG ops that sample COND on the current flags, perform the
 * flag-setting add (op == 0) or subtract (op == 1) of Rn and y, and then
 * force the flags to the #nzcv immediate if COND was false.  No branch is
 * generated: the comparison is always emitted and its flag results are
 * patched afterwards with masks derived from COND.
 */
static void disas_cc(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, y, cond, rn, nzcv, is_imm;
    TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
    TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
    DisasCompare c;

    /* S (bit 29) must be set for this encoding group.  */
    if (!extract32(insn, 29, 1)) {
        unallocated_encoding(s);
        return;
    }
    /* o2 (bit 10) and o3 (bit 4) must both be zero.  */
    if (insn & (1 << 10 | 1 << 4)) {
        unallocated_encoding(s);
        return;
    }
    sf = extract32(insn, 31, 1);
    op = extract32(insn, 30, 1);
    is_imm = extract32(insn, 11, 1);
    y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    nzcv = extract32(insn, 0, 4);

    /* Set T0 = !COND.  This has to be captured before the comparison
     * below replaces the flags that COND is evaluated on.
     */
    tcg_t0 = tcg_temp_new_i32();
    arm_test_cc(&c, cond);
    tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
    arm_free_cc(&c);

    /* Load the arguments for the new comparison.  */
    if (is_imm) {
        tcg_y = new_tmp_a64(s);
        tcg_gen_movi_i64(tcg_y, y);
    } else {
        tcg_y = cpu_reg(s, y);
    }
    tcg_rn = cpu_reg(s, rn);

    /* Set the flags for the new comparison.  The arithmetic result itself
     * is discarded; only the flag side effect is wanted.
     */
    tcg_tmp = tcg_temp_new_i64();
    if (op) {
        gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
    } else {
        gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
    }
    tcg_temp_free_i64(tcg_tmp);

    /* If COND was false, force the flags to #nzcv.  Compute two masks
     * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
     * For tcg hosts that support ANDC, we can make do with just T1.
     * In either case, allow the tcg optimizer to delete any unused mask.
     */
    tcg_t1 = tcg_temp_new_i32();
    tcg_t2 = tcg_temp_new_i32();
    tcg_gen_neg_i32(tcg_t1, tcg_t0);        /* T0 is 0/1, so -T0 is 0/-1 */
    tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);    /* T0 - 1 is -1/0 */

    /* NOTE(review): the per-flag handling below implies a specific storage
     * format for each flag variable (N and V forced with the all-ones mask,
     * C forced with the 0/1 value, Z treated with inverted sense, which is
     * consistent with cpu_ZF being zero when Z is set) -- confirm against
     * the definitions of cpu_NF/cpu_ZF/cpu_CF/cpu_VF.
     */
    if (nzcv & 8) { /* N */
        tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
    } else {
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
        }
    }
    if (nzcv & 4) { /* Z */
        /* Requested Z: clear the variable to zero when COND was false.  */
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
        }
    } else {
        /* Requested !Z: make the variable non-zero when COND was false.  */
        tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
    }
    if (nzcv & 2) { /* C */
        tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
    } else {
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
        }
    }
    if (nzcv & 1) { /* V */
        tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
    } else {
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
        }
    }
    tcg_temp_free_i32(tcg_t0);
    tcg_temp_free_i32(tcg_t1);
    tcg_temp_free_i32(tcg_t2);
}
3713
3714 /* C3.5.6 Conditional select
3715  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
3716  * +----+----+---+-----------------+------+------+-----+------+------+
3717  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
3718  * +----+----+---+-----------------+------+------+-----+------+------+
3719  */
3720 static void disas_cond_select(DisasContext *s, uint32_t insn)
3721 {
3722     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
3723     TCGv_i64 tcg_rd, zero;
3724     DisasCompare64 c;
3725
3726     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
3727         /* S == 1 or op2<1> == 1 */
3728         unallocated_encoding(s);
3729         return;
3730     }
3731     sf = extract32(insn, 31, 1);
3732     else_inv = extract32(insn, 30, 1);
3733     rm = extract32(insn, 16, 5);
3734     cond = extract32(insn, 12, 4);
3735     else_inc = extract32(insn, 10, 1);
3736     rn = extract32(insn, 5, 5);
3737     rd = extract32(insn, 0, 5);
3738
3739     tcg_rd = cpu_reg(s, rd);
3740
3741     a64_test_cc(&c, cond);
3742     zero = tcg_const_i64(0);
3743
3744     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
3745         /* CSET & CSETM.  */
3746         tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
3747         if (else_inv) {
3748             tcg_gen_neg_i64(tcg_rd, tcg_rd);
3749         }
3750     } else {
3751         TCGv_i64 t_true = cpu_reg(s, rn);
3752         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
3753         if (else_inv && else_inc) {
3754             tcg_gen_neg_i64(t_false, t_false);
3755         } else if (else_inv) {
3756             tcg_gen_not_i64(t_false, t_false);
3757         } else if (else_inc) {
3758             tcg_gen_addi_i64(t_false, t_false, 1);
3759         }
3760         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
3761     }
3762
3763     tcg_temp_free_i64(zero);
3764     a64_free_cc(&c);
3765
3766     if (!sf) {
3767         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3768     }
3769 }
3770
3771 static void handle_clz(DisasContext *s, unsigned int sf,
3772                        unsigned int rn, unsigned int rd)
3773 {
3774     TCGv_i64 tcg_rd, tcg_rn;
3775     tcg_rd = cpu_reg(s, rd);
3776     tcg_rn = cpu_reg(s, rn);
3777
3778     if (sf) {
3779         gen_helper_clz64(tcg_rd, tcg_rn);
3780     } else {
3781         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3782         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3783         gen_helper_clz(tcg_tmp32, tcg_tmp32);
3784         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3785         tcg_temp_free_i32(tcg_tmp32);
3786     }
3787 }
3788
3789 static void handle_cls(DisasContext *s, unsigned int sf,
3790                        unsigned int rn, unsigned int rd)
3791 {
3792     TCGv_i64 tcg_rd, tcg_rn;
3793     tcg_rd = cpu_reg(s, rd);
3794     tcg_rn = cpu_reg(s, rn);
3795
3796     if (sf) {
3797         gen_helper_cls64(tcg_rd, tcg_rn);
3798     } else {
3799         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3800         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3801         gen_helper_cls32(tcg_tmp32, tcg_tmp32);
3802         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3803         tcg_temp_free_i32(tcg_tmp32);
3804     }
3805 }
3806
3807 static void handle_rbit(DisasContext *s, unsigned int sf,
3808                         unsigned int rn, unsigned int rd)
3809 {
3810     TCGv_i64 tcg_rd, tcg_rn;
3811     tcg_rd = cpu_reg(s, rd);
3812     tcg_rn = cpu_reg(s, rn);
3813
3814     if (sf) {
3815         gen_helper_rbit64(tcg_rd, tcg_rn);
3816     } else {
3817         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3818         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3819         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
3820         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3821         tcg_temp_free_i32(tcg_tmp32);
3822     }
3823 }
3824
3825 /* C5.6.149 REV with sf==1, opcode==3 ("REV64") */
3826 static void handle_rev64(DisasContext *s, unsigned int sf,
3827                          unsigned int rn, unsigned int rd)
3828 {
3829     if (!sf) {
3830         unallocated_encoding(s);
3831         return;
3832     }
3833     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
3834 }
3835
3836 /* C5.6.149 REV with sf==0, opcode==2
3837  * C5.6.151 REV32 (sf==1, opcode==2)
3838  */
3839 static void handle_rev32(DisasContext *s, unsigned int sf,
3840                          unsigned int rn, unsigned int rd)
3841 {
3842     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3843
3844     if (sf) {
3845         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3846         TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3847
3848         /* bswap32_i64 requires zero high word */
3849         tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
3850         tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
3851         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3852         tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
3853         tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
3854
3855         tcg_temp_free_i64(tcg_tmp);
3856     } else {
3857         tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
3858         tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
3859     }
3860 }
3861
/* C5.6.150 REV16 (opcode==1)
 *
 * Byte-swaps every 16-bit halfword of rn into the same halfword position
 * of rd.  Two halfwords are processed when sf == 0 and four when sf == 1.
 * Each halfword is isolated in a temporary, swapped there, and then
 * deposited into its lane of rd.
 */
static void handle_rev16(DisasContext *s, unsigned int sf,
                         unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd = cpu_reg(s, rd);
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);

    /* Halfword 0: written straight into rd, replacing its old contents */
    tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff);
    tcg_gen_bswap16_i64(tcg_rd, tcg_tmp);

    /* Halfword 1: bits [31:16] */
    tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16);
    tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
    tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
    tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16);

    if (sf) {
        /* Halfword 2: bits [47:32] */
        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
        tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
        tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16);

        /* Halfword 3: bits [63:48].  No mask is needed here because the
         * shift by 48 already leaves only 16 bits in the temporary.
         */
        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48);
        tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16);
    }

    tcg_temp_free_i64(tcg_tmp);
}
3891
3892 /* C3.5.7 Data-processing (1 source)
3893  *   31  30  29  28             21 20     16 15    10 9    5 4    0
3894  * +----+---+---+-----------------+---------+--------+------+------+
3895  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
3896  * +----+---+---+-----------------+---------+--------+------+------+
3897  */
3898 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
3899 {
3900     unsigned int sf, opcode, rn, rd;
3901
3902     if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
3903         unallocated_encoding(s);
3904         return;
3905     }
3906
3907     sf = extract32(insn, 31, 1);
3908     opcode = extract32(insn, 10, 6);
3909     rn = extract32(insn, 5, 5);
3910     rd = extract32(insn, 0, 5);
3911
3912     switch (opcode) {
3913     case 0: /* RBIT */
3914         handle_rbit(s, sf, rn, rd);
3915         break;
3916     case 1: /* REV16 */
3917         handle_rev16(s, sf, rn, rd);
3918         break;
3919     case 2: /* REV32 */
3920         handle_rev32(s, sf, rn, rd);
3921         break;
3922     case 3: /* REV64 */
3923         handle_rev64(s, sf, rn, rd);
3924         break;
3925     case 4: /* CLZ */
3926         handle_clz(s, sf, rn, rd);
3927         break;
3928     case 5: /* CLS */
3929         handle_cls(s, sf, rn, rd);
3930         break;
3931     }
3932 }
3933
3934 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
3935                        unsigned int rm, unsigned int rn, unsigned int rd)
3936 {
3937     TCGv_i64 tcg_n, tcg_m, tcg_rd;
3938     tcg_rd = cpu_reg(s, rd);
3939
3940     if (!sf && is_signed) {
3941         tcg_n = new_tmp_a64(s);
3942         tcg_m = new_tmp_a64(s);
3943         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
3944         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
3945     } else {
3946         tcg_n = read_cpu_reg(s, rn, sf);
3947         tcg_m = read_cpu_reg(s, rm, sf);
3948     }
3949
3950     if (is_signed) {
3951         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
3952     } else {
3953         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
3954     }
3955
3956     if (!sf) { /* zero extend final result */
3957         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3958     }
3959 }
3960
3961 /* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */
3962 static void handle_shift_reg(DisasContext *s,
3963                              enum a64_shift_type shift_type, unsigned int sf,
3964                              unsigned int rm, unsigned int rn, unsigned int rd)
3965 {
3966     TCGv_i64 tcg_shift = tcg_temp_new_i64();
3967     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3968     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3969
3970     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
3971     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
3972     tcg_temp_free_i64(tcg_shift);
3973 }
3974
3975 /* CRC32[BHWX], CRC32C[BHWX] */
3976 static void handle_crc32(DisasContext *s,
3977                          unsigned int sf, unsigned int sz, bool crc32c,
3978                          unsigned int rm, unsigned int rn, unsigned int rd)
3979 {
3980     TCGv_i64 tcg_acc, tcg_val;
3981     TCGv_i32 tcg_bytes;
3982
3983     if (!arm_dc_feature(s, ARM_FEATURE_CRC)
3984         || (sf == 1 && sz != 3)
3985         || (sf == 0 && sz == 3)) {
3986         unallocated_encoding(s);
3987         return;
3988     }
3989
3990     if (sz == 3) {
3991         tcg_val = cpu_reg(s, rm);
3992     } else {
3993         uint64_t mask;
3994         switch (sz) {
3995         case 0:
3996             mask = 0xFF;
3997             break;
3998         case 1:
3999             mask = 0xFFFF;
4000             break;
4001         case 2:
4002             mask = 0xFFFFFFFF;
4003             break;
4004         default:
4005             g_assert_not_reached();
4006         }
4007         tcg_val = new_tmp_a64(s);
4008         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
4009     }
4010
4011     tcg_acc = cpu_reg(s, rn);
4012     tcg_bytes = tcg_const_i32(1 << sz);
4013
4014     if (crc32c) {
4015         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4016     } else {
4017         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4018     }
4019
4020     tcg_temp_free_i32(tcg_bytes);
4021 }
4022
4023 /* C3.5.8 Data-processing (2 source)
4024  *   31   30  29 28             21 20  16 15    10 9    5 4    0
4025  * +----+---+---+-----------------+------+--------+------+------+
4026  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
4027  * +----+---+---+-----------------+------+--------+------+------+
4028  */
4029 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
4030 {
4031     unsigned int sf, rm, opcode, rn, rd;
4032     sf = extract32(insn, 31, 1);
4033     rm = extract32(insn, 16, 5);
4034     opcode = extract32(insn, 10, 6);
4035     rn = extract32(insn, 5, 5);
4036     rd = extract32(insn, 0, 5);
4037
4038     if (extract32(insn, 29, 1)) {
4039         unallocated_encoding(s);
4040         return;
4041     }
4042
4043     switch (opcode) {
4044     case 2: /* UDIV */
4045         handle_div(s, false, sf, rm, rn, rd);
4046         break;
4047     case 3: /* SDIV */
4048         handle_div(s, true, sf, rm, rn, rd);
4049         break;
4050     case 8: /* LSLV */
4051         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
4052         break;
4053     case 9: /* LSRV */
4054         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
4055         break;
4056     case 10: /* ASRV */
4057         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
4058         break;
4059     case 11: /* RORV */
4060         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
4061         break;
4062     case 16:
4063     case 17:
4064     case 18:
4065     case 19:
4066     case 20:
4067     case 21:
4068     case 22:
4069     case 23: /* CRC32 */
4070     {
4071         int sz = extract32(opcode, 0, 2);
4072         bool crc32c = extract32(opcode, 2, 1);
4073         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
4074         break;
4075     }
4076     default:
4077         unallocated_encoding(s);
4078         break;
4079     }
4080 }
4081
4082 /* C3.5 Data processing - register */
4083 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
4084 {
4085     switch (extract32(insn, 24, 5)) {
4086     case 0x0a: /* Logical (shifted register) */
4087         disas_logic_reg(s, insn);
4088         break;
4089     case 0x0b: /* Add/subtract */
4090         if (insn & (1 << 21)) { /* (extended register) */
4091             disas_add_sub_ext_reg(s, insn);
4092         } else {
4093             disas_add_sub_reg(s, insn);
4094         }
4095         break;
4096     case 0x1b: /* Data-processing (3 source) */
4097         disas_data_proc_3src(s, insn);
4098         break;
4099     case 0x1a:
4100         switch (extract32(insn, 21, 3)) {
4101         case 0x0: /* Add/subtract (with carry) */
4102             disas_adc_sbc(s, insn);
4103             break;
4104         case 0x2: /* Conditional compare */
4105             disas_cc(s, insn); /* both imm and reg forms */
4106             break;
4107         case 0x4: /* Conditional select */
4108             disas_cond_select(s, insn);
4109             break;
4110         case 0x6: /* Data-processing */
4111             if (insn & (1 << 30)) { /* (1 source) */
4112                 disas_data_proc_1src(s, insn);
4113             } else {            /* (2 source) */
4114                 disas_data_proc_2src(s, insn);
4115             }
4116             break;
4117         default:
4118             unallocated_encoding(s);
4119             break;
4120         }
4121         break;
4122     default:
4123         unallocated_encoding(s);
4124         break;
4125     }
4126 }
4127
4128 static void handle_fp_compare(DisasContext *s, bool is_double,
4129                               unsigned int rn, unsigned int rm,
4130                               bool cmp_with_zero, bool signal_all_nans)
4131 {
4132     TCGv_i64 tcg_flags = tcg_temp_new_i64();
4133     TCGv_ptr fpst = get_fpstatus_ptr();
4134
4135     if (is_double) {
4136         TCGv_i64 tcg_vn, tcg_vm;
4137
4138         tcg_vn = read_fp_dreg(s, rn);
4139         if (cmp_with_zero) {
4140             tcg_vm = tcg_const_i64(0);
4141         } else {
4142             tcg_vm = read_fp_dreg(s, rm);
4143         }
4144         if (signal_all_nans) {
4145             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4146         } else {
4147             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4148         }
4149         tcg_temp_free_i64(tcg_vn);
4150         tcg_temp_free_i64(tcg_vm);
4151     } else {
4152         TCGv_i32 tcg_vn, tcg_vm;
4153
4154         tcg_vn = read_fp_sreg(s, rn);
4155         if (cmp_with_zero) {
4156             tcg_vm = tcg_const_i32(0);
4157         } else {
4158             tcg_vm = read_fp_sreg(s, rm);
4159         }
4160         if (signal_all_nans) {
4161             gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4162         } else {
4163             gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4164         }
4165         tcg_temp_free_i32(tcg_vn);
4166         tcg_temp_free_i32(tcg_vm);
4167     }
4168
4169     tcg_temp_free_ptr(fpst);
4170
4171     gen_set_nzcv(tcg_flags);
4172
4173     tcg_temp_free_i64(tcg_flags);
4174 }
4175
4176 /* C3.6.22 Floating point compare
4177  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
4178  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4179  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
4180  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4181  */
4182 static void disas_fp_compare(DisasContext *s, uint32_t insn)
4183 {
4184     unsigned int mos, type, rm, op, rn, opc, op2r;
4185
4186     mos = extract32(insn, 29, 3);
4187     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4188     rm = extract32(insn, 16, 5);
4189     op = extract32(insn, 14, 2);
4190     rn = extract32(insn, 5, 5);
4191     opc = extract32(insn, 3, 2);
4192     op2r = extract32(insn, 0, 3);
4193
4194     if (mos || op || op2r || type > 1) {
4195         unallocated_encoding(s);
4196         return;
4197     }
4198
4199     if (!fp_access_check(s)) {
4200         return;
4201     }
4202
4203     handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
4204 }
4205
4206 /* C3.6.23 Floating point conditional compare
4207  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
4208  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4209  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
4210  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4211  */
4212 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
4213 {
4214     unsigned int mos, type, rm, cond, rn, op, nzcv;
4215     TCGv_i64 tcg_flags;
4216     TCGLabel *label_continue = NULL;
4217
4218     mos = extract32(insn, 29, 3);
4219     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4220     rm = extract32(insn, 16, 5);
4221     cond = extract32(insn, 12, 4);
4222     rn = extract32(insn, 5, 5);
4223     op = extract32(insn, 4, 1);
4224     nzcv = extract32(insn, 0, 4);
4225
4226     if (mos || type > 1) {
4227         unallocated_encoding(s);
4228         return;
4229     }
4230
4231     if (!fp_access_check(s)) {
4232         return;
4233     }
4234
4235     if (cond < 0x0e) { /* not always */
4236         TCGLabel *label_match = gen_new_label();
4237         label_continue = gen_new_label();
4238         arm_gen_test_cc(cond, label_match);
4239         /* nomatch: */
4240         tcg_flags = tcg_const_i64(nzcv << 28);
4241         gen_set_nzcv(tcg_flags);
4242         tcg_temp_free_i64(tcg_flags);
4243         tcg_gen_br(label_continue);
4244         gen_set_label(label_match);
4245     }
4246
4247     handle_fp_compare(s, type, rn, rm, false, op);
4248
4249     if (cond < 0x0e) {
4250         gen_set_label(label_continue);
4251     }
4252 }
4253
4254 /* C3.6.24 Floating point conditional select
4255  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
4256  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4257  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
4258  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4259  */
4260 static void disas_fp_csel(DisasContext *s, uint32_t insn)
4261 {
4262     unsigned int mos, type, rm, cond, rn, rd;
4263     TCGv_i64 t_true, t_false, t_zero;
4264     DisasCompare64 c;
4265
4266     mos = extract32(insn, 29, 3);
4267     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4268     rm = extract32(insn, 16, 5);
4269     cond = extract32(insn, 12, 4);
4270     rn = extract32(insn, 5, 5);
4271     rd = extract32(insn, 0, 5);
4272
4273     if (mos || type > 1) {
4274         unallocated_encoding(s);
4275         return;
4276     }
4277
4278     if (!fp_access_check(s)) {
4279         return;
4280     }
4281
4282     /* Zero extend sreg inputs to 64 bits now.  */
4283     t_true = tcg_temp_new_i64();
4284     t_false = tcg_temp_new_i64();
4285     read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
4286     read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);
4287
4288     a64_test_cc(&c, cond);
4289     t_zero = tcg_const_i64(0);
4290     tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
4291     tcg_temp_free_i64(t_zero);
4292     tcg_temp_free_i64(t_false);
4293     a64_free_cc(&c);
4294
4295     /* Note that sregs write back zeros to the high bits,
4296        and we've already done the zero-extension.  */
4297     write_fp_dreg(s, rd, t_true);
4298     tcg_temp_free_i64(t_true);
4299 }
4300
4301 /* C3.6.25 Floating-point data-processing (1 source) - single precision */
4302 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
4303 {
4304     TCGv_ptr fpst;
4305     TCGv_i32 tcg_op;
4306     TCGv_i32 tcg_res;
4307
4308     fpst = get_fpstatus_ptr();
4309     tcg_op = read_fp_sreg(s, rn);
4310     tcg_res = tcg_temp_new_i32();
4311
4312     switch (opcode) {
4313     case 0x0: /* FMOV */
4314         tcg_gen_mov_i32(tcg_res, tcg_op);
4315         break;
4316     case 0x1: /* FABS */
4317         gen_helper_vfp_abss(tcg_res, tcg_op);
4318         break;
4319     case 0x2: /* FNEG */
4320         gen_helper_vfp_negs(tcg_res, tcg_op);
4321         break;
4322     case 0x3: /* FSQRT */
4323         gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
4324         break;
4325     case 0x8: /* FRINTN */
4326     case 0x9: /* FRINTP */
4327     case 0xa: /* FRINTM */
4328     case 0xb: /* FRINTZ */
4329     case 0xc: /* FRINTA */
4330     {
4331         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4332
4333         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4334         gen_helper_rints(tcg_res, tcg_op, fpst);
4335
4336         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4337         tcg_temp_free_i32(tcg_rmode);
4338         break;
4339     }
4340     case 0xe: /* FRINTX */
4341         gen_helper_rints_exact(tcg_res, tcg_op, fpst);
4342         break;
4343     case 0xf: /* FRINTI */
4344         gen_helper_rints(tcg_res, tcg_op, fpst);
4345         break;
4346     default:
4347         abort();
4348     }
4349
4350     write_fp_sreg(s, rd, tcg_res);
4351
4352     tcg_temp_free_ptr(fpst);
4353     tcg_temp_free_i32(tcg_op);
4354     tcg_temp_free_i32(tcg_res);
4355 }
4356
4357 /* C3.6.25 Floating-point data-processing (1 source) - double precision */
4358 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
4359 {
4360     TCGv_ptr fpst;
4361     TCGv_i64 tcg_op;
4362     TCGv_i64 tcg_res;
4363
4364     fpst = get_fpstatus_ptr();
4365     tcg_op = read_fp_dreg(s, rn);
4366     tcg_res = tcg_temp_new_i64();
4367
4368     switch (opcode) {
4369     case 0x0: /* FMOV */
4370         tcg_gen_mov_i64(tcg_res, tcg_op);
4371         break;
4372     case 0x1: /* FABS */
4373         gen_helper_vfp_absd(tcg_res, tcg_op);
4374         break;
4375     case 0x2: /* FNEG */
4376         gen_helper_vfp_negd(tcg_res, tcg_op);
4377         break;
4378     case 0x3: /* FSQRT */
4379         gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
4380         break;
4381     case 0x8: /* FRINTN */
4382     case 0x9: /* FRINTP */
4383     case 0xa: /* FRINTM */
4384     case 0xb: /* FRINTZ */
4385     case 0xc: /* FRINTA */
4386     {
4387         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4388
4389         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4390         gen_helper_rintd(tcg_res, tcg_op, fpst);
4391
4392         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4393         tcg_temp_free_i32(tcg_rmode);
4394         break;
4395     }
4396     case 0xe: /* FRINTX */
4397         gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
4398         break;
4399     case 0xf: /* FRINTI */
4400         gen_helper_rintd(tcg_res, tcg_op, fpst);
4401         break;
4402     default:
4403         abort();
4404     }
4405
4406     write_fp_dreg(s, rd, tcg_res);
4407
4408     tcg_temp_free_ptr(fpst);
4409     tcg_temp_free_i64(tcg_op);
4410     tcg_temp_free_i64(tcg_res);
4411 }
4412
4413 static void handle_fp_fcvt(DisasContext *s, int opcode,
4414                            int rd, int rn, int dtype, int ntype)
4415 {
4416     switch (ntype) {
4417     case 0x0:
4418     {
4419         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4420         if (dtype == 1) {
4421             /* Single to double */
4422             TCGv_i64 tcg_rd = tcg_temp_new_i64();
4423             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
4424             write_fp_dreg(s, rd, tcg_rd);
4425             tcg_temp_free_i64(tcg_rd);
4426         } else {
4427             /* Single to half */
4428             TCGv_i32 tcg_rd = tcg_temp_new_i32();
4429             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
4430             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4431             write_fp_sreg(s, rd, tcg_rd);
4432             tcg_temp_free_i32(tcg_rd);
4433         }
4434         tcg_temp_free_i32(tcg_rn);
4435         break;
4436     }
4437     case 0x1:
4438     {
4439         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
4440         TCGv_i32 tcg_rd = tcg_temp_new_i32();
4441         if (dtype == 0) {
4442             /* Double to single */
4443             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
4444         } else {
4445             /* Double to half */
4446             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
4447             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4448         }
4449         write_fp_sreg(s, rd, tcg_rd);
4450         tcg_temp_free_i32(tcg_rd);
4451         tcg_temp_free_i64(tcg_rn);
4452         break;
4453     }
4454     case 0x3:
4455     {
4456         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4457         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
4458         if (dtype == 0) {
4459             /* Half to single */
4460             TCGv_i32 tcg_rd = tcg_temp_new_i32();
4461             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
4462             write_fp_sreg(s, rd, tcg_rd);
4463             tcg_temp_free_i32(tcg_rd);
4464         } else {
4465             /* Half to double */
4466             TCGv_i64 tcg_rd = tcg_temp_new_i64();
4467             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
4468             write_fp_dreg(s, rd, tcg_rd);
4469             tcg_temp_free_i64(tcg_rd);
4470         }
4471         tcg_temp_free_i32(tcg_rn);
4472         break;
4473     }
4474     default:
4475         abort();
4476     }
4477 }
4478
4479 /* C3.6.25 Floating point data-processing (1 source)
4480  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
4481  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4482  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
4483  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4484  */
4485 static void disas_fp_1src(DisasContext *s, uint32_t insn)
4486 {
4487     int type = extract32(insn, 22, 2);
4488     int opcode = extract32(insn, 15, 6);
4489     int rn = extract32(insn, 5, 5);
4490     int rd = extract32(insn, 0, 5);
4491
4492     switch (opcode) {
4493     case 0x4: case 0x5: case 0x7:
4494     {
4495         /* FCVT between half, single and double precision */
4496         int dtype = extract32(opcode, 0, 2);
4497         if (type == 2 || dtype == type) {
4498             unallocated_encoding(s);
4499             return;
4500         }
4501         if (!fp_access_check(s)) {
4502             return;
4503         }
4504
4505         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
4506         break;
4507     }
4508     case 0x0 ... 0x3:
4509     case 0x8 ... 0xc:
4510     case 0xe ... 0xf:
4511         /* 32-to-32 and 64-to-64 ops */
4512         switch (type) {
4513         case 0:
4514             if (!fp_access_check(s)) {
4515                 return;
4516             }
4517
4518             handle_fp_1src_single(s, opcode, rd, rn);
4519             break;
4520         case 1:
4521             if (!fp_access_check(s)) {
4522                 return;
4523             }
4524
4525             handle_fp_1src_double(s, opcode, rd, rn);
4526             break;
4527         default:
4528             unallocated_encoding(s);
4529         }
4530         break;
4531     default:
4532         unallocated_encoding(s);
4533         break;
4534     }
4535 }
4536
4537 /* C3.6.26 Floating-point data-processing (2 source) - single precision */
4538 static void handle_fp_2src_single(DisasContext *s, int opcode,
4539                                   int rd, int rn, int rm)
4540 {
4541     TCGv_i32 tcg_op1;
4542     TCGv_i32 tcg_op2;
4543     TCGv_i32 tcg_res;
4544     TCGv_ptr fpst;
4545
4546     tcg_res = tcg_temp_new_i32();
4547     fpst = get_fpstatus_ptr();
4548     tcg_op1 = read_fp_sreg(s, rn);
4549     tcg_op2 = read_fp_sreg(s, rm);
4550
4551     switch (opcode) {
4552     case 0x0: /* FMUL */
4553         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4554         break;
4555     case 0x1: /* FDIV */
4556         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
4557         break;
4558     case 0x2: /* FADD */
4559         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
4560         break;
4561     case 0x3: /* FSUB */
4562         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
4563         break;
4564     case 0x4: /* FMAX */
4565         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
4566         break;
4567     case 0x5: /* FMIN */
4568         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
4569         break;
4570     case 0x6: /* FMAXNM */
4571         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
4572         break;
4573     case 0x7: /* FMINNM */
4574         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
4575         break;
4576     case 0x8: /* FNMUL */
4577         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4578         gen_helper_vfp_negs(tcg_res, tcg_res);
4579         break;
4580     }
4581
4582     write_fp_sreg(s, rd, tcg_res);
4583
4584     tcg_temp_free_ptr(fpst);
4585     tcg_temp_free_i32(tcg_op1);
4586     tcg_temp_free_i32(tcg_op2);
4587     tcg_temp_free_i32(tcg_res);
4588 }
4589
4590 /* C3.6.26 Floating-point data-processing (2 source) - double precision */
4591 static void handle_fp_2src_double(DisasContext *s, int opcode,
4592                                   int rd, int rn, int rm)
4593 {
4594     TCGv_i64 tcg_op1;
4595     TCGv_i64 tcg_op2;
4596     TCGv_i64 tcg_res;
4597     TCGv_ptr fpst;
4598
4599     tcg_res = tcg_temp_new_i64();
4600     fpst = get_fpstatus_ptr();
4601     tcg_op1 = read_fp_dreg(s, rn);
4602     tcg_op2 = read_fp_dreg(s, rm);
4603
4604     switch (opcode) {
4605     case 0x0: /* FMUL */
4606         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4607         break;
4608     case 0x1: /* FDIV */
4609         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
4610         break;
4611     case 0x2: /* FADD */
4612         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
4613         break;
4614     case 0x3: /* FSUB */
4615         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
4616         break;
4617     case 0x4: /* FMAX */
4618         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
4619         break;
4620     case 0x5: /* FMIN */
4621         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
4622         break;
4623     case 0x6: /* FMAXNM */
4624         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4625         break;
4626     case 0x7: /* FMINNM */
4627         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4628         break;
4629     case 0x8: /* FNMUL */
4630         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4631         gen_helper_vfp_negd(tcg_res, tcg_res);
4632         break;
4633     }
4634
4635     write_fp_dreg(s, rd, tcg_res);
4636
4637     tcg_temp_free_ptr(fpst);
4638     tcg_temp_free_i64(tcg_op1);
4639     tcg_temp_free_i64(tcg_op2);
4640     tcg_temp_free_i64(tcg_res);
4641 }
4642
4643 /* C3.6.26 Floating point data-processing (2 source)
4644  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
4645  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4646  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
4647  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4648  */
4649 static void disas_fp_2src(DisasContext *s, uint32_t insn)
4650 {
4651     int type = extract32(insn, 22, 2);
4652     int rd = extract32(insn, 0, 5);
4653     int rn = extract32(insn, 5, 5);
4654     int rm = extract32(insn, 16, 5);
4655     int opcode = extract32(insn, 12, 4);
4656
4657     if (opcode > 8) {
4658         unallocated_encoding(s);
4659         return;
4660     }
4661
4662     switch (type) {
4663     case 0:
4664         if (!fp_access_check(s)) {
4665             return;
4666         }
4667         handle_fp_2src_single(s, opcode, rd, rn, rm);
4668         break;
4669     case 1:
4670         if (!fp_access_check(s)) {
4671             return;
4672         }
4673         handle_fp_2src_double(s, opcode, rd, rn, rm);
4674         break;
4675     default:
4676         unallocated_encoding(s);
4677     }
4678 }
4679
4680 /* C3.6.27 Floating-point data-processing (3 source) - single precision */
4681 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
4682                                   int rd, int rn, int rm, int ra)
4683 {
4684     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
4685     TCGv_i32 tcg_res = tcg_temp_new_i32();
4686     TCGv_ptr fpst = get_fpstatus_ptr();
4687
4688     tcg_op1 = read_fp_sreg(s, rn);
4689     tcg_op2 = read_fp_sreg(s, rm);
4690     tcg_op3 = read_fp_sreg(s, ra);
4691
4692     /* These are fused multiply-add, and must be done as one
4693      * floating point operation with no rounding between the
4694      * multiplication and addition steps.
4695      * NB that doing the negations here as separate steps is
4696      * correct : an input NaN should come out with its sign bit
4697      * flipped if it is a negated-input.
4698      */
4699     if (o1 == true) {
4700         gen_helper_vfp_negs(tcg_op3, tcg_op3);
4701     }
4702
4703     if (o0 != o1) {
4704         gen_helper_vfp_negs(tcg_op1, tcg_op1);
4705     }
4706
4707     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4708
4709     write_fp_sreg(s, rd, tcg_res);
4710
4711     tcg_temp_free_ptr(fpst);
4712     tcg_temp_free_i32(tcg_op1);
4713     tcg_temp_free_i32(tcg_op2);
4714     tcg_temp_free_i32(tcg_op3);
4715     tcg_temp_free_i32(tcg_res);
4716 }
4717
4718 /* C3.6.27 Floating-point data-processing (3 source) - double precision */
4719 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
4720                                   int rd, int rn, int rm, int ra)
4721 {
4722     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
4723     TCGv_i64 tcg_res = tcg_temp_new_i64();
4724     TCGv_ptr fpst = get_fpstatus_ptr();
4725
4726     tcg_op1 = read_fp_dreg(s, rn);
4727     tcg_op2 = read_fp_dreg(s, rm);
4728     tcg_op3 = read_fp_dreg(s, ra);
4729
4730     /* These are fused multiply-add, and must be done as one
4731      * floating point operation with no rounding between the
4732      * multiplication and addition steps.
4733      * NB that doing the negations here as separate steps is
4734      * correct : an input NaN should come out with its sign bit
4735      * flipped if it is a negated-input.
4736      */
4737     if (o1 == true) {
4738         gen_helper_vfp_negd(tcg_op3, tcg_op3);
4739     }
4740
4741     if (o0 != o1) {
4742         gen_helper_vfp_negd(tcg_op1, tcg_op1);
4743     }
4744
4745     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4746
4747     write_fp_dreg(s, rd, tcg_res);
4748
4749     tcg_temp_free_ptr(fpst);
4750     tcg_temp_free_i64(tcg_op1);
4751     tcg_temp_free_i64(tcg_op2);
4752     tcg_temp_free_i64(tcg_op3);
4753     tcg_temp_free_i64(tcg_res);
4754 }
4755
4756 /* C3.6.27 Floating point data-processing (3 source)
4757  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
4758  * +---+---+---+-----------+------+----+------+----+------+------+------+
4759  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4760  * +---+---+---+-----------+------+----+------+----+------+------+------+
4761  */
4762 static void disas_fp_3src(DisasContext *s, uint32_t insn)
4763 {
4764     int type = extract32(insn, 22, 2);
4765     int rd = extract32(insn, 0, 5);
4766     int rn = extract32(insn, 5, 5);
4767     int ra = extract32(insn, 10, 5);
4768     int rm = extract32(insn, 16, 5);
4769     bool o0 = extract32(insn, 15, 1);
4770     bool o1 = extract32(insn, 21, 1);
4771
4772     switch (type) {
4773     case 0:
4774         if (!fp_access_check(s)) {
4775             return;
4776         }
4777         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
4778         break;
4779     case 1:
4780         if (!fp_access_check(s)) {
4781             return;
4782         }
4783         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
4784         break;
4785     default:
4786         unallocated_encoding(s);
4787     }
4788 }
4789
4790 /* C3.6.28 Floating point immediate
4791  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
4792  * +---+---+---+-----------+------+---+------------+-------+------+------+
4793  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
4794  * +---+---+---+-----------+------+---+------------+-------+------+------+
4795  */
4796 static void disas_fp_imm(DisasContext *s, uint32_t insn)
4797 {
4798     int rd = extract32(insn, 0, 5);
4799     int imm8 = extract32(insn, 13, 8);
4800     int is_double = extract32(insn, 22, 2);
4801     uint64_t imm;
4802     TCGv_i64 tcg_res;
4803
4804     if (is_double > 1) {
4805         unallocated_encoding(s);
4806         return;
4807     }
4808
4809     if (!fp_access_check(s)) {
4810         return;
4811     }
4812
4813     /* The imm8 encodes the sign bit, enough bits to represent
4814      * an exponent in the range 01....1xx to 10....0xx,
4815      * and the most significant 4 bits of the mantissa; see
4816      * VFPExpandImm() in the v8 ARM ARM.
4817      */
4818     if (is_double) {
4819         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4820             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
4821             extract32(imm8, 0, 6);
4822         imm <<= 48;
4823     } else {
4824         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4825             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
4826             (extract32(imm8, 0, 6) << 3);
4827         imm <<= 16;
4828     }
4829
4830     tcg_res = tcg_const_i64(imm);
4831     write_fp_dreg(s, rd, tcg_res);
4832     tcg_temp_free_i64(tcg_res);
4833 }
4834
4835 /* Handle floating point <=> fixed point conversions. Note that we can
4836  * also deal with fp <=> integer conversions as a special case (scale == 64)
4837  * OPTME: consider handling that special case specially or at least skipping
4838  * the call to scalbn in the helpers for zero shifts.
4839  */
4840 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
4841                            bool itof, int rmode, int scale, int sf, int type)
4842 {
4843     bool is_signed = !(opcode & 1);
4844     bool is_double = type;
4845     TCGv_ptr tcg_fpstatus;
4846     TCGv_i32 tcg_shift;
4847
4848     tcg_fpstatus = get_fpstatus_ptr();
4849
4850     tcg_shift = tcg_const_i32(64 - scale);
4851
4852     if (itof) {
4853         TCGv_i64 tcg_int = cpu_reg(s, rn);
4854         if (!sf) {
4855             TCGv_i64 tcg_extend = new_tmp_a64(s);
4856
4857             if (is_signed) {
4858                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
4859             } else {
4860                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
4861             }
4862
4863             tcg_int = tcg_extend;
4864         }
4865
4866         if (is_double) {
4867             TCGv_i64 tcg_double = tcg_temp_new_i64();
4868             if (is_signed) {
4869                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
4870                                      tcg_shift, tcg_fpstatus);
4871             } else {
4872                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
4873                                      tcg_shift, tcg_fpstatus);
4874             }
4875             write_fp_dreg(s, rd, tcg_double);
4876             tcg_temp_free_i64(tcg_double);
4877         } else {
4878             TCGv_i32 tcg_single = tcg_temp_new_i32();
4879             if (is_signed) {
4880                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
4881                                      tcg_shift, tcg_fpstatus);
4882             } else {
4883                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
4884                                      tcg_shift, tcg_fpstatus);
4885             }
4886             write_fp_sreg(s, rd, tcg_single);
4887             tcg_temp_free_i32(tcg_single);
4888         }
4889     } else {
4890         TCGv_i64 tcg_int = cpu_reg(s, rd);
4891         TCGv_i32 tcg_rmode;
4892
4893         if (extract32(opcode, 2, 1)) {
4894             /* There are too many rounding modes to all fit into rmode,
4895              * so FCVTA[US] is a special case.
4896              */
4897             rmode = FPROUNDING_TIEAWAY;
4898         }
4899
4900         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
4901
4902         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4903
4904         if (is_double) {
4905             TCGv_i64 tcg_double = read_fp_dreg(s, rn);
4906             if (is_signed) {
4907                 if (!sf) {
4908                     gen_helper_vfp_tosld(tcg_int, tcg_double,
4909                                          tcg_shift, tcg_fpstatus);
4910                 } else {
4911                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
4912                                          tcg_shift, tcg_fpstatus);
4913                 }
4914             } else {
4915                 if (!sf) {
4916                     gen_helper_vfp_tould(tcg_int, tcg_double,
4917                                          tcg_shift, tcg_fpstatus);
4918                 } else {
4919                     gen_helper_vfp_touqd(tcg_int, tcg_double,
4920                                          tcg_shift, tcg_fpstatus);
4921                 }
4922             }
4923             tcg_temp_free_i64(tcg_double);
4924         } else {
4925             TCGv_i32 tcg_single = read_fp_sreg(s, rn);
4926             if (sf) {
4927                 if (is_signed) {
4928                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
4929                                          tcg_shift, tcg_fpstatus);
4930                 } else {
4931                     gen_helper_vfp_touqs(tcg_int, tcg_single,
4932                                          tcg_shift, tcg_fpstatus);
4933                 }
4934             } else {
4935                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
4936                 if (is_signed) {
4937                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
4938                                          tcg_shift, tcg_fpstatus);
4939                 } else {
4940                     gen_helper_vfp_touls(tcg_dest, tcg_single,
4941                                          tcg_shift, tcg_fpstatus);
4942                 }
4943                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
4944                 tcg_temp_free_i32(tcg_dest);
4945             }
4946             tcg_temp_free_i32(tcg_single);
4947         }
4948
4949         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4950         tcg_temp_free_i32(tcg_rmode);
4951
4952         if (!sf) {
4953             tcg_gen_ext32u_i64(tcg_int, tcg_int);
4954         }
4955     }
4956
4957     tcg_temp_free_ptr(tcg_fpstatus);
4958     tcg_temp_free_i32(tcg_shift);
4959 }
4960
4961 /* C3.6.29 Floating point <-> fixed point conversions
4962  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
4963  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4964  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
4965  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4966  */
4967 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
4968 {
4969     int rd = extract32(insn, 0, 5);
4970     int rn = extract32(insn, 5, 5);
4971     int scale = extract32(insn, 10, 6);
4972     int opcode = extract32(insn, 16, 3);
4973     int rmode = extract32(insn, 19, 2);
4974     int type = extract32(insn, 22, 2);
4975     bool sbit = extract32(insn, 29, 1);
4976     bool sf = extract32(insn, 31, 1);
4977     bool itof;
4978
4979     if (sbit || (type > 1)
4980         || (!sf && scale < 32)) {
4981         unallocated_encoding(s);
4982         return;
4983     }
4984
4985     switch ((rmode << 3) | opcode) {
4986     case 0x2: /* SCVTF */
4987     case 0x3: /* UCVTF */
4988         itof = true;
4989         break;
4990     case 0x18: /* FCVTZS */
4991     case 0x19: /* FCVTZU */
4992         itof = false;
4993         break;
4994     default:
4995         unallocated_encoding(s);
4996         return;
4997     }
4998
4999     if (!fp_access_check(s)) {
5000         return;
5001     }
5002
5003     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
5004 }
5005
5006 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
5007 {
5008     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
5009      * without conversion.
5010      */
5011
5012     if (itof) {
5013         TCGv_i64 tcg_rn = cpu_reg(s, rn);
5014
5015         switch (type) {
5016         case 0:
5017         {
5018             /* 32 bit */
5019             TCGv_i64 tmp = tcg_temp_new_i64();
5020             tcg_gen_ext32u_i64(tmp, tcg_rn);
5021             tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
5022             tcg_gen_movi_i64(tmp, 0);
5023             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5024             tcg_temp_free_i64(tmp);
5025             break;
5026         }
5027         case 1:
5028         {
5029             /* 64 bit */
5030             TCGv_i64 tmp = tcg_const_i64(0);
5031             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
5032             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5033             tcg_temp_free_i64(tmp);
5034             break;
5035         }
5036         case 2:
5037             /* 64 bit to top half. */
5038             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
5039             break;
5040         }
5041     } else {
5042         TCGv_i64 tcg_rd = cpu_reg(s, rd);
5043
5044         switch (type) {
5045         case 0:
5046             /* 32 bit */
5047             tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
5048             break;
5049         case 1:
5050             /* 64 bit */
5051             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
5052             break;
5053         case 2:
5054             /* 64 bits from top half */
5055             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
5056             break;
5057         }
5058     }
5059 }
5060
5061 /* C3.6.30 Floating point <-> integer conversions
5062  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
5063  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5064  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
5065  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5066  */
5067 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
5068 {
5069     int rd = extract32(insn, 0, 5);
5070     int rn = extract32(insn, 5, 5);
5071     int opcode = extract32(insn, 16, 3);
5072     int rmode = extract32(insn, 19, 2);
5073     int type = extract32(insn, 22, 2);
5074     bool sbit = extract32(insn, 29, 1);
5075     bool sf = extract32(insn, 31, 1);
5076
5077     if (sbit) {
5078         unallocated_encoding(s);
5079         return;
5080     }
5081
5082     if (opcode > 5) {
5083         /* FMOV */
5084         bool itof = opcode & 1;
5085
5086         if (rmode >= 2) {
5087             unallocated_encoding(s);
5088             return;
5089         }
5090
5091         switch (sf << 3 | type << 1 | rmode) {
5092         case 0x0: /* 32 bit */
5093         case 0xa: /* 64 bit */
5094         case 0xd: /* 64 bit to top half of quad */
5095             break;
5096         default:
5097             /* all other sf/type/rmode combinations are invalid */
5098             unallocated_encoding(s);
5099             break;
5100         }
5101
5102         if (!fp_access_check(s)) {
5103             return;
5104         }
5105         handle_fmov(s, rd, rn, type, itof);
5106     } else {
5107         /* actual FP conversions */
5108         bool itof = extract32(opcode, 1, 1);
5109
5110         if (type > 1 || (rmode != 0 && opcode > 1)) {
5111             unallocated_encoding(s);
5112             return;
5113         }
5114
5115         if (!fp_access_check(s)) {
5116             return;
5117         }
5118         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
5119     }
5120 }
5121
5122 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
5123  *   31  30  29 28     25 24                          0
5124  * +---+---+---+---------+-----------------------------+
5125  * |   | 0 |   | 1 1 1 1 |                             |
5126  * +---+---+---+---------+-----------------------------+
5127  */
5128 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
5129 {
5130     if (extract32(insn, 24, 1)) {
5131         /* Floating point data-processing (3 source) */
5132         disas_fp_3src(s, insn);
5133     } else if (extract32(insn, 21, 1) == 0) {
5134         /* Floating point to fixed point conversions */
5135         disas_fp_fixed_conv(s, insn);
5136     } else {
5137         switch (extract32(insn, 10, 2)) {
5138         case 1:
5139             /* Floating point conditional compare */
5140             disas_fp_ccomp(s, insn);
5141             break;
5142         case 2:
5143             /* Floating point data-processing (2 source) */
5144             disas_fp_2src(s, insn);
5145             break;
5146         case 3:
5147             /* Floating point conditional select */
5148             disas_fp_csel(s, insn);
5149             break;
5150         case 0:
5151             switch (ctz32(extract32(insn, 12, 4))) {
5152             case 0: /* [15:12] == xxx1 */
5153                 /* Floating point immediate */
5154                 disas_fp_imm(s, insn);
5155                 break;
5156             case 1: /* [15:12] == xx10 */
5157                 /* Floating point compare */
5158                 disas_fp_compare(s, insn);
5159                 break;
5160             case 2: /* [15:12] == x100 */
5161                 /* Floating point data-processing (1 source) */
5162                 disas_fp_1src(s, insn);
5163                 break;
5164             case 3: /* [15:12] == 1000 */
5165                 unallocated_encoding(s);
5166                 break;
5167             default: /* [15:12] == 0000 */
5168                 /* Floating point <-> integer conversions */
5169                 disas_fp_int_conv(s, insn);
5170                 break;
5171             }
5172             break;
5173         }
5174     }
5175 }
5176
5177 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
5178                      int pos)
5179 {
5180     /* Extract 64 bits from the middle of two concatenated 64 bit
5181      * vector register slices left:right. The extracted bits start
5182      * at 'pos' bits into the right (least significant) side.
5183      * We return the result in tcg_right, and guarantee not to
5184      * trash tcg_left.
5185      */
5186     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5187     assert(pos > 0 && pos < 64);
5188
5189     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
5190     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
5191     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
5192
5193     tcg_temp_free_i64(tcg_tmp);
5194 }
5195
5196 /* C3.6.1 EXT
5197  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
5198  * +---+---+-------------+-----+---+------+---+------+---+------+------+
5199  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
5200  * +---+---+-------------+-----+---+------+---+------+---+------+------+
5201  */
5202 static void disas_simd_ext(DisasContext *s, uint32_t insn)
5203 {
5204     int is_q = extract32(insn, 30, 1);
5205     int op2 = extract32(insn, 22, 2);
5206     int imm4 = extract32(insn, 11, 4);
5207     int rm = extract32(insn, 16, 5);
5208     int rn = extract32(insn, 5, 5);
5209     int rd = extract32(insn, 0, 5);
5210     int pos = imm4 << 3;
5211     TCGv_i64 tcg_resl, tcg_resh;
5212
5213     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
5214         unallocated_encoding(s);
5215         return;
5216     }
5217
5218     if (!fp_access_check(s)) {
5219         return;
5220     }
5221
5222     tcg_resh = tcg_temp_new_i64();
5223     tcg_resl = tcg_temp_new_i64();
5224
5225     /* Vd gets bits starting at pos bits into Vm:Vn. This is
5226      * either extracting 128 bits from a 128:128 concatenation, or
5227      * extracting 64 bits from a 64:64 concatenation.
5228      */
5229     if (!is_q) {
5230         read_vec_element(s, tcg_resl, rn, 0, MO_64);
5231         if (pos != 0) {
5232             read_vec_element(s, tcg_resh, rm, 0, MO_64);
5233             do_ext64(s, tcg_resh, tcg_resl, pos);
5234         }
5235         tcg_gen_movi_i64(tcg_resh, 0);
5236     } else {
5237         TCGv_i64 tcg_hh;
5238         typedef struct {
5239             int reg;
5240             int elt;
5241         } EltPosns;
5242         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
5243         EltPosns *elt = eltposns;
5244
5245         if (pos >= 64) {
5246             elt++;
5247             pos -= 64;
5248         }
5249
5250         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
5251         elt++;
5252         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
5253         elt++;
5254         if (pos != 0) {
5255             do_ext64(s, tcg_resh, tcg_resl, pos);
5256             tcg_hh = tcg_temp_new_i64();
5257             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
5258             do_ext64(s, tcg_hh, tcg_resh, pos);
5259             tcg_temp_free_i64(tcg_hh);
5260         }
5261     }
5262
5263     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5264     tcg_temp_free_i64(tcg_resl);
5265     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5266     tcg_temp_free_i64(tcg_resh);
5267 }
5268
5269 /* C3.6.2 TBL/TBX
5270  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
5271  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5272  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
5273  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5274  */
5275 static void disas_simd_tb(DisasContext *s, uint32_t insn)
5276 {
5277     int op2 = extract32(insn, 22, 2);
5278     int is_q = extract32(insn, 30, 1);
5279     int rm = extract32(insn, 16, 5);
5280     int rn = extract32(insn, 5, 5);
5281     int rd = extract32(insn, 0, 5);
5282     int is_tblx = extract32(insn, 12, 1);
5283     int len = extract32(insn, 13, 2);
5284     TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
5285     TCGv_i32 tcg_regno, tcg_numregs;
5286
5287     if (op2 != 0) {
5288         unallocated_encoding(s);
5289         return;
5290     }
5291
5292     if (!fp_access_check(s)) {
5293         return;
5294     }
5295
5296     /* This does a table lookup: for every byte element in the input
5297      * we index into a table formed from up to four vector registers,
5298      * and then the output is the result of the lookups. Our helper
5299      * function does the lookup operation for a single 64 bit part of
5300      * the input.
5301      */
5302     tcg_resl = tcg_temp_new_i64();
5303     tcg_resh = tcg_temp_new_i64();
5304
5305     if (is_tblx) {
5306         read_vec_element(s, tcg_resl, rd, 0, MO_64);
5307     } else {
5308         tcg_gen_movi_i64(tcg_resl, 0);
5309     }
5310     if (is_tblx && is_q) {
5311         read_vec_element(s, tcg_resh, rd, 1, MO_64);
5312     } else {
5313         tcg_gen_movi_i64(tcg_resh, 0);
5314     }
5315
5316     tcg_idx = tcg_temp_new_i64();
5317     tcg_regno = tcg_const_i32(rn);
5318     tcg_numregs = tcg_const_i32(len + 1);
5319     read_vec_element(s, tcg_idx, rm, 0, MO_64);
5320     gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
5321                         tcg_regno, tcg_numregs);
5322     if (is_q) {
5323         read_vec_element(s, tcg_idx, rm, 1, MO_64);
5324         gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
5325                             tcg_regno, tcg_numregs);
5326     }
5327     tcg_temp_free_i64(tcg_idx);
5328     tcg_temp_free_i32(tcg_regno);
5329     tcg_temp_free_i32(tcg_numregs);
5330
5331     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5332     tcg_temp_free_i64(tcg_resl);
5333     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5334     tcg_temp_free_i64(tcg_resh);
5335 }
5336
5337 /* C3.6.3 ZIP/UZP/TRN
5338  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
5339  * +---+---+-------------+------+---+------+---+------------------+------+
5340  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
5341  * +---+---+-------------+------+---+------+---+------------------+------+
5342  */
5343 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
5344 {
5345     int rd = extract32(insn, 0, 5);
5346     int rn = extract32(insn, 5, 5);
5347     int rm = extract32(insn, 16, 5);
5348     int size = extract32(insn, 22, 2);
5349     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
5350      * bit 2 indicates 1 vs 2 variant of the insn.
5351      */
5352     int opcode = extract32(insn, 12, 2);
5353     bool part = extract32(insn, 14, 1);
5354     bool is_q = extract32(insn, 30, 1);
5355     int esize = 8 << size;
5356     int i, ofs;
5357     int datasize = is_q ? 128 : 64;
5358     int elements = datasize / esize;
5359     TCGv_i64 tcg_res, tcg_resl, tcg_resh;
5360
5361     if (opcode == 0 || (size == 3 && !is_q)) {
5362         unallocated_encoding(s);
5363         return;
5364     }
5365
5366     if (!fp_access_check(s)) {
5367         return;
5368     }
5369
5370     tcg_resl = tcg_const_i64(0);
5371     tcg_resh = tcg_const_i64(0);
5372     tcg_res = tcg_temp_new_i64();
5373
5374     for (i = 0; i < elements; i++) {
5375         switch (opcode) {
5376         case 1: /* UZP1/2 */
5377         {
5378             int midpoint = elements / 2;
5379             if (i < midpoint) {
5380                 read_vec_element(s, tcg_res, rn, 2 * i + part, size);
5381             } else {
5382                 read_vec_element(s, tcg_res, rm,
5383                                  2 * (i - midpoint) + part, size);
5384             }
5385             break;
5386         }
5387         case 2: /* TRN1/2 */
5388             if (i & 1) {
5389                 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
5390             } else {
5391                 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
5392             }
5393             break;
5394         case 3: /* ZIP1/2 */
5395         {
5396             int base = part * elements / 2;
5397             if (i & 1) {
5398                 read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
5399             } else {
5400                 read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
5401             }
5402             break;
5403         }
5404         default:
5405             g_assert_not_reached();
5406         }
5407
5408         ofs = i * esize;
5409         if (ofs < 64) {
5410             tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
5411             tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
5412         } else {
5413             tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
5414             tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
5415         }
5416     }
5417
5418     tcg_temp_free_i64(tcg_res);
5419
5420     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5421     tcg_temp_free_i64(tcg_resl);
5422     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5423     tcg_temp_free_i64(tcg_resh);
5424 }
5425
5426 static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
5427                         int opc, bool is_min, TCGv_ptr fpst)
5428 {
5429     /* Helper function for disas_simd_across_lanes: do a single precision
5430      * min/max operation on the specified two inputs,
5431      * and return the result in tcg_elt1.
5432      */
5433     if (opc == 0xc) {
5434         if (is_min) {
5435             gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5436         } else {
5437             gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5438         }
5439     } else {
5440         assert(opc == 0xf);
5441         if (is_min) {
5442             gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5443         } else {
5444             gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5445         }
5446     }
5447 }
5448
5449 /* C3.6.4 AdvSIMD across lanes
5450  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
5451  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5452  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
5453  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5454  */
5455 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
5456 {
5457     int rd = extract32(insn, 0, 5);
5458     int rn = extract32(insn, 5, 5);
5459     int size = extract32(insn, 22, 2);
5460     int opcode = extract32(insn, 12, 5);
5461     bool is_q = extract32(insn, 30, 1);
5462     bool is_u = extract32(insn, 29, 1);
5463     bool is_fp = false;
5464     bool is_min = false;
5465     int esize;
5466     int elements;
5467     int i;
5468     TCGv_i64 tcg_res, tcg_elt;
5469
5470     switch (opcode) {
5471     case 0x1b: /* ADDV */
5472         if (is_u) {
5473             unallocated_encoding(s);
5474             return;
5475         }
5476         /* fall through */
5477     case 0x3: /* SADDLV, UADDLV */
5478     case 0xa: /* SMAXV, UMAXV */
5479     case 0x1a: /* SMINV, UMINV */
5480         if (size == 3 || (size == 2 && !is_q)) {
5481             unallocated_encoding(s);
5482             return;
5483         }
5484         break;
5485     case 0xc: /* FMAXNMV, FMINNMV */
5486     case 0xf: /* FMAXV, FMINV */
5487         if (!is_u || !is_q || extract32(size, 0, 1)) {
5488             unallocated_encoding(s);
5489             return;
5490         }
5491         /* Bit 1 of size field encodes min vs max, and actual size is always
5492          * 32 bits: adjust the size variable so following code can rely on it
5493          */
5494         is_min = extract32(size, 1, 1);
5495         is_fp = true;
5496         size = 2;
5497         break;
5498     default:
5499         unallocated_encoding(s);
5500         return;
5501     }
5502
5503     if (!fp_access_check(s)) {
5504         return;
5505     }
5506
5507     esize = 8 << size;
5508     elements = (is_q ? 128 : 64) / esize;
5509
5510     tcg_res = tcg_temp_new_i64();
5511     tcg_elt = tcg_temp_new_i64();
5512
5513     /* These instructions operate across all lanes of a vector
5514      * to produce a single result. We can guarantee that a 64
5515      * bit intermediate is sufficient:
5516      *  + for [US]ADDLV the maximum element size is 32 bits, and
5517      *    the result type is 64 bits
5518      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
5519      *    same as the element size, which is 32 bits at most
5520      * For the integer operations we can choose to work at 64
5521      * or 32 bits and truncate at the end; for simplicity
5522      * we use 64 bits always. The floating point
5523      * ops do require 32 bit intermediates, though.
5524      */
5525     if (!is_fp) {
5526         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
5527
5528         for (i = 1; i < elements; i++) {
5529             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
5530
5531             switch (opcode) {
5532             case 0x03: /* SADDLV / UADDLV */
5533             case 0x1b: /* ADDV */
5534                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
5535                 break;
5536             case 0x0a: /* SMAXV / UMAXV */
5537                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
5538                                     tcg_res,
5539                                     tcg_res, tcg_elt, tcg_res, tcg_elt);
5540                 break;
5541             case 0x1a: /* SMINV / UMINV */
5542                 tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
5543                                     tcg_res,
5544                                     tcg_res, tcg_elt, tcg_res, tcg_elt);
5545                 break;
5546                 break;
5547             default:
5548                 g_assert_not_reached();
5549             }
5550
5551         }
5552     } else {
5553         /* Floating point ops which work on 32 bit (single) intermediates.
5554          * Note that correct NaN propagation requires that we do these
5555          * operations in exactly the order specified by the pseudocode.
5556          */
5557         TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
5558         TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
5559         TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
5560         TCGv_ptr fpst = get_fpstatus_ptr();
5561
5562         assert(esize == 32);
5563         assert(elements == 4);
5564
5565         read_vec_element(s, tcg_elt, rn, 0, MO_32);
5566         tcg_gen_extrl_i64_i32(tcg_elt1, tcg_elt);
5567         read_vec_element(s, tcg_elt, rn, 1, MO_32);
5568         tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5569
5570         do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5571
5572         read_vec_element(s, tcg_elt, rn, 2, MO_32);
5573         tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5574         read_vec_element(s, tcg_elt, rn, 3, MO_32);
5575         tcg_gen_extrl_i64_i32(tcg_elt3, tcg_elt);
5576
5577         do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
5578
5579         do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5580
5581         tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
5582         tcg_temp_free_i32(tcg_elt1);
5583         tcg_temp_free_i32(tcg_elt2);
5584         tcg_temp_free_i32(tcg_elt3);
5585         tcg_temp_free_ptr(fpst);
5586     }
5587
5588     tcg_temp_free_i64(tcg_elt);
5589
5590     /* Now truncate the result to the width required for the final output */
5591     if (opcode == 0x03) {
5592         /* SADDLV, UADDLV: result is 2*esize */
5593         size++;
5594     }
5595
5596     switch (size) {
5597     case 0:
5598         tcg_gen_ext8u_i64(tcg_res, tcg_res);
5599         break;
5600     case 1:
5601         tcg_gen_ext16u_i64(tcg_res, tcg_res);
5602         break;
5603     case 2:
5604         tcg_gen_ext32u_i64(tcg_res, tcg_res);
5605         break;
5606     case 3:
5607         break;
5608     default:
5609         g_assert_not_reached();
5610     }
5611
5612     write_fp_dreg(s, rd, tcg_res);
5613     tcg_temp_free_i64(tcg_res);
5614 }
5615
5616 /* C6.3.31 DUP (Element, Vector)
5617  *
5618  *  31  30   29              21 20    16 15        10  9    5 4    0
5619  * +---+---+-------------------+--------+-------------+------+------+
5620  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5621  * +---+---+-------------------+--------+-------------+------+------+
5622  *
5623  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5624  */
5625 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
5626                              int imm5)
5627 {
5628     int size = ctz32(imm5);
5629     int esize = 8 << size;
5630     int elements = (is_q ? 128 : 64) / esize;
5631     int index, i;
5632     TCGv_i64 tmp;
5633
5634     if (size > 3 || (size == 3 && !is_q)) {
5635         unallocated_encoding(s);
5636         return;
5637     }
5638
5639     if (!fp_access_check(s)) {
5640         return;
5641     }
5642
5643     index = imm5 >> (size + 1);
5644
5645     tmp = tcg_temp_new_i64();
5646     read_vec_element(s, tmp, rn, index, size);
5647
5648     for (i = 0; i < elements; i++) {
5649         write_vec_element(s, tmp, rd, i, size);
5650     }
5651
5652     if (!is_q) {
5653         clear_vec_high(s, rd);
5654     }
5655
5656     tcg_temp_free_i64(tmp);
5657 }
5658
5659 /* C6.3.31 DUP (element, scalar)
5660  *  31                   21 20    16 15        10  9    5 4    0
5661  * +-----------------------+--------+-------------+------+------+
5662  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5663  * +-----------------------+--------+-------------+------+------+
5664  */
5665 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
5666                               int imm5)
5667 {
5668     int size = ctz32(imm5);
5669     int index;
5670     TCGv_i64 tmp;
5671
5672     if (size > 3) {
5673         unallocated_encoding(s);
5674         return;
5675     }
5676
5677     if (!fp_access_check(s)) {
5678         return;
5679     }
5680
5681     index = imm5 >> (size + 1);
5682
5683     /* This instruction just extracts the specified element and
5684      * zero-extends it into the bottom of the destination register.
5685      */
5686     tmp = tcg_temp_new_i64();
5687     read_vec_element(s, tmp, rn, index, size);
5688     write_fp_dreg(s, rd, tmp);
5689     tcg_temp_free_i64(tmp);
5690 }
5691
5692 /* C6.3.32 DUP (General)
5693  *
5694  *  31  30   29              21 20    16 15        10  9    5 4    0
5695  * +---+---+-------------------+--------+-------------+------+------+
5696  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
5697  * +---+---+-------------------+--------+-------------+------+------+
5698  *
5699  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5700  */
5701 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
5702                              int imm5)
5703 {
5704     int size = ctz32(imm5);
5705     int esize = 8 << size;
5706     int elements = (is_q ? 128 : 64)/esize;
5707     int i = 0;
5708
5709     if (size > 3 || ((size == 3) && !is_q)) {
5710         unallocated_encoding(s);
5711         return;
5712     }
5713
5714     if (!fp_access_check(s)) {
5715         return;
5716     }
5717
5718     for (i = 0; i < elements; i++) {
5719         write_vec_element(s, cpu_reg(s, rn), rd, i, size);
5720     }
5721     if (!is_q) {
5722         clear_vec_high(s, rd);
5723     }
5724 }
5725
5726 /* C6.3.150 INS (Element)
5727  *
5728  *  31                   21 20    16 15  14    11  10 9    5 4    0
5729  * +-----------------------+--------+------------+---+------+------+
5730  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
5731  * +-----------------------+--------+------------+---+------+------+
5732  *
5733  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5734  * index: encoded in imm5<4:size+1>
5735  */
5736 static void handle_simd_inse(DisasContext *s, int rd, int rn,
5737                              int imm4, int imm5)
5738 {
5739     int size = ctz32(imm5);
5740     int src_index, dst_index;
5741     TCGv_i64 tmp;
5742
5743     if (size > 3) {
5744         unallocated_encoding(s);
5745         return;
5746     }
5747
5748     if (!fp_access_check(s)) {
5749         return;
5750     }
5751
5752     dst_index = extract32(imm5, 1+size, 5);
5753     src_index = extract32(imm4, size, 4);
5754
5755     tmp = tcg_temp_new_i64();
5756
5757     read_vec_element(s, tmp, rn, src_index, size);
5758     write_vec_element(s, tmp, rd, dst_index, size);
5759
5760     tcg_temp_free_i64(tmp);
5761 }
5762
5763
5764 /* C6.3.151 INS (General)
5765  *
5766  *  31                   21 20    16 15        10  9    5 4    0
5767  * +-----------------------+--------+-------------+------+------+
5768  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
5769  * +-----------------------+--------+-------------+------+------+
5770  *
5771  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5772  * index: encoded in imm5<4:size+1>
5773  */
5774 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
5775 {
5776     int size = ctz32(imm5);
5777     int idx;
5778
5779     if (size > 3) {
5780         unallocated_encoding(s);
5781         return;
5782     }
5783
5784     if (!fp_access_check(s)) {
5785         return;
5786     }
5787
5788     idx = extract32(imm5, 1 + size, 4 - size);
5789     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
5790 }
5791
5792 /*
5793  * C6.3.321 UMOV (General)
5794  * C6.3.237 SMOV (General)
5795  *
5796  *  31  30   29              21 20    16 15    12   10 9    5 4    0
5797  * +---+---+-------------------+--------+-------------+------+------+
5798  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
5799  * +---+---+-------------------+--------+-------------+------+------+
5800  *
5801  * U: unsigned when set
5802  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5803  */
5804 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
5805                                   int rn, int rd, int imm5)
5806 {
5807     int size = ctz32(imm5);
5808     int element;
5809     TCGv_i64 tcg_rd;
5810
5811     /* Check for UnallocatedEncodings */
5812     if (is_signed) {
5813         if (size > 2 || (size == 2 && !is_q)) {
5814             unallocated_encoding(s);
5815             return;
5816         }
5817     } else {
5818         if (size > 3
5819             || (size < 3 && is_q)
5820             || (size == 3 && !is_q)) {
5821             unallocated_encoding(s);
5822             return;
5823         }
5824     }
5825
5826     if (!fp_access_check(s)) {
5827         return;
5828     }
5829
5830     element = extract32(imm5, 1+size, 4);
5831
5832     tcg_rd = cpu_reg(s, rd);
5833     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
5834     if (is_signed && !is_q) {
5835         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5836     }
5837 }
5838
5839 /* C3.6.5 AdvSIMD copy
5840  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
5841  * +---+---+----+-----------------+------+---+------+---+------+------+
5842  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
5843  * +---+---+----+-----------------+------+---+------+---+------+------+
5844  */
5845 static void disas_simd_copy(DisasContext *s, uint32_t insn)
5846 {
5847     int rd = extract32(insn, 0, 5);
5848     int rn = extract32(insn, 5, 5);
5849     int imm4 = extract32(insn, 11, 4);
5850     int op = extract32(insn, 29, 1);
5851     int is_q = extract32(insn, 30, 1);
5852     int imm5 = extract32(insn, 16, 5);
5853
5854     if (op) {
5855         if (is_q) {
5856             /* INS (element) */
5857             handle_simd_inse(s, rd, rn, imm4, imm5);
5858         } else {
5859             unallocated_encoding(s);
5860         }
5861     } else {
5862         switch (imm4) {
5863         case 0:
5864             /* DUP (element - vector) */
5865             handle_simd_dupe(s, is_q, rd, rn, imm5);
5866             break;
5867         case 1:
5868             /* DUP (general) */
5869             handle_simd_dupg(s, is_q, rd, rn, imm5);
5870             break;
5871         case 3:
5872             if (is_q) {
5873                 /* INS (general) */
5874                 handle_simd_insg(s, rd, rn, imm5);
5875             } else {
5876                 unallocated_encoding(s);
5877             }
5878             break;
5879         case 5:
5880         case 7:
5881             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
5882             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
5883             break;
5884         default:
5885             unallocated_encoding(s);
5886             break;
5887         }
5888     }
5889 }
5890
5891 /* C3.6.6 AdvSIMD modified immediate
5892  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
5893  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
5894  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
5895  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
5896  *
5897  * There are a number of operations that can be carried out here:
5898  *   MOVI - move (shifted) imm into register
5899  *   MVNI - move inverted (shifted) imm into register
5900  *   ORR  - bitwise OR of (shifted) imm with register
5901  *   BIC  - bitwise clear of (shifted) imm with register
5902  */
5903 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
5904 {
5905     int rd = extract32(insn, 0, 5);
5906     int cmode = extract32(insn, 12, 4);
5907     int cmode_3_1 = extract32(cmode, 1, 3);
5908     int cmode_0 = extract32(cmode, 0, 1);
5909     int o2 = extract32(insn, 11, 1);
5910     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
5911     bool is_neg = extract32(insn, 29, 1);
5912     bool is_q = extract32(insn, 30, 1);
5913     uint64_t imm = 0;
5914     TCGv_i64 tcg_rd, tcg_imm;
5915     int i;
5916
5917     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
5918         unallocated_encoding(s);
5919         return;
5920     }
5921
5922     if (!fp_access_check(s)) {
5923         return;
5924     }
5925
5926     /* See AdvSIMDExpandImm() in ARM ARM */
5927     switch (cmode_3_1) {
5928     case 0: /* Replicate(Zeros(24):imm8, 2) */
5929     case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
5930     case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
5931     case 3: /* Replicate(imm8:Zeros(24), 2) */
5932     {
5933         int shift = cmode_3_1 * 8;
5934         imm = bitfield_replicate(abcdefgh << shift, 32);
5935         break;
5936     }
5937     case 4: /* Replicate(Zeros(8):imm8, 4) */
5938     case 5: /* Replicate(imm8:Zeros(8), 4) */
5939     {
5940         int shift = (cmode_3_1 & 0x1) * 8;
5941         imm = bitfield_replicate(abcdefgh << shift, 16);
5942         break;
5943     }
5944     case 6:
5945         if (cmode_0) {
5946             /* Replicate(Zeros(8):imm8:Ones(16), 2) */
5947             imm = (abcdefgh << 16) | 0xffff;
5948         } else {
5949             /* Replicate(Zeros(16):imm8:Ones(8), 2) */
5950             imm = (abcdefgh << 8) | 0xff;
5951         }
5952         imm = bitfield_replicate(imm, 32);
5953         break;
5954     case 7:
5955         if (!cmode_0 && !is_neg) {
5956             imm = bitfield_replicate(abcdefgh, 8);
5957         } else if (!cmode_0 && is_neg) {
5958             int i;
5959             imm = 0;
5960             for (i = 0; i < 8; i++) {
5961                 if ((abcdefgh) & (1 << i)) {
5962                     imm |= 0xffULL << (i * 8);
5963                 }
5964             }
5965         } else if (cmode_0) {
5966             if (is_neg) {
5967                 imm = (abcdefgh & 0x3f) << 48;
5968                 if (abcdefgh & 0x80) {
5969                     imm |= 0x8000000000000000ULL;
5970                 }
5971                 if (abcdefgh & 0x40) {
5972                     imm |= 0x3fc0000000000000ULL;
5973                 } else {
5974                     imm |= 0x4000000000000000ULL;
5975                 }
5976             } else {
5977                 imm = (abcdefgh & 0x3f) << 19;
5978                 if (abcdefgh & 0x80) {
5979                     imm |= 0x80000000;
5980                 }
5981                 if (abcdefgh & 0x40) {
5982                     imm |= 0x3e000000;
5983                 } else {
5984                     imm |= 0x40000000;
5985                 }
5986                 imm |= (imm << 32);
5987             }
5988         }
5989         break;
5990     }
5991
5992     if (cmode_3_1 != 7 && is_neg) {
5993         imm = ~imm;
5994     }
5995
5996     tcg_imm = tcg_const_i64(imm);
5997     tcg_rd = new_tmp_a64(s);
5998
5999     for (i = 0; i < 2; i++) {
6000         int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64);
6001
6002         if (i == 1 && !is_q) {
6003             /* non-quad ops clear high half of vector */
6004             tcg_gen_movi_i64(tcg_rd, 0);
6005         } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
6006             tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
6007             if (is_neg) {
6008                 /* AND (BIC) */
6009                 tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
6010             } else {
6011                 /* ORR */
6012                 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
6013             }
6014         } else {
6015             /* MOVI */
6016             tcg_gen_mov_i64(tcg_rd, tcg_imm);
6017         }
6018         tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
6019     }
6020
6021     tcg_temp_free_i64(tcg_imm);
6022 }
6023
6024 /* C3.6.7 AdvSIMD scalar copy
6025  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
6026  * +-----+----+-----------------+------+---+------+---+------+------+
6027  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6028  * +-----+----+-----------------+------+---+------+---+------+------+
6029  */
6030 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
6031 {
6032     int rd = extract32(insn, 0, 5);
6033     int rn = extract32(insn, 5, 5);
6034     int imm4 = extract32(insn, 11, 4);
6035     int imm5 = extract32(insn, 16, 5);
6036     int op = extract32(insn, 29, 1);
6037
6038     if (op != 0 || imm4 != 0) {
6039         unallocated_encoding(s);
6040         return;
6041     }
6042
6043     /* DUP (element, scalar) */
6044     handle_simd_dupes(s, rd, rn, imm5);
6045 }
6046
6047 /* C3.6.8 AdvSIMD scalar pairwise
6048  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
6049  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6050  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6051  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6052  */
6053 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
6054 {
6055     int u = extract32(insn, 29, 1);
6056     int size = extract32(insn, 22, 2);
6057     int opcode = extract32(insn, 12, 5);
6058     int rn = extract32(insn, 5, 5);
6059     int rd = extract32(insn, 0, 5);
6060     TCGv_ptr fpst;
6061
6062     /* For some ops (the FP ones), size[1] is part of the encoding.
6063      * For ADDP strictly it is not but size[1] is always 1 for valid
6064      * encodings.
6065      */
6066     opcode |= (extract32(size, 1, 1) << 5);
6067
6068     switch (opcode) {
6069     case 0x3b: /* ADDP */
6070         if (u || size != 3) {
6071             unallocated_encoding(s);
6072             return;
6073         }
6074         if (!fp_access_check(s)) {
6075             return;
6076         }
6077
6078         TCGV_UNUSED_PTR(fpst);
6079         break;
6080     case 0xc: /* FMAXNMP */
6081     case 0xd: /* FADDP */
6082     case 0xf: /* FMAXP */
6083     case 0x2c: /* FMINNMP */
6084     case 0x2f: /* FMINP */
6085         /* FP op, size[0] is 32 or 64 bit */
6086         if (!u) {
6087             unallocated_encoding(s);
6088             return;
6089         }
6090         if (!fp_access_check(s)) {
6091             return;
6092         }
6093
6094         size = extract32(size, 0, 1) ? 3 : 2;
6095         fpst = get_fpstatus_ptr();
6096         break;
6097     default:
6098         unallocated_encoding(s);
6099         return;
6100     }
6101
6102     if (size == 3) {
6103         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6104         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6105         TCGv_i64 tcg_res = tcg_temp_new_i64();
6106
6107         read_vec_element(s, tcg_op1, rn, 0, MO_64);
6108         read_vec_element(s, tcg_op2, rn, 1, MO_64);
6109
6110         switch (opcode) {
6111         case 0x3b: /* ADDP */
6112             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
6113             break;
6114         case 0xc: /* FMAXNMP */
6115             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6116             break;
6117         case 0xd: /* FADDP */
6118             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6119             break;
6120         case 0xf: /* FMAXP */
6121             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6122             break;
6123         case 0x2c: /* FMINNMP */
6124             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6125             break;
6126         case 0x2f: /* FMINP */
6127             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6128             break;
6129         default:
6130             g_assert_not_reached();
6131         }
6132
6133         write_fp_dreg(s, rd, tcg_res);
6134
6135         tcg_temp_free_i64(tcg_op1);
6136         tcg_temp_free_i64(tcg_op2);
6137         tcg_temp_free_i64(tcg_res);
6138     } else {
6139         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6140         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6141         TCGv_i32 tcg_res = tcg_temp_new_i32();
6142
6143         read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
6144         read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);
6145
6146         switch (opcode) {
6147         case 0xc: /* FMAXNMP */
6148             gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6149             break;
6150         case 0xd: /* FADDP */
6151             gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6152             break;
6153         case 0xf: /* FMAXP */
6154             gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6155             break;
6156         case 0x2c: /* FMINNMP */
6157             gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6158             break;
6159         case 0x2f: /* FMINP */
6160             gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6161             break;
6162         default:
6163             g_assert_not_reached();
6164         }
6165
6166         write_fp_sreg(s, rd, tcg_res);
6167
6168         tcg_temp_free_i32(tcg_op1);
6169         tcg_temp_free_i32(tcg_op2);
6170         tcg_temp_free_i32(tcg_res);
6171     }
6172
6173     if (!TCGV_IS_UNUSED_PTR(fpst)) {
6174         tcg_temp_free_ptr(fpst);
6175     }
6176 }
6177
6178 /*
6179  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
6180  *
 * This code handles the common shifting logic and is used by both
 * the vector and scalar code.
6183  */
6184 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6185                                     TCGv_i64 tcg_rnd, bool accumulate,
6186                                     bool is_u, int size, int shift)
6187 {
6188     bool extended_result = false;
6189     bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
6190     int ext_lshift = 0;
6191     TCGv_i64 tcg_src_hi;
6192
6193     if (round && size == 3) {
6194         extended_result = true;
6195         ext_lshift = 64 - shift;
6196         tcg_src_hi = tcg_temp_new_i64();
6197     } else if (shift == 64) {
6198         if (!accumulate && is_u) {
6199             /* result is zero */
6200             tcg_gen_movi_i64(tcg_res, 0);
6201             return;
6202         }
6203     }
6204
6205     /* Deal with the rounding step */
6206     if (round) {
6207         if (extended_result) {
6208             TCGv_i64 tcg_zero = tcg_const_i64(0);
6209             if (!is_u) {
6210                 /* take care of sign extending tcg_res */
6211                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
6212                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6213                                  tcg_src, tcg_src_hi,
6214                                  tcg_rnd, tcg_zero);
6215             } else {
6216                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6217                                  tcg_src, tcg_zero,
6218                                  tcg_rnd, tcg_zero);
6219             }
6220             tcg_temp_free_i64(tcg_zero);
6221         } else {
6222             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
6223         }
6224     }
6225
6226     /* Now do the shift right */
6227     if (round && extended_result) {
6228         /* extended case, >64 bit precision required */
6229         if (ext_lshift == 0) {
6230             /* special case, only high bits matter */
6231             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
6232         } else {
6233             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6234             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
6235             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
6236         }
6237     } else {
6238         if (is_u) {
6239             if (shift == 64) {
6240                 /* essentially shifting in 64 zeros */
6241                 tcg_gen_movi_i64(tcg_src, 0);
6242             } else {
6243                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6244             }
6245         } else {
6246             if (shift == 64) {
6247                 /* effectively extending the sign-bit */
6248                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
6249             } else {
6250                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
6251             }
6252         }
6253     }
6254
6255     if (accumulate) {
6256         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
6257     } else {
6258         tcg_gen_mov_i64(tcg_res, tcg_src);
6259     }
6260
6261     if (extended_result) {
6262         tcg_temp_free_i64(tcg_src_hi);
6263     }
6264 }
6265
6266 /* Common SHL/SLI - Shift left with an optional insert */
6267 static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6268                                  bool insert, int shift)
6269 {
6270     if (insert) { /* SLI */
6271         tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
6272     } else { /* SHL */
6273         tcg_gen_shli_i64(tcg_res, tcg_src, shift);
6274     }
6275 }
6276
6277 /* SRI: shift right with insert */
6278 static void handle_shri_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6279                                  int size, int shift)
6280 {
6281     int esize = 8 << size;
6282
6283     /* shift count same as element size is valid but does nothing;
6284      * special case to avoid potential shift by 64.
6285      */
6286     if (shift != esize) {
6287         tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6288         tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, 0, esize - shift);
6289     }
6290 }
6291
6292 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
6293 static void handle_scalar_simd_shri(DisasContext *s,
6294                                     bool is_u, int immh, int immb,
6295                                     int opcode, int rn, int rd)
6296 {
6297     const int size = 3;
6298     int immhb = immh << 3 | immb;
6299     int shift = 2 * (8 << size) - immhb;
6300     bool accumulate = false;
6301     bool round = false;
6302     bool insert = false;
6303     TCGv_i64 tcg_rn;
6304     TCGv_i64 tcg_rd;
6305     TCGv_i64 tcg_round;
6306
6307     if (!extract32(immh, 3, 1)) {
6308         unallocated_encoding(s);
6309         return;
6310     }
6311
6312     if (!fp_access_check(s)) {
6313         return;
6314     }
6315
6316     switch (opcode) {
6317     case 0x02: /* SSRA / USRA (accumulate) */
6318         accumulate = true;
6319         break;
6320     case 0x04: /* SRSHR / URSHR (rounding) */
6321         round = true;
6322         break;
6323     case 0x06: /* SRSRA / URSRA (accum + rounding) */
6324         accumulate = round = true;
6325         break;
6326     case 0x08: /* SRI */
6327         insert = true;
6328         break;
6329     }
6330
6331     if (round) {
6332         uint64_t round_const = 1ULL << (shift - 1);
6333         tcg_round = tcg_const_i64(round_const);
6334     } else {
6335         TCGV_UNUSED_I64(tcg_round);
6336     }
6337
6338     tcg_rn = read_fp_dreg(s, rn);
6339     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6340
6341     if (insert) {
6342         handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
6343     } else {
6344         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6345                                 accumulate, is_u, size, shift);
6346     }
6347
6348     write_fp_dreg(s, rd, tcg_rd);
6349
6350     tcg_temp_free_i64(tcg_rn);
6351     tcg_temp_free_i64(tcg_rd);
6352     if (round) {
6353         tcg_temp_free_i64(tcg_round);
6354     }
6355 }
6356
6357 /* SHL/SLI - Scalar shift left */
6358 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
6359                                     int immh, int immb, int opcode,
6360                                     int rn, int rd)
6361 {
6362     int size = 32 - clz32(immh) - 1;
6363     int immhb = immh << 3 | immb;
6364     int shift = immhb - (8 << size);
6365     TCGv_i64 tcg_rn = new_tmp_a64(s);
6366     TCGv_i64 tcg_rd = new_tmp_a64(s);
6367
6368     if (!extract32(immh, 3, 1)) {
6369         unallocated_encoding(s);
6370         return;
6371     }
6372
6373     if (!fp_access_check(s)) {
6374         return;
6375     }
6376
6377     tcg_rn = read_fp_dreg(s, rn);
6378     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6379
6380     handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
6381
6382     write_fp_dreg(s, rd, tcg_rd);
6383
6384     tcg_temp_free_i64(tcg_rn);
6385     tcg_temp_free_i64(tcg_rd);
6386 }
6387
6388 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
6389  * (signed/unsigned) narrowing */
6390 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
6391                                    bool is_u_shift, bool is_u_narrow,
6392                                    int immh, int immb, int opcode,
6393                                    int rn, int rd)
6394 {
6395     int immhb = immh << 3 | immb;
6396     int size = 32 - clz32(immh) - 1;
6397     int esize = 8 << size;
6398     int shift = (2 * esize) - immhb;
6399     int elements = is_scalar ? 1 : (64 / esize);
6400     bool round = extract32(opcode, 0, 1);
6401     TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
6402     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
6403     TCGv_i32 tcg_rd_narrowed;
6404     TCGv_i64 tcg_final;
6405
6406     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
6407         { gen_helper_neon_narrow_sat_s8,
6408           gen_helper_neon_unarrow_sat8 },
6409         { gen_helper_neon_narrow_sat_s16,
6410           gen_helper_neon_unarrow_sat16 },
6411         { gen_helper_neon_narrow_sat_s32,
6412           gen_helper_neon_unarrow_sat32 },
6413         { NULL, NULL },
6414     };
6415     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
6416         gen_helper_neon_narrow_sat_u8,
6417         gen_helper_neon_narrow_sat_u16,
6418         gen_helper_neon_narrow_sat_u32,
6419         NULL
6420     };
6421     NeonGenNarrowEnvFn *narrowfn;
6422
6423     int i;
6424
6425     assert(size < 4);
6426
6427     if (extract32(immh, 3, 1)) {
6428         unallocated_encoding(s);
6429         return;
6430     }
6431
6432     if (!fp_access_check(s)) {
6433         return;
6434     }
6435
6436     if (is_u_shift) {
6437         narrowfn = unsigned_narrow_fns[size];
6438     } else {
6439         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
6440     }
6441
6442     tcg_rn = tcg_temp_new_i64();
6443     tcg_rd = tcg_temp_new_i64();
6444     tcg_rd_narrowed = tcg_temp_new_i32();
6445     tcg_final = tcg_const_i64(0);
6446
6447     if (round) {
6448         uint64_t round_const = 1ULL << (shift - 1);
6449         tcg_round = tcg_const_i64(round_const);
6450     } else {
6451         TCGV_UNUSED_I64(tcg_round);
6452     }
6453
6454     for (i = 0; i < elements; i++) {
6455         read_vec_element(s, tcg_rn, rn, i, ldop);
6456         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6457                                 false, is_u_shift, size+1, shift);
6458         narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
6459         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
6460         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
6461     }
6462
6463     if (!is_q) {
6464         clear_vec_high(s, rd);
6465         write_vec_element(s, tcg_final, rd, 0, MO_64);
6466     } else {
6467         write_vec_element(s, tcg_final, rd, 1, MO_64);
6468     }
6469
6470     if (round) {
6471         tcg_temp_free_i64(tcg_round);
6472     }
6473     tcg_temp_free_i64(tcg_rn);
6474     tcg_temp_free_i64(tcg_rd);
6475     tcg_temp_free_i32(tcg_rd_narrowed);
6476     tcg_temp_free_i64(tcg_final);
6477     return;
6478 }
6479
6480 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
6481 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
6482                              bool src_unsigned, bool dst_unsigned,
6483                              int immh, int immb, int rn, int rd)
6484 {
6485     int immhb = immh << 3 | immb;
6486     int size = 32 - clz32(immh) - 1;
6487     int shift = immhb - (8 << size);
6488     int pass;
6489
6490     assert(immh != 0);
6491     assert(!(scalar && is_q));
6492
6493     if (!scalar) {
6494         if (!is_q && extract32(immh, 3, 1)) {
6495             unallocated_encoding(s);
6496             return;
6497         }
6498
6499         /* Since we use the variable-shift helpers we must
6500          * replicate the shift count into each element of
6501          * the tcg_shift value.
6502          */
6503         switch (size) {
6504         case 0:
6505             shift |= shift << 8;
6506             /* fall through */
6507         case 1:
6508             shift |= shift << 16;
6509             break;
6510         case 2:
6511         case 3:
6512             break;
6513         default:
6514             g_assert_not_reached();
6515         }
6516     }
6517
6518     if (!fp_access_check(s)) {
6519         return;
6520     }
6521
6522     if (size == 3) {
6523         TCGv_i64 tcg_shift = tcg_const_i64(shift);
6524         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
6525             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
6526             { NULL, gen_helper_neon_qshl_u64 },
6527         };
6528         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
6529         int maxpass = is_q ? 2 : 1;
6530
6531         for (pass = 0; pass < maxpass; pass++) {
6532             TCGv_i64 tcg_op = tcg_temp_new_i64();
6533
6534             read_vec_element(s, tcg_op, rn, pass, MO_64);
6535             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6536             write_vec_element(s, tcg_op, rd, pass, MO_64);
6537
6538             tcg_temp_free_i64(tcg_op);
6539         }
6540         tcg_temp_free_i64(tcg_shift);
6541
6542         if (!is_q) {
6543             clear_vec_high(s, rd);
6544         }
6545     } else {
6546         TCGv_i32 tcg_shift = tcg_const_i32(shift);
6547         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
6548             {
6549                 { gen_helper_neon_qshl_s8,
6550                   gen_helper_neon_qshl_s16,
6551                   gen_helper_neon_qshl_s32 },
6552                 { gen_helper_neon_qshlu_s8,
6553                   gen_helper_neon_qshlu_s16,
6554                   gen_helper_neon_qshlu_s32 }
6555             }, {
6556                 { NULL, NULL, NULL },
6557                 { gen_helper_neon_qshl_u8,
6558                   gen_helper_neon_qshl_u16,
6559                   gen_helper_neon_qshl_u32 }
6560             }
6561         };
6562         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
6563         TCGMemOp memop = scalar ? size : MO_32;
6564         int maxpass = scalar ? 1 : is_q ? 4 : 2;
6565
6566         for (pass = 0; pass < maxpass; pass++) {
6567             TCGv_i32 tcg_op = tcg_temp_new_i32();
6568
6569             read_vec_element_i32(s, tcg_op, rn, pass, memop);
6570             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6571             if (scalar) {
6572                 switch (size) {
6573                 case 0:
6574                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
6575                     break;
6576                 case 1:
6577                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
6578                     break;
6579                 case 2:
6580                     break;
6581                 default:
6582                     g_assert_not_reached();
6583                 }
6584                 write_fp_sreg(s, rd, tcg_op);
6585             } else {
6586                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6587             }
6588
6589             tcg_temp_free_i32(tcg_op);
6590         }
6591         tcg_temp_free_i32(tcg_shift);
6592
6593         if (!is_q && !scalar) {
6594             clear_vec_high(s, rd);
6595         }
6596     }
6597 }
6598
6599 /* Common vector code for handling integer to FP conversion */
6600 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
6601                                    int elements, int is_signed,
6602                                    int fracbits, int size)
6603 {
6604     bool is_double = size == 3 ? true : false;
6605     TCGv_ptr tcg_fpst = get_fpstatus_ptr();
6606     TCGv_i32 tcg_shift = tcg_const_i32(fracbits);
6607     TCGv_i64 tcg_int = tcg_temp_new_i64();
6608     TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
6609     int pass;
6610
6611     for (pass = 0; pass < elements; pass++) {
6612         read_vec_element(s, tcg_int, rn, pass, mop);
6613
6614         if (is_double) {
6615             TCGv_i64 tcg_double = tcg_temp_new_i64();
6616             if (is_signed) {
6617                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6618                                      tcg_shift, tcg_fpst);
6619             } else {
6620                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6621                                      tcg_shift, tcg_fpst);
6622             }
6623             if (elements == 1) {
6624                 write_fp_dreg(s, rd, tcg_double);
6625             } else {
6626                 write_vec_element(s, tcg_double, rd, pass, MO_64);
6627             }
6628             tcg_temp_free_i64(tcg_double);
6629         } else {
6630             TCGv_i32 tcg_single = tcg_temp_new_i32();
6631             if (is_signed) {
6632                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6633                                      tcg_shift, tcg_fpst);
6634             } else {
6635                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6636                                      tcg_shift, tcg_fpst);
6637             }
6638             if (elements == 1) {
6639                 write_fp_sreg(s, rd, tcg_single);
6640             } else {
6641                 write_vec_element_i32(s, tcg_single, rd, pass, MO_32);
6642             }
6643             tcg_temp_free_i32(tcg_single);
6644         }
6645     }
6646
6647     if (!is_double && elements == 2) {
6648         clear_vec_high(s, rd);
6649     }
6650
6651     tcg_temp_free_i64(tcg_int);
6652     tcg_temp_free_ptr(tcg_fpst);
6653     tcg_temp_free_i32(tcg_shift);
6654 }
6655
6656 /* UCVTF/SCVTF - Integer to FP conversion */
6657 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
6658                                          bool is_q, bool is_u,
6659                                          int immh, int immb, int opcode,
6660                                          int rn, int rd)
6661 {
6662     bool is_double = extract32(immh, 3, 1);
6663     int size = is_double ? MO_64 : MO_32;
6664     int elements;
6665     int immhb = immh << 3 | immb;
6666     int fracbits = (is_double ? 128 : 64) - immhb;
6667
6668     if (!extract32(immh, 2, 2)) {
6669         unallocated_encoding(s);
6670         return;
6671     }
6672
6673     if (is_scalar) {
6674         elements = 1;
6675     } else {
6676         elements = is_double ? 2 : is_q ? 4 : 2;
6677         if (is_double && !is_q) {
6678             unallocated_encoding(s);
6679             return;
6680         }
6681     }
6682
6683     if (!fp_access_check(s)) {
6684         return;
6685     }
6686
6687     /* immh == 0 would be a failure of the decode logic */
6688     g_assert(immh);
6689
6690     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
6691 }
6692
6693 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */
6694 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
6695                                          bool is_q, bool is_u,
6696                                          int immh, int immb, int rn, int rd)
6697 {
6698     bool is_double = extract32(immh, 3, 1);
6699     int immhb = immh << 3 | immb;
6700     int fracbits = (is_double ? 128 : 64) - immhb;
6701     int pass;
6702     TCGv_ptr tcg_fpstatus;
6703     TCGv_i32 tcg_rmode, tcg_shift;
6704
6705     if (!extract32(immh, 2, 2)) {
6706         unallocated_encoding(s);
6707         return;
6708     }
6709
6710     if (!is_scalar && !is_q && is_double) {
6711         unallocated_encoding(s);
6712         return;
6713     }
6714
6715     if (!fp_access_check(s)) {
6716         return;
6717     }
6718
6719     assert(!(is_scalar && is_q));
6720
6721     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
6722     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6723     tcg_fpstatus = get_fpstatus_ptr();
6724     tcg_shift = tcg_const_i32(fracbits);
6725
6726     if (is_double) {
6727         int maxpass = is_scalar ? 1 : 2;
6728
6729         for (pass = 0; pass < maxpass; pass++) {
6730             TCGv_i64 tcg_op = tcg_temp_new_i64();
6731
6732             read_vec_element(s, tcg_op, rn, pass, MO_64);
6733             if (is_u) {
6734                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6735             } else {
6736                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6737             }
6738             write_vec_element(s, tcg_op, rd, pass, MO_64);
6739             tcg_temp_free_i64(tcg_op);
6740         }
6741         if (!is_q) {
6742             clear_vec_high(s, rd);
6743         }
6744     } else {
6745         int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
6746         for (pass = 0; pass < maxpass; pass++) {
6747             TCGv_i32 tcg_op = tcg_temp_new_i32();
6748
6749             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
6750             if (is_u) {
6751                 gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6752             } else {
6753                 gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6754             }
6755             if (is_scalar) {
6756                 write_fp_sreg(s, rd, tcg_op);
6757             } else {
6758                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6759             }
6760             tcg_temp_free_i32(tcg_op);
6761         }
6762         if (!is_q && !is_scalar) {
6763             clear_vec_high(s, rd);
6764         }
6765     }
6766
6767     tcg_temp_free_ptr(tcg_fpstatus);
6768     tcg_temp_free_i32(tcg_shift);
6769     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6770     tcg_temp_free_i32(tcg_rmode);
6771 }
6772
6773 /* C3.6.9 AdvSIMD scalar shift by immediate
6774  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
6775  * +-----+---+-------------+------+------+--------+---+------+------+
6776  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
6777  * +-----+---+-------------+------+------+--------+---+------+------+
6778  *
6779  * This is the scalar version so it works on a fixed sized registers
6780  */
6781 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
6782 {
6783     int rd = extract32(insn, 0, 5);
6784     int rn = extract32(insn, 5, 5);
6785     int opcode = extract32(insn, 11, 5);
6786     int immb = extract32(insn, 16, 3);
6787     int immh = extract32(insn, 19, 4);
6788     bool is_u = extract32(insn, 29, 1);
6789
6790     if (immh == 0) {
6791         unallocated_encoding(s);
6792         return;
6793     }
6794
6795     switch (opcode) {
6796     case 0x08: /* SRI */
6797         if (!is_u) {
6798             unallocated_encoding(s);
6799             return;
6800         }
6801         /* fall through */
6802     case 0x00: /* SSHR / USHR */
6803     case 0x02: /* SSRA / USRA */
6804     case 0x04: /* SRSHR / URSHR */
6805     case 0x06: /* SRSRA / URSRA */
6806         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
6807         break;
6808     case 0x0a: /* SHL / SLI */
6809         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
6810         break;
6811     case 0x1c: /* SCVTF, UCVTF */
6812         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
6813                                      opcode, rn, rd);
6814         break;
6815     case 0x10: /* SQSHRUN, SQSHRUN2 */
6816     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
6817         if (!is_u) {
6818             unallocated_encoding(s);
6819             return;
6820         }
6821         handle_vec_simd_sqshrn(s, true, false, false, true,
6822                                immh, immb, opcode, rn, rd);
6823         break;
6824     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
6825     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
6826         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
6827                                immh, immb, opcode, rn, rd);
6828         break;
6829     case 0xc: /* SQSHLU */
6830         if (!is_u) {
6831             unallocated_encoding(s);
6832             return;
6833         }
6834         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
6835         break;
6836     case 0xe: /* SQSHL, UQSHL */
6837         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
6838         break;
6839     case 0x1f: /* FCVTZS, FCVTZU */
6840         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
6841         break;
6842     default:
6843         unallocated_encoding(s);
6844         break;
6845     }
6846 }
6847
6848 /* C3.6.10 AdvSIMD scalar three different
6849  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6850  * +-----+---+-----------+------+---+------+--------+-----+------+------+
6851  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
6852  * +-----+---+-----------+------+---+------+--------+-----+------+------+
6853  */
6854 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
6855 {
6856     bool is_u = extract32(insn, 29, 1);
6857     int size = extract32(insn, 22, 2);
6858     int opcode = extract32(insn, 12, 4);
6859     int rm = extract32(insn, 16, 5);
6860     int rn = extract32(insn, 5, 5);
6861     int rd = extract32(insn, 0, 5);
6862
6863     if (is_u) {
6864         unallocated_encoding(s);
6865         return;
6866     }
6867
6868     switch (opcode) {
6869     case 0x9: /* SQDMLAL, SQDMLAL2 */
6870     case 0xb: /* SQDMLSL, SQDMLSL2 */
6871     case 0xd: /* SQDMULL, SQDMULL2 */
6872         if (size == 0 || size == 3) {
6873             unallocated_encoding(s);
6874             return;
6875         }
6876         break;
6877     default:
6878         unallocated_encoding(s);
6879         return;
6880     }
6881
6882     if (!fp_access_check(s)) {
6883         return;
6884     }
6885
6886     if (size == 2) {
6887         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6888         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6889         TCGv_i64 tcg_res = tcg_temp_new_i64();
6890
6891         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
6892         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
6893
6894         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
6895         gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
6896
6897         switch (opcode) {
6898         case 0xd: /* SQDMULL, SQDMULL2 */
6899             break;
6900         case 0xb: /* SQDMLSL, SQDMLSL2 */
6901             tcg_gen_neg_i64(tcg_res, tcg_res);
6902             /* fall through */
6903         case 0x9: /* SQDMLAL, SQDMLAL2 */
6904             read_vec_element(s, tcg_op1, rd, 0, MO_64);
6905             gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
6906                                               tcg_res, tcg_op1);
6907             break;
6908         default:
6909             g_assert_not_reached();
6910         }
6911
6912         write_fp_dreg(s, rd, tcg_res);
6913
6914         tcg_temp_free_i64(tcg_op1);
6915         tcg_temp_free_i64(tcg_op2);
6916         tcg_temp_free_i64(tcg_res);
6917     } else {
6918         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6919         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6920         TCGv_i64 tcg_res = tcg_temp_new_i64();
6921
6922         read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
6923         read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
6924
6925         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
6926         gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
6927
6928         switch (opcode) {
6929         case 0xd: /* SQDMULL, SQDMULL2 */
6930             break;
6931         case 0xb: /* SQDMLSL, SQDMLSL2 */
6932             gen_helper_neon_negl_u32(tcg_res, tcg_res);
6933             /* fall through */
6934         case 0x9: /* SQDMLAL, SQDMLAL2 */
6935         {
6936             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
6937             read_vec_element(s, tcg_op3, rd, 0, MO_32);
6938             gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
6939                                               tcg_res, tcg_op3);
6940             tcg_temp_free_i64(tcg_op3);
6941             break;
6942         }
6943         default:
6944             g_assert_not_reached();
6945         }
6946
6947         tcg_gen_ext32u_i64(tcg_res, tcg_res);
6948         write_fp_dreg(s, rd, tcg_res);
6949
6950         tcg_temp_free_i32(tcg_op1);
6951         tcg_temp_free_i32(tcg_op2);
6952         tcg_temp_free_i64(tcg_res);
6953     }
6954 }
6955
6956 static void handle_3same_64(DisasContext *s, int opcode, bool u,
6957                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
6958 {
6959     /* Handle 64x64->64 opcodes which are shared between the scalar
6960      * and vector 3-same groups. We cover every opcode where size == 3
6961      * is valid in either the three-reg-same (integer, not pairwise)
6962      * or scalar-three-reg-same groups. (Some opcodes are not yet
6963      * implemented.)
6964      */
6965     TCGCond cond;
6966
6967     switch (opcode) {
6968     case 0x1: /* SQADD */
6969         if (u) {
6970             gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6971         } else {
6972             gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6973         }
6974         break;
6975     case 0x5: /* SQSUB */
6976         if (u) {
6977             gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6978         } else {
6979             gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6980         }
6981         break;
6982     case 0x6: /* CMGT, CMHI */
6983         /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
6984          * We implement this using setcond (test) and then negating.
6985          */
6986         cond = u ? TCG_COND_GTU : TCG_COND_GT;
6987     do_cmop:
6988         tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
6989         tcg_gen_neg_i64(tcg_rd, tcg_rd);
6990         break;
6991     case 0x7: /* CMGE, CMHS */
6992         cond = u ? TCG_COND_GEU : TCG_COND_GE;
6993         goto do_cmop;
6994     case 0x11: /* CMTST, CMEQ */
6995         if (u) {
6996             cond = TCG_COND_EQ;
6997             goto do_cmop;
6998         }
6999         /* CMTST : test is "if (X & Y != 0)". */
7000         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
7001         tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
7002         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7003         break;
7004     case 0x8: /* SSHL, USHL */
7005         if (u) {
7006             gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
7007         } else {
7008             gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
7009         }
7010         break;
7011     case 0x9: /* SQSHL, UQSHL */
7012         if (u) {
7013             gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7014         } else {
7015             gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7016         }
7017         break;
7018     case 0xa: /* SRSHL, URSHL */
7019         if (u) {
7020             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
7021         } else {
7022             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
7023         }
7024         break;
7025     case 0xb: /* SQRSHL, UQRSHL */
7026         if (u) {
7027             gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7028         } else {
7029             gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7030         }
7031         break;
7032     case 0x10: /* ADD, SUB */
7033         if (u) {
7034             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
7035         } else {
7036             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
7037         }
7038         break;
7039     default:
7040         g_assert_not_reached();
7041     }
7042 }
7043
7044 /* Handle the 3-same-operands float operations; shared by the scalar
7045  * and vector encodings. The caller must filter out any encodings
7046  * not allocated for the encoding it is dealing with.
7047  */
7048 static void handle_3same_float(DisasContext *s, int size, int elements,
7049                                int fpopcode, int rd, int rn, int rm)
7050 {
7051     int pass;
7052     TCGv_ptr fpst = get_fpstatus_ptr();
7053
7054     for (pass = 0; pass < elements; pass++) {
7055         if (size) {
7056             /* Double */
7057             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7058             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7059             TCGv_i64 tcg_res = tcg_temp_new_i64();
7060
7061             read_vec_element(s, tcg_op1, rn, pass, MO_64);
7062             read_vec_element(s, tcg_op2, rm, pass, MO_64);
7063
7064             switch (fpopcode) {
7065             case 0x39: /* FMLS */
7066                 /* As usual for ARM, separate negation for fused multiply-add */
7067                 gen_helper_vfp_negd(tcg_op1, tcg_op1);
7068                 /* fall through */
7069             case 0x19: /* FMLA */
7070                 read_vec_element(s, tcg_res, rd, pass, MO_64);
7071                 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
7072                                        tcg_res, fpst);
7073                 break;
7074             case 0x18: /* FMAXNM */
7075                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7076                 break;
7077             case 0x1a: /* FADD */
7078                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7079                 break;
7080             case 0x1b: /* FMULX */
7081                 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
7082                 break;
7083             case 0x1c: /* FCMEQ */
7084                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7085                 break;
7086             case 0x1e: /* FMAX */
7087                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7088                 break;
7089             case 0x1f: /* FRECPS */
7090                 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7091                 break;
7092             case 0x38: /* FMINNM */
7093                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7094                 break;
7095             case 0x3a: /* FSUB */
7096                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7097                 break;
7098             case 0x3e: /* FMIN */
7099                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7100                 break;
7101             case 0x3f: /* FRSQRTS */
7102                 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7103                 break;
7104             case 0x5b: /* FMUL */
7105                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
7106                 break;
7107             case 0x5c: /* FCMGE */
7108                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7109                 break;
7110             case 0x5d: /* FACGE */
7111                 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7112                 break;
7113             case 0x5f: /* FDIV */
7114                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
7115                 break;
7116             case 0x7a: /* FABD */
7117                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7118                 gen_helper_vfp_absd(tcg_res, tcg_res);
7119                 break;
7120             case 0x7c: /* FCMGT */
7121                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7122                 break;
7123             case 0x7d: /* FACGT */
7124                 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7125                 break;
7126             default:
7127                 g_assert_not_reached();
7128             }
7129
7130             write_vec_element(s, tcg_res, rd, pass, MO_64);
7131
7132             tcg_temp_free_i64(tcg_res);
7133             tcg_temp_free_i64(tcg_op1);
7134             tcg_temp_free_i64(tcg_op2);
7135         } else {
7136             /* Single */
7137             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7138             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7139             TCGv_i32 tcg_res = tcg_temp_new_i32();
7140
7141             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
7142             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
7143
7144             switch (fpopcode) {
7145             case 0x39: /* FMLS */
7146                 /* As usual for ARM, separate negation for fused multiply-add */
7147                 gen_helper_vfp_negs(tcg_op1, tcg_op1);
7148                 /* fall through */
7149             case 0x19: /* FMLA */
7150                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7151                 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
7152                                        tcg_res, fpst);
7153                 break;
7154             case 0x1a: /* FADD */
7155                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7156                 break;
7157             case 0x1b: /* FMULX */
7158                 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
7159                 break;
7160             case 0x1c: /* FCMEQ */
7161                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7162                 break;
7163             case 0x1e: /* FMAX */
7164                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7165                 break;
7166             case 0x1f: /* FRECPS */
7167                 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7168                 break;
7169             case 0x18: /* FMAXNM */
7170                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7171                 break;
7172             case 0x38: /* FMINNM */
7173                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7174                 break;
7175             case 0x3a: /* FSUB */
7176                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7177                 break;
7178             case 0x3e: /* FMIN */
7179                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7180                 break;
7181             case 0x3f: /* FRSQRTS */
7182                 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7183                 break;
7184             case 0x5b: /* FMUL */
7185                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
7186                 break;
7187             case 0x5c: /* FCMGE */
7188                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7189                 break;
7190             case 0x5d: /* FACGE */
7191                 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7192                 break;
7193             case 0x5f: /* FDIV */
7194                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
7195                 break;
7196             case 0x7a: /* FABD */
7197                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7198                 gen_helper_vfp_abss(tcg_res, tcg_res);
7199                 break;
7200             case 0x7c: /* FCMGT */
7201                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7202                 break;
7203             case 0x7d: /* FACGT */
7204                 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7205                 break;
7206             default:
7207                 g_assert_not_reached();
7208             }
7209
7210             if (elements == 1) {
7211                 /* scalar single so clear high part */
7212                 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7213
7214                 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
7215                 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
7216                 tcg_temp_free_i64(tcg_tmp);
7217             } else {
7218                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7219             }
7220
7221             tcg_temp_free_i32(tcg_res);
7222             tcg_temp_free_i32(tcg_op1);
7223             tcg_temp_free_i32(tcg_op2);
7224         }
7225     }
7226
7227     tcg_temp_free_ptr(fpst);
7228
7229     if ((elements << size) < 4) {
7230         /* scalar, or non-quad vector op */
7231         clear_vec_high(s, rd);
7232     }
7233 }
7234
7235 /* C3.6.11 AdvSIMD scalar three same
7236  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
7237  * +-----+---+-----------+------+---+------+--------+---+------+------+
7238  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
7239  * +-----+---+-----------+------+---+------+--------+---+------+------+
7240  */
7241 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
7242 {
7243     int rd = extract32(insn, 0, 5);
7244     int rn = extract32(insn, 5, 5);
7245     int opcode = extract32(insn, 11, 5);
7246     int rm = extract32(insn, 16, 5);
7247     int size = extract32(insn, 22, 2);
7248     bool u = extract32(insn, 29, 1);
7249     TCGv_i64 tcg_rd;
7250
7251     if (opcode >= 0x18) {
7252         /* Floating point: U, size[1] and opcode indicate operation */
7253         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
7254         switch (fpopcode) {
7255         case 0x1b: /* FMULX */
7256         case 0x1f: /* FRECPS */
7257         case 0x3f: /* FRSQRTS */
7258         case 0x5d: /* FACGE */
7259         case 0x7d: /* FACGT */
7260         case 0x1c: /* FCMEQ */
7261         case 0x5c: /* FCMGE */
7262         case 0x7c: /* FCMGT */
7263         case 0x7a: /* FABD */
7264             break;
7265         default:
7266             unallocated_encoding(s);
7267             return;
7268         }
7269
7270         if (!fp_access_check(s)) {
7271             return;
7272         }
7273
7274         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
7275         return;
7276     }
7277
7278     switch (opcode) {
7279     case 0x1: /* SQADD, UQADD */
7280     case 0x5: /* SQSUB, UQSUB */
7281     case 0x9: /* SQSHL, UQSHL */
7282     case 0xb: /* SQRSHL, UQRSHL */
7283         break;
7284     case 0x8: /* SSHL, USHL */
7285     case 0xa: /* SRSHL, URSHL */
7286     case 0x6: /* CMGT, CMHI */
7287     case 0x7: /* CMGE, CMHS */
7288     case 0x11: /* CMTST, CMEQ */
7289     case 0x10: /* ADD, SUB (vector) */
7290         if (size != 3) {
7291             unallocated_encoding(s);
7292             return;
7293         }
7294         break;
7295     case 0x16: /* SQDMULH, SQRDMULH (vector) */
7296         if (size != 1 && size != 2) {
7297             unallocated_encoding(s);
7298             return;
7299         }
7300         break;
7301     default:
7302         unallocated_encoding(s);
7303         return;
7304     }
7305
7306     if (!fp_access_check(s)) {
7307         return;
7308     }
7309
7310     tcg_rd = tcg_temp_new_i64();
7311
7312     if (size == 3) {
7313         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
7314         TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
7315
7316         handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
7317         tcg_temp_free_i64(tcg_rn);
7318         tcg_temp_free_i64(tcg_rm);
7319     } else {
7320         /* Do a single operation on the lowest element in the vector.
7321          * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
7322          * no side effects for all these operations.
7323          * OPTME: special-purpose helpers would avoid doing some
7324          * unnecessary work in the helper for the 8 and 16 bit cases.
7325          */
7326         NeonGenTwoOpEnvFn *genenvfn;
7327         TCGv_i32 tcg_rn = tcg_temp_new_i32();
7328         TCGv_i32 tcg_rm = tcg_temp_new_i32();
7329         TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
7330
7331         read_vec_element_i32(s, tcg_rn, rn, 0, size);
7332         read_vec_element_i32(s, tcg_rm, rm, 0, size);
7333
7334         switch (opcode) {
7335         case 0x1: /* SQADD, UQADD */
7336         {
7337             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7338                 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
7339                 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
7340                 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
7341             };
7342             genenvfn = fns[size][u];
7343             break;
7344         }
7345         case 0x5: /* SQSUB, UQSUB */
7346         {
7347             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7348                 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
7349                 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
7350                 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
7351             };
7352             genenvfn = fns[size][u];
7353             break;
7354         }
7355         case 0x9: /* SQSHL, UQSHL */
7356         {
7357             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7358                 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
7359                 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
7360                 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
7361             };
7362             genenvfn = fns[size][u];
7363             break;
7364         }
7365         case 0xb: /* SQRSHL, UQRSHL */
7366         {
7367             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7368                 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
7369                 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
7370                 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
7371             };
7372             genenvfn = fns[size][u];
7373             break;
7374         }
7375         case 0x16: /* SQDMULH, SQRDMULH */
7376         {
7377             static NeonGenTwoOpEnvFn * const fns[2][2] = {
7378                 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
7379                 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
7380             };
7381             assert(size == 1 || size == 2);
7382             genenvfn = fns[size - 1][u];
7383             break;
7384         }
7385         default:
7386             g_assert_not_reached();
7387         }
7388
7389         genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
7390         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
7391         tcg_temp_free_i32(tcg_rd32);
7392         tcg_temp_free_i32(tcg_rn);
7393         tcg_temp_free_i32(tcg_rm);
7394     }
7395
7396     write_fp_dreg(s, rd, tcg_rd);
7397
7398     tcg_temp_free_i64(tcg_rd);
7399 }
7400
7401 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
7402                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
7403                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
7404 {
7405     /* Handle 64->64 opcodes which are shared between the scalar and
7406      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
7407      * is valid in either group and also the double-precision fp ops.
7408      * The caller only need provide tcg_rmode and tcg_fpstatus if the op
7409      * requires them.
7410      */
7411     TCGCond cond;
7412
7413     switch (opcode) {
7414     case 0x4: /* CLS, CLZ */
7415         if (u) {
7416             gen_helper_clz64(tcg_rd, tcg_rn);
7417         } else {
7418             gen_helper_cls64(tcg_rd, tcg_rn);
7419         }
7420         break;
7421     case 0x5: /* NOT */
7422         /* This opcode is shared with CNT and RBIT but we have earlier
7423          * enforced that size == 3 if and only if this is the NOT insn.
7424          */
7425         tcg_gen_not_i64(tcg_rd, tcg_rn);
7426         break;
7427     case 0x7: /* SQABS, SQNEG */
7428         if (u) {
7429             gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
7430         } else {
7431             gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
7432         }
7433         break;
7434     case 0xa: /* CMLT */
7435         /* 64 bit integer comparison against zero, result is
7436          * test ? (2^64 - 1) : 0. We implement via setcond(!test) and
7437          * subtracting 1.
7438          */
7439         cond = TCG_COND_LT;
7440     do_cmop:
7441         tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
7442         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7443         break;
7444     case 0x8: /* CMGT, CMGE */
7445         cond = u ? TCG_COND_GE : TCG_COND_GT;
7446         goto do_cmop;
7447     case 0x9: /* CMEQ, CMLE */
7448         cond = u ? TCG_COND_LE : TCG_COND_EQ;
7449         goto do_cmop;
7450     case 0xb: /* ABS, NEG */
7451         if (u) {
7452             tcg_gen_neg_i64(tcg_rd, tcg_rn);
7453         } else {
7454             TCGv_i64 tcg_zero = tcg_const_i64(0);
7455             tcg_gen_neg_i64(tcg_rd, tcg_rn);
7456             tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
7457                                 tcg_rn, tcg_rd);
7458             tcg_temp_free_i64(tcg_zero);
7459         }
7460         break;
7461     case 0x2f: /* FABS */
7462         gen_helper_vfp_absd(tcg_rd, tcg_rn);
7463         break;
7464     case 0x6f: /* FNEG */
7465         gen_helper_vfp_negd(tcg_rd, tcg_rn);
7466         break;
7467     case 0x7f: /* FSQRT */
7468         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
7469         break;
7470     case 0x1a: /* FCVTNS */
7471     case 0x1b: /* FCVTMS */
7472     case 0x1c: /* FCVTAS */
7473     case 0x3a: /* FCVTPS */
7474     case 0x3b: /* FCVTZS */
7475     {
7476         TCGv_i32 tcg_shift = tcg_const_i32(0);
7477         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7478         tcg_temp_free_i32(tcg_shift);
7479         break;
7480     }
7481     case 0x5a: /* FCVTNU */
7482     case 0x5b: /* FCVTMU */
7483     case 0x5c: /* FCVTAU */
7484     case 0x7a: /* FCVTPU */
7485     case 0x7b: /* FCVTZU */
7486     {
7487         TCGv_i32 tcg_shift = tcg_const_i32(0);
7488         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7489         tcg_temp_free_i32(tcg_shift);
7490         break;
7491     }
7492     case 0x18: /* FRINTN */
7493     case 0x19: /* FRINTM */
7494     case 0x38: /* FRINTP */
7495     case 0x39: /* FRINTZ */
7496     case 0x58: /* FRINTA */
7497     case 0x79: /* FRINTI */
7498         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
7499         break;
7500     case 0x59: /* FRINTX */
7501         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
7502         break;
7503     default:
7504         g_assert_not_reached();
7505     }
7506 }
7507
7508 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
7509                                    bool is_scalar, bool is_u, bool is_q,
7510                                    int size, int rn, int rd)
7511 {
7512     bool is_double = (size == 3);
7513     TCGv_ptr fpst;
7514
7515     if (!fp_access_check(s)) {
7516         return;
7517     }
7518
7519     fpst = get_fpstatus_ptr();
7520
7521     if (is_double) {
7522         TCGv_i64 tcg_op = tcg_temp_new_i64();
7523         TCGv_i64 tcg_zero = tcg_const_i64(0);
7524         TCGv_i64 tcg_res = tcg_temp_new_i64();
7525         NeonGenTwoDoubleOPFn *genfn;
7526         bool swap = false;
7527         int pass;
7528
7529         switch (opcode) {
7530         case 0x2e: /* FCMLT (zero) */
7531             swap = true;
7532             /* fallthrough */
7533         case 0x2c: /* FCMGT (zero) */
7534             genfn = gen_helper_neon_cgt_f64;
7535             break;
7536         case 0x2d: /* FCMEQ (zero) */
7537             genfn = gen_helper_neon_ceq_f64;
7538             break;
7539         case 0x6d: /* FCMLE (zero) */
7540             swap = true;
7541             /* fall through */
7542         case 0x6c: /* FCMGE (zero) */
7543             genfn = gen_helper_neon_cge_f64;
7544             break;
7545         default:
7546             g_assert_not_reached();
7547         }
7548
7549         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7550             read_vec_element(s, tcg_op, rn, pass, MO_64);
7551             if (swap) {
7552                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7553             } else {
7554                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7555             }
7556             write_vec_element(s, tcg_res, rd, pass, MO_64);
7557         }
7558         if (is_scalar) {
7559             clear_vec_high(s, rd);
7560         }
7561
7562         tcg_temp_free_i64(tcg_res);
7563         tcg_temp_free_i64(tcg_zero);
7564         tcg_temp_free_i64(tcg_op);
7565     } else {
7566         TCGv_i32 tcg_op = tcg_temp_new_i32();
7567         TCGv_i32 tcg_zero = tcg_const_i32(0);
7568         TCGv_i32 tcg_res = tcg_temp_new_i32();
7569         NeonGenTwoSingleOPFn *genfn;
7570         bool swap = false;
7571         int pass, maxpasses;
7572
7573         switch (opcode) {
7574         case 0x2e: /* FCMLT (zero) */
7575             swap = true;
7576             /* fall through */
7577         case 0x2c: /* FCMGT (zero) */
7578             genfn = gen_helper_neon_cgt_f32;
7579             break;
7580         case 0x2d: /* FCMEQ (zero) */
7581             genfn = gen_helper_neon_ceq_f32;
7582             break;
7583         case 0x6d: /* FCMLE (zero) */
7584             swap = true;
7585             /* fall through */
7586         case 0x6c: /* FCMGE (zero) */
7587             genfn = gen_helper_neon_cge_f32;
7588             break;
7589         default:
7590             g_assert_not_reached();
7591         }
7592
7593         if (is_scalar) {
7594             maxpasses = 1;
7595         } else {
7596             maxpasses = is_q ? 4 : 2;
7597         }
7598
7599         for (pass = 0; pass < maxpasses; pass++) {
7600             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7601             if (swap) {
7602                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7603             } else {
7604                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7605             }
7606             if (is_scalar) {
7607                 write_fp_sreg(s, rd, tcg_res);
7608             } else {
7609                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7610             }
7611         }
7612         tcg_temp_free_i32(tcg_res);
7613         tcg_temp_free_i32(tcg_zero);
7614         tcg_temp_free_i32(tcg_op);
7615         if (!is_q && !is_scalar) {
7616             clear_vec_high(s, rd);
7617         }
7618     }
7619
7620     tcg_temp_free_ptr(fpst);
7621 }
7622
7623 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
7624                                     bool is_scalar, bool is_u, bool is_q,
7625                                     int size, int rn, int rd)
7626 {
7627     bool is_double = (size == 3);
7628     TCGv_ptr fpst = get_fpstatus_ptr();
7629
7630     if (is_double) {
7631         TCGv_i64 tcg_op = tcg_temp_new_i64();
7632         TCGv_i64 tcg_res = tcg_temp_new_i64();
7633         int pass;
7634
7635         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7636             read_vec_element(s, tcg_op, rn, pass, MO_64);
7637             switch (opcode) {
7638             case 0x3d: /* FRECPE */
7639                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
7640                 break;
7641             case 0x3f: /* FRECPX */
7642                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
7643                 break;
7644             case 0x7d: /* FRSQRTE */
7645                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
7646                 break;
7647             default:
7648                 g_assert_not_reached();
7649             }
7650             write_vec_element(s, tcg_res, rd, pass, MO_64);
7651         }
7652         if (is_scalar) {
7653             clear_vec_high(s, rd);
7654         }
7655
7656         tcg_temp_free_i64(tcg_res);
7657         tcg_temp_free_i64(tcg_op);
7658     } else {
7659         TCGv_i32 tcg_op = tcg_temp_new_i32();
7660         TCGv_i32 tcg_res = tcg_temp_new_i32();
7661         int pass, maxpasses;
7662
7663         if (is_scalar) {
7664             maxpasses = 1;
7665         } else {
7666             maxpasses = is_q ? 4 : 2;
7667         }
7668
7669         for (pass = 0; pass < maxpasses; pass++) {
7670             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7671
7672             switch (opcode) {
7673             case 0x3c: /* URECPE */
7674                 gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
7675                 break;
7676             case 0x3d: /* FRECPE */
7677                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
7678                 break;
7679             case 0x3f: /* FRECPX */
7680                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
7681                 break;
7682             case 0x7d: /* FRSQRTE */
7683                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
7684                 break;
7685             default:
7686                 g_assert_not_reached();
7687             }
7688
7689             if (is_scalar) {
7690                 write_fp_sreg(s, rd, tcg_res);
7691             } else {
7692                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7693             }
7694         }
7695         tcg_temp_free_i32(tcg_res);
7696         tcg_temp_free_i32(tcg_op);
7697         if (!is_q && !is_scalar) {
7698             clear_vec_high(s, rd);
7699         }
7700     }
7701     tcg_temp_free_ptr(fpst);
7702 }
7703
7704 static void handle_2misc_narrow(DisasContext *s, bool scalar,
7705                                 int opcode, bool u, bool is_q,
7706                                 int size, int rn, int rd)
7707 {
7708     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
7709      * in the source becomes a size element in the destination).
7710      */
7711     int pass;
7712     TCGv_i32 tcg_res[2];
7713     int destelt = is_q ? 2 : 0;
7714     int passes = scalar ? 1 : 2;
7715
7716     if (scalar) {
7717         tcg_res[1] = tcg_const_i32(0);
7718     }
7719
7720     for (pass = 0; pass < passes; pass++) {
7721         TCGv_i64 tcg_op = tcg_temp_new_i64();
7722         NeonGenNarrowFn *genfn = NULL;
7723         NeonGenNarrowEnvFn *genenvfn = NULL;
7724
7725         if (scalar) {
7726             read_vec_element(s, tcg_op, rn, pass, size + 1);
7727         } else {
7728             read_vec_element(s, tcg_op, rn, pass, MO_64);
7729         }
7730         tcg_res[pass] = tcg_temp_new_i32();
7731
7732         switch (opcode) {
7733         case 0x12: /* XTN, SQXTUN */
7734         {
7735             static NeonGenNarrowFn * const xtnfns[3] = {
7736                 gen_helper_neon_narrow_u8,
7737                 gen_helper_neon_narrow_u16,
7738                 tcg_gen_extrl_i64_i32,
7739             };
7740             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
7741                 gen_helper_neon_unarrow_sat8,
7742                 gen_helper_neon_unarrow_sat16,
7743                 gen_helper_neon_unarrow_sat32,
7744             };
7745             if (u) {
7746                 genenvfn = sqxtunfns[size];
7747             } else {
7748                 genfn = xtnfns[size];
7749             }
7750             break;
7751         }
7752         case 0x14: /* SQXTN, UQXTN */
7753         {
7754             static NeonGenNarrowEnvFn * const fns[3][2] = {
7755                 { gen_helper_neon_narrow_sat_s8,
7756                   gen_helper_neon_narrow_sat_u8 },
7757                 { gen_helper_neon_narrow_sat_s16,
7758                   gen_helper_neon_narrow_sat_u16 },
7759                 { gen_helper_neon_narrow_sat_s32,
7760                   gen_helper_neon_narrow_sat_u32 },
7761             };
7762             genenvfn = fns[size][u];
7763             break;
7764         }
7765         case 0x16: /* FCVTN, FCVTN2 */
7766             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
7767             if (size == 2) {
7768                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
7769             } else {
7770                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
7771                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
7772                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
7773                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
7774                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
7775                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
7776                 tcg_temp_free_i32(tcg_lo);
7777                 tcg_temp_free_i32(tcg_hi);
7778             }
7779             break;
7780         case 0x56:  /* FCVTXN, FCVTXN2 */
7781             /* 64 bit to 32 bit float conversion
7782              * with von Neumann rounding (round to odd)
7783              */
7784             assert(size == 2);
7785             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
7786             break;
7787         default:
7788             g_assert_not_reached();
7789         }
7790
7791         if (genfn) {
7792             genfn(tcg_res[pass], tcg_op);
7793         } else if (genenvfn) {
7794             genenvfn(tcg_res[pass], cpu_env, tcg_op);
7795         }
7796
7797         tcg_temp_free_i64(tcg_op);
7798     }
7799
7800     for (pass = 0; pass < 2; pass++) {
7801         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
7802         tcg_temp_free_i32(tcg_res[pass]);
7803     }
7804     if (!is_q) {
7805         clear_vec_high(s, rd);
7806     }
7807 }
7808
7809 /* Remaining saturating accumulating ops */
7810 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
7811                                 bool is_q, int size, int rn, int rd)
7812 {
7813     bool is_double = (size == 3);
7814
7815     if (is_double) {
7816         TCGv_i64 tcg_rn = tcg_temp_new_i64();
7817         TCGv_i64 tcg_rd = tcg_temp_new_i64();
7818         int pass;
7819
7820         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7821             read_vec_element(s, tcg_rn, rn, pass, MO_64);
7822             read_vec_element(s, tcg_rd, rd, pass, MO_64);
7823
7824             if (is_u) { /* USQADD */
7825                 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7826             } else { /* SUQADD */
7827                 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7828             }
7829             write_vec_element(s, tcg_rd, rd, pass, MO_64);
7830         }
7831         if (is_scalar) {
7832             clear_vec_high(s, rd);
7833         }
7834
7835         tcg_temp_free_i64(tcg_rd);
7836         tcg_temp_free_i64(tcg_rn);
7837     } else {
7838         TCGv_i32 tcg_rn = tcg_temp_new_i32();
7839         TCGv_i32 tcg_rd = tcg_temp_new_i32();
7840         int pass, maxpasses;
7841
7842         if (is_scalar) {
7843             maxpasses = 1;
7844         } else {
7845             maxpasses = is_q ? 4 : 2;
7846         }
7847
7848         for (pass = 0; pass < maxpasses; pass++) {
7849             if (is_scalar) {
7850                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
7851                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
7852             } else {
7853                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
7854                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
7855             }
7856
7857             if (is_u) { /* USQADD */
7858                 switch (size) {
7859                 case 0:
7860                     gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7861                     break;
7862                 case 1:
7863                     gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7864                     break;
7865                 case 2:
7866                     gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7867                     break;
7868                 default:
7869                     g_assert_not_reached();
7870                 }
7871             } else { /* SUQADD */
7872                 switch (size) {
7873                 case 0:
7874                     gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7875                     break;
7876                 case 1:
7877                     gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7878                     break;
7879                 case 2:
7880                     gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7881                     break;
7882                 default:
7883                     g_assert_not_reached();
7884                 }
7885             }
7886
7887             if (is_scalar) {
7888                 TCGv_i64 tcg_zero = tcg_const_i64(0);
7889                 write_vec_element(s, tcg_zero, rd, 0, MO_64);
7890                 tcg_temp_free_i64(tcg_zero);
7891             }
7892             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
7893         }
7894
7895         if (!is_q) {
7896             clear_vec_high(s, rd);
7897         }
7898
7899         tcg_temp_free_i32(tcg_rd);
7900         tcg_temp_free_i32(tcg_rn);
7901     }
7902 }
7903
7904 /* C3.6.12 AdvSIMD scalar two reg misc
7905  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7906  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7907  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7908  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7909  */
7910 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
7911 {
7912     int rd = extract32(insn, 0, 5);
7913     int rn = extract32(insn, 5, 5);
7914     int opcode = extract32(insn, 12, 5);
7915     int size = extract32(insn, 22, 2);
7916     bool u = extract32(insn, 29, 1);
7917     bool is_fcvt = false;
7918     int rmode;
7919     TCGv_i32 tcg_rmode;
7920     TCGv_ptr tcg_fpstatus;
7921
7922     switch (opcode) {
7923     case 0x3: /* USQADD / SUQADD*/
7924         if (!fp_access_check(s)) {
7925             return;
7926         }
7927         handle_2misc_satacc(s, true, u, false, size, rn, rd);
7928         return;
7929     case 0x7: /* SQABS / SQNEG */
7930         break;
7931     case 0xa: /* CMLT */
7932         if (u) {
7933             unallocated_encoding(s);
7934             return;
7935         }
7936         /* fall through */
7937     case 0x8: /* CMGT, CMGE */
7938     case 0x9: /* CMEQ, CMLE */
7939     case 0xb: /* ABS, NEG */
7940         if (size != 3) {
7941             unallocated_encoding(s);
7942             return;
7943         }
7944         break;
7945     case 0x12: /* SQXTUN */
7946         if (!u) {
7947             unallocated_encoding(s);
7948             return;
7949         }
7950         /* fall through */
7951     case 0x14: /* SQXTN, UQXTN */
7952         if (size == 3) {
7953             unallocated_encoding(s);
7954             return;
7955         }
7956         if (!fp_access_check(s)) {
7957             return;
7958         }
7959         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
7960         return;
7961     case 0xc ... 0xf:
7962     case 0x16 ... 0x1d:
7963     case 0x1f:
7964         /* Floating point: U, size[1] and opcode indicate operation;
7965          * size[0] indicates single or double precision.
7966          */
7967         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
7968         size = extract32(size, 0, 1) ? 3 : 2;
7969         switch (opcode) {
7970         case 0x2c: /* FCMGT (zero) */
7971         case 0x2d: /* FCMEQ (zero) */
7972         case 0x2e: /* FCMLT (zero) */
7973         case 0x6c: /* FCMGE (zero) */
7974         case 0x6d: /* FCMLE (zero) */
7975             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
7976             return;
7977         case 0x1d: /* SCVTF */
7978         case 0x5d: /* UCVTF */
7979         {
7980             bool is_signed = (opcode == 0x1d);
7981             if (!fp_access_check(s)) {
7982                 return;
7983             }
7984             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
7985             return;
7986         }
7987         case 0x3d: /* FRECPE */
7988         case 0x3f: /* FRECPX */
7989         case 0x7d: /* FRSQRTE */
7990             if (!fp_access_check(s)) {
7991                 return;
7992             }
7993             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
7994             return;
7995         case 0x1a: /* FCVTNS */
7996         case 0x1b: /* FCVTMS */
7997         case 0x3a: /* FCVTPS */
7998         case 0x3b: /* FCVTZS */
7999         case 0x5a: /* FCVTNU */
8000         case 0x5b: /* FCVTMU */
8001         case 0x7a: /* FCVTPU */
8002         case 0x7b: /* FCVTZU */
8003             is_fcvt = true;
8004             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
8005             break;
8006         case 0x1c: /* FCVTAS */
8007         case 0x5c: /* FCVTAU */
8008             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
8009             is_fcvt = true;
8010             rmode = FPROUNDING_TIEAWAY;
8011             break;
8012         case 0x56: /* FCVTXN, FCVTXN2 */
8013             if (size == 2) {
8014                 unallocated_encoding(s);
8015                 return;
8016             }
8017             if (!fp_access_check(s)) {
8018                 return;
8019             }
8020             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
8021             return;
8022         default:
8023             unallocated_encoding(s);
8024             return;
8025         }
8026         break;
8027     default:
8028         unallocated_encoding(s);
8029         return;
8030     }
8031
8032     if (!fp_access_check(s)) {
8033         return;
8034     }
8035
8036     if (is_fcvt) {
8037         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
8038         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8039         tcg_fpstatus = get_fpstatus_ptr();
8040     } else {
8041         TCGV_UNUSED_I32(tcg_rmode);
8042         TCGV_UNUSED_PTR(tcg_fpstatus);
8043     }
8044
8045     if (size == 3) {
8046         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8047         TCGv_i64 tcg_rd = tcg_temp_new_i64();
8048
8049         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
8050         write_fp_dreg(s, rd, tcg_rd);
8051         tcg_temp_free_i64(tcg_rd);
8052         tcg_temp_free_i64(tcg_rn);
8053     } else {
8054         TCGv_i32 tcg_rn = tcg_temp_new_i32();
8055         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8056
8057         read_vec_element_i32(s, tcg_rn, rn, 0, size);
8058
8059         switch (opcode) {
8060         case 0x7: /* SQABS, SQNEG */
8061         {
8062             NeonGenOneOpEnvFn *genfn;
8063             static NeonGenOneOpEnvFn * const fns[3][2] = {
8064                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
8065                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
8066                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
8067             };
8068             genfn = fns[size][u];
8069             genfn(tcg_rd, cpu_env, tcg_rn);
8070             break;
8071         }
8072         case 0x1a: /* FCVTNS */
8073         case 0x1b: /* FCVTMS */
8074         case 0x1c: /* FCVTAS */
8075         case 0x3a: /* FCVTPS */
8076         case 0x3b: /* FCVTZS */
8077         {
8078             TCGv_i32 tcg_shift = tcg_const_i32(0);
8079             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8080             tcg_temp_free_i32(tcg_shift);
8081             break;
8082         }
8083         case 0x5a: /* FCVTNU */
8084         case 0x5b: /* FCVTMU */
8085         case 0x5c: /* FCVTAU */
8086         case 0x7a: /* FCVTPU */
8087         case 0x7b: /* FCVTZU */
8088         {
8089             TCGv_i32 tcg_shift = tcg_const_i32(0);
8090             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8091             tcg_temp_free_i32(tcg_shift);
8092             break;
8093         }
8094         default:
8095             g_assert_not_reached();
8096         }
8097
8098         write_fp_sreg(s, rd, tcg_rd);
8099         tcg_temp_free_i32(tcg_rd);
8100         tcg_temp_free_i32(tcg_rn);
8101     }
8102
8103     if (is_fcvt) {
8104         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8105         tcg_temp_free_i32(tcg_rmode);
8106         tcg_temp_free_ptr(tcg_fpstatus);
8107     }
8108 }
8109
8110 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
8111 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
8112                                  int immh, int immb, int opcode, int rn, int rd)
8113 {
8114     int size = 32 - clz32(immh) - 1;
8115     int immhb = immh << 3 | immb;
8116     int shift = 2 * (8 << size) - immhb;
8117     bool accumulate = false;
8118     bool round = false;
8119     bool insert = false;
8120     int dsize = is_q ? 128 : 64;
8121     int esize = 8 << size;
8122     int elements = dsize/esize;
8123     TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
8124     TCGv_i64 tcg_rn = new_tmp_a64(s);
8125     TCGv_i64 tcg_rd = new_tmp_a64(s);
8126     TCGv_i64 tcg_round;
8127     int i;
8128
8129     if (extract32(immh, 3, 1) && !is_q) {
8130         unallocated_encoding(s);
8131         return;
8132     }
8133
8134     if (size > 3 && !is_q) {
8135         unallocated_encoding(s);
8136         return;
8137     }
8138
8139     if (!fp_access_check(s)) {
8140         return;
8141     }
8142
8143     switch (opcode) {
8144     case 0x02: /* SSRA / USRA (accumulate) */
8145         accumulate = true;
8146         break;
8147     case 0x04: /* SRSHR / URSHR (rounding) */
8148         round = true;
8149         break;
8150     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8151         accumulate = round = true;
8152         break;
8153     case 0x08: /* SRI */
8154         insert = true;
8155         break;
8156     }
8157
8158     if (round) {
8159         uint64_t round_const = 1ULL << (shift - 1);
8160         tcg_round = tcg_const_i64(round_const);
8161     } else {
8162         TCGV_UNUSED_I64(tcg_round);
8163     }
8164
8165     for (i = 0; i < elements; i++) {
8166         read_vec_element(s, tcg_rn, rn, i, memop);
8167         if (accumulate || insert) {
8168             read_vec_element(s, tcg_rd, rd, i, memop);
8169         }
8170
8171         if (insert) {
8172             handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
8173         } else {
8174             handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8175                                     accumulate, is_u, size, shift);
8176         }
8177
8178         write_vec_element(s, tcg_rd, rd, i, size);
8179     }
8180
8181     if (!is_q) {
8182         clear_vec_high(s, rd);
8183     }
8184
8185     if (round) {
8186         tcg_temp_free_i64(tcg_round);
8187     }
8188 }
8189
8190 /* SHL/SLI - Vector shift left */
8191 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
8192                                 int immh, int immb, int opcode, int rn, int rd)
8193 {
8194     int size = 32 - clz32(immh) - 1;
8195     int immhb = immh << 3 | immb;
8196     int shift = immhb - (8 << size);
8197     int dsize = is_q ? 128 : 64;
8198     int esize = 8 << size;
8199     int elements = dsize/esize;
8200     TCGv_i64 tcg_rn = new_tmp_a64(s);
8201     TCGv_i64 tcg_rd = new_tmp_a64(s);
8202     int i;
8203
8204     if (extract32(immh, 3, 1) && !is_q) {
8205         unallocated_encoding(s);
8206         return;
8207     }
8208
8209     if (size > 3 && !is_q) {
8210         unallocated_encoding(s);
8211         return;
8212     }
8213
8214     if (!fp_access_check(s)) {
8215         return;
8216     }
8217
8218     for (i = 0; i < elements; i++) {
8219         read_vec_element(s, tcg_rn, rn, i, size);
8220         if (insert) {
8221             read_vec_element(s, tcg_rd, rd, i, size);
8222         }
8223
8224         handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
8225
8226         write_vec_element(s, tcg_rd, rd, i, size);
8227     }
8228
8229     if (!is_q) {
8230         clear_vec_high(s, rd);
8231     }
8232 }
8233
8234 /* USHLL/SHLL - Vector shift left with widening */
8235 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
8236                                  int immh, int immb, int opcode, int rn, int rd)
8237 {
8238     int size = 32 - clz32(immh) - 1;
8239     int immhb = immh << 3 | immb;
8240     int shift = immhb - (8 << size);
8241     int dsize = 64;
8242     int esize = 8 << size;
8243     int elements = dsize/esize;
8244     TCGv_i64 tcg_rn = new_tmp_a64(s);
8245     TCGv_i64 tcg_rd = new_tmp_a64(s);
8246     int i;
8247
8248     if (size >= 3) {
8249         unallocated_encoding(s);
8250         return;
8251     }
8252
8253     if (!fp_access_check(s)) {
8254         return;
8255     }
8256
8257     /* For the LL variants the store is larger than the load,
8258      * so if rd == rn we would overwrite parts of our input.
8259      * So load everything right now and use shifts in the main loop.
8260      */
8261     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
8262
8263     for (i = 0; i < elements; i++) {
8264         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
8265         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
8266         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
8267         write_vec_element(s, tcg_rd, rd, i, size + 1);
8268     }
8269 }
8270
8271 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
8272 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
8273                                  int immh, int immb, int opcode, int rn, int rd)
8274 {
8275     int immhb = immh << 3 | immb;
8276     int size = 32 - clz32(immh) - 1;
8277     int dsize = 64;
8278     int esize = 8 << size;
8279     int elements = dsize/esize;
8280     int shift = (2 * esize) - immhb;
8281     bool round = extract32(opcode, 0, 1);
8282     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
8283     TCGv_i64 tcg_round;
8284     int i;
8285
8286     if (extract32(immh, 3, 1)) {
8287         unallocated_encoding(s);
8288         return;
8289     }
8290
8291     if (!fp_access_check(s)) {
8292         return;
8293     }
8294
8295     tcg_rn = tcg_temp_new_i64();
8296     tcg_rd = tcg_temp_new_i64();
8297     tcg_final = tcg_temp_new_i64();
8298     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
8299
8300     if (round) {
8301         uint64_t round_const = 1ULL << (shift - 1);
8302         tcg_round = tcg_const_i64(round_const);
8303     } else {
8304         TCGV_UNUSED_I64(tcg_round);
8305     }
8306
8307     for (i = 0; i < elements; i++) {
8308         read_vec_element(s, tcg_rn, rn, i, size+1);
8309         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8310                                 false, true, size+1, shift);
8311
8312         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8313     }
8314
8315     if (!is_q) {
8316         clear_vec_high(s, rd);
8317         write_vec_element(s, tcg_final, rd, 0, MO_64);
8318     } else {
8319         write_vec_element(s, tcg_final, rd, 1, MO_64);
8320     }
8321
8322     if (round) {
8323         tcg_temp_free_i64(tcg_round);
8324     }
8325     tcg_temp_free_i64(tcg_rn);
8326     tcg_temp_free_i64(tcg_rd);
8327     tcg_temp_free_i64(tcg_final);
8328     return;
8329 }
8330
8331
8332 /* C3.6.14 AdvSIMD shift by immediate
8333  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
8334  * +---+---+---+-------------+------+------+--------+---+------+------+
8335  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8336  * +---+---+---+-------------+------+------+--------+---+------+------+
8337  */
8338 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
8339 {
8340     int rd = extract32(insn, 0, 5);
8341     int rn = extract32(insn, 5, 5);
8342     int opcode = extract32(insn, 11, 5);
8343     int immb = extract32(insn, 16, 3);
8344     int immh = extract32(insn, 19, 4);
8345     bool is_u = extract32(insn, 29, 1);
8346     bool is_q = extract32(insn, 30, 1);
8347
8348     switch (opcode) {
8349     case 0x08: /* SRI */
8350         if (!is_u) {
8351             unallocated_encoding(s);
8352             return;
8353         }
8354         /* fall through */
8355     case 0x00: /* SSHR / USHR */
8356     case 0x02: /* SSRA / USRA (accumulate) */
8357     case 0x04: /* SRSHR / URSHR (rounding) */
8358     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8359         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
8360         break;
8361     case 0x0a: /* SHL / SLI */
8362         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8363         break;
8364     case 0x10: /* SHRN */
8365     case 0x11: /* RSHRN / SQRSHRUN */
8366         if (is_u) {
8367             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
8368                                    opcode, rn, rd);
8369         } else {
8370             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
8371         }
8372         break;
8373     case 0x12: /* SQSHRN / UQSHRN */
8374     case 0x13: /* SQRSHRN / UQRSHRN */
8375         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
8376                                opcode, rn, rd);
8377         break;
8378     case 0x14: /* SSHLL / USHLL */
8379         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8380         break;
8381     case 0x1c: /* SCVTF / UCVTF */
8382         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
8383                                      opcode, rn, rd);
8384         break;
8385     case 0xc: /* SQSHLU */
8386         if (!is_u) {
8387             unallocated_encoding(s);
8388             return;
8389         }
8390         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
8391         break;
8392     case 0xe: /* SQSHL, UQSHL */
8393         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
8394         break;
8395     case 0x1f: /* FCVTZS/ FCVTZU */
8396         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
8397         return;
8398     default:
8399         unallocated_encoding(s);
8400         return;
8401     }
8402 }
8403
8404 /* Generate code to do a "long" addition or subtraction, ie one done in
8405  * TCGv_i64 on vector lanes twice the width specified by size.
8406  */
8407 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
8408                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
8409 {
8410     static NeonGenTwo64OpFn * const fns[3][2] = {
8411         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
8412         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
8413         { tcg_gen_add_i64, tcg_gen_sub_i64 },
8414     };
8415     NeonGenTwo64OpFn *genfn;
8416     assert(size < 3);
8417
8418     genfn = fns[size][is_sub];
8419     genfn(tcg_res, tcg_op1, tcg_op2);
8420 }
8421
8422 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
8423                                 int opcode, int rd, int rn, int rm)
8424 {
8425     /* 3-reg-different widening insns: 64 x 64 -> 128 */
8426     TCGv_i64 tcg_res[2];
8427     int pass, accop;
8428
8429     tcg_res[0] = tcg_temp_new_i64();
8430     tcg_res[1] = tcg_temp_new_i64();
8431
8432     /* Does this op do an adding accumulate, a subtracting accumulate,
8433      * or no accumulate at all?
8434      */
8435     switch (opcode) {
8436     case 5:
8437     case 8:
8438     case 9:
8439         accop = 1;
8440         break;
8441     case 10:
8442     case 11:
8443         accop = -1;
8444         break;
8445     default:
8446         accop = 0;
8447         break;
8448     }
8449
8450     if (accop != 0) {
8451         read_vec_element(s, tcg_res[0], rd, 0, MO_64);
8452         read_vec_element(s, tcg_res[1], rd, 1, MO_64);
8453     }
8454
8455     /* size == 2 means two 32x32->64 operations; this is worth special
8456      * casing because we can generally handle it inline.
8457      */
8458     if (size == 2) {
8459         for (pass = 0; pass < 2; pass++) {
8460             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8461             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8462             TCGv_i64 tcg_passres;
8463             TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
8464
8465             int elt = pass + is_q * 2;
8466
8467             read_vec_element(s, tcg_op1, rn, elt, memop);
8468             read_vec_element(s, tcg_op2, rm, elt, memop);
8469
8470             if (accop == 0) {
8471                 tcg_passres = tcg_res[pass];
8472             } else {
8473                 tcg_passres = tcg_temp_new_i64();
8474             }
8475
8476             switch (opcode) {
8477             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8478                 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
8479                 break;
8480             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8481                 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
8482                 break;
8483             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8484             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8485             {
8486                 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
8487                 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
8488
8489                 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
8490                 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
8491                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
8492                                     tcg_passres,
8493                                     tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
8494                 tcg_temp_free_i64(tcg_tmp1);
8495                 tcg_temp_free_i64(tcg_tmp2);
8496                 break;
8497             }
8498             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8499             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8500             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8501                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8502                 break;
8503             case 9: /* SQDMLAL, SQDMLAL2 */
8504             case 11: /* SQDMLSL, SQDMLSL2 */
8505             case 13: /* SQDMULL, SQDMULL2 */
8506                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8507                 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
8508                                                   tcg_passres, tcg_passres);
8509                 break;
8510             default:
8511                 g_assert_not_reached();
8512             }
8513
8514             if (opcode == 9 || opcode == 11) {
8515                 /* saturating accumulate ops */
8516                 if (accop < 0) {
8517                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
8518                 }
8519                 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
8520                                                   tcg_res[pass], tcg_passres);
8521             } else if (accop > 0) {
8522                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8523             } else if (accop < 0) {
8524                 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8525             }
8526
8527             if (accop != 0) {
8528                 tcg_temp_free_i64(tcg_passres);
8529             }
8530
8531             tcg_temp_free_i64(tcg_op1);
8532             tcg_temp_free_i64(tcg_op2);
8533         }
8534     } else {
8535         /* size 0 or 1, generally helper functions */
8536         for (pass = 0; pass < 2; pass++) {
8537             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8538             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8539             TCGv_i64 tcg_passres;
8540             int elt = pass + is_q * 2;
8541
8542             read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
8543             read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
8544
8545             if (accop == 0) {
8546                 tcg_passres = tcg_res[pass];
8547             } else {
8548                 tcg_passres = tcg_temp_new_i64();
8549             }
8550
8551             switch (opcode) {
8552             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8553             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8554             {
8555                 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
8556                 static NeonGenWidenFn * const widenfns[2][2] = {
8557                     { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8558                     { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8559                 };
8560                 NeonGenWidenFn *widenfn = widenfns[size][is_u];
8561
8562                 widenfn(tcg_op2_64, tcg_op2);
8563                 widenfn(tcg_passres, tcg_op1);
8564                 gen_neon_addl(size, (opcode == 2), tcg_passres,
8565                               tcg_passres, tcg_op2_64);
8566                 tcg_temp_free_i64(tcg_op2_64);
8567                 break;
8568             }
8569             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8570             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8571                 if (size == 0) {
8572                     if (is_u) {
8573                         gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
8574                     } else {
8575                         gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
8576                     }
8577                 } else {
8578                     if (is_u) {
8579                         gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
8580                     } else {
8581                         gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
8582                     }
8583                 }
8584                 break;
8585             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8586             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8587             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8588                 if (size == 0) {
8589                     if (is_u) {
8590                         gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
8591                     } else {
8592                         gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
8593                     }
8594                 } else {
8595                     if (is_u) {
8596                         gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
8597                     } else {
8598                         gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8599                     }
8600                 }
8601                 break;
8602             case 9: /* SQDMLAL, SQDMLAL2 */
8603             case 11: /* SQDMLSL, SQDMLSL2 */
8604             case 13: /* SQDMULL, SQDMULL2 */
8605                 assert(size == 1);
8606                 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8607                 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
8608                                                   tcg_passres, tcg_passres);
8609                 break;
8610             case 14: /* PMULL */
8611                 assert(size == 0);
8612                 gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
8613                 break;
8614             default:
8615                 g_assert_not_reached();
8616             }
8617             tcg_temp_free_i32(tcg_op1);
8618             tcg_temp_free_i32(tcg_op2);
8619
8620             if (accop != 0) {
8621                 if (opcode == 9 || opcode == 11) {
8622                     /* saturating accumulate ops */
8623                     if (accop < 0) {
8624                         gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
8625                     }
8626                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
8627                                                       tcg_res[pass],
8628                                                       tcg_passres);
8629                 } else {
8630                     gen_neon_addl(size, (accop < 0), tcg_res[pass],
8631                                   tcg_res[pass], tcg_passres);
8632                 }
8633                 tcg_temp_free_i64(tcg_passres);
8634             }
8635         }
8636     }
8637
8638     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8639     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8640     tcg_temp_free_i64(tcg_res[0]);
8641     tcg_temp_free_i64(tcg_res[1]);
8642 }
8643
8644 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
8645                             int opcode, int rd, int rn, int rm)
8646 {
8647     TCGv_i64 tcg_res[2];
8648     int part = is_q ? 2 : 0;
8649     int pass;
8650
8651     for (pass = 0; pass < 2; pass++) {
8652         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8653         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8654         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
8655         static NeonGenWidenFn * const widenfns[3][2] = {
8656             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8657             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8658             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
8659         };
8660         NeonGenWidenFn *widenfn = widenfns[size][is_u];
8661
8662         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8663         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
8664         widenfn(tcg_op2_wide, tcg_op2);
8665         tcg_temp_free_i32(tcg_op2);
8666         tcg_res[pass] = tcg_temp_new_i64();
8667         gen_neon_addl(size, (opcode == 3),
8668                       tcg_res[pass], tcg_op1, tcg_op2_wide);
8669         tcg_temp_free_i64(tcg_op1);
8670         tcg_temp_free_i64(tcg_op2_wide);
8671     }
8672
8673     for (pass = 0; pass < 2; pass++) {
8674         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8675         tcg_temp_free_i64(tcg_res[pass]);
8676     }
8677 }
8678
8679 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
8680 {
8681     tcg_gen_addi_i64(in, in, 1U << 31);
8682     tcg_gen_extrh_i64_i32(res, in);
8683 }
8684
8685 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
8686                                  int opcode, int rd, int rn, int rm)
8687 {
8688     TCGv_i32 tcg_res[2];
8689     int part = is_q ? 2 : 0;
8690     int pass;
8691
8692     for (pass = 0; pass < 2; pass++) {
8693         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8694         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8695         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
8696         static NeonGenNarrowFn * const narrowfns[3][2] = {
8697             { gen_helper_neon_narrow_high_u8,
8698               gen_helper_neon_narrow_round_high_u8 },
8699             { gen_helper_neon_narrow_high_u16,
8700               gen_helper_neon_narrow_round_high_u16 },
8701             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
8702         };
8703         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
8704
8705         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8706         read_vec_element(s, tcg_op2, rm, pass, MO_64);
8707
8708         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
8709
8710         tcg_temp_free_i64(tcg_op1);
8711         tcg_temp_free_i64(tcg_op2);
8712
8713         tcg_res[pass] = tcg_temp_new_i32();
8714         gennarrow(tcg_res[pass], tcg_wideres);
8715         tcg_temp_free_i64(tcg_wideres);
8716     }
8717
8718     for (pass = 0; pass < 2; pass++) {
8719         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
8720         tcg_temp_free_i32(tcg_res[pass]);
8721     }
8722     if (!is_q) {
8723         clear_vec_high(s, rd);
8724     }
8725 }
8726
8727 static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
8728 {
8729     /* PMULL of 64 x 64 -> 128 is an odd special case because it
8730      * is the only three-reg-diff instruction which produces a
8731      * 128-bit wide result from a single operation. However since
8732      * it's possible to calculate the two halves more or less
8733      * separately we just use two helper calls.
8734      */
8735     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8736     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8737     TCGv_i64 tcg_res = tcg_temp_new_i64();
8738
8739     read_vec_element(s, tcg_op1, rn, is_q, MO_64);
8740     read_vec_element(s, tcg_op2, rm, is_q, MO_64);
8741     gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
8742     write_vec_element(s, tcg_res, rd, 0, MO_64);
8743     gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
8744     write_vec_element(s, tcg_res, rd, 1, MO_64);
8745
8746     tcg_temp_free_i64(tcg_op1);
8747     tcg_temp_free_i64(tcg_op2);
8748     tcg_temp_free_i64(tcg_res);
8749 }
8750
8751 /* C3.6.15 AdvSIMD three different
8752  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8753  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8754  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8755  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8756  */
8757 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
8758 {
8759     /* Instructions in this group fall into three basic classes
8760      * (in each case with the operation working on each element in
8761      * the input vectors):
8762      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
8763      *     128 bit input)
8764      * (2) wide 64 x 128 -> 128
8765      * (3) narrowing 128 x 128 -> 64
8766      * Here we do initial decode, catch unallocated cases and
8767      * dispatch to separate functions for each class.
8768      */
8769     int is_q = extract32(insn, 30, 1);
8770     int is_u = extract32(insn, 29, 1);
8771     int size = extract32(insn, 22, 2);
8772     int opcode = extract32(insn, 12, 4);
8773     int rm = extract32(insn, 16, 5);
8774     int rn = extract32(insn, 5, 5);
8775     int rd = extract32(insn, 0, 5);
8776
8777     switch (opcode) {
8778     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
8779     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
8780         /* 64 x 128 -> 128 */
8781         if (size == 3) {
8782             unallocated_encoding(s);
8783             return;
8784         }
8785         if (!fp_access_check(s)) {
8786             return;
8787         }
8788         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
8789         break;
8790     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
8791     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
8792         /* 128 x 128 -> 64 */
8793         if (size == 3) {
8794             unallocated_encoding(s);
8795             return;
8796         }
8797         if (!fp_access_check(s)) {
8798             return;
8799         }
8800         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
8801         break;
8802     case 14: /* PMULL, PMULL2 */
8803         if (is_u || size == 1 || size == 2) {
8804             unallocated_encoding(s);
8805             return;
8806         }
8807         if (size == 3) {
8808             if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
8809                 unallocated_encoding(s);
8810                 return;
8811             }
8812             if (!fp_access_check(s)) {
8813                 return;
8814             }
8815             handle_pmull_64(s, is_q, rd, rn, rm);
8816             return;
8817         }
8818         goto is_widening;
8819     case 9: /* SQDMLAL, SQDMLAL2 */
8820     case 11: /* SQDMLSL, SQDMLSL2 */
8821     case 13: /* SQDMULL, SQDMULL2 */
8822         if (is_u || size == 0) {
8823             unallocated_encoding(s);
8824             return;
8825         }
8826         /* fall through */
8827     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8828     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8829     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8830     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8831     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8832     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8833     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
8834         /* 64 x 64 -> 128 */
8835         if (size == 3) {
8836             unallocated_encoding(s);
8837             return;
8838         }
8839     is_widening:
8840         if (!fp_access_check(s)) {
8841             return;
8842         }
8843
8844         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
8845         break;
8846     default:
8847         /* opcode 15 not allocated */
8848         unallocated_encoding(s);
8849         break;
8850     }
8851 }
8852
8853 /* Logic op (opcode == 3) subgroup of C3.6.16. */
8854 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
8855 {
8856     int rd = extract32(insn, 0, 5);
8857     int rn = extract32(insn, 5, 5);
8858     int rm = extract32(insn, 16, 5);
8859     int size = extract32(insn, 22, 2);
8860     bool is_u = extract32(insn, 29, 1);
8861     bool is_q = extract32(insn, 30, 1);
8862     TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
8863     int pass;
8864
8865     if (!fp_access_check(s)) {
8866         return;
8867     }
8868
8869     tcg_op1 = tcg_temp_new_i64();
8870     tcg_op2 = tcg_temp_new_i64();
8871     tcg_res[0] = tcg_temp_new_i64();
8872     tcg_res[1] = tcg_temp_new_i64();
8873
8874     for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
8875         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8876         read_vec_element(s, tcg_op2, rm, pass, MO_64);
8877
8878         if (!is_u) {
8879             switch (size) {
8880             case 0: /* AND */
8881                 tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
8882                 break;
8883             case 1: /* BIC */
8884                 tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
8885                 break;
8886             case 2: /* ORR */
8887                 tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
8888                 break;
8889             case 3: /* ORN */
8890                 tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
8891                 break;
8892             }
8893         } else {
8894             if (size != 0) {
8895                 /* B* ops need res loaded to operate on */
8896                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8897             }
8898
8899             switch (size) {
8900             case 0: /* EOR */
8901                 tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
8902                 break;
8903             case 1: /* BSL bitwise select */
8904                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2);
8905                 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8906                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1);
8907                 break;
8908             case 2: /* BIT, bitwise insert if true */
8909                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8910                 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2);
8911                 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
8912                 break;
8913             case 3: /* BIF, bitwise insert if false */
8914                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8915                 tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2);
8916                 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
8917                 break;
8918             }
8919         }
8920     }
8921
8922     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8923     if (!is_q) {
8924         tcg_gen_movi_i64(tcg_res[1], 0);
8925     }
8926     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8927
8928     tcg_temp_free_i64(tcg_op1);
8929     tcg_temp_free_i64(tcg_op2);
8930     tcg_temp_free_i64(tcg_res[0]);
8931     tcg_temp_free_i64(tcg_res[1]);
8932 }
8933
8934 /* Helper functions for 32 bit comparisons */
8935 static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8936 {
8937     tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
8938 }
8939
8940 static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8941 {
8942     tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
8943 }
8944
8945 static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8946 {
8947     tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
8948 }
8949
8950 static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8951 {
8952     tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
8953 }
8954
8955 /* Pairwise op subgroup of C3.6.16.
8956  *
8957  * This is called directly or via the handle_3same_float for float pairwise
8958  * operations where the opcode and size are calculated differently.
8959  */
8960 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
8961                                    int size, int rn, int rm, int rd)
8962 {
8963     TCGv_ptr fpst;
8964     int pass;
8965
8966     /* Floating point operations need fpst */
8967     if (opcode >= 0x58) {
8968         fpst = get_fpstatus_ptr();
8969     } else {
8970         TCGV_UNUSED_PTR(fpst);
8971     }
8972
8973     if (!fp_access_check(s)) {
8974         return;
8975     }
8976
8977     /* These operations work on the concatenated rm:rn, with each pair of
8978      * adjacent elements being operated on to produce an element in the result.
8979      */
8980     if (size == 3) {
8981         TCGv_i64 tcg_res[2];
8982
8983         for (pass = 0; pass < 2; pass++) {
8984             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8985             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8986             int passreg = (pass == 0) ? rn : rm;
8987
8988             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
8989             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
8990             tcg_res[pass] = tcg_temp_new_i64();
8991
8992             switch (opcode) {
8993             case 0x17: /* ADDP */
8994                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
8995                 break;
8996             case 0x58: /* FMAXNMP */
8997                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
8998                 break;
8999             case 0x5a: /* FADDP */
9000                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9001                 break;
9002             case 0x5e: /* FMAXP */
9003                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9004                 break;
9005             case 0x78: /* FMINNMP */
9006                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9007                 break;
9008             case 0x7e: /* FMINP */
9009                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9010                 break;
9011             default:
9012                 g_assert_not_reached();
9013             }
9014
9015             tcg_temp_free_i64(tcg_op1);
9016             tcg_temp_free_i64(tcg_op2);
9017         }
9018
9019         for (pass = 0; pass < 2; pass++) {
9020             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9021             tcg_temp_free_i64(tcg_res[pass]);
9022         }
9023     } else {
9024         int maxpass = is_q ? 4 : 2;
9025         TCGv_i32 tcg_res[4];
9026
9027         for (pass = 0; pass < maxpass; pass++) {
9028             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9029             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9030             NeonGenTwoOpFn *genfn = NULL;
9031             int passreg = pass < (maxpass / 2) ? rn : rm;
9032             int passelt = (is_q && (pass & 1)) ? 2 : 0;
9033
9034             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
9035             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
9036             tcg_res[pass] = tcg_temp_new_i32();
9037
9038             switch (opcode) {
9039             case 0x17: /* ADDP */
9040             {
9041                 static NeonGenTwoOpFn * const fns[3] = {
9042                     gen_helper_neon_padd_u8,
9043                     gen_helper_neon_padd_u16,
9044                     tcg_gen_add_i32,
9045                 };
9046                 genfn = fns[size];
9047                 break;
9048             }
9049             case 0x14: /* SMAXP, UMAXP */
9050             {
9051                 static NeonGenTwoOpFn * const fns[3][2] = {
9052                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
9053                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
9054                     { gen_max_s32, gen_max_u32 },
9055                 };
9056                 genfn = fns[size][u];
9057                 break;
9058             }
9059             case 0x15: /* SMINP, UMINP */
9060             {
9061                 static NeonGenTwoOpFn * const fns[3][2] = {
9062                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
9063                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
9064                     { gen_min_s32, gen_min_u32 },
9065                 };
9066                 genfn = fns[size][u];
9067                 break;
9068             }
9069             /* The FP operations are all on single floats (32 bit) */
9070             case 0x58: /* FMAXNMP */
9071                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9072                 break;
9073             case 0x5a: /* FADDP */
9074                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9075                 break;
9076             case 0x5e: /* FMAXP */
9077                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9078                 break;
9079             case 0x78: /* FMINNMP */
9080                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9081                 break;
9082             case 0x7e: /* FMINP */
9083                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9084                 break;
9085             default:
9086                 g_assert_not_reached();
9087             }
9088
9089             /* FP ops called directly, otherwise call now */
9090             if (genfn) {
9091                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
9092             }
9093
9094             tcg_temp_free_i32(tcg_op1);
9095             tcg_temp_free_i32(tcg_op2);
9096         }
9097
9098         for (pass = 0; pass < maxpass; pass++) {
9099             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9100             tcg_temp_free_i32(tcg_res[pass]);
9101         }
9102         if (!is_q) {
9103             clear_vec_high(s, rd);
9104         }
9105     }
9106
9107     if (!TCGV_IS_UNUSED_PTR(fpst)) {
9108         tcg_temp_free_ptr(fpst);
9109     }
9110 }
9111
9112 /* Floating point op subgroup of C3.6.16. */
9113 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
9114 {
9115     /* For floating point ops, the U, size[1] and opcode bits
9116      * together indicate the operation. size[0] indicates single
9117      * or double.
9118      */
9119     int fpopcode = extract32(insn, 11, 5)
9120         | (extract32(insn, 23, 1) << 5)
9121         | (extract32(insn, 29, 1) << 6);
9122     int is_q = extract32(insn, 30, 1);
9123     int size = extract32(insn, 22, 1);
9124     int rm = extract32(insn, 16, 5);
9125     int rn = extract32(insn, 5, 5);
9126     int rd = extract32(insn, 0, 5);
9127
9128     int datasize = is_q ? 128 : 64;
9129     int esize = 32 << size;
9130     int elements = datasize / esize;
9131
9132     if (size == 1 && !is_q) {
9133         unallocated_encoding(s);
9134         return;
9135     }
9136
9137     switch (fpopcode) {
9138     case 0x58: /* FMAXNMP */
9139     case 0x5a: /* FADDP */
9140     case 0x5e: /* FMAXP */
9141     case 0x78: /* FMINNMP */
9142     case 0x7e: /* FMINP */
9143         if (size && !is_q) {
9144             unallocated_encoding(s);
9145             return;
9146         }
9147         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
9148                                rn, rm, rd);
9149         return;
9150     case 0x1b: /* FMULX */
9151     case 0x1f: /* FRECPS */
9152     case 0x3f: /* FRSQRTS */
9153     case 0x5d: /* FACGE */
9154     case 0x7d: /* FACGT */
9155     case 0x19: /* FMLA */
9156     case 0x39: /* FMLS */
9157     case 0x18: /* FMAXNM */
9158     case 0x1a: /* FADD */
9159     case 0x1c: /* FCMEQ */
9160     case 0x1e: /* FMAX */
9161     case 0x38: /* FMINNM */
9162     case 0x3a: /* FSUB */
9163     case 0x3e: /* FMIN */
9164     case 0x5b: /* FMUL */
9165     case 0x5c: /* FCMGE */
9166     case 0x5f: /* FDIV */
9167     case 0x7a: /* FABD */
9168     case 0x7c: /* FCMGT */
9169         if (!fp_access_check(s)) {
9170             return;
9171         }
9172
9173         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
9174         return;
9175     default:
9176         unallocated_encoding(s);
9177         return;
9178     }
9179 }
9180
/* Integer op subgroup of C3.6.16.
 *
 * Decodes Q, U, size, opcode and the three register numbers from insn,
 * rejects the size/opcode combinations that are unallocated, and then
 * emits the operation element by element:
 *  - size == 3 is handled 64 bits at a time through handle_3same_64();
 *  - smaller sizes are handled 32 bits at a time, using a generator
 *    function looked up by element size (and, for most opcodes, by U).
 * For a 64-bit (!is_q) operation the high half of rd is cleared at the end.
 */
static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
{
    int is_q = extract32(insn, 30, 1);
    int u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 11, 5);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    int pass;

    /* Validate the size field for this opcode before touching any state */
    switch (opcode) {
    case 0x13: /* MUL, PMUL */
        if (u && size != 0) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x0: /* SHADD, UHADD */
    case 0x2: /* SRHADD, URHADD */
    case 0x4: /* SHSUB, UHSUB */
    case 0xc: /* SMAX, UMAX */
    case 0xd: /* SMIN, UMIN */
    case 0xe: /* SABD, UABD */
    case 0xf: /* SABA, UABA */
    case 0x12: /* MLA, MLS */
        /* These have no 64-bit element form */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x16: /* SQDMULH, SQRDMULH */
        /* Only 16-bit and 32-bit elements are accepted */
        if (size == 0 || size == 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        /* 64-bit elements are only accepted with a 128-bit vector */
        if (size == 3 && !is_q) {
            unallocated_encoding(s);
            return;
        }
        break;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (size == 3) {
        /* Guaranteed by the decode switch above */
        assert(is_q);
        for (pass = 0; pass < 2; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_res = tcg_temp_new_i64();

            read_vec_element(s, tcg_op1, rn, pass, MO_64);
            read_vec_element(s, tcg_op2, rm, pass, MO_64);

            handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);

            write_vec_element(s, tcg_res, rd, pass, MO_64);

            tcg_temp_free_i64(tcg_res);
            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        }
    } else {
        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();
            /* Exactly one of these is set by the switch below; genenvfn
             * is used for the generators that also take cpu_env.
             */
            NeonGenTwoOpFn *genfn = NULL;
            NeonGenTwoOpEnvFn *genenvfn = NULL;

            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);

            /* Unless noted otherwise the tables are indexed [size][u] */
            switch (opcode) {
            case 0x0: /* SHADD, UHADD */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
                    { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
                    { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x1: /* SQADD, UQADD */
            {
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
                    { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
                    { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
                    { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
                };
                genenvfn = fns[size][u];
                break;
            }
            case 0x2: /* SRHADD, URHADD */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
                    { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
                    { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x4: /* SHSUB, UHSUB */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
                    { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
                    { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x5: /* SQSUB, UQSUB */
            {
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
                    { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
                    { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
                    { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
                };
                genenvfn = fns[size][u];
                break;
            }
            case 0x6: /* CMGT, CMHI */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
                    { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
                    { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x7: /* CMGE, CMHS */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
                    { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
                    { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x8: /* SSHL, USHL */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
                    { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
                    { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x9: /* SQSHL, UQSHL */
            {
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
                    { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
                    { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
                    { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
                };
                genenvfn = fns[size][u];
                break;
            }
            case 0xa: /* SRSHL, URSHL */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
                    { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
                    { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0xb: /* SQRSHL, UQRSHL */
            {
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
                    { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
                    { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
                    { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
                };
                genenvfn = fns[size][u];
                break;
            }
            case 0xc: /* SMAX, UMAX */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
                    { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
                    { gen_max_s32, gen_max_u32 },
                };
                genfn = fns[size][u];
                break;
            }

            case 0xd: /* SMIN, UMIN */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
                    { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
                    { gen_min_s32, gen_min_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0xe: /* SABD, UABD */
            case 0xf: /* SABA, UABA */
            {
                /* SABA/UABA reuse the absolute-difference generator; the
                 * accumulation into rd is added after the switch.
                 */
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
                    { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
                    { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x10: /* ADD, SUB */
            {
                /* Here U selects between the two operations: ADD or SUB */
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x11: /* CMTST, CMEQ */
            {
                /* Here U selects between the two operations: CMTST or CMEQ */
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
                    { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
                    { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x13: /* MUL, PMUL */
                if (u) {
                    /* PMUL */
                    assert(size == 0);
                    genfn = gen_helper_neon_mul_p8;
                    break;
                }
                /* fall through : MUL */
            case 0x12: /* MLA, MLS */
            {
                /* Indexed by size only; the MLA/MLS accumulation into rd
                 * is added after the switch.
                 */
                static NeonGenTwoOpFn * const fns[3] = {
                    gen_helper_neon_mul_u8,
                    gen_helper_neon_mul_u16,
                    tcg_gen_mul_i32,
                };
                genfn = fns[size];
                break;
            }
            case 0x16: /* SQDMULH, SQRDMULH */
            {
                /* Indexed [size - 1][u]: there is no 8-bit form */
                static NeonGenTwoOpEnvFn * const fns[2][2] = {
                    { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
                    { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
                };
                assert(size == 1 || size == 2);
                genenvfn = fns[size - 1][u];
                break;
            }
            default:
                g_assert_not_reached();
            }

            if (genenvfn) {
                genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
            } else {
                genfn(tcg_res, tcg_op1, tcg_op2);
            }

            if (opcode == 0xf || opcode == 0x12) {
                /* SABA, UABA, MLA, MLS: accumulating ops */
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
                };
                bool is_sub = (opcode == 0x12 && u); /* MLS */

                genfn = fns[size][is_sub];
                /* tcg_op1 is no longer needed, so reuse it for the old rd */
                read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
                genfn(tcg_res, tcg_op1, tcg_res);
            }

            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);

            tcg_temp_free_i32(tcg_res);
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);
        }
    }

    if (!is_q) {
        clear_vec_high(s, rd);
    }
}
9487
9488 /* C3.6.16 AdvSIMD three same
9489  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
9490  * +---+---+---+-----------+------+---+------+--------+---+------+------+
9491  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9492  * +---+---+---+-----------+------+---+------+--------+---+------+------+
9493  */
9494 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
9495 {
9496     int opcode = extract32(insn, 11, 5);
9497
9498     switch (opcode) {
9499     case 0x3: /* logic ops */
9500         disas_simd_3same_logic(s, insn);
9501         break;
9502     case 0x17: /* ADDP */
9503     case 0x14: /* SMAXP, UMAXP */
9504     case 0x15: /* SMINP, UMINP */
9505     {
9506         /* Pairwise operations */
9507         int is_q = extract32(insn, 30, 1);
9508         int u = extract32(insn, 29, 1);
9509         int size = extract32(insn, 22, 2);
9510         int rm = extract32(insn, 16, 5);
9511         int rn = extract32(insn, 5, 5);
9512         int rd = extract32(insn, 0, 5);
9513         if (opcode == 0x17) {
9514             if (u || (size == 3 && !is_q)) {
9515                 unallocated_encoding(s);
9516                 return;
9517             }
9518         } else {
9519             if (size == 3) {
9520                 unallocated_encoding(s);
9521                 return;
9522             }
9523         }
9524         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
9525         break;
9526     }
9527     case 0x18 ... 0x31:
9528         /* floating point ops, sz[1] and U are part of opcode */
9529         disas_simd_3same_float(s, insn);
9530         break;
9531     default:
9532         disas_simd_3same_int(s, insn);
9533         break;
9534     }
9535 }
9536
9537 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
9538                                   int size, int rn, int rd)
9539 {
9540     /* Handle 2-reg-misc ops which are widening (so each size element
9541      * in the source becomes a 2*size element in the destination.
9542      * The only instruction like this is FCVTL.
9543      */
9544     int pass;
9545
9546     if (size == 3) {
9547         /* 32 -> 64 bit fp conversion */
9548         TCGv_i64 tcg_res[2];
9549         int srcelt = is_q ? 2 : 0;
9550
9551         for (pass = 0; pass < 2; pass++) {
9552             TCGv_i32 tcg_op = tcg_temp_new_i32();
9553             tcg_res[pass] = tcg_temp_new_i64();
9554
9555             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
9556             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
9557             tcg_temp_free_i32(tcg_op);
9558         }
9559         for (pass = 0; pass < 2; pass++) {
9560             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9561             tcg_temp_free_i64(tcg_res[pass]);
9562         }
9563     } else {
9564         /* 16 -> 32 bit fp conversion */
9565         int srcelt = is_q ? 4 : 0;
9566         TCGv_i32 tcg_res[4];
9567
9568         for (pass = 0; pass < 4; pass++) {
9569             tcg_res[pass] = tcg_temp_new_i32();
9570
9571             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
9572             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
9573                                            cpu_env);
9574         }
9575         for (pass = 0; pass < 4; pass++) {
9576             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9577             tcg_temp_free_i32(tcg_res[pass]);
9578         }
9579     }
9580 }
9581
9582 static void handle_rev(DisasContext *s, int opcode, bool u,
9583                        bool is_q, int size, int rn, int rd)
9584 {
9585     int op = (opcode << 1) | u;
9586     int opsz = op + size;
9587     int grp_size = 3 - opsz;
9588     int dsize = is_q ? 128 : 64;
9589     int i;
9590
9591     if (opsz >= 3) {
9592         unallocated_encoding(s);
9593         return;
9594     }
9595
9596     if (!fp_access_check(s)) {
9597         return;
9598     }
9599
9600     if (size == 0) {
9601         /* Special case bytes, use bswap op on each group of elements */
9602         int groups = dsize / (8 << grp_size);
9603
9604         for (i = 0; i < groups; i++) {
9605             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9606
9607             read_vec_element(s, tcg_tmp, rn, i, grp_size);
9608             switch (grp_size) {
9609             case MO_16:
9610                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
9611                 break;
9612             case MO_32:
9613                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
9614                 break;
9615             case MO_64:
9616                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
9617                 break;
9618             default:
9619                 g_assert_not_reached();
9620             }
9621             write_vec_element(s, tcg_tmp, rd, i, grp_size);
9622             tcg_temp_free_i64(tcg_tmp);
9623         }
9624         if (!is_q) {
9625             clear_vec_high(s, rd);
9626         }
9627     } else {
9628         int revmask = (1 << grp_size) - 1;
9629         int esize = 8 << size;
9630         int elements = dsize / esize;
9631         TCGv_i64 tcg_rn = tcg_temp_new_i64();
9632         TCGv_i64 tcg_rd = tcg_const_i64(0);
9633         TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
9634
9635         for (i = 0; i < elements; i++) {
9636             int e_rev = (i & 0xf) ^ revmask;
9637             int off = e_rev * esize;
9638             read_vec_element(s, tcg_rn, rn, i, size);
9639             if (off >= 64) {
9640                 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
9641                                     tcg_rn, off - 64, esize);
9642             } else {
9643                 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
9644             }
9645         }
9646         write_vec_element(s, tcg_rd, rd, 0, MO_64);
9647         write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
9648
9649         tcg_temp_free_i64(tcg_rd_hi);
9650         tcg_temp_free_i64(tcg_rd);
9651         tcg_temp_free_i64(tcg_rn);
9652     }
9653 }
9654
9655 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
9656                                   bool is_q, int size, int rn, int rd)
9657 {
9658     /* Implement the pairwise operations from 2-misc:
9659      * SADDLP, UADDLP, SADALP, UADALP.
9660      * These all add pairs of elements in the input to produce a
9661      * double-width result element in the output (possibly accumulating).
9662      */
9663     bool accum = (opcode == 0x6);
9664     int maxpass = is_q ? 2 : 1;
9665     int pass;
9666     TCGv_i64 tcg_res[2];
9667
9668     if (size == 2) {
9669         /* 32 + 32 -> 64 op */
9670         TCGMemOp memop = size + (u ? 0 : MO_SIGN);
9671
9672         for (pass = 0; pass < maxpass; pass++) {
9673             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9674             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9675
9676             tcg_res[pass] = tcg_temp_new_i64();
9677
9678             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
9679             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
9680             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9681             if (accum) {
9682                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
9683                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9684             }
9685
9686             tcg_temp_free_i64(tcg_op1);
9687             tcg_temp_free_i64(tcg_op2);
9688         }
9689     } else {
9690         for (pass = 0; pass < maxpass; pass++) {
9691             TCGv_i64 tcg_op = tcg_temp_new_i64();
9692             NeonGenOneOpFn *genfn;
9693             static NeonGenOneOpFn * const fns[2][2] = {
9694                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
9695                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
9696             };
9697
9698             genfn = fns[size][u];
9699
9700             tcg_res[pass] = tcg_temp_new_i64();
9701
9702             read_vec_element(s, tcg_op, rn, pass, MO_64);
9703             genfn(tcg_res[pass], tcg_op);
9704
9705             if (accum) {
9706                 read_vec_element(s, tcg_op, rd, pass, MO_64);
9707                 if (size == 0) {
9708                     gen_helper_neon_addl_u16(tcg_res[pass],
9709                                              tcg_res[pass], tcg_op);
9710                 } else {
9711                     gen_helper_neon_addl_u32(tcg_res[pass],
9712                                              tcg_res[pass], tcg_op);
9713                 }
9714             }
9715             tcg_temp_free_i64(tcg_op);
9716         }
9717     }
9718     if (!is_q) {
9719         tcg_res[1] = tcg_const_i64(0);
9720     }
9721     for (pass = 0; pass < 2; pass++) {
9722         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9723         tcg_temp_free_i64(tcg_res[pass]);
9724     }
9725 }
9726
9727 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
9728 {
9729     /* Implement SHLL and SHLL2 */
9730     int pass;
9731     int part = is_q ? 2 : 0;
9732     TCGv_i64 tcg_res[2];
9733
9734     for (pass = 0; pass < 2; pass++) {
9735         static NeonGenWidenFn * const widenfns[3] = {
9736             gen_helper_neon_widen_u8,
9737             gen_helper_neon_widen_u16,
9738             tcg_gen_extu_i32_i64,
9739         };
9740         NeonGenWidenFn *widenfn = widenfns[size];
9741         TCGv_i32 tcg_op = tcg_temp_new_i32();
9742
9743         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
9744         tcg_res[pass] = tcg_temp_new_i64();
9745         widenfn(tcg_res[pass], tcg_op);
9746         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
9747
9748         tcg_temp_free_i32(tcg_op);
9749     }
9750
9751     for (pass = 0; pass < 2; pass++) {
9752         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9753         tcg_temp_free_i64(tcg_res[pass]);
9754     }
9755 }
9756
9757 /* C3.6.17 AdvSIMD two reg misc
9758  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9759  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9760  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9761  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
      *
      * Decode one instruction of this group and emit TCG ops for it.
      *
      * The first switch validates the encoding for each opcode.  Opcodes
      * that have a dedicated handle_*() routine are dispatched to it and
      * return directly; every other valid opcode breaks out of the switch
      * into the common code at the end of the function, which:
      *  - performs the FP access check;
      *  - fetches the fpstatus pointer and/or sets the rounding mode if the
      *    opcode asked for them (need_fpstatus / need_rmode);
      *  - applies the operation 64 bits at a time (size == 3) or 32 bits at
      *    a time (all other sizes);
      *  - calls clear_vec_high() on Rd for the non-Q form;
      *  - issues the second set_rmode call and frees the temporaries.
9762  */
9763 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
9764 {
9765     int size = extract32(insn, 22, 2);
9766     int opcode = extract32(insn, 12, 5);
9767     bool u = extract32(insn, 29, 1);
9768     bool is_q = extract32(insn, 30, 1);
9769     int rn = extract32(insn, 5, 5);
9770     int rd = extract32(insn, 0, 5);
9771     bool need_fpstatus = false;
9772     bool need_rmode = false;
9773     int rmode = -1;
9774     TCGv_i32 tcg_rmode;
9775     TCGv_ptr tcg_fpstatus;
9776
9777     switch (opcode) {
9778     case 0x0: /* REV64, REV32 */
9779     case 0x1: /* REV16 */
9780         handle_rev(s, opcode, u, is_q, size, rn, rd);
9781         return;
9782     case 0x5: /* CNT, NOT, RBIT */
9783         if (u && size == 0) {
9784             /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
9785             size = 3;
9786             break;
9787         } else if (u && size == 1) {
9788             /* RBIT */
9789             break;
9790         } else if (!u && size == 0) {
9791             /* CNT */
9792             break;
9793         }
             /* Any other U/size combination is not an allocated encoding */
9794         unallocated_encoding(s);
9795         return;
9796     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
9797     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
9798         if (size == 3) {
9799             unallocated_encoding(s);
9800             return;
9801         }
9802         if (!fp_access_check(s)) {
9803             return;
9804         }
9805
9806         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
9807         return;
9808     case 0x4: /* CLS, CLZ */
9809         if (size == 3) {
9810             unallocated_encoding(s);
9811             return;
9812         }
9813         break;
9814     case 0x2: /* SADDLP, UADDLP */
9815     case 0x6: /* SADALP, UADALP */
9816         if (size == 3) {
9817             unallocated_encoding(s);
9818             return;
9819         }
9820         if (!fp_access_check(s)) {
9821             return;
9822         }
9823         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
9824         return;
9825     case 0x13: /* SHLL, SHLL2 */
9826         if (u == 0 || size == 3) {
9827             unallocated_encoding(s);
9828             return;
9829         }
9830         if (!fp_access_check(s)) {
9831             return;
9832         }
9833         handle_shll(s, is_q, size, rn, rd);
9834         return;
9835     case 0xa: /* CMLT */
9836         if (u == 1) {
9837             unallocated_encoding(s);
9838             return;
9839         }
9840         /* fall through */
9841     case 0x8: /* CMGT, CMGE */
9842     case 0x9: /* CMEQ, CMLE */
9843     case 0xb: /* ABS, NEG */
             /* 64-bit elements are only accepted in the Q form */
9844         if (size == 3 && !is_q) {
9845             unallocated_encoding(s);
9846             return;
9847         }
9848         break;
9849     case 0x3: /* SUQADD, USQADD */
9850         if (size == 3 && !is_q) {
9851             unallocated_encoding(s);
9852             return;
9853         }
9854         if (!fp_access_check(s)) {
9855             return;
9856         }
9857         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
9858         return;
9859     case 0x7: /* SQABS, SQNEG */
9860         if (size == 3 && !is_q) {
9861             unallocated_encoding(s);
9862             return;
9863         }
9864         break;
9865     case 0xc ... 0xf:
9866     case 0x16 ... 0x1d:
9867     case 0x1f:
9868     {
9869         /* Floating point: U, size[1] and opcode indicate operation;
9870          * size[0] indicates single or double precision.
9871          */
9872         int is_double = extract32(size, 0, 1);
             /* Fold size[1] into opcode bit 5 and U into opcode bit 6, so that
              * the nested switch here and the per-element switches further
              * down can identify each FP operation by a single value.
              */
9873         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
             /* From here on size is the element size: 3 = double, 2 = single */
9874         size = is_double ? 3 : 2;
9875         switch (opcode) {
9876         case 0x2f: /* FABS */
9877         case 0x6f: /* FNEG */
9878             if (size == 3 && !is_q) {
9879                 unallocated_encoding(s);
9880                 return;
9881             }
9882             break;
9883         case 0x1d: /* SCVTF */
9884         case 0x5d: /* UCVTF */
9885         {
9886             bool is_signed = (opcode == 0x1d) ? true : false;
9887             int elements = is_double ? 2 : is_q ? 4 : 2;
9888             if (is_double && !is_q) {
9889                 unallocated_encoding(s);
9890                 return;
9891             }
9892             if (!fp_access_check(s)) {
9893                 return;
9894             }
9895             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
9896             return;
9897         }
9898         case 0x2c: /* FCMGT (zero) */
9899         case 0x2d: /* FCMEQ (zero) */
9900         case 0x2e: /* FCMLT (zero) */
9901         case 0x6c: /* FCMGE (zero) */
9902         case 0x6d: /* FCMLE (zero) */
9903             if (size == 3 && !is_q) {
9904                 unallocated_encoding(s);
9905                 return;
9906             }
                 /* NOTE(review): unlike the other dispatch paths there is no
                  * fp_access_check() before this call; presumably the handler
                  * performs it itself -- confirm.
                  */
9907             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
9908             return;
9909         case 0x7f: /* FSQRT */
9910             if (size == 3 && !is_q) {
9911                 unallocated_encoding(s);
9912                 return;
9913             }
9914             break;
9915         case 0x1a: /* FCVTNS */
9916         case 0x1b: /* FCVTMS */
9917         case 0x3a: /* FCVTPS */
9918         case 0x3b: /* FCVTZS */
9919         case 0x5a: /* FCVTNU */
9920         case 0x5b: /* FCVTMU */
9921         case 0x7a: /* FCVTPU */
9922         case 0x7b: /* FCVTZU */
9923             need_fpstatus = true;
9924             need_rmode = true;
                 /* Rounding mode: bit 0 comes from opcode bit 5, bit 1 from
                  * opcode bit 0.
                  */
9925             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9926             if (size == 3 && !is_q) {
9927                 unallocated_encoding(s);
9928                 return;
9929             }
9930             break;
9931         case 0x5c: /* FCVTAU */
9932         case 0x1c: /* FCVTAS */
9933             need_fpstatus = true;
9934             need_rmode = true;
9935             rmode = FPROUNDING_TIEAWAY;
9936             if (size == 3 && !is_q) {
9937                 unallocated_encoding(s);
9938                 return;
9939             }
9940             break;
9941         case 0x3c: /* URECPE */
                 /* size == 3 here means size[0] was set in the encoding */
9942             if (size == 3) {
9943                 unallocated_encoding(s);
9944                 return;
9945             }
9946             /* fall through */
9947         case 0x3d: /* FRECPE */
9948         case 0x7d: /* FRSQRTE */
9949             if (size == 3 && !is_q) {
9950                 unallocated_encoding(s);
9951                 return;
9952             }
9953             if (!fp_access_check(s)) {
9954                 return;
9955             }
9956             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
9957             return;
9958         case 0x56: /* FCVTXN, FCVTXN2 */
                 /* Only the size[0] == 1 (size == 3) form is accepted */
9959             if (size == 2) {
9960                 unallocated_encoding(s);
9961                 return;
9962             }
9963             /* fall through */
9964         case 0x16: /* FCVTN, FCVTN2 */
9965             /* handle_2misc_narrow does a 2*size -> size operation, but these
9966              * instructions encode the source size rather than dest size.
9967              */
9968             if (!fp_access_check(s)) {
9969                 return;
9970             }
9971             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
9972             return;
9973         case 0x17: /* FCVTL, FCVTL2 */
9974             if (!fp_access_check(s)) {
9975                 return;
9976             }
9977             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
9978             return;
9979         case 0x18: /* FRINTN */
9980         case 0x19: /* FRINTM */
9981         case 0x38: /* FRINTP */
9982         case 0x39: /* FRINTZ */
9983             need_rmode = true;
                 /* Same rounding-mode encoding as the FCVT* cases above */
9984             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9985             /* fall through */
9986         case 0x59: /* FRINTX */
9987         case 0x79: /* FRINTI */
                 /* FRINTX/FRINTI leave need_rmode clear: no rounding mode is
                  * set for them here.
                  */
9988             need_fpstatus = true;
9989             if (size == 3 && !is_q) {
9990                 unallocated_encoding(s);
9991                 return;
9992             }
9993             break;
9994         case 0x58: /* FRINTA */
9995             need_rmode = true;
9996             rmode = FPROUNDING_TIEAWAY;
9997             need_fpstatus = true;
9998             if (size == 3 && !is_q) {
9999                 unallocated_encoding(s);
10000                 return;
10001             }
10002             break;
10003         case 0x7c: /* URSQRTE */
10004             if (size == 3) {
10005                 unallocated_encoding(s);
10006                 return;
10007             }
10008             need_fpstatus = true;
10009             break;
10010         default:
10011             unallocated_encoding(s);
10012             return;
10013         }
10014         break;
10015     }
10016     default:
10017         unallocated_encoding(s);
10018         return;
10019     }
10020
          /* Common path for every opcode that did not return above */
10021     if (!fp_access_check(s)) {
10022         return;
10023     }
10024
10025     if (need_fpstatus) {
10026         tcg_fpstatus = get_fpstatus_ptr();
10027     } else {
10028         TCGV_UNUSED_PTR(tcg_fpstatus);
10029     }
10030     if (need_rmode) {
              /* NOTE(review): set_rmode writes its result back into tcg_rmode
               * and is called again with that value after the loops below;
               * presumably it returns the previous rounding mode so that the
               * second call restores it -- confirm against the helper.
               */
10031         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
10032         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10033     } else {
10034         TCGV_UNUSED_I32(tcg_rmode);
10035     }
10036
10037     if (size == 3) {
10038         /* All 64-bit element operations can be shared with scalar 2misc */
10039         int pass;
10040
              /* One 64-bit pass for the non-Q form, two for the Q form */
10041         for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
10042             TCGv_i64 tcg_op = tcg_temp_new_i64();
10043             TCGv_i64 tcg_res = tcg_temp_new_i64();
10044
10045             read_vec_element(s, tcg_op, rn, pass, MO_64);
10046
10047             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
10048                             tcg_rmode, tcg_fpstatus);
10049
10050             write_vec_element(s, tcg_res, rd, pass, MO_64);
10051
10052             tcg_temp_free_i64(tcg_res);
10053             tcg_temp_free_i64(tcg_op);
10054         }
10055     } else {
10056         int pass;
10057
              /* Sizes 0..2 are processed in 32-bit chunks: two passes for the
               * non-Q form, four for the Q form.
               */
10058         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
10059             TCGv_i32 tcg_op = tcg_temp_new_i32();
10060             TCGv_i32 tcg_res = tcg_temp_new_i32();
10061             TCGCond cond;
10062
10063             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
10064
10065             if (size == 2) {
10066                 /* Special cases for 32 bit elements */
10067                 switch (opcode) {
10068                 case 0xa: /* CMLT */
10069                     /* 32 bit integer comparison against zero, result is
10070                      * test ? (2^32 - 1) : 0. We implement via setcond(test)
10071                      * and inverting.
10072                      */
10073                     cond = TCG_COND_LT;
                          /* Shared tail: the CMGT/CMGE/CMEQ/CMLE cases below
                           * pick their condition and jump back here.
                           */
10074                 do_cmop:
10075                     tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
10076                     tcg_gen_neg_i32(tcg_res, tcg_res);
10077                     break;
10078                 case 0x8: /* CMGT, CMGE */
10079                     cond = u ? TCG_COND_GE : TCG_COND_GT;
10080                     goto do_cmop;
10081                 case 0x9: /* CMEQ, CMLE */
10082                     cond = u ? TCG_COND_LE : TCG_COND_EQ;
10083                     goto do_cmop;
10084                 case 0x4: /* CLS, CLZ */
10085                     if (u) {
10086                         gen_helper_clz32(tcg_res, tcg_op);
10087                     } else {
10088                         gen_helper_cls32(tcg_res, tcg_op);
10089                     }
10090                     break;
10091                 case 0x7: /* SQABS, SQNEG */
10092                     if (u) {
10093                         gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
10094                     } else {
10095                         gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
10096                     }
10097                     break;
10098                 case 0xb: /* ABS, NEG */
10099                     if (u) {
10100                         tcg_gen_neg_i32(tcg_res, tcg_op);
10101                     } else {
10102                         TCGv_i32 tcg_zero = tcg_const_i32(0);
10103                         tcg_gen_neg_i32(tcg_res, tcg_op);
                              /* res = (op > 0) ? op : -op */
10104                         tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
10105                                             tcg_zero, tcg_op, tcg_res);
10106                         tcg_temp_free_i32(tcg_zero);
10107                     }
10108                     break;
10109                 case 0x2f: /* FABS */
10110                     gen_helper_vfp_abss(tcg_res, tcg_op);
10111                     break;
10112                 case 0x6f: /* FNEG */
10113                     gen_helper_vfp_negs(tcg_res, tcg_op);
10114                     break;
10115                 case 0x7f: /* FSQRT */
10116                     gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
10117                     break;
10118                 case 0x1a: /* FCVTNS */
10119                 case 0x1b: /* FCVTMS */
10120                 case 0x1c: /* FCVTAS */
10121                 case 0x3a: /* FCVTPS */
10122                 case 0x3b: /* FCVTZS */
10123                 {
                          /* The signed conversions all use the same helper with
                           * a zero shift operand; they differ only in the
                           * rounding mode chosen during decode.
                           */
10124                     TCGv_i32 tcg_shift = tcg_const_i32(0);
10125                     gen_helper_vfp_tosls(tcg_res, tcg_op,
10126                                          tcg_shift, tcg_fpstatus);
10127                     tcg_temp_free_i32(tcg_shift);
10128                     break;
10129                 }
10130                 case 0x5a: /* FCVTNU */
10131                 case 0x5b: /* FCVTMU */
10132                 case 0x5c: /* FCVTAU */
10133                 case 0x7a: /* FCVTPU */
10134                 case 0x7b: /* FCVTZU */
10135                 {
                          /* Unsigned counterpart of the case above */
10136                     TCGv_i32 tcg_shift = tcg_const_i32(0);
10137                     gen_helper_vfp_touls(tcg_res, tcg_op,
10138                                          tcg_shift, tcg_fpstatus);
10139                     tcg_temp_free_i32(tcg_shift);
10140                     break;
10141                 }
10142                 case 0x18: /* FRINTN */
10143                 case 0x19: /* FRINTM */
10144                 case 0x38: /* FRINTP */
10145                 case 0x39: /* FRINTZ */
10146                 case 0x58: /* FRINTA */
10147                 case 0x79: /* FRINTI */
10148                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
10149                     break;
10150                 case 0x59: /* FRINTX */
10151                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
10152                     break;
10153                 case 0x7c: /* URSQRTE */
10154                     gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
10155                     break;
10156                 default:
10157                     g_assert_not_reached();
10158                 }
10159             } else {
10160                 /* Use helpers for 8 and 16 bit elements */
                      /* NOTE(review): each helper is handed the whole 32-bit
                       * chunk, so presumably it operates on every 8/16-bit
                       * lane packed inside it -- confirm against the helpers.
                       */
10161                 switch (opcode) {
10162                 case 0x5: /* CNT, RBIT */
10163                     /* For these two insns size is part of the opcode specifier
10164                      * (handled earlier); they always operate on byte elements.
10165                      */
10166                     if (u) {
10167                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
10168                     } else {
10169                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
10170                     }
10171                     break;
10172                 case 0x7: /* SQABS, SQNEG */
10173                 {
10174                     NeonGenOneOpEnvFn *genfn;
                          /* Indexed [size][u]: u == 0 is SQABS, u == 1 is SQNEG */
10175                     static NeonGenOneOpEnvFn * const fns[2][2] = {
10176                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10177                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10178                     };
10179                     genfn = fns[size][u];
10180                     genfn(tcg_res, cpu_env, tcg_op);
10181                     break;
10182                 }
10183                 case 0x8: /* CMGT, CMGE */
10184                 case 0x9: /* CMEQ, CMLE */
10185                 case 0xa: /* CMLT */
10186                 {
                          /* Indexed [comparison][size]: rows are GT, GE, EQ */
10187                     static NeonGenTwoOpFn * const fns[3][2] = {
10188                         { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
10189                         { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
10190                         { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
10191                     };
10192                     NeonGenTwoOpFn *genfn;
10193                     int comp;
10194                     bool reverse;
10195                     TCGv_i32 tcg_zero = tcg_const_i32(0);
10196
10197                     /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
10198                     comp = (opcode - 0x8) * 2 + u;
10199                     /* ...but LE, LT are implemented as reverse GE, GT */
10200                     reverse = (comp > 2);
10201                     if (reverse) {
                              /* CMLE (3) -> GE row (1), CMLT (4) -> GT row (0) */
10202                         comp = 4 - comp;
10203                     }
10204                     genfn = fns[comp][size];
10205                     if (reverse) {
                              /* Swapped operands: 0 >= op, 0 > op */
10206                         genfn(tcg_res, tcg_zero, tcg_op);
10207                     } else {
10208                         genfn(tcg_res, tcg_op, tcg_zero);
10209                     }
10210                     tcg_temp_free_i32(tcg_zero);
10211                     break;
10212                 }
10213                 case 0xb: /* ABS, NEG */
10214                     if (u) {
                              /* NEG is emitted as 0 - op */
10215                         TCGv_i32 tcg_zero = tcg_const_i32(0);
10216                         if (size) {
10217                             gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
10218                         } else {
10219                             gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
10220                         }
10221                         tcg_temp_free_i32(tcg_zero);
10222                     } else {
10223                         if (size) {
10224                             gen_helper_neon_abs_s16(tcg_res, tcg_op);
10225                         } else {
10226                             gen_helper_neon_abs_s8(tcg_res, tcg_op);
10227                         }
10228                     }
10229                     break;
10230                 case 0x4: /* CLS, CLZ */
10231                     if (u) {
10232                         if (size == 0) {
10233                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
10234                         } else {
10235                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
10236                         }
10237                     } else {
10238                         if (size == 0) {
10239                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
10240                         } else {
10241                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
10242                         }
10243                     }
10244                     break;
10245                 default:
10246                     g_assert_not_reached();
10247                 }
10248             }
10249
10250             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10251
10252             tcg_temp_free_i32(tcg_res);
10253             tcg_temp_free_i32(tcg_op);
10254         }
10255     }
          /* Non-Q form: only the low 64 bits were written by the loops above;
           * clear_vec_high() presumably zeroes the upper half of Rd.
           */
10256     if (!is_q) {
10257         clear_vec_high(s, rd);
10258     }
10259
10260     if (need_rmode) {
              /* Second set_rmode call, paired with the one before the loops */
10261         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10262         tcg_temp_free_i32(tcg_rmode);
10263     }
10264     if (need_fpstatus) {
10265         tcg_temp_free_ptr(tcg_fpstatus);
10266     }
10267 }
10268
10269 /* C3.6.13 AdvSIMD scalar x indexed element
10270  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10271  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10272  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10273  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10274  * C3.6.18 AdvSIMD vector x indexed element
10275  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10276  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10277  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10278  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10279  */
10280 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
10281 {
10282     /* This encoding has two kinds of instruction:
10283      *  normal, where we perform elt x idxelt => elt for each
10284      *     element in the vector
10285      *  long, where we perform elt x idxelt and generate a result of
10286      *     double the width of the input element
10287      * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
10288      */
10289     bool is_scalar = extract32(insn, 28, 1);
10290     bool is_q = extract32(insn, 30, 1);
10291     bool u = extract32(insn, 29, 1);
10292     int size = extract32(insn, 22, 2);
10293     int l = extract32(insn, 21, 1);
10294     int m = extract32(insn, 20, 1);
10295     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
10296     int rm = extract32(insn, 16, 4);
10297     int opcode = extract32(insn, 12, 4);
10298     int h = extract32(insn, 11, 1);
10299     int rn = extract32(insn, 5, 5);
10300     int rd = extract32(insn, 0, 5);
10301     bool is_long = false;
10302     bool is_fp = false;
10303     int index;
10304     TCGv_ptr fpst;
10305
10306     switch (opcode) {
10307     case 0x0: /* MLA */
10308     case 0x4: /* MLS */
10309         if (!u || is_scalar) {
10310             unallocated_encoding(s);
10311             return;
10312         }
10313         break;
10314     case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10315     case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10316     case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
10317         if (is_scalar) {
10318             unallocated_encoding(s);
10319             return;
10320         }
10321         is_long = true;
10322         break;
10323     case 0x3: /* SQDMLAL, SQDMLAL2 */
10324     case 0x7: /* SQDMLSL, SQDMLSL2 */
10325     case 0xb: /* SQDMULL, SQDMULL2 */
10326         is_long = true;
10327         /* fall through */
10328     case 0xc: /* SQDMULH */
10329     case 0xd: /* SQRDMULH */
10330         if (u) {
10331             unallocated_encoding(s);
10332             return;
10333         }
10334         break;
10335     case 0x8: /* MUL */
10336         if (u || is_scalar) {
10337             unallocated_encoding(s);
10338             return;
10339         }
10340         break;
10341     case 0x1: /* FMLA */
10342     case 0x5: /* FMLS */
10343         if (u) {
10344             unallocated_encoding(s);
10345             return;
10346         }
10347         /* fall through */
10348     case 0x9: /* FMUL, FMULX */
10349         if (!extract32(size, 1, 1)) {
10350             unallocated_encoding(s);
10351             return;
10352         }
10353         is_fp = true;
10354         break;
10355     default:
10356         unallocated_encoding(s);
10357         return;
10358     }
10359
10360     if (is_fp) {
10361         /* low bit of size indicates single/double */
10362         size = extract32(size, 0, 1) ? 3 : 2;
10363         if (size == 2) {
10364             index = h << 1 | l;
10365         } else {
10366             if (l || !is_q) {
10367                 unallocated_encoding(s);
10368                 return;
10369             }
10370             index = h;
10371         }
10372         rm |= (m << 4);
10373     } else {
10374         switch (size) {
10375         case 1:
10376             index = h << 2 | l << 1 | m;
10377             break;
10378         case 2:
10379             index = h << 1 | l;
10380             rm |= (m << 4);
10381             break;
10382         default:
10383             unallocated_encoding(s);
10384             return;
10385         }
10386     }
10387
10388     if (!fp_access_check(s)) {
10389         return;
10390     }
10391
10392     if (is_fp) {
10393         fpst = get_fpstatus_ptr();
10394     } else {
10395         TCGV_UNUSED_PTR(fpst);
10396     }
10397
10398     if (size == 3) {
10399         TCGv_i64 tcg_idx = tcg_temp_new_i64();
10400         int pass;
10401
10402         assert(is_fp && is_q && !is_long);
10403
10404         read_vec_element(s, tcg_idx, rm, index, MO_64);
10405
10406         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10407             TCGv_i64 tcg_op = tcg_temp_new_i64();
10408             TCGv_i64 tcg_res = tcg_temp_new_i64();
10409
10410             read_vec_element(s, tcg_op, rn, pass, MO_64);
10411
10412             switch (opcode) {
10413             case 0x5: /* FMLS */
10414                 /* As usual for ARM, separate negation for fused multiply-add */
10415                 gen_helper_vfp_negd(tcg_op, tcg_op);
10416                 /* fall through */
10417             case 0x1: /* FMLA */
10418                 read_vec_element(s, tcg_res, rd, pass, MO_64);
10419                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10420                 break;
10421             case 0x9: /* FMUL, FMULX */
10422                 if (u) {
10423                     gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
10424                 } else {
10425                     gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
10426                 }
10427                 break;
10428             default:
10429                 g_assert_not_reached();
10430             }
10431
10432             write_vec_element(s, tcg_res, rd, pass, MO_64);
10433             tcg_temp_free_i64(tcg_op);
10434             tcg_temp_free_i64(tcg_res);
10435         }
10436
10437         if (is_scalar) {
10438             clear_vec_high(s, rd);
10439         }
10440
10441         tcg_temp_free_i64(tcg_idx);
10442     } else if (!is_long) {
10443         /* 32 bit floating point, or 16 or 32 bit integer.
10444          * For the 16 bit scalar case we use the usual Neon helpers and
10445          * rely on the fact that 0 op 0 == 0 with no side effects.
10446          */
10447         TCGv_i32 tcg_idx = tcg_temp_new_i32();
10448         int pass, maxpasses;
10449
10450         if (is_scalar) {
10451             maxpasses = 1;
10452         } else {
10453             maxpasses = is_q ? 4 : 2;
10454         }
10455
10456         read_vec_element_i32(s, tcg_idx, rm, index, size);
10457
10458         if (size == 1 && !is_scalar) {
10459             /* The simplest way to handle the 16x16 indexed ops is to duplicate
10460              * the index into both halves of the 32 bit tcg_idx and then use
10461              * the usual Neon helpers.
10462              */
10463             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10464         }
10465
10466         for (pass = 0; pass < maxpasses; pass++) {
10467             TCGv_i32 tcg_op = tcg_temp_new_i32();
10468             TCGv_i32 tcg_res = tcg_temp_new_i32();
10469
10470             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
10471
10472             switch (opcode) {
10473             case 0x0: /* MLA */
10474             case 0x4: /* MLS */
10475             case 0x8: /* MUL */
10476             {
10477                 static NeonGenTwoOpFn * const fns[2][2] = {
10478                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
10479                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
10480                 };
10481                 NeonGenTwoOpFn *genfn;
10482                 bool is_sub = opcode == 0x4;
10483
10484                 if (size == 1) {
10485                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
10486                 } else {
10487                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
10488                 }
10489                 if (opcode == 0x8) {
10490                     break;
10491                 }
10492                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
10493                 genfn = fns[size - 1][is_sub];
10494                 genfn(tcg_res, tcg_op, tcg_res);
10495                 break;
10496             }
10497             case 0x5: /* FMLS */
10498                 /* As usual for ARM, separate negation for fused multiply-add */
10499                 gen_helper_vfp_negs(tcg_op, tcg_op);
10500                 /* fall through */
10501             case 0x1: /* FMLA */
10502                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10503                 gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10504                 break;
10505             case 0x9: /* FMUL, FMULX */
10506                 if (u) {
10507                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
10508                 } else {
10509                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
10510                 }
10511                 break;
10512             case 0xc: /* SQDMULH */
10513                 if (size == 1) {
10514                     gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
10515                                                tcg_op, tcg_idx);
10516                 } else {
10517                     gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
10518                                                tcg_op, tcg_idx);
10519                 }
10520                 break;
10521             case 0xd: /* SQRDMULH */
10522                 if (size == 1) {
10523                     gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
10524                                                 tcg_op, tcg_idx);
10525                 } else {
10526                     gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
10527                                                 tcg_op, tcg_idx);
10528                 }
10529                 break;
10530             default:
10531                 g_assert_not_reached();
10532             }
10533
10534             if (is_scalar) {
10535                 write_fp_sreg(s, rd, tcg_res);
10536             } else {
10537                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10538             }
10539
10540             tcg_temp_free_i32(tcg_op);
10541             tcg_temp_free_i32(tcg_res);
10542         }
10543
10544         tcg_temp_free_i32(tcg_idx);
10545
10546         if (!is_q) {
10547             clear_vec_high(s, rd);
10548         }
10549     } else {
10550         /* long ops: 16x16->32 or 32x32->64 */
10551         TCGv_i64 tcg_res[2];
10552         int pass;
10553         bool satop = extract32(opcode, 0, 1);
10554         TCGMemOp memop = MO_32;
10555
10556         if (satop || !u) {
10557             memop |= MO_SIGN;
10558         }
10559
10560         if (size == 2) {
10561             TCGv_i64 tcg_idx = tcg_temp_new_i64();
10562
10563             read_vec_element(s, tcg_idx, rm, index, memop);
10564
10565             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10566                 TCGv_i64 tcg_op = tcg_temp_new_i64();
10567                 TCGv_i64 tcg_passres;
10568                 int passelt;
10569
10570                 if (is_scalar) {
10571                     passelt = 0;
10572                 } else {
10573                     passelt = pass + (is_q * 2);
10574                 }
10575
10576                 read_vec_element(s, tcg_op, rn, passelt, memop);
10577
10578                 tcg_res[pass] = tcg_temp_new_i64();
10579
10580                 if (opcode == 0xa || opcode == 0xb) {
10581                     /* Non-accumulating ops */
10582                     tcg_passres = tcg_res[pass];
10583                 } else {
10584                     tcg_passres = tcg_temp_new_i64();
10585                 }
10586
10587                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
10588                 tcg_temp_free_i64(tcg_op);
10589
10590                 if (satop) {
10591                     /* saturating, doubling */
10592                     gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10593                                                       tcg_passres, tcg_passres);
10594                 }
10595
10596                 if (opcode == 0xa || opcode == 0xb) {
10597                     continue;
10598                 }
10599
10600                 /* Accumulating op: handle accumulate step */
10601                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10602
10603                 switch (opcode) {
10604                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10605                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10606                     break;
10607                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10608                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10609                     break;
10610                 case 0x7: /* SQDMLSL, SQDMLSL2 */
10611                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
10612                     /* fall through */
10613                 case 0x3: /* SQDMLAL, SQDMLAL2 */
10614                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10615                                                       tcg_res[pass],
10616                                                       tcg_passres);
10617                     break;
10618                 default:
10619                     g_assert_not_reached();
10620                 }
10621                 tcg_temp_free_i64(tcg_passres);
10622             }
10623             tcg_temp_free_i64(tcg_idx);
10624
10625             if (is_scalar) {
10626                 clear_vec_high(s, rd);
10627             }
10628         } else {
10629             TCGv_i32 tcg_idx = tcg_temp_new_i32();
10630
10631             assert(size == 1);
10632             read_vec_element_i32(s, tcg_idx, rm, index, size);
10633
10634             if (!is_scalar) {
10635                 /* The simplest way to handle the 16x16 indexed ops is to
10636                  * duplicate the index into both halves of the 32 bit tcg_idx
10637                  * and then use the usual Neon helpers.
10638                  */
10639                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10640             }
10641
10642             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10643                 TCGv_i32 tcg_op = tcg_temp_new_i32();
10644                 TCGv_i64 tcg_passres;
10645
10646                 if (is_scalar) {
10647                     read_vec_element_i32(s, tcg_op, rn, pass, size);
10648                 } else {
10649                     read_vec_element_i32(s, tcg_op, rn,
10650                                          pass + (is_q * 2), MO_32);
10651                 }
10652
10653                 tcg_res[pass] = tcg_temp_new_i64();
10654
10655                 if (opcode == 0xa || opcode == 0xb) {
10656                     /* Non-accumulating ops */
10657                     tcg_passres = tcg_res[pass];
10658                 } else {
10659                     tcg_passres = tcg_temp_new_i64();
10660                 }
10661
10662                 if (memop & MO_SIGN) {
10663                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
10664                 } else {
10665                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
10666                 }
10667                 if (satop) {
10668                     gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10669                                                       tcg_passres, tcg_passres);
10670                 }
10671                 tcg_temp_free_i32(tcg_op);
10672
10673                 if (opcode == 0xa || opcode == 0xb) {
10674                     continue;
10675                 }
10676
10677                 /* Accumulating op: handle accumulate step */
10678                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10679
10680                 switch (opcode) {
10681                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10682                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
10683                                              tcg_passres);
10684                     break;
10685                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10686                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
10687                                              tcg_passres);
10688                     break;
10689                 case 0x7: /* SQDMLSL, SQDMLSL2 */
10690                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10691                     /* fall through */
10692                 case 0x3: /* SQDMLAL, SQDMLAL2 */
10693                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10694                                                       tcg_res[pass],
10695                                                       tcg_passres);
10696                     break;
10697                 default:
10698                     g_assert_not_reached();
10699                 }
10700                 tcg_temp_free_i64(tcg_passres);
10701             }
10702             tcg_temp_free_i32(tcg_idx);
10703
10704             if (is_scalar) {
10705                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
10706             }
10707         }
10708
10709         if (is_scalar) {
10710             tcg_res[1] = tcg_const_i64(0);
10711         }
10712
10713         for (pass = 0; pass < 2; pass++) {
10714             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10715             tcg_temp_free_i64(tcg_res[pass]);
10716         }
10717     }
10718
10719     if (!TCGV_IS_UNUSED_PTR(fpst)) {
10720         tcg_temp_free_ptr(fpst);
10721     }
10722 }
10723
10724 /* C3.6.19 Crypto AES
10725  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10726  * +-----------------+------+-----------+--------+-----+------+------+
10727  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10728  * +-----------------+------+-----------+--------+-----+------+------+
10729  */
10730 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
10731 {
10732     int size = extract32(insn, 22, 2);
10733     int opcode = extract32(insn, 12, 5);
10734     int rn = extract32(insn, 5, 5);
10735     int rd = extract32(insn, 0, 5);
10736     int decrypt;
10737     TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt;
10738     CryptoThreeOpEnvFn *genfn;
10739
10740     if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
10741         || size != 0) {
10742         unallocated_encoding(s);
10743         return;
10744     }
10745
10746     switch (opcode) {
10747     case 0x4: /* AESE */
10748         decrypt = 0;
10749         genfn = gen_helper_crypto_aese;
10750         break;
10751     case 0x6: /* AESMC */
10752         decrypt = 0;
10753         genfn = gen_helper_crypto_aesmc;
10754         break;
10755     case 0x5: /* AESD */
10756         decrypt = 1;
10757         genfn = gen_helper_crypto_aese;
10758         break;
10759     case 0x7: /* AESIMC */
10760         decrypt = 1;
10761         genfn = gen_helper_crypto_aesmc;
10762         break;
10763     default:
10764         unallocated_encoding(s);
10765         return;
10766     }
10767
10768     /* Note that we convert the Vx register indexes into the
10769      * index within the vfp.regs[] array, so we can share the
10770      * helper with the AArch32 instructions.
10771      */
10772     tcg_rd_regno = tcg_const_i32(rd << 1);
10773     tcg_rn_regno = tcg_const_i32(rn << 1);
10774     tcg_decrypt = tcg_const_i32(decrypt);
10775
10776     genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt);
10777
10778     tcg_temp_free_i32(tcg_rd_regno);
10779     tcg_temp_free_i32(tcg_rn_regno);
10780     tcg_temp_free_i32(tcg_decrypt);
10781 }
10782
10783 /* C3.6.20 Crypto three-reg SHA
10784  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
10785  * +-----------------+------+---+------+---+--------+-----+------+------+
10786  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
10787  * +-----------------+------+---+------+---+--------+-----+------+------+
10788  */
10789 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
10790 {
10791     int size = extract32(insn, 22, 2);
10792     int opcode = extract32(insn, 12, 3);
10793     int rm = extract32(insn, 16, 5);
10794     int rn = extract32(insn, 5, 5);
10795     int rd = extract32(insn, 0, 5);
10796     CryptoThreeOpEnvFn *genfn;
10797     TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno;
10798     int feature = ARM_FEATURE_V8_SHA256;
10799
10800     if (size != 0) {
10801         unallocated_encoding(s);
10802         return;
10803     }
10804
10805     switch (opcode) {
10806     case 0: /* SHA1C */
10807     case 1: /* SHA1P */
10808     case 2: /* SHA1M */
10809     case 3: /* SHA1SU0 */
10810         genfn = NULL;
10811         feature = ARM_FEATURE_V8_SHA1;
10812         break;
10813     case 4: /* SHA256H */
10814         genfn = gen_helper_crypto_sha256h;
10815         break;
10816     case 5: /* SHA256H2 */
10817         genfn = gen_helper_crypto_sha256h2;
10818         break;
10819     case 6: /* SHA256SU1 */
10820         genfn = gen_helper_crypto_sha256su1;
10821         break;
10822     default:
10823         unallocated_encoding(s);
10824         return;
10825     }
10826
10827     if (!arm_dc_feature(s, feature)) {
10828         unallocated_encoding(s);
10829         return;
10830     }
10831
10832     tcg_rd_regno = tcg_const_i32(rd << 1);
10833     tcg_rn_regno = tcg_const_i32(rn << 1);
10834     tcg_rm_regno = tcg_const_i32(rm << 1);
10835
10836     if (genfn) {
10837         genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno);
10838     } else {
10839         TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
10840
10841         gen_helper_crypto_sha1_3reg(cpu_env, tcg_rd_regno,
10842                                     tcg_rn_regno, tcg_rm_regno, tcg_opcode);
10843         tcg_temp_free_i32(tcg_opcode);
10844     }
10845
10846     tcg_temp_free_i32(tcg_rd_regno);
10847     tcg_temp_free_i32(tcg_rn_regno);
10848     tcg_temp_free_i32(tcg_rm_regno);
10849 }
10850
10851 /* C3.6.21 Crypto two-reg SHA
10852  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10853  * +-----------------+------+-----------+--------+-----+------+------+
10854  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10855  * +-----------------+------+-----------+--------+-----+------+------+
10856  */
10857 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
10858 {
10859     int size = extract32(insn, 22, 2);
10860     int opcode = extract32(insn, 12, 5);
10861     int rn = extract32(insn, 5, 5);
10862     int rd = extract32(insn, 0, 5);
10863     CryptoTwoOpEnvFn *genfn;
10864     int feature;
10865     TCGv_i32 tcg_rd_regno, tcg_rn_regno;
10866
10867     if (size != 0) {
10868         unallocated_encoding(s);
10869         return;
10870     }
10871
10872     switch (opcode) {
10873     case 0: /* SHA1H */
10874         feature = ARM_FEATURE_V8_SHA1;
10875         genfn = gen_helper_crypto_sha1h;
10876         break;
10877     case 1: /* SHA1SU1 */
10878         feature = ARM_FEATURE_V8_SHA1;
10879         genfn = gen_helper_crypto_sha1su1;
10880         break;
10881     case 2: /* SHA256SU0 */
10882         feature = ARM_FEATURE_V8_SHA256;
10883         genfn = gen_helper_crypto_sha256su0;
10884         break;
10885     default:
10886         unallocated_encoding(s);
10887         return;
10888     }
10889
10890     if (!arm_dc_feature(s, feature)) {
10891         unallocated_encoding(s);
10892         return;
10893     }
10894
10895     tcg_rd_regno = tcg_const_i32(rd << 1);
10896     tcg_rn_regno = tcg_const_i32(rn << 1);
10897
10898     genfn(cpu_env, tcg_rd_regno, tcg_rn_regno);
10899
10900     tcg_temp_free_i32(tcg_rd_regno);
10901     tcg_temp_free_i32(tcg_rn_regno);
10902 }
10903
10904 /* C3.6 Data processing - SIMD, inc Crypto
10905  *
10906  * As the decode gets a little complex we are using a table based
10907  * approach for this part of the decode.
10908  */
10909 static const AArch64DecodeTable data_proc_simd[] = {
10910     /* pattern  ,  mask     ,  fn                        */
10911     { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
10912     { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
10913     { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
10914     { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
10915     { 0x0e000400, 0x9fe08400, disas_simd_copy },
10916     { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
10917     /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
10918     { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
10919     { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
10920     { 0x0e000000, 0xbf208c00, disas_simd_tb },
10921     { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
10922     { 0x2e000000, 0xbf208400, disas_simd_ext },
10923     { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
10924     { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
10925     { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
10926     { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
10927     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
10928     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
10929     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
10930     { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
10931     { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
10932     { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
10933     { 0x00000000, 0x00000000, NULL }
10934 };
10935
10936 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
10937 {
10938     /* Note that this is called with all non-FP cases from
10939      * table C3-6 so it must UNDEF for entries not specifically
10940      * allocated to instructions in that table.
10941      */
10942     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
10943     if (fn) {
10944         fn(s, insn);
10945     } else {
10946         unallocated_encoding(s);
10947     }
10948 }
10949
10950 /* C3.6 Data processing - SIMD and floating point */
10951 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
10952 {
10953     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
10954         disas_data_proc_fp(s, insn);
10955     } else {
10956         /* SIMD, including crypto */
10957         disas_data_proc_simd(s, insn);
10958     }
10959 }
10960
10961 /* C3.1 A64 instruction index by encoding */
10962 static void disas_a64_insn(CPUARMState *env, DisasContext *s)
10963 {
10964     uint32_t insn;
10965
10966     insn = arm_ldl_code(env, s->pc, s->bswap_code);
10967     s->insn = insn;
10968     s->pc += 4;
10969
10970     s->fp_access_checked = false;
10971
10972     switch (extract32(insn, 25, 4)) {
10973     case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
10974         unallocated_encoding(s);
10975         break;
10976     case 0x8: case 0x9: /* Data processing - immediate */
10977         disas_data_proc_imm(s, insn);
10978         break;
10979     case 0xa: case 0xb: /* Branch, exception generation and system insns */
10980         disas_b_exc_sys(s, insn);
10981         break;
10982     case 0x4:
10983     case 0x6:
10984     case 0xc:
10985     case 0xe:      /* Loads and stores */
10986         disas_ldst(s, insn);
10987         break;
10988     case 0x5:
10989     case 0xd:      /* Data processing - register */
10990         disas_data_proc_reg(s, insn);
10991         break;
10992     case 0x7:
10993     case 0xf:      /* Data processing - SIMD and floating point */
10994         disas_data_proc_simd_fp(s, insn);
10995         break;
10996     default:
10997         assert(FALSE); /* all 15 cases should be handled above */
10998         break;
10999     }
11000
11001     /* if we allocated any temporaries, free them here */
11002     free_tmp_a64(s);
11003 }
11004
11005 void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
11006 {
11007     CPUState *cs = CPU(cpu);
11008     CPUARMState *env = &cpu->env;
11009     DisasContext dc1, *dc = &dc1;
11010     target_ulong pc_start;
11011     target_ulong next_page_start;
11012     int num_insns;
11013     int max_insns;
11014
11015     pc_start = tb->pc;
11016
11017     dc->tb = tb;
11018
11019     dc->is_jmp = DISAS_NEXT;
11020     dc->pc = pc_start;
11021     dc->singlestep_enabled = cs->singlestep_enabled;
11022     dc->condjmp = 0;
11023
11024     dc->aarch64 = 1;
11025     /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
11026      * there is no secure EL1, so we route exceptions to EL3.
11027      */
11028     dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
11029                                !arm_el_is_aa64(env, 3);
11030     dc->thumb = 0;
11031     dc->bswap_code = 0;
11032     dc->condexec_mask = 0;
11033     dc->condexec_cond = 0;
11034     dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags);
11035     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
11036 #if !defined(CONFIG_USER_ONLY)
11037     dc->user = (dc->current_el == 0);
11038 #endif
11039     dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(tb->flags);
11040     dc->vec_len = 0;
11041     dc->vec_stride = 0;
11042     dc->cp_regs = cpu->cp_regs;
11043     dc->features = env->features;
11044
11045     /* Single step state. The code-generation logic here is:
11046      *  SS_ACTIVE == 0:
11047      *   generate code with no special handling for single-stepping (except
11048      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
11049      *   this happens anyway because those changes are all system register or
11050      *   PSTATE writes).
11051      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
11052      *   emit code for one insn
11053      *   emit code to clear PSTATE.SS
11054      *   emit code to generate software step exception for completed step
11055      *   end TB (as usual for having generated an exception)
11056      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
11057      *   emit code to generate a software step exception
11058      *   end the TB
11059      */
11060     dc->ss_active = ARM_TBFLAG_SS_ACTIVE(tb->flags);
11061     dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(tb->flags);
11062     dc->is_ldex = false;
11063     dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);
11064
11065     init_tmp_a64_array(dc);
11066
11067     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
11068     num_insns = 0;
11069     max_insns = tb->cflags & CF_COUNT_MASK;
11070     if (max_insns == 0) {
11071         max_insns = CF_COUNT_MASK;
11072     }
11073     if (max_insns > TCG_MAX_INSNS) {
11074         max_insns = TCG_MAX_INSNS;
11075     }
11076
11077     gen_tb_start(tb);
11078
11079     tcg_clear_temp_count();
11080
11081     do {
11082         tcg_gen_insn_start(dc->pc, 0);
11083         num_insns++;
11084
11085         if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
11086             CPUBreakpoint *bp;
11087             QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
11088                 if (bp->pc == dc->pc) {
11089                     if (bp->flags & BP_CPU) {
11090                         gen_a64_set_pc_im(dc->pc);
11091                         gen_helper_check_breakpoints(cpu_env);
11092                         /* End the TB early; it likely won't be executed */
11093                         dc->is_jmp = DISAS_UPDATE;
11094                     } else {
11095                         gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
11096                         /* The address covered by the breakpoint must be
11097                            included in [tb->pc, tb->pc + tb->size) in order
11098                            to for it to be properly cleared -- thus we
11099                            increment the PC here so that the logic setting
11100                            tb->size below does the right thing.  */
11101                         dc->pc += 4;
11102                         goto done_generating;
11103                     }
11104                     break;
11105                 }
11106             }
11107         }
11108
11109         if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
11110             gen_io_start();
11111         }
11112
11113         if (dc->ss_active && !dc->pstate_ss) {
11114             /* Singlestep state is Active-pending.
11115              * If we're in this state at the start of a TB then either
11116              *  a) we just took an exception to an EL which is being debugged
11117              *     and this is the first insn in the exception handler
11118              *  b) debug exceptions were masked and we just unmasked them
11119              *     without changing EL (eg by clearing PSTATE.D)
11120              * In either case we're going to take a swstep exception in the
11121              * "did not step an insn" case, and so the syndrome ISV and EX
11122              * bits should be zero.
11123              */
11124             assert(num_insns == 1);
11125             gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
11126                           default_exception_el(dc));
11127             dc->is_jmp = DISAS_EXC;
11128             break;
11129         }
11130
11131         disas_a64_insn(env, dc);
11132
11133         if (tcg_check_temp_count()) {
11134             fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
11135                     dc->pc);
11136         }
11137
11138         /* Translation stops when a conditional branch is encountered.
11139          * Otherwise the subsequent code could get translated several times.
11140          * Also stop translation when a page boundary is reached.  This
11141          * ensures prefetch aborts occur at the right place.
11142          */
11143     } while (!dc->is_jmp && !tcg_op_buf_full() &&
11144              !cs->singlestep_enabled &&
11145              !singlestep &&
11146              !dc->ss_active &&
11147              dc->pc < next_page_start &&
11148              num_insns < max_insns);
11149
11150     if (tb->cflags & CF_LAST_IO) {
11151         gen_io_end();
11152     }
11153
11154     if (unlikely(cs->singlestep_enabled || dc->ss_active)
11155         && dc->is_jmp != DISAS_EXC) {
11156         /* Note that this means single stepping WFI doesn't halt the CPU.
11157          * For conditional branch insns this is harmless unreachable code as
11158          * gen_goto_tb() has already handled emitting the debug exception
11159          * (and thus a tb-jump is not possible when singlestepping).
11160          */
11161         assert(dc->is_jmp != DISAS_TB_JUMP);
11162         if (dc->is_jmp != DISAS_JUMP) {
11163             gen_a64_set_pc_im(dc->pc);
11164         }
11165         if (cs->singlestep_enabled) {
11166             gen_exception_internal(EXCP_DEBUG);
11167         } else {
11168             gen_step_complete_exception(dc);
11169         }
11170     } else {
11171         switch (dc->is_jmp) {
11172         case DISAS_NEXT:
11173             gen_goto_tb(dc, 1, dc->pc);
11174             break;
11175         default:
11176         case DISAS_UPDATE:
11177             gen_a64_set_pc_im(dc->pc);
11178             /* fall through */
11179         case DISAS_JUMP:
11180             /* indicate that the hash table must be used to find the next TB */
11181             tcg_gen_exit_tb(0);
11182             break;
11183         case DISAS_TB_JUMP:
11184         case DISAS_EXC:
11185         case DISAS_SWI:
11186             break;
11187         case DISAS_WFE:
11188             gen_a64_set_pc_im(dc->pc);
11189             gen_helper_wfe(cpu_env);
11190             break;
11191         case DISAS_YIELD:
11192             gen_a64_set_pc_im(dc->pc);
11193             gen_helper_yield(cpu_env);
11194             break;
11195         case DISAS_WFI:
11196             /* This is a special case because we don't want to just halt the CPU
11197              * if trying to debug across a WFI.
11198              */
11199             gen_a64_set_pc_im(dc->pc);
11200             gen_helper_wfi(cpu_env);
11201             /* The helper doesn't necessarily throw an exception, but we
11202              * must go back to the main loop to check for interrupts anyway.
11203              */
11204             tcg_gen_exit_tb(0);
11205             break;
11206         }
11207     }
11208
11209 done_generating:
11210     gen_tb_end(tb, num_insns);
11211
11212 #ifdef DEBUG_DISAS
11213     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
11214         qemu_log("----------------\n");
11215         qemu_log("IN: %s\n", lookup_symbol(pc_start));
11216         log_target_disas(cs, pc_start, dc->pc - pc_start,
11217                          4 | (dc->bswap_code << 1));
11218         qemu_log("\n");
11219     }
11220 #endif
11221     tb->size = dc->pc - pc_start;
11222     tb->icount = num_insns;
11223 }