target-arm/translate-a64.c

   1 /*
   2  *  AArch64 translation
   3  *
   4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include "qemu/osdep.h"
  20
  21 #include "cpu.h"
  22 #include "tcg-op.h"
  23 #include "qemu/log.h"
  24 #include "arm_ldst.h"
  25 #include "translate.h"
  26 #include "internals.h"
  27 #include "qemu/host-utils.h"
  28
  29 #include "exec/semihost.h"
  30 #include "exec/gen-icount.h"
  31
  32 #include "exec/helper-proto.h"
  33 #include "exec/helper-gen.h"
  34 #include "exec/log.h"
  35
  36 #include "trace-tcg.h"
  37
/* TCG globals for the 32 entries of CPUARMState.xregs[]; they are created
 * in a64_translate_init() and named after regnames[].
 */
static TCGv_i64 cpu_X[32];
/* TCG global for CPUARMState.pc, created in a64_translate_init(). */
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;
  43
  44 static const char *regnames[] = {
  45     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
  46     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
  47     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
  48     "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
  49 };
  50
/* Shift kinds: logical left, logical right, arithmetic right and rotate
 * right.
 * NOTE(review): the explicit values presumably mirror the 2-bit shift
 * field of the instruction encoding; the users are outside this chunk,
 * so confirm before renumbering.
 */
enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};
  57
/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */

/* Signature of a decode function: it receives the translation context
 * and the 32-bit instruction word.
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

/* One entry of a decode table.
 * NOTE(review): presumably an instruction matches an entry when
 * (insn & mask) == pattern; the lookup code is outside this chunk,
 * so confirm against it.
 */
typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn; /* decode function associated with the entry */
} AArch64DecodeTable;
  68
/* Function prototypes for the gen_ functions used to call Neon (and,
 * for the Crypto* typedefs, crypto) helpers.  Each typedef only fixes
 * the argument types and their order.
 * NOTE(review): which argument is the destination, and whether a
 * TCGv_ptr argument is cpu_env or an FP status pointer, is not visible
 * in this chunk - confirm against the individual helpers.
 */
typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
  83
  84 /* initialize TCG globals.  */
  85 void a64_translate_init(void)
  86 {
  87     int i;
  88
  89     cpu_pc = tcg_global_mem_new_i64(cpu_env,
  90                                     offsetof(CPUARMState, pc),
  91                                     "pc");
  92     for (i = 0; i < 32; i++) {
  93         cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
  94                                           offsetof(CPUARMState, xregs[i]),
  95                                           regnames[i]);
  96     }
  97
  98     cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
  99         offsetof(CPUARMState, exclusive_high), "exclusive_high");
 100 }
 101
 102 static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s)
 103 {
 104     /* Return the mmu_idx to use for A64 "unprivileged load/store" insns:
 105      *  if EL1, access as if EL0; otherwise access at current EL
 106      */
 107     switch (s->mmu_idx) {
 108     case ARMMMUIdx_S12NSE1:
 109         return ARMMMUIdx_S12NSE0;
 110     case ARMMMUIdx_S1SE1:
 111         return ARMMMUIdx_S1SE0;
 112     case ARMMMUIdx_S2NS:
 113         g_assert_not_reached();
 114     default:
 115         return s->mmu_idx;
 116     }
 117 }
 118
 119 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
 120                             fprintf_function cpu_fprintf, int flags)
 121 {
 122     ARMCPU *cpu = ARM_CPU(cs);
 123     CPUARMState *env = &cpu->env;
 124     uint32_t psr = pstate_read(env);
 125     int i;
 126     int el = arm_current_el(env);
 127     const char *ns_status;
 128
 129     cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
 130             env->pc, env->xregs[31]);
 131     for (i = 0; i < 31; i++) {
 132         cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
 133         if ((i % 4) == 3) {
 134             cpu_fprintf(f, "\n");
 135         } else {
 136             cpu_fprintf(f, " ");
 137         }
 138     }
 139
 140     if (arm_feature(env, ARM_FEATURE_EL3) && el != 3) {
 141         ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
 142     } else {
 143         ns_status = "";
 144     }
 145
 146     cpu_fprintf(f, "\nPSTATE=%08x %c%c%c%c %sEL%d%c\n",
 147                 psr,
 148                 psr & PSTATE_N ? 'N' : '-',
 149                 psr & PSTATE_Z ? 'Z' : '-',
 150                 psr & PSTATE_C ? 'C' : '-',
 151                 psr & PSTATE_V ? 'V' : '-',
 152                 ns_status,
 153                 el,
 154                 psr & PSTATE_SP ? 'h' : 't');
 155
 156     if (flags & CPU_DUMP_FPU) {
 157         int numvfpregs = 32;
 158         for (i = 0; i < numvfpregs; i += 2) {
 159             uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
 160             uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
 161             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
 162                         i, vhi, vlo);
 163             vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
 164             vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
 165             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
 166                         i + 1, vhi, vlo);
 167         }
 168         cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
 169                     vfp_get_fpcr(env), vfp_get_fpsr(env));
 170     }
 171 }
 172
/* Emit a TCG op that loads the constant @val into cpu_pc. */
void gen_a64_set_pc_im(uint64_t val)
{
    tcg_gen_movi_i64(cpu_pc, val);
}
 177
/* 64-bit counterpart of DisasCompare; filled in by a64_test_cc() and
 * released with a64_free_cc().
 */
typedef struct DisasCompare64 {
    TCGCond cond;   /* condition copied from the 32-bit DisasCompare */
    TCGv_i64 value; /* temporary holding the sign-extended 32-bit value */
} DisasCompare64;
 182
 183 static void a64_test_cc(DisasCompare64 *c64, int cc)
 184 {
 185     DisasCompare c32;
 186
 187     arm_test_cc(&c32, cc);
 188
 189     /* Sign-extend the 32-bit value so that the GE/LT comparisons work
 190        * properly.  The NE/EQ comparisons are also fine with this choice.  */
 191     c64->cond = c32.cond;
 192     c64->value = tcg_temp_new_i64();
 193     tcg_gen_ext_i32_i64(c64->value, c32.value);
 194
 195     arm_free_cc(&c32);
 196 }
 197
/* Free the temporary that a64_test_cc() allocated in @c64. */
static void a64_free_cc(DisasCompare64 *c64)
{
    tcg_temp_free_i64(c64->value);
}
 202
 203 static void gen_exception_internal(int excp)
 204 {
 205     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 206
 207     assert(excp_is_internal(excp));
 208     gen_helper_exception_internal(cpu_env, tcg_excp);
 209     tcg_temp_free_i32(tcg_excp);
 210 }
 211
 212 static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
 213 {
 214     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 215     TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
 216     TCGv_i32 tcg_el = tcg_const_i32(target_el);
 217
 218     gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
 219                                        tcg_syn, tcg_el);
 220     tcg_temp_free_i32(tcg_el);
 221     tcg_temp_free_i32(tcg_syn);
 222     tcg_temp_free_i32(tcg_excp);
 223 }
 224
 225 static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
 226 {
 227     gen_a64_set_pc_im(s->pc - offset);
 228     gen_exception_internal(excp);
 229     s->is_jmp = DISAS_EXC;
 230 }
 231
 232 static void gen_exception_insn(DisasContext *s, int offset, int excp,
 233                                uint32_t syndrome, uint32_t target_el)
 234 {
 235     gen_a64_set_pc_im(s->pc - offset);
 236     gen_exception(excp, syndrome, target_el);
 237     s->is_jmp = DISAS_EXC;
 238 }
 239
 240 static void gen_ss_advance(DisasContext *s)
 241 {
 242     /* If the singlestep state is Active-not-pending, advance to
 243      * Active-pending.
 244      */
 245     if (s->ss_active) {
 246         s->pstate_ss = 0;
 247         gen_helper_clear_pstate_ss(cpu_env);
 248     }
 249 }
 250
 251 static void gen_step_complete_exception(DisasContext *s)
 252 {
 253     /* We just completed step of an insn. Move from Active-not-pending
 254      * to Active-pending, and then also take the swstep exception.
 255      * This corresponds to making the (IMPDEF) choice to prioritize
 256      * swstep exceptions over asynchronous exceptions taken to an exception
 257      * level where debug is disabled. This choice has the advantage that
 258      * we do not need to maintain internal state corresponding to the
 259      * ISV/EX syndrome bits between completion of the step and generation
 260      * of the exception, and our syndrome information is always correct.
 261      */
 262     gen_ss_advance(s);
 263     gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
 264                   default_exception_el(s));
 265     s->is_jmp = DISAS_EXC;
 266 }
 267
 268 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
 269 {
 270     /* No direct tb linking with singlestep (either QEMU's or the ARM
 271      * debug architecture kind) or deterministic io
 272      */
 273     if (s->singlestep_enabled || s->ss_active || (s->tb->cflags & CF_LAST_IO)) {
 274         return false;
 275     }
 276
 277 #ifndef CONFIG_USER_ONLY
 278     /* Only link tbs from inside the same guest page */
 279     if ((s->tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
 280         return false;
 281     }
 282 #endif
 283
 284     return true;
 285 }
 286
 287 static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
 288 {
 289     TranslationBlock *tb;
 290
 291     tb = s->tb;
 292     if (use_goto_tb(s, n, dest)) {
 293         tcg_gen_goto_tb(n);
 294         gen_a64_set_pc_im(dest);
 295         tcg_gen_exit_tb((intptr_t)tb + n);
 296         s->is_jmp = DISAS_TB_JUMP;
 297     } else {
 298         gen_a64_set_pc_im(dest);
 299         if (s->ss_active) {
 300             gen_step_complete_exception(s);
 301         } else if (s->singlestep_enabled) {
 302             gen_exception_internal(EXCP_DEBUG);
 303         } else {
 304             tcg_gen_exit_tb(0);
 305             s->is_jmp = DISAS_TB_JUMP;
 306         }
 307     }
 308 }
 309
/* Raise EXCP_UDEF for an unallocated or reserved encoding.  The PC is
 * set to s->pc - 4 before the exception is generated.
 */
static void unallocated_encoding(DisasContext *s)
{
    /* Unallocated and reserved encodings are uncategorized */
    gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
                       default_exception_el(s));
}
 316
 317 #define unsupported_encoding(s, insn)                                    \
 318     do {                                                                 \
 319         qemu_log_mask(LOG_UNIMP,                                         \
 320                       "%s:%d: unsupported instruction encoding 0x%08x "  \
 321                       "at pc=%016" PRIx64 "\n",                          \
 322                       __FILE__, __LINE__, insn, s->pc - 4);              \
 323         unallocated_encoding(s);                                         \
 324     } while (0);
 325
 326 static void init_tmp_a64_array(DisasContext *s)
 327 {
 328 #ifdef CONFIG_DEBUG_TCG
 329     int i;
 330     for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
 331         TCGV_UNUSED_I64(s->tmp_a64[i]);
 332     }
 333 #endif
 334     s->tmp_a64_count = 0;
 335 }
 336
 337 static void free_tmp_a64(DisasContext *s)
 338 {
 339     int i;
 340     for (i = 0; i < s->tmp_a64_count; i++) {
 341         tcg_temp_free_i64(s->tmp_a64[i]);
 342     }
 343     init_tmp_a64_array(s);
 344 }
 345
 346 static TCGv_i64 new_tmp_a64(DisasContext *s)
 347 {
 348     assert(s->tmp_a64_count < TMP_A64_MAX);
 349     return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
 350 }
 351
 352 static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
 353 {
 354     TCGv_i64 t = new_tmp_a64(s);
 355     tcg_gen_movi_i64(t, 0);
 356     return t;
 357 }
 358
 359 /*
 360  * Register access functions
 361  *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 366  *
 367  * B1.2.1 Register mappings
 368  *
 369  * In instruction register encoding 31 can refer to ZR (zero register) or
 370  * the SP (stack pointer) depending on context. In QEMU's case we map SP
 371  * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 372  * This is the point of the _sp forms.
 373  */
 374 static TCGv_i64 cpu_reg(DisasContext *s, int reg)
 375 {
 376     if (reg == 31) {
 377         return new_tmp_a64_zero(s);
 378     } else {
 379         return cpu_X[reg];
 380     }
 381 }
 382
/* register access for when 31 == SP: every encoding, including 31, maps
 * directly to the cpu_X[] global.  @s is not used.
 */
static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}
 388
 389 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 390  * representing the register contents. This TCGv is an auto-freed
 391  * temporary so it need not be explicitly freed, and may be modified.
 392  */
 393 static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
 394 {
 395     TCGv_i64 v = new_tmp_a64(s);
 396     if (reg != 31) {
 397         if (sf) {
 398             tcg_gen_mov_i64(v, cpu_X[reg]);
 399         } else {
 400             tcg_gen_ext32u_i64(v, cpu_X[reg]);
 401         }
 402     } else {
 403         tcg_gen_movi_i64(v, 0);
 404     }
 405     return v;
 406 }
 407
 408 static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
 409 {
 410     TCGv_i64 v = new_tmp_a64(s);
 411     if (sf) {
 412         tcg_gen_mov_i64(v, cpu_X[reg]);
 413     } else {
 414         tcg_gen_ext32u_i64(v, cpu_X[reg]);
 415     }
 416     return v;
 417 }
 418
 419 /* We should have at some point before trying to access an FP register
 420  * done the necessary access check, so assert that
 421  * (a) we did the check and
 422  * (b) we didn't then just plough ahead anyway if it failed.
 423  * Print the instruction pattern in the abort message so we can figure
 424  * out what we need to fix if a user encounters this problem in the wild.
 425  */
 426 static inline void assert_fp_access_checked(DisasContext *s)
 427 {
 428 #ifdef CONFIG_DEBUG_TCG
 429     if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
 430         fprintf(stderr, "target-arm: FP access check missing for "
 431                 "instruction 0x%08x\n", s->insn);
 432         abort();
 433     }
 434 #endif
 435 }
 436
 437 /* Return the offset into CPUARMState of an element of specified
 438  * size, 'element' places in from the least significant end of
 439  * the FP/vector register Qn.
 440  */
 441 static inline int vec_reg_offset(DisasContext *s, int regno,
 442                                  int element, TCGMemOp size)
 443 {
 444     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 445 #ifdef HOST_WORDS_BIGENDIAN
 446     /* This is complicated slightly because vfp.regs[2n] is
 447      * still the low half and  vfp.regs[2n+1] the high half
 448      * of the 128 bit vector, even on big endian systems.
 449      * Calculate the offset assuming a fully bigendian 128 bits,
 450      * then XOR to account for the order of the two 64 bit halves.
 451      */
 452     offs += (16 - ((element + 1) * (1 << size)));
 453     offs ^= 8;
 454 #else
 455     offs += element * (1 << size);
 456 #endif
 457     assert_fp_access_checked(s);
 458     return offs;
 459 }
 460
 461 /* Return the offset into CPUARMState of a slice (from
 462  * the least significant end) of FP register Qn (ie
 463  * Dn, Sn, Hn or Bn).
 464  * (Note that this is not the same mapping as for A32; see cpu.h)
 465  */
 466 static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
 467 {
 468     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 469 #ifdef HOST_WORDS_BIGENDIAN
 470     offs += (8 - (1 << size));
 471 #endif
 472     assert_fp_access_checked(s);
 473     return offs;
 474 }
 475
/* Offset of the high half of the 128 bit vector Qn, i.e. of
 * vfp.regs[2 * regno + 1] within CPUARMState.
 */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    assert_fp_access_checked(s);
    return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
}
 482
 483 /* Convenience accessors for reading and writing single and double
 484  * FP registers. Writing clears the upper parts of the associated
 485  * 128 bit vector register, as required by the architecture.
 486  * Note that unlike the GP register accessors, the values returned
 487  * by the read functions must be manually freed.
 488  */
 489 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
 490 {
 491     TCGv_i64 v = tcg_temp_new_i64();
 492
 493     tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 494     return v;
 495 }
 496
 497 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
 498 {
 499     TCGv_i32 v = tcg_temp_new_i32();
 500
 501     tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
 502     return v;
 503 }
 504
 505 static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
 506 {
 507     TCGv_i64 tcg_zero = tcg_const_i64(0);
 508
 509     tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 510     tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg));
 511     tcg_temp_free_i64(tcg_zero);
 512 }
 513
 514 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
 515 {
 516     TCGv_i64 tmp = tcg_temp_new_i64();
 517
 518     tcg_gen_extu_i32_i64(tmp, v);
 519     write_fp_dreg(s, reg, tmp);
 520     tcg_temp_free_i64(tmp);
 521 }
 522
 523 static TCGv_ptr get_fpstatus_ptr(void)
 524 {
 525     TCGv_ptr statusptr = tcg_temp_new_ptr();
 526     int offset;
 527
 528     /* In A64 all instructions (both FP and Neon) use the FPCR;
 529      * there is no equivalent of the A32 Neon "standard FPSCR value"
 530      * and all operations use vfp.fp_status.
 531      */
 532     offset = offsetof(CPUARMState, vfp.fp_status);
 533     tcg_gen_addi_ptr(statusptr, cpu_env, offset);
 534     return statusptr;
 535 }
 536
/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    /* Split the result: low 32 bits into ZF, high 32 bits into NF */
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    /* Fold the high half into ZF, so that ZF is zero only if all
     * 64 bits of the result are zero.
     */
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
 545
 546 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
 547 static inline void gen_logic_CC(int sf, TCGv_i64 result)
 548 {
 549     if (sf) {
 550         gen_set_NZ64(result);
 551     } else {
 552         tcg_gen_extrl_i64_i32(cpu_ZF, result);
 553         tcg_gen_mov_i32(cpu_NF, cpu_ZF);
 554     }
 555     tcg_gen_movi_i32(cpu_CF, 0);
 556     tcg_gen_movi_i32(cpu_VF, 0);
 557 }
 558
 559 /* dest = T0 + T1; compute C, N, V and Z flags */
 560 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 561 {
 562     if (sf) {
 563         TCGv_i64 result, flag, tmp;
 564         result = tcg_temp_new_i64();
 565         flag = tcg_temp_new_i64();
 566         tmp = tcg_temp_new_i64();
 567
 568         tcg_gen_movi_i64(tmp, 0);
 569         tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
 570
 571         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 572
 573         gen_set_NZ64(result);
 574
 575         tcg_gen_xor_i64(flag, result, t0);
 576         tcg_gen_xor_i64(tmp, t0, t1);
 577         tcg_gen_andc_i64(flag, flag, tmp);
 578         tcg_temp_free_i64(tmp);
 579         tcg_gen_extrh_i64_i32(cpu_VF, flag);
 580
 581         tcg_gen_mov_i64(dest, result);
 582         tcg_temp_free_i64(result);
 583         tcg_temp_free_i64(flag);
 584     } else {
 585         /* 32 bit arithmetic */
 586         TCGv_i32 t0_32 = tcg_temp_new_i32();
 587         TCGv_i32 t1_32 = tcg_temp_new_i32();
 588         TCGv_i32 tmp = tcg_temp_new_i32();
 589
 590         tcg_gen_movi_i32(tmp, 0);
 591         tcg_gen_extrl_i64_i32(t0_32, t0);
 592         tcg_gen_extrl_i64_i32(t1_32, t1);
 593         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
 594         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 595         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 596         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 597         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 598         tcg_gen_extu_i32_i64(dest, cpu_NF);
 599
 600         tcg_temp_free_i32(tmp);
 601         tcg_temp_free_i32(t0_32);
 602         tcg_temp_free_i32(t1_32);
 603     }
 604 }
 605
 606 /* dest = T0 - T1; compute C, N, V and Z flags */
 607 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 608 {
 609     if (sf) {
 610         /* 64 bit arithmetic */
 611         TCGv_i64 result, flag, tmp;
 612
 613         result = tcg_temp_new_i64();
 614         flag = tcg_temp_new_i64();
 615         tcg_gen_sub_i64(result, t0, t1);
 616
 617         gen_set_NZ64(result);
 618
 619         tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
 620         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 621
 622         tcg_gen_xor_i64(flag, result, t0);
 623         tmp = tcg_temp_new_i64();
 624         tcg_gen_xor_i64(tmp, t0, t1);
 625         tcg_gen_and_i64(flag, flag, tmp);
 626         tcg_temp_free_i64(tmp);
 627         tcg_gen_extrh_i64_i32(cpu_VF, flag);
 628         tcg_gen_mov_i64(dest, result);
 629         tcg_temp_free_i64(flag);
 630         tcg_temp_free_i64(result);
 631     } else {
 632         /* 32 bit arithmetic */
 633         TCGv_i32 t0_32 = tcg_temp_new_i32();
 634         TCGv_i32 t1_32 = tcg_temp_new_i32();
 635         TCGv_i32 tmp;
 636
 637         tcg_gen_extrl_i64_i32(t0_32, t0);
 638         tcg_gen_extrl_i64_i32(t1_32, t1);
 639         tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
 640         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 641         tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
 642         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 643         tmp = tcg_temp_new_i32();
 644         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 645         tcg_temp_free_i32(t0_32);
 646         tcg_temp_free_i32(t1_32);
 647         tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 648         tcg_temp_free_i32(tmp);
 649         tcg_gen_extu_i32_i64(dest, cpu_NF);
 650     }
 651 }
 652
 653 /* dest = T0 + T1 + CF; do not compute flags. */
 654 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 655 {
 656     TCGv_i64 flag = tcg_temp_new_i64();
 657     tcg_gen_extu_i32_i64(flag, cpu_CF);
 658     tcg_gen_add_i64(dest, t0, t1);
 659     tcg_gen_add_i64(dest, dest, flag);
 660     tcg_temp_free_i64(flag);
 661
 662     if (!sf) {
 663         tcg_gen_ext32u_i64(dest, dest);
 664     }
 665 }
 666
 667 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
 668 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 669 {
 670     if (sf) {
 671         TCGv_i64 result, cf_64, vf_64, tmp;
 672         result = tcg_temp_new_i64();
 673         cf_64 = tcg_temp_new_i64();
 674         vf_64 = tcg_temp_new_i64();
 675         tmp = tcg_const_i64(0);
 676
 677         tcg_gen_extu_i32_i64(cf_64, cpu_CF);
 678         tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
 679         tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
 680         tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
 681         gen_set_NZ64(result);
 682
 683         tcg_gen_xor_i64(vf_64, result, t0);
 684         tcg_gen_xor_i64(tmp, t0, t1);
 685         tcg_gen_andc_i64(vf_64, vf_64, tmp);
 686         tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
 687
 688         tcg_gen_mov_i64(dest, result);
 689
 690         tcg_temp_free_i64(tmp);
 691         tcg_temp_free_i64(vf_64);
 692         tcg_temp_free_i64(cf_64);
 693         tcg_temp_free_i64(result);
 694     } else {
 695         TCGv_i32 t0_32, t1_32, tmp;
 696         t0_32 = tcg_temp_new_i32();
 697         t1_32 = tcg_temp_new_i32();
 698         tmp = tcg_const_i32(0);
 699
 700         tcg_gen_extrl_i64_i32(t0_32, t0);
 701         tcg_gen_extrl_i64_i32(t1_32, t1);
 702         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
 703         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);
 704
 705         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 706         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 707         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 708         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 709         tcg_gen_extu_i32_i64(dest, cpu_NF);
 710
 711         tcg_temp_free_i32(tmp);
 712         tcg_temp_free_i32(t1_32);
 713         tcg_temp_free_i32(t0_32);
 714     }
 715 }
 716
 717 /*
 718  * Load/Store generators
 719  */
 720
 721 /*
 722  * Store from GPR register to memory.
 723  */
 724 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
 725                              TCGv_i64 tcg_addr, int size, int memidx)
 726 {
 727     g_assert(size <= 3);
 728     tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);
 729 }
 730
/* As do_gpr_st_memidx(), using the MMU index from get_mem_index(s). */
static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, int size)
{
    do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s));
}
 736
 737 /*
 738  * Load from memory to GPR register
 739  */
 740 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
 741                              int size, bool is_signed, bool extend, int memidx)
 742 {
 743     TCGMemOp memop = s->be_data + size;
 744
 745     g_assert(size <= 3);
 746
 747     if (is_signed) {
 748         memop += MO_SIGN;
 749     }
 750
 751     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
 752
 753     if (extend && is_signed) {
 754         g_assert(size < 3);
 755         tcg_gen_ext32u_i64(dest, dest);
 756     }
 757 }
 758
/* As do_gpr_ld_memidx(), using the MMU index from get_mem_index(s). */
static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      int size, bool is_signed, bool extend)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
                     get_mem_index(s));
}
 765
 766 /*
 767  * Store from FP register to memory
 768  */
 769 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
 770 {
 771     /* This writes the bottom N bits of a 128 bit wide vector to memory */
 772     TCGv_i64 tmp = tcg_temp_new_i64();
 773     tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
 774     if (size < 4) {
 775         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
 776                             s->be_data + size);
 777     } else {
 778         bool be = s->be_data == MO_BE;
 779         TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
 780
 781         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 782         tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
 783                             s->be_data | MO_Q);
 784         tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
 785         tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
 786                             s->be_data | MO_Q);
 787         tcg_temp_free_i64(tcg_hiaddr);
 788     }
 789
 790     tcg_temp_free_i64(tmp);
 791 }
 792
 793 /*
 794  * Load from memory to FP register
 795  */
 796 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
 797 {
 798     /* This always zero-extends and writes to a full 128 bit wide vector */
 799     TCGv_i64 tmplo = tcg_temp_new_i64();
 800     TCGv_i64 tmphi;
 801
 802     if (size < 4) {
 803         TCGMemOp memop = s->be_data + size;
 804         tmphi = tcg_const_i64(0);
 805         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
 806     } else {
 807         bool be = s->be_data == MO_BE;
 808         TCGv_i64 tcg_hiaddr;
 809
 810         tmphi = tcg_temp_new_i64();
 811         tcg_hiaddr = tcg_temp_new_i64();
 812
 813         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 814         tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
 815                             s->be_data | MO_Q);
 816         tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
 817                             s->be_data | MO_Q);
 818         tcg_temp_free_i64(tcg_hiaddr);
 819     }
 820
 821     tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
 822     tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
 823
 824     tcg_temp_free_i64(tmplo);
 825     tcg_temp_free_i64(tmphi);
 826 }
 827
 828 /*
 829  * Vector load/store helpers.
 830  *
 831  * The principal difference between this and a FP load is that we don't
 832  * zero extend as we are filling a partial chunk of the vector register.
 833  * These functions don't support 128 bit loads/stores, which would be
 834  * normal load/store operations.
 835  *
 836  * The _i32 versions are useful when operating on 32 bit quantities
 837  * (eg for floating point single or using Neon helper functions).
 838  */
 839
 840 /* Get value of an element within a vector register */
 841 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
 842                              int element, TCGMemOp memop)
 843 {
 844     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
 845     switch (memop) {
 846     case MO_8:
 847         tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
 848         break;
 849     case MO_16:
 850         tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
 851         break;
 852     case MO_32:
 853         tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
 854         break;
 855     case MO_8|MO_SIGN:
 856         tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
 857         break;
 858     case MO_16|MO_SIGN:
 859         tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
 860         break;
 861     case MO_32|MO_SIGN:
 862         tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
 863         break;
 864     case MO_64:
 865     case MO_64|MO_SIGN:
 866         tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
 867         break;
 868     default:
 869         g_assert_not_reached();
 870     }
 871 }
 872
 873 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
 874                                  int element, TCGMemOp memop)
 875 {
 876     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
 877     switch (memop) {
 878     case MO_8:
 879         tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
 880         break;
 881     case MO_16:
 882         tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
 883         break;
 884     case MO_8|MO_SIGN:
 885         tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
 886         break;
 887     case MO_16|MO_SIGN:
 888         tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
 889         break;
 890     case MO_32:
 891     case MO_32|MO_SIGN:
 892         tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
 893         break;
 894     default:
 895         g_assert_not_reached();
 896     }
 897 }
 898
 899 /* Set value of an element within a vector register */
 900 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
 901                               int element, TCGMemOp memop)
 902 {
 903     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
 904     switch (memop) {
 905     case MO_8:
 906         tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
 907         break;
 908     case MO_16:
 909         tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
 910         break;
 911     case MO_32:
 912         tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
 913         break;
 914     case MO_64:
 915         tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
 916         break;
 917     default:
 918         g_assert_not_reached();
 919     }
 920 }
 921
 922 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
 923                                   int destidx, int element, TCGMemOp memop)
 924 {
 925     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
 926     switch (memop) {
 927     case MO_8:
 928         tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
 929         break;
 930     case MO_16:
 931         tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
 932         break;
 933     case MO_32:
 934         tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
 935         break;
 936     default:
 937         g_assert_not_reached();
 938     }
 939 }
 940
 941 /* Clear the high 64 bits of a 128 bit vector (in general non-quad
 942  * vector ops all need to do this).
 943  */
 944 static void clear_vec_high(DisasContext *s, int rd)
 945 {
 946     TCGv_i64 tcg_zero = tcg_const_i64(0);
 947
 948     write_vec_element(s, tcg_zero, rd, 1, MO_64);
 949     tcg_temp_free_i64(tcg_zero);
 950 }
 951
 952 /* Store from vector register to memory */
 953 static void do_vec_st(DisasContext *s, int srcidx, int element,
 954                       TCGv_i64 tcg_addr, int size)
 955 {
 956     TCGMemOp memop = s->be_data + size;
 957     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
 958
 959     read_vec_element(s, tcg_tmp, srcidx, element, size);
 960     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
 961
 962     tcg_temp_free_i64(tcg_tmp);
 963 }
 964
 965 /* Load from memory to vector register */
 966 static void do_vec_ld(DisasContext *s, int destidx, int element,
 967                       TCGv_i64 tcg_addr, int size)
 968 {
 969     TCGMemOp memop = s->be_data + size;
 970     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
 971
 972     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
 973     write_vec_element(s, tcg_tmp, destidx, element, size);
 974
 975     tcg_temp_free_i64(tcg_tmp);
 976 }
 977
 978 /* Check that FP/Neon access is enabled. If it is, return
 979  * true. If not, emit code to generate an appropriate exception,
 980  * and return false; the caller should not emit any code for
 981  * the instruction. Note that this check must happen after all
 982  * unallocated-encoding checks (otherwise the syndrome information
 983  * for the resulting exception will be incorrect).
 984  */
 985 static inline bool fp_access_check(DisasContext *s)
 986 {
 987     assert(!s->fp_access_checked);
 988     s->fp_access_checked = true;
 989
 990     if (!s->fp_excp_el) {
 991         return true;
 992     }
 993
 994     gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
 995                        s->fp_excp_el);
 996     return false;
 997 }
 998
 999 /*
1000  * This utility function is for doing register extension with an
1001  * optional shift. You will likely want to pass a temporary for the
1002  * destination register. See DecodeRegExtend() in the ARM ARM.
1003  */
1004 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1005                               int option, unsigned int shift)
1006 {
1007     int extsize = extract32(option, 0, 2);
1008     bool is_signed = extract32(option, 2, 1);
1009
1010     if (is_signed) {
1011         switch (extsize) {
1012         case 0:
1013             tcg_gen_ext8s_i64(tcg_out, tcg_in);
1014             break;
1015         case 1:
1016             tcg_gen_ext16s_i64(tcg_out, tcg_in);
1017             break;
1018         case 2:
1019             tcg_gen_ext32s_i64(tcg_out, tcg_in);
1020             break;
1021         case 3:
1022             tcg_gen_mov_i64(tcg_out, tcg_in);
1023             break;
1024         }
1025     } else {
1026         switch (extsize) {
1027         case 0:
1028             tcg_gen_ext8u_i64(tcg_out, tcg_in);
1029             break;
1030         case 1:
1031             tcg_gen_ext16u_i64(tcg_out, tcg_in);
1032             break;
1033         case 2:
1034             tcg_gen_ext32u_i64(tcg_out, tcg_in);
1035             break;
1036         case 3:
1037             tcg_gen_mov_i64(tcg_out, tcg_in);
1038             break;
1039         }
1040     }
1041
1042     if (shift) {
1043         tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1044     }
1045 }
1046
1047 static inline void gen_check_sp_alignment(DisasContext *s)
1048 {
1049     /* The AArch64 architecture mandates that (if enabled via PSTATE
1050      * or SCTLR bits) there is a check that SP is 16-aligned on every
1051      * SP-relative load or store (with an exception generated if it is not).
1052      * In line with general QEMU practice regarding misaligned accesses,
1053      * we omit these checks for the sake of guest program performance.
1054      * This function is provided as a hook so we can more easily add these
1055      * checks in future (possibly as a "favour catching guest program bugs
1056      * over speed" user selectable option).
1057      */
1058 }
1059
1060 /*
1061  * This provides a simple table based table lookup decoder. It is
1062  * intended to be used when the relevant bits for decode are too
1063  * awkwardly placed and switch/if based logic would be confusing and
1064  * deeply nested. Since it's a linear search through the table, tables
1065  * should be kept small.
1066  *
1067  * It returns the first handler where insn & mask == pattern, or
1068  * NULL if there is no match.
1069  * The table is terminated by an empty mask (i.e. 0)
1070  */
1071 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1072                                                uint32_t insn)
1073 {
1074     const AArch64DecodeTable *tptr = table;
1075
1076     while (tptr->mask) {
1077         if ((insn & tptr->mask) == tptr->pattern) {
1078             return tptr->disas_fn;
1079         }
1080         tptr++;
1081     }
1082     return NULL;
1083 }
1084
1085 /*
1086  * the instruction disassembly implemented here matches
1087  * the instruction encoding classifications in chapter 3 (C3)
1088  * of the ARM Architecture Reference Manual (DDI0487A_a)
1089  */
1090
1091 /* C3.2.7 Unconditional branch (immediate)
1092  *   31  30       26 25                                  0
1093  * +----+-----------+-------------------------------------+
1094  * | op | 0 0 1 0 1 |                 imm26               |
1095  * +----+-----------+-------------------------------------+
1096  */
1097 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1098 {
1099     uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
1100
1101     if (insn & (1U << 31)) {
1102         /* C5.6.26 BL Branch with link */
1103         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1104     }
1105
1106     /* C5.6.20 B Branch / C5.6.26 BL Branch with link */
1107     gen_goto_tb(s, 0, addr);
1108 }
1109
1110 /* C3.2.1 Compare & branch (immediate)
1111  *   31  30         25  24  23                  5 4      0
1112  * +----+-------------+----+---------------------+--------+
1113  * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
1114  * +----+-------------+----+---------------------+--------+
1115  */
1116 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1117 {
1118     unsigned int sf, op, rt;
1119     uint64_t addr;
1120     TCGLabel *label_match;
1121     TCGv_i64 tcg_cmp;
1122
1123     sf = extract32(insn, 31, 1);
1124     op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1125     rt = extract32(insn, 0, 5);
1126     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1127
1128     tcg_cmp = read_cpu_reg(s, rt, sf);
1129     label_match = gen_new_label();
1130
1131     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1132                         tcg_cmp, 0, label_match);
1133
1134     gen_goto_tb(s, 0, s->pc);
1135     gen_set_label(label_match);
1136     gen_goto_tb(s, 1, addr);
1137 }
1138
1139 /* C3.2.5 Test & branch (immediate)
1140  *   31  30         25  24  23   19 18          5 4    0
1141  * +----+-------------+----+-------+-------------+------+
1142  * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
1143  * +----+-------------+----+-------+-------------+------+
1144  */
1145 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1146 {
1147     unsigned int bit_pos, op, rt;
1148     uint64_t addr;
1149     TCGLabel *label_match;
1150     TCGv_i64 tcg_cmp;
1151
1152     bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1153     op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1154     addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
1155     rt = extract32(insn, 0, 5);
1156
1157     tcg_cmp = tcg_temp_new_i64();
1158     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1159     label_match = gen_new_label();
1160     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1161                         tcg_cmp, 0, label_match);
1162     tcg_temp_free_i64(tcg_cmp);
1163     gen_goto_tb(s, 0, s->pc);
1164     gen_set_label(label_match);
1165     gen_goto_tb(s, 1, addr);
1166 }
1167
1168 /* C3.2.2 / C5.6.19 Conditional branch (immediate)
1169  *  31           25  24  23                  5   4  3    0
1170  * +---------------+----+---------------------+----+------+
1171  * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
1172  * +---------------+----+---------------------+----+------+
1173  */
1174 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1175 {
1176     unsigned int cond;
1177     uint64_t addr;
1178
1179     if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1180         unallocated_encoding(s);
1181         return;
1182     }
1183     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1184     cond = extract32(insn, 0, 4);
1185
1186     if (cond < 0x0e) {
1187         /* genuinely conditional branches */
1188         TCGLabel *label_match = gen_new_label();
1189         arm_gen_test_cc(cond, label_match);
1190         gen_goto_tb(s, 0, s->pc);
1191         gen_set_label(label_match);
1192         gen_goto_tb(s, 1, addr);
1193     } else {
1194         /* 0xe and 0xf are both "always" conditions */
1195         gen_goto_tb(s, 0, addr);
1196     }
1197 }
1198
1199 /* C5.6.68 HINT */
1200 static void handle_hint(DisasContext *s, uint32_t insn,
1201                         unsigned int op1, unsigned int op2, unsigned int crm)
1202 {
1203     unsigned int selector = crm << 3 | op2;
1204
1205     if (op1 != 3) {
1206         unallocated_encoding(s);
1207         return;
1208     }
1209
1210     switch (selector) {
1211     case 0: /* NOP */
1212         return;
1213     case 3: /* WFI */
1214         s->is_jmp = DISAS_WFI;
1215         return;
1216     case 1: /* YIELD */
1217         s->is_jmp = DISAS_YIELD;
1218         return;
1219     case 2: /* WFE */
1220         s->is_jmp = DISAS_WFE;
1221         return;
1222     case 4: /* SEV */
1223     case 5: /* SEVL */
1224         /* we treat all as NOP at least for now */
1225         return;
1226     default:
1227         /* default specified as NOP equivalent */
1228         return;
1229     }
1230 }
1231
1232 static void gen_clrex(DisasContext *s, uint32_t insn)
1233 {
1234     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1235 }
1236
1237 /* CLREX, DSB, DMB, ISB */
1238 static void handle_sync(DisasContext *s, uint32_t insn,
1239                         unsigned int op1, unsigned int op2, unsigned int crm)
1240 {
1241     if (op1 != 3) {
1242         unallocated_encoding(s);
1243         return;
1244     }
1245
1246     switch (op2) {
1247     case 2: /* CLREX */
1248         gen_clrex(s, insn);
1249         return;
1250     case 4: /* DSB */
1251     case 5: /* DMB */
1252         /* We don't emulate caches so barriers are no-ops */
1253         return;
1254     case 6: /* ISB */
1255         /* We need to break the TB after this insn to execute
1256          * a self-modified code correctly and also to take
1257          * any pending interrupts immediately.
1258          */
1259         s->is_jmp = DISAS_UPDATE;
1260         return;
1261     default:
1262         unallocated_encoding(s);
1263         return;
1264     }
1265 }
1266
1267 /* C5.6.130 MSR (immediate) - move immediate to processor state field */
1268 static void handle_msr_i(DisasContext *s, uint32_t insn,
1269                          unsigned int op1, unsigned int op2, unsigned int crm)
1270 {
1271     int op = op1 << 3 | op2;
1272     switch (op) {
1273     case 0x05: /* SPSel */
1274         if (s->current_el == 0) {
1275             unallocated_encoding(s);
1276             return;
1277         }
1278         /* fall through */
1279     case 0x1e: /* DAIFSet */
1280     case 0x1f: /* DAIFClear */
1281     {
1282         TCGv_i32 tcg_imm = tcg_const_i32(crm);
1283         TCGv_i32 tcg_op = tcg_const_i32(op);
1284         gen_a64_set_pc_im(s->pc - 4);
1285         gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
1286         tcg_temp_free_i32(tcg_imm);
1287         tcg_temp_free_i32(tcg_op);
1288         s->is_jmp = DISAS_UPDATE;
1289         break;
1290     }
1291     default:
1292         unallocated_encoding(s);
1293         return;
1294     }
1295 }
1296
1297 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1298 {
1299     TCGv_i32 tmp = tcg_temp_new_i32();
1300     TCGv_i32 nzcv = tcg_temp_new_i32();
1301
1302     /* build bit 31, N */
1303     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1304     /* build bit 30, Z */
1305     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1306     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1307     /* build bit 29, C */
1308     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1309     /* build bit 28, V */
1310     tcg_gen_shri_i32(tmp, cpu_VF, 31);
1311     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1312     /* generate result */
1313     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1314
1315     tcg_temp_free_i32(nzcv);
1316     tcg_temp_free_i32(tmp);
1317 }
1318
1319 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1320
1321 {
1322     TCGv_i32 nzcv = tcg_temp_new_i32();
1323
1324     /* take NZCV from R[t] */
1325     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1326
1327     /* bit 31, N */
1328     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1329     /* bit 30, Z */
1330     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1331     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1332     /* bit 29, C */
1333     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1334     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1335     /* bit 28, V */
1336     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1337     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1338     tcg_temp_free_i32(nzcv);
1339 }
1340
1341 /* C5.6.129 MRS - move from system register
1342  * C5.6.131 MSR (register) - move to system register
1343  * C5.6.204 SYS
1344  * C5.6.205 SYSL
1345  * These are all essentially the same insn in 'read' and 'write'
1346  * versions, with varying op0 fields.
1347  */
1348 static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1349                        unsigned int op0, unsigned int op1, unsigned int op2,
1350                        unsigned int crn, unsigned int crm, unsigned int rt)
1351 {
1352     const ARMCPRegInfo *ri;
1353     TCGv_i64 tcg_rt;
1354
1355     ri = get_arm_cp_reginfo(s->cp_regs,
1356                             ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1357                                                crn, crm, op0, op1, op2));
1358
1359     if (!ri) {
1360         /* Unknown register; this might be a guest error or a QEMU
1361          * unimplemented feature.
1362          */
1363         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1364                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1365                       isread ? "read" : "write", op0, op1, crn, crm, op2);
1366         unallocated_encoding(s);
1367         return;
1368     }
1369
1370     /* Check access permissions */
1371     if (!cp_access_ok(s->current_el, ri, isread)) {
1372         unallocated_encoding(s);
1373         return;
1374     }
1375
1376     if (ri->accessfn) {
1377         /* Emit code to perform further access permissions checks at
1378          * runtime; this may result in an exception.
1379          */
1380         TCGv_ptr tmpptr;
1381         TCGv_i32 tcg_syn, tcg_isread;
1382         uint32_t syndrome;
1383
1384         gen_a64_set_pc_im(s->pc - 4);
1385         tmpptr = tcg_const_ptr(ri);
1386         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1387         tcg_syn = tcg_const_i32(syndrome);
1388         tcg_isread = tcg_const_i32(isread);
1389         gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
1390         tcg_temp_free_ptr(tmpptr);
1391         tcg_temp_free_i32(tcg_syn);
1392         tcg_temp_free_i32(tcg_isread);
1393     }
1394
1395     /* Handle special cases first */
1396     switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
1397     case ARM_CP_NOP:
1398         return;
1399     case ARM_CP_NZCV:
1400         tcg_rt = cpu_reg(s, rt);
1401         if (isread) {
1402             gen_get_nzcv(tcg_rt);
1403         } else {
1404             gen_set_nzcv(tcg_rt);
1405         }
1406         return;
1407     case ARM_CP_CURRENTEL:
1408         /* Reads as current EL value from pstate, which is
1409          * guaranteed to be constant by the tb flags.
1410          */
1411         tcg_rt = cpu_reg(s, rt);
1412         tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
1413         return;
1414     case ARM_CP_DC_ZVA:
1415         /* Writes clear the aligned block of memory which rt points into. */
1416         tcg_rt = cpu_reg(s, rt);
1417         gen_helper_dc_zva(cpu_env, tcg_rt);
1418         return;
1419     default:
1420         break;
1421     }
1422
1423     if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1424         gen_io_start();
1425     }
1426
1427     tcg_rt = cpu_reg(s, rt);
1428
1429     if (isread) {
1430         if (ri->type & ARM_CP_CONST) {
1431             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
1432         } else if (ri->readfn) {
1433             TCGv_ptr tmpptr;
1434             tmpptr = tcg_const_ptr(ri);
1435             gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
1436             tcg_temp_free_ptr(tmpptr);
1437         } else {
1438             tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
1439         }
1440     } else {
1441         if (ri->type & ARM_CP_CONST) {
1442             /* If not forbidden by access permissions, treat as WI */
1443             return;
1444         } else if (ri->writefn) {
1445             TCGv_ptr tmpptr;
1446             tmpptr = tcg_const_ptr(ri);
1447             gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
1448             tcg_temp_free_ptr(tmpptr);
1449         } else {
1450             tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
1451         }
1452     }
1453
1454     if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1455         /* I/O operations must end the TB here (whether read or write) */
1456         gen_io_end();
1457         s->is_jmp = DISAS_UPDATE;
1458     } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
1459         /* We default to ending the TB on a coprocessor register write,
1460          * but allow this to be suppressed by the register definition
1461          * (usually only necessary to work around guest bugs).
1462          */
1463         s->is_jmp = DISAS_UPDATE;
1464     }
1465 }
1466
1467 /* C3.2.4 System
1468  *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1469  * +---------------------+---+-----+-----+-------+-------+-----+------+
1470  * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1471  * +---------------------+---+-----+-----+-------+-------+-----+------+
1472  */
1473 static void disas_system(DisasContext *s, uint32_t insn)
1474 {
1475     unsigned int l, op0, op1, crn, crm, op2, rt;
1476     l = extract32(insn, 21, 1);
1477     op0 = extract32(insn, 19, 2);
1478     op1 = extract32(insn, 16, 3);
1479     crn = extract32(insn, 12, 4);
1480     crm = extract32(insn, 8, 4);
1481     op2 = extract32(insn, 5, 3);
1482     rt = extract32(insn, 0, 5);
1483
1484     if (op0 == 0) {
1485         if (l || rt != 31) {
1486             unallocated_encoding(s);
1487             return;
1488         }
1489         switch (crn) {
1490         case 2: /* C5.6.68 HINT */
1491             handle_hint(s, insn, op1, op2, crm);
1492             break;
1493         case 3: /* CLREX, DSB, DMB, ISB */
1494             handle_sync(s, insn, op1, op2, crm);
1495             break;
1496         case 4: /* C5.6.130 MSR (immediate) */
1497             handle_msr_i(s, insn, op1, op2, crm);
1498             break;
1499         default:
1500             unallocated_encoding(s);
1501             break;
1502         }
1503         return;
1504     }
1505     handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1506 }
1507
1508 /* C3.2.3 Exception generation
1509  *
1510  *  31             24 23 21 20                     5 4   2 1  0
1511  * +-----------------+-----+------------------------+-----+----+
1512  * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
1513  * +-----------------------+------------------------+----------+
1514  */
1515 static void disas_exc(DisasContext *s, uint32_t insn)
1516 {
1517     int opc = extract32(insn, 21, 3);
1518     int op2_ll = extract32(insn, 0, 5);
1519     int imm16 = extract32(insn, 5, 16);
1520     TCGv_i32 tmp;
1521
1522     switch (opc) {
1523     case 0:
1524         /* For SVC, HVC and SMC we advance the single-step state
1525          * machine before taking the exception. This is architecturally
1526          * mandated, to ensure that single-stepping a system call
1527          * instruction works properly.
1528          */
1529         switch (op2_ll) {
1530         case 1:
1531             gen_ss_advance(s);
1532             gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
1533                                default_exception_el(s));
1534             break;
1535         case 2:
1536             if (s->current_el == 0) {
1537                 unallocated_encoding(s);
1538                 break;
1539             }
1540             /* The pre HVC helper handles cases when HVC gets trapped
1541              * as an undefined insn by runtime configuration.
1542              */
1543             gen_a64_set_pc_im(s->pc - 4);
1544             gen_helper_pre_hvc(cpu_env);
1545             gen_ss_advance(s);
1546             gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
1547             break;
1548         case 3:
1549             if (s->current_el == 0) {
1550                 unallocated_encoding(s);
1551                 break;
1552             }
1553             gen_a64_set_pc_im(s->pc - 4);
1554             tmp = tcg_const_i32(syn_aa64_smc(imm16));
1555             gen_helper_pre_smc(cpu_env, tmp);
1556             tcg_temp_free_i32(tmp);
1557             gen_ss_advance(s);
1558             gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
1559             break;
1560         default:
1561             unallocated_encoding(s);
1562             break;
1563         }
1564         break;
1565     case 1:
1566         if (op2_ll != 0) {
1567             unallocated_encoding(s);
1568             break;
1569         }
1570         /* BRK */
1571         gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16),
1572                            default_exception_el(s));
1573         break;
1574     case 2:
1575         if (op2_ll != 0) {
1576             unallocated_encoding(s);
1577             break;
1578         }
1579         /* HLT. This has two purposes.
1580          * Architecturally, it is an external halting debug instruction.
1581          * Since QEMU doesn't implement external debug, we treat this as
1582          * it is required for halting debug disabled: it will UNDEF.
1583          * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
1584          */
1585         if (semihosting_enabled() && imm16 == 0xf000) {
1586 #ifndef CONFIG_USER_ONLY
1587             /* In system mode, don't allow userspace access to semihosting,
1588              * to provide some semblance of security (and for consistency
1589              * with our 32-bit semihosting).
1590              */
1591             if (s->current_el == 0) {
1592                 unsupported_encoding(s, insn);
1593                 break;
1594             }
1595 #endif
1596             gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
1597         } else {
1598             unsupported_encoding(s, insn);
1599         }
1600         break;
1601     case 5:
1602         if (op2_ll < 1 || op2_ll > 3) {
1603             unallocated_encoding(s);
1604             break;
1605         }
1606         /* DCPS1, DCPS2, DCPS3 */
1607         unsupported_encoding(s, insn);
1608         break;
1609     default:
1610         unallocated_encoding(s);
1611         break;
1612     }
1613 }
1614
1615 /* C3.2.7 Unconditional branch (register)
1616  *  31           25 24   21 20   16 15   10 9    5 4     0
1617  * +---------------+-------+-------+-------+------+-------+
1618  * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1619  * +---------------+-------+-------+-------+------+-------+
1620  */
1621 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1622 {
1623     unsigned int opc, op2, op3, rn, op4;
1624
1625     opc = extract32(insn, 21, 4);
1626     op2 = extract32(insn, 16, 5);
1627     op3 = extract32(insn, 10, 6);
1628     rn = extract32(insn, 5, 5);
1629     op4 = extract32(insn, 0, 5);
1630
1631     if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1632         unallocated_encoding(s);
1633         return;
1634     }
1635
1636     switch (opc) {
1637     case 0: /* BR */
1638     case 2: /* RET */
1639         tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1640         break;
1641     case 1: /* BLR */
1642         tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1643         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1644         break;
1645     case 4: /* ERET */
1646         if (s->current_el == 0) {
1647             unallocated_encoding(s);
1648             return;
1649         }
1650         gen_helper_exception_return(cpu_env);
1651         s->is_jmp = DISAS_JUMP;
1652         return;
1653     case 5: /* DRPS */
1654         if (rn != 0x1f) {
1655             unallocated_encoding(s);
1656         } else {
1657             unsupported_encoding(s, insn);
1658         }
1659         return;
1660     default:
1661         unallocated_encoding(s);
1662         return;
1663     }
1664
1665     s->is_jmp = DISAS_JUMP;
1666 }
1667
1668 /* C3.2 Branches, exception generating and system instructions */
1669 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1670 {
1671     switch (extract32(insn, 25, 7)) {
1672     case 0x0a: case 0x0b:
1673     case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1674         disas_uncond_b_imm(s, insn);
1675         break;
1676     case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1677         disas_comp_b_imm(s, insn);
1678         break;
1679     case 0x1b: case 0x5b: /* Test & branch (immediate) */
1680         disas_test_b_imm(s, insn);
1681         break;
1682     case 0x2a: /* Conditional branch (immediate) */
1683         disas_cond_b_imm(s, insn);
1684         break;
1685     case 0x6a: /* Exception generation / System */
1686         if (insn & (1 << 24)) {
1687             disas_system(s, insn);
1688         } else {
1689             disas_exc(s, insn);
1690         }
1691         break;
1692     case 0x6b: /* Unconditional branch (register) */
1693         disas_uncond_b_reg(s, insn);
1694         break;
1695     default:
1696         unallocated_encoding(s);
1697         break;
1698     }
1699 }
1700
1701 /*
1702  * Load/Store exclusive instructions are implemented by remembering
1703  * the value/address loaded, and seeing if these are the same
1704  * when the store is performed. This is not actually the architecturally
1705  * mandated semantics, but it works for typical guest code sequences
1706  * and avoids having to monitor regular stores.
1707  *
1708  * In system emulation mode only one CPU will be running at once, so
1709  * this sequence is effectively atomic.  In user emulation mode we
1710  * throw an exception and handle the atomic operation elsewhere.
1711  */
1712 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
1713                                TCGv_i64 addr, int size, bool is_pair)
1714 {
1715     TCGv_i64 tmp = tcg_temp_new_i64();
1716     TCGMemOp memop = s->be_data + size;
1717
1718     g_assert(size <= 3);
1719     tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);
1720
1721     if (is_pair) {
1722         TCGv_i64 addr2 = tcg_temp_new_i64();
1723         TCGv_i64 hitmp = tcg_temp_new_i64();
1724
1725         g_assert(size >= 2);
1726         tcg_gen_addi_i64(addr2, addr, 1 << size);
1727         tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
1728         tcg_temp_free_i64(addr2);
1729         tcg_gen_mov_i64(cpu_exclusive_high, hitmp);
1730         tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
1731         tcg_temp_free_i64(hitmp);
1732     }
1733
1734     tcg_gen_mov_i64(cpu_exclusive_val, tmp);
1735     tcg_gen_mov_i64(cpu_reg(s, rt), tmp);
1736
1737     tcg_temp_free_i64(tmp);
1738     tcg_gen_mov_i64(cpu_exclusive_addr, addr);
1739 }
1740
1741 #ifdef CONFIG_USER_ONLY
1742 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
1743                                 TCGv_i64 addr, int size, int is_pair)
1744 {
1745     tcg_gen_mov_i64(cpu_exclusive_test, addr);
1746     tcg_gen_movi_i32(cpu_exclusive_info,
1747                      size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14));
1748     gen_exception_internal_insn(s, 4, EXCP_STREX);
1749 }
1750 #else
1751 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
1752                                 TCGv_i64 inaddr, int size, int is_pair)
1753 {
1754     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
1755      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
1756      *     [addr] = {Rt};
1757      *     if (is_pair) {
1758      *         [addr + datasize] = {Rt2};
1759      *     }
1760      *     {Rd} = 0;
1761      * } else {
1762      *     {Rd} = 1;
1763      * }
1764      * env->exclusive_addr = -1;
1765      */
1766     TCGLabel *fail_label = gen_new_label();
1767     TCGLabel *done_label = gen_new_label();
1768     TCGv_i64 addr = tcg_temp_local_new_i64();
1769     TCGv_i64 tmp;
1770
1771     /* Copy input into a local temp so it is not trashed when the
1772      * basic block ends at the branch insn.
1773      */
1774     tcg_gen_mov_i64(addr, inaddr);
1775     tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
1776
1777     tmp = tcg_temp_new_i64();
1778     tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), s->be_data + size);
1779     tcg_gen_brcond_i64(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label);
1780     tcg_temp_free_i64(tmp);
1781
1782     if (is_pair) {
1783         TCGv_i64 addrhi = tcg_temp_new_i64();
1784         TCGv_i64 tmphi = tcg_temp_new_i64();
1785
1786         tcg_gen_addi_i64(addrhi, addr, 1 << size);
1787         tcg_gen_qemu_ld_i64(tmphi, addrhi, get_mem_index(s),
1788                             s->be_data + size);
1789         tcg_gen_brcond_i64(TCG_COND_NE, tmphi, cpu_exclusive_high, fail_label);
1790
1791         tcg_temp_free_i64(tmphi);
1792         tcg_temp_free_i64(addrhi);
1793     }
1794
1795     /* We seem to still have the exclusive monitor, so do the store */
1796     tcg_gen_qemu_st_i64(cpu_reg(s, rt), addr, get_mem_index(s),
1797                         s->be_data + size);
1798     if (is_pair) {
1799         TCGv_i64 addrhi = tcg_temp_new_i64();
1800
1801         tcg_gen_addi_i64(addrhi, addr, 1 << size);
1802         tcg_gen_qemu_st_i64(cpu_reg(s, rt2), addrhi,
1803                             get_mem_index(s), s->be_data + size);
1804         tcg_temp_free_i64(addrhi);
1805     }
1806
1807     tcg_temp_free_i64(addr);
1808
1809     tcg_gen_movi_i64(cpu_reg(s, rd), 0);
1810     tcg_gen_br(done_label);
1811     gen_set_label(fail_label);
1812     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
1813     gen_set_label(done_label);
1814     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1815
1816 }
1817 #endif
1818
1819 /* C3.3.6 Load/store exclusive
1820  *
1821  *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
1822  * +-----+-------------+----+---+----+------+----+-------+------+------+
1823  * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
1824  * +-----+-------------+----+---+----+------+----+-------+------+------+
1825  *
1826  *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
1827  *   L: 0 -> store, 1 -> load
1828  *  o2: 0 -> exclusive, 1 -> not
1829  *  o1: 0 -> single register, 1 -> register pair
1830  *  o0: 1 -> load-acquire/store-release, 0 -> not
1831  */
1832 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
1833 {
1834     int rt = extract32(insn, 0, 5);
1835     int rn = extract32(insn, 5, 5);
1836     int rt2 = extract32(insn, 10, 5);
1837     int is_lasr = extract32(insn, 15, 1);
1838     int rs = extract32(insn, 16, 5);
1839     int is_pair = extract32(insn, 21, 1);
1840     int is_store = !extract32(insn, 22, 1);
1841     int is_excl = !extract32(insn, 23, 1);
1842     int size = extract32(insn, 30, 2);
1843     TCGv_i64 tcg_addr;
1844
1845     if ((!is_excl && !is_pair && !is_lasr) ||
1846         (!is_excl && is_pair) ||
1847         (is_pair && size < 2)) {
1848         unallocated_encoding(s);
1849         return;
1850     }
1851
1852     if (rn == 31) {
1853         gen_check_sp_alignment(s);
1854     }
1855     tcg_addr = read_cpu_reg_sp(s, rn, 1);
1856
1857     /* Note that since TCG is single threaded load-acquire/store-release
1858      * semantics require no extra if (is_lasr) { ... } handling.
1859      */
1860
1861     if (is_excl) {
1862         if (!is_store) {
1863             s->is_ldex = true;
1864             gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
1865         } else {
1866             gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
1867         }
1868     } else {
1869         TCGv_i64 tcg_rt = cpu_reg(s, rt);
1870         if (is_store) {
1871             do_gpr_st(s, tcg_rt, tcg_addr, size);
1872         } else {
1873             do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false);
1874         }
1875     }
1876 }
1877
1878 /*
1879  * C3.3.5 Load register (literal)
1880  *
1881  *  31 30 29   27  26 25 24 23                5 4     0
1882  * +-----+-------+---+-----+-------------------+-------+
1883  * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
1884  * +-----+-------+---+-----+-------------------+-------+
1885  *
1886  * V: 1 -> vector (simd/fp)
1887  * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
1888  *                   10-> 32 bit signed, 11 -> prefetch
1889  * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
1890  */
1891 static void disas_ld_lit(DisasContext *s, uint32_t insn)
1892 {
1893     int rt = extract32(insn, 0, 5);
1894     int64_t imm = sextract32(insn, 5, 19) << 2;
1895     bool is_vector = extract32(insn, 26, 1);
1896     int opc = extract32(insn, 30, 2);
1897     bool is_signed = false;
1898     int size = 2;
1899     TCGv_i64 tcg_rt, tcg_addr;
1900
1901     if (is_vector) {
1902         if (opc == 3) {
1903             unallocated_encoding(s);
1904             return;
1905         }
1906         size = 2 + opc;
1907         if (!fp_access_check(s)) {
1908             return;
1909         }
1910     } else {
1911         if (opc == 3) {
1912             /* PRFM (literal) : prefetch */
1913             return;
1914         }
1915         size = 2 + extract32(opc, 0, 1);
1916         is_signed = extract32(opc, 1, 1);
1917     }
1918
1919     tcg_rt = cpu_reg(s, rt);
1920
1921     tcg_addr = tcg_const_i64((s->pc - 4) + imm);
1922     if (is_vector) {
1923         do_fp_ld(s, rt, tcg_addr, size);
1924     } else {
1925         do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
1926     }
1927     tcg_temp_free_i64(tcg_addr);
1928 }
1929
1930 /*
1931  * C5.6.80 LDNP (Load Pair - non-temporal hint)
1932  * C5.6.81 LDP (Load Pair - non vector)
1933  * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
1934  * C5.6.176 STNP (Store Pair - non-temporal hint)
1935  * C5.6.177 STP (Store Pair - non vector)
1936  * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
1937  * C6.3.165 LDP (Load Pair of SIMD&FP)
1938  * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
1939  * C6.3.284 STP (Store Pair of SIMD&FP)
1940  *
1941  *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
1942  * +-----+-------+---+---+-------+---+-----------------------------+
1943  * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
1944  * +-----+-------+---+---+-------+---+-------+-------+------+------+
1945  *
1946  * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
1947  *      LDPSW                    01
1948  *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
1949  *   V: 0 -> GPR, 1 -> Vector
1950  * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
1951  *      10 -> signed offset, 11 -> pre-index
1952  *   L: 0 -> Store 1 -> Load
1953  *
1954  * Rt, Rt2 = GPR or SIMD registers to be stored
1955  * Rn = general purpose register containing address
1956  * imm7 = signed offset (multiple of 4 or 8 depending on size)
1957  */
1958 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
1959 {
1960     int rt = extract32(insn, 0, 5);
1961     int rn = extract32(insn, 5, 5);
1962     int rt2 = extract32(insn, 10, 5);
1963     uint64_t offset = sextract64(insn, 15, 7);
1964     int index = extract32(insn, 23, 2);
1965     bool is_vector = extract32(insn, 26, 1);
1966     bool is_load = extract32(insn, 22, 1);
1967     int opc = extract32(insn, 30, 2);
1968
1969     bool is_signed = false;
1970     bool postindex = false;
1971     bool wback = false;
1972
1973     TCGv_i64 tcg_addr; /* calculated address */
1974     int size;
1975
1976     if (opc == 3) {
1977         unallocated_encoding(s);
1978         return;
1979     }
1980
1981     if (is_vector) {
1982         size = 2 + opc;
1983     } else {
1984         size = 2 + extract32(opc, 1, 1);
1985         is_signed = extract32(opc, 0, 1);
1986         if (!is_load && is_signed) {
1987             unallocated_encoding(s);
1988             return;
1989         }
1990     }
1991
1992     switch (index) {
1993     case 1: /* post-index */
1994         postindex = true;
1995         wback = true;
1996         break;
1997     case 0:
1998         /* signed offset with "non-temporal" hint. Since we don't emulate
1999          * caches we don't care about hints to the cache system about
2000          * data access patterns, and handle this identically to plain
2001          * signed offset.
2002          */
2003         if (is_signed) {
2004             /* There is no non-temporal-hint version of LDPSW */
2005             unallocated_encoding(s);
2006             return;
2007         }
2008         postindex = false;
2009         break;
2010     case 2: /* signed offset, rn not updated */
2011         postindex = false;
2012         break;
2013     case 3: /* pre-index */
2014         postindex = false;
2015         wback = true;
2016         break;
2017     }
2018
2019     if (is_vector && !fp_access_check(s)) {
2020         return;
2021     }
2022
2023     offset <<= size;
2024
2025     if (rn == 31) {
2026         gen_check_sp_alignment(s);
2027     }
2028
2029     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2030
2031     if (!postindex) {
2032         tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2033     }
2034
2035     if (is_vector) {
2036         if (is_load) {
2037             do_fp_ld(s, rt, tcg_addr, size);
2038         } else {
2039             do_fp_st(s, rt, tcg_addr, size);
2040         }
2041     } else {
2042         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2043         if (is_load) {
2044             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
2045         } else {
2046             do_gpr_st(s, tcg_rt, tcg_addr, size);
2047         }
2048     }
2049     tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2050     if (is_vector) {
2051         if (is_load) {
2052             do_fp_ld(s, rt2, tcg_addr, size);
2053         } else {
2054             do_fp_st(s, rt2, tcg_addr, size);
2055         }
2056     } else {
2057         TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2058         if (is_load) {
2059             do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false);
2060         } else {
2061             do_gpr_st(s, tcg_rt2, tcg_addr, size);
2062         }
2063     }
2064
2065     if (wback) {
2066         if (postindex) {
2067             tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
2068         } else {
2069             tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
2070         }
2071         tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
2072     }
2073 }
2074
2075 /*
2076  * C3.3.8 Load/store (immediate post-indexed)
2077  * C3.3.9 Load/store (immediate pre-indexed)
2078  * C3.3.12 Load/store (unscaled immediate)
2079  *
2080  * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2081  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2082  * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2083  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2084  *
2085  * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
2086          10 -> unprivileged
2087  * V = 0 -> non-vector
2088  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2089  * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2090  */
2091 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
2092                                 int opc,
2093                                 int size,
2094                                 int rt,
2095                                 bool is_vector)
2096 {
2097     int rn = extract32(insn, 5, 5);
2098     int imm9 = sextract32(insn, 12, 9);
2099     int idx = extract32(insn, 10, 2);
2100     bool is_signed = false;
2101     bool is_store = false;
2102     bool is_extended = false;
2103     bool is_unpriv = (idx == 2);
2104     bool post_index;
2105     bool writeback;
2106
2107     TCGv_i64 tcg_addr;
2108
2109     if (is_vector) {
2110         size |= (opc & 2) << 1;
2111         if (size > 4 || is_unpriv) {
2112             unallocated_encoding(s);
2113             return;
2114         }
2115         is_store = ((opc & 1) == 0);
2116         if (!fp_access_check(s)) {
2117             return;
2118         }
2119     } else {
2120         if (size == 3 && opc == 2) {
2121             /* PRFM - prefetch */
2122             if (is_unpriv) {
2123                 unallocated_encoding(s);
2124                 return;
2125             }
2126             return;
2127         }
2128         if (opc == 3 && size > 1) {
2129             unallocated_encoding(s);
2130             return;
2131         }
2132         is_store = (opc == 0);
2133         is_signed = extract32(opc, 1, 1);
2134         is_extended = (size < 3) && extract32(opc, 0, 1);
2135     }
2136
2137     switch (idx) {
2138     case 0:
2139     case 2:
2140         post_index = false;
2141         writeback = false;
2142         break;
2143     case 1:
2144         post_index = true;
2145         writeback = true;
2146         break;
2147     case 3:
2148         post_index = false;
2149         writeback = true;
2150         break;
2151     }
2152
2153     if (rn == 31) {
2154         gen_check_sp_alignment(s);
2155     }
2156     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2157
2158     if (!post_index) {
2159         tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2160     }
2161
2162     if (is_vector) {
2163         if (is_store) {
2164             do_fp_st(s, rt, tcg_addr, size);
2165         } else {
2166             do_fp_ld(s, rt, tcg_addr, size);
2167         }
2168     } else {
2169         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2170         int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2171
2172         if (is_store) {
2173             do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx);
2174         } else {
2175             do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
2176                              is_signed, is_extended, memidx);
2177         }
2178     }
2179
2180     if (writeback) {
2181         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2182         if (post_index) {
2183             tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2184         }
2185         tcg_gen_mov_i64(tcg_rn, tcg_addr);
2186     }
2187 }
2188
2189 /*
2190  * C3.3.10 Load/store (register offset)
2191  *
2192  * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2193  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2194  * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2195  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2196  *
2197  * For non-vector:
2198  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2199  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2200  * For vector:
2201  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2202  *   opc<0>: 0 -> store, 1 -> load
2203  * V: 1 -> vector/simd
2204  * opt: extend encoding (see DecodeRegExtend)
2205  * S: if S=1 then scale (essentially index by sizeof(size))
2206  * Rt: register to transfer into/out of
2207  * Rn: address register or SP for base
2208  * Rm: offset register or ZR for offset
2209  */
2210 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
2211                                    int opc,
2212                                    int size,
2213                                    int rt,
2214                                    bool is_vector)
2215 {
2216     int rn = extract32(insn, 5, 5);
2217     int shift = extract32(insn, 12, 1);
2218     int rm = extract32(insn, 16, 5);
2219     int opt = extract32(insn, 13, 3);
2220     bool is_signed = false;
2221     bool is_store = false;
2222     bool is_extended = false;
2223
2224     TCGv_i64 tcg_rm;
2225     TCGv_i64 tcg_addr;
2226
2227     if (extract32(opt, 1, 1) == 0) {
2228         unallocated_encoding(s);
2229         return;
2230     }
2231
2232     if (is_vector) {
2233         size |= (opc & 2) << 1;
2234         if (size > 4) {
2235             unallocated_encoding(s);
2236             return;
2237         }
2238         is_store = !extract32(opc, 0, 1);
2239         if (!fp_access_check(s)) {
2240             return;
2241         }
2242     } else {
2243         if (size == 3 && opc == 2) {
2244             /* PRFM - prefetch */
2245             return;
2246         }
2247         if (opc == 3 && size > 1) {
2248             unallocated_encoding(s);
2249             return;
2250         }
2251         is_store = (opc == 0);
2252         is_signed = extract32(opc, 1, 1);
2253         is_extended = (size < 3) && extract32(opc, 0, 1);
2254     }
2255
2256     if (rn == 31) {
2257         gen_check_sp_alignment(s);
2258     }
2259     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2260
2261     tcg_rm = read_cpu_reg(s, rm, 1);
2262     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2263
2264     tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
2265
2266     if (is_vector) {
2267         if (is_store) {
2268             do_fp_st(s, rt, tcg_addr, size);
2269         } else {
2270             do_fp_ld(s, rt, tcg_addr, size);
2271         }
2272     } else {
2273         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2274         if (is_store) {
2275             do_gpr_st(s, tcg_rt, tcg_addr, size);
2276         } else {
2277             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
2278         }
2279     }
2280 }
2281
2282 /*
2283  * C3.3.13 Load/store (unsigned immediate)
2284  *
2285  * 31 30 29   27  26 25 24 23 22 21        10 9     5
2286  * +----+-------+---+-----+-----+------------+-------+------+
2287  * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
2288  * +----+-------+---+-----+-----+------------+-------+------+
2289  *
2290  * For non-vector:
2291  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2292  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2293  * For vector:
2294  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2295  *   opc<0>: 0 -> store, 1 -> load
2296  * Rn: base address register (inc SP)
2297  * Rt: target register
2298  */
2299 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
2300                                         int opc,
2301                                         int size,
2302                                         int rt,
2303                                         bool is_vector)
2304 {
2305     int rn = extract32(insn, 5, 5);
2306     unsigned int imm12 = extract32(insn, 10, 12);
2307     unsigned int offset;
2308
2309     TCGv_i64 tcg_addr;
2310
2311     bool is_store;
2312     bool is_signed = false;
2313     bool is_extended = false;
2314
2315     if (is_vector) {
2316         size |= (opc & 2) << 1;
2317         if (size > 4) {
2318             unallocated_encoding(s);
2319             return;
2320         }
2321         is_store = !extract32(opc, 0, 1);
2322         if (!fp_access_check(s)) {
2323             return;
2324         }
2325     } else {
2326         if (size == 3 && opc == 2) {
2327             /* PRFM - prefetch */
2328             return;
2329         }
2330         if (opc == 3 && size > 1) {
2331             unallocated_encoding(s);
2332             return;
2333         }
2334         is_store = (opc == 0);
2335         is_signed = extract32(opc, 1, 1);
2336         is_extended = (size < 3) && extract32(opc, 0, 1);
2337     }
2338
2339     if (rn == 31) {
2340         gen_check_sp_alignment(s);
2341     }
2342     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2343     offset = imm12 << size;
2344     tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2345
2346     if (is_vector) {
2347         if (is_store) {
2348             do_fp_st(s, rt, tcg_addr, size);
2349         } else {
2350             do_fp_ld(s, rt, tcg_addr, size);
2351         }
2352     } else {
2353         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2354         if (is_store) {
2355             do_gpr_st(s, tcg_rt, tcg_addr, size);
2356         } else {
2357             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
2358         }
2359     }
2360 }
2361
2362 /* Load/store register (all forms) */
2363 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2364 {
2365     int rt = extract32(insn, 0, 5);
2366     int opc = extract32(insn, 22, 2);
2367     bool is_vector = extract32(insn, 26, 1);
2368     int size = extract32(insn, 30, 2);
2369
2370     switch (extract32(insn, 24, 2)) {
2371     case 0:
2372         if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2373             disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
2374         } else {
2375             /* Load/store register (unscaled immediate)
2376              * Load/store immediate pre/post-indexed
2377              * Load/store register unprivileged
2378              */
2379             disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
2380         }
2381         break;
2382     case 1:
2383         disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
2384         break;
2385     default:
2386         unallocated_encoding(s);
2387         break;
2388     }
2389 }
2390
2391 /* C3.3.1 AdvSIMD load/store multiple structures
2392  *
2393  *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
2394  * +---+---+---------------+---+-------------+--------+------+------+------+
2395  * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
2396  * +---+---+---------------+---+-------------+--------+------+------+------+
2397  *
2398  * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
2399  *
2400  *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
2401  * +---+---+---------------+---+---+---------+--------+------+------+------+
2402  * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
2403  * +---+---+---------------+---+---+---------+--------+------+------+------+
2404  *
2405  * Rt: first (or only) SIMD&FP register to be transferred
2406  * Rn: base address or SP
2407  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2408  */
2409 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
2410 {
2411     int rt = extract32(insn, 0, 5);
2412     int rn = extract32(insn, 5, 5);
2413     int size = extract32(insn, 10, 2);
2414     int opcode = extract32(insn, 12, 4);
2415     bool is_store = !extract32(insn, 22, 1);
2416     bool is_postidx = extract32(insn, 23, 1);
2417     bool is_q = extract32(insn, 30, 1);
2418     TCGv_i64 tcg_addr, tcg_rn;
2419
2420     int ebytes = 1 << size;
2421     int elements = (is_q ? 128 : 64) / (8 << size);
2422     int rpt;    /* num iterations */
2423     int selem;  /* structure elements */
2424     int r;
2425
2426     if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
2427         unallocated_encoding(s);
2428         return;
2429     }
2430
2431     /* From the shared decode logic */
2432     switch (opcode) {
2433     case 0x0:
2434         rpt = 1;
2435         selem = 4;
2436         break;
2437     case 0x2:
2438         rpt = 4;
2439         selem = 1;
2440         break;
2441     case 0x4:
2442         rpt = 1;
2443         selem = 3;
2444         break;
2445     case 0x6:
2446         rpt = 3;
2447         selem = 1;
2448         break;
2449     case 0x7:
2450         rpt = 1;
2451         selem = 1;
2452         break;
2453     case 0x8:
2454         rpt = 1;
2455         selem = 2;
2456         break;
2457     case 0xa:
2458         rpt = 2;
2459         selem = 1;
2460         break;
2461     default:
2462         unallocated_encoding(s);
2463         return;
2464     }
2465
2466     if (size == 3 && !is_q && selem != 1) {
2467         /* reserved */
2468         unallocated_encoding(s);
2469         return;
2470     }
2471
2472     if (!fp_access_check(s)) {
2473         return;
2474     }
2475
2476     if (rn == 31) {
2477         gen_check_sp_alignment(s);
2478     }
2479
2480     tcg_rn = cpu_reg_sp(s, rn);
2481     tcg_addr = tcg_temp_new_i64();
2482     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2483
2484     for (r = 0; r < rpt; r++) {
2485         int e;
2486         for (e = 0; e < elements; e++) {
2487             int tt = (rt + r) % 32;
2488             int xs;
2489             for (xs = 0; xs < selem; xs++) {
2490                 if (is_store) {
2491                     do_vec_st(s, tt, e, tcg_addr, size);
2492                 } else {
2493                     do_vec_ld(s, tt, e, tcg_addr, size);
2494
2495                     /* For non-quad operations, setting a slice of the low
2496                      * 64 bits of the register clears the high 64 bits (in
2497                      * the ARM ARM pseudocode this is implicit in the fact
2498                      * that 'rval' is a 64 bit wide variable). We optimize
2499                      * by noticing that we only need to do this the first
2500                      * time we touch a register.
2501                      */
2502                     if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
2503                         clear_vec_high(s, tt);
2504                     }
2505                 }
2506                 tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2507                 tt = (tt + 1) % 32;
2508             }
2509         }
2510     }
2511
2512     if (is_postidx) {
2513         int rm = extract32(insn, 16, 5);
2514         if (rm == 31) {
2515             tcg_gen_mov_i64(tcg_rn, tcg_addr);
2516         } else {
2517             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2518         }
2519     }
2520     tcg_temp_free_i64(tcg_addr);
2521 }
2522
2523 /* C3.3.3 AdvSIMD load/store single structure
2524  *
2525  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2526  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2527  * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
2528  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2529  *
2530  * C3.3.4 AdvSIMD load/store single structure (post-indexed)
2531  *
2532  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2533  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2534  * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
2535  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2536  *
2537  * Rt: first (or only) SIMD&FP register to be transferred
2538  * Rn: base address or SP
2539  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2540  * index = encoded in Q:S:size dependent on size
2541  *
2542  * lane_size = encoded in R, opc
2543  * transfer width = encoded in opc, S, size
2544  */
2545 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
2546 {
2547     int rt = extract32(insn, 0, 5);
2548     int rn = extract32(insn, 5, 5);
2549     int size = extract32(insn, 10, 2);
2550     int S = extract32(insn, 12, 1);
2551     int opc = extract32(insn, 13, 3);
2552     int R = extract32(insn, 21, 1);
2553     int is_load = extract32(insn, 22, 1);
2554     int is_postidx = extract32(insn, 23, 1);
2555     int is_q = extract32(insn, 30, 1);
2556
2557     int scale = extract32(opc, 1, 2);
2558     int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
2559     bool replicate = false;
2560     int index = is_q << 3 | S << 2 | size;
2561     int ebytes, xs;
2562     TCGv_i64 tcg_addr, tcg_rn;
2563
2564     switch (scale) {
2565     case 3:
2566         if (!is_load || S) {
2567             unallocated_encoding(s);
2568             return;
2569         }
2570         scale = size;
2571         replicate = true;
2572         break;
2573     case 0:
2574         break;
2575     case 1:
2576         if (extract32(size, 0, 1)) {
2577             unallocated_encoding(s);
2578             return;
2579         }
2580         index >>= 1;
2581         break;
2582     case 2:
2583         if (extract32(size, 1, 1)) {
2584             unallocated_encoding(s);
2585             return;
2586         }
2587         if (!extract32(size, 0, 1)) {
2588             index >>= 2;
2589         } else {
2590             if (S) {
2591                 unallocated_encoding(s);
2592                 return;
2593             }
2594             index >>= 3;
2595             scale = 3;
2596         }
2597         break;
2598     default:
2599         g_assert_not_reached();
2600     }
2601
2602     if (!fp_access_check(s)) {
2603         return;
2604     }
2605
2606     ebytes = 1 << scale;
2607
2608     if (rn == 31) {
2609         gen_check_sp_alignment(s);
2610     }
2611
2612     tcg_rn = cpu_reg_sp(s, rn);
2613     tcg_addr = tcg_temp_new_i64();
2614     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2615
2616     for (xs = 0; xs < selem; xs++) {
2617         if (replicate) {
2618             /* Load and replicate to all elements */
2619             uint64_t mulconst;
2620             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
2621
2622             tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
2623                                 get_mem_index(s), s->be_data + scale);
2624             switch (scale) {
2625             case 0:
2626                 mulconst = 0x0101010101010101ULL;
2627                 break;
2628             case 1:
2629                 mulconst = 0x0001000100010001ULL;
2630                 break;
2631             case 2:
2632                 mulconst = 0x0000000100000001ULL;
2633                 break;
2634             case 3:
2635                 mulconst = 0;
2636                 break;
2637             default:
2638                 g_assert_not_reached();
2639             }
2640             if (mulconst) {
2641                 tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
2642             }
2643             write_vec_element(s, tcg_tmp, rt, 0, MO_64);
2644             if (is_q) {
2645                 write_vec_element(s, tcg_tmp, rt, 1, MO_64);
2646             } else {
2647                 clear_vec_high(s, rt);
2648             }
2649             tcg_temp_free_i64(tcg_tmp);
2650         } else {
2651             /* Load/store one element per register */
2652             if (is_load) {
2653                 do_vec_ld(s, rt, index, tcg_addr, s->be_data + scale);
2654             } else {
2655                 do_vec_st(s, rt, index, tcg_addr, s->be_data + scale);
2656             }
2657         }
2658         tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2659         rt = (rt + 1) % 32;
2660     }
2661
2662     if (is_postidx) {
2663         int rm = extract32(insn, 16, 5);
2664         if (rm == 31) {
2665             tcg_gen_mov_i64(tcg_rn, tcg_addr);
2666         } else {
2667             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2668         }
2669     }
2670     tcg_temp_free_i64(tcg_addr);
2671 }
2672
2673 /* C3.3 Loads and stores */
2674 static void disas_ldst(DisasContext *s, uint32_t insn)
2675 {
2676     switch (extract32(insn, 24, 6)) {
2677     case 0x08: /* Load/store exclusive */
2678         disas_ldst_excl(s, insn);
2679         break;
2680     case 0x18: case 0x1c: /* Load register (literal) */
2681         disas_ld_lit(s, insn);
2682         break;
2683     case 0x28: case 0x29:
2684     case 0x2c: case 0x2d: /* Load/store pair (all forms) */
2685         disas_ldst_pair(s, insn);
2686         break;
2687     case 0x38: case 0x39:
2688     case 0x3c: case 0x3d: /* Load/store register (all forms) */
2689         disas_ldst_reg(s, insn);
2690         break;
2691     case 0x0c: /* AdvSIMD load/store multiple structures */
2692         disas_ldst_multiple_struct(s, insn);
2693         break;
2694     case 0x0d: /* AdvSIMD load/store single structure */
2695         disas_ldst_single_struct(s, insn);
2696         break;
2697     default:
2698         unallocated_encoding(s);
2699         break;
2700     }
2701 }
2702
2703 /* C3.4.6 PC-rel. addressing
2704  *   31  30   29 28       24 23                5 4    0
2705  * +----+-------+-----------+-------------------+------+
2706  * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
2707  * +----+-------+-----------+-------------------+------+
2708  */
2709 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
2710 {
2711     unsigned int page, rd;
2712     uint64_t base;
2713     uint64_t offset;
2714
2715     page = extract32(insn, 31, 1);
2716     /* SignExtend(immhi:immlo) -> offset */
2717     offset = sextract64(insn, 5, 19);
2718     offset = offset << 2 | extract32(insn, 29, 2);
2719     rd = extract32(insn, 0, 5);
2720     base = s->pc - 4;
2721
2722     if (page) {
2723         /* ADRP (page based) */
2724         base &= ~0xfff;
2725         offset <<= 12;
2726     }
2727
2728     tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
2729 }
2730
2731 /*
2732  * C3.4.1 Add/subtract (immediate)
2733  *
2734  *  31 30 29 28       24 23 22 21         10 9   5 4   0
2735  * +--+--+--+-----------+-----+-------------+-----+-----+
2736  * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
2737  * +--+--+--+-----------+-----+-------------+-----+-----+
2738  *
2739  *    sf: 0 -> 32bit, 1 -> 64bit
2740  *    op: 0 -> add  , 1 -> sub
2741  *     S: 1 -> set flags
2742  * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
2743  */
2744 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
2745 {
2746     int rd = extract32(insn, 0, 5);
2747     int rn = extract32(insn, 5, 5);
2748     uint64_t imm = extract32(insn, 10, 12);
2749     int shift = extract32(insn, 22, 2);
2750     bool setflags = extract32(insn, 29, 1);
2751     bool sub_op = extract32(insn, 30, 1);
2752     bool is_64bit = extract32(insn, 31, 1);
2753
2754     TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2755     TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
2756     TCGv_i64 tcg_result;
2757
2758     switch (shift) {
2759     case 0x0:
2760         break;
2761     case 0x1:
2762         imm <<= 12;
2763         break;
2764     default:
2765         unallocated_encoding(s);
2766         return;
2767     }
2768
2769     tcg_result = tcg_temp_new_i64();
2770     if (!setflags) {
2771         if (sub_op) {
2772             tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
2773         } else {
2774             tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
2775         }
2776     } else {
2777         TCGv_i64 tcg_imm = tcg_const_i64(imm);
2778         if (sub_op) {
2779             gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2780         } else {
2781             gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2782         }
2783         tcg_temp_free_i64(tcg_imm);
2784     }
2785
2786     if (is_64bit) {
2787         tcg_gen_mov_i64(tcg_rd, tcg_result);
2788     } else {
2789         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
2790     }
2791
2792     tcg_temp_free_i64(tcg_result);
2793 }
2794
/* Replicate an element across a 64 bit value.
 *
 * @mask holds the element in its bottom @e bits (higher bits zero);
 * @e is the element size in bits and must be non-zero.  The return
 * value has that element copied into every @e-bit slot of a 64 bit
 * integer.  The copy width doubles each step until it reaches 64.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int width;

    assert(e != 0);

    for (width = e; width < 64; width *= 2) {
        mask |= mask << width;
    }

    return mask;
}
2808
/* Return a value with the bottom @length bits set.
 * @length must satisfy 0 < length <= 64; this is asserted.
 */
static inline uint64_t bitmask64(unsigned int length)
{
    const unsigned int drop = 64 - length;

    assert(length > 0 && length <= 64);

    /* Start from all-ones and shift out the bits that are not wanted */
    return ~0ULL >> drop;
}
2815
/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
 * only require the wmask.
 *
 * @result: where the decoded 64 bit pattern is stored on success
 * @immn:   the N bit (0 or 1)
 * @imms:   6 bit field encoding element size and run length
 * @immr:   6 bit rotate amount
 *
 * Returns false if imms/immr/immn are a reserved combination (ie the
 * instruction should cause a guest UNDEF exception); *result is left
 * untouched in that case.  Returns true if they are valid, with the
 * decoded bit pattern written to *result.
 */
static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                                   unsigned int imms, unsigned int immr)
{
    unsigned int esize, esize_mask, run, rot;
    uint64_t elem;
    int log2_esize;

    assert(immn < 2 && imms < 64 && immr < 64);

    /* The patterns produced here are 64 bit values made of identical
     * elements of size e = 2, 4, 8, 16, 32 or 64 bits.  Every element
     * holds a run of between 1 and e-1 set bits, rotated within the
     * element by between 0 and e-1 bits.
     *
     * Element size and run length are packed into immn (1 bit) and
     * imms (6 bits, shown in binary) like this:
     * 64 bit elements: immn = 1, imms = <length of run - 1>
     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
     * The only combination not covered above is immn = 0 with
     * imms = 11111x (binary); that one is reserved.  An all-ones
     * <length of run - 1> is reserved as well.
     *
     * The rotation is always by immr % e (immr being 6 bits wide).
     */

    /* Element size: position of the highest set bit of immn:NOT(imms) */
    log2_esize = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (log2_esize < 1) {
        /* immn == 0 and imms == 11111x (binary): reserved */
        return false;
    }
    esize = 1 << log2_esize;
    esize_mask = esize - 1;

    run = imms & esize_mask;
    rot = immr & esize_mask;
    if (run == esize_mask) {
        /* <length of run - 1> mustn't be all-ones. */
        return false;
    }

    /* Build one element: run+1 set bits, rotated right by rot inside
     * an esize-bit wide field...
     */
    elem = bitmask64(run + 1);
    if (rot != 0) {
        elem = (elem >> rot) | (elem << (esize - rot));
        elem &= bitmask64(esize);
    }

    /* ...then copy that element across the full 64 bits */
    *result = bitfield_replicate(elem, esize);
    return true;
}
2881
2882 /* C3.4.4 Logical (immediate)
2883  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
2884  * +----+-----+-------------+---+------+------+------+------+
2885  * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
2886  * +----+-----+-------------+---+------+------+------+------+
2887  */
2888 static void disas_logic_imm(DisasContext *s, uint32_t insn)
2889 {
2890     unsigned int sf, opc, is_n, immr, imms, rn, rd;
2891     TCGv_i64 tcg_rd, tcg_rn;
2892     uint64_t wmask;
2893     bool is_and = false;
2894
2895     sf = extract32(insn, 31, 1);
2896     opc = extract32(insn, 29, 2);
2897     is_n = extract32(insn, 22, 1);
2898     immr = extract32(insn, 16, 6);
2899     imms = extract32(insn, 10, 6);
2900     rn = extract32(insn, 5, 5);
2901     rd = extract32(insn, 0, 5);
2902
2903     if (!sf && is_n) {
2904         unallocated_encoding(s);
2905         return;
2906     }
2907
2908     if (opc == 0x3) { /* ANDS */
2909         tcg_rd = cpu_reg(s, rd);
2910     } else {
2911         tcg_rd = cpu_reg_sp(s, rd);
2912     }
2913     tcg_rn = cpu_reg(s, rn);
2914
2915     if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
2916         /* some immediate field values are reserved */
2917         unallocated_encoding(s);
2918         return;
2919     }
2920
2921     if (!sf) {
2922         wmask &= 0xffffffff;
2923     }
2924
2925     switch (opc) {
2926     case 0x3: /* ANDS */
2927     case 0x0: /* AND */
2928         tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
2929         is_and = true;
2930         break;
2931     case 0x1: /* ORR */
2932         tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
2933         break;
2934     case 0x2: /* EOR */
2935         tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
2936         break;
2937     default:
2938         assert(FALSE); /* must handle all above */
2939         break;
2940     }
2941
2942     if (!sf && !is_and) {
2943         /* zero extend final result; we know we can skip this for AND
2944          * since the immediate had the high 32 bits clear.
2945          */
2946         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2947     }
2948
2949     if (opc == 3) { /* ANDS */
2950         gen_logic_CC(sf, tcg_rd);
2951     }
2952 }
2953
2954 /*
2955  * C3.4.5 Move wide (immediate)
2956  *
2957  *  31 30 29 28         23 22 21 20             5 4    0
2958  * +--+-----+-------------+-----+----------------+------+
2959  * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
2960  * +--+-----+-------------+-----+----------------+------+
2961  *
2962  * sf: 0 -> 32 bit, 1 -> 64 bit
2963  * opc: 00 -> N, 10 -> Z, 11 -> K
2964  * hw: shift/16 (0,16, and sf only 32, 48)
2965  */
2966 static void disas_movw_imm(DisasContext *s, uint32_t insn)
2967 {
2968     int rd = extract32(insn, 0, 5);
2969     uint64_t imm = extract32(insn, 5, 16);
2970     int sf = extract32(insn, 31, 1);
2971     int opc = extract32(insn, 29, 2);
2972     int pos = extract32(insn, 21, 2) << 4;
2973     TCGv_i64 tcg_rd = cpu_reg(s, rd);
2974     TCGv_i64 tcg_imm;
2975
2976     if (!sf && (pos >= 32)) {
2977         unallocated_encoding(s);
2978         return;
2979     }
2980
2981     switch (opc) {
2982     case 0: /* MOVN */
2983     case 2: /* MOVZ */
2984         imm <<= pos;
2985         if (opc == 0) {
2986             imm = ~imm;
2987         }
2988         if (!sf) {
2989             imm &= 0xffffffffu;
2990         }
2991         tcg_gen_movi_i64(tcg_rd, imm);
2992         break;
2993     case 3: /* MOVK */
2994         tcg_imm = tcg_const_i64(imm);
2995         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
2996         tcg_temp_free_i64(tcg_imm);
2997         if (!sf) {
2998             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2999         }
3000         break;
3001     default:
3002         unallocated_encoding(s);
3003         break;
3004     }
3005 }
3006
/* C3.4.2 Bitfield
 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
 * +----+-----+-------------+---+------+------+------+------+
 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
 * +----+-----+-------------+---+------+------+------+------+
 *
 * opc: 0 -> SBFM, 2 -> UBFM; opc == 1 inserts the field into the
 * existing contents of Rd, and opc == 3 is unallocated.
 *
 * An encoding is also unallocated when sf != N, or when immr or imms
 * is not smaller than the operand size (32 or 64).
 *
 * Several common aliases (sign/zero extends and plain shifts) are
 * recognised first and emitted as a single TCG op; everything else
 * goes through the generic shift + deposit sequence at the bottom.
 */
static void disas_bitfield(DisasContext *s, uint32_t insn)
{
    unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
    TCGv_i64 tcg_rd, tcg_tmp;

    sf = extract32(insn, 31, 1);
    opc = extract32(insn, 29, 2);
    n = extract32(insn, 22, 1);
    ri = extract32(insn, 16, 6);    /* immr */
    si = extract32(insn, 10, 6);    /* imms */
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);
    bitsize = sf ? 64 : 32;

    if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
        unallocated_encoding(s);
        return;
    }

    tcg_rd = cpu_reg(s, rd);

    /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
       to be smaller than bitsize, we'll never reference data outside the
       low 32-bits anyway.  */
    tcg_tmp = read_cpu_reg(s, rn, 1);

    /* Recognize the common aliases.  */
    if (opc == 0) { /* SBFM */
        if (ri == 0) {
            if (si == 7) { /* SXTB */
                tcg_gen_ext8s_i64(tcg_rd, tcg_tmp);
                goto done;
            } else if (si == 15) { /* SXTH */
                tcg_gen_ext16s_i64(tcg_rd, tcg_tmp);
                goto done;
            } else if (si == 31) { /* SXTW */
                tcg_gen_ext32s_i64(tcg_rd, tcg_tmp);
                goto done;
            }
        }
        if (si == 63 || (si == 31 && ri <= si)) { /* ASR */
            if (si == 31) {
                /* field tops out at bit 31: sign extend from there first */
                tcg_gen_ext32s_i64(tcg_tmp, tcg_tmp);
            }
            tcg_gen_sari_i64(tcg_rd, tcg_tmp, ri);
            goto done;
        }
    } else if (opc == 2) { /* UBFM */
        if (ri == 0) { /* UXTB, UXTH, plus non-canonical AND */
            tcg_gen_andi_i64(tcg_rd, tcg_tmp, bitmask64(si + 1));
            /* Returns without the zero-extend at "done": si < bitsize,
             * so for !sf the mask has no bits set above bit 31.
             */
            return;
        }
        if (si == 63 || (si == 31 && ri <= si)) { /* LSR */
            if (si == 31) {
                tcg_gen_ext32u_i64(tcg_tmp, tcg_tmp);
            }
            tcg_gen_shri_i64(tcg_rd, tcg_tmp, ri);
            /* Returns without the zero-extend at "done": for !sf only
             * si == 31 can reach here, and that path already cleared
             * the high 32 bits before shifting right.
             */
            return;
        }
        if (si + 1 == ri && si != bitsize - 1) { /* LSL */
            int shift = bitsize - 1 - si;
            tcg_gen_shli_i64(tcg_rd, tcg_tmp, shift);
            goto done;
        }
    }

    /* Generic path.  SBFM/UBFM start from a zeroed destination; for
     * opc == 1 the current contents of Rd are kept and the field is
     * inserted into them by the deposit below.
     */
    if (opc != 1) { /* SBFM or UBFM */
        tcg_gen_movi_i64(tcg_rd, 0);
    }

    /* do the bit move operation */
    if (si >= ri) {
        /* Wd<s-r:0> = Wn<s:r> */
        tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
        pos = 0;
        len = (si - ri) + 1;
    } else {
        /* Wd<32+s-r,32-r> = Wn<s:0> */
        pos = bitsize - ri;
        len = si + 1;
    }

    tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);

    if (opc == 0) { /* SBFM - sign extend the destination field */
        /* move the field's top bit up to bit 63, then shift back
         * arithmetically so it is copied into all higher bits
         */
        tcg_gen_shli_i64(tcg_rd, tcg_rd, 64 - (pos + len));
        tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len));
    }

 done:
    if (!sf) { /* zero extend final result */
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}
3107
3108 /* C3.4.3 Extract
3109  *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3110  * +----+------+-------------+---+----+------+--------+------+------+
3111  * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3112  * +----+------+-------------+---+----+------+--------+------+------+
3113  */
3114 static void disas_extract(DisasContext *s, uint32_t insn)
3115 {
3116     unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3117
3118     sf = extract32(insn, 31, 1);
3119     n = extract32(insn, 22, 1);
3120     rm = extract32(insn, 16, 5);
3121     imm = extract32(insn, 10, 6);
3122     rn = extract32(insn, 5, 5);
3123     rd = extract32(insn, 0, 5);
3124     op21 = extract32(insn, 29, 2);
3125     op0 = extract32(insn, 21, 1);
3126     bitsize = sf ? 64 : 32;
3127
3128     if (sf != n || op21 || op0 || imm >= bitsize) {
3129         unallocated_encoding(s);
3130     } else {
3131         TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3132
3133         tcg_rd = cpu_reg(s, rd);
3134
3135         if (unlikely(imm == 0)) {
3136             /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3137              * so an extract from bit 0 is a special case.
3138              */
3139             if (sf) {
3140                 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3141             } else {
3142                 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3143             }
3144         } else if (rm == rn) { /* ROR */
3145             tcg_rm = cpu_reg(s, rm);
3146             if (sf) {
3147                 tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
3148             } else {
3149                 TCGv_i32 tmp = tcg_temp_new_i32();
3150                 tcg_gen_extrl_i64_i32(tmp, tcg_rm);
3151                 tcg_gen_rotri_i32(tmp, tmp, imm);
3152                 tcg_gen_extu_i32_i64(tcg_rd, tmp);
3153                 tcg_temp_free_i32(tmp);
3154             }
3155         } else {
3156             tcg_rm = read_cpu_reg(s, rm, sf);
3157             tcg_rn = read_cpu_reg(s, rn, sf);
3158             tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
3159             tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
3160             tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
3161             if (!sf) {
3162                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3163             }
3164         }
3165     }
3166 }
3167
3168 /* C3.4 Data processing - immediate */
3169 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3170 {
3171     switch (extract32(insn, 23, 6)) {
3172     case 0x20: case 0x21: /* PC-rel. addressing */
3173         disas_pc_rel_adr(s, insn);
3174         break;
3175     case 0x22: case 0x23: /* Add/subtract (immediate) */
3176         disas_add_sub_imm(s, insn);
3177         break;
3178     case 0x24: /* Logical (immediate) */
3179         disas_logic_imm(s, insn);
3180         break;
3181     case 0x25: /* Move wide (immediate) */
3182         disas_movw_imm(s, insn);
3183         break;
3184     case 0x26: /* Bitfield */
3185         disas_bitfield(s, insn);
3186         break;
3187     case 0x27: /* Extract */
3188         disas_extract(s, insn);
3189         break;
3190     default:
3191         unallocated_encoding(s);
3192         break;
3193     }
3194 }
3195
3196 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
3197  * Note that it is the caller's responsibility to ensure that the
3198  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
3199  * mandated semantics for out of range shifts.
3200  */
3201 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
3202                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3203 {
3204     switch (shift_type) {
3205     case A64_SHIFT_TYPE_LSL:
3206         tcg_gen_shl_i64(dst, src, shift_amount);
3207         break;
3208     case A64_SHIFT_TYPE_LSR:
3209         tcg_gen_shr_i64(dst, src, shift_amount);
3210         break;
3211     case A64_SHIFT_TYPE_ASR:
3212         if (!sf) {
3213             tcg_gen_ext32s_i64(dst, src);
3214         }
3215         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
3216         break;
3217     case A64_SHIFT_TYPE_ROR:
3218         if (sf) {
3219             tcg_gen_rotr_i64(dst, src, shift_amount);
3220         } else {
3221             TCGv_i32 t0, t1;
3222             t0 = tcg_temp_new_i32();
3223             t1 = tcg_temp_new_i32();
3224             tcg_gen_extrl_i64_i32(t0, src);
3225             tcg_gen_extrl_i64_i32(t1, shift_amount);
3226             tcg_gen_rotr_i32(t0, t0, t1);
3227             tcg_gen_extu_i32_i64(dst, t0);
3228             tcg_temp_free_i32(t0);
3229             tcg_temp_free_i32(t1);
3230         }
3231         break;
3232     default:
3233         assert(FALSE); /* all shift types should be handled */
3234         break;
3235     }
3236
3237     if (!sf) { /* zero extend final result */
3238         tcg_gen_ext32u_i64(dst, dst);
3239     }
3240 }
3241
3242 /* Shift a TCGv src by immediate, put result in dst.
3243  * The shift amount must be in range (this should always be true as the
3244  * relevant instructions will UNDEF on bad shift immediates).
3245  */
3246 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
3247                           enum a64_shift_type shift_type, unsigned int shift_i)
3248 {
3249     assert(shift_i < (sf ? 64 : 32));
3250
3251     if (shift_i == 0) {
3252         tcg_gen_mov_i64(dst, src);
3253     } else {
3254         TCGv_i64 shift_const;
3255
3256         shift_const = tcg_const_i64(shift_i);
3257         shift_reg(dst, src, sf, shift_type, shift_const);
3258         tcg_temp_free_i64(shift_const);
3259     }
3260 }
3261
3262 /* C3.5.10 Logical (shifted register)
3263  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
3264  * +----+-----+-----------+-------+---+------+--------+------+------+
3265  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
3266  * +----+-----+-----------+-------+---+------+--------+------+------+
3267  */
3268 static void disas_logic_reg(DisasContext *s, uint32_t insn)
3269 {
3270     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3271     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3272
3273     sf = extract32(insn, 31, 1);
3274     opc = extract32(insn, 29, 2);
3275     shift_type = extract32(insn, 22, 2);
3276     invert = extract32(insn, 21, 1);
3277     rm = extract32(insn, 16, 5);
3278     shift_amount = extract32(insn, 10, 6);
3279     rn = extract32(insn, 5, 5);
3280     rd = extract32(insn, 0, 5);
3281
3282     if (!sf && (shift_amount & (1 << 5))) {
3283         unallocated_encoding(s);
3284         return;
3285     }
3286
3287     tcg_rd = cpu_reg(s, rd);
3288
3289     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3290         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3291          * register-register MOV and MVN, so it is worth special casing.
3292          */
3293         tcg_rm = cpu_reg(s, rm);
3294         if (invert) {
3295             tcg_gen_not_i64(tcg_rd, tcg_rm);
3296             if (!sf) {
3297                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3298             }
3299         } else {
3300             if (sf) {
3301                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
3302             } else {
3303                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
3304             }
3305         }
3306         return;
3307     }
3308
3309     tcg_rm = read_cpu_reg(s, rm, sf);
3310
3311     if (shift_amount) {
3312         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3313     }
3314
3315     tcg_rn = cpu_reg(s, rn);
3316
3317     switch (opc | (invert << 2)) {
3318     case 0: /* AND */
3319     case 3: /* ANDS */
3320         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3321         break;
3322     case 1: /* ORR */
3323         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3324         break;
3325     case 2: /* EOR */
3326         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3327         break;
3328     case 4: /* BIC */
3329     case 7: /* BICS */
3330         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3331         break;
3332     case 5: /* ORN */
3333         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3334         break;
3335     case 6: /* EON */
3336         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3337         break;
3338     default:
3339         assert(FALSE);
3340         break;
3341     }
3342
3343     if (!sf) {
3344         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3345     }
3346
3347     if (opc == 3) {
3348         gen_logic_CC(sf, tcg_rd);
3349     }
3350 }
3351
3352 /*
3353  * C3.5.1 Add/subtract (extended register)
3354  *
3355  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
3356  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3357  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
3358  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3359  *
3360  *  sf: 0 -> 32bit, 1 -> 64bit
3361  *  op: 0 -> add  , 1 -> sub
3362  *   S: 1 -> set flags
3363  * opt: 00
3364  * option: extension type (see DecodeRegExtend)
3365  * imm3: optional shift to Rm
3366  *
3367  * Rd = Rn + LSL(extend(Rm), amount)
3368  */
3369 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3370 {
3371     int rd = extract32(insn, 0, 5);
3372     int rn = extract32(insn, 5, 5);
3373     int imm3 = extract32(insn, 10, 3);
3374     int option = extract32(insn, 13, 3);
3375     int rm = extract32(insn, 16, 5);
3376     bool setflags = extract32(insn, 29, 1);
3377     bool sub_op = extract32(insn, 30, 1);
3378     bool sf = extract32(insn, 31, 1);
3379
3380     TCGv_i64 tcg_rm, tcg_rn; /* temps */
3381     TCGv_i64 tcg_rd;
3382     TCGv_i64 tcg_result;
3383
3384     if (imm3 > 4) {
3385         unallocated_encoding(s);
3386         return;
3387     }
3388
3389     /* non-flag setting ops may use SP */
3390     if (!setflags) {
3391         tcg_rd = cpu_reg_sp(s, rd);
3392     } else {
3393         tcg_rd = cpu_reg(s, rd);
3394     }
3395     tcg_rn = read_cpu_reg_sp(s, rn, sf);
3396
3397     tcg_rm = read_cpu_reg(s, rm, sf);
3398     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3399
3400     tcg_result = tcg_temp_new_i64();
3401
3402     if (!setflags) {
3403         if (sub_op) {
3404             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3405         } else {
3406             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3407         }
3408     } else {
3409         if (sub_op) {
3410             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3411         } else {
3412             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3413         }
3414     }
3415
3416     if (sf) {
3417         tcg_gen_mov_i64(tcg_rd, tcg_result);
3418     } else {
3419         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3420     }
3421
3422     tcg_temp_free_i64(tcg_result);
3423 }
3424
3425 /*
3426  * C3.5.2 Add/subtract (shifted register)
3427  *
3428  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
3429  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3430  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
3431  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3432  *
3433  *    sf: 0 -> 32bit, 1 -> 64bit
3434  *    op: 0 -> add  , 1 -> sub
3435  *     S: 1 -> set flags
3436  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3437  *  imm6: Shift amount to apply to Rm before the add/sub
3438  */
3439 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3440 {
3441     int rd = extract32(insn, 0, 5);
3442     int rn = extract32(insn, 5, 5);
3443     int imm6 = extract32(insn, 10, 6);
3444     int rm = extract32(insn, 16, 5);
3445     int shift_type = extract32(insn, 22, 2);
3446     bool setflags = extract32(insn, 29, 1);
3447     bool sub_op = extract32(insn, 30, 1);
3448     bool sf = extract32(insn, 31, 1);
3449
3450     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3451     TCGv_i64 tcg_rn, tcg_rm;
3452     TCGv_i64 tcg_result;
3453
3454     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3455         unallocated_encoding(s);
3456         return;
3457     }
3458
3459     tcg_rn = read_cpu_reg(s, rn, sf);
3460     tcg_rm = read_cpu_reg(s, rm, sf);
3461
3462     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
3463
3464     tcg_result = tcg_temp_new_i64();
3465
3466     if (!setflags) {
3467         if (sub_op) {
3468             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3469         } else {
3470             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3471         }
3472     } else {
3473         if (sub_op) {
3474             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3475         } else {
3476             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3477         }
3478     }
3479
3480     if (sf) {
3481         tcg_gen_mov_i64(tcg_rd, tcg_result);
3482     } else {
3483         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3484     }
3485
3486     tcg_temp_free_i64(tcg_result);
3487 }
3488
3489 /* C3.5.9 Data-processing (3 source)
3490
3491    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
3492   +--+------+-----------+------+------+----+------+------+------+
3493   |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
3494   +--+------+-----------+------+------+----+------+------+------+
3495
3496  */
3497 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
3498 {
3499     int rd = extract32(insn, 0, 5);
3500     int rn = extract32(insn, 5, 5);
3501     int ra = extract32(insn, 10, 5);
3502     int rm = extract32(insn, 16, 5);
3503     int op_id = (extract32(insn, 29, 3) << 4) |
3504         (extract32(insn, 21, 3) << 1) |
3505         extract32(insn, 15, 1);
3506     bool sf = extract32(insn, 31, 1);
3507     bool is_sub = extract32(op_id, 0, 1);
3508     bool is_high = extract32(op_id, 2, 1);
3509     bool is_signed = false;
3510     TCGv_i64 tcg_op1;
3511     TCGv_i64 tcg_op2;
3512     TCGv_i64 tcg_tmp;
3513
3514     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
3515     switch (op_id) {
3516     case 0x42: /* SMADDL */
3517     case 0x43: /* SMSUBL */
3518     case 0x44: /* SMULH */
3519         is_signed = true;
3520         break;
3521     case 0x0: /* MADD (32bit) */
3522     case 0x1: /* MSUB (32bit) */
3523     case 0x40: /* MADD (64bit) */
3524     case 0x41: /* MSUB (64bit) */
3525     case 0x4a: /* UMADDL */
3526     case 0x4b: /* UMSUBL */
3527     case 0x4c: /* UMULH */
3528         break;
3529     default:
3530         unallocated_encoding(s);
3531         return;
3532     }
3533
3534     if (is_high) {
3535         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
3536         TCGv_i64 tcg_rd = cpu_reg(s, rd);
3537         TCGv_i64 tcg_rn = cpu_reg(s, rn);
3538         TCGv_i64 tcg_rm = cpu_reg(s, rm);
3539
3540         if (is_signed) {
3541             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3542         } else {
3543             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3544         }
3545
3546         tcg_temp_free_i64(low_bits);
3547         return;
3548     }
3549
3550     tcg_op1 = tcg_temp_new_i64();
3551     tcg_op2 = tcg_temp_new_i64();
3552     tcg_tmp = tcg_temp_new_i64();
3553
3554     if (op_id < 0x42) {
3555         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
3556         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
3557     } else {
3558         if (is_signed) {
3559             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
3560             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
3561         } else {
3562             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
3563             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
3564         }
3565     }
3566
3567     if (ra == 31 && !is_sub) {
3568         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
3569         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
3570     } else {
3571         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
3572         if (is_sub) {
3573             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3574         } else {
3575             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3576         }
3577     }
3578
3579     if (!sf) {
3580         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
3581     }
3582
3583     tcg_temp_free_i64(tcg_op1);
3584     tcg_temp_free_i64(tcg_op2);
3585     tcg_temp_free_i64(tcg_tmp);
3586 }
3587
3588 /* C3.5.3 - Add/subtract (with carry)
3589  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
3590  * +--+--+--+------------------------+------+---------+------+-----+
3591  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
3592  * +--+--+--+------------------------+------+---------+------+-----+
3593  *                                            [000000]
3594  */
3595
3596 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
3597 {
3598     unsigned int sf, op, setflags, rm, rn, rd;
3599     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
3600
3601     if (extract32(insn, 10, 6) != 0) {
3602         unallocated_encoding(s);
3603         return;
3604     }
3605
3606     sf = extract32(insn, 31, 1);
3607     op = extract32(insn, 30, 1);
3608     setflags = extract32(insn, 29, 1);
3609     rm = extract32(insn, 16, 5);
3610     rn = extract32(insn, 5, 5);
3611     rd = extract32(insn, 0, 5);
3612
3613     tcg_rd = cpu_reg(s, rd);
3614     tcg_rn = cpu_reg(s, rn);
3615
3616     if (op) {
3617         tcg_y = new_tmp_a64(s);
3618         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
3619     } else {
3620         tcg_y = cpu_reg(s, rm);
3621     }
3622
3623     if (setflags) {
3624         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
3625     } else {
3626         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
3627     }
3628 }
3629
3630 /* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
3631  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
3632  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3633  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
3634  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3635  *        [1]                             y                [0]       [0]
3636  */
3637 static void disas_cc(DisasContext *s, uint32_t insn)
3638 {
3639     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
3640     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
3641     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
3642     DisasCompare c;
3643
3644     if (!extract32(insn, 29, 1)) {
3645         unallocated_encoding(s);
3646         return;
3647     }
3648     if (insn & (1 << 10 | 1 << 4)) {
3649         unallocated_encoding(s);
3650         return;
3651     }
3652     sf = extract32(insn, 31, 1);
3653     op = extract32(insn, 30, 1);
3654     is_imm = extract32(insn, 11, 1);
3655     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
3656     cond = extract32(insn, 12, 4);
3657     rn = extract32(insn, 5, 5);
3658     nzcv = extract32(insn, 0, 4);
3659
3660     /* Set T0 = !COND.  */
3661     tcg_t0 = tcg_temp_new_i32();
3662     arm_test_cc(&c, cond);
3663     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
3664     arm_free_cc(&c);
3665
3666     /* Load the arguments for the new comparison.  */
3667     if (is_imm) {
3668         tcg_y = new_tmp_a64(s);
3669         tcg_gen_movi_i64(tcg_y, y);
3670     } else {
3671         tcg_y = cpu_reg(s, y);
3672     }
3673     tcg_rn = cpu_reg(s, rn);
3674
3675     /* Set the flags for the new comparison.  */
3676     tcg_tmp = tcg_temp_new_i64();
3677     if (op) {
3678         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3679     } else {
3680         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3681     }
3682     tcg_temp_free_i64(tcg_tmp);
3683
3684     /* If COND was false, force the flags to #nzcv.  Compute two masks
3685      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
3686      * For tcg hosts that support ANDC, we can make do with just T1.
3687      * In either case, allow the tcg optimizer to delete any unused mask.
3688      */
3689     tcg_t1 = tcg_temp_new_i32();
3690     tcg_t2 = tcg_temp_new_i32();
3691     tcg_gen_neg_i32(tcg_t1, tcg_t0);
3692     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
3693
3694     if (nzcv & 8) { /* N */
3695         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
3696     } else {
3697         if (TCG_TARGET_HAS_andc_i32) {
3698             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
3699         } else {
3700             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
3701         }
3702     }
3703     if (nzcv & 4) { /* Z */
3704         if (TCG_TARGET_HAS_andc_i32) {
3705             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
3706         } else {
3707             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
3708         }
3709     } else {
3710         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
3711     }
3712     if (nzcv & 2) { /* C */
3713         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
3714     } else {
3715         if (TCG_TARGET_HAS_andc_i32) {
3716             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
3717         } else {
3718             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
3719         }
3720     }
3721     if (nzcv & 1) { /* V */
3722         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
3723     } else {
3724         if (TCG_TARGET_HAS_andc_i32) {
3725             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
3726         } else {
3727             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
3728         }
3729     }
3730     tcg_temp_free_i32(tcg_t0);
3731     tcg_temp_free_i32(tcg_t1);
3732     tcg_temp_free_i32(tcg_t2);
3733 }
3734
3735 /* C3.5.6 Conditional select
3736  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
3737  * +----+----+---+-----------------+------+------+-----+------+------+
3738  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
3739  * +----+----+---+-----------------+------+------+-----+------+------+
3740  */
3741 static void disas_cond_select(DisasContext *s, uint32_t insn)
3742 {
3743     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
3744     TCGv_i64 tcg_rd, zero;
3745     DisasCompare64 c;
3746
3747     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
3748         /* S == 1 or op2<1> == 1 */
3749         unallocated_encoding(s);
3750         return;
3751     }
3752     sf = extract32(insn, 31, 1);
3753     else_inv = extract32(insn, 30, 1);
3754     rm = extract32(insn, 16, 5);
3755     cond = extract32(insn, 12, 4);
3756     else_inc = extract32(insn, 10, 1);
3757     rn = extract32(insn, 5, 5);
3758     rd = extract32(insn, 0, 5);
3759
3760     tcg_rd = cpu_reg(s, rd);
3761
3762     a64_test_cc(&c, cond);
3763     zero = tcg_const_i64(0);
3764
3765     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
3766         /* CSET & CSETM.  */
3767         tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
3768         if (else_inv) {
3769             tcg_gen_neg_i64(tcg_rd, tcg_rd);
3770         }
3771     } else {
3772         TCGv_i64 t_true = cpu_reg(s, rn);
3773         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
3774         if (else_inv && else_inc) {
3775             tcg_gen_neg_i64(t_false, t_false);
3776         } else if (else_inv) {
3777             tcg_gen_not_i64(t_false, t_false);
3778         } else if (else_inc) {
3779             tcg_gen_addi_i64(t_false, t_false, 1);
3780         }
3781         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
3782     }
3783
3784     tcg_temp_free_i64(zero);
3785     a64_free_cc(&c);
3786
3787     if (!sf) {
3788         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3789     }
3790 }
3791
3792 static void handle_clz(DisasContext *s, unsigned int sf,
3793                        unsigned int rn, unsigned int rd)
3794 {
3795     TCGv_i64 tcg_rd, tcg_rn;
3796     tcg_rd = cpu_reg(s, rd);
3797     tcg_rn = cpu_reg(s, rn);
3798
3799     if (sf) {
3800         gen_helper_clz64(tcg_rd, tcg_rn);
3801     } else {
3802         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3803         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3804         gen_helper_clz(tcg_tmp32, tcg_tmp32);
3805         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3806         tcg_temp_free_i32(tcg_tmp32);
3807     }
3808 }
3809
3810 static void handle_cls(DisasContext *s, unsigned int sf,
3811                        unsigned int rn, unsigned int rd)
3812 {
3813     TCGv_i64 tcg_rd, tcg_rn;
3814     tcg_rd = cpu_reg(s, rd);
3815     tcg_rn = cpu_reg(s, rn);
3816
3817     if (sf) {
3818         gen_helper_cls64(tcg_rd, tcg_rn);
3819     } else {
3820         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3821         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3822         gen_helper_cls32(tcg_tmp32, tcg_tmp32);
3823         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3824         tcg_temp_free_i32(tcg_tmp32);
3825     }
3826 }
3827
3828 static void handle_rbit(DisasContext *s, unsigned int sf,
3829                         unsigned int rn, unsigned int rd)
3830 {
3831     TCGv_i64 tcg_rd, tcg_rn;
3832     tcg_rd = cpu_reg(s, rd);
3833     tcg_rn = cpu_reg(s, rn);
3834
3835     if (sf) {
3836         gen_helper_rbit64(tcg_rd, tcg_rn);
3837     } else {
3838         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3839         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3840         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
3841         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3842         tcg_temp_free_i32(tcg_tmp32);
3843     }
3844 }
3845
3846 /* C5.6.149 REV with sf==1, opcode==3 ("REV64") */
3847 static void handle_rev64(DisasContext *s, unsigned int sf,
3848                          unsigned int rn, unsigned int rd)
3849 {
3850     if (!sf) {
3851         unallocated_encoding(s);
3852         return;
3853     }
3854     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
3855 }
3856
3857 /* C5.6.149 REV with sf==0, opcode==2
3858  * C5.6.151 REV32 (sf==1, opcode==2)
3859  */
3860 static void handle_rev32(DisasContext *s, unsigned int sf,
3861                          unsigned int rn, unsigned int rd)
3862 {
3863     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3864
3865     if (sf) {
3866         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3867         TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3868
3869         /* bswap32_i64 requires zero high word */
3870         tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
3871         tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
3872         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3873         tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
3874         tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
3875
3876         tcg_temp_free_i64(tcg_tmp);
3877     } else {
3878         tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
3879         tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
3880     }
3881 }
3882
3883 /* C5.6.150 REV16 (opcode==1) */
3884 static void handle_rev16(DisasContext *s, unsigned int sf,
3885                          unsigned int rn, unsigned int rd)
3886 {
3887     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3888     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3889     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3890
3891     tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff);
3892     tcg_gen_bswap16_i64(tcg_rd, tcg_tmp);
3893
3894     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16);
3895     tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
3896     tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3897     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16);
3898
3899     if (sf) {
3900         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3901         tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
3902         tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3903         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16);
3904
3905         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48);
3906         tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3907         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16);
3908     }
3909
3910     tcg_temp_free_i64(tcg_tmp);
3911 }
3912
3913 /* C3.5.7 Data-processing (1 source)
3914  *   31  30  29  28             21 20     16 15    10 9    5 4    0
3915  * +----+---+---+-----------------+---------+--------+------+------+
3916  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
3917  * +----+---+---+-----------------+---------+--------+------+------+
3918  */
3919 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
3920 {
3921     unsigned int sf, opcode, rn, rd;
3922
3923     if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
3924         unallocated_encoding(s);
3925         return;
3926     }
3927
3928     sf = extract32(insn, 31, 1);
3929     opcode = extract32(insn, 10, 6);
3930     rn = extract32(insn, 5, 5);
3931     rd = extract32(insn, 0, 5);
3932
3933     switch (opcode) {
3934     case 0: /* RBIT */
3935         handle_rbit(s, sf, rn, rd);
3936         break;
3937     case 1: /* REV16 */
3938         handle_rev16(s, sf, rn, rd);
3939         break;
3940     case 2: /* REV32 */
3941         handle_rev32(s, sf, rn, rd);
3942         break;
3943     case 3: /* REV64 */
3944         handle_rev64(s, sf, rn, rd);
3945         break;
3946     case 4: /* CLZ */
3947         handle_clz(s, sf, rn, rd);
3948         break;
3949     case 5: /* CLS */
3950         handle_cls(s, sf, rn, rd);
3951         break;
3952     }
3953 }
3954
3955 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
3956                        unsigned int rm, unsigned int rn, unsigned int rd)
3957 {
3958     TCGv_i64 tcg_n, tcg_m, tcg_rd;
3959     tcg_rd = cpu_reg(s, rd);
3960
3961     if (!sf && is_signed) {
3962         tcg_n = new_tmp_a64(s);
3963         tcg_m = new_tmp_a64(s);
3964         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
3965         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
3966     } else {
3967         tcg_n = read_cpu_reg(s, rn, sf);
3968         tcg_m = read_cpu_reg(s, rm, sf);
3969     }
3970
3971     if (is_signed) {
3972         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
3973     } else {
3974         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
3975     }
3976
3977     if (!sf) { /* zero extend final result */
3978         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3979     }
3980 }
3981
3982 /* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */
3983 static void handle_shift_reg(DisasContext *s,
3984                              enum a64_shift_type shift_type, unsigned int sf,
3985                              unsigned int rm, unsigned int rn, unsigned int rd)
3986 {
3987     TCGv_i64 tcg_shift = tcg_temp_new_i64();
3988     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3989     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3990
3991     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
3992     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
3993     tcg_temp_free_i64(tcg_shift);
3994 }
3995
3996 /* CRC32[BHWX], CRC32C[BHWX] */
3997 static void handle_crc32(DisasContext *s,
3998                          unsigned int sf, unsigned int sz, bool crc32c,
3999                          unsigned int rm, unsigned int rn, unsigned int rd)
4000 {
4001     TCGv_i64 tcg_acc, tcg_val;
4002     TCGv_i32 tcg_bytes;
4003
4004     if (!arm_dc_feature(s, ARM_FEATURE_CRC)
4005         || (sf == 1 && sz != 3)
4006         || (sf == 0 && sz == 3)) {
4007         unallocated_encoding(s);
4008         return;
4009     }
4010
4011     if (sz == 3) {
4012         tcg_val = cpu_reg(s, rm);
4013     } else {
4014         uint64_t mask;
4015         switch (sz) {
4016         case 0:
4017             mask = 0xFF;
4018             break;
4019         case 1:
4020             mask = 0xFFFF;
4021             break;
4022         case 2:
4023             mask = 0xFFFFFFFF;
4024             break;
4025         default:
4026             g_assert_not_reached();
4027         }
4028         tcg_val = new_tmp_a64(s);
4029         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
4030     }
4031
4032     tcg_acc = cpu_reg(s, rn);
4033     tcg_bytes = tcg_const_i32(1 << sz);
4034
4035     if (crc32c) {
4036         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4037     } else {
4038         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4039     }
4040
4041     tcg_temp_free_i32(tcg_bytes);
4042 }
4043
4044 /* C3.5.8 Data-processing (2 source)
4045  *   31   30  29 28             21 20  16 15    10 9    5 4    0
4046  * +----+---+---+-----------------+------+--------+------+------+
4047  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
4048  * +----+---+---+-----------------+------+--------+------+------+
4049  */
4050 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
4051 {
4052     unsigned int sf, rm, opcode, rn, rd;
4053     sf = extract32(insn, 31, 1);
4054     rm = extract32(insn, 16, 5);
4055     opcode = extract32(insn, 10, 6);
4056     rn = extract32(insn, 5, 5);
4057     rd = extract32(insn, 0, 5);
4058
4059     if (extract32(insn, 29, 1)) {
4060         unallocated_encoding(s);
4061         return;
4062     }
4063
4064     switch (opcode) {
4065     case 2: /* UDIV */
4066         handle_div(s, false, sf, rm, rn, rd);
4067         break;
4068     case 3: /* SDIV */
4069         handle_div(s, true, sf, rm, rn, rd);
4070         break;
4071     case 8: /* LSLV */
4072         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
4073         break;
4074     case 9: /* LSRV */
4075         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
4076         break;
4077     case 10: /* ASRV */
4078         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
4079         break;
4080     case 11: /* RORV */
4081         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
4082         break;
4083     case 16:
4084     case 17:
4085     case 18:
4086     case 19:
4087     case 20:
4088     case 21:
4089     case 22:
4090     case 23: /* CRC32 */
4091     {
4092         int sz = extract32(opcode, 0, 2);
4093         bool crc32c = extract32(opcode, 2, 1);
4094         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
4095         break;
4096     }
4097     default:
4098         unallocated_encoding(s);
4099         break;
4100     }
4101 }
4102
4103 /* C3.5 Data processing - register */
4104 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
4105 {
4106     switch (extract32(insn, 24, 5)) {
4107     case 0x0a: /* Logical (shifted register) */
4108         disas_logic_reg(s, insn);
4109         break;
4110     case 0x0b: /* Add/subtract */
4111         if (insn & (1 << 21)) { /* (extended register) */
4112             disas_add_sub_ext_reg(s, insn);
4113         } else {
4114             disas_add_sub_reg(s, insn);
4115         }
4116         break;
4117     case 0x1b: /* Data-processing (3 source) */
4118         disas_data_proc_3src(s, insn);
4119         break;
4120     case 0x1a:
4121         switch (extract32(insn, 21, 3)) {
4122         case 0x0: /* Add/subtract (with carry) */
4123             disas_adc_sbc(s, insn);
4124             break;
4125         case 0x2: /* Conditional compare */
4126             disas_cc(s, insn); /* both imm and reg forms */
4127             break;
4128         case 0x4: /* Conditional select */
4129             disas_cond_select(s, insn);
4130             break;
4131         case 0x6: /* Data-processing */
4132             if (insn & (1 << 30)) { /* (1 source) */
4133                 disas_data_proc_1src(s, insn);
4134             } else {            /* (2 source) */
4135                 disas_data_proc_2src(s, insn);
4136             }
4137             break;
4138         default:
4139             unallocated_encoding(s);
4140             break;
4141         }
4142         break;
4143     default:
4144         unallocated_encoding(s);
4145         break;
4146     }
4147 }
4148
4149 static void handle_fp_compare(DisasContext *s, bool is_double,
4150                               unsigned int rn, unsigned int rm,
4151                               bool cmp_with_zero, bool signal_all_nans)
4152 {
4153     TCGv_i64 tcg_flags = tcg_temp_new_i64();
4154     TCGv_ptr fpst = get_fpstatus_ptr();
4155
4156     if (is_double) {
4157         TCGv_i64 tcg_vn, tcg_vm;
4158
4159         tcg_vn = read_fp_dreg(s, rn);
4160         if (cmp_with_zero) {
4161             tcg_vm = tcg_const_i64(0);
4162         } else {
4163             tcg_vm = read_fp_dreg(s, rm);
4164         }
4165         if (signal_all_nans) {
4166             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4167         } else {
4168             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4169         }
4170         tcg_temp_free_i64(tcg_vn);
4171         tcg_temp_free_i64(tcg_vm);
4172     } else {
4173         TCGv_i32 tcg_vn, tcg_vm;
4174
4175         tcg_vn = read_fp_sreg(s, rn);
4176         if (cmp_with_zero) {
4177             tcg_vm = tcg_const_i32(0);
4178         } else {
4179             tcg_vm = read_fp_sreg(s, rm);
4180         }
4181         if (signal_all_nans) {
4182             gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4183         } else {
4184             gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4185         }
4186         tcg_temp_free_i32(tcg_vn);
4187         tcg_temp_free_i32(tcg_vm);
4188     }
4189
4190     tcg_temp_free_ptr(fpst);
4191
4192     gen_set_nzcv(tcg_flags);
4193
4194     tcg_temp_free_i64(tcg_flags);
4195 }
4196
4197 /* C3.6.22 Floating point compare
4198  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
4199  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4200  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
4201  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4202  */
4203 static void disas_fp_compare(DisasContext *s, uint32_t insn)
4204 {
4205     unsigned int mos, type, rm, op, rn, opc, op2r;
4206
4207     mos = extract32(insn, 29, 3);
4208     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4209     rm = extract32(insn, 16, 5);
4210     op = extract32(insn, 14, 2);
4211     rn = extract32(insn, 5, 5);
4212     opc = extract32(insn, 3, 2);
4213     op2r = extract32(insn, 0, 3);
4214
4215     if (mos || op || op2r || type > 1) {
4216         unallocated_encoding(s);
4217         return;
4218     }
4219
4220     if (!fp_access_check(s)) {
4221         return;
4222     }
4223
4224     handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
4225 }
4226
4227 /* C3.6.23 Floating point conditional compare
4228  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
4229  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4230  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
4231  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4232  */
4233 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
4234 {
4235     unsigned int mos, type, rm, cond, rn, op, nzcv;
4236     TCGv_i64 tcg_flags;
4237     TCGLabel *label_continue = NULL;
4238
4239     mos = extract32(insn, 29, 3);
4240     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4241     rm = extract32(insn, 16, 5);
4242     cond = extract32(insn, 12, 4);
4243     rn = extract32(insn, 5, 5);
4244     op = extract32(insn, 4, 1);
4245     nzcv = extract32(insn, 0, 4);
4246
4247     if (mos || type > 1) {
4248         unallocated_encoding(s);
4249         return;
4250     }
4251
4252     if (!fp_access_check(s)) {
4253         return;
4254     }
4255
4256     if (cond < 0x0e) { /* not always */
4257         TCGLabel *label_match = gen_new_label();
4258         label_continue = gen_new_label();
4259         arm_gen_test_cc(cond, label_match);
4260         /* nomatch: */
4261         tcg_flags = tcg_const_i64(nzcv << 28);
4262         gen_set_nzcv(tcg_flags);
4263         tcg_temp_free_i64(tcg_flags);
4264         tcg_gen_br(label_continue);
4265         gen_set_label(label_match);
4266     }
4267
4268     handle_fp_compare(s, type, rn, rm, false, op);
4269
4270     if (cond < 0x0e) {
4271         gen_set_label(label_continue);
4272     }
4273 }
4274
4275 /* C3.6.24 Floating point conditional select
4276  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
4277  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4278  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
4279  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4280  */
4281 static void disas_fp_csel(DisasContext *s, uint32_t insn)
4282 {
4283     unsigned int mos, type, rm, cond, rn, rd;
4284     TCGv_i64 t_true, t_false, t_zero;
4285     DisasCompare64 c;
4286
4287     mos = extract32(insn, 29, 3);
4288     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4289     rm = extract32(insn, 16, 5);
4290     cond = extract32(insn, 12, 4);
4291     rn = extract32(insn, 5, 5);
4292     rd = extract32(insn, 0, 5);
4293
4294     if (mos || type > 1) {
4295         unallocated_encoding(s);
4296         return;
4297     }
4298
4299     if (!fp_access_check(s)) {
4300         return;
4301     }
4302
4303     /* Zero extend sreg inputs to 64 bits now.  */
4304     t_true = tcg_temp_new_i64();
4305     t_false = tcg_temp_new_i64();
4306     read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
4307     read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);
4308
4309     a64_test_cc(&c, cond);
4310     t_zero = tcg_const_i64(0);
4311     tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
4312     tcg_temp_free_i64(t_zero);
4313     tcg_temp_free_i64(t_false);
4314     a64_free_cc(&c);
4315
4316     /* Note that sregs write back zeros to the high bits,
4317        and we've already done the zero-extension.  */
4318     write_fp_dreg(s, rd, t_true);
4319     tcg_temp_free_i64(t_true);
4320 }
4321
4322 /* C3.6.25 Floating-point data-processing (1 source) - single precision */
4323 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
4324 {
4325     TCGv_ptr fpst;
4326     TCGv_i32 tcg_op;
4327     TCGv_i32 tcg_res;
4328
4329     fpst = get_fpstatus_ptr();
4330     tcg_op = read_fp_sreg(s, rn);
4331     tcg_res = tcg_temp_new_i32();
4332
4333     switch (opcode) {
4334     case 0x0: /* FMOV */
4335         tcg_gen_mov_i32(tcg_res, tcg_op);
4336         break;
4337     case 0x1: /* FABS */
4338         gen_helper_vfp_abss(tcg_res, tcg_op);
4339         break;
4340     case 0x2: /* FNEG */
4341         gen_helper_vfp_negs(tcg_res, tcg_op);
4342         break;
4343     case 0x3: /* FSQRT */
4344         gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
4345         break;
4346     case 0x8: /* FRINTN */
4347     case 0x9: /* FRINTP */
4348     case 0xa: /* FRINTM */
4349     case 0xb: /* FRINTZ */
4350     case 0xc: /* FRINTA */
4351     {
4352         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4353
4354         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4355         gen_helper_rints(tcg_res, tcg_op, fpst);
4356
4357         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4358         tcg_temp_free_i32(tcg_rmode);
4359         break;
4360     }
4361     case 0xe: /* FRINTX */
4362         gen_helper_rints_exact(tcg_res, tcg_op, fpst);
4363         break;
4364     case 0xf: /* FRINTI */
4365         gen_helper_rints(tcg_res, tcg_op, fpst);
4366         break;
4367     default:
4368         abort();
4369     }
4370
4371     write_fp_sreg(s, rd, tcg_res);
4372
4373     tcg_temp_free_ptr(fpst);
4374     tcg_temp_free_i32(tcg_op);
4375     tcg_temp_free_i32(tcg_res);
4376 }
4377
4378 /* C3.6.25 Floating-point data-processing (1 source) - double precision */
4379 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
4380 {
4381     TCGv_ptr fpst;
4382     TCGv_i64 tcg_op;
4383     TCGv_i64 tcg_res;
4384
4385     fpst = get_fpstatus_ptr();
4386     tcg_op = read_fp_dreg(s, rn);
4387     tcg_res = tcg_temp_new_i64();
4388
4389     switch (opcode) {
4390     case 0x0: /* FMOV */
4391         tcg_gen_mov_i64(tcg_res, tcg_op);
4392         break;
4393     case 0x1: /* FABS */
4394         gen_helper_vfp_absd(tcg_res, tcg_op);
4395         break;
4396     case 0x2: /* FNEG */
4397         gen_helper_vfp_negd(tcg_res, tcg_op);
4398         break;
4399     case 0x3: /* FSQRT */
4400         gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
4401         break;
4402     case 0x8: /* FRINTN */
4403     case 0x9: /* FRINTP */
4404     case 0xa: /* FRINTM */
4405     case 0xb: /* FRINTZ */
4406     case 0xc: /* FRINTA */
4407     {
4408         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4409
4410         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4411         gen_helper_rintd(tcg_res, tcg_op, fpst);
4412
4413         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4414         tcg_temp_free_i32(tcg_rmode);
4415         break;
4416     }
4417     case 0xe: /* FRINTX */
4418         gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
4419         break;
4420     case 0xf: /* FRINTI */
4421         gen_helper_rintd(tcg_res, tcg_op, fpst);
4422         break;
4423     default:
4424         abort();
4425     }
4426
4427     write_fp_dreg(s, rd, tcg_res);
4428
4429     tcg_temp_free_ptr(fpst);
4430     tcg_temp_free_i64(tcg_op);
4431     tcg_temp_free_i64(tcg_res);
4432 }
4433
4434 static void handle_fp_fcvt(DisasContext *s, int opcode,
4435                            int rd, int rn, int dtype, int ntype)
4436 {
4437     switch (ntype) {
4438     case 0x0:
4439     {
4440         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4441         if (dtype == 1) {
4442             /* Single to double */
4443             TCGv_i64 tcg_rd = tcg_temp_new_i64();
4444             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
4445             write_fp_dreg(s, rd, tcg_rd);
4446             tcg_temp_free_i64(tcg_rd);
4447         } else {
4448             /* Single to half */
4449             TCGv_i32 tcg_rd = tcg_temp_new_i32();
4450             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
4451             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4452             write_fp_sreg(s, rd, tcg_rd);
4453             tcg_temp_free_i32(tcg_rd);
4454         }
4455         tcg_temp_free_i32(tcg_rn);
4456         break;
4457     }
4458     case 0x1:
4459     {
4460         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
4461         TCGv_i32 tcg_rd = tcg_temp_new_i32();
4462         if (dtype == 0) {
4463             /* Double to single */
4464             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
4465         } else {
4466             /* Double to half */
4467             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
4468             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4469         }
4470         write_fp_sreg(s, rd, tcg_rd);
4471         tcg_temp_free_i32(tcg_rd);
4472         tcg_temp_free_i64(tcg_rn);
4473         break;
4474     }
4475     case 0x3:
4476     {
4477         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4478         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
4479         if (dtype == 0) {
4480             /* Half to single */
4481             TCGv_i32 tcg_rd = tcg_temp_new_i32();
4482             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
4483             write_fp_sreg(s, rd, tcg_rd);
4484             tcg_temp_free_i32(tcg_rd);
4485         } else {
4486             /* Half to double */
4487             TCGv_i64 tcg_rd = tcg_temp_new_i64();
4488             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
4489             write_fp_dreg(s, rd, tcg_rd);
4490             tcg_temp_free_i64(tcg_rd);
4491         }
4492         tcg_temp_free_i32(tcg_rn);
4493         break;
4494     }
4495     default:
4496         abort();
4497     }
4498 }
4499
4500 /* C3.6.25 Floating point data-processing (1 source)
4501  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
4502  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4503  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
4504  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4505  */
4506 static void disas_fp_1src(DisasContext *s, uint32_t insn)
4507 {
4508     int type = extract32(insn, 22, 2);
4509     int opcode = extract32(insn, 15, 6);
4510     int rn = extract32(insn, 5, 5);
4511     int rd = extract32(insn, 0, 5);
4512
4513     switch (opcode) {
4514     case 0x4: case 0x5: case 0x7:
4515     {
4516         /* FCVT between half, single and double precision */
4517         int dtype = extract32(opcode, 0, 2);
4518         if (type == 2 || dtype == type) {
4519             unallocated_encoding(s);
4520             return;
4521         }
4522         if (!fp_access_check(s)) {
4523             return;
4524         }
4525
4526         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
4527         break;
4528     }
4529     case 0x0 ... 0x3:
4530     case 0x8 ... 0xc:
4531     case 0xe ... 0xf:
4532         /* 32-to-32 and 64-to-64 ops */
4533         switch (type) {
4534         case 0:
4535             if (!fp_access_check(s)) {
4536                 return;
4537             }
4538
4539             handle_fp_1src_single(s, opcode, rd, rn);
4540             break;
4541         case 1:
4542             if (!fp_access_check(s)) {
4543                 return;
4544             }
4545
4546             handle_fp_1src_double(s, opcode, rd, rn);
4547             break;
4548         default:
4549             unallocated_encoding(s);
4550         }
4551         break;
4552     default:
4553         unallocated_encoding(s);
4554         break;
4555     }
4556 }
4557
4558 /* C3.6.26 Floating-point data-processing (2 source) - single precision */
4559 static void handle_fp_2src_single(DisasContext *s, int opcode,
4560                                   int rd, int rn, int rm)
4561 {
4562     TCGv_i32 tcg_op1;
4563     TCGv_i32 tcg_op2;
4564     TCGv_i32 tcg_res;
4565     TCGv_ptr fpst;
4566
4567     tcg_res = tcg_temp_new_i32();
4568     fpst = get_fpstatus_ptr();
4569     tcg_op1 = read_fp_sreg(s, rn);
4570     tcg_op2 = read_fp_sreg(s, rm);
4571
4572     switch (opcode) {
4573     case 0x0: /* FMUL */
4574         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4575         break;
4576     case 0x1: /* FDIV */
4577         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
4578         break;
4579     case 0x2: /* FADD */
4580         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
4581         break;
4582     case 0x3: /* FSUB */
4583         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
4584         break;
4585     case 0x4: /* FMAX */
4586         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
4587         break;
4588     case 0x5: /* FMIN */
4589         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
4590         break;
4591     case 0x6: /* FMAXNM */
4592         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
4593         break;
4594     case 0x7: /* FMINNM */
4595         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
4596         break;
4597     case 0x8: /* FNMUL */
4598         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4599         gen_helper_vfp_negs(tcg_res, tcg_res);
4600         break;
4601     }
4602
4603     write_fp_sreg(s, rd, tcg_res);
4604
4605     tcg_temp_free_ptr(fpst);
4606     tcg_temp_free_i32(tcg_op1);
4607     tcg_temp_free_i32(tcg_op2);
4608     tcg_temp_free_i32(tcg_res);
4609 }
4610
4611 /* C3.6.26 Floating-point data-processing (2 source) - double precision */
4612 static void handle_fp_2src_double(DisasContext *s, int opcode,
4613                                   int rd, int rn, int rm)
4614 {
4615     TCGv_i64 tcg_op1;
4616     TCGv_i64 tcg_op2;
4617     TCGv_i64 tcg_res;
4618     TCGv_ptr fpst;
4619
4620     tcg_res = tcg_temp_new_i64();
4621     fpst = get_fpstatus_ptr();
4622     tcg_op1 = read_fp_dreg(s, rn);
4623     tcg_op2 = read_fp_dreg(s, rm);
4624
4625     switch (opcode) {
4626     case 0x0: /* FMUL */
4627         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4628         break;
4629     case 0x1: /* FDIV */
4630         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
4631         break;
4632     case 0x2: /* FADD */
4633         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
4634         break;
4635     case 0x3: /* FSUB */
4636         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
4637         break;
4638     case 0x4: /* FMAX */
4639         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
4640         break;
4641     case 0x5: /* FMIN */
4642         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
4643         break;
4644     case 0x6: /* FMAXNM */
4645         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4646         break;
4647     case 0x7: /* FMINNM */
4648         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4649         break;
4650     case 0x8: /* FNMUL */
4651         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4652         gen_helper_vfp_negd(tcg_res, tcg_res);
4653         break;
4654     }
4655
4656     write_fp_dreg(s, rd, tcg_res);
4657
4658     tcg_temp_free_ptr(fpst);
4659     tcg_temp_free_i64(tcg_op1);
4660     tcg_temp_free_i64(tcg_op2);
4661     tcg_temp_free_i64(tcg_res);
4662 }
4663
4664 /* C3.6.26 Floating point data-processing (2 source)
4665  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
4666  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4667  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
4668  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4669  */
4670 static void disas_fp_2src(DisasContext *s, uint32_t insn)
4671 {
4672     int type = extract32(insn, 22, 2);
4673     int rd = extract32(insn, 0, 5);
4674     int rn = extract32(insn, 5, 5);
4675     int rm = extract32(insn, 16, 5);
4676     int opcode = extract32(insn, 12, 4);
4677
4678     if (opcode > 8) {
4679         unallocated_encoding(s);
4680         return;
4681     }
4682
4683     switch (type) {
4684     case 0:
4685         if (!fp_access_check(s)) {
4686             return;
4687         }
4688         handle_fp_2src_single(s, opcode, rd, rn, rm);
4689         break;
4690     case 1:
4691         if (!fp_access_check(s)) {
4692             return;
4693         }
4694         handle_fp_2src_double(s, opcode, rd, rn, rm);
4695         break;
4696     default:
4697         unallocated_encoding(s);
4698     }
4699 }
4700
4701 /* C3.6.27 Floating-point data-processing (3 source) - single precision */
4702 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
4703                                   int rd, int rn, int rm, int ra)
4704 {
4705     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
4706     TCGv_i32 tcg_res = tcg_temp_new_i32();
4707     TCGv_ptr fpst = get_fpstatus_ptr();
4708
4709     tcg_op1 = read_fp_sreg(s, rn);
4710     tcg_op2 = read_fp_sreg(s, rm);
4711     tcg_op3 = read_fp_sreg(s, ra);
4712
4713     /* These are fused multiply-add, and must be done as one
4714      * floating point operation with no rounding between the
4715      * multiplication and addition steps.
4716      * NB that doing the negations here as separate steps is
4717      * correct : an input NaN should come out with its sign bit
4718      * flipped if it is a negated-input.
4719      */
4720     if (o1 == true) {
4721         gen_helper_vfp_negs(tcg_op3, tcg_op3);
4722     }
4723
4724     if (o0 != o1) {
4725         gen_helper_vfp_negs(tcg_op1, tcg_op1);
4726     }
4727
4728     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4729
4730     write_fp_sreg(s, rd, tcg_res);
4731
4732     tcg_temp_free_ptr(fpst);
4733     tcg_temp_free_i32(tcg_op1);
4734     tcg_temp_free_i32(tcg_op2);
4735     tcg_temp_free_i32(tcg_op3);
4736     tcg_temp_free_i32(tcg_res);
4737 }
4738
4739 /* C3.6.27 Floating-point data-processing (3 source) - double precision */
4740 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
4741                                   int rd, int rn, int rm, int ra)
4742 {
4743     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
4744     TCGv_i64 tcg_res = tcg_temp_new_i64();
4745     TCGv_ptr fpst = get_fpstatus_ptr();
4746
4747     tcg_op1 = read_fp_dreg(s, rn);
4748     tcg_op2 = read_fp_dreg(s, rm);
4749     tcg_op3 = read_fp_dreg(s, ra);
4750
4751     /* These are fused multiply-add, and must be done as one
4752      * floating point operation with no rounding between the
4753      * multiplication and addition steps.
4754      * NB that doing the negations here as separate steps is
4755      * correct : an input NaN should come out with its sign bit
4756      * flipped if it is a negated-input.
4757      */
4758     if (o1 == true) {
4759         gen_helper_vfp_negd(tcg_op3, tcg_op3);
4760     }
4761
4762     if (o0 != o1) {
4763         gen_helper_vfp_negd(tcg_op1, tcg_op1);
4764     }
4765
4766     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4767
4768     write_fp_dreg(s, rd, tcg_res);
4769
4770     tcg_temp_free_ptr(fpst);
4771     tcg_temp_free_i64(tcg_op1);
4772     tcg_temp_free_i64(tcg_op2);
4773     tcg_temp_free_i64(tcg_op3);
4774     tcg_temp_free_i64(tcg_res);
4775 }
4776
4777 /* C3.6.27 Floating point data-processing (3 source)
4778  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
4779  * +---+---+---+-----------+------+----+------+----+------+------+------+
4780  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4781  * +---+---+---+-----------+------+----+------+----+------+------+------+
4782  */
4783 static void disas_fp_3src(DisasContext *s, uint32_t insn)
4784 {
4785     int type = extract32(insn, 22, 2);
4786     int rd = extract32(insn, 0, 5);
4787     int rn = extract32(insn, 5, 5);
4788     int ra = extract32(insn, 10, 5);
4789     int rm = extract32(insn, 16, 5);
4790     bool o0 = extract32(insn, 15, 1);
4791     bool o1 = extract32(insn, 21, 1);
4792
4793     switch (type) {
4794     case 0:
4795         if (!fp_access_check(s)) {
4796             return;
4797         }
4798         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
4799         break;
4800     case 1:
4801         if (!fp_access_check(s)) {
4802             return;
4803         }
4804         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
4805         break;
4806     default:
4807         unallocated_encoding(s);
4808     }
4809 }
4810
4811 /* C3.6.28 Floating point immediate
4812  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
4813  * +---+---+---+-----------+------+---+------------+-------+------+------+
4814  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
4815  * +---+---+---+-----------+------+---+------------+-------+------+------+
4816  */
4817 static void disas_fp_imm(DisasContext *s, uint32_t insn)
4818 {
4819     int rd = extract32(insn, 0, 5);
4820     int imm8 = extract32(insn, 13, 8);
4821     int is_double = extract32(insn, 22, 2);
4822     uint64_t imm;
4823     TCGv_i64 tcg_res;
4824
4825     if (is_double > 1) {
4826         unallocated_encoding(s);
4827         return;
4828     }
4829
4830     if (!fp_access_check(s)) {
4831         return;
4832     }
4833
4834     /* The imm8 encodes the sign bit, enough bits to represent
4835      * an exponent in the range 01....1xx to 10....0xx,
4836      * and the most significant 4 bits of the mantissa; see
4837      * VFPExpandImm() in the v8 ARM ARM.
4838      */
4839     if (is_double) {
4840         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4841             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
4842             extract32(imm8, 0, 6);
4843         imm <<= 48;
4844     } else {
4845         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4846             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
4847             (extract32(imm8, 0, 6) << 3);
4848         imm <<= 16;
4849     }
4850
4851     tcg_res = tcg_const_i64(imm);
4852     write_fp_dreg(s, rd, tcg_res);
4853     tcg_temp_free_i64(tcg_res);
4854 }
4855
/* Handle floating point <=> fixed point conversions. Note that we can
 * also deal with fp <=> integer conversions as a special case (scale == 64)
 * OPTME: consider handling that special case specially or at least skipping
 * the call to scalbn in the helpers for zero shifts.
 *
 * @rd, @rn:  destination / source register numbers; which of them names a
 *            general purpose register and which an FP register depends
 *            on @itof
 * @opcode:   bit 0 clear selects the signed variant; bit 2 set forces
 *            FPROUNDING_TIEAWAY for the fp-to-integer direction
 * @itof:     true for integer-to-fp, false for fp-to-integer
 * @rmode:    rounding mode used for the fp-to-integer direction only
 * @scale:    the helpers are passed a shift count of (64 - scale)
 * @sf:       zero selects a 32-bit general purpose register operand
 * @type:     zero for single precision, non-zero for double precision
 */
static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
                           bool itof, int rmode, int scale, int sf, int type)
{
    bool is_signed = !(opcode & 1);
    bool is_double = type;
    TCGv_ptr tcg_fpstatus;
    TCGv_i32 tcg_shift;

    tcg_fpstatus = get_fpstatus_ptr();

    tcg_shift = tcg_const_i32(64 - scale);

    if (itof) {
        /* Integer (general purpose register rn) to floating point (rd) */
        TCGv_i64 tcg_int = cpu_reg(s, rn);
        if (!sf) {
            /* 32-bit source: widen to 64 bits according to signedness so
             * the 64-bit input helpers below can be used unchanged.
             */
            TCGv_i64 tcg_extend = new_tmp_a64(s);

            if (is_signed) {
                tcg_gen_ext32s_i64(tcg_extend, tcg_int);
            } else {
                tcg_gen_ext32u_i64(tcg_extend, tcg_int);
            }

            tcg_int = tcg_extend;
        }

        if (is_double) {
            TCGv_i64 tcg_double = tcg_temp_new_i64();
            if (is_signed) {
                gen_helper_vfp_sqtod(tcg_double, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtod(tcg_double, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_dreg(s, rd, tcg_double);
            tcg_temp_free_i64(tcg_double);
        } else {
            TCGv_i32 tcg_single = tcg_temp_new_i32();
            if (is_signed) {
                gen_helper_vfp_sqtos(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtos(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_sreg(s, rd, tcg_single);
            tcg_temp_free_i32(tcg_single);
        }
    } else {
        /* Floating point (rn) to integer (general purpose register rd) */
        TCGv_i64 tcg_int = cpu_reg(s, rd);
        TCGv_i32 tcg_rmode;

        if (extract32(opcode, 2, 1)) {
            /* There are too many rounding modes to all fit into rmode,
             * so FCVTA[US] is a special case.
             */
            rmode = FPROUNDING_TIEAWAY;
        }

        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));

        /* NOTE(review): the helper is called with tcg_rmode as both output
         * and input here and again after the conversion; presumably it
         * hands back the previous rounding mode so the second call
         * restores it -- confirm against the set_rmode helper.
         */
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);

        if (is_double) {
            TCGv_i64 tcg_double = read_fp_dreg(s, rn);
            if (is_signed) {
                if (!sf) {
                    gen_helper_vfp_tosld(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_tosqd(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                if (!sf) {
                    gen_helper_vfp_tould(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqd(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                }
            }
            tcg_temp_free_i64(tcg_double);
        } else {
            TCGv_i32 tcg_single = read_fp_sreg(s, rn);
            if (sf) {
                if (is_signed) {
                    gen_helper_vfp_tosqs(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqs(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                /* 32-bit result from a single: the helpers produce an i32,
                 * which is then zero-extended into the 64-bit register.
                 */
                TCGv_i32 tcg_dest = tcg_temp_new_i32();
                if (is_signed) {
                    gen_helper_vfp_tosls(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touls(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
                tcg_temp_free_i32(tcg_dest);
            }
            tcg_temp_free_i32(tcg_single);
        }

        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        tcg_temp_free_i32(tcg_rmode);

        if (!sf) {
            /* 32-bit destination: clear the upper half of the register */
            tcg_gen_ext32u_i64(tcg_int, tcg_int);
        }
    }

    tcg_temp_free_ptr(tcg_fpstatus);
    tcg_temp_free_i32(tcg_shift);
}
4981
4982 /* C3.6.29 Floating point <-> fixed point conversions
4983  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
4984  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4985  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
4986  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4987  */
4988 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
4989 {
4990     int rd = extract32(insn, 0, 5);
4991     int rn = extract32(insn, 5, 5);
4992     int scale = extract32(insn, 10, 6);
4993     int opcode = extract32(insn, 16, 3);
4994     int rmode = extract32(insn, 19, 2);
4995     int type = extract32(insn, 22, 2);
4996     bool sbit = extract32(insn, 29, 1);
4997     bool sf = extract32(insn, 31, 1);
4998     bool itof;
4999
5000     if (sbit || (type > 1)
5001         || (!sf && scale < 32)) {
5002         unallocated_encoding(s);
5003         return;
5004     }
5005
5006     switch ((rmode << 3) | opcode) {
5007     case 0x2: /* SCVTF */
5008     case 0x3: /* UCVTF */
5009         itof = true;
5010         break;
5011     case 0x18: /* FCVTZS */
5012     case 0x19: /* FCVTZU */
5013         itof = false;
5014         break;
5015     default:
5016         unallocated_encoding(s);
5017         return;
5018     }
5019
5020     if (!fp_access_check(s)) {
5021         return;
5022     }
5023
5024     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
5025 }
5026
5027 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
5028 {
5029     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
5030      * without conversion.
5031      */
5032
5033     if (itof) {
5034         TCGv_i64 tcg_rn = cpu_reg(s, rn);
5035
5036         switch (type) {
5037         case 0:
5038         {
5039             /* 32 bit */
5040             TCGv_i64 tmp = tcg_temp_new_i64();
5041             tcg_gen_ext32u_i64(tmp, tcg_rn);
5042             tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
5043             tcg_gen_movi_i64(tmp, 0);
5044             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5045             tcg_temp_free_i64(tmp);
5046             break;
5047         }
5048         case 1:
5049         {
5050             /* 64 bit */
5051             TCGv_i64 tmp = tcg_const_i64(0);
5052             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
5053             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5054             tcg_temp_free_i64(tmp);
5055             break;
5056         }
5057         case 2:
5058             /* 64 bit to top half. */
5059             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
5060             break;
5061         }
5062     } else {
5063         TCGv_i64 tcg_rd = cpu_reg(s, rd);
5064
5065         switch (type) {
5066         case 0:
5067             /* 32 bit */
5068             tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
5069             break;
5070         case 1:
5071             /* 64 bit */
5072             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
5073             break;
5074         case 2:
5075             /* 64 bits from top half */
5076             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
5077             break;
5078         }
5079     }
5080 }
5081
5082 /* C3.6.30 Floating point <-> integer conversions
5083  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
5084  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5085  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
5086  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5087  */
5088 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
5089 {
5090     int rd = extract32(insn, 0, 5);
5091     int rn = extract32(insn, 5, 5);
5092     int opcode = extract32(insn, 16, 3);
5093     int rmode = extract32(insn, 19, 2);
5094     int type = extract32(insn, 22, 2);
5095     bool sbit = extract32(insn, 29, 1);
5096     bool sf = extract32(insn, 31, 1);
5097
5098     if (sbit) {
5099         unallocated_encoding(s);
5100         return;
5101     }
5102
5103     if (opcode > 5) {
5104         /* FMOV */
5105         bool itof = opcode & 1;
5106
5107         if (rmode >= 2) {
5108             unallocated_encoding(s);
5109             return;
5110         }
5111
5112         switch (sf << 3 | type << 1 | rmode) {
5113         case 0x0: /* 32 bit */
5114         case 0xa: /* 64 bit */
5115         case 0xd: /* 64 bit to top half of quad */
5116             break;
5117         default:
5118             /* all other sf/type/rmode combinations are invalid */
5119             unallocated_encoding(s);
5120             break;
5121         }
5122
5123         if (!fp_access_check(s)) {
5124             return;
5125         }
5126         handle_fmov(s, rd, rn, type, itof);
5127     } else {
5128         /* actual FP conversions */
5129         bool itof = extract32(opcode, 1, 1);
5130
5131         if (type > 1 || (rmode != 0 && opcode > 1)) {
5132             unallocated_encoding(s);
5133             return;
5134         }
5135
5136         if (!fp_access_check(s)) {
5137             return;
5138         }
5139         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
5140     }
5141 }
5142
5143 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
5144  *   31  30  29 28     25 24                          0
5145  * +---+---+---+---------+-----------------------------+
5146  * |   | 0 |   | 1 1 1 1 |                             |
5147  * +---+---+---+---------+-----------------------------+
5148  */
5149 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
5150 {
5151     if (extract32(insn, 24, 1)) {
5152         /* Floating point data-processing (3 source) */
5153         disas_fp_3src(s, insn);
5154     } else if (extract32(insn, 21, 1) == 0) {
5155         /* Floating point to fixed point conversions */
5156         disas_fp_fixed_conv(s, insn);
5157     } else {
5158         switch (extract32(insn, 10, 2)) {
5159         case 1:
5160             /* Floating point conditional compare */
5161             disas_fp_ccomp(s, insn);
5162             break;
5163         case 2:
5164             /* Floating point data-processing (2 source) */
5165             disas_fp_2src(s, insn);
5166             break;
5167         case 3:
5168             /* Floating point conditional select */
5169             disas_fp_csel(s, insn);
5170             break;
5171         case 0:
5172             switch (ctz32(extract32(insn, 12, 4))) {
5173             case 0: /* [15:12] == xxx1 */
5174                 /* Floating point immediate */
5175                 disas_fp_imm(s, insn);
5176                 break;
5177             case 1: /* [15:12] == xx10 */
5178                 /* Floating point compare */
5179                 disas_fp_compare(s, insn);
5180                 break;
5181             case 2: /* [15:12] == x100 */
5182                 /* Floating point data-processing (1 source) */
5183                 disas_fp_1src(s, insn);
5184                 break;
5185             case 3: /* [15:12] == 1000 */
5186                 unallocated_encoding(s);
5187                 break;
5188             default: /* [15:12] == 0000 */
5189                 /* Floating point <-> integer conversions */
5190                 disas_fp_int_conv(s, insn);
5191                 break;
5192             }
5193             break;
5194         }
5195     }
5196 }
5197
5198 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
5199                      int pos)
5200 {
5201     /* Extract 64 bits from the middle of two concatenated 64 bit
5202      * vector register slices left:right. The extracted bits start
5203      * at 'pos' bits into the right (least significant) side.
5204      * We return the result in tcg_right, and guarantee not to
5205      * trash tcg_left.
5206      */
5207     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5208     assert(pos > 0 && pos < 64);
5209
5210     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
5211     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
5212     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
5213
5214     tcg_temp_free_i64(tcg_tmp);
5215 }
5216
5217 /* C3.6.1 EXT
5218  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
5219  * +---+---+-------------+-----+---+------+---+------+---+------+------+
5220  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
5221  * +---+---+-------------+-----+---+------+---+------+---+------+------+
5222  */
5223 static void disas_simd_ext(DisasContext *s, uint32_t insn)
5224 {
5225     int is_q = extract32(insn, 30, 1);
5226     int op2 = extract32(insn, 22, 2);
5227     int imm4 = extract32(insn, 11, 4);
5228     int rm = extract32(insn, 16, 5);
5229     int rn = extract32(insn, 5, 5);
5230     int rd = extract32(insn, 0, 5);
5231     int pos = imm4 << 3;
5232     TCGv_i64 tcg_resl, tcg_resh;
5233
5234     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
5235         unallocated_encoding(s);
5236         return;
5237     }
5238
5239     if (!fp_access_check(s)) {
5240         return;
5241     }
5242
5243     tcg_resh = tcg_temp_new_i64();
5244     tcg_resl = tcg_temp_new_i64();
5245
5246     /* Vd gets bits starting at pos bits into Vm:Vn. This is
5247      * either extracting 128 bits from a 128:128 concatenation, or
5248      * extracting 64 bits from a 64:64 concatenation.
5249      */
5250     if (!is_q) {
5251         read_vec_element(s, tcg_resl, rn, 0, MO_64);
5252         if (pos != 0) {
5253             read_vec_element(s, tcg_resh, rm, 0, MO_64);
5254             do_ext64(s, tcg_resh, tcg_resl, pos);
5255         }
5256         tcg_gen_movi_i64(tcg_resh, 0);
5257     } else {
5258         TCGv_i64 tcg_hh;
5259         typedef struct {
5260             int reg;
5261             int elt;
5262         } EltPosns;
5263         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
5264         EltPosns *elt = eltposns;
5265
5266         if (pos >= 64) {
5267             elt++;
5268             pos -= 64;
5269         }
5270
5271         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
5272         elt++;
5273         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
5274         elt++;
5275         if (pos != 0) {
5276             do_ext64(s, tcg_resh, tcg_resl, pos);
5277             tcg_hh = tcg_temp_new_i64();
5278             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
5279             do_ext64(s, tcg_hh, tcg_resh, pos);
5280             tcg_temp_free_i64(tcg_hh);
5281         }
5282     }
5283
5284     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5285     tcg_temp_free_i64(tcg_resl);
5286     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5287     tcg_temp_free_i64(tcg_resh);
5288 }
5289
5290 /* C3.6.2 TBL/TBX
5291  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
5292  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5293  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
5294  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5295  */
5296 static void disas_simd_tb(DisasContext *s, uint32_t insn)
5297 {
5298     int op2 = extract32(insn, 22, 2);
5299     int is_q = extract32(insn, 30, 1);
5300     int rm = extract32(insn, 16, 5);
5301     int rn = extract32(insn, 5, 5);
5302     int rd = extract32(insn, 0, 5);
5303     int is_tblx = extract32(insn, 12, 1);
5304     int len = extract32(insn, 13, 2);
5305     TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
5306     TCGv_i32 tcg_regno, tcg_numregs;
5307
5308     if (op2 != 0) {
5309         unallocated_encoding(s);
5310         return;
5311     }
5312
5313     if (!fp_access_check(s)) {
5314         return;
5315     }
5316
5317     /* This does a table lookup: for every byte element in the input
5318      * we index into a table formed from up to four vector registers,
5319      * and then the output is the result of the lookups. Our helper
5320      * function does the lookup operation for a single 64 bit part of
5321      * the input.
5322      */
5323     tcg_resl = tcg_temp_new_i64();
5324     tcg_resh = tcg_temp_new_i64();
5325
5326     if (is_tblx) {
5327         read_vec_element(s, tcg_resl, rd, 0, MO_64);
5328     } else {
5329         tcg_gen_movi_i64(tcg_resl, 0);
5330     }
5331     if (is_tblx && is_q) {
5332         read_vec_element(s, tcg_resh, rd, 1, MO_64);
5333     } else {
5334         tcg_gen_movi_i64(tcg_resh, 0);
5335     }
5336
5337     tcg_idx = tcg_temp_new_i64();
5338     tcg_regno = tcg_const_i32(rn);
5339     tcg_numregs = tcg_const_i32(len + 1);
5340     read_vec_element(s, tcg_idx, rm, 0, MO_64);
5341     gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
5342                         tcg_regno, tcg_numregs);
5343     if (is_q) {
5344         read_vec_element(s, tcg_idx, rm, 1, MO_64);
5345         gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
5346                             tcg_regno, tcg_numregs);
5347     }
5348     tcg_temp_free_i64(tcg_idx);
5349     tcg_temp_free_i32(tcg_regno);
5350     tcg_temp_free_i32(tcg_numregs);
5351
5352     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5353     tcg_temp_free_i64(tcg_resl);
5354     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5355     tcg_temp_free_i64(tcg_resh);
5356 }
5357
5358 /* C3.6.3 ZIP/UZP/TRN
5359  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
5360  * +---+---+-------------+------+---+------+---+------------------+------+
5361  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
5362  * +---+---+-------------+------+---+------+---+------------------+------+
5363  */
5364 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
5365 {
5366     int rd = extract32(insn, 0, 5);
5367     int rn = extract32(insn, 5, 5);
5368     int rm = extract32(insn, 16, 5);
5369     int size = extract32(insn, 22, 2);
5370     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
5371      * bit 2 indicates 1 vs 2 variant of the insn.
5372      */
5373     int opcode = extract32(insn, 12, 2);
5374     bool part = extract32(insn, 14, 1);
5375     bool is_q = extract32(insn, 30, 1);
5376     int esize = 8 << size;
5377     int i, ofs;
5378     int datasize = is_q ? 128 : 64;
5379     int elements = datasize / esize;
5380     TCGv_i64 tcg_res, tcg_resl, tcg_resh;
5381
5382     if (opcode == 0 || (size == 3 && !is_q)) {
5383         unallocated_encoding(s);
5384         return;
5385     }
5386
5387     if (!fp_access_check(s)) {
5388         return;
5389     }
5390
5391     tcg_resl = tcg_const_i64(0);
5392     tcg_resh = tcg_const_i64(0);
5393     tcg_res = tcg_temp_new_i64();
5394
5395     for (i = 0; i < elements; i++) {
5396         switch (opcode) {
5397         case 1: /* UZP1/2 */
5398         {
5399             int midpoint = elements / 2;
5400             if (i < midpoint) {
5401                 read_vec_element(s, tcg_res, rn, 2 * i + part, size);
5402             } else {
5403                 read_vec_element(s, tcg_res, rm,
5404                                  2 * (i - midpoint) + part, size);
5405             }
5406             break;
5407         }
5408         case 2: /* TRN1/2 */
5409             if (i & 1) {
5410                 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
5411             } else {
5412                 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
5413             }
5414             break;
5415         case 3: /* ZIP1/2 */
5416         {
5417             int base = part * elements / 2;
5418             if (i & 1) {
5419                 read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
5420             } else {
5421                 read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
5422             }
5423             break;
5424         }
5425         default:
5426             g_assert_not_reached();
5427         }
5428
5429         ofs = i * esize;
5430         if (ofs < 64) {
5431             tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
5432             tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
5433         } else {
5434             tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
5435             tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
5436         }
5437     }
5438
5439     tcg_temp_free_i64(tcg_res);
5440
5441     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5442     tcg_temp_free_i64(tcg_resl);
5443     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5444     tcg_temp_free_i64(tcg_resh);
5445 }
5446
5447 static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
5448                         int opc, bool is_min, TCGv_ptr fpst)
5449 {
5450     /* Helper function for disas_simd_across_lanes: do a single precision
5451      * min/max operation on the specified two inputs,
5452      * and return the result in tcg_elt1.
5453      */
5454     if (opc == 0xc) {
5455         if (is_min) {
5456             gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5457         } else {
5458             gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5459         }
5460     } else {
5461         assert(opc == 0xf);
5462         if (is_min) {
5463             gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5464         } else {
5465             gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5466         }
5467     }
5468 }
5469
5470 /* C3.6.4 AdvSIMD across lanes
5471  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
5472  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5473  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
5474  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5475  */
5476 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
5477 {
5478     int rd = extract32(insn, 0, 5);
5479     int rn = extract32(insn, 5, 5);
5480     int size = extract32(insn, 22, 2);
5481     int opcode = extract32(insn, 12, 5);
5482     bool is_q = extract32(insn, 30, 1);
5483     bool is_u = extract32(insn, 29, 1);
5484     bool is_fp = false;
5485     bool is_min = false;
5486     int esize;
5487     int elements;
5488     int i;
5489     TCGv_i64 tcg_res, tcg_elt;
5490
5491     switch (opcode) {
5492     case 0x1b: /* ADDV */
5493         if (is_u) {
5494             unallocated_encoding(s);
5495             return;
5496         }
5497         /* fall through */
5498     case 0x3: /* SADDLV, UADDLV */
5499     case 0xa: /* SMAXV, UMAXV */
5500     case 0x1a: /* SMINV, UMINV */
5501         if (size == 3 || (size == 2 && !is_q)) {
5502             unallocated_encoding(s);
5503             return;
5504         }
5505         break;
5506     case 0xc: /* FMAXNMV, FMINNMV */
5507     case 0xf: /* FMAXV, FMINV */
5508         if (!is_u || !is_q || extract32(size, 0, 1)) {
5509             unallocated_encoding(s);
5510             return;
5511         }
5512         /* Bit 1 of size field encodes min vs max, and actual size is always
5513          * 32 bits: adjust the size variable so following code can rely on it
5514          */
5515         is_min = extract32(size, 1, 1);
5516         is_fp = true;
5517         size = 2;
5518         break;
5519     default:
5520         unallocated_encoding(s);
5521         return;
5522     }
5523
5524     if (!fp_access_check(s)) {
5525         return;
5526     }
5527
5528     esize = 8 << size;
5529     elements = (is_q ? 128 : 64) / esize;
5530
5531     tcg_res = tcg_temp_new_i64();
5532     tcg_elt = tcg_temp_new_i64();
5533
5534     /* These instructions operate across all lanes of a vector
5535      * to produce a single result. We can guarantee that a 64
5536      * bit intermediate is sufficient:
5537      *  + for [US]ADDLV the maximum element size is 32 bits, and
5538      *    the result type is 64 bits
5539      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
5540      *    same as the element size, which is 32 bits at most
5541      * For the integer operations we can choose to work at 64
5542      * or 32 bits and truncate at the end; for simplicity
5543      * we use 64 bits always. The floating point
5544      * ops do require 32 bit intermediates, though.
5545      */
5546     if (!is_fp) {
5547         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
5548
5549         for (i = 1; i < elements; i++) {
5550             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
5551
5552             switch (opcode) {
5553             case 0x03: /* SADDLV / UADDLV */
5554             case 0x1b: /* ADDV */
5555                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
5556                 break;
5557             case 0x0a: /* SMAXV / UMAXV */
5558                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
5559                                     tcg_res,
5560                                     tcg_res, tcg_elt, tcg_res, tcg_elt);
5561                 break;
5562             case 0x1a: /* SMINV / UMINV */
5563                 tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
5564                                     tcg_res,
5565                                     tcg_res, tcg_elt, tcg_res, tcg_elt);
5566                 break;
5567                 break;
5568             default:
5569                 g_assert_not_reached();
5570             }
5571
5572         }
5573     } else {
5574         /* Floating point ops which work on 32 bit (single) intermediates.
5575          * Note that correct NaN propagation requires that we do these
5576          * operations in exactly the order specified by the pseudocode.
5577          */
5578         TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
5579         TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
5580         TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
5581         TCGv_ptr fpst = get_fpstatus_ptr();
5582
5583         assert(esize == 32);
5584         assert(elements == 4);
5585
5586         read_vec_element(s, tcg_elt, rn, 0, MO_32);
5587         tcg_gen_extrl_i64_i32(tcg_elt1, tcg_elt);
5588         read_vec_element(s, tcg_elt, rn, 1, MO_32);
5589         tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5590
5591         do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5592
5593         read_vec_element(s, tcg_elt, rn, 2, MO_32);
5594         tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5595         read_vec_element(s, tcg_elt, rn, 3, MO_32);
5596         tcg_gen_extrl_i64_i32(tcg_elt3, tcg_elt);
5597
5598         do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
5599
5600         do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5601
5602         tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
5603         tcg_temp_free_i32(tcg_elt1);
5604         tcg_temp_free_i32(tcg_elt2);
5605         tcg_temp_free_i32(tcg_elt3);
5606         tcg_temp_free_ptr(fpst);
5607     }
5608
5609     tcg_temp_free_i64(tcg_elt);
5610
5611     /* Now truncate the result to the width required for the final output */
5612     if (opcode == 0x03) {
5613         /* SADDLV, UADDLV: result is 2*esize */
5614         size++;
5615     }
5616
5617     switch (size) {
5618     case 0:
5619         tcg_gen_ext8u_i64(tcg_res, tcg_res);
5620         break;
5621     case 1:
5622         tcg_gen_ext16u_i64(tcg_res, tcg_res);
5623         break;
5624     case 2:
5625         tcg_gen_ext32u_i64(tcg_res, tcg_res);
5626         break;
5627     case 3:
5628         break;
5629     default:
5630         g_assert_not_reached();
5631     }
5632
5633     write_fp_dreg(s, rd, tcg_res);
5634     tcg_temp_free_i64(tcg_res);
5635 }
5636
5637 /* C6.3.31 DUP (Element, Vector)
5638  *
5639  *  31  30   29              21 20    16 15        10  9    5 4    0
5640  * +---+---+-------------------+--------+-------------+------+------+
5641  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5642  * +---+---+-------------------+--------+-------------+------+------+
5643  *
5644  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5645  */
5646 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
5647                              int imm5)
5648 {
5649     int size = ctz32(imm5);
5650     int esize = 8 << size;
5651     int elements = (is_q ? 128 : 64) / esize;
5652     int index, i;
5653     TCGv_i64 tmp;
5654
5655     if (size > 3 || (size == 3 && !is_q)) {
5656         unallocated_encoding(s);
5657         return;
5658     }
5659
5660     if (!fp_access_check(s)) {
5661         return;
5662     }
5663
5664     index = imm5 >> (size + 1);
5665
5666     tmp = tcg_temp_new_i64();
5667     read_vec_element(s, tmp, rn, index, size);
5668
5669     for (i = 0; i < elements; i++) {
5670         write_vec_element(s, tmp, rd, i, size);
5671     }
5672
5673     if (!is_q) {
5674         clear_vec_high(s, rd);
5675     }
5676
5677     tcg_temp_free_i64(tmp);
5678 }
5679
5680 /* C6.3.31 DUP (element, scalar)
5681  *  31                   21 20    16 15        10  9    5 4    0
5682  * +-----------------------+--------+-------------+------+------+
5683  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5684  * +-----------------------+--------+-------------+------+------+
5685  */
5686 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
5687                               int imm5)
5688 {
5689     int size = ctz32(imm5);
5690     int index;
5691     TCGv_i64 tmp;
5692
5693     if (size > 3) {
5694         unallocated_encoding(s);
5695         return;
5696     }
5697
5698     if (!fp_access_check(s)) {
5699         return;
5700     }
5701
5702     index = imm5 >> (size + 1);
5703
5704     /* This instruction just extracts the specified element and
5705      * zero-extends it into the bottom of the destination register.
5706      */
5707     tmp = tcg_temp_new_i64();
5708     read_vec_element(s, tmp, rn, index, size);
5709     write_fp_dreg(s, rd, tmp);
5710     tcg_temp_free_i64(tmp);
5711 }
5712
5713 /* C6.3.32 DUP (General)
5714  *
5715  *  31  30   29              21 20    16 15        10  9    5 4    0
5716  * +---+---+-------------------+--------+-------------+------+------+
5717  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
5718  * +---+---+-------------------+--------+-------------+------+------+
5719  *
5720  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5721  */
5722 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
5723                              int imm5)
5724 {
5725     int size = ctz32(imm5);
5726     int esize = 8 << size;
5727     int elements = (is_q ? 128 : 64)/esize;
5728     int i = 0;
5729
5730     if (size > 3 || ((size == 3) && !is_q)) {
5731         unallocated_encoding(s);
5732         return;
5733     }
5734
5735     if (!fp_access_check(s)) {
5736         return;
5737     }
5738
5739     for (i = 0; i < elements; i++) {
5740         write_vec_element(s, cpu_reg(s, rn), rd, i, size);
5741     }
5742     if (!is_q) {
5743         clear_vec_high(s, rd);
5744     }
5745 }
5746
5747 /* C6.3.150 INS (Element)
5748  *
5749  *  31                   21 20    16 15  14    11  10 9    5 4    0
5750  * +-----------------------+--------+------------+---+------+------+
5751  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
5752  * +-----------------------+--------+------------+---+------+------+
5753  *
5754  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5755  * index: encoded in imm5<4:size+1>
5756  */
5757 static void handle_simd_inse(DisasContext *s, int rd, int rn,
5758                              int imm4, int imm5)
5759 {
5760     int size = ctz32(imm5);
5761     int src_index, dst_index;
5762     TCGv_i64 tmp;
5763
5764     if (size > 3) {
5765         unallocated_encoding(s);
5766         return;
5767     }
5768
5769     if (!fp_access_check(s)) {
5770         return;
5771     }
5772
5773     dst_index = extract32(imm5, 1+size, 5);
5774     src_index = extract32(imm4, size, 4);
5775
5776     tmp = tcg_temp_new_i64();
5777
5778     read_vec_element(s, tmp, rn, src_index, size);
5779     write_vec_element(s, tmp, rd, dst_index, size);
5780
5781     tcg_temp_free_i64(tmp);
5782 }
5783
5784
5785 /* C6.3.151 INS (General)
5786  *
5787  *  31                   21 20    16 15        10  9    5 4    0
5788  * +-----------------------+--------+-------------+------+------+
5789  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
5790  * +-----------------------+--------+-------------+------+------+
5791  *
5792  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5793  * index: encoded in imm5<4:size+1>
5794  */
5795 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
5796 {
5797     int size = ctz32(imm5);
5798     int idx;
5799
5800     if (size > 3) {
5801         unallocated_encoding(s);
5802         return;
5803     }
5804
5805     if (!fp_access_check(s)) {
5806         return;
5807     }
5808
5809     idx = extract32(imm5, 1 + size, 4 - size);
5810     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
5811 }
5812
5813 /*
5814  * C6.3.321 UMOV (General)
5815  * C6.3.237 SMOV (General)
5816  *
5817  *  31  30   29              21 20    16 15    12   10 9    5 4    0
5818  * +---+---+-------------------+--------+-------------+------+------+
5819  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
5820  * +---+---+-------------------+--------+-------------+------+------+
5821  *
5822  * U: unsigned when set
5823  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5824  */
5825 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
5826                                   int rn, int rd, int imm5)
5827 {
5828     int size = ctz32(imm5);
5829     int element;
5830     TCGv_i64 tcg_rd;
5831
5832     /* Check for UnallocatedEncodings */
5833     if (is_signed) {
5834         if (size > 2 || (size == 2 && !is_q)) {
5835             unallocated_encoding(s);
5836             return;
5837         }
5838     } else {
5839         if (size > 3
5840             || (size < 3 && is_q)
5841             || (size == 3 && !is_q)) {
5842             unallocated_encoding(s);
5843             return;
5844         }
5845     }
5846
5847     if (!fp_access_check(s)) {
5848         return;
5849     }
5850
5851     element = extract32(imm5, 1+size, 4);
5852
5853     tcg_rd = cpu_reg(s, rd);
5854     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
5855     if (is_signed && !is_q) {
5856         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5857     }
5858 }
5859
5860 /* C3.6.5 AdvSIMD copy
5861  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
5862  * +---+---+----+-----------------+------+---+------+---+------+------+
5863  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
5864  * +---+---+----+-----------------+------+---+------+---+------+------+
5865  */
5866 static void disas_simd_copy(DisasContext *s, uint32_t insn)
5867 {
5868     int rd = extract32(insn, 0, 5);
5869     int rn = extract32(insn, 5, 5);
5870     int imm4 = extract32(insn, 11, 4);
5871     int op = extract32(insn, 29, 1);
5872     int is_q = extract32(insn, 30, 1);
5873     int imm5 = extract32(insn, 16, 5);
5874
5875     if (op) {
5876         if (is_q) {
5877             /* INS (element) */
5878             handle_simd_inse(s, rd, rn, imm4, imm5);
5879         } else {
5880             unallocated_encoding(s);
5881         }
5882     } else {
5883         switch (imm4) {
5884         case 0:
5885             /* DUP (element - vector) */
5886             handle_simd_dupe(s, is_q, rd, rn, imm5);
5887             break;
5888         case 1:
5889             /* DUP (general) */
5890             handle_simd_dupg(s, is_q, rd, rn, imm5);
5891             break;
5892         case 3:
5893             if (is_q) {
5894                 /* INS (general) */
5895                 handle_simd_insg(s, rd, rn, imm5);
5896             } else {
5897                 unallocated_encoding(s);
5898             }
5899             break;
5900         case 5:
5901         case 7:
5902             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
5903             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
5904             break;
5905         default:
5906             unallocated_encoding(s);
5907             break;
5908         }
5909     }
5910 }
5911
5912 /* C3.6.6 AdvSIMD modified immediate
5913  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
5914  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
5915  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
5916  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
5917  *
5918  * There are a number of operations that can be carried out here:
5919  *   MOVI - move (shifted) imm into register
5920  *   MVNI - move inverted (shifted) imm into register
5921  *   ORR  - bitwise OR of (shifted) imm with register
5922  *   BIC  - bitwise clear of (shifted) imm with register
5923  */
5924 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
5925 {
5926     int rd = extract32(insn, 0, 5);
5927     int cmode = extract32(insn, 12, 4);
5928     int cmode_3_1 = extract32(cmode, 1, 3);
5929     int cmode_0 = extract32(cmode, 0, 1);
5930     int o2 = extract32(insn, 11, 1);
5931     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
5932     bool is_neg = extract32(insn, 29, 1);
5933     bool is_q = extract32(insn, 30, 1);
5934     uint64_t imm = 0;
5935     TCGv_i64 tcg_rd, tcg_imm;
5936     int i;
5937
5938     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
5939         unallocated_encoding(s);
5940         return;
5941     }
5942
5943     if (!fp_access_check(s)) {
5944         return;
5945     }
5946
5947     /* See AdvSIMDExpandImm() in ARM ARM */
5948     switch (cmode_3_1) {
5949     case 0: /* Replicate(Zeros(24):imm8, 2) */
5950     case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
5951     case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
5952     case 3: /* Replicate(imm8:Zeros(24), 2) */
5953     {
5954         int shift = cmode_3_1 * 8;
5955         imm = bitfield_replicate(abcdefgh << shift, 32);
5956         break;
5957     }
5958     case 4: /* Replicate(Zeros(8):imm8, 4) */
5959     case 5: /* Replicate(imm8:Zeros(8), 4) */
5960     {
5961         int shift = (cmode_3_1 & 0x1) * 8;
5962         imm = bitfield_replicate(abcdefgh << shift, 16);
5963         break;
5964     }
5965     case 6:
5966         if (cmode_0) {
5967             /* Replicate(Zeros(8):imm8:Ones(16), 2) */
5968             imm = (abcdefgh << 16) | 0xffff;
5969         } else {
5970             /* Replicate(Zeros(16):imm8:Ones(8), 2) */
5971             imm = (abcdefgh << 8) | 0xff;
5972         }
5973         imm = bitfield_replicate(imm, 32);
5974         break;
5975     case 7:
5976         if (!cmode_0 && !is_neg) {
5977             imm = bitfield_replicate(abcdefgh, 8);
5978         } else if (!cmode_0 && is_neg) {
5979             int i;
5980             imm = 0;
5981             for (i = 0; i < 8; i++) {
5982                 if ((abcdefgh) & (1 << i)) {
5983                     imm |= 0xffULL << (i * 8);
5984                 }
5985             }
5986         } else if (cmode_0) {
5987             if (is_neg) {
5988                 imm = (abcdefgh & 0x3f) << 48;
5989                 if (abcdefgh & 0x80) {
5990                     imm |= 0x8000000000000000ULL;
5991                 }
5992                 if (abcdefgh & 0x40) {
5993                     imm |= 0x3fc0000000000000ULL;
5994                 } else {
5995                     imm |= 0x4000000000000000ULL;
5996                 }
5997             } else {
5998                 imm = (abcdefgh & 0x3f) << 19;
5999                 if (abcdefgh & 0x80) {
6000                     imm |= 0x80000000;
6001                 }
6002                 if (abcdefgh & 0x40) {
6003                     imm |= 0x3e000000;
6004                 } else {
6005                     imm |= 0x40000000;
6006                 }
6007                 imm |= (imm << 32);
6008             }
6009         }
6010         break;
6011     }
6012
6013     if (cmode_3_1 != 7 && is_neg) {
6014         imm = ~imm;
6015     }
6016
6017     tcg_imm = tcg_const_i64(imm);
6018     tcg_rd = new_tmp_a64(s);
6019
6020     for (i = 0; i < 2; i++) {
6021         int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64);
6022
6023         if (i == 1 && !is_q) {
6024             /* non-quad ops clear high half of vector */
6025             tcg_gen_movi_i64(tcg_rd, 0);
6026         } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
6027             tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
6028             if (is_neg) {
6029                 /* AND (BIC) */
6030                 tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
6031             } else {
6032                 /* ORR */
6033                 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
6034             }
6035         } else {
6036             /* MOVI */
6037             tcg_gen_mov_i64(tcg_rd, tcg_imm);
6038         }
6039         tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
6040     }
6041
6042     tcg_temp_free_i64(tcg_imm);
6043 }
6044
6045 /* C3.6.7 AdvSIMD scalar copy
6046  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
6047  * +-----+----+-----------------+------+---+------+---+------+------+
6048  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6049  * +-----+----+-----------------+------+---+------+---+------+------+
6050  */
6051 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
6052 {
6053     int rd = extract32(insn, 0, 5);
6054     int rn = extract32(insn, 5, 5);
6055     int imm4 = extract32(insn, 11, 4);
6056     int imm5 = extract32(insn, 16, 5);
6057     int op = extract32(insn, 29, 1);
6058
6059     if (op != 0 || imm4 != 0) {
6060         unallocated_encoding(s);
6061         return;
6062     }
6063
6064     /* DUP (element, scalar) */
6065     handle_simd_dupes(s, rd, rn, imm5);
6066 }
6067
6068 /* C3.6.8 AdvSIMD scalar pairwise
6069  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
6070  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6071  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6072  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6073  */
6074 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
6075 {
6076     int u = extract32(insn, 29, 1);
6077     int size = extract32(insn, 22, 2);
6078     int opcode = extract32(insn, 12, 5);
6079     int rn = extract32(insn, 5, 5);
6080     int rd = extract32(insn, 0, 5);
6081     TCGv_ptr fpst;
6082
6083     /* For some ops (the FP ones), size[1] is part of the encoding.
6084      * For ADDP strictly it is not but size[1] is always 1 for valid
6085      * encodings.
6086      */
6087     opcode |= (extract32(size, 1, 1) << 5);
6088
6089     switch (opcode) {
6090     case 0x3b: /* ADDP */
6091         if (u || size != 3) {
6092             unallocated_encoding(s);
6093             return;
6094         }
6095         if (!fp_access_check(s)) {
6096             return;
6097         }
6098
6099         TCGV_UNUSED_PTR(fpst);
6100         break;
6101     case 0xc: /* FMAXNMP */
6102     case 0xd: /* FADDP */
6103     case 0xf: /* FMAXP */
6104     case 0x2c: /* FMINNMP */
6105     case 0x2f: /* FMINP */
6106         /* FP op, size[0] is 32 or 64 bit */
6107         if (!u) {
6108             unallocated_encoding(s);
6109             return;
6110         }
6111         if (!fp_access_check(s)) {
6112             return;
6113         }
6114
6115         size = extract32(size, 0, 1) ? 3 : 2;
6116         fpst = get_fpstatus_ptr();
6117         break;
6118     default:
6119         unallocated_encoding(s);
6120         return;
6121     }
6122
6123     if (size == 3) {
6124         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6125         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6126         TCGv_i64 tcg_res = tcg_temp_new_i64();
6127
6128         read_vec_element(s, tcg_op1, rn, 0, MO_64);
6129         read_vec_element(s, tcg_op2, rn, 1, MO_64);
6130
6131         switch (opcode) {
6132         case 0x3b: /* ADDP */
6133             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
6134             break;
6135         case 0xc: /* FMAXNMP */
6136             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6137             break;
6138         case 0xd: /* FADDP */
6139             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6140             break;
6141         case 0xf: /* FMAXP */
6142             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6143             break;
6144         case 0x2c: /* FMINNMP */
6145             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6146             break;
6147         case 0x2f: /* FMINP */
6148             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6149             break;
6150         default:
6151             g_assert_not_reached();
6152         }
6153
6154         write_fp_dreg(s, rd, tcg_res);
6155
6156         tcg_temp_free_i64(tcg_op1);
6157         tcg_temp_free_i64(tcg_op2);
6158         tcg_temp_free_i64(tcg_res);
6159     } else {
6160         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6161         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6162         TCGv_i32 tcg_res = tcg_temp_new_i32();
6163
6164         read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
6165         read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);
6166
6167         switch (opcode) {
6168         case 0xc: /* FMAXNMP */
6169             gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6170             break;
6171         case 0xd: /* FADDP */
6172             gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6173             break;
6174         case 0xf: /* FMAXP */
6175             gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6176             break;
6177         case 0x2c: /* FMINNMP */
6178             gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6179             break;
6180         case 0x2f: /* FMINP */
6181             gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6182             break;
6183         default:
6184             g_assert_not_reached();
6185         }
6186
6187         write_fp_sreg(s, rd, tcg_res);
6188
6189         tcg_temp_free_i32(tcg_op1);
6190         tcg_temp_free_i32(tcg_op2);
6191         tcg_temp_free_i32(tcg_res);
6192     }
6193
6194     if (!TCGV_IS_UNUSED_PTR(fpst)) {
6195         tcg_temp_free_ptr(fpst);
6196     }
6197 }
6198
6199 /*
6200  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
6201  *
6202  * This code is handles the common shifting code and is used by both
6203  * the vector and scalar code.
6204  */
6205 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6206                                     TCGv_i64 tcg_rnd, bool accumulate,
6207                                     bool is_u, int size, int shift)
6208 {
6209     bool extended_result = false;
6210     bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
6211     int ext_lshift = 0;
6212     TCGv_i64 tcg_src_hi;
6213
6214     if (round && size == 3) {
6215         extended_result = true;
6216         ext_lshift = 64 - shift;
6217         tcg_src_hi = tcg_temp_new_i64();
6218     } else if (shift == 64) {
6219         if (!accumulate && is_u) {
6220             /* result is zero */
6221             tcg_gen_movi_i64(tcg_res, 0);
6222             return;
6223         }
6224     }
6225
6226     /* Deal with the rounding step */
6227     if (round) {
6228         if (extended_result) {
6229             TCGv_i64 tcg_zero = tcg_const_i64(0);
6230             if (!is_u) {
6231                 /* take care of sign extending tcg_res */
6232                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
6233                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6234                                  tcg_src, tcg_src_hi,
6235                                  tcg_rnd, tcg_zero);
6236             } else {
6237                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6238                                  tcg_src, tcg_zero,
6239                                  tcg_rnd, tcg_zero);
6240             }
6241             tcg_temp_free_i64(tcg_zero);
6242         } else {
6243             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
6244         }
6245     }
6246
6247     /* Now do the shift right */
6248     if (round && extended_result) {
6249         /* extended case, >64 bit precision required */
6250         if (ext_lshift == 0) {
6251             /* special case, only high bits matter */
6252             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
6253         } else {
6254             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6255             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
6256             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
6257         }
6258     } else {
6259         if (is_u) {
6260             if (shift == 64) {
6261                 /* essentially shifting in 64 zeros */
6262                 tcg_gen_movi_i64(tcg_src, 0);
6263             } else {
6264                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6265             }
6266         } else {
6267             if (shift == 64) {
6268                 /* effectively extending the sign-bit */
6269                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
6270             } else {
6271                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
6272             }
6273         }
6274     }
6275
6276     if (accumulate) {
6277         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
6278     } else {
6279         tcg_gen_mov_i64(tcg_res, tcg_src);
6280     }
6281
6282     if (extended_result) {
6283         tcg_temp_free_i64(tcg_src_hi);
6284     }
6285 }
6286
6287 /* Common SHL/SLI - Shift left with an optional insert */
6288 static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6289                                  bool insert, int shift)
6290 {
6291     if (insert) { /* SLI */
6292         tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
6293     } else { /* SHL */
6294         tcg_gen_shli_i64(tcg_res, tcg_src, shift);
6295     }
6296 }
6297
6298 /* SRI: shift right with insert */
6299 static void handle_shri_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6300                                  int size, int shift)
6301 {
6302     int esize = 8 << size;
6303
6304     /* shift count same as element size is valid but does nothing;
6305      * special case to avoid potential shift by 64.
6306      */
6307     if (shift != esize) {
6308         tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6309         tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, 0, esize - shift);
6310     }
6311 }
6312
6313 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
6314 static void handle_scalar_simd_shri(DisasContext *s,
6315                                     bool is_u, int immh, int immb,
6316                                     int opcode, int rn, int rd)
6317 {
6318     const int size = 3;
6319     int immhb = immh << 3 | immb;
6320     int shift = 2 * (8 << size) - immhb;
6321     bool accumulate = false;
6322     bool round = false;
6323     bool insert = false;
6324     TCGv_i64 tcg_rn;
6325     TCGv_i64 tcg_rd;
6326     TCGv_i64 tcg_round;
6327
6328     if (!extract32(immh, 3, 1)) {
6329         unallocated_encoding(s);
6330         return;
6331     }
6332
6333     if (!fp_access_check(s)) {
6334         return;
6335     }
6336
6337     switch (opcode) {
6338     case 0x02: /* SSRA / USRA (accumulate) */
6339         accumulate = true;
6340         break;
6341     case 0x04: /* SRSHR / URSHR (rounding) */
6342         round = true;
6343         break;
6344     case 0x06: /* SRSRA / URSRA (accum + rounding) */
6345         accumulate = round = true;
6346         break;
6347     case 0x08: /* SRI */
6348         insert = true;
6349         break;
6350     }
6351
6352     if (round) {
6353         uint64_t round_const = 1ULL << (shift - 1);
6354         tcg_round = tcg_const_i64(round_const);
6355     } else {
6356         TCGV_UNUSED_I64(tcg_round);
6357     }
6358
6359     tcg_rn = read_fp_dreg(s, rn);
6360     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6361
6362     if (insert) {
6363         handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
6364     } else {
6365         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6366                                 accumulate, is_u, size, shift);
6367     }
6368
6369     write_fp_dreg(s, rd, tcg_rd);
6370
6371     tcg_temp_free_i64(tcg_rn);
6372     tcg_temp_free_i64(tcg_rd);
6373     if (round) {
6374         tcg_temp_free_i64(tcg_round);
6375     }
6376 }
6377
6378 /* SHL/SLI - Scalar shift left */
6379 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
6380                                     int immh, int immb, int opcode,
6381                                     int rn, int rd)
6382 {
6383     int size = 32 - clz32(immh) - 1;
6384     int immhb = immh << 3 | immb;
6385     int shift = immhb - (8 << size);
6386     TCGv_i64 tcg_rn = new_tmp_a64(s);
6387     TCGv_i64 tcg_rd = new_tmp_a64(s);
6388
6389     if (!extract32(immh, 3, 1)) {
6390         unallocated_encoding(s);
6391         return;
6392     }
6393
6394     if (!fp_access_check(s)) {
6395         return;
6396     }
6397
6398     tcg_rn = read_fp_dreg(s, rn);
6399     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6400
6401     handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
6402
6403     write_fp_dreg(s, rd, tcg_rd);
6404
6405     tcg_temp_free_i64(tcg_rn);
6406     tcg_temp_free_i64(tcg_rd);
6407 }
6408
6409 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
6410  * (signed/unsigned) narrowing */
6411 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
6412                                    bool is_u_shift, bool is_u_narrow,
6413                                    int immh, int immb, int opcode,
6414                                    int rn, int rd)
6415 {
6416     int immhb = immh << 3 | immb;
6417     int size = 32 - clz32(immh) - 1;
6418     int esize = 8 << size;
6419     int shift = (2 * esize) - immhb;
6420     int elements = is_scalar ? 1 : (64 / esize);
6421     bool round = extract32(opcode, 0, 1);
6422     TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
6423     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
6424     TCGv_i32 tcg_rd_narrowed;
6425     TCGv_i64 tcg_final;
6426
6427     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
6428         { gen_helper_neon_narrow_sat_s8,
6429           gen_helper_neon_unarrow_sat8 },
6430         { gen_helper_neon_narrow_sat_s16,
6431           gen_helper_neon_unarrow_sat16 },
6432         { gen_helper_neon_narrow_sat_s32,
6433           gen_helper_neon_unarrow_sat32 },
6434         { NULL, NULL },
6435     };
6436     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
6437         gen_helper_neon_narrow_sat_u8,
6438         gen_helper_neon_narrow_sat_u16,
6439         gen_helper_neon_narrow_sat_u32,
6440         NULL
6441     };
6442     NeonGenNarrowEnvFn *narrowfn;
6443
6444     int i;
6445
6446     assert(size < 4);
6447
6448     if (extract32(immh, 3, 1)) {
6449         unallocated_encoding(s);
6450         return;
6451     }
6452
6453     if (!fp_access_check(s)) {
6454         return;
6455     }
6456
6457     if (is_u_shift) {
6458         narrowfn = unsigned_narrow_fns[size];
6459     } else {
6460         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
6461     }
6462
6463     tcg_rn = tcg_temp_new_i64();
6464     tcg_rd = tcg_temp_new_i64();
6465     tcg_rd_narrowed = tcg_temp_new_i32();
6466     tcg_final = tcg_const_i64(0);
6467
6468     if (round) {
6469         uint64_t round_const = 1ULL << (shift - 1);
6470         tcg_round = tcg_const_i64(round_const);
6471     } else {
6472         TCGV_UNUSED_I64(tcg_round);
6473     }
6474
6475     for (i = 0; i < elements; i++) {
6476         read_vec_element(s, tcg_rn, rn, i, ldop);
6477         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6478                                 false, is_u_shift, size+1, shift);
6479         narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
6480         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
6481         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
6482     }
6483
6484     if (!is_q) {
6485         clear_vec_high(s, rd);
6486         write_vec_element(s, tcg_final, rd, 0, MO_64);
6487     } else {
6488         write_vec_element(s, tcg_final, rd, 1, MO_64);
6489     }
6490
6491     if (round) {
6492         tcg_temp_free_i64(tcg_round);
6493     }
6494     tcg_temp_free_i64(tcg_rn);
6495     tcg_temp_free_i64(tcg_rd);
6496     tcg_temp_free_i32(tcg_rd_narrowed);
6497     tcg_temp_free_i64(tcg_final);
6498     return;
6499 }
6500
6501 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
6502 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
6503                              bool src_unsigned, bool dst_unsigned,
6504                              int immh, int immb, int rn, int rd)
6505 {
6506     int immhb = immh << 3 | immb;
6507     int size = 32 - clz32(immh) - 1;
6508     int shift = immhb - (8 << size);
6509     int pass;
6510
6511     assert(immh != 0);
6512     assert(!(scalar && is_q));
6513
6514     if (!scalar) {
6515         if (!is_q && extract32(immh, 3, 1)) {
6516             unallocated_encoding(s);
6517             return;
6518         }
6519
6520         /* Since we use the variable-shift helpers we must
6521          * replicate the shift count into each element of
6522          * the tcg_shift value.
6523          */
6524         switch (size) {
6525         case 0:
6526             shift |= shift << 8;
6527             /* fall through */
6528         case 1:
6529             shift |= shift << 16;
6530             break;
6531         case 2:
6532         case 3:
6533             break;
6534         default:
6535             g_assert_not_reached();
6536         }
6537     }
6538
6539     if (!fp_access_check(s)) {
6540         return;
6541     }
6542
6543     if (size == 3) {
6544         TCGv_i64 tcg_shift = tcg_const_i64(shift);
6545         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
6546             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
6547             { NULL, gen_helper_neon_qshl_u64 },
6548         };
6549         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
6550         int maxpass = is_q ? 2 : 1;
6551
6552         for (pass = 0; pass < maxpass; pass++) {
6553             TCGv_i64 tcg_op = tcg_temp_new_i64();
6554
6555             read_vec_element(s, tcg_op, rn, pass, MO_64);
6556             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6557             write_vec_element(s, tcg_op, rd, pass, MO_64);
6558
6559             tcg_temp_free_i64(tcg_op);
6560         }
6561         tcg_temp_free_i64(tcg_shift);
6562
6563         if (!is_q) {
6564             clear_vec_high(s, rd);
6565         }
6566     } else {
6567         TCGv_i32 tcg_shift = tcg_const_i32(shift);
6568         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
6569             {
6570                 { gen_helper_neon_qshl_s8,
6571                   gen_helper_neon_qshl_s16,
6572                   gen_helper_neon_qshl_s32 },
6573                 { gen_helper_neon_qshlu_s8,
6574                   gen_helper_neon_qshlu_s16,
6575                   gen_helper_neon_qshlu_s32 }
6576             }, {
6577                 { NULL, NULL, NULL },
6578                 { gen_helper_neon_qshl_u8,
6579                   gen_helper_neon_qshl_u16,
6580                   gen_helper_neon_qshl_u32 }
6581             }
6582         };
6583         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
6584         TCGMemOp memop = scalar ? size : MO_32;
6585         int maxpass = scalar ? 1 : is_q ? 4 : 2;
6586
6587         for (pass = 0; pass < maxpass; pass++) {
6588             TCGv_i32 tcg_op = tcg_temp_new_i32();
6589
6590             read_vec_element_i32(s, tcg_op, rn, pass, memop);
6591             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6592             if (scalar) {
6593                 switch (size) {
6594                 case 0:
6595                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
6596                     break;
6597                 case 1:
6598                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
6599                     break;
6600                 case 2:
6601                     break;
6602                 default:
6603                     g_assert_not_reached();
6604                 }
6605                 write_fp_sreg(s, rd, tcg_op);
6606             } else {
6607                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6608             }
6609
6610             tcg_temp_free_i32(tcg_op);
6611         }
6612         tcg_temp_free_i32(tcg_shift);
6613
6614         if (!is_q && !scalar) {
6615             clear_vec_high(s, rd);
6616         }
6617     }
6618 }
6619
6620 /* Common vector code for handling integer to FP conversion */
6621 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
6622                                    int elements, int is_signed,
6623                                    int fracbits, int size)
6624 {
6625     bool is_double = size == 3 ? true : false;
6626     TCGv_ptr tcg_fpst = get_fpstatus_ptr();
6627     TCGv_i32 tcg_shift = tcg_const_i32(fracbits);
6628     TCGv_i64 tcg_int = tcg_temp_new_i64();
6629     TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
6630     int pass;
6631
6632     for (pass = 0; pass < elements; pass++) {
6633         read_vec_element(s, tcg_int, rn, pass, mop);
6634
6635         if (is_double) {
6636             TCGv_i64 tcg_double = tcg_temp_new_i64();
6637             if (is_signed) {
6638                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6639                                      tcg_shift, tcg_fpst);
6640             } else {
6641                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6642                                      tcg_shift, tcg_fpst);
6643             }
6644             if (elements == 1) {
6645                 write_fp_dreg(s, rd, tcg_double);
6646             } else {
6647                 write_vec_element(s, tcg_double, rd, pass, MO_64);
6648             }
6649             tcg_temp_free_i64(tcg_double);
6650         } else {
6651             TCGv_i32 tcg_single = tcg_temp_new_i32();
6652             if (is_signed) {
6653                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6654                                      tcg_shift, tcg_fpst);
6655             } else {
6656                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6657                                      tcg_shift, tcg_fpst);
6658             }
6659             if (elements == 1) {
6660                 write_fp_sreg(s, rd, tcg_single);
6661             } else {
6662                 write_vec_element_i32(s, tcg_single, rd, pass, MO_32);
6663             }
6664             tcg_temp_free_i32(tcg_single);
6665         }
6666     }
6667
6668     if (!is_double && elements == 2) {
6669         clear_vec_high(s, rd);
6670     }
6671
6672     tcg_temp_free_i64(tcg_int);
6673     tcg_temp_free_ptr(tcg_fpst);
6674     tcg_temp_free_i32(tcg_shift);
6675 }
6676
6677 /* UCVTF/SCVTF - Integer to FP conversion */
6678 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
6679                                          bool is_q, bool is_u,
6680                                          int immh, int immb, int opcode,
6681                                          int rn, int rd)
6682 {
6683     bool is_double = extract32(immh, 3, 1);
6684     int size = is_double ? MO_64 : MO_32;
6685     int elements;
6686     int immhb = immh << 3 | immb;
6687     int fracbits = (is_double ? 128 : 64) - immhb;
6688
6689     if (!extract32(immh, 2, 2)) {
6690         unallocated_encoding(s);
6691         return;
6692     }
6693
6694     if (is_scalar) {
6695         elements = 1;
6696     } else {
6697         elements = is_double ? 2 : is_q ? 4 : 2;
6698         if (is_double && !is_q) {
6699             unallocated_encoding(s);
6700             return;
6701         }
6702     }
6703
6704     if (!fp_access_check(s)) {
6705         return;
6706     }
6707
6708     /* immh == 0 would be a failure of the decode logic */
6709     g_assert(immh);
6710
6711     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
6712 }
6713
6714 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */
6715 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
6716                                          bool is_q, bool is_u,
6717                                          int immh, int immb, int rn, int rd)
6718 {
6719     bool is_double = extract32(immh, 3, 1);
6720     int immhb = immh << 3 | immb;
6721     int fracbits = (is_double ? 128 : 64) - immhb;
6722     int pass;
6723     TCGv_ptr tcg_fpstatus;
6724     TCGv_i32 tcg_rmode, tcg_shift;
6725
6726     if (!extract32(immh, 2, 2)) {
6727         unallocated_encoding(s);
6728         return;
6729     }
6730
6731     if (!is_scalar && !is_q && is_double) {
6732         unallocated_encoding(s);
6733         return;
6734     }
6735
6736     if (!fp_access_check(s)) {
6737         return;
6738     }
6739
6740     assert(!(is_scalar && is_q));
6741
6742     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
6743     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6744     tcg_fpstatus = get_fpstatus_ptr();
6745     tcg_shift = tcg_const_i32(fracbits);
6746
6747     if (is_double) {
6748         int maxpass = is_scalar ? 1 : 2;
6749
6750         for (pass = 0; pass < maxpass; pass++) {
6751             TCGv_i64 tcg_op = tcg_temp_new_i64();
6752
6753             read_vec_element(s, tcg_op, rn, pass, MO_64);
6754             if (is_u) {
6755                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6756             } else {
6757                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6758             }
6759             write_vec_element(s, tcg_op, rd, pass, MO_64);
6760             tcg_temp_free_i64(tcg_op);
6761         }
6762         if (!is_q) {
6763             clear_vec_high(s, rd);
6764         }
6765     } else {
6766         int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
6767         for (pass = 0; pass < maxpass; pass++) {
6768             TCGv_i32 tcg_op = tcg_temp_new_i32();
6769
6770             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
6771             if (is_u) {
6772                 gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6773             } else {
6774                 gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6775             }
6776             if (is_scalar) {
6777                 write_fp_sreg(s, rd, tcg_op);
6778             } else {
6779                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6780             }
6781             tcg_temp_free_i32(tcg_op);
6782         }
6783         if (!is_q && !is_scalar) {
6784             clear_vec_high(s, rd);
6785         }
6786     }
6787
6788     tcg_temp_free_ptr(tcg_fpstatus);
6789     tcg_temp_free_i32(tcg_shift);
6790     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6791     tcg_temp_free_i32(tcg_rmode);
6792 }
6793
6794 /* C3.6.9 AdvSIMD scalar shift by immediate
6795  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
6796  * +-----+---+-------------+------+------+--------+---+------+------+
6797  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
6798  * +-----+---+-------------+------+------+--------+---+------+------+
6799  *
6800  * This is the scalar version so it works on a fixed sized registers
6801  */
6802 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
6803 {
6804     int rd = extract32(insn, 0, 5);
6805     int rn = extract32(insn, 5, 5);
6806     int opcode = extract32(insn, 11, 5);
6807     int immb = extract32(insn, 16, 3);
6808     int immh = extract32(insn, 19, 4);
6809     bool is_u = extract32(insn, 29, 1);
6810
6811     if (immh == 0) {
6812         unallocated_encoding(s);
6813         return;
6814     }
6815
6816     switch (opcode) {
6817     case 0x08: /* SRI */
6818         if (!is_u) {
6819             unallocated_encoding(s);
6820             return;
6821         }
6822         /* fall through */
6823     case 0x00: /* SSHR / USHR */
6824     case 0x02: /* SSRA / USRA */
6825     case 0x04: /* SRSHR / URSHR */
6826     case 0x06: /* SRSRA / URSRA */
6827         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
6828         break;
6829     case 0x0a: /* SHL / SLI */
6830         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
6831         break;
6832     case 0x1c: /* SCVTF, UCVTF */
6833         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
6834                                      opcode, rn, rd);
6835         break;
6836     case 0x10: /* SQSHRUN, SQSHRUN2 */
6837     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
6838         if (!is_u) {
6839             unallocated_encoding(s);
6840             return;
6841         }
6842         handle_vec_simd_sqshrn(s, true, false, false, true,
6843                                immh, immb, opcode, rn, rd);
6844         break;
6845     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
6846     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
6847         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
6848                                immh, immb, opcode, rn, rd);
6849         break;
6850     case 0xc: /* SQSHLU */
6851         if (!is_u) {
6852             unallocated_encoding(s);
6853             return;
6854         }
6855         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
6856         break;
6857     case 0xe: /* SQSHL, UQSHL */
6858         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
6859         break;
6860     case 0x1f: /* FCVTZS, FCVTZU */
6861         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
6862         break;
6863     default:
6864         unallocated_encoding(s);
6865         break;
6866     }
6867 }
6868
6869 /* C3.6.10 AdvSIMD scalar three different
6870  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6871  * +-----+---+-----------+------+---+------+--------+-----+------+------+
6872  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
6873  * +-----+---+-----------+------+---+------+--------+-----+------+------+
6874  */
6875 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
6876 {
6877     bool is_u = extract32(insn, 29, 1);
6878     int size = extract32(insn, 22, 2);
6879     int opcode = extract32(insn, 12, 4);
6880     int rm = extract32(insn, 16, 5);
6881     int rn = extract32(insn, 5, 5);
6882     int rd = extract32(insn, 0, 5);
6883
6884     if (is_u) {
6885         unallocated_encoding(s);
6886         return;
6887     }
6888
6889     switch (opcode) {
6890     case 0x9: /* SQDMLAL, SQDMLAL2 */
6891     case 0xb: /* SQDMLSL, SQDMLSL2 */
6892     case 0xd: /* SQDMULL, SQDMULL2 */
6893         if (size == 0 || size == 3) {
6894             unallocated_encoding(s);
6895             return;
6896         }
6897         break;
6898     default:
6899         unallocated_encoding(s);
6900         return;
6901     }
6902
6903     if (!fp_access_check(s)) {
6904         return;
6905     }
6906
6907     if (size == 2) {
6908         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6909         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6910         TCGv_i64 tcg_res = tcg_temp_new_i64();
6911
6912         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
6913         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
6914
6915         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
6916         gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
6917
6918         switch (opcode) {
6919         case 0xd: /* SQDMULL, SQDMULL2 */
6920             break;
6921         case 0xb: /* SQDMLSL, SQDMLSL2 */
6922             tcg_gen_neg_i64(tcg_res, tcg_res);
6923             /* fall through */
6924         case 0x9: /* SQDMLAL, SQDMLAL2 */
6925             read_vec_element(s, tcg_op1, rd, 0, MO_64);
6926             gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
6927                                               tcg_res, tcg_op1);
6928             break;
6929         default:
6930             g_assert_not_reached();
6931         }
6932
6933         write_fp_dreg(s, rd, tcg_res);
6934
6935         tcg_temp_free_i64(tcg_op1);
6936         tcg_temp_free_i64(tcg_op2);
6937         tcg_temp_free_i64(tcg_res);
6938     } else {
6939         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6940         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6941         TCGv_i64 tcg_res = tcg_temp_new_i64();
6942
6943         read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
6944         read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
6945
6946         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
6947         gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
6948
6949         switch (opcode) {
6950         case 0xd: /* SQDMULL, SQDMULL2 */
6951             break;
6952         case 0xb: /* SQDMLSL, SQDMLSL2 */
6953             gen_helper_neon_negl_u32(tcg_res, tcg_res);
6954             /* fall through */
6955         case 0x9: /* SQDMLAL, SQDMLAL2 */
6956         {
6957             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
6958             read_vec_element(s, tcg_op3, rd, 0, MO_32);
6959             gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
6960                                               tcg_res, tcg_op3);
6961             tcg_temp_free_i64(tcg_op3);
6962             break;
6963         }
6964         default:
6965             g_assert_not_reached();
6966         }
6967
6968         tcg_gen_ext32u_i64(tcg_res, tcg_res);
6969         write_fp_dreg(s, rd, tcg_res);
6970
6971         tcg_temp_free_i32(tcg_op1);
6972         tcg_temp_free_i32(tcg_op2);
6973         tcg_temp_free_i64(tcg_res);
6974     }
6975 }
6976
6977 static void handle_3same_64(DisasContext *s, int opcode, bool u,
6978                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
6979 {
6980     /* Handle 64x64->64 opcodes which are shared between the scalar
6981      * and vector 3-same groups. We cover every opcode where size == 3
6982      * is valid in either the three-reg-same (integer, not pairwise)
6983      * or scalar-three-reg-same groups. (Some opcodes are not yet
6984      * implemented.)
6985      */
6986     TCGCond cond;
6987
6988     switch (opcode) {
6989     case 0x1: /* SQADD */
6990         if (u) {
6991             gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6992         } else {
6993             gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6994         }
6995         break;
6996     case 0x5: /* SQSUB */
6997         if (u) {
6998             gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6999         } else {
7000             gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7001         }
7002         break;
7003     case 0x6: /* CMGT, CMHI */
7004         /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
7005          * We implement this using setcond (test) and then negating.
7006          */
7007         cond = u ? TCG_COND_GTU : TCG_COND_GT;
7008     do_cmop:
7009         tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
7010         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7011         break;
7012     case 0x7: /* CMGE, CMHS */
7013         cond = u ? TCG_COND_GEU : TCG_COND_GE;
7014         goto do_cmop;
7015     case 0x11: /* CMTST, CMEQ */
7016         if (u) {
7017             cond = TCG_COND_EQ;
7018             goto do_cmop;
7019         }
7020         /* CMTST : test is "if (X & Y != 0)". */
7021         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
7022         tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
7023         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7024         break;
7025     case 0x8: /* SSHL, USHL */
7026         if (u) {
7027             gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
7028         } else {
7029             gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
7030         }
7031         break;
7032     case 0x9: /* SQSHL, UQSHL */
7033         if (u) {
7034             gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7035         } else {
7036             gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7037         }
7038         break;
7039     case 0xa: /* SRSHL, URSHL */
7040         if (u) {
7041             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
7042         } else {
7043             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
7044         }
7045         break;
7046     case 0xb: /* SQRSHL, UQRSHL */
7047         if (u) {
7048             gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7049         } else {
7050             gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7051         }
7052         break;
7053     case 0x10: /* ADD, SUB */
7054         if (u) {
7055             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
7056         } else {
7057             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
7058         }
7059         break;
7060     default:
7061         g_assert_not_reached();
7062     }
7063 }
7064
7065 /* Handle the 3-same-operands float operations; shared by the scalar
7066  * and vector encodings. The caller must filter out any encodings
7067  * not allocated for the encoding it is dealing with.
7068  */
7069 static void handle_3same_float(DisasContext *s, int size, int elements,
7070                                int fpopcode, int rd, int rn, int rm)
7071 {
7072     int pass;
7073     TCGv_ptr fpst = get_fpstatus_ptr();
7074
7075     for (pass = 0; pass < elements; pass++) {
7076         if (size) {
7077             /* Double */
7078             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7079             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7080             TCGv_i64 tcg_res = tcg_temp_new_i64();
7081
7082             read_vec_element(s, tcg_op1, rn, pass, MO_64);
7083             read_vec_element(s, tcg_op2, rm, pass, MO_64);
7084
7085             switch (fpopcode) {
7086             case 0x39: /* FMLS */
7087                 /* As usual for ARM, separate negation for fused multiply-add */
7088                 gen_helper_vfp_negd(tcg_op1, tcg_op1);
7089                 /* fall through */
7090             case 0x19: /* FMLA */
7091                 read_vec_element(s, tcg_res, rd, pass, MO_64);
7092                 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
7093                                        tcg_res, fpst);
7094                 break;
7095             case 0x18: /* FMAXNM */
7096                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7097                 break;
7098             case 0x1a: /* FADD */
7099                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7100                 break;
7101             case 0x1b: /* FMULX */
7102                 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
7103                 break;
7104             case 0x1c: /* FCMEQ */
7105                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7106                 break;
7107             case 0x1e: /* FMAX */
7108                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7109                 break;
7110             case 0x1f: /* FRECPS */
7111                 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7112                 break;
7113             case 0x38: /* FMINNM */
7114                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7115                 break;
7116             case 0x3a: /* FSUB */
7117                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7118                 break;
7119             case 0x3e: /* FMIN */
7120                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7121                 break;
7122             case 0x3f: /* FRSQRTS */
7123                 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7124                 break;
7125             case 0x5b: /* FMUL */
7126                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
7127                 break;
7128             case 0x5c: /* FCMGE */
7129                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7130                 break;
7131             case 0x5d: /* FACGE */
7132                 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7133                 break;
7134             case 0x5f: /* FDIV */
7135                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
7136                 break;
7137             case 0x7a: /* FABD */
7138                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7139                 gen_helper_vfp_absd(tcg_res, tcg_res);
7140                 break;
7141             case 0x7c: /* FCMGT */
7142                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7143                 break;
7144             case 0x7d: /* FACGT */
7145                 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7146                 break;
7147             default:
7148                 g_assert_not_reached();
7149             }
7150
7151             write_vec_element(s, tcg_res, rd, pass, MO_64);
7152
7153             tcg_temp_free_i64(tcg_res);
7154             tcg_temp_free_i64(tcg_op1);
7155             tcg_temp_free_i64(tcg_op2);
7156         } else {
7157             /* Single */
7158             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7159             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7160             TCGv_i32 tcg_res = tcg_temp_new_i32();
7161
7162             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
7163             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
7164
7165             switch (fpopcode) {
7166             case 0x39: /* FMLS */
7167                 /* As usual for ARM, separate negation for fused multiply-add */
7168                 gen_helper_vfp_negs(tcg_op1, tcg_op1);
7169                 /* fall through */
7170             case 0x19: /* FMLA */
7171                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7172                 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
7173                                        tcg_res, fpst);
7174                 break;
7175             case 0x1a: /* FADD */
7176                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7177                 break;
7178             case 0x1b: /* FMULX */
7179                 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
7180                 break;
7181             case 0x1c: /* FCMEQ */
7182                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7183                 break;
7184             case 0x1e: /* FMAX */
7185                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7186                 break;
7187             case 0x1f: /* FRECPS */
7188                 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7189                 break;
7190             case 0x18: /* FMAXNM */
7191                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7192                 break;
7193             case 0x38: /* FMINNM */
7194                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7195                 break;
7196             case 0x3a: /* FSUB */
7197                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7198                 break;
7199             case 0x3e: /* FMIN */
7200                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7201                 break;
7202             case 0x3f: /* FRSQRTS */
7203                 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7204                 break;
7205             case 0x5b: /* FMUL */
7206                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
7207                 break;
7208             case 0x5c: /* FCMGE */
7209                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7210                 break;
7211             case 0x5d: /* FACGE */
7212                 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7213                 break;
7214             case 0x5f: /* FDIV */
7215                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
7216                 break;
7217             case 0x7a: /* FABD */
7218                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7219                 gen_helper_vfp_abss(tcg_res, tcg_res);
7220                 break;
7221             case 0x7c: /* FCMGT */
7222                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7223                 break;
7224             case 0x7d: /* FACGT */
7225                 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7226                 break;
7227             default:
7228                 g_assert_not_reached();
7229             }
7230
7231             if (elements == 1) {
7232                 /* scalar single so clear high part */
7233                 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7234
7235                 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
7236                 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
7237                 tcg_temp_free_i64(tcg_tmp);
7238             } else {
7239                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7240             }
7241
7242             tcg_temp_free_i32(tcg_res);
7243             tcg_temp_free_i32(tcg_op1);
7244             tcg_temp_free_i32(tcg_op2);
7245         }
7246     }
7247
7248     tcg_temp_free_ptr(fpst);
7249
7250     if ((elements << size) < 4) {
7251         /* scalar, or non-quad vector op */
7252         clear_vec_high(s, rd);
7253     }
7254 }
7255
7256 /* C3.6.11 AdvSIMD scalar three same
7257  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
7258  * +-----+---+-----------+------+---+------+--------+---+------+------+
7259  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
7260  * +-----+---+-----------+------+---+------+--------+---+------+------+
7261  */
7262 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
7263 {
7264     int rd = extract32(insn, 0, 5);
7265     int rn = extract32(insn, 5, 5);
7266     int opcode = extract32(insn, 11, 5);
7267     int rm = extract32(insn, 16, 5);
7268     int size = extract32(insn, 22, 2);
7269     bool u = extract32(insn, 29, 1);
7270     TCGv_i64 tcg_rd;
7271
7272     if (opcode >= 0x18) {
7273         /* Floating point: U, size[1] and opcode indicate operation */
7274         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
7275         switch (fpopcode) {
7276         case 0x1b: /* FMULX */
7277         case 0x1f: /* FRECPS */
7278         case 0x3f: /* FRSQRTS */
7279         case 0x5d: /* FACGE */
7280         case 0x7d: /* FACGT */
7281         case 0x1c: /* FCMEQ */
7282         case 0x5c: /* FCMGE */
7283         case 0x7c: /* FCMGT */
7284         case 0x7a: /* FABD */
7285             break;
7286         default:
7287             unallocated_encoding(s);
7288             return;
7289         }
7290
7291         if (!fp_access_check(s)) {
7292             return;
7293         }
7294
7295         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
7296         return;
7297     }
7298
7299     switch (opcode) {
7300     case 0x1: /* SQADD, UQADD */
7301     case 0x5: /* SQSUB, UQSUB */
7302     case 0x9: /* SQSHL, UQSHL */
7303     case 0xb: /* SQRSHL, UQRSHL */
7304         break;
7305     case 0x8: /* SSHL, USHL */
7306     case 0xa: /* SRSHL, URSHL */
7307     case 0x6: /* CMGT, CMHI */
7308     case 0x7: /* CMGE, CMHS */
7309     case 0x11: /* CMTST, CMEQ */
7310     case 0x10: /* ADD, SUB (vector) */
7311         if (size != 3) {
7312             unallocated_encoding(s);
7313             return;
7314         }
7315         break;
7316     case 0x16: /* SQDMULH, SQRDMULH (vector) */
7317         if (size != 1 && size != 2) {
7318             unallocated_encoding(s);
7319             return;
7320         }
7321         break;
7322     default:
7323         unallocated_encoding(s);
7324         return;
7325     }
7326
7327     if (!fp_access_check(s)) {
7328         return;
7329     }
7330
7331     tcg_rd = tcg_temp_new_i64();
7332
7333     if (size == 3) {
7334         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
7335         TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
7336
7337         handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
7338         tcg_temp_free_i64(tcg_rn);
7339         tcg_temp_free_i64(tcg_rm);
7340     } else {
7341         /* Do a single operation on the lowest element in the vector.
7342          * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
7343          * no side effects for all these operations.
7344          * OPTME: special-purpose helpers would avoid doing some
7345          * unnecessary work in the helper for the 8 and 16 bit cases.
7346          */
7347         NeonGenTwoOpEnvFn *genenvfn;
7348         TCGv_i32 tcg_rn = tcg_temp_new_i32();
7349         TCGv_i32 tcg_rm = tcg_temp_new_i32();
7350         TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
7351
7352         read_vec_element_i32(s, tcg_rn, rn, 0, size);
7353         read_vec_element_i32(s, tcg_rm, rm, 0, size);
7354
7355         switch (opcode) {
7356         case 0x1: /* SQADD, UQADD */
7357         {
7358             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7359                 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
7360                 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
7361                 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
7362             };
7363             genenvfn = fns[size][u];
7364             break;
7365         }
7366         case 0x5: /* SQSUB, UQSUB */
7367         {
7368             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7369                 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
7370                 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
7371                 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
7372             };
7373             genenvfn = fns[size][u];
7374             break;
7375         }
7376         case 0x9: /* SQSHL, UQSHL */
7377         {
7378             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7379                 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
7380                 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
7381                 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
7382             };
7383             genenvfn = fns[size][u];
7384             break;
7385         }
7386         case 0xb: /* SQRSHL, UQRSHL */
7387         {
7388             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7389                 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
7390                 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
7391                 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
7392             };
7393             genenvfn = fns[size][u];
7394             break;
7395         }
7396         case 0x16: /* SQDMULH, SQRDMULH */
7397         {
7398             static NeonGenTwoOpEnvFn * const fns[2][2] = {
7399                 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
7400                 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
7401             };
7402             assert(size == 1 || size == 2);
7403             genenvfn = fns[size - 1][u];
7404             break;
7405         }
7406         default:
7407             g_assert_not_reached();
7408         }
7409
7410         genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
7411         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
7412         tcg_temp_free_i32(tcg_rd32);
7413         tcg_temp_free_i32(tcg_rn);
7414         tcg_temp_free_i32(tcg_rm);
7415     }
7416
7417     write_fp_dreg(s, rd, tcg_rd);
7418
7419     tcg_temp_free_i64(tcg_rd);
7420 }
7421
7422 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
7423                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
7424                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
7425 {
7426     /* Handle 64->64 opcodes which are shared between the scalar and
7427      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
7428      * is valid in either group and also the double-precision fp ops.
7429      * The caller only need provide tcg_rmode and tcg_fpstatus if the op
7430      * requires them.
7431      */
7432     TCGCond cond;
7433
7434     switch (opcode) {
7435     case 0x4: /* CLS, CLZ */
7436         if (u) {
7437             gen_helper_clz64(tcg_rd, tcg_rn);
7438         } else {
7439             gen_helper_cls64(tcg_rd, tcg_rn);
7440         }
7441         break;
7442     case 0x5: /* NOT */
7443         /* This opcode is shared with CNT and RBIT but we have earlier
7444          * enforced that size == 3 if and only if this is the NOT insn.
7445          */
7446         tcg_gen_not_i64(tcg_rd, tcg_rn);
7447         break;
7448     case 0x7: /* SQABS, SQNEG */
7449         if (u) {
7450             gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
7451         } else {
7452             gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
7453         }
7454         break;
7455     case 0xa: /* CMLT */
7456         /* 64 bit integer comparison against zero, result is
7457          * test ? (2^64 - 1) : 0. We implement via setcond(!test) and
7458          * subtracting 1.
7459          */
7460         cond = TCG_COND_LT;
7461     do_cmop:
7462         tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
7463         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7464         break;
7465     case 0x8: /* CMGT, CMGE */
7466         cond = u ? TCG_COND_GE : TCG_COND_GT;
7467         goto do_cmop;
7468     case 0x9: /* CMEQ, CMLE */
7469         cond = u ? TCG_COND_LE : TCG_COND_EQ;
7470         goto do_cmop;
7471     case 0xb: /* ABS, NEG */
7472         if (u) {
7473             tcg_gen_neg_i64(tcg_rd, tcg_rn);
7474         } else {
7475             TCGv_i64 tcg_zero = tcg_const_i64(0);
7476             tcg_gen_neg_i64(tcg_rd, tcg_rn);
7477             tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
7478                                 tcg_rn, tcg_rd);
7479             tcg_temp_free_i64(tcg_zero);
7480         }
7481         break;
7482     case 0x2f: /* FABS */
7483         gen_helper_vfp_absd(tcg_rd, tcg_rn);
7484         break;
7485     case 0x6f: /* FNEG */
7486         gen_helper_vfp_negd(tcg_rd, tcg_rn);
7487         break;
7488     case 0x7f: /* FSQRT */
7489         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
7490         break;
7491     case 0x1a: /* FCVTNS */
7492     case 0x1b: /* FCVTMS */
7493     case 0x1c: /* FCVTAS */
7494     case 0x3a: /* FCVTPS */
7495     case 0x3b: /* FCVTZS */
7496     {
7497         TCGv_i32 tcg_shift = tcg_const_i32(0);
7498         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7499         tcg_temp_free_i32(tcg_shift);
7500         break;
7501     }
7502     case 0x5a: /* FCVTNU */
7503     case 0x5b: /* FCVTMU */
7504     case 0x5c: /* FCVTAU */
7505     case 0x7a: /* FCVTPU */
7506     case 0x7b: /* FCVTZU */
7507     {
7508         TCGv_i32 tcg_shift = tcg_const_i32(0);
7509         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7510         tcg_temp_free_i32(tcg_shift);
7511         break;
7512     }
7513     case 0x18: /* FRINTN */
7514     case 0x19: /* FRINTM */
7515     case 0x38: /* FRINTP */
7516     case 0x39: /* FRINTZ */
7517     case 0x58: /* FRINTA */
7518     case 0x79: /* FRINTI */
7519         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
7520         break;
7521     case 0x59: /* FRINTX */
7522         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
7523         break;
7524     default:
7525         g_assert_not_reached();
7526     }
7527 }
7528
7529 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
7530                                    bool is_scalar, bool is_u, bool is_q,
7531                                    int size, int rn, int rd)
7532 {
7533     bool is_double = (size == 3);
7534     TCGv_ptr fpst;
7535
7536     if (!fp_access_check(s)) {
7537         return;
7538     }
7539
7540     fpst = get_fpstatus_ptr();
7541
7542     if (is_double) {
7543         TCGv_i64 tcg_op = tcg_temp_new_i64();
7544         TCGv_i64 tcg_zero = tcg_const_i64(0);
7545         TCGv_i64 tcg_res = tcg_temp_new_i64();
7546         NeonGenTwoDoubleOPFn *genfn;
7547         bool swap = false;
7548         int pass;
7549
7550         switch (opcode) {
7551         case 0x2e: /* FCMLT (zero) */
7552             swap = true;
7553             /* fallthrough */
7554         case 0x2c: /* FCMGT (zero) */
7555             genfn = gen_helper_neon_cgt_f64;
7556             break;
7557         case 0x2d: /* FCMEQ (zero) */
7558             genfn = gen_helper_neon_ceq_f64;
7559             break;
7560         case 0x6d: /* FCMLE (zero) */
7561             swap = true;
7562             /* fall through */
7563         case 0x6c: /* FCMGE (zero) */
7564             genfn = gen_helper_neon_cge_f64;
7565             break;
7566         default:
7567             g_assert_not_reached();
7568         }
7569
7570         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7571             read_vec_element(s, tcg_op, rn, pass, MO_64);
7572             if (swap) {
7573                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7574             } else {
7575                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7576             }
7577             write_vec_element(s, tcg_res, rd, pass, MO_64);
7578         }
7579         if (is_scalar) {
7580             clear_vec_high(s, rd);
7581         }
7582
7583         tcg_temp_free_i64(tcg_res);
7584         tcg_temp_free_i64(tcg_zero);
7585         tcg_temp_free_i64(tcg_op);
7586     } else {
7587         TCGv_i32 tcg_op = tcg_temp_new_i32();
7588         TCGv_i32 tcg_zero = tcg_const_i32(0);
7589         TCGv_i32 tcg_res = tcg_temp_new_i32();
7590         NeonGenTwoSingleOPFn *genfn;
7591         bool swap = false;
7592         int pass, maxpasses;
7593
7594         switch (opcode) {
7595         case 0x2e: /* FCMLT (zero) */
7596             swap = true;
7597             /* fall through */
7598         case 0x2c: /* FCMGT (zero) */
7599             genfn = gen_helper_neon_cgt_f32;
7600             break;
7601         case 0x2d: /* FCMEQ (zero) */
7602             genfn = gen_helper_neon_ceq_f32;
7603             break;
7604         case 0x6d: /* FCMLE (zero) */
7605             swap = true;
7606             /* fall through */
7607         case 0x6c: /* FCMGE (zero) */
7608             genfn = gen_helper_neon_cge_f32;
7609             break;
7610         default:
7611             g_assert_not_reached();
7612         }
7613
7614         if (is_scalar) {
7615             maxpasses = 1;
7616         } else {
7617             maxpasses = is_q ? 4 : 2;
7618         }
7619
7620         for (pass = 0; pass < maxpasses; pass++) {
7621             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7622             if (swap) {
7623                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7624             } else {
7625                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7626             }
7627             if (is_scalar) {
7628                 write_fp_sreg(s, rd, tcg_res);
7629             } else {
7630                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7631             }
7632         }
7633         tcg_temp_free_i32(tcg_res);
7634         tcg_temp_free_i32(tcg_zero);
7635         tcg_temp_free_i32(tcg_op);
7636         if (!is_q && !is_scalar) {
7637             clear_vec_high(s, rd);
7638         }
7639     }
7640
7641     tcg_temp_free_ptr(fpst);
7642 }
7643
7644 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
7645                                     bool is_scalar, bool is_u, bool is_q,
7646                                     int size, int rn, int rd)
7647 {
7648     bool is_double = (size == 3);
7649     TCGv_ptr fpst = get_fpstatus_ptr();
7650
7651     if (is_double) {
7652         TCGv_i64 tcg_op = tcg_temp_new_i64();
7653         TCGv_i64 tcg_res = tcg_temp_new_i64();
7654         int pass;
7655
7656         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7657             read_vec_element(s, tcg_op, rn, pass, MO_64);
7658             switch (opcode) {
7659             case 0x3d: /* FRECPE */
7660                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
7661                 break;
7662             case 0x3f: /* FRECPX */
7663                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
7664                 break;
7665             case 0x7d: /* FRSQRTE */
7666                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
7667                 break;
7668             default:
7669                 g_assert_not_reached();
7670             }
7671             write_vec_element(s, tcg_res, rd, pass, MO_64);
7672         }
7673         if (is_scalar) {
7674             clear_vec_high(s, rd);
7675         }
7676
7677         tcg_temp_free_i64(tcg_res);
7678         tcg_temp_free_i64(tcg_op);
7679     } else {
7680         TCGv_i32 tcg_op = tcg_temp_new_i32();
7681         TCGv_i32 tcg_res = tcg_temp_new_i32();
7682         int pass, maxpasses;
7683
7684         if (is_scalar) {
7685             maxpasses = 1;
7686         } else {
7687             maxpasses = is_q ? 4 : 2;
7688         }
7689
7690         for (pass = 0; pass < maxpasses; pass++) {
7691             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7692
7693             switch (opcode) {
7694             case 0x3c: /* URECPE */
7695                 gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
7696                 break;
7697             case 0x3d: /* FRECPE */
7698                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
7699                 break;
7700             case 0x3f: /* FRECPX */
7701                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
7702                 break;
7703             case 0x7d: /* FRSQRTE */
7704                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
7705                 break;
7706             default:
7707                 g_assert_not_reached();
7708             }
7709
7710             if (is_scalar) {
7711                 write_fp_sreg(s, rd, tcg_res);
7712             } else {
7713                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7714             }
7715         }
7716         tcg_temp_free_i32(tcg_res);
7717         tcg_temp_free_i32(tcg_op);
7718         if (!is_q && !is_scalar) {
7719             clear_vec_high(s, rd);
7720         }
7721     }
7722     tcg_temp_free_ptr(fpst);
7723 }
7724
7725 static void handle_2misc_narrow(DisasContext *s, bool scalar,
7726                                 int opcode, bool u, bool is_q,
7727                                 int size, int rn, int rd)
7728 {
7729     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
7730      * in the source becomes a size element in the destination).
7731      */
7732     int pass;
7733     TCGv_i32 tcg_res[2];
7734     int destelt = is_q ? 2 : 0;
7735     int passes = scalar ? 1 : 2;
7736
7737     if (scalar) {
7738         tcg_res[1] = tcg_const_i32(0);
7739     }
7740
7741     for (pass = 0; pass < passes; pass++) {
7742         TCGv_i64 tcg_op = tcg_temp_new_i64();
7743         NeonGenNarrowFn *genfn = NULL;
7744         NeonGenNarrowEnvFn *genenvfn = NULL;
7745
7746         if (scalar) {
7747             read_vec_element(s, tcg_op, rn, pass, size + 1);
7748         } else {
7749             read_vec_element(s, tcg_op, rn, pass, MO_64);
7750         }
7751         tcg_res[pass] = tcg_temp_new_i32();
7752
7753         switch (opcode) {
7754         case 0x12: /* XTN, SQXTUN */
7755         {
7756             static NeonGenNarrowFn * const xtnfns[3] = {
7757                 gen_helper_neon_narrow_u8,
7758                 gen_helper_neon_narrow_u16,
7759                 tcg_gen_extrl_i64_i32,
7760             };
7761             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
7762                 gen_helper_neon_unarrow_sat8,
7763                 gen_helper_neon_unarrow_sat16,
7764                 gen_helper_neon_unarrow_sat32,
7765             };
7766             if (u) {
7767                 genenvfn = sqxtunfns[size];
7768             } else {
7769                 genfn = xtnfns[size];
7770             }
7771             break;
7772         }
7773         case 0x14: /* SQXTN, UQXTN */
7774         {
7775             static NeonGenNarrowEnvFn * const fns[3][2] = {
7776                 { gen_helper_neon_narrow_sat_s8,
7777                   gen_helper_neon_narrow_sat_u8 },
7778                 { gen_helper_neon_narrow_sat_s16,
7779                   gen_helper_neon_narrow_sat_u16 },
7780                 { gen_helper_neon_narrow_sat_s32,
7781                   gen_helper_neon_narrow_sat_u32 },
7782             };
7783             genenvfn = fns[size][u];
7784             break;
7785         }
7786         case 0x16: /* FCVTN, FCVTN2 */
7787             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
7788             if (size == 2) {
7789                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
7790             } else {
7791                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
7792                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
7793                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
7794                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
7795                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
7796                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
7797                 tcg_temp_free_i32(tcg_lo);
7798                 tcg_temp_free_i32(tcg_hi);
7799             }
7800             break;
7801         case 0x56:  /* FCVTXN, FCVTXN2 */
7802             /* 64 bit to 32 bit float conversion
7803              * with von Neumann rounding (round to odd)
7804              */
7805             assert(size == 2);
7806             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
7807             break;
7808         default:
7809             g_assert_not_reached();
7810         }
7811
7812         if (genfn) {
7813             genfn(tcg_res[pass], tcg_op);
7814         } else if (genenvfn) {
7815             genenvfn(tcg_res[pass], cpu_env, tcg_op);
7816         }
7817
7818         tcg_temp_free_i64(tcg_op);
7819     }
7820
7821     for (pass = 0; pass < 2; pass++) {
7822         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
7823         tcg_temp_free_i32(tcg_res[pass]);
7824     }
7825     if (!is_q) {
7826         clear_vec_high(s, rd);
7827     }
7828 }
7829
7830 /* Remaining saturating accumulating ops */
7831 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
7832                                 bool is_q, int size, int rn, int rd)
7833 {
7834     bool is_double = (size == 3);
7835
7836     if (is_double) {
7837         TCGv_i64 tcg_rn = tcg_temp_new_i64();
7838         TCGv_i64 tcg_rd = tcg_temp_new_i64();
7839         int pass;
7840
7841         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7842             read_vec_element(s, tcg_rn, rn, pass, MO_64);
7843             read_vec_element(s, tcg_rd, rd, pass, MO_64);
7844
7845             if (is_u) { /* USQADD */
7846                 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7847             } else { /* SUQADD */
7848                 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7849             }
7850             write_vec_element(s, tcg_rd, rd, pass, MO_64);
7851         }
7852         if (is_scalar) {
7853             clear_vec_high(s, rd);
7854         }
7855
7856         tcg_temp_free_i64(tcg_rd);
7857         tcg_temp_free_i64(tcg_rn);
7858     } else {
7859         TCGv_i32 tcg_rn = tcg_temp_new_i32();
7860         TCGv_i32 tcg_rd = tcg_temp_new_i32();
7861         int pass, maxpasses;
7862
7863         if (is_scalar) {
7864             maxpasses = 1;
7865         } else {
7866             maxpasses = is_q ? 4 : 2;
7867         }
7868
7869         for (pass = 0; pass < maxpasses; pass++) {
7870             if (is_scalar) {
7871                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
7872                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
7873             } else {
7874                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
7875                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
7876             }
7877
7878             if (is_u) { /* USQADD */
7879                 switch (size) {
7880                 case 0:
7881                     gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7882                     break;
7883                 case 1:
7884                     gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7885                     break;
7886                 case 2:
7887                     gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7888                     break;
7889                 default:
7890                     g_assert_not_reached();
7891                 }
7892             } else { /* SUQADD */
7893                 switch (size) {
7894                 case 0:
7895                     gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7896                     break;
7897                 case 1:
7898                     gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7899                     break;
7900                 case 2:
7901                     gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7902                     break;
7903                 default:
7904                     g_assert_not_reached();
7905                 }
7906             }
7907
7908             if (is_scalar) {
7909                 TCGv_i64 tcg_zero = tcg_const_i64(0);
7910                 write_vec_element(s, tcg_zero, rd, 0, MO_64);
7911                 tcg_temp_free_i64(tcg_zero);
7912             }
7913             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
7914         }
7915
7916         if (!is_q) {
7917             clear_vec_high(s, rd);
7918         }
7919
7920         tcg_temp_free_i32(tcg_rd);
7921         tcg_temp_free_i32(tcg_rn);
7922     }
7923 }
7924
7925 /* C3.6.12 AdvSIMD scalar two reg misc
7926  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7927  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7928  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7929  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7930  */
7931 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
7932 {
7933     int rd = extract32(insn, 0, 5);
7934     int rn = extract32(insn, 5, 5);
7935     int opcode = extract32(insn, 12, 5);
7936     int size = extract32(insn, 22, 2);
7937     bool u = extract32(insn, 29, 1);
7938     bool is_fcvt = false;
7939     int rmode;
7940     TCGv_i32 tcg_rmode;
7941     TCGv_ptr tcg_fpstatus;
7942
7943     switch (opcode) {
7944     case 0x3: /* USQADD / SUQADD*/
7945         if (!fp_access_check(s)) {
7946             return;
7947         }
7948         handle_2misc_satacc(s, true, u, false, size, rn, rd);
7949         return;
7950     case 0x7: /* SQABS / SQNEG */
7951         break;
7952     case 0xa: /* CMLT */
7953         if (u) {
7954             unallocated_encoding(s);
7955             return;
7956         }
7957         /* fall through */
7958     case 0x8: /* CMGT, CMGE */
7959     case 0x9: /* CMEQ, CMLE */
7960     case 0xb: /* ABS, NEG */
7961         if (size != 3) {
7962             unallocated_encoding(s);
7963             return;
7964         }
7965         break;
7966     case 0x12: /* SQXTUN */
7967         if (!u) {
7968             unallocated_encoding(s);
7969             return;
7970         }
7971         /* fall through */
7972     case 0x14: /* SQXTN, UQXTN */
7973         if (size == 3) {
7974             unallocated_encoding(s);
7975             return;
7976         }
7977         if (!fp_access_check(s)) {
7978             return;
7979         }
7980         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
7981         return;
7982     case 0xc ... 0xf:
7983     case 0x16 ... 0x1d:
7984     case 0x1f:
7985         /* Floating point: U, size[1] and opcode indicate operation;
7986          * size[0] indicates single or double precision.
7987          */
7988         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
7989         size = extract32(size, 0, 1) ? 3 : 2;
7990         switch (opcode) {
7991         case 0x2c: /* FCMGT (zero) */
7992         case 0x2d: /* FCMEQ (zero) */
7993         case 0x2e: /* FCMLT (zero) */
7994         case 0x6c: /* FCMGE (zero) */
7995         case 0x6d: /* FCMLE (zero) */
7996             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
7997             return;
7998         case 0x1d: /* SCVTF */
7999         case 0x5d: /* UCVTF */
8000         {
8001             bool is_signed = (opcode == 0x1d);
8002             if (!fp_access_check(s)) {
8003                 return;
8004             }
8005             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
8006             return;
8007         }
8008         case 0x3d: /* FRECPE */
8009         case 0x3f: /* FRECPX */
8010         case 0x7d: /* FRSQRTE */
8011             if (!fp_access_check(s)) {
8012                 return;
8013             }
8014             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
8015             return;
8016         case 0x1a: /* FCVTNS */
8017         case 0x1b: /* FCVTMS */
8018         case 0x3a: /* FCVTPS */
8019         case 0x3b: /* FCVTZS */
8020         case 0x5a: /* FCVTNU */
8021         case 0x5b: /* FCVTMU */
8022         case 0x7a: /* FCVTPU */
8023         case 0x7b: /* FCVTZU */
8024             is_fcvt = true;
8025             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
8026             break;
8027         case 0x1c: /* FCVTAS */
8028         case 0x5c: /* FCVTAU */
8029             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
8030             is_fcvt = true;
8031             rmode = FPROUNDING_TIEAWAY;
8032             break;
8033         case 0x56: /* FCVTXN, FCVTXN2 */
8034             if (size == 2) {
8035                 unallocated_encoding(s);
8036                 return;
8037             }
8038             if (!fp_access_check(s)) {
8039                 return;
8040             }
8041             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
8042             return;
8043         default:
8044             unallocated_encoding(s);
8045             return;
8046         }
8047         break;
8048     default:
8049         unallocated_encoding(s);
8050         return;
8051     }
8052
8053     if (!fp_access_check(s)) {
8054         return;
8055     }
8056
8057     if (is_fcvt) {
8058         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
8059         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8060         tcg_fpstatus = get_fpstatus_ptr();
8061     } else {
8062         TCGV_UNUSED_I32(tcg_rmode);
8063         TCGV_UNUSED_PTR(tcg_fpstatus);
8064     }
8065
8066     if (size == 3) {
8067         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8068         TCGv_i64 tcg_rd = tcg_temp_new_i64();
8069
8070         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
8071         write_fp_dreg(s, rd, tcg_rd);
8072         tcg_temp_free_i64(tcg_rd);
8073         tcg_temp_free_i64(tcg_rn);
8074     } else {
8075         TCGv_i32 tcg_rn = tcg_temp_new_i32();
8076         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8077
8078         read_vec_element_i32(s, tcg_rn, rn, 0, size);
8079
8080         switch (opcode) {
8081         case 0x7: /* SQABS, SQNEG */
8082         {
8083             NeonGenOneOpEnvFn *genfn;
8084             static NeonGenOneOpEnvFn * const fns[3][2] = {
8085                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
8086                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
8087                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
8088             };
8089             genfn = fns[size][u];
8090             genfn(tcg_rd, cpu_env, tcg_rn);
8091             break;
8092         }
8093         case 0x1a: /* FCVTNS */
8094         case 0x1b: /* FCVTMS */
8095         case 0x1c: /* FCVTAS */
8096         case 0x3a: /* FCVTPS */
8097         case 0x3b: /* FCVTZS */
8098         {
8099             TCGv_i32 tcg_shift = tcg_const_i32(0);
8100             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8101             tcg_temp_free_i32(tcg_shift);
8102             break;
8103         }
8104         case 0x5a: /* FCVTNU */
8105         case 0x5b: /* FCVTMU */
8106         case 0x5c: /* FCVTAU */
8107         case 0x7a: /* FCVTPU */
8108         case 0x7b: /* FCVTZU */
8109         {
8110             TCGv_i32 tcg_shift = tcg_const_i32(0);
8111             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8112             tcg_temp_free_i32(tcg_shift);
8113             break;
8114         }
8115         default:
8116             g_assert_not_reached();
8117         }
8118
8119         write_fp_sreg(s, rd, tcg_rd);
8120         tcg_temp_free_i32(tcg_rd);
8121         tcg_temp_free_i32(tcg_rn);
8122     }
8123
8124     if (is_fcvt) {
8125         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8126         tcg_temp_free_i32(tcg_rmode);
8127         tcg_temp_free_ptr(tcg_fpstatus);
8128     }
8129 }
8130
8131 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
8132 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
8133                                  int immh, int immb, int opcode, int rn, int rd)
8134 {
8135     int size = 32 - clz32(immh) - 1;
8136     int immhb = immh << 3 | immb;
8137     int shift = 2 * (8 << size) - immhb;
8138     bool accumulate = false;
8139     bool round = false;
8140     bool insert = false;
8141     int dsize = is_q ? 128 : 64;
8142     int esize = 8 << size;
8143     int elements = dsize/esize;
8144     TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
8145     TCGv_i64 tcg_rn = new_tmp_a64(s);
8146     TCGv_i64 tcg_rd = new_tmp_a64(s);
8147     TCGv_i64 tcg_round;
8148     int i;
8149
8150     if (extract32(immh, 3, 1) && !is_q) {
8151         unallocated_encoding(s);
8152         return;
8153     }
8154
8155     if (size > 3 && !is_q) {
8156         unallocated_encoding(s);
8157         return;
8158     }
8159
8160     if (!fp_access_check(s)) {
8161         return;
8162     }
8163
8164     switch (opcode) {
8165     case 0x02: /* SSRA / USRA (accumulate) */
8166         accumulate = true;
8167         break;
8168     case 0x04: /* SRSHR / URSHR (rounding) */
8169         round = true;
8170         break;
8171     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8172         accumulate = round = true;
8173         break;
8174     case 0x08: /* SRI */
8175         insert = true;
8176         break;
8177     }
8178
8179     if (round) {
8180         uint64_t round_const = 1ULL << (shift - 1);
8181         tcg_round = tcg_const_i64(round_const);
8182     } else {
8183         TCGV_UNUSED_I64(tcg_round);
8184     }
8185
8186     for (i = 0; i < elements; i++) {
8187         read_vec_element(s, tcg_rn, rn, i, memop);
8188         if (accumulate || insert) {
8189             read_vec_element(s, tcg_rd, rd, i, memop);
8190         }
8191
8192         if (insert) {
8193             handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
8194         } else {
8195             handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8196                                     accumulate, is_u, size, shift);
8197         }
8198
8199         write_vec_element(s, tcg_rd, rd, i, size);
8200     }
8201
8202     if (!is_q) {
8203         clear_vec_high(s, rd);
8204     }
8205
8206     if (round) {
8207         tcg_temp_free_i64(tcg_round);
8208     }
8209 }
8210
8211 /* SHL/SLI - Vector shift left */
8212 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
8213                                 int immh, int immb, int opcode, int rn, int rd)
8214 {
8215     int size = 32 - clz32(immh) - 1;
8216     int immhb = immh << 3 | immb;
8217     int shift = immhb - (8 << size);
8218     int dsize = is_q ? 128 : 64;
8219     int esize = 8 << size;
8220     int elements = dsize/esize;
8221     TCGv_i64 tcg_rn = new_tmp_a64(s);
8222     TCGv_i64 tcg_rd = new_tmp_a64(s);
8223     int i;
8224
8225     if (extract32(immh, 3, 1) && !is_q) {
8226         unallocated_encoding(s);
8227         return;
8228     }
8229
8230     if (size > 3 && !is_q) {
8231         unallocated_encoding(s);
8232         return;
8233     }
8234
8235     if (!fp_access_check(s)) {
8236         return;
8237     }
8238
8239     for (i = 0; i < elements; i++) {
8240         read_vec_element(s, tcg_rn, rn, i, size);
8241         if (insert) {
8242             read_vec_element(s, tcg_rd, rd, i, size);
8243         }
8244
8245         handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
8246
8247         write_vec_element(s, tcg_rd, rd, i, size);
8248     }
8249
8250     if (!is_q) {
8251         clear_vec_high(s, rd);
8252     }
8253 }
8254
8255 /* USHLL/SHLL - Vector shift left with widening */
8256 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
8257                                  int immh, int immb, int opcode, int rn, int rd)
8258 {
8259     int size = 32 - clz32(immh) - 1;
8260     int immhb = immh << 3 | immb;
8261     int shift = immhb - (8 << size);
8262     int dsize = 64;
8263     int esize = 8 << size;
8264     int elements = dsize/esize;
8265     TCGv_i64 tcg_rn = new_tmp_a64(s);
8266     TCGv_i64 tcg_rd = new_tmp_a64(s);
8267     int i;
8268
8269     if (size >= 3) {
8270         unallocated_encoding(s);
8271         return;
8272     }
8273
8274     if (!fp_access_check(s)) {
8275         return;
8276     }
8277
8278     /* For the LL variants the store is larger than the load,
8279      * so if rd == rn we would overwrite parts of our input.
8280      * So load everything right now and use shifts in the main loop.
8281      */
8282     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
8283
8284     for (i = 0; i < elements; i++) {
8285         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
8286         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
8287         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
8288         write_vec_element(s, tcg_rd, rd, i, size + 1);
8289     }
8290 }
8291
8292 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
8293 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
8294                                  int immh, int immb, int opcode, int rn, int rd)
8295 {
8296     int immhb = immh << 3 | immb;
8297     int size = 32 - clz32(immh) - 1;
8298     int dsize = 64;
8299     int esize = 8 << size;
8300     int elements = dsize/esize;
8301     int shift = (2 * esize) - immhb;
8302     bool round = extract32(opcode, 0, 1);
8303     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
8304     TCGv_i64 tcg_round;
8305     int i;
8306
8307     if (extract32(immh, 3, 1)) {
8308         unallocated_encoding(s);
8309         return;
8310     }
8311
8312     if (!fp_access_check(s)) {
8313         return;
8314     }
8315
8316     tcg_rn = tcg_temp_new_i64();
8317     tcg_rd = tcg_temp_new_i64();
8318     tcg_final = tcg_temp_new_i64();
8319     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
8320
8321     if (round) {
8322         uint64_t round_const = 1ULL << (shift - 1);
8323         tcg_round = tcg_const_i64(round_const);
8324     } else {
8325         TCGV_UNUSED_I64(tcg_round);
8326     }
8327
8328     for (i = 0; i < elements; i++) {
8329         read_vec_element(s, tcg_rn, rn, i, size+1);
8330         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8331                                 false, true, size+1, shift);
8332
8333         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8334     }
8335
8336     if (!is_q) {
8337         clear_vec_high(s, rd);
8338         write_vec_element(s, tcg_final, rd, 0, MO_64);
8339     } else {
8340         write_vec_element(s, tcg_final, rd, 1, MO_64);
8341     }
8342
8343     if (round) {
8344         tcg_temp_free_i64(tcg_round);
8345     }
8346     tcg_temp_free_i64(tcg_rn);
8347     tcg_temp_free_i64(tcg_rd);
8348     tcg_temp_free_i64(tcg_final);
8349     return;
8350 }
8351
8352
8353 /* C3.6.14 AdvSIMD shift by immediate
8354  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
8355  * +---+---+---+-------------+------+------+--------+---+------+------+
8356  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8357  * +---+---+---+-------------+------+------+--------+---+------+------+
8358  */
8359 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
8360 {
8361     int rd = extract32(insn, 0, 5);
8362     int rn = extract32(insn, 5, 5);
8363     int opcode = extract32(insn, 11, 5);
8364     int immb = extract32(insn, 16, 3);
8365     int immh = extract32(insn, 19, 4);
8366     bool is_u = extract32(insn, 29, 1);
8367     bool is_q = extract32(insn, 30, 1);
8368
8369     switch (opcode) {
8370     case 0x08: /* SRI */
8371         if (!is_u) {
8372             unallocated_encoding(s);
8373             return;
8374         }
8375         /* fall through */
8376     case 0x00: /* SSHR / USHR */
8377     case 0x02: /* SSRA / USRA (accumulate) */
8378     case 0x04: /* SRSHR / URSHR (rounding) */
8379     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8380         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
8381         break;
8382     case 0x0a: /* SHL / SLI */
8383         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8384         break;
8385     case 0x10: /* SHRN */
8386     case 0x11: /* RSHRN / SQRSHRUN */
8387         if (is_u) {
8388             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
8389                                    opcode, rn, rd);
8390         } else {
8391             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
8392         }
8393         break;
8394     case 0x12: /* SQSHRN / UQSHRN */
8395     case 0x13: /* SQRSHRN / UQRSHRN */
8396         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
8397                                opcode, rn, rd);
8398         break;
8399     case 0x14: /* SSHLL / USHLL */
8400         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8401         break;
8402     case 0x1c: /* SCVTF / UCVTF */
8403         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
8404                                      opcode, rn, rd);
8405         break;
8406     case 0xc: /* SQSHLU */
8407         if (!is_u) {
8408             unallocated_encoding(s);
8409             return;
8410         }
8411         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
8412         break;
8413     case 0xe: /* SQSHL, UQSHL */
8414         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
8415         break;
8416     case 0x1f: /* FCVTZS/ FCVTZU */
8417         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
8418         return;
8419     default:
8420         unallocated_encoding(s);
8421         return;
8422     }
8423 }
8424
8425 /* Generate code to do a "long" addition or subtraction, ie one done in
8426  * TCGv_i64 on vector lanes twice the width specified by size.
8427  */
8428 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
8429                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
8430 {
8431     static NeonGenTwo64OpFn * const fns[3][2] = {
8432         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
8433         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
8434         { tcg_gen_add_i64, tcg_gen_sub_i64 },
8435     };
8436     NeonGenTwo64OpFn *genfn;
8437     assert(size < 3);
8438
8439     genfn = fns[size][is_sub];
8440     genfn(tcg_res, tcg_op1, tcg_op2);
8441 }
8442
8443 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
8444                                 int opcode, int rd, int rn, int rm)
8445 {
8446     /* 3-reg-different widening insns: 64 x 64 -> 128 */
8447     TCGv_i64 tcg_res[2];
8448     int pass, accop;
8449
8450     tcg_res[0] = tcg_temp_new_i64();
8451     tcg_res[1] = tcg_temp_new_i64();
8452
8453     /* Does this op do an adding accumulate, a subtracting accumulate,
8454      * or no accumulate at all?
8455      */
8456     switch (opcode) {
8457     case 5:
8458     case 8:
8459     case 9:
8460         accop = 1;
8461         break;
8462     case 10:
8463     case 11:
8464         accop = -1;
8465         break;
8466     default:
8467         accop = 0;
8468         break;
8469     }
8470
8471     if (accop != 0) {
8472         read_vec_element(s, tcg_res[0], rd, 0, MO_64);
8473         read_vec_element(s, tcg_res[1], rd, 1, MO_64);
8474     }
8475
8476     /* size == 2 means two 32x32->64 operations; this is worth special
8477      * casing because we can generally handle it inline.
8478      */
8479     if (size == 2) {
8480         for (pass = 0; pass < 2; pass++) {
8481             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8482             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8483             TCGv_i64 tcg_passres;
8484             TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
8485
8486             int elt = pass + is_q * 2;
8487
8488             read_vec_element(s, tcg_op1, rn, elt, memop);
8489             read_vec_element(s, tcg_op2, rm, elt, memop);
8490
8491             if (accop == 0) {
8492                 tcg_passres = tcg_res[pass];
8493             } else {
8494                 tcg_passres = tcg_temp_new_i64();
8495             }
8496
8497             switch (opcode) {
8498             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8499                 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
8500                 break;
8501             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8502                 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
8503                 break;
8504             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8505             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8506             {
8507                 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
8508                 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
8509
8510                 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
8511                 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
8512                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
8513                                     tcg_passres,
8514                                     tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
8515                 tcg_temp_free_i64(tcg_tmp1);
8516                 tcg_temp_free_i64(tcg_tmp2);
8517                 break;
8518             }
8519             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8520             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8521             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8522                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8523                 break;
8524             case 9: /* SQDMLAL, SQDMLAL2 */
8525             case 11: /* SQDMLSL, SQDMLSL2 */
8526             case 13: /* SQDMULL, SQDMULL2 */
8527                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8528                 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
8529                                                   tcg_passres, tcg_passres);
8530                 break;
8531             default:
8532                 g_assert_not_reached();
8533             }
8534
8535             if (opcode == 9 || opcode == 11) {
8536                 /* saturating accumulate ops */
8537                 if (accop < 0) {
8538                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
8539                 }
8540                 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
8541                                                   tcg_res[pass], tcg_passres);
8542             } else if (accop > 0) {
8543                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8544             } else if (accop < 0) {
8545                 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8546             }
8547
8548             if (accop != 0) {
8549                 tcg_temp_free_i64(tcg_passres);
8550             }
8551
8552             tcg_temp_free_i64(tcg_op1);
8553             tcg_temp_free_i64(tcg_op2);
8554         }
8555     } else {
8556         /* size 0 or 1, generally helper functions */
8557         for (pass = 0; pass < 2; pass++) {
8558             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8559             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8560             TCGv_i64 tcg_passres;
8561             int elt = pass + is_q * 2;
8562
8563             read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
8564             read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
8565
8566             if (accop == 0) {
8567                 tcg_passres = tcg_res[pass];
8568             } else {
8569                 tcg_passres = tcg_temp_new_i64();
8570             }
8571
8572             switch (opcode) {
8573             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8574             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8575             {
8576                 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
8577                 static NeonGenWidenFn * const widenfns[2][2] = {
8578                     { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8579                     { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8580                 };
8581                 NeonGenWidenFn *widenfn = widenfns[size][is_u];
8582
8583                 widenfn(tcg_op2_64, tcg_op2);
8584                 widenfn(tcg_passres, tcg_op1);
8585                 gen_neon_addl(size, (opcode == 2), tcg_passres,
8586                               tcg_passres, tcg_op2_64);
8587                 tcg_temp_free_i64(tcg_op2_64);
8588                 break;
8589             }
8590             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8591             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8592                 if (size == 0) {
8593                     if (is_u) {
8594                         gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
8595                     } else {
8596                         gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
8597                     }
8598                 } else {
8599                     if (is_u) {
8600                         gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
8601                     } else {
8602                         gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
8603                     }
8604                 }
8605                 break;
8606             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8607             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8608             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8609                 if (size == 0) {
8610                     if (is_u) {
8611                         gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
8612                     } else {
8613                         gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
8614                     }
8615                 } else {
8616                     if (is_u) {
8617                         gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
8618                     } else {
8619                         gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8620                     }
8621                 }
8622                 break;
8623             case 9: /* SQDMLAL, SQDMLAL2 */
8624             case 11: /* SQDMLSL, SQDMLSL2 */
8625             case 13: /* SQDMULL, SQDMULL2 */
8626                 assert(size == 1);
8627                 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8628                 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
8629                                                   tcg_passres, tcg_passres);
8630                 break;
8631             case 14: /* PMULL */
8632                 assert(size == 0);
8633                 gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
8634                 break;
8635             default:
8636                 g_assert_not_reached();
8637             }
8638             tcg_temp_free_i32(tcg_op1);
8639             tcg_temp_free_i32(tcg_op2);
8640
8641             if (accop != 0) {
8642                 if (opcode == 9 || opcode == 11) {
8643                     /* saturating accumulate ops */
8644                     if (accop < 0) {
8645                         gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
8646                     }
8647                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
8648                                                       tcg_res[pass],
8649                                                       tcg_passres);
8650                 } else {
8651                     gen_neon_addl(size, (accop < 0), tcg_res[pass],
8652                                   tcg_res[pass], tcg_passres);
8653                 }
8654                 tcg_temp_free_i64(tcg_passres);
8655             }
8656         }
8657     }
8658
8659     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8660     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8661     tcg_temp_free_i64(tcg_res[0]);
8662     tcg_temp_free_i64(tcg_res[1]);
8663 }
8664
8665 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
8666                             int opcode, int rd, int rn, int rm)
8667 {
8668     TCGv_i64 tcg_res[2];
8669     int part = is_q ? 2 : 0;
8670     int pass;
8671
8672     for (pass = 0; pass < 2; pass++) {
8673         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8674         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8675         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
8676         static NeonGenWidenFn * const widenfns[3][2] = {
8677             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8678             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8679             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
8680         };
8681         NeonGenWidenFn *widenfn = widenfns[size][is_u];
8682
8683         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8684         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
8685         widenfn(tcg_op2_wide, tcg_op2);
8686         tcg_temp_free_i32(tcg_op2);
8687         tcg_res[pass] = tcg_temp_new_i64();
8688         gen_neon_addl(size, (opcode == 3),
8689                       tcg_res[pass], tcg_op1, tcg_op2_wide);
8690         tcg_temp_free_i64(tcg_op1);
8691         tcg_temp_free_i64(tcg_op2_wide);
8692     }
8693
8694     for (pass = 0; pass < 2; pass++) {
8695         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8696         tcg_temp_free_i64(tcg_res[pass]);
8697     }
8698 }
8699
8700 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
8701 {
8702     tcg_gen_addi_i64(in, in, 1U << 31);
8703     tcg_gen_extrh_i64_i32(res, in);
8704 }
8705
8706 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
8707                                  int opcode, int rd, int rn, int rm)
8708 {
8709     TCGv_i32 tcg_res[2];
8710     int part = is_q ? 2 : 0;
8711     int pass;
8712
8713     for (pass = 0; pass < 2; pass++) {
8714         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8715         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8716         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
8717         static NeonGenNarrowFn * const narrowfns[3][2] = {
8718             { gen_helper_neon_narrow_high_u8,
8719               gen_helper_neon_narrow_round_high_u8 },
8720             { gen_helper_neon_narrow_high_u16,
8721               gen_helper_neon_narrow_round_high_u16 },
8722             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
8723         };
8724         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
8725
8726         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8727         read_vec_element(s, tcg_op2, rm, pass, MO_64);
8728
8729         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
8730
8731         tcg_temp_free_i64(tcg_op1);
8732         tcg_temp_free_i64(tcg_op2);
8733
8734         tcg_res[pass] = tcg_temp_new_i32();
8735         gennarrow(tcg_res[pass], tcg_wideres);
8736         tcg_temp_free_i64(tcg_wideres);
8737     }
8738
8739     for (pass = 0; pass < 2; pass++) {
8740         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
8741         tcg_temp_free_i32(tcg_res[pass]);
8742     }
8743     if (!is_q) {
8744         clear_vec_high(s, rd);
8745     }
8746 }
8747
8748 static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
8749 {
8750     /* PMULL of 64 x 64 -> 128 is an odd special case because it
8751      * is the only three-reg-diff instruction which produces a
8752      * 128-bit wide result from a single operation. However since
8753      * it's possible to calculate the two halves more or less
8754      * separately we just use two helper calls.
8755      */
8756     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8757     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8758     TCGv_i64 tcg_res = tcg_temp_new_i64();
8759
8760     read_vec_element(s, tcg_op1, rn, is_q, MO_64);
8761     read_vec_element(s, tcg_op2, rm, is_q, MO_64);
8762     gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
8763     write_vec_element(s, tcg_res, rd, 0, MO_64);
8764     gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
8765     write_vec_element(s, tcg_res, rd, 1, MO_64);
8766
8767     tcg_temp_free_i64(tcg_op1);
8768     tcg_temp_free_i64(tcg_op2);
8769     tcg_temp_free_i64(tcg_res);
8770 }
8771
8772 /* C3.6.15 AdvSIMD three different
8773  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8774  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8775  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8776  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8777  */
8778 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
8779 {
8780     /* Instructions in this group fall into three basic classes
8781      * (in each case with the operation working on each element in
8782      * the input vectors):
8783      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
8784      *     128 bit input)
8785      * (2) wide 64 x 128 -> 128
8786      * (3) narrowing 128 x 128 -> 64
8787      * Here we do initial decode, catch unallocated cases and
8788      * dispatch to separate functions for each class.
8789      */
8790     int is_q = extract32(insn, 30, 1);
8791     int is_u = extract32(insn, 29, 1);
8792     int size = extract32(insn, 22, 2);
8793     int opcode = extract32(insn, 12, 4);
8794     int rm = extract32(insn, 16, 5);
8795     int rn = extract32(insn, 5, 5);
8796     int rd = extract32(insn, 0, 5);
8797
8798     switch (opcode) {
8799     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
8800     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
8801         /* 64 x 128 -> 128 */
8802         if (size == 3) {
8803             unallocated_encoding(s);
8804             return;
8805         }
8806         if (!fp_access_check(s)) {
8807             return;
8808         }
8809         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
8810         break;
8811     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
8812     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
8813         /* 128 x 128 -> 64 */
8814         if (size == 3) {
8815             unallocated_encoding(s);
8816             return;
8817         }
8818         if (!fp_access_check(s)) {
8819             return;
8820         }
8821         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
8822         break;
8823     case 14: /* PMULL, PMULL2 */
8824         if (is_u || size == 1 || size == 2) {
8825             unallocated_encoding(s);
8826             return;
8827         }
8828         if (size == 3) {
8829             if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
8830                 unallocated_encoding(s);
8831                 return;
8832             }
8833             if (!fp_access_check(s)) {
8834                 return;
8835             }
8836             handle_pmull_64(s, is_q, rd, rn, rm);
8837             return;
8838         }
8839         goto is_widening;
8840     case 9: /* SQDMLAL, SQDMLAL2 */
8841     case 11: /* SQDMLSL, SQDMLSL2 */
8842     case 13: /* SQDMULL, SQDMULL2 */
8843         if (is_u || size == 0) {
8844             unallocated_encoding(s);
8845             return;
8846         }
8847         /* fall through */
8848     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8849     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8850     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8851     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8852     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8853     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8854     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
8855         /* 64 x 64 -> 128 */
8856         if (size == 3) {
8857             unallocated_encoding(s);
8858             return;
8859         }
8860     is_widening:
8861         if (!fp_access_check(s)) {
8862             return;
8863         }
8864
8865         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
8866         break;
8867     default:
8868         /* opcode 15 not allocated */
8869         unallocated_encoding(s);
8870         break;
8871     }
8872 }
8873
8874 /* Logic op (opcode == 3) subgroup of C3.6.16. */
8875 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
8876 {
8877     int rd = extract32(insn, 0, 5);
8878     int rn = extract32(insn, 5, 5);
8879     int rm = extract32(insn, 16, 5);
8880     int size = extract32(insn, 22, 2);
8881     bool is_u = extract32(insn, 29, 1);
8882     bool is_q = extract32(insn, 30, 1);
8883     TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
8884     int pass;
8885
8886     if (!fp_access_check(s)) {
8887         return;
8888     }
8889
8890     tcg_op1 = tcg_temp_new_i64();
8891     tcg_op2 = tcg_temp_new_i64();
8892     tcg_res[0] = tcg_temp_new_i64();
8893     tcg_res[1] = tcg_temp_new_i64();
8894
8895     for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
8896         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8897         read_vec_element(s, tcg_op2, rm, pass, MO_64);
8898
8899         if (!is_u) {
8900             switch (size) {
8901             case 0: /* AND */
8902                 tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
8903                 break;
8904             case 1: /* BIC */
8905                 tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
8906                 break;
8907             case 2: /* ORR */
8908                 tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
8909                 break;
8910             case 3: /* ORN */
8911                 tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
8912                 break;
8913             }
8914         } else {
8915             if (size != 0) {
8916                 /* B* ops need res loaded to operate on */
8917                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8918             }
8919
8920             switch (size) {
8921             case 0: /* EOR */
8922                 tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
8923                 break;
8924             case 1: /* BSL bitwise select */
8925                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2);
8926                 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8927                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1);
8928                 break;
8929             case 2: /* BIT, bitwise insert if true */
8930                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8931                 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2);
8932                 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
8933                 break;
8934             case 3: /* BIF, bitwise insert if false */
8935                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8936                 tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2);
8937                 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
8938                 break;
8939             }
8940         }
8941     }
8942
8943     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8944     if (!is_q) {
8945         tcg_gen_movi_i64(tcg_res[1], 0);
8946     }
8947     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8948
8949     tcg_temp_free_i64(tcg_op1);
8950     tcg_temp_free_i64(tcg_op2);
8951     tcg_temp_free_i64(tcg_res[0]);
8952     tcg_temp_free_i64(tcg_res[1]);
8953 }
8954
8955 /* Helper functions for 32 bit comparisons */
8956 static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8957 {
8958     tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
8959 }
8960
8961 static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8962 {
8963     tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
8964 }
8965
8966 static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8967 {
8968     tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
8969 }
8970
8971 static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8972 {
8973     tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
8974 }
8975
8976 /* Pairwise op subgroup of C3.6.16.
8977  *
8978  * This is called directly or via the handle_3same_float for float pairwise
8979  * operations where the opcode and size are calculated differently.
8980  */
8981 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
8982                                    int size, int rn, int rm, int rd)
8983 {
8984     TCGv_ptr fpst;
8985     int pass;
8986
8987     /* Floating point operations need fpst */
8988     if (opcode >= 0x58) {
8989         fpst = get_fpstatus_ptr();
8990     } else {
8991         TCGV_UNUSED_PTR(fpst);
8992     }
8993
8994     if (!fp_access_check(s)) {
8995         return;
8996     }
8997
8998     /* These operations work on the concatenated rm:rn, with each pair of
8999      * adjacent elements being operated on to produce an element in the result.
9000      */
9001     if (size == 3) {
9002         TCGv_i64 tcg_res[2];
9003
9004         for (pass = 0; pass < 2; pass++) {
9005             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9006             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9007             int passreg = (pass == 0) ? rn : rm;
9008
9009             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
9010             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
9011             tcg_res[pass] = tcg_temp_new_i64();
9012
9013             switch (opcode) {
9014             case 0x17: /* ADDP */
9015                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9016                 break;
9017             case 0x58: /* FMAXNMP */
9018                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9019                 break;
9020             case 0x5a: /* FADDP */
9021                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9022                 break;
9023             case 0x5e: /* FMAXP */
9024                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9025                 break;
9026             case 0x78: /* FMINNMP */
9027                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9028                 break;
9029             case 0x7e: /* FMINP */
9030                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9031                 break;
9032             default:
9033                 g_assert_not_reached();
9034             }
9035
9036             tcg_temp_free_i64(tcg_op1);
9037             tcg_temp_free_i64(tcg_op2);
9038         }
9039
9040         for (pass = 0; pass < 2; pass++) {
9041             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9042             tcg_temp_free_i64(tcg_res[pass]);
9043         }
9044     } else {
9045         int maxpass = is_q ? 4 : 2;
9046         TCGv_i32 tcg_res[4];
9047
9048         for (pass = 0; pass < maxpass; pass++) {
9049             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9050             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9051             NeonGenTwoOpFn *genfn = NULL;
9052             int passreg = pass < (maxpass / 2) ? rn : rm;
9053             int passelt = (is_q && (pass & 1)) ? 2 : 0;
9054
9055             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
9056             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
9057             tcg_res[pass] = tcg_temp_new_i32();
9058
9059             switch (opcode) {
9060             case 0x17: /* ADDP */
9061             {
9062                 static NeonGenTwoOpFn * const fns[3] = {
9063                     gen_helper_neon_padd_u8,
9064                     gen_helper_neon_padd_u16,
9065                     tcg_gen_add_i32,
9066                 };
9067                 genfn = fns[size];
9068                 break;
9069             }
9070             case 0x14: /* SMAXP, UMAXP */
9071             {
9072                 static NeonGenTwoOpFn * const fns[3][2] = {
9073                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
9074                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
9075                     { gen_max_s32, gen_max_u32 },
9076                 };
9077                 genfn = fns[size][u];
9078                 break;
9079             }
9080             case 0x15: /* SMINP, UMINP */
9081             {
9082                 static NeonGenTwoOpFn * const fns[3][2] = {
9083                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
9084                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
9085                     { gen_min_s32, gen_min_u32 },
9086                 };
9087                 genfn = fns[size][u];
9088                 break;
9089             }
9090             /* The FP operations are all on single floats (32 bit) */
9091             case 0x58: /* FMAXNMP */
9092                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9093                 break;
9094             case 0x5a: /* FADDP */
9095                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9096                 break;
9097             case 0x5e: /* FMAXP */
9098                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9099                 break;
9100             case 0x78: /* FMINNMP */
9101                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9102                 break;
9103             case 0x7e: /* FMINP */
9104                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9105                 break;
9106             default:
9107                 g_assert_not_reached();
9108             }
9109
9110             /* FP ops called directly, otherwise call now */
9111             if (genfn) {
9112                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
9113             }
9114
9115             tcg_temp_free_i32(tcg_op1);
9116             tcg_temp_free_i32(tcg_op2);
9117         }
9118
9119         for (pass = 0; pass < maxpass; pass++) {
9120             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9121             tcg_temp_free_i32(tcg_res[pass]);
9122         }
9123         if (!is_q) {
9124             clear_vec_high(s, rd);
9125         }
9126     }
9127
9128     if (!TCGV_IS_UNUSED_PTR(fpst)) {
9129         tcg_temp_free_ptr(fpst);
9130     }
9131 }
9132
9133 /* Floating point op subgroup of C3.6.16. */
9134 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
9135 {
9136     /* For floating point ops, the U, size[1] and opcode bits
9137      * together indicate the operation. size[0] indicates single
9138      * or double.
9139      */
9140     int fpopcode = extract32(insn, 11, 5)
9141         | (extract32(insn, 23, 1) << 5)
9142         | (extract32(insn, 29, 1) << 6);
9143     int is_q = extract32(insn, 30, 1);
9144     int size = extract32(insn, 22, 1);
9145     int rm = extract32(insn, 16, 5);
9146     int rn = extract32(insn, 5, 5);
9147     int rd = extract32(insn, 0, 5);
9148
9149     int datasize = is_q ? 128 : 64;
9150     int esize = 32 << size;
9151     int elements = datasize / esize;
9152
9153     if (size == 1 && !is_q) {
9154         unallocated_encoding(s);
9155         return;
9156     }
9157
9158     switch (fpopcode) {
9159     case 0x58: /* FMAXNMP */
9160     case 0x5a: /* FADDP */
9161     case 0x5e: /* FMAXP */
9162     case 0x78: /* FMINNMP */
9163     case 0x7e: /* FMINP */
9164         if (size && !is_q) {
9165             unallocated_encoding(s);
9166             return;
9167         }
9168         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
9169                                rn, rm, rd);
9170         return;
9171     case 0x1b: /* FMULX */
9172     case 0x1f: /* FRECPS */
9173     case 0x3f: /* FRSQRTS */
9174     case 0x5d: /* FACGE */
9175     case 0x7d: /* FACGT */
9176     case 0x19: /* FMLA */
9177     case 0x39: /* FMLS */
9178     case 0x18: /* FMAXNM */
9179     case 0x1a: /* FADD */
9180     case 0x1c: /* FCMEQ */
9181     case 0x1e: /* FMAX */
9182     case 0x38: /* FMINNM */
9183     case 0x3a: /* FSUB */
9184     case 0x3e: /* FMIN */
9185     case 0x5b: /* FMUL */
9186     case 0x5c: /* FCMGE */
9187     case 0x5f: /* FDIV */
9188     case 0x7a: /* FABD */
9189     case 0x7c: /* FCMGT */
9190         if (!fp_access_check(s)) {
9191             return;
9192         }
9193
9194         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
9195         return;
9196     default:
9197         unallocated_encoding(s);
9198         return;
9199     }
9200 }
9201
9202 /* Integer op subgroup of C3.6.16. */
9203 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
9204 {
9205     int is_q = extract32(insn, 30, 1);
9206     int u = extract32(insn, 29, 1);
9207     int size = extract32(insn, 22, 2);
9208     int opcode = extract32(insn, 11, 5);
9209     int rm = extract32(insn, 16, 5);
9210     int rn = extract32(insn, 5, 5);
9211     int rd = extract32(insn, 0, 5);
9212     int pass;
9213
9214     switch (opcode) {
9215     case 0x13: /* MUL, PMUL */
9216         if (u && size != 0) {
9217             unallocated_encoding(s);
9218             return;
9219         }
9220         /* fall through */
9221     case 0x0: /* SHADD, UHADD */
9222     case 0x2: /* SRHADD, URHADD */
9223     case 0x4: /* SHSUB, UHSUB */
9224     case 0xc: /* SMAX, UMAX */
9225     case 0xd: /* SMIN, UMIN */
9226     case 0xe: /* SABD, UABD */
9227     case 0xf: /* SABA, UABA */
9228     case 0x12: /* MLA, MLS */
9229         if (size == 3) {
9230             unallocated_encoding(s);
9231             return;
9232         }
9233         break;
9234     case 0x16: /* SQDMULH, SQRDMULH */
9235         if (size == 0 || size == 3) {
9236             unallocated_encoding(s);
9237             return;
9238         }
9239         break;
9240     default:
9241         if (size == 3 && !is_q) {
9242             unallocated_encoding(s);
9243             return;
9244         }
9245         break;
9246     }
9247
9248     if (!fp_access_check(s)) {
9249         return;
9250     }
9251
9252     if (size == 3) {
9253         assert(is_q);
9254         for (pass = 0; pass < 2; pass++) {
9255             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9256             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9257             TCGv_i64 tcg_res = tcg_temp_new_i64();
9258
9259             read_vec_element(s, tcg_op1, rn, pass, MO_64);
9260             read_vec_element(s, tcg_op2, rm, pass, MO_64);
9261
9262             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
9263
9264             write_vec_element(s, tcg_res, rd, pass, MO_64);
9265
9266             tcg_temp_free_i64(tcg_res);
9267             tcg_temp_free_i64(tcg_op1);
9268             tcg_temp_free_i64(tcg_op2);
9269         }
9270     } else {
9271         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
9272             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9273             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9274             TCGv_i32 tcg_res = tcg_temp_new_i32();
9275             NeonGenTwoOpFn *genfn = NULL;
9276             NeonGenTwoOpEnvFn *genenvfn = NULL;
9277
9278             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9279             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9280
9281             switch (opcode) {
9282             case 0x0: /* SHADD, UHADD */
9283             {
9284                 static NeonGenTwoOpFn * const fns[3][2] = {
9285                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
9286                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
9287                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
9288                 };
9289                 genfn = fns[size][u];
9290                 break;
9291             }
9292             case 0x1: /* SQADD, UQADD */
9293             {
9294                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9295                     { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9296                     { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9297                     { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9298                 };
9299                 genenvfn = fns[size][u];
9300                 break;
9301             }
9302             case 0x2: /* SRHADD, URHADD */
9303             {
9304                 static NeonGenTwoOpFn * const fns[3][2] = {
9305                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
9306                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
9307                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
9308                 };
9309                 genfn = fns[size][u];
9310                 break;
9311             }
9312             case 0x4: /* SHSUB, UHSUB */
9313             {
9314                 static NeonGenTwoOpFn * const fns[3][2] = {
9315                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
9316                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
9317                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
9318                 };
9319                 genfn = fns[size][u];
9320                 break;
9321             }
9322             case 0x5: /* SQSUB, UQSUB */
9323             {
9324                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9325                     { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9326                     { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9327                     { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9328                 };
9329                 genenvfn = fns[size][u];
9330                 break;
9331             }
9332             case 0x6: /* CMGT, CMHI */
9333             {
9334                 static NeonGenTwoOpFn * const fns[3][2] = {
9335                     { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
9336                     { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
9337                     { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
9338                 };
9339                 genfn = fns[size][u];
9340                 break;
9341             }
9342             case 0x7: /* CMGE, CMHS */
9343             {
9344                 static NeonGenTwoOpFn * const fns[3][2] = {
9345                     { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
9346                     { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
9347                     { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
9348                 };
9349                 genfn = fns[size][u];
9350                 break;
9351             }
9352             case 0x8: /* SSHL, USHL */
9353             {
9354                 static NeonGenTwoOpFn * const fns[3][2] = {
9355                     { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
9356                     { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
9357                     { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
9358                 };
9359                 genfn = fns[size][u];
9360                 break;
9361             }
9362             case 0x9: /* SQSHL, UQSHL */
9363             {
9364                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9365                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9366                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9367                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9368                 };
9369                 genenvfn = fns[size][u];
9370                 break;
9371             }
9372             case 0xa: /* SRSHL, URSHL */
9373             {
9374                 static NeonGenTwoOpFn * const fns[3][2] = {
9375                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
9376                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
9377                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
9378                 };
9379                 genfn = fns[size][u];
9380                 break;
9381             }
9382             case 0xb: /* SQRSHL, UQRSHL */
9383             {
9384                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9385                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9386                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9387                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9388                 };
9389                 genenvfn = fns[size][u];
9390                 break;
9391             }
9392             case 0xc: /* SMAX, UMAX */
9393             {
9394                 static NeonGenTwoOpFn * const fns[3][2] = {
9395                     { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
9396                     { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
9397                     { gen_max_s32, gen_max_u32 },
9398                 };
9399                 genfn = fns[size][u];
9400                 break;
9401             }
9402
9403             case 0xd: /* SMIN, UMIN */
9404             {
9405                 static NeonGenTwoOpFn * const fns[3][2] = {
9406                     { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
9407                     { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
9408                     { gen_min_s32, gen_min_u32 },
9409                 };
9410                 genfn = fns[size][u];
9411                 break;
9412             }
9413             case 0xe: /* SABD, UABD */
9414             case 0xf: /* SABA, UABA */
9415             {
9416                 static NeonGenTwoOpFn * const fns[3][2] = {
9417                     { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
9418                     { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
9419                     { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
9420                 };
9421                 genfn = fns[size][u];
9422                 break;
9423             }
9424             case 0x10: /* ADD, SUB */
9425             {
9426                 static NeonGenTwoOpFn * const fns[3][2] = {
9427                     { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9428                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9429                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
9430                 };
9431                 genfn = fns[size][u];
9432                 break;
9433             }
9434             case 0x11: /* CMTST, CMEQ */
9435             {
9436                 static NeonGenTwoOpFn * const fns[3][2] = {
9437                     { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
9438                     { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
9439                     { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
9440                 };
9441                 genfn = fns[size][u];
9442                 break;
9443             }
9444             case 0x13: /* MUL, PMUL */
9445                 if (u) {
9446                     /* PMUL */
9447                     assert(size == 0);
9448                     genfn = gen_helper_neon_mul_p8;
9449                     break;
9450                 }
9451                 /* fall through : MUL */
9452             case 0x12: /* MLA, MLS */
9453             {
9454                 static NeonGenTwoOpFn * const fns[3] = {
9455                     gen_helper_neon_mul_u8,
9456                     gen_helper_neon_mul_u16,
9457                     tcg_gen_mul_i32,
9458                 };
9459                 genfn = fns[size];
9460                 break;
9461             }
9462             case 0x16: /* SQDMULH, SQRDMULH */
9463             {
9464                 static NeonGenTwoOpEnvFn * const fns[2][2] = {
9465                     { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9466                     { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9467                 };
9468                 assert(size == 1 || size == 2);
9469                 genenvfn = fns[size - 1][u];
9470                 break;
9471             }
9472             default:
9473                 g_assert_not_reached();
9474             }
9475
9476             if (genenvfn) {
9477                 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
9478             } else {
9479                 genfn(tcg_res, tcg_op1, tcg_op2);
9480             }
9481
9482             if (opcode == 0xf || opcode == 0x12) {
9483                 /* SABA, UABA, MLA, MLS: accumulating ops */
9484                 static NeonGenTwoOpFn * const fns[3][2] = {
9485                     { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9486                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9487                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
9488                 };
9489                 bool is_sub = (opcode == 0x12 && u); /* MLS */
9490
9491                 genfn = fns[size][is_sub];
9492                 read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
9493                 genfn(tcg_res, tcg_op1, tcg_res);
9494             }
9495
9496             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9497
9498             tcg_temp_free_i32(tcg_res);
9499             tcg_temp_free_i32(tcg_op1);
9500             tcg_temp_free_i32(tcg_op2);
9501         }
9502     }
9503
9504     if (!is_q) {
9505         clear_vec_high(s, rd);
9506     }
9507 }
9508
9509 /* C3.6.16 AdvSIMD three same
9510  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
9511  * +---+---+---+-----------+------+---+------+--------+---+------+------+
9512  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9513  * +---+---+---+-----------+------+---+------+--------+---+------+------+
9514  */
9515 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
9516 {
9517     int opcode = extract32(insn, 11, 5);
9518
9519     switch (opcode) {
9520     case 0x3: /* logic ops */
9521         disas_simd_3same_logic(s, insn);
9522         break;
9523     case 0x17: /* ADDP */
9524     case 0x14: /* SMAXP, UMAXP */
9525     case 0x15: /* SMINP, UMINP */
9526     {
9527         /* Pairwise operations */
9528         int is_q = extract32(insn, 30, 1);
9529         int u = extract32(insn, 29, 1);
9530         int size = extract32(insn, 22, 2);
9531         int rm = extract32(insn, 16, 5);
9532         int rn = extract32(insn, 5, 5);
9533         int rd = extract32(insn, 0, 5);
9534         if (opcode == 0x17) {
9535             if (u || (size == 3 && !is_q)) {
9536                 unallocated_encoding(s);
9537                 return;
9538             }
9539         } else {
9540             if (size == 3) {
9541                 unallocated_encoding(s);
9542                 return;
9543             }
9544         }
9545         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
9546         break;
9547     }
9548     case 0x18 ... 0x31:
9549         /* floating point ops, sz[1] and U are part of opcode */
9550         disas_simd_3same_float(s, insn);
9551         break;
9552     default:
9553         disas_simd_3same_int(s, insn);
9554         break;
9555     }
9556 }
9557
9558 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
9559                                   int size, int rn, int rd)
9560 {
9561     /* Handle 2-reg-misc ops which are widening (so each size element
9562      * in the source becomes a 2*size element in the destination.
9563      * The only instruction like this is FCVTL.
9564      */
9565     int pass;
9566
9567     if (size == 3) {
9568         /* 32 -> 64 bit fp conversion */
9569         TCGv_i64 tcg_res[2];
9570         int srcelt = is_q ? 2 : 0;
9571
9572         for (pass = 0; pass < 2; pass++) {
9573             TCGv_i32 tcg_op = tcg_temp_new_i32();
9574             tcg_res[pass] = tcg_temp_new_i64();
9575
9576             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
9577             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
9578             tcg_temp_free_i32(tcg_op);
9579         }
9580         for (pass = 0; pass < 2; pass++) {
9581             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9582             tcg_temp_free_i64(tcg_res[pass]);
9583         }
9584     } else {
9585         /* 16 -> 32 bit fp conversion */
9586         int srcelt = is_q ? 4 : 0;
9587         TCGv_i32 tcg_res[4];
9588
9589         for (pass = 0; pass < 4; pass++) {
9590             tcg_res[pass] = tcg_temp_new_i32();
9591
9592             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
9593             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
9594                                            cpu_env);
9595         }
9596         for (pass = 0; pass < 4; pass++) {
9597             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9598             tcg_temp_free_i32(tcg_res[pass]);
9599         }
9600     }
9601 }
9602
9603 static void handle_rev(DisasContext *s, int opcode, bool u,
9604                        bool is_q, int size, int rn, int rd)
9605 {
9606     int op = (opcode << 1) | u;
9607     int opsz = op + size;
9608     int grp_size = 3 - opsz;
9609     int dsize = is_q ? 128 : 64;
9610     int i;
9611
9612     if (opsz >= 3) {
9613         unallocated_encoding(s);
9614         return;
9615     }
9616
9617     if (!fp_access_check(s)) {
9618         return;
9619     }
9620
9621     if (size == 0) {
9622         /* Special case bytes, use bswap op on each group of elements */
9623         int groups = dsize / (8 << grp_size);
9624
9625         for (i = 0; i < groups; i++) {
9626             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9627
9628             read_vec_element(s, tcg_tmp, rn, i, grp_size);
9629             switch (grp_size) {
9630             case MO_16:
9631                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
9632                 break;
9633             case MO_32:
9634                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
9635                 break;
9636             case MO_64:
9637                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
9638                 break;
9639             default:
9640                 g_assert_not_reached();
9641             }
9642             write_vec_element(s, tcg_tmp, rd, i, grp_size);
9643             tcg_temp_free_i64(tcg_tmp);
9644         }
9645         if (!is_q) {
9646             clear_vec_high(s, rd);
9647         }
9648     } else {
9649         int revmask = (1 << grp_size) - 1;
9650         int esize = 8 << size;
9651         int elements = dsize / esize;
9652         TCGv_i64 tcg_rn = tcg_temp_new_i64();
9653         TCGv_i64 tcg_rd = tcg_const_i64(0);
9654         TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
9655
9656         for (i = 0; i < elements; i++) {
9657             int e_rev = (i & 0xf) ^ revmask;
9658             int off = e_rev * esize;
9659             read_vec_element(s, tcg_rn, rn, i, size);
9660             if (off >= 64) {
9661                 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
9662                                     tcg_rn, off - 64, esize);
9663             } else {
9664                 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
9665             }
9666         }
9667         write_vec_element(s, tcg_rd, rd, 0, MO_64);
9668         write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
9669
9670         tcg_temp_free_i64(tcg_rd_hi);
9671         tcg_temp_free_i64(tcg_rd);
9672         tcg_temp_free_i64(tcg_rn);
9673     }
9674 }
9675
9676 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
9677                                   bool is_q, int size, int rn, int rd)
9678 {
9679     /* Implement the pairwise operations from 2-misc:
9680      * SADDLP, UADDLP, SADALP, UADALP.
9681      * These all add pairs of elements in the input to produce a
9682      * double-width result element in the output (possibly accumulating).
9683      */
9684     bool accum = (opcode == 0x6);
9685     int maxpass = is_q ? 2 : 1;
9686     int pass;
9687     TCGv_i64 tcg_res[2];
9688
9689     if (size == 2) {
9690         /* 32 + 32 -> 64 op */
9691         TCGMemOp memop = size + (u ? 0 : MO_SIGN);
9692
9693         for (pass = 0; pass < maxpass; pass++) {
9694             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9695             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9696
9697             tcg_res[pass] = tcg_temp_new_i64();
9698
9699             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
9700             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
9701             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9702             if (accum) {
9703                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
9704                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9705             }
9706
9707             tcg_temp_free_i64(tcg_op1);
9708             tcg_temp_free_i64(tcg_op2);
9709         }
9710     } else {
9711         for (pass = 0; pass < maxpass; pass++) {
9712             TCGv_i64 tcg_op = tcg_temp_new_i64();
9713             NeonGenOneOpFn *genfn;
9714             static NeonGenOneOpFn * const fns[2][2] = {
9715                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
9716                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
9717             };
9718
9719             genfn = fns[size][u];
9720
9721             tcg_res[pass] = tcg_temp_new_i64();
9722
9723             read_vec_element(s, tcg_op, rn, pass, MO_64);
9724             genfn(tcg_res[pass], tcg_op);
9725
9726             if (accum) {
9727                 read_vec_element(s, tcg_op, rd, pass, MO_64);
9728                 if (size == 0) {
9729                     gen_helper_neon_addl_u16(tcg_res[pass],
9730                                              tcg_res[pass], tcg_op);
9731                 } else {
9732                     gen_helper_neon_addl_u32(tcg_res[pass],
9733                                              tcg_res[pass], tcg_op);
9734                 }
9735             }
9736             tcg_temp_free_i64(tcg_op);
9737         }
9738     }
9739     if (!is_q) {
9740         tcg_res[1] = tcg_const_i64(0);
9741     }
9742     for (pass = 0; pass < 2; pass++) {
9743         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9744         tcg_temp_free_i64(tcg_res[pass]);
9745     }
9746 }
9747
9748 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
9749 {
9750     /* SHLL/SHLL2: read 32-bit chunks 0-1 of rn (2-3 when is_q), widen each
9751      * to 64 bits and shift the widened value left by 8 << size bits.
9752      */
9753     static NeonGenWidenFn * const widen_by_size[3] = {
9754         gen_helper_neon_widen_u8,
9755         gen_helper_neon_widen_u16,
9756         tcg_gen_extu_i32_i64,
9757     };
9758     NeonGenWidenFn *widen = widen_by_size[size];
9759     const int first_chunk = is_q ? 2 : 0;
9760     TCGv_i64 wide[2];
9761     int i;
9762
9763     /* Both results are computed before anything is stored to rd. */
9764     for (i = 0; i < 2; i++) {
9765         TCGv_i32 narrow = tcg_temp_new_i32();
9766         read_vec_element_i32(s, narrow, rn, first_chunk + i, MO_32);
9767         wide[i] = tcg_temp_new_i64();
9768         widen(wide[i], narrow);
9769         tcg_gen_shli_i64(wide[i], wide[i], 8 << size);
9770         tcg_temp_free_i32(narrow);
9771     }
9772     for (i = 0; i < 2; i++) {
9773         write_vec_element(s, wide[i], rd, i, MO_64);
9774         tcg_temp_free_i64(wide[i]);
9775     }
9776 }
9777
9778 /* C3.6.17 AdvSIMD two reg misc
9779  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9780  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9781  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9782  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
      *
      * Decode one instruction of this group and emit the TCG ops for it.
      *
      * The function works in two stages:
      *  1. A switch on opcode (extended with size[1] and U for the floating
      *     point opcodes) rejects unallocated encodings, hands operations
      *     that have a dedicated handler over to it and returns, and records
      *     whether an fpstatus pointer and/or a rounding mode change is
      *     needed for the rest.
      *  2. Whatever falls out of the switch is generated by one of two
      *     per-element loops: 64 bits at a time when size == 3, otherwise
      *     32 bits at a time.
      *
      * s:    translation context, passed through to every helper used here
      * insn: the 32-bit instruction word; size, opcode, U, Q, Rn and Rd are
      *       extracted from the bit positions shown in the diagram above
9783  */
9784 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
9785 {
9786     int size = extract32(insn, 22, 2);
9787     int opcode = extract32(insn, 12, 5);
9788     bool u = extract32(insn, 29, 1);
9789     bool is_q = extract32(insn, 30, 1);
9790     int rn = extract32(insn, 5, 5);
9791     int rd = extract32(insn, 0, 5);
9792     bool need_fpstatus = false;
9793     bool need_rmode = false;
9794     int rmode = -1;
9795     TCGv_i32 tcg_rmode;
9796     TCGv_ptr tcg_fpstatus;
9797
         /* Stage 1: validate the encoding. A case that ends in 'return' has
          * either been rejected or fully handled by a dedicated function; a
          * case that ends in 'break' continues to the generic loops below.
          */
9798     switch (opcode) {
9799     case 0x0: /* REV64, REV32 */
9800     case 0x1: /* REV16 */
             /* NOTE(review): unlike the other hand-offs in this switch there is
              * no fp_access_check() before this call; presumably handle_rev()
              * performs its own check -- confirm.
              */
9801         handle_rev(s, opcode, u, is_q, size, rn, rd);
9802         return;
9803     case 0x5: /* CNT, NOT, RBIT */
             /* Here U and size select the operation; only the three
              * combinations tested below are accepted.
              */
9804         if (u && size == 0) {
9805             /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
9806             size = 3;
9807             break;
9808         } else if (u && size == 1) {
9809             /* RBIT */
9810             break;
9811         } else if (!u && size == 0) {
9812             /* CNT */
9813             break;
9814         }
9815         unallocated_encoding(s);
9816         return;
9817     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
9818     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
9819         if (size == 3) {
9820             unallocated_encoding(s);
9821             return;
9822         }
9823         if (!fp_access_check(s)) {
9824             return;
9825         }
9826
9827         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
9828         return;
9829     case 0x4: /* CLS, CLZ */
9830         if (size == 3) {
9831             unallocated_encoding(s);
9832             return;
9833         }
9834         break;
9835     case 0x2: /* SADDLP, UADDLP */
9836     case 0x6: /* SADALP, UADALP */
9837         if (size == 3) {
9838             unallocated_encoding(s);
9839             return;
9840         }
9841         if (!fp_access_check(s)) {
9842             return;
9843         }
9844         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
9845         return;
9846     case 0x13: /* SHLL, SHLL2 */
9847         if (u == 0 || size == 3) {
9848             unallocated_encoding(s);
9849             return;
9850         }
9851         if (!fp_access_check(s)) {
9852             return;
9853         }
9854         handle_shll(s, is_q, size, rn, rd);
9855         return;
9856     case 0xa: /* CMLT */
9857         if (u == 1) {
9858             unallocated_encoding(s);
9859             return;
9860         }
9861         /* fall through */
9862     case 0x8: /* CMGT, CMGE */
9863     case 0x9: /* CMEQ, CMLE */
9864     case 0xb: /* ABS, NEG */
             /* 64-bit elements are only accepted in the Q form. */
9865         if (size == 3 && !is_q) {
9866             unallocated_encoding(s);
9867             return;
9868         }
9869         break;
9870     case 0x3: /* SUQADD, USQADD */
9871         if (size == 3 && !is_q) {
9872             unallocated_encoding(s);
9873             return;
9874         }
9875         if (!fp_access_check(s)) {
9876             return;
9877         }
9878         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
9879         return;
9880     case 0x7: /* SQABS, SQNEG */
9881         if (size == 3 && !is_q) {
9882             unallocated_encoding(s);
9883             return;
9884         }
9885         break;
9886     case 0xc ... 0xf:
9887     case 0x16 ... 0x1d:
9888     case 0x1f:
9889     {
9890         /* Floating point: U, size[1] and opcode indicate operation;
9891          * size[0] indicates single or double precision.
9892          */
9893         int is_double = extract32(size, 0, 1);
             /* Fold size[1] into bit 5 and U into bit 6 of opcode so that the
              * inner switch, and the generic loops later on, can identify the
              * operation from this single 7-bit value. From here on size
              * holds the element size: 3 for double, 2 for single precision.
              */
9894         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
9895         size = is_double ? 3 : 2;
9896         switch (opcode) {
9897         case 0x2f: /* FABS */
9898         case 0x6f: /* FNEG */
9899             if (size == 3 && !is_q) {
9900                 unallocated_encoding(s);
9901                 return;
9902             }
9903             break;
9904         case 0x1d: /* SCVTF */
9905         case 0x5d: /* UCVTF */
9906         {
9907             bool is_signed = (opcode == 0x1d) ? true : false;
                 /* Element count: 2 doubles, or 4 (Q) / 2 (non-Q) singles. */
9908             int elements = is_double ? 2 : is_q ? 4 : 2;
9909             if (is_double && !is_q) {
9910                 unallocated_encoding(s);
9911                 return;
9912             }
9913             if (!fp_access_check(s)) {
9914                 return;
9915             }
9916             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
9917             return;
9918         }
9919         case 0x2c: /* FCMGT (zero) */
9920         case 0x2d: /* FCMEQ (zero) */
9921         case 0x2e: /* FCMLT (zero) */
9922         case 0x6c: /* FCMGE (zero) */
9923         case 0x6d: /* FCMLE (zero) */
9924             if (size == 3 && !is_q) {
9925                 unallocated_encoding(s);
9926                 return;
9927             }
                 /* NOTE(review): no fp_access_check() before this hand-off;
                  * presumably handle_2misc_fcmp_zero() performs it -- confirm.
                  */
9928             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
9929             return;
9930         case 0x7f: /* FSQRT */
9931             if (size == 3 && !is_q) {
9932                 unallocated_encoding(s);
9933                 return;
9934             }
9935             break;
9936         case 0x1a: /* FCVTNS */
9937         case 0x1b: /* FCVTMS */
9938         case 0x3a: /* FCVTPS */
9939         case 0x3b: /* FCVTZS */
9940         case 0x5a: /* FCVTNU */
9941         case 0x5b: /* FCVTMU */
9942         case 0x7a: /* FCVTPU */
9943         case 0x7b: /* FCVTZU */
9944             need_fpstatus = true;
9945             need_rmode = true;
                 /* Rounding mode number: bit 0 is opcode bit 5 (the original
                  * size[1]), bit 1 is opcode bit 0.
                  */
9946             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9947             if (size == 3 && !is_q) {
9948                 unallocated_encoding(s);
9949                 return;
9950             }
9951             break;
9952         case 0x5c: /* FCVTAU */
9953         case 0x1c: /* FCVTAS */
9954             need_fpstatus = true;
9955             need_rmode = true;
9956             rmode = FPROUNDING_TIEAWAY;
9957             if (size == 3 && !is_q) {
9958                 unallocated_encoding(s);
9959                 return;
9960             }
9961             break;
9962         case 0x3c: /* URECPE */
9963             if (size == 3) {
9964                 unallocated_encoding(s);
9965                 return;
9966             }
9967             /* fall through */
9968         case 0x3d: /* FRECPE */
9969         case 0x7d: /* FRSQRTE */
9970             if (size == 3 && !is_q) {
9971                 unallocated_encoding(s);
9972                 return;
9973             }
9974             if (!fp_access_check(s)) {
9975                 return;
9976             }
9977             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
9978             return;
9979         case 0x56: /* FCVTXN, FCVTXN2 */
9980             if (size == 2) {
9981                 unallocated_encoding(s);
9982                 return;
9983             }
9984             /* fall through */
9985         case 0x16: /* FCVTN, FCVTN2 */
9986             /* handle_2misc_narrow does a 2*size -> size operation, but these
9987              * instructions encode the source size rather than dest size.
9988              */
9989             if (!fp_access_check(s)) {
9990                 return;
9991             }
9992             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
9993             return;
9994         case 0x17: /* FCVTL, FCVTL2 */
9995             if (!fp_access_check(s)) {
9996                 return;
9997             }
9998             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
9999             return;
10000         case 0x18: /* FRINTN */
10001         case 0x19: /* FRINTM */
10002         case 0x38: /* FRINTP */
10003         case 0x39: /* FRINTZ */
                  /* These four force a rounding mode, derived from the opcode
                   * bits exactly as for the FCVT* group above; FRINTX and
                   * FRINTI below leave need_rmode unset.
                   */
10004             need_rmode = true;
10005             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10006             /* fall through */
10007         case 0x59: /* FRINTX */
10008         case 0x79: /* FRINTI */
10009             need_fpstatus = true;
10010             if (size == 3 && !is_q) {
10011                 unallocated_encoding(s);
10012                 return;
10013             }
10014             break;
10015         case 0x58: /* FRINTA */
10016             need_rmode = true;
10017             rmode = FPROUNDING_TIEAWAY;
10018             need_fpstatus = true;
10019             if (size == 3 && !is_q) {
10020                 unallocated_encoding(s);
10021                 return;
10022             }
10023             break;
10024         case 0x7c: /* URSQRTE */
10025             if (size == 3) {
10026                 unallocated_encoding(s);
10027                 return;
10028             }
10029             need_fpstatus = true;
10030             break;
10031         default:
10032             unallocated_encoding(s);
10033             return;
10034         }
10035         break;
10036     }
10037     default:
10038         unallocated_encoding(s);
10039         return;
10040     }
10041
          /* Stage 2: everything that reaches this point is generated by the
           * loops below. The access check comes after all decode-time
           * rejections above.
           */
10042     if (!fp_access_check(s)) {
10043         return;
10044     }
10045
10046     if (need_fpstatus) {
10047         tcg_fpstatus = get_fpstatus_ptr();
10048     } else {
10049         TCGV_UNUSED_PTR(tcg_fpstatus);
10050     }
10051     if (need_rmode) {
              /* tcg_rmode is both the output and the input of this helper
               * call, and the identical call is made again after the loops.
               * NOTE(review): presumably the helper returns the previous
               * rounding mode, so the second call restores it -- confirm.
               */
10052         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
10053         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10054     } else {
10055         TCGV_UNUSED_I32(tcg_rmode);
10056     }
10057
10058     if (size == 3) {
10059         /* All 64-bit element operations can be shared with scalar 2misc */
10060         int pass;
10061
              /* One pass per 64-bit element: two for the Q form, else one. */
10062         for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
10063             TCGv_i64 tcg_op = tcg_temp_new_i64();
10064             TCGv_i64 tcg_res = tcg_temp_new_i64();
10065
10066             read_vec_element(s, tcg_op, rn, pass, MO_64);
10067
10068             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
10069                             tcg_rmode, tcg_fpstatus);
10070
10071             write_vec_element(s, tcg_res, rd, pass, MO_64);
10072
10073             tcg_temp_free_i64(tcg_res);
10074             tcg_temp_free_i64(tcg_op);
10075         }
10076     } else {
10077         int pass;
10078
              /* Process the vector 32 bits per pass: four passes for the Q
               * form, else two. For 8 and 16 bit element sizes the helpers
               * in the second switch are handed the whole 32-bit chunk.
               */
10079         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
10080             TCGv_i32 tcg_op = tcg_temp_new_i32();
10081             TCGv_i32 tcg_res = tcg_temp_new_i32();
10082             TCGCond cond;
10083
10084             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
10085
10086             if (size == 2) {
10087                 /* Special cases for 32 bit elements */
10088                 switch (opcode) {
10089                 case 0xa: /* CMLT */
10090                     /* 32 bit integer comparison against zero, result is
10091                      * test ? (2^32 - 1) : 0. We implement via setcond(test)
10092                      * and inverting.
10093                      */
10094                     cond = TCG_COND_LT;
                      /* Shared tail: cases 0x8 and 0x9 below jump back here
                       * once they have chosen cond.
                       */
10095                 do_cmop:
10096                     tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
10097                     tcg_gen_neg_i32(tcg_res, tcg_res);
10098                     break;
10099                 case 0x8: /* CMGT, CMGE */
10100                     cond = u ? TCG_COND_GE : TCG_COND_GT;
10101                     goto do_cmop;
10102                 case 0x9: /* CMEQ, CMLE */
10103                     cond = u ? TCG_COND_LE : TCG_COND_EQ;
10104                     goto do_cmop;
10105                 case 0x4: /* CLS, CLZ */
10106                     if (u) {
10107                         gen_helper_clz32(tcg_res, tcg_op);
10108                     } else {
10109                         gen_helper_cls32(tcg_res, tcg_op);
10110                     }
10111                     break;
10112                 case 0x7: /* SQABS, SQNEG */
10113                     if (u) {
10114                         gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
10115                     } else {
10116                         gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
10117                     }
10118                     break;
10119                 case 0xb: /* ABS, NEG */
10120                     if (u) {
10121                         tcg_gen_neg_i32(tcg_res, tcg_op);
10122                     } else {
                              /* Negate first, then pick between the operand
                               * and its negation with a GT-zero movcond.
                               * NOTE(review): presumably this yields
                               * (op > 0) ? op : -op -- confirm against the
                               * TCG movcond operand order.
                               */
10123                         TCGv_i32 tcg_zero = tcg_const_i32(0);
10124                         tcg_gen_neg_i32(tcg_res, tcg_op);
10125                         tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
10126                                             tcg_zero, tcg_op, tcg_res);
10127                         tcg_temp_free_i32(tcg_zero);
10128                     }
10129                     break;
10130                 case 0x2f: /* FABS */
10131                     gen_helper_vfp_abss(tcg_res, tcg_op);
10132                     break;
10133                 case 0x6f: /* FNEG */
10134                     gen_helper_vfp_negs(tcg_res, tcg_op);
10135                     break;
10136                 case 0x7f: /* FSQRT */
10137                     gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
10138                     break;
10139                 case 0x1a: /* FCVTNS */
10140                 case 0x1b: /* FCVTMS */
10141                 case 0x1c: /* FCVTAS */
10142                 case 0x3a: /* FCVTPS */
10143                 case 0x3b: /* FCVTZS */
10144                 {
                          /* All signed conversions share one helper call with
                           * a shift argument of zero; the per-opcode rounding
                           * mode was already selected via rmode above.
                           */
10145                     TCGv_i32 tcg_shift = tcg_const_i32(0);
10146                     gen_helper_vfp_tosls(tcg_res, tcg_op,
10147                                          tcg_shift, tcg_fpstatus);
10148                     tcg_temp_free_i32(tcg_shift);
10149                     break;
10150                 }
10151                 case 0x5a: /* FCVTNU */
10152                 case 0x5b: /* FCVTMU */
10153                 case 0x5c: /* FCVTAU */
10154                 case 0x7a: /* FCVTPU */
10155                 case 0x7b: /* FCVTZU */
10156                 {
                          /* Unsigned counterpart of the block above. */
10157                     TCGv_i32 tcg_shift = tcg_const_i32(0);
10158                     gen_helper_vfp_touls(tcg_res, tcg_op,
10159                                          tcg_shift, tcg_fpstatus);
10160                     tcg_temp_free_i32(tcg_shift);
10161                     break;
10162                 }
10163                 case 0x18: /* FRINTN */
10164                 case 0x19: /* FRINTM */
10165                 case 0x38: /* FRINTP */
10166                 case 0x39: /* FRINTZ */
10167                 case 0x58: /* FRINTA */
10168                 case 0x79: /* FRINTI */
10169                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
10170                     break;
10171                 case 0x59: /* FRINTX */
10172                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
10173                     break;
10174                 case 0x7c: /* URSQRTE */
10175                     gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
10176                     break;
10177                 default:
10178                     g_assert_not_reached();
10179                 }
10180             } else {
10181                 /* Use helpers for 8 and 16 bit elements */
10182                 switch (opcode) {
10183                 case 0x5: /* CNT, RBIT */
10184                     /* For these two insns size is part of the opcode specifier
10185                      * (handled earlier); they always operate on byte elements.
10186                      */
10187                     if (u) {
10188                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
10189                     } else {
10190                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
10191                     }
10192                     break;
10193                 case 0x7: /* SQABS, SQNEG */
10194                 {
                          /* Table indexed by [size][u]: row picks the 8 or
                           * 16 bit helper, column picks qabs (u == 0) or
                           * qneg (u == 1).
                           */
10195                     NeonGenOneOpEnvFn *genfn;
10196                     static NeonGenOneOpEnvFn * const fns[2][2] = {
10197                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10198                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10199                     };
10200                     genfn = fns[size][u];
10201                     genfn(tcg_res, cpu_env, tcg_op);
10202                     break;
10203                 }
10204                 case 0x8: /* CMGT, CMGE */
10205                 case 0x9: /* CMEQ, CMLE */
10206                 case 0xa: /* CMLT */
10207                 {
                          /* Rows are the three comparisons actually
                           * implemented (cgt, cge, ceq); columns are the 8
                           * and 16 bit variants.
                           */
10208                     static NeonGenTwoOpFn * const fns[3][2] = {
10209                         { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
10210                         { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
10211                         { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
10212                     };
10213                     NeonGenTwoOpFn *genfn;
10214                     int comp;
10215                     bool reverse;
10216                     TCGv_i32 tcg_zero = tcg_const_i32(0);
10217
10218                     /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
10219                     comp = (opcode - 0x8) * 2 + u;
10220                     /* ...but LE, LT are implemented as reverse GT, GE:
                           * comp 3 (CMLE) maps to row 1 (cge) and comp 4
                           * (CMLT) to row 0 (cgt), with the operands swapped
                           * in the call below.
                           */
10221                     reverse = (comp > 2);
10222                     if (reverse) {
10223                         comp = 4 - comp;
10224                     }
10225                     genfn = fns[comp][size];
10226                     if (reverse) {
10227                         genfn(tcg_res, tcg_zero, tcg_op);
10228                     } else {
10229                         genfn(tcg_res, tcg_op, tcg_zero);
10230                     }
10231                     tcg_temp_free_i32(tcg_zero);
10232                     break;
10233                 }
10234                 case 0xb: /* ABS, NEG */
10235                     if (u) {
                              /* NEG is generated as 0 - op with the sub helper
                               * of the matching element width.
                               */
10236                         TCGv_i32 tcg_zero = tcg_const_i32(0);
10237                         if (size) {
10238                             gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
10239                         } else {
10240                             gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
10241                         }
10242                         tcg_temp_free_i32(tcg_zero);
10243                     } else {
10244                         if (size) {
10245                             gen_helper_neon_abs_s16(tcg_res, tcg_op);
10246                         } else {
10247                             gen_helper_neon_abs_s8(tcg_res, tcg_op);
10248                         }
10249                     }
10250                     break;
10251                 case 0x4: /* CLS, CLZ */
10252                     if (u) {
10253                         if (size == 0) {
10254                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
10255                         } else {
10256                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
10257                         }
10258                     } else {
10259                         if (size == 0) {
10260                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
10261                         } else {
10262                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
10263                         }
10264                     }
10265                     break;
10266                 default:
10267                     g_assert_not_reached();
10268                 }
10269             }
10270
10271             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10272
10273             tcg_temp_free_i32(tcg_res);
10274             tcg_temp_free_i32(tcg_op);
10275         }
10276     }
          /* Non-Q form: only the low passes were written above.
           * NOTE(review): presumably clear_vec_high() zeroes the upper 64
           * bits of rd -- confirm against its definition.
           */
10277     if (!is_q) {
10278         clear_vec_high(s, rd);
10279     }
10280
10281     if (need_rmode) {
              /* Second set_rmode call with the value left in tcg_rmode by
               * the first one, made before the temporary is released.
               */
10282         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10283         tcg_temp_free_i32(tcg_rmode);
10284     }
10285     if (need_fpstatus) {
10286         tcg_temp_free_ptr(tcg_fpstatus);
10287     }
10288 }
10289
10290 /* C3.6.13 AdvSIMD scalar x indexed element
10291  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10292  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10293  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10294  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10295  * C3.6.18 AdvSIMD vector x indexed element
10296  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10297  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10298  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10299  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10300  */
10301 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
10302 {
10303     /* This encoding has two kinds of instruction:
10304      *  normal, where we perform elt x idxelt => elt for each
10305      *     element in the vector
10306      *  long, where we perform elt x idxelt and generate a result of
10307      *     double the width of the input element
10308      * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
10309      */
10310     bool is_scalar = extract32(insn, 28, 1);
10311     bool is_q = extract32(insn, 30, 1);
10312     bool u = extract32(insn, 29, 1);
10313     int size = extract32(insn, 22, 2);
10314     int l = extract32(insn, 21, 1);
10315     int m = extract32(insn, 20, 1);
10316     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
10317     int rm = extract32(insn, 16, 4);
10318     int opcode = extract32(insn, 12, 4);
10319     int h = extract32(insn, 11, 1);
10320     int rn = extract32(insn, 5, 5);
10321     int rd = extract32(insn, 0, 5);
10322     bool is_long = false;
10323     bool is_fp = false;
10324     int index;
10325     TCGv_ptr fpst;
10326
10327     switch (opcode) {
10328     case 0x0: /* MLA */
10329     case 0x4: /* MLS */
10330         if (!u || is_scalar) {
10331             unallocated_encoding(s);
10332             return;
10333         }
10334         break;
10335     case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10336     case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10337     case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
10338         if (is_scalar) {
10339             unallocated_encoding(s);
10340             return;
10341         }
10342         is_long = true;
10343         break;
10344     case 0x3: /* SQDMLAL, SQDMLAL2 */
10345     case 0x7: /* SQDMLSL, SQDMLSL2 */
10346     case 0xb: /* SQDMULL, SQDMULL2 */
10347         is_long = true;
10348         /* fall through */
10349     case 0xc: /* SQDMULH */
10350     case 0xd: /* SQRDMULH */
10351         if (u) {
10352             unallocated_encoding(s);
10353             return;
10354         }
10355         break;
10356     case 0x8: /* MUL */
10357         if (u || is_scalar) {
10358             unallocated_encoding(s);
10359             return;
10360         }
10361         break;
10362     case 0x1: /* FMLA */
10363     case 0x5: /* FMLS */
10364         if (u) {
10365             unallocated_encoding(s);
10366             return;
10367         }
10368         /* fall through */
10369     case 0x9: /* FMUL, FMULX */
10370         if (!extract32(size, 1, 1)) {
10371             unallocated_encoding(s);
10372             return;
10373         }
10374         is_fp = true;
10375         break;
10376     default:
10377         unallocated_encoding(s);
10378         return;
10379     }
10380
10381     if (is_fp) {
10382         /* low bit of size indicates single/double */
10383         size = extract32(size, 0, 1) ? 3 : 2;
10384         if (size == 2) {
10385             index = h << 1 | l;
10386         } else {
10387             if (l || !is_q) {
10388                 unallocated_encoding(s);
10389                 return;
10390             }
10391             index = h;
10392         }
10393         rm |= (m << 4);
10394     } else {
10395         switch (size) {
10396         case 1:
10397             index = h << 2 | l << 1 | m;
10398             break;
10399         case 2:
10400             index = h << 1 | l;
10401             rm |= (m << 4);
10402             break;
10403         default:
10404             unallocated_encoding(s);
10405             return;
10406         }
10407     }
10408
10409     if (!fp_access_check(s)) {
10410         return;
10411     }
10412
10413     if (is_fp) {
10414         fpst = get_fpstatus_ptr();
10415     } else {
10416         TCGV_UNUSED_PTR(fpst);
10417     }
10418
10419     if (size == 3) {
10420         TCGv_i64 tcg_idx = tcg_temp_new_i64();
10421         int pass;
10422
10423         assert(is_fp && is_q && !is_long);
10424
10425         read_vec_element(s, tcg_idx, rm, index, MO_64);
10426
10427         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10428             TCGv_i64 tcg_op = tcg_temp_new_i64();
10429             TCGv_i64 tcg_res = tcg_temp_new_i64();
10430
10431             read_vec_element(s, tcg_op, rn, pass, MO_64);
10432
10433             switch (opcode) {
10434             case 0x5: /* FMLS */
10435                 /* As usual for ARM, separate negation for fused multiply-add */
10436                 gen_helper_vfp_negd(tcg_op, tcg_op);
10437                 /* fall through */
10438             case 0x1: /* FMLA */
10439                 read_vec_element(s, tcg_res, rd, pass, MO_64);
10440                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10441                 break;
10442             case 0x9: /* FMUL, FMULX */
10443                 if (u) {
10444                     gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
10445                 } else {
10446                     gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
10447                 }
10448                 break;
10449             default:
10450                 g_assert_not_reached();
10451             }
10452
10453             write_vec_element(s, tcg_res, rd, pass, MO_64);
10454             tcg_temp_free_i64(tcg_op);
10455             tcg_temp_free_i64(tcg_res);
10456         }
10457
10458         if (is_scalar) {
10459             clear_vec_high(s, rd);
10460         }
10461
10462         tcg_temp_free_i64(tcg_idx);
10463     } else if (!is_long) {
10464         /* 32 bit floating point, or 16 or 32 bit integer.
10465          * For the 16 bit scalar case we use the usual Neon helpers and
10466          * rely on the fact that 0 op 0 == 0 with no side effects.
10467          */
10468         TCGv_i32 tcg_idx = tcg_temp_new_i32();
10469         int pass, maxpasses;
10470
10471         if (is_scalar) {
10472             maxpasses = 1;
10473         } else {
10474             maxpasses = is_q ? 4 : 2;
10475         }
10476
10477         read_vec_element_i32(s, tcg_idx, rm, index, size);
10478
10479         if (size == 1 && !is_scalar) {
10480             /* The simplest way to handle the 16x16 indexed ops is to duplicate
10481              * the index into both halves of the 32 bit tcg_idx and then use
10482              * the usual Neon helpers.
10483              */
10484             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10485         }
10486
10487         for (pass = 0; pass < maxpasses; pass++) {
10488             TCGv_i32 tcg_op = tcg_temp_new_i32();
10489             TCGv_i32 tcg_res = tcg_temp_new_i32();
10490
10491             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
10492
10493             switch (opcode) {
10494             case 0x0: /* MLA */
10495             case 0x4: /* MLS */
10496             case 0x8: /* MUL */
10497             {
10498                 static NeonGenTwoOpFn * const fns[2][2] = {
10499                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
10500                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
10501                 };
10502                 NeonGenTwoOpFn *genfn;
10503                 bool is_sub = opcode == 0x4;
10504
10505                 if (size == 1) {
10506                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
10507                 } else {
10508                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
10509                 }
10510                 if (opcode == 0x8) {
10511                     break;
10512                 }
10513                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
10514                 genfn = fns[size - 1][is_sub];
10515                 genfn(tcg_res, tcg_op, tcg_res);
10516                 break;
10517             }
10518             case 0x5: /* FMLS */
10519                 /* As usual for ARM, separate negation for fused multiply-add */
10520                 gen_helper_vfp_negs(tcg_op, tcg_op);
10521                 /* fall through */
10522             case 0x1: /* FMLA */
10523                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10524                 gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10525                 break;
10526             case 0x9: /* FMUL, FMULX */
10527                 if (u) {
10528                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
10529                 } else {
10530                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
10531                 }
10532                 break;
10533             case 0xc: /* SQDMULH */
10534                 if (size == 1) {
10535                     gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
10536                                                tcg_op, tcg_idx);
10537                 } else {
10538                     gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
10539                                                tcg_op, tcg_idx);
10540                 }
10541                 break;
10542             case 0xd: /* SQRDMULH */
10543                 if (size == 1) {
10544                     gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
10545                                                 tcg_op, tcg_idx);
10546                 } else {
10547                     gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
10548                                                 tcg_op, tcg_idx);
10549                 }
10550                 break;
10551             default:
10552                 g_assert_not_reached();
10553             }
10554
10555             if (is_scalar) {
10556                 write_fp_sreg(s, rd, tcg_res);
10557             } else {
10558                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10559             }
10560
10561             tcg_temp_free_i32(tcg_op);
10562             tcg_temp_free_i32(tcg_res);
10563         }
10564
10565         tcg_temp_free_i32(tcg_idx);
10566
10567         if (!is_q) {
10568             clear_vec_high(s, rd);
10569         }
10570     } else {
10571         /* long ops: 16x16->32 or 32x32->64 */
10572         TCGv_i64 tcg_res[2];
10573         int pass;
10574         bool satop = extract32(opcode, 0, 1);
10575         TCGMemOp memop = MO_32;
10576
10577         if (satop || !u) {
10578             memop |= MO_SIGN;
10579         }
10580
10581         if (size == 2) {
10582             TCGv_i64 tcg_idx = tcg_temp_new_i64();
10583
10584             read_vec_element(s, tcg_idx, rm, index, memop);
10585
10586             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10587                 TCGv_i64 tcg_op = tcg_temp_new_i64();
10588                 TCGv_i64 tcg_passres;
10589                 int passelt;
10590
10591                 if (is_scalar) {
10592                     passelt = 0;
10593                 } else {
10594                     passelt = pass + (is_q * 2);
10595                 }
10596
10597                 read_vec_element(s, tcg_op, rn, passelt, memop);
10598
10599                 tcg_res[pass] = tcg_temp_new_i64();
10600
10601                 if (opcode == 0xa || opcode == 0xb) {
10602                     /* Non-accumulating ops */
10603                     tcg_passres = tcg_res[pass];
10604                 } else {
10605                     tcg_passres = tcg_temp_new_i64();
10606                 }
10607
10608                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
10609                 tcg_temp_free_i64(tcg_op);
10610
10611                 if (satop) {
10612                     /* saturating, doubling */
10613                     gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10614                                                       tcg_passres, tcg_passres);
10615                 }
10616
10617                 if (opcode == 0xa || opcode == 0xb) {
10618                     continue;
10619                 }
10620
10621                 /* Accumulating op: handle accumulate step */
10622                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10623
10624                 switch (opcode) {
10625                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10626                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10627                     break;
10628                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10629                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10630                     break;
10631                 case 0x7: /* SQDMLSL, SQDMLSL2 */
10632                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
10633                     /* fall through */
10634                 case 0x3: /* SQDMLAL, SQDMLAL2 */
10635                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10636                                                       tcg_res[pass],
10637                                                       tcg_passres);
10638                     break;
10639                 default:
10640                     g_assert_not_reached();
10641                 }
10642                 tcg_temp_free_i64(tcg_passres);
10643             }
10644             tcg_temp_free_i64(tcg_idx);
10645
10646             if (is_scalar) {
10647                 clear_vec_high(s, rd);
10648             }
10649         } else {
10650             TCGv_i32 tcg_idx = tcg_temp_new_i32();
10651
10652             assert(size == 1);
10653             read_vec_element_i32(s, tcg_idx, rm, index, size);
10654
10655             if (!is_scalar) {
10656                 /* The simplest way to handle the 16x16 indexed ops is to
10657                  * duplicate the index into both halves of the 32 bit tcg_idx
10658                  * and then use the usual Neon helpers.
10659                  */
10660                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10661             }
10662
10663             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10664                 TCGv_i32 tcg_op = tcg_temp_new_i32();
10665                 TCGv_i64 tcg_passres;
10666
10667                 if (is_scalar) {
10668                     read_vec_element_i32(s, tcg_op, rn, pass, size);
10669                 } else {
10670                     read_vec_element_i32(s, tcg_op, rn,
10671                                          pass + (is_q * 2), MO_32);
10672                 }
10673
10674                 tcg_res[pass] = tcg_temp_new_i64();
10675
10676                 if (opcode == 0xa || opcode == 0xb) {
10677                     /* Non-accumulating ops */
10678                     tcg_passres = tcg_res[pass];
10679                 } else {
10680                     tcg_passres = tcg_temp_new_i64();
10681                 }
10682
10683                 if (memop & MO_SIGN) {
10684                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
10685                 } else {
10686                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
10687                 }
10688                 if (satop) {
10689                     gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10690                                                       tcg_passres, tcg_passres);
10691                 }
10692                 tcg_temp_free_i32(tcg_op);
10693
10694                 if (opcode == 0xa || opcode == 0xb) {
10695                     continue;
10696                 }
10697
10698                 /* Accumulating op: handle accumulate step */
10699                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10700
10701                 switch (opcode) {
10702                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10703                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
10704                                              tcg_passres);
10705                     break;
10706                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10707                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
10708                                              tcg_passres);
10709                     break;
10710                 case 0x7: /* SQDMLSL, SQDMLSL2 */
10711                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10712                     /* fall through */
10713                 case 0x3: /* SQDMLAL, SQDMLAL2 */
10714                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10715                                                       tcg_res[pass],
10716                                                       tcg_passres);
10717                     break;
10718                 default:
10719                     g_assert_not_reached();
10720                 }
10721                 tcg_temp_free_i64(tcg_passres);
10722             }
10723             tcg_temp_free_i32(tcg_idx);
10724
10725             if (is_scalar) {
10726                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
10727             }
10728         }
10729
10730         if (is_scalar) {
10731             tcg_res[1] = tcg_const_i64(0);
10732         }
10733
10734         for (pass = 0; pass < 2; pass++) {
10735             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10736             tcg_temp_free_i64(tcg_res[pass]);
10737         }
10738     }
10739
10740     if (!TCGV_IS_UNUSED_PTR(fpst)) {
10741         tcg_temp_free_ptr(fpst);
10742     }
10743 }
10744
10745 /* C3.6.19 Crypto AES
10746  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10747  * +-----------------+------+-----------+--------+-----+------+------+
10748  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10749  * +-----------------+------+-----------+--------+-----+------+------+
10750  */
10751 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
10752 {
10753     int size = extract32(insn, 22, 2);
10754     int opcode = extract32(insn, 12, 5);
10755     int rn = extract32(insn, 5, 5);
10756     int rd = extract32(insn, 0, 5);
10757     int decrypt;
10758     TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt;
10759     CryptoThreeOpEnvFn *genfn;
10760
10761     if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
10762         || size != 0) {
10763         unallocated_encoding(s);
10764         return;
10765     }
10766
10767     switch (opcode) {
10768     case 0x4: /* AESE */
10769         decrypt = 0;
10770         genfn = gen_helper_crypto_aese;
10771         break;
10772     case 0x6: /* AESMC */
10773         decrypt = 0;
10774         genfn = gen_helper_crypto_aesmc;
10775         break;
10776     case 0x5: /* AESD */
10777         decrypt = 1;
10778         genfn = gen_helper_crypto_aese;
10779         break;
10780     case 0x7: /* AESIMC */
10781         decrypt = 1;
10782         genfn = gen_helper_crypto_aesmc;
10783         break;
10784     default:
10785         unallocated_encoding(s);
10786         return;
10787     }
10788
10789     /* Note that we convert the Vx register indexes into the
10790      * index within the vfp.regs[] array, so we can share the
10791      * helper with the AArch32 instructions.
10792      */
10793     tcg_rd_regno = tcg_const_i32(rd << 1);
10794     tcg_rn_regno = tcg_const_i32(rn << 1);
10795     tcg_decrypt = tcg_const_i32(decrypt);
10796
10797     genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt);
10798
10799     tcg_temp_free_i32(tcg_rd_regno);
10800     tcg_temp_free_i32(tcg_rn_regno);
10801     tcg_temp_free_i32(tcg_decrypt);
10802 }
10803
10804 /* C3.6.20 Crypto three-reg SHA
10805  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
10806  * +-----------------+------+---+------+---+--------+-----+------+------+
10807  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
10808  * +-----------------+------+---+------+---+--------+-----+------+------+
10809  */
10810 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
10811 {
10812     int size = extract32(insn, 22, 2);
10813     int opcode = extract32(insn, 12, 3);
10814     int rm = extract32(insn, 16, 5);
10815     int rn = extract32(insn, 5, 5);
10816     int rd = extract32(insn, 0, 5);
10817     CryptoThreeOpEnvFn *genfn;
10818     TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno;
10819     int feature = ARM_FEATURE_V8_SHA256;
10820
10821     if (size != 0) {
10822         unallocated_encoding(s);
10823         return;
10824     }
10825
10826     switch (opcode) {
10827     case 0: /* SHA1C */
10828     case 1: /* SHA1P */
10829     case 2: /* SHA1M */
10830     case 3: /* SHA1SU0 */
10831         genfn = NULL;
10832         feature = ARM_FEATURE_V8_SHA1;
10833         break;
10834     case 4: /* SHA256H */
10835         genfn = gen_helper_crypto_sha256h;
10836         break;
10837     case 5: /* SHA256H2 */
10838         genfn = gen_helper_crypto_sha256h2;
10839         break;
10840     case 6: /* SHA256SU1 */
10841         genfn = gen_helper_crypto_sha256su1;
10842         break;
10843     default:
10844         unallocated_encoding(s);
10845         return;
10846     }
10847
10848     if (!arm_dc_feature(s, feature)) {
10849         unallocated_encoding(s);
10850         return;
10851     }
10852
10853     tcg_rd_regno = tcg_const_i32(rd << 1);
10854     tcg_rn_regno = tcg_const_i32(rn << 1);
10855     tcg_rm_regno = tcg_const_i32(rm << 1);
10856
10857     if (genfn) {
10858         genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno);
10859     } else {
10860         TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
10861
10862         gen_helper_crypto_sha1_3reg(cpu_env, tcg_rd_regno,
10863                                     tcg_rn_regno, tcg_rm_regno, tcg_opcode);
10864         tcg_temp_free_i32(tcg_opcode);
10865     }
10866
10867     tcg_temp_free_i32(tcg_rd_regno);
10868     tcg_temp_free_i32(tcg_rn_regno);
10869     tcg_temp_free_i32(tcg_rm_regno);
10870 }
10871
10872 /* C3.6.21 Crypto two-reg SHA
10873  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10874  * +-----------------+------+-----------+--------+-----+------+------+
10875  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10876  * +-----------------+------+-----------+--------+-----+------+------+
10877  */
10878 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
10879 {
10880     int size = extract32(insn, 22, 2);
10881     int opcode = extract32(insn, 12, 5);
10882     int rn = extract32(insn, 5, 5);
10883     int rd = extract32(insn, 0, 5);
10884     CryptoTwoOpEnvFn *genfn;
10885     int feature;
10886     TCGv_i32 tcg_rd_regno, tcg_rn_regno;
10887
10888     if (size != 0) {
10889         unallocated_encoding(s);
10890         return;
10891     }
10892
10893     switch (opcode) {
10894     case 0: /* SHA1H */
10895         feature = ARM_FEATURE_V8_SHA1;
10896         genfn = gen_helper_crypto_sha1h;
10897         break;
10898     case 1: /* SHA1SU1 */
10899         feature = ARM_FEATURE_V8_SHA1;
10900         genfn = gen_helper_crypto_sha1su1;
10901         break;
10902     case 2: /* SHA256SU0 */
10903         feature = ARM_FEATURE_V8_SHA256;
10904         genfn = gen_helper_crypto_sha256su0;
10905         break;
10906     default:
10907         unallocated_encoding(s);
10908         return;
10909     }
10910
10911     if (!arm_dc_feature(s, feature)) {
10912         unallocated_encoding(s);
10913         return;
10914     }
10915
10916     tcg_rd_regno = tcg_const_i32(rd << 1);
10917     tcg_rn_regno = tcg_const_i32(rn << 1);
10918
10919     genfn(cpu_env, tcg_rd_regno, tcg_rn_regno);
10920
10921     tcg_temp_free_i32(tcg_rd_regno);
10922     tcg_temp_free_i32(tcg_rn_regno);
10923 }
10924
/* C3.6 Data processing - SIMD, inc Crypto
 *
 * As the decode gets a little complex we are using a table based
 * approach for this part of the decode.
 *
 * Each entry is { pattern, mask, fn }. The table is handed to
 * lookup_disas_fn() by disas_data_proc_simd() and is terminated by the
 * all-zero entry whose fn is NULL.
 * NOTE(review): presumably an entry matches when (insn & mask) == pattern
 * and the first matching entry wins (the ordering note on simd_mod_imm
 * below relies on that) -- confirm against lookup_disas_fn().
 */
static const AArch64DecodeTable data_proc_simd[] = {
    /* pattern  ,  mask     ,  fn                        */
    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
    { 0x0e000400, 0x9fe08400, disas_simd_copy },
    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
    { 0x0e000000, 0xbf208c00, disas_simd_tb },
    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
    { 0x2e000000, 0xbf208400, disas_simd_ext },
    /* Scalar forms: same handlers' scalar counterparts, bits 30 and 28 set */
    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
    /* Crypto extension insns */
    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
    /* End-of-table sentinel */
    { 0x00000000, 0x00000000, NULL }
};
10956
10957 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
10958 {
10959     /* Note that this is called with all non-FP cases from
10960      * table C3-6 so it must UNDEF for entries not specifically
10961      * allocated to instructions in that table.
10962      */
10963     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
10964     if (fn) {
10965         fn(s, insn);
10966     } else {
10967         unallocated_encoding(s);
10968     }
10969 }
10970
10971 /* C3.6 Data processing - SIMD and floating point */
10972 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
10973 {
10974     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
10975         disas_data_proc_fp(s, insn);
10976     } else {
10977         /* SIMD, including crypto */
10978         disas_data_proc_simd(s, insn);
10979     }
10980 }
10981
10982 /* C3.1 A64 instruction index by encoding */
10983 static void disas_a64_insn(CPUARMState *env, DisasContext *s)
10984 {
10985     uint32_t insn;
10986
10987     insn = arm_ldl_code(env, s->pc, s->sctlr_b);
10988     s->insn = insn;
10989     s->pc += 4;
10990
10991     s->fp_access_checked = false;
10992
10993     switch (extract32(insn, 25, 4)) {
10994     case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
10995         unallocated_encoding(s);
10996         break;
10997     case 0x8: case 0x9: /* Data processing - immediate */
10998         disas_data_proc_imm(s, insn);
10999         break;
11000     case 0xa: case 0xb: /* Branch, exception generation and system insns */
11001         disas_b_exc_sys(s, insn);
11002         break;
11003     case 0x4:
11004     case 0x6:
11005     case 0xc:
11006     case 0xe:      /* Loads and stores */
11007         disas_ldst(s, insn);
11008         break;
11009     case 0x5:
11010     case 0xd:      /* Data processing - register */
11011         disas_data_proc_reg(s, insn);
11012         break;
11013     case 0x7:
11014     case 0xf:      /* Data processing - SIMD and floating point */
11015         disas_data_proc_simd_fp(s, insn);
11016         break;
11017     default:
11018         assert(FALSE); /* all 15 cases should be handled above */
11019         break;
11020     }
11021
11022     /* if we allocated any temporaries, free them here */
11023     free_tmp_a64(s);
11024 }
11025
/* Translate one TranslationBlock of A64 code starting at tb->pc.
 *
 * Fills in a local DisasContext from the CPU state and tb->flags,
 * translates guest insns one at a time until one of the loop exit
 * conditions is met, emits the end-of-TB code selected by dc->is_jmp
 * (or the single-step exception when stepping), and finally records
 * tb->size and tb->icount.
 */
void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
{
    CPUState *cs = CPU(cpu);
    CPUARMState *env = &cpu->env;
    DisasContext dc1, *dc = &dc1;
    target_ulong pc_start;
    target_ulong next_page_start;
    int num_insns;
    int max_insns;

    pc_start = tb->pc;

    dc->tb = tb;

    dc->is_jmp = DISAS_NEXT;
    dc->pc = pc_start;
    dc->singlestep_enabled = cs->singlestep_enabled;
    dc->condjmp = 0;

    dc->aarch64 = 1;
    /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
     * there is no secure EL1, so we route exceptions to EL3.
     */
    dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
                               !arm_el_is_aa64(env, 3);
    dc->thumb = 0;
    dc->sctlr_b = 0;
    dc->be_data = ARM_TBFLAG_BE_DATA(tb->flags) ? MO_BE : MO_LE;
    dc->condexec_mask = 0;
    dc->condexec_cond = 0;
    dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(tb->flags);
    dc->vec_len = 0;
    dc->vec_stride = 0;
    dc->cp_regs = cpu->cp_regs;
    dc->features = env->features;

    /* Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = ARM_TBFLAG_SS_ACTIVE(tb->flags);
    dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(tb->flags);
    dc->is_ldex = false;
    dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);

    init_tmp_a64_array(dc);

    /* Translation stops at the end of the guest page containing pc_start */
    next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
    num_insns = 0;
    /* Take the insn budget from tb->cflags: a count of 0 is replaced by
     * CF_COUNT_MASK, and the result is clamped to TCG_MAX_INSNS.
     */
    max_insns = tb->cflags & CF_COUNT_MASK;
    if (max_insns == 0) {
        max_insns = CF_COUNT_MASK;
    }
    if (max_insns > TCG_MAX_INSNS) {
        max_insns = TCG_MAX_INSNS;
    }

    gen_tb_start(tb);

    tcg_clear_temp_count();

    do {
        tcg_gen_insn_start(dc->pc, 0);
        num_insns++;

        /* Only walk the breakpoint list when at least one is installed */
        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
            CPUBreakpoint *bp;
            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
                if (bp->pc == dc->pc) {
                    if (bp->flags & BP_CPU) {
                        gen_a64_set_pc_im(dc->pc);
                        gen_helper_check_breakpoints(cpu_env);
                        /* End the TB early; it likely won't be executed */
                        dc->is_jmp = DISAS_UPDATE;
                    } else {
                        gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
                        /* The address covered by the breakpoint must be
                           included in [tb->pc, tb->pc + tb->size) in order
                           for it to be properly cleared -- thus we
                           increment the PC here so that the logic setting
                           tb->size below does the right thing.  */
                        dc->pc += 4;
                        goto done_generating;
                    }
                    break;
                }
            }
        }

        /* The final insn of a CF_LAST_IO TB is bracketed by
         * gen_io_start() here and gen_io_end() after the loop.
         */
        if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
            gen_io_start();
        }

        if (dc->ss_active && !dc->pstate_ss) {
            /* Singlestep state is Active-pending.
             * If we're in this state at the start of a TB then either
             *  a) we just took an exception to an EL which is being debugged
             *     and this is the first insn in the exception handler
             *  b) debug exceptions were masked and we just unmasked them
             *     without changing EL (eg by clearing PSTATE.D)
             * In either case we're going to take a swstep exception in the
             * "did not step an insn" case, and so the syndrome ISV and EX
             * bits should be zero.
             */
            assert(num_insns == 1);
            gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
                          default_exception_el(dc));
            dc->is_jmp = DISAS_EXC;
            break;
        }

        disas_a64_insn(env, dc);

        /* Diagnostic only: report a TCG temporary leaked by the insn
         * just translated (dc->pc already points past it).
         */
        if (tcg_check_temp_count()) {
            fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
                    dc->pc);
        }

        /* Translation stops when a conditional branch is encountered.
         * Otherwise the subsequent code could get translated several times.
         * Also stop translation when a page boundary is reached.  This
         * ensures prefetch aborts occur at the right place.
         */
    } while (!dc->is_jmp && !tcg_op_buf_full() &&
             !cs->singlestep_enabled &&
             !singlestep &&
             !dc->ss_active &&
             dc->pc < next_page_start &&
             num_insns < max_insns);

    if (tb->cflags & CF_LAST_IO) {
        gen_io_end();
    }

    /* Either kind of single-stepping (cs->singlestep_enabled or the
     * architectural dc->ss_active) ends the TB with an exception, unless
     * the insn itself already generated one.
     */
    if (unlikely(cs->singlestep_enabled || dc->ss_active)
        && dc->is_jmp != DISAS_EXC) {
        /* Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        assert(dc->is_jmp != DISAS_TB_JUMP);
        if (dc->is_jmp != DISAS_JUMP) {
            gen_a64_set_pc_im(dc->pc);
        }
        if (cs->singlestep_enabled) {
            gen_exception_internal(EXCP_DEBUG);
        } else {
            gen_step_complete_exception(dc);
        }
    } else {
        switch (dc->is_jmp) {
        case DISAS_NEXT:
            gen_goto_tb(dc, 1, dc->pc);
            break;
        default:
        case DISAS_UPDATE:
            gen_a64_set_pc_im(dc->pc);
            /* fall through */
        case DISAS_JUMP:
            /* indicate that the hash table must be used to find the next TB */
            tcg_gen_exit_tb(0);
            break;
        case DISAS_TB_JUMP:
        case DISAS_EXC:
        case DISAS_SWI:
            /* Nothing further is emitted for these exit states */
            break;
        case DISAS_WFE:
            gen_a64_set_pc_im(dc->pc);
            gen_helper_wfe(cpu_env);
            break;
        case DISAS_YIELD:
            gen_a64_set_pc_im(dc->pc);
            gen_helper_yield(cpu_env);
            break;
        case DISAS_WFI:
            /* This is a special case because we don't want to just halt the CPU
             * if trying to debug across a WFI.
             */
            gen_a64_set_pc_im(dc->pc);
            gen_helper_wfi(cpu_env);
            /* The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(0);
            break;
        }
    }

done_generating:
    gen_tb_end(tb, num_insns);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) &&
        qemu_log_in_addr_range(pc_start)) {
        qemu_log("----------------\n");
        qemu_log("IN: %s\n", lookup_symbol(pc_start));
        log_target_disas(cs, pc_start, dc->pc - pc_start,
                         4 | (bswap_code(dc->sctlr_b) ? 2 : 0));
        qemu_log("\n");
    }
#endif
    /* Record the guest byte span and insn count covered by this TB */
    tb->size = dc->pc - pc_start;
    tb->icount = num_insns;
}
11245     tb->icount = num_insns;
11246 }