target/arm/translate-a64.c
1 /*
2 * AArch64 translation
4 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "exec/exec-all.h"
23 #include "tcg-op.h"
24 #include "qemu/log.h"
25 #include "arm_ldst.h"
26 #include "translate.h"
27 #include "internals.h"
28 #include "qemu/host-utils.h"
30 #include "exec/semihost.h"
31 #include "exec/gen-icount.h"
33 #include "exec/helper-proto.h"
34 #include "exec/helper-gen.h"
35 #include "exec/log.h"
37 #include "trace-tcg.h"
39 static TCGv_i64 cpu_X[32];
40 static TCGv_i64 cpu_pc;
42 /* Load/store exclusive handling */
43 static TCGv_i64 cpu_exclusive_high;
44 static TCGv_i64 cpu_reg(DisasContext *s, int reg);
46 static const char *regnames[] = {
47 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
48 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
49 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
50 "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
53 enum a64_shift_type {
54 A64_SHIFT_TYPE_LSL = 0,
55 A64_SHIFT_TYPE_LSR = 1,
56 A64_SHIFT_TYPE_ASR = 2,
57 A64_SHIFT_TYPE_ROR = 3
60 /* Table based decoder typedefs - used when the relevant bits for decode
61 * are too awkwardly scattered across the instruction (eg SIMD).
63 typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);
65 typedef struct AArch64DecodeTable {
66 uint32_t pattern;
67 uint32_t mask;
68 AArch64DecodeFn *disas_fn;
69 } AArch64DecodeTable;
71 /* Function prototype for gen_ functions for calling Neon helpers */
72 typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
73 typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
74 typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
75 typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
76 typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
77 typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
78 typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
79 typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
80 typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
81 typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
82 typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
83 typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
84 typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
86 /* initialize TCG globals. */
87 void a64_translate_init(void)
89 int i;
91 cpu_pc = tcg_global_mem_new_i64(cpu_env,
92 offsetof(CPUARMState, pc),
93 "pc");
94 for (i = 0; i < 32; i++) {
95 cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
96 offsetof(CPUARMState, xregs[i]),
97 regnames[i]);
100 cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
101 offsetof(CPUARMState, exclusive_high), "exclusive_high");
104 static inline int get_a64_user_mem_index(DisasContext *s)
106 /* Return the core mmu_idx to use for A64 "unprivileged load/store" insns:
107 * if EL1, access as if EL0; otherwise access at current EL
109 ARMMMUIdx useridx;
111 switch (s->mmu_idx) {
112 case ARMMMUIdx_S12NSE1:
113 useridx = ARMMMUIdx_S12NSE0;
114 break;
115 case ARMMMUIdx_S1SE1:
116 useridx = ARMMMUIdx_S1SE0;
117 break;
118 case ARMMMUIdx_S2NS:
119 g_assert_not_reached();
120 default:
121 useridx = s->mmu_idx;
122 break;
124 return arm_to_core_mmu_idx(useridx);
127 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
128 fprintf_function cpu_fprintf, int flags)
130 ARMCPU *cpu = ARM_CPU(cs);
131 CPUARMState *env = &cpu->env;
132 uint32_t psr = pstate_read(env);
133 int i;
134 int el = arm_current_el(env);
135 const char *ns_status;
137 cpu_fprintf(f, "PC=%016"PRIx64" SP=%016"PRIx64"\n",
138 env->pc, env->xregs[31]);
139 for (i = 0; i < 31; i++) {
140 cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
141 if ((i % 4) == 3) {
142 cpu_fprintf(f, "\n");
143 } else {
144 cpu_fprintf(f, " ");
148 if (arm_feature(env, ARM_FEATURE_EL3) && el != 3) {
149 ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
150 } else {
151 ns_status = "";
154 cpu_fprintf(f, "\nPSTATE=%08x %c%c%c%c %sEL%d%c\n",
155 psr,
156 psr & PSTATE_N ? 'N' : '-',
157 psr & PSTATE_Z ? 'Z' : '-',
158 psr & PSTATE_C ? 'C' : '-',
159 psr & PSTATE_V ? 'V' : '-',
160 ns_status,
162 psr & PSTATE_SP ? 'h' : 't');
164 if (flags & CPU_DUMP_FPU) {
165 int numvfpregs = 32;
166 for (i = 0; i < numvfpregs; i += 2) {
167 uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
168 uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
169 cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
170 i, vhi, vlo);
171 vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
172 vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
173 cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
174 i + 1, vhi, vlo);
176 cpu_fprintf(f, "FPCR: %08x FPSR: %08x\n",
177 vfp_get_fpcr(env), vfp_get_fpsr(env));
181 void gen_a64_set_pc_im(uint64_t val)
183 tcg_gen_movi_i64(cpu_pc, val);
186 /* Load the PC from a generic TCG variable.
188 * If address tagging is enabled via the TCR TBI bits, then loading
189 * an address into the PC will clear out any tag from it:
190 * + for EL2 and EL3 there is only one TBI bit, and if it is set
191 * then the address is zero-extended, clearing bits [63:56]
192 * + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
193 * and TBI1 controls addresses with bit 55 == 1.
194 * If the appropriate TBI bit is set for the address then
195 * the address is sign-extended from bit 55 into bits [63:56]
197 * We can avoid doing this for relative-branches, because the
198 * PC + offset can never overflow into the tag bits (assuming
199 * that virtual addresses are less than 56 bits wide, as they
200 * are currently), but we must handle it for branch-to-register.
202 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
205 if (s->current_el <= 1) {
206 /* Test if NEITHER or BOTH TBI values are set. If so, no need to
207 * examine bit 55 of address, can just generate code.
208 * If mixed, then test via generated code
210 if (s->tbi0 && s->tbi1) {
211 TCGv_i64 tmp_reg = tcg_temp_new_i64();
212 /* Both bits set, sign extension from bit 55 into [63:56] will
213 * cover both cases
215 tcg_gen_shli_i64(tmp_reg, src, 8);
216 tcg_gen_sari_i64(cpu_pc, tmp_reg, 8);
217 tcg_temp_free_i64(tmp_reg);
218 } else if (!s->tbi0 && !s->tbi1) {
219 /* Neither bit set, just load it as-is */
220 tcg_gen_mov_i64(cpu_pc, src);
221 } else {
222 TCGv_i64 tcg_tmpval = tcg_temp_new_i64();
223 TCGv_i64 tcg_bit55 = tcg_temp_new_i64();
224 TCGv_i64 tcg_zero = tcg_const_i64(0);
226 tcg_gen_andi_i64(tcg_bit55, src, (1ull << 55));
228 if (s->tbi0) {
229 /* tbi0==1, tbi1==0, so 0-fill upper byte if bit 55 = 0 */
230 tcg_gen_andi_i64(tcg_tmpval, src,
231 0x00FFFFFFFFFFFFFFull);
232 tcg_gen_movcond_i64(TCG_COND_EQ, cpu_pc, tcg_bit55, tcg_zero,
233 tcg_tmpval, src);
234 } else {
235 /* tbi0==0, tbi1==1, so 1-fill upper byte if bit 55 = 1 */
236 tcg_gen_ori_i64(tcg_tmpval, src,
237 0xFF00000000000000ull);
238 tcg_gen_movcond_i64(TCG_COND_NE, cpu_pc, tcg_bit55, tcg_zero,
239 tcg_tmpval, src);
241 tcg_temp_free_i64(tcg_zero);
242 tcg_temp_free_i64(tcg_bit55);
243 tcg_temp_free_i64(tcg_tmpval);
245 } else { /* EL > 1 */
246 if (s->tbi0) {
247 /* Force tag byte to all zero */
248 tcg_gen_andi_i64(cpu_pc, src, 0x00FFFFFFFFFFFFFFull);
249 } else {
250 /* Load unmodified address */
251 tcg_gen_mov_i64(cpu_pc, src);
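
/* The following is an illustrative, standalone sketch of the architectural
 * effect implemented by gen_a64_set_pc() above, computed on plain integers
 * rather than via TCG ops. It only covers the EL0/EL1 case described in the
 * comment; the function name and parameters are invented for the example.
 */
#if 0   /* illustrative sketch only, not part of the translator */
#include <stdint.h>

static uint64_t a64_branch_dest_el01(uint64_t addr, int tbi0, int tbi1)
{
    int bit55 = (addr >> 55) & 1;
    int tbi = bit55 ? tbi1 : tbi0;     /* TBI0 covers bit55 == 0, TBI1 covers bit55 == 1 */
    uint64_t v;

    if (!tbi) {
        return addr;                   /* tagging disabled: use the address as-is */
    }
    /* Tagging enabled: replicate bit 55 into bits [63:56], i.e. "sign-extend"
     * from a 56-bit virtual address.
     */
    v = addr & 0x00ffffffffffffffull;
    if (bit55) {
        v |= 0xff00000000000000ull;
    }
    return v;
}
#endif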
256 typedef struct DisasCompare64 {
257 TCGCond cond;
258 TCGv_i64 value;
259 } DisasCompare64;
261 static void a64_test_cc(DisasCompare64 *c64, int cc)
263 DisasCompare c32;
265 arm_test_cc(&c32, cc);
267 /* Sign-extend the 32-bit value so that the GE/LT comparisons work
268 * properly. The NE/EQ comparisons are also fine with this choice. */
269 c64->cond = c32.cond;
270 c64->value = tcg_temp_new_i64();
271 tcg_gen_ext_i32_i64(c64->value, c32.value);
273 arm_free_cc(&c32);
276 static void a64_free_cc(DisasCompare64 *c64)
278 tcg_temp_free_i64(c64->value);
281 static void gen_exception_internal(int excp)
283 TCGv_i32 tcg_excp = tcg_const_i32(excp);
285 assert(excp_is_internal(excp));
286 gen_helper_exception_internal(cpu_env, tcg_excp);
287 tcg_temp_free_i32(tcg_excp);
290 static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
292 TCGv_i32 tcg_excp = tcg_const_i32(excp);
293 TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
294 TCGv_i32 tcg_el = tcg_const_i32(target_el);
296 gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
297 tcg_syn, tcg_el);
298 tcg_temp_free_i32(tcg_el);
299 tcg_temp_free_i32(tcg_syn);
300 tcg_temp_free_i32(tcg_excp);
303 static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
305 gen_a64_set_pc_im(s->pc - offset);
306 gen_exception_internal(excp);
307 s->is_jmp = DISAS_EXC;
310 static void gen_exception_insn(DisasContext *s, int offset, int excp,
311 uint32_t syndrome, uint32_t target_el)
313 gen_a64_set_pc_im(s->pc - offset);
314 gen_exception(excp, syndrome, target_el);
315 s->is_jmp = DISAS_EXC;
318 static void gen_ss_advance(DisasContext *s)
320 /* If the singlestep state is Active-not-pending, advance to
321 * Active-pending.
323 if (s->ss_active) {
324 s->pstate_ss = 0;
325 gen_helper_clear_pstate_ss(cpu_env);
329 static void gen_step_complete_exception(DisasContext *s)
331 /* We just completed step of an insn. Move from Active-not-pending
332 * to Active-pending, and then also take the swstep exception.
333 * This corresponds to making the (IMPDEF) choice to prioritize
334 * swstep exceptions over asynchronous exceptions taken to an exception
335 * level where debug is disabled. This choice has the advantage that
336 * we do not need to maintain internal state corresponding to the
337 * ISV/EX syndrome bits between completion of the step and generation
338 * of the exception, and our syndrome information is always correct.
340 gen_ss_advance(s);
341 gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
342 default_exception_el(s));
343 s->is_jmp = DISAS_EXC;
346 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
348 /* No direct tb linking with singlestep (either QEMU's or the ARM
349 * debug architecture kind) or deterministic io
351 if (s->singlestep_enabled || s->ss_active || (s->tb->cflags & CF_LAST_IO)) {
352 return false;
355 #ifndef CONFIG_USER_ONLY
356 /* Only link tbs from inside the same guest page */
357 if ((s->tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
358 return false;
360 #endif
362 return true;
365 static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
367 TranslationBlock *tb;
369 tb = s->tb;
370 if (use_goto_tb(s, n, dest)) {
371 tcg_gen_goto_tb(n);
372 gen_a64_set_pc_im(dest);
373 tcg_gen_exit_tb((intptr_t)tb + n);
374 s->is_jmp = DISAS_TB_JUMP;
375 } else {
376 gen_a64_set_pc_im(dest);
377 if (s->ss_active) {
378 gen_step_complete_exception(s);
379 } else if (s->singlestep_enabled) {
380 gen_exception_internal(EXCP_DEBUG);
381 } else {
382 tcg_gen_lookup_and_goto_ptr(cpu_pc);
383 s->is_jmp = DISAS_TB_JUMP;
388 static void unallocated_encoding(DisasContext *s)
390 /* Unallocated and reserved encodings are uncategorized */
391 gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
392 default_exception_el(s));
395 #define unsupported_encoding(s, insn) \
396 do { \
397 qemu_log_mask(LOG_UNIMP, \
398 "%s:%d: unsupported instruction encoding 0x%08x " \
399 "at pc=%016" PRIx64 "\n", \
400 __FILE__, __LINE__, insn, s->pc - 4); \
401 unallocated_encoding(s); \
402 } while (0)
404 static void init_tmp_a64_array(DisasContext *s)
406 #ifdef CONFIG_DEBUG_TCG
407 int i;
408 for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
409 TCGV_UNUSED_I64(s->tmp_a64[i]);
411 #endif
412 s->tmp_a64_count = 0;
415 static void free_tmp_a64(DisasContext *s)
417 int i;
418 for (i = 0; i < s->tmp_a64_count; i++) {
419 tcg_temp_free_i64(s->tmp_a64[i]);
421 init_tmp_a64_array(s);
424 static TCGv_i64 new_tmp_a64(DisasContext *s)
426 assert(s->tmp_a64_count < TMP_A64_MAX);
427 return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
430 static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
432 TCGv_i64 t = new_tmp_a64(s);
433 tcg_gen_movi_i64(t, 0);
434 return t;
438 * Register access functions
440 * These functions are used for directly accessing a register in cases where
441 * changes to the final register value are likely to be made. If you
442 * need to use a register for temporary calculation (e.g. index type
443 * operations) use the read_* form.
445 * B1.2.1 Register mappings
447 * In instruction register encoding 31 can refer to ZR (zero register) or
448 * the SP (stack pointer) depending on context. In QEMU's case we map SP
449 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
450 * This is the point of the _sp forms.
452 static TCGv_i64 cpu_reg(DisasContext *s, int reg)
454 if (reg == 31) {
455 return new_tmp_a64_zero(s);
456 } else {
457 return cpu_X[reg];
461 /* register access for when 31 == SP */
462 static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
464 return cpu_X[reg];
467 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
468 * representing the register contents. This TCGv is an auto-freed
469 * temporary so it need not be explicitly freed, and may be modified.
471 static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
473 TCGv_i64 v = new_tmp_a64(s);
474 if (reg != 31) {
475 if (sf) {
476 tcg_gen_mov_i64(v, cpu_X[reg]);
477 } else {
478 tcg_gen_ext32u_i64(v, cpu_X[reg]);
480 } else {
481 tcg_gen_movi_i64(v, 0);
483 return v;
486 static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
488 TCGv_i64 v = new_tmp_a64(s);
489 if (sf) {
490 tcg_gen_mov_i64(v, cpu_X[reg]);
491 } else {
492 tcg_gen_ext32u_i64(v, cpu_X[reg]);
494 return v;
497 /* We should have at some point before trying to access an FP register
498 * done the necessary access check, so assert that
499 * (a) we did the check and
500 * (b) we didn't then just plough ahead anyway if it failed.
501 * Print the instruction pattern in the abort message so we can figure
502 * out what we need to fix if a user encounters this problem in the wild.
504 static inline void assert_fp_access_checked(DisasContext *s)
506 #ifdef CONFIG_DEBUG_TCG
507 if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
508 fprintf(stderr, "target-arm: FP access check missing for "
509 "instruction 0x%08x\n", s->insn);
510 abort();
512 #endif
515 /* Return the offset into CPUARMState of an element of specified
516 * size, 'element' places in from the least significant end of
517 * the FP/vector register Qn.
519 static inline int vec_reg_offset(DisasContext *s, int regno,
520 int element, TCGMemOp size)
522 int offs = 0;
523 #ifdef HOST_WORDS_BIGENDIAN
524 /* This is complicated slightly because vfp.regs[2n] is
525 * still the low half and vfp.regs[2n+1] the high half
526 * of the 128 bit vector, even on big endian systems.
527 * Calculate the offset assuming a fully bigendian 128 bits,
528 * then XOR to account for the order of the two 64 bit halves.
530 offs += (16 - ((element + 1) * (1 << size)));
531 offs ^= 8;
532 #else
533 offs += element * (1 << size);
534 #endif
535 offs += offsetof(CPUARMState, vfp.regs[regno * 2]);
536 assert_fp_access_checked(s);
537 return offs;
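
/* A standalone sketch of the offset computation above, relative to the start
 * of one Q register's storage (the real code then adds the offset of
 * vfp.regs[] within CPUARMState). The helper name is invented for the
 * example; it simply mirrors the big-endian XOR trick described in the
 * comment.
 */
#if 0   /* illustrative sketch only */
#include <stddef.h>

static size_t q_elem_offset(int element, int size_log2, int host_big_endian)
{
    size_t offs;

    if (host_big_endian) {
        /* Offset assuming a fully big-endian 128-bit value... */
        offs = 16 - ((element + 1) << size_log2);
        /* ...then XOR with 8 to swap the two 64-bit halves back, since the
         * low doubleword is always stored first. */
        offs ^= 8;
    } else {
        offs = (size_t)element << size_log2;
    }
    return offs;
}
#endif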
540 /* Return the offset into CPUARMState of a slice (from
541 * the least significant end) of FP register Qn (ie
542 * Dn, Sn, Hn or Bn).
543 * (Note that this is not the same mapping as for A32; see cpu.h)
545 static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
547 int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
548 #ifdef HOST_WORDS_BIGENDIAN
549 offs += (8 - (1 << size));
550 #endif
551 assert_fp_access_checked(s);
552 return offs;
555 /* Offset of the high half of the 128 bit vector Qn */
556 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
558 assert_fp_access_checked(s);
559 return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
562 /* Convenience accessors for reading and writing single and double
563 * FP registers. Writing clears the upper parts of the associated
564 * 128 bit vector register, as required by the architecture.
565 * Note that unlike the GP register accessors, the values returned
566 * by the read functions must be manually freed.
568 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
570 TCGv_i64 v = tcg_temp_new_i64();
572 tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
573 return v;
576 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
578 TCGv_i32 v = tcg_temp_new_i32();
580 tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
581 return v;
584 static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
586 TCGv_i64 tcg_zero = tcg_const_i64(0);
588 tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
589 tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg));
590 tcg_temp_free_i64(tcg_zero);
593 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
595 TCGv_i64 tmp = tcg_temp_new_i64();
597 tcg_gen_extu_i32_i64(tmp, v);
598 write_fp_dreg(s, reg, tmp);
599 tcg_temp_free_i64(tmp);
602 static TCGv_ptr get_fpstatus_ptr(void)
604 TCGv_ptr statusptr = tcg_temp_new_ptr();
605 int offset;
607 /* In A64 all instructions (both FP and Neon) use the FPCR;
608 * there is no equivalent of the A32 Neon "standard FPSCR value"
609 * and all operations use vfp.fp_status.
611 offset = offsetof(CPUARMState, vfp.fp_status);
612 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
613 return statusptr;
616 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
617 * than the 32 bit equivalent.
619 static inline void gen_set_NZ64(TCGv_i64 result)
621 tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
622 tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
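
/* A standalone sketch of what gen_set_NZ64() computes, using QEMU's lazy
 * flag representation: Z is set iff ZF == 0, and N is bit 31 of NF. For a
 * 64-bit result the high half becomes NF (its bit 31 is bit 63 of the
 * result), and ZF = low | high, which is zero exactly when the result is.
 * The helper name and out-parameters are invented for the example.
 */
#if 0   /* illustrative sketch only */
#include <stdint.h>

static void set_nz64(uint64_t result, uint32_t *nf, uint32_t *zf)
{
    uint32_t lo = (uint32_t)result;
    uint32_t hi = (uint32_t)(result >> 32);

    *nf = hi;          /* bit 31 of NF == bit 63 of the result == N */
    *zf = lo | hi;     /* zero iff the whole 64-bit result is zero  */
}
#endif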
625 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
626 static inline void gen_logic_CC(int sf, TCGv_i64 result)
628 if (sf) {
629 gen_set_NZ64(result);
630 } else {
631 tcg_gen_extrl_i64_i32(cpu_ZF, result);
632 tcg_gen_mov_i32(cpu_NF, cpu_ZF);
634 tcg_gen_movi_i32(cpu_CF, 0);
635 tcg_gen_movi_i32(cpu_VF, 0);
638 /* dest = T0 + T1; compute C, N, V and Z flags */
639 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
641 if (sf) {
642 TCGv_i64 result, flag, tmp;
643 result = tcg_temp_new_i64();
644 flag = tcg_temp_new_i64();
645 tmp = tcg_temp_new_i64();
647 tcg_gen_movi_i64(tmp, 0);
648 tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
650 tcg_gen_extrl_i64_i32(cpu_CF, flag);
652 gen_set_NZ64(result);
654 tcg_gen_xor_i64(flag, result, t0);
655 tcg_gen_xor_i64(tmp, t0, t1);
656 tcg_gen_andc_i64(flag, flag, tmp);
657 tcg_temp_free_i64(tmp);
658 tcg_gen_extrh_i64_i32(cpu_VF, flag);
660 tcg_gen_mov_i64(dest, result);
661 tcg_temp_free_i64(result);
662 tcg_temp_free_i64(flag);
663 } else {
664 /* 32 bit arithmetic */
665 TCGv_i32 t0_32 = tcg_temp_new_i32();
666 TCGv_i32 t1_32 = tcg_temp_new_i32();
667 TCGv_i32 tmp = tcg_temp_new_i32();
669 tcg_gen_movi_i32(tmp, 0);
670 tcg_gen_extrl_i64_i32(t0_32, t0);
671 tcg_gen_extrl_i64_i32(t1_32, t1);
672 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
673 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
674 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
675 tcg_gen_xor_i32(tmp, t0_32, t1_32);
676 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
677 tcg_gen_extu_i32_i64(dest, cpu_NF);
679 tcg_temp_free_i32(tmp);
680 tcg_temp_free_i32(t0_32);
681 tcg_temp_free_i32(t1_32);
685 /* dest = T0 - T1; compute C, N, V and Z flags */
686 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
688 if (sf) {
689 /* 64 bit arithmetic */
690 TCGv_i64 result, flag, tmp;
692 result = tcg_temp_new_i64();
693 flag = tcg_temp_new_i64();
694 tcg_gen_sub_i64(result, t0, t1);
696 gen_set_NZ64(result);
698 tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
699 tcg_gen_extrl_i64_i32(cpu_CF, flag);
701 tcg_gen_xor_i64(flag, result, t0);
702 tmp = tcg_temp_new_i64();
703 tcg_gen_xor_i64(tmp, t0, t1);
704 tcg_gen_and_i64(flag, flag, tmp);
705 tcg_temp_free_i64(tmp);
706 tcg_gen_extrh_i64_i32(cpu_VF, flag);
707 tcg_gen_mov_i64(dest, result);
708 tcg_temp_free_i64(flag);
709 tcg_temp_free_i64(result);
710 } else {
711 /* 32 bit arithmetic */
712 TCGv_i32 t0_32 = tcg_temp_new_i32();
713 TCGv_i32 t1_32 = tcg_temp_new_i32();
714 TCGv_i32 tmp;
716 tcg_gen_extrl_i64_i32(t0_32, t0);
717 tcg_gen_extrl_i64_i32(t1_32, t1);
718 tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
719 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
720 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
721 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
722 tmp = tcg_temp_new_i32();
723 tcg_gen_xor_i32(tmp, t0_32, t1_32);
724 tcg_temp_free_i32(t0_32);
725 tcg_temp_free_i32(t1_32);
726 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
727 tcg_temp_free_i32(tmp);
728 tcg_gen_extu_i32_i64(dest, cpu_NF);
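
/* A standalone sketch of the flag formulas the 32-bit paths of gen_add_CC()
 * and gen_sub_CC() generate, again in the lazy representation (N = NF bit 31,
 * Z iff ZF == 0, C = CF, V = VF bit 31). The XOR expressions are the usual
 * signed-overflow identities. Function names are invented for the example.
 */
#if 0   /* illustrative sketch only */
#include <stdint.h>

static void adds32(uint32_t a, uint32_t b,
                   uint32_t *nf, uint32_t *zf, uint32_t *cf, uint32_t *vf)
{
    uint64_t wide = (uint64_t)a + b;
    uint32_t res = (uint32_t)wide;

    *nf = res;
    *zf = res;
    *cf = (uint32_t)(wide >> 32);     /* carry out of bit 31 */
    *vf = (res ^ a) & ~(a ^ b);       /* overflow iff bit 31 is set */
}

static void subs32(uint32_t a, uint32_t b,
                   uint32_t *nf, uint32_t *zf, uint32_t *cf, uint32_t *vf)
{
    uint32_t res = a - b;

    *nf = res;
    *zf = res;
    *cf = (a >= b);                   /* C means "no borrow" for subtraction */
    *vf = (res ^ a) & (a ^ b);
}
#endif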
732 /* dest = T0 + T1 + CF; do not compute flags. */
733 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
735 TCGv_i64 flag = tcg_temp_new_i64();
736 tcg_gen_extu_i32_i64(flag, cpu_CF);
737 tcg_gen_add_i64(dest, t0, t1);
738 tcg_gen_add_i64(dest, dest, flag);
739 tcg_temp_free_i64(flag);
741 if (!sf) {
742 tcg_gen_ext32u_i64(dest, dest);
746 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
747 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
749 if (sf) {
750 TCGv_i64 result, cf_64, vf_64, tmp;
751 result = tcg_temp_new_i64();
752 cf_64 = tcg_temp_new_i64();
753 vf_64 = tcg_temp_new_i64();
754 tmp = tcg_const_i64(0);
756 tcg_gen_extu_i32_i64(cf_64, cpu_CF);
757 tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
758 tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
759 tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
760 gen_set_NZ64(result);
762 tcg_gen_xor_i64(vf_64, result, t0);
763 tcg_gen_xor_i64(tmp, t0, t1);
764 tcg_gen_andc_i64(vf_64, vf_64, tmp);
765 tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
767 tcg_gen_mov_i64(dest, result);
769 tcg_temp_free_i64(tmp);
770 tcg_temp_free_i64(vf_64);
771 tcg_temp_free_i64(cf_64);
772 tcg_temp_free_i64(result);
773 } else {
774 TCGv_i32 t0_32, t1_32, tmp;
775 t0_32 = tcg_temp_new_i32();
776 t1_32 = tcg_temp_new_i32();
777 tmp = tcg_const_i32(0);
779 tcg_gen_extrl_i64_i32(t0_32, t0);
780 tcg_gen_extrl_i64_i32(t1_32, t1);
781 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
782 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);
784 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
785 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
786 tcg_gen_xor_i32(tmp, t0_32, t1_32);
787 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
788 tcg_gen_extu_i32_i64(dest, cpu_NF);
790 tcg_temp_free_i32(tmp);
791 tcg_temp_free_i32(t1_32);
792 tcg_temp_free_i32(t0_32);
797 * Load/Store generators
801 * Store from GPR register to memory.
803 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
804 TCGv_i64 tcg_addr, int size, int memidx,
805 bool iss_valid,
806 unsigned int iss_srt,
807 bool iss_sf, bool iss_ar)
809 g_assert(size <= 3);
810 tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);
812 if (iss_valid) {
813 uint32_t syn;
815 syn = syn_data_abort_with_iss(0,
816 size,
817 false,
818 iss_srt,
819 iss_sf,
820 iss_ar,
821 0, 0, 0, 0, 0, false);
822 disas_set_insn_syndrome(s, syn);
826 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
827 TCGv_i64 tcg_addr, int size,
828 bool iss_valid,
829 unsigned int iss_srt,
830 bool iss_sf, bool iss_ar)
832 do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
833 iss_valid, iss_srt, iss_sf, iss_ar);
837 * Load from memory to GPR register
839 static void do_gpr_ld_memidx(DisasContext *s,
840 TCGv_i64 dest, TCGv_i64 tcg_addr,
841 int size, bool is_signed,
842 bool extend, int memidx,
843 bool iss_valid, unsigned int iss_srt,
844 bool iss_sf, bool iss_ar)
846 TCGMemOp memop = s->be_data + size;
848 g_assert(size <= 3);
850 if (is_signed) {
851 memop += MO_SIGN;
854 tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
856 if (extend && is_signed) {
857 g_assert(size < 3);
858 tcg_gen_ext32u_i64(dest, dest);
861 if (iss_valid) {
862 uint32_t syn;
864 syn = syn_data_abort_with_iss(0,
865 size,
866 is_signed,
867 iss_srt,
868 iss_sf,
869 iss_ar,
870 0, 0, 0, 0, 0, false);
871 disas_set_insn_syndrome(s, syn);
875 static void do_gpr_ld(DisasContext *s,
876 TCGv_i64 dest, TCGv_i64 tcg_addr,
877 int size, bool is_signed, bool extend,
878 bool iss_valid, unsigned int iss_srt,
879 bool iss_sf, bool iss_ar)
881 do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
882 get_mem_index(s),
883 iss_valid, iss_srt, iss_sf, iss_ar);
887 * Store from FP register to memory
889 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
891 /* This writes the bottom N bits of a 128 bit wide vector to memory */
892 TCGv_i64 tmp = tcg_temp_new_i64();
893 tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
894 if (size < 4) {
895 tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
896 s->be_data + size);
897 } else {
898 bool be = s->be_data == MO_BE;
899 TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
901 tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
902 tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
903 s->be_data | MO_Q);
904 tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
905 tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
906 s->be_data | MO_Q);
907 tcg_temp_free_i64(tcg_hiaddr);
910 tcg_temp_free_i64(tmp);
914 * Load from memory to FP register
916 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
918 /* This always zero-extends and writes to a full 128 bit wide vector */
919 TCGv_i64 tmplo = tcg_temp_new_i64();
920 TCGv_i64 tmphi;
922 if (size < 4) {
923 TCGMemOp memop = s->be_data + size;
924 tmphi = tcg_const_i64(0);
925 tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
926 } else {
927 bool be = s->be_data == MO_BE;
928 TCGv_i64 tcg_hiaddr;
930 tmphi = tcg_temp_new_i64();
931 tcg_hiaddr = tcg_temp_new_i64();
933 tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
934 tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
935 s->be_data | MO_Q);
936 tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
937 s->be_data | MO_Q);
938 tcg_temp_free_i64(tcg_hiaddr);
941 tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
942 tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
944 tcg_temp_free_i64(tmplo);
945 tcg_temp_free_i64(tmphi);
949 * Vector load/store helpers.
951 * The principal difference between this and a FP load is that we don't
952 * zero extend as we are filling a partial chunk of the vector register.
953 * These functions don't support 128 bit loads/stores, which would be
954 * normal load/store operations.
956 * The _i32 versions are useful when operating on 32 bit quantities
957 * (eg for floating point single or using Neon helper functions).
960 /* Get value of an element within a vector register */
961 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
962 int element, TCGMemOp memop)
964 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
965 switch (memop) {
966 case MO_8:
967 tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
968 break;
969 case MO_16:
970 tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
971 break;
972 case MO_32:
973 tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
974 break;
975 case MO_8|MO_SIGN:
976 tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
977 break;
978 case MO_16|MO_SIGN:
979 tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
980 break;
981 case MO_32|MO_SIGN:
982 tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
983 break;
984 case MO_64:
985 case MO_64|MO_SIGN:
986 tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
987 break;
988 default:
989 g_assert_not_reached();
993 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
994 int element, TCGMemOp memop)
996 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
997 switch (memop) {
998 case MO_8:
999 tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
1000 break;
1001 case MO_16:
1002 tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
1003 break;
1004 case MO_8|MO_SIGN:
1005 tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
1006 break;
1007 case MO_16|MO_SIGN:
1008 tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
1009 break;
1010 case MO_32:
1011 case MO_32|MO_SIGN:
1012 tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
1013 break;
1014 default:
1015 g_assert_not_reached();
1019 /* Set value of an element within a vector register */
1020 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1021 int element, TCGMemOp memop)
1023 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1024 switch (memop) {
1025 case MO_8:
1026 tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
1027 break;
1028 case MO_16:
1029 tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
1030 break;
1031 case MO_32:
1032 tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
1033 break;
1034 case MO_64:
1035 tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
1036 break;
1037 default:
1038 g_assert_not_reached();
1042 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1043 int destidx, int element, TCGMemOp memop)
1045 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1046 switch (memop) {
1047 case MO_8:
1048 tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
1049 break;
1050 case MO_16:
1051 tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
1052 break;
1053 case MO_32:
1054 tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
1055 break;
1056 default:
1057 g_assert_not_reached();
1061 /* Clear the high 64 bits of a 128 bit vector (in general non-quad
1062 * vector ops all need to do this).
1064 static void clear_vec_high(DisasContext *s, int rd)
1066 TCGv_i64 tcg_zero = tcg_const_i64(0);
1068 write_vec_element(s, tcg_zero, rd, 1, MO_64);
1069 tcg_temp_free_i64(tcg_zero);
1072 /* Store from vector register to memory */
1073 static void do_vec_st(DisasContext *s, int srcidx, int element,
1074 TCGv_i64 tcg_addr, int size)
1076 TCGMemOp memop = s->be_data + size;
1077 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1079 read_vec_element(s, tcg_tmp, srcidx, element, size);
1080 tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
1082 tcg_temp_free_i64(tcg_tmp);
1085 /* Load from memory to vector register */
1086 static void do_vec_ld(DisasContext *s, int destidx, int element,
1087 TCGv_i64 tcg_addr, int size)
1089 TCGMemOp memop = s->be_data + size;
1090 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1092 tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
1093 write_vec_element(s, tcg_tmp, destidx, element, size);
1095 tcg_temp_free_i64(tcg_tmp);
1098 /* Check that FP/Neon access is enabled. If it is, return
1099 * true. If not, emit code to generate an appropriate exception,
1100 * and return false; the caller should not emit any code for
1101 * the instruction. Note that this check must happen after all
1102 * unallocated-encoding checks (otherwise the syndrome information
1103 * for the resulting exception will be incorrect).
1105 static inline bool fp_access_check(DisasContext *s)
1107 assert(!s->fp_access_checked);
1108 s->fp_access_checked = true;
1110 if (!s->fp_excp_el) {
1111 return true;
1114 gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
1115 s->fp_excp_el);
1116 return false;
1120 * This utility function is for doing register extension with an
1121 * optional shift. You will likely want to pass a temporary for the
1122 * destination register. See DecodeRegExtend() in the ARM ARM.
1124 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1125 int option, unsigned int shift)
1127 int extsize = extract32(option, 0, 2);
1128 bool is_signed = extract32(option, 2, 1);
1130 if (is_signed) {
1131 switch (extsize) {
1132 case 0:
1133 tcg_gen_ext8s_i64(tcg_out, tcg_in);
1134 break;
1135 case 1:
1136 tcg_gen_ext16s_i64(tcg_out, tcg_in);
1137 break;
1138 case 2:
1139 tcg_gen_ext32s_i64(tcg_out, tcg_in);
1140 break;
1141 case 3:
1142 tcg_gen_mov_i64(tcg_out, tcg_in);
1143 break;
1145 } else {
1146 switch (extsize) {
1147 case 0:
1148 tcg_gen_ext8u_i64(tcg_out, tcg_in);
1149 break;
1150 case 1:
1151 tcg_gen_ext16u_i64(tcg_out, tcg_in);
1152 break;
1153 case 2:
1154 tcg_gen_ext32u_i64(tcg_out, tcg_in);
1155 break;
1156 case 3:
1157 tcg_gen_mov_i64(tcg_out, tcg_in);
1158 break;
1162 if (shift) {
1163 tcg_gen_shli_i64(tcg_out, tcg_out, shift);
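
/* A standalone sketch of DecodeRegExtend as implemented above: option[1:0]
 * selects the extension width (8/16/32/64 bits), option[2] selects signed
 * versus unsigned extension, and the result is then shifted left (0-4 in the
 * real encodings). The helper name is invented for the example.
 */
#if 0   /* illustrative sketch only */
#include <stdint.h>

static uint64_t ext_and_shift(uint64_t val, int option, unsigned shift)
{
    int bits = 8 << (option & 3);                /* 8, 16, 32 or 64 */
    uint64_t mask = bits == 64 ? ~UINT64_C(0) : (UINT64_C(1) << bits) - 1;
    uint64_t out = val & mask;

    if ((option & 4) && bits < 64 && (out >> (bits - 1))) {
        out |= ~mask;                            /* signed: replicate the top bit */
    }
    return out << shift;
}
#endif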
1167 static inline void gen_check_sp_alignment(DisasContext *s)
1169 /* The AArch64 architecture mandates that (if enabled via PSTATE
1170 * or SCTLR bits) there is a check that SP is 16-aligned on every
1171 * SP-relative load or store (with an exception generated if it is not).
1172 * In line with general QEMU practice regarding misaligned accesses,
1173 * we omit these checks for the sake of guest program performance.
1174 * This function is provided as a hook so we can more easily add these
1175 * checks in future (possibly as a "favour catching guest program bugs
1176 * over speed" user selectable option).
1181 * This provides a simple table-based lookup decoder. It is
1182 * intended to be used when the relevant bits for decode are too
1183 * awkwardly placed and switch/if based logic would be confusing and
1184 * deeply nested. Since it's a linear search through the table, tables
1185 * should be kept small.
1187 * It returns the first handler where insn & mask == pattern, or
1188 * NULL if there is no match.
1189 * The table is terminated by an empty mask (i.e. 0)
1191 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1192 uint32_t insn)
1194 const AArch64DecodeTable *tptr = table;
1196 while (tptr->mask) {
1197 if ((insn & tptr->mask) == tptr->pattern) {
1198 return tptr->disas_fn;
1200 tptr++;
1202 return NULL;
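
/* A toy example of how lookup_disas_fn() is typically used by the decoders
 * in this file. The pattern/mask values and handler names below are invented
 * purely for illustration; they do not correspond to real encodings.
 */
#if 0   /* illustrative sketch only */
static void disas_example_a(DisasContext *s, uint32_t insn) { /* ... */ }
static void disas_example_b(DisasContext *s, uint32_t insn) { /* ... */ }

static const AArch64DecodeTable example_decode_table[] = {
    /* pattern      mask        handler          */
    { 0x0e200400, 0x9f200400, disas_example_a },
    { 0x0e200000, 0xbf208c00, disas_example_b },
    { 0x00000000, 0x00000000, NULL }            /* mask == 0 terminates */
};

static void disas_example_group(DisasContext *s, uint32_t insn)
{
    AArch64DecodeFn *fn = lookup_disas_fn(&example_decode_table[0], insn);

    if (fn) {
        fn(s, insn);
    } else {
        unallocated_encoding(s);
    }
}
#endif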
1206 * the instruction disassembly implemented here matches
1207 * the instruction encoding classifications in chapter 3 (C3)
1208 * of the ARM Architecture Reference Manual (DDI0487A_a)
1211 /* C3.2.7 Unconditional branch (immediate)
1212 * 31 30 26 25 0
1213 * +----+-----------+-------------------------------------+
1214 * | op | 0 0 1 0 1 | imm26 |
1215 * +----+-----------+-------------------------------------+
1217 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1219 uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
1221 if (insn & (1U << 31)) {
1222 /* C5.6.26 BL Branch with link */
1223 tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1226 /* C5.6.20 B Branch / C5.6.26 BL Branch with link */
1227 gen_goto_tb(s, 0, addr);
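
/* A standalone sketch of the target computation above for B/BL: imm26 is a
 * signed word offset from the branch instruction itself. Since s->pc has
 * already been advanced past the instruction, the code subtracts 4 before
 * applying the offset. The helper name is invented for the example.
 */
#if 0   /* illustrative sketch only */
#include <stdint.h>

static uint64_t b_imm_target(uint64_t pc_next, uint32_t insn)
{
    int64_t imm26 = insn & 0x03ffffffu;

    if (imm26 & 0x02000000) {
        imm26 -= 0x04000000;          /* sign-extend bits [25:0] */
    }
    return (pc_next - 4) + imm26 * 4;
}
#endif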
1230 /* C3.2.1 Compare & branch (immediate)
1231 * 31 30 25 24 23 5 4 0
1232 * +----+-------------+----+---------------------+--------+
1233 * | sf | 0 1 1 0 1 0 | op | imm19 | Rt |
1234 * +----+-------------+----+---------------------+--------+
1236 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1238 unsigned int sf, op, rt;
1239 uint64_t addr;
1240 TCGLabel *label_match;
1241 TCGv_i64 tcg_cmp;
1243 sf = extract32(insn, 31, 1);
1244 op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1245 rt = extract32(insn, 0, 5);
1246 addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1248 tcg_cmp = read_cpu_reg(s, rt, sf);
1249 label_match = gen_new_label();
1251 tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1252 tcg_cmp, 0, label_match);
1254 gen_goto_tb(s, 0, s->pc);
1255 gen_set_label(label_match);
1256 gen_goto_tb(s, 1, addr);
1259 /* C3.2.5 Test & branch (immediate)
1260 * 31 30 25 24 23 19 18 5 4 0
1261 * +----+-------------+----+-------+-------------+------+
1262 * | b5 | 0 1 1 0 1 1 | op | b40 | imm14 | Rt |
1263 * +----+-------------+----+-------+-------------+------+
1265 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1267 unsigned int bit_pos, op, rt;
1268 uint64_t addr;
1269 TCGLabel *label_match;
1270 TCGv_i64 tcg_cmp;
1272 bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1273 op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1274 addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
1275 rt = extract32(insn, 0, 5);
1277 tcg_cmp = tcg_temp_new_i64();
1278 tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1279 label_match = gen_new_label();
1280 tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1281 tcg_cmp, 0, label_match);
1282 tcg_temp_free_i64(tcg_cmp);
1283 gen_goto_tb(s, 0, s->pc);
1284 gen_set_label(label_match);
1285 gen_goto_tb(s, 1, addr);
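
/* A standalone sketch of how TBZ/TBNZ rebuild the tested bit number from
 * b5 (insn[31]) and b40 (insn[23:19]), giving a 6-bit position so that X
 * registers can be tested too. The helper name is invented for the example.
 */
#if 0   /* illustrative sketch only */
#include <stdint.h>
#include <stdbool.h>

static bool tb_branch_taken(uint64_t rt_val, uint32_t insn)
{
    unsigned bit_pos = (((insn >> 31) & 1) << 5) | ((insn >> 19) & 0x1f);
    unsigned op = (insn >> 24) & 1;   /* 0: TBZ, 1: TBNZ */
    unsigned bit = (rt_val >> bit_pos) & 1;

    return op ? bit != 0 : bit == 0;
}
#endif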
1288 /* C3.2.2 / C5.6.19 Conditional branch (immediate)
1289 * 31 25 24 23 5 4 3 0
1290 * +---------------+----+---------------------+----+------+
1291 * | 0 1 0 1 0 1 0 | o1 | imm19 | o0 | cond |
1292 * +---------------+----+---------------------+----+------+
1294 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1296 unsigned int cond;
1297 uint64_t addr;
1299 if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1300 unallocated_encoding(s);
1301 return;
1303 addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1304 cond = extract32(insn, 0, 4);
1306 if (cond < 0x0e) {
1307 /* genuinely conditional branches */
1308 TCGLabel *label_match = gen_new_label();
1309 arm_gen_test_cc(cond, label_match);
1310 gen_goto_tb(s, 0, s->pc);
1311 gen_set_label(label_match);
1312 gen_goto_tb(s, 1, addr);
1313 } else {
1314 /* 0xe and 0xf are both "always" conditions */
1315 gen_goto_tb(s, 0, addr);
1319 /* C5.6.68 HINT */
1320 static void handle_hint(DisasContext *s, uint32_t insn,
1321 unsigned int op1, unsigned int op2, unsigned int crm)
1323 unsigned int selector = crm << 3 | op2;
1325 if (op1 != 3) {
1326 unallocated_encoding(s);
1327 return;
1330 switch (selector) {
1331 case 0: /* NOP */
1332 return;
1333 case 3: /* WFI */
1334 s->is_jmp = DISAS_WFI;
1335 return;
1336 case 1: /* YIELD */
1337 if (!parallel_cpus) {
1338 s->is_jmp = DISAS_YIELD;
1340 return;
1341 case 2: /* WFE */
1342 if (!parallel_cpus) {
1343 s->is_jmp = DISAS_WFE;
1345 return;
1346 case 4: /* SEV */
1347 case 5: /* SEVL */
1348 /* we treat all as NOP at least for now */
1349 return;
1350 default:
1351 /* default specified as NOP equivalent */
1352 return;
1356 static void gen_clrex(DisasContext *s, uint32_t insn)
1358 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1361 /* CLREX, DSB, DMB, ISB */
1362 static void handle_sync(DisasContext *s, uint32_t insn,
1363 unsigned int op1, unsigned int op2, unsigned int crm)
1365 TCGBar bar;
1367 if (op1 != 3) {
1368 unallocated_encoding(s);
1369 return;
1372 switch (op2) {
1373 case 2: /* CLREX */
1374 gen_clrex(s, insn);
1375 return;
1376 case 4: /* DSB */
1377 case 5: /* DMB */
1378 switch (crm & 3) {
1379 case 1: /* MBReqTypes_Reads */
1380 bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1381 break;
1382 case 2: /* MBReqTypes_Writes */
1383 bar = TCG_BAR_SC | TCG_MO_ST_ST;
1384 break;
1385 default: /* MBReqTypes_All */
1386 bar = TCG_BAR_SC | TCG_MO_ALL;
1387 break;
1389 tcg_gen_mb(bar);
1390 return;
1391 case 6: /* ISB */
1392 /* We need to break the TB after this insn to execute
1393 * self-modifying code correctly and also to take
1394 * any pending interrupts immediately.
1396 gen_goto_tb(s, 0, s->pc);
1397 return;
1398 default:
1399 unallocated_encoding(s);
1400 return;
1404 /* C5.6.130 MSR (immediate) - move immediate to processor state field */
1405 static void handle_msr_i(DisasContext *s, uint32_t insn,
1406 unsigned int op1, unsigned int op2, unsigned int crm)
1408 int op = op1 << 3 | op2;
1409 switch (op) {
1410 case 0x05: /* SPSel */
1411 if (s->current_el == 0) {
1412 unallocated_encoding(s);
1413 return;
1415 /* fall through */
1416 case 0x1e: /* DAIFSet */
1417 case 0x1f: /* DAIFClear */
1419 TCGv_i32 tcg_imm = tcg_const_i32(crm);
1420 TCGv_i32 tcg_op = tcg_const_i32(op);
1421 gen_a64_set_pc_im(s->pc - 4);
1422 gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
1423 tcg_temp_free_i32(tcg_imm);
1424 tcg_temp_free_i32(tcg_op);
1425 /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs. */
1426 gen_a64_set_pc_im(s->pc);
1427 s->is_jmp = (op == 0x1f ? DISAS_EXIT : DISAS_JUMP);
1428 break;
1430 default:
1431 unallocated_encoding(s);
1432 return;
1436 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1438 TCGv_i32 tmp = tcg_temp_new_i32();
1439 TCGv_i32 nzcv = tcg_temp_new_i32();
1441 /* build bit 31, N */
1442 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1443 /* build bit 30, Z */
1444 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1445 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1446 /* build bit 29, C */
1447 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1448 /* build bit 28, V */
1449 tcg_gen_shri_i32(tmp, cpu_VF, 31);
1450 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1451 /* generate result */
1452 tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1454 tcg_temp_free_i32(nzcv);
1455 tcg_temp_free_i32(tmp);
1458 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1461 TCGv_i32 nzcv = tcg_temp_new_i32();
1463 /* take NZCV from R[t] */
1464 tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1466 /* bit 31, N */
1467 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1468 /* bit 30, Z */
1469 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1470 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1471 /* bit 29, C */
1472 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1473 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1474 /* bit 28, V */
1475 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1476 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1477 tcg_temp_free_i32(nzcv);
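
/* A standalone sketch of the NZCV packing/unpacking done by gen_get_nzcv()
 * and gen_set_nzcv() above, using the lazy flag representation (N = NF bit
 * 31, Z iff ZF == 0, C = CF as 0/1, V = VF bit 31). The real read path then
 * zero-extends the 32-bit value into the 64-bit Xt register. Names are
 * invented for the example.
 */
#if 0   /* illustrative sketch only */
#include <stdint.h>

static uint32_t get_nzcv(uint32_t nf, uint32_t zf, uint32_t cf, uint32_t vf)
{
    uint32_t nzcv = 0;

    nzcv |= nf & (1u << 31);          /* N: bit 31 of NF      */
    nzcv |= (zf == 0) << 30;          /* Z: set iff ZF == 0   */
    nzcv |= (cf & 1) << 29;           /* C: CF is already 0/1 */
    nzcv |= (vf >> 31) << 28;         /* V: bit 31 of VF      */
    return nzcv;
}

static void set_nzcv(uint32_t nzcv,
                     uint32_t *nf, uint32_t *zf, uint32_t *cf, uint32_t *vf)
{
    *nf = nzcv & (1u << 31);
    *zf = !(nzcv & (1u << 30));       /* ZF == 0 means Z is set */
    *cf = (nzcv >> 29) & 1;
    *vf = (nzcv & (1u << 28)) << 3;   /* move bit 28 up to bit 31 */
}
#endif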
1480 /* C5.6.129 MRS - move from system register
1481 * C5.6.131 MSR (register) - move to system register
1482 * C5.6.204 SYS
1483 * C5.6.205 SYSL
1484 * These are all essentially the same insn in 'read' and 'write'
1485 * versions, with varying op0 fields.
1487 static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1488 unsigned int op0, unsigned int op1, unsigned int op2,
1489 unsigned int crn, unsigned int crm, unsigned int rt)
1491 const ARMCPRegInfo *ri;
1492 TCGv_i64 tcg_rt;
1494 ri = get_arm_cp_reginfo(s->cp_regs,
1495 ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1496 crn, crm, op0, op1, op2));
1498 if (!ri) {
1499 /* Unknown register; this might be a guest error or a QEMU
1500 * unimplemented feature.
1502 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1503 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1504 isread ? "read" : "write", op0, op1, crn, crm, op2);
1505 unallocated_encoding(s);
1506 return;
1509 /* Check access permissions */
1510 if (!cp_access_ok(s->current_el, ri, isread)) {
1511 unallocated_encoding(s);
1512 return;
1515 if (ri->accessfn) {
1516 /* Emit code to perform further access permissions checks at
1517 * runtime; this may result in an exception.
1519 TCGv_ptr tmpptr;
1520 TCGv_i32 tcg_syn, tcg_isread;
1521 uint32_t syndrome;
1523 gen_a64_set_pc_im(s->pc - 4);
1524 tmpptr = tcg_const_ptr(ri);
1525 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1526 tcg_syn = tcg_const_i32(syndrome);
1527 tcg_isread = tcg_const_i32(isread);
1528 gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
1529 tcg_temp_free_ptr(tmpptr);
1530 tcg_temp_free_i32(tcg_syn);
1531 tcg_temp_free_i32(tcg_isread);
1534 /* Handle special cases first */
1535 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
1536 case ARM_CP_NOP:
1537 return;
1538 case ARM_CP_NZCV:
1539 tcg_rt = cpu_reg(s, rt);
1540 if (isread) {
1541 gen_get_nzcv(tcg_rt);
1542 } else {
1543 gen_set_nzcv(tcg_rt);
1545 return;
1546 case ARM_CP_CURRENTEL:
1547 /* Reads as current EL value from pstate, which is
1548 * guaranteed to be constant by the tb flags.
1550 tcg_rt = cpu_reg(s, rt);
1551 tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
1552 return;
1553 case ARM_CP_DC_ZVA:
1554 /* Writes clear the aligned block of memory which rt points into. */
1555 tcg_rt = cpu_reg(s, rt);
1556 gen_helper_dc_zva(cpu_env, tcg_rt);
1557 return;
1558 default:
1559 break;
1562 if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1563 gen_io_start();
1566 tcg_rt = cpu_reg(s, rt);
1568 if (isread) {
1569 if (ri->type & ARM_CP_CONST) {
1570 tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
1571 } else if (ri->readfn) {
1572 TCGv_ptr tmpptr;
1573 tmpptr = tcg_const_ptr(ri);
1574 gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
1575 tcg_temp_free_ptr(tmpptr);
1576 } else {
1577 tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
1579 } else {
1580 if (ri->type & ARM_CP_CONST) {
1581 /* If not forbidden by access permissions, treat as WI */
1582 return;
1583 } else if (ri->writefn) {
1584 TCGv_ptr tmpptr;
1585 tmpptr = tcg_const_ptr(ri);
1586 gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
1587 tcg_temp_free_ptr(tmpptr);
1588 } else {
1589 tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
1593 if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1594 /* I/O operations must end the TB here (whether read or write) */
1595 gen_io_end();
1596 s->is_jmp = DISAS_UPDATE;
1597 } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
1598 /* We default to ending the TB on a coprocessor register write,
1599 * but allow this to be suppressed by the register definition
1600 * (usually only necessary to work around guest bugs).
1602 s->is_jmp = DISAS_UPDATE;
1606 /* C3.2.4 System
1607 * 31 22 21 20 19 18 16 15 12 11 8 7 5 4 0
1608 * +---------------------+---+-----+-----+-------+-------+-----+------+
1609 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 | CRn | CRm | op2 | Rt |
1610 * +---------------------+---+-----+-----+-------+-------+-----+------+
1612 static void disas_system(DisasContext *s, uint32_t insn)
1614 unsigned int l, op0, op1, crn, crm, op2, rt;
1615 l = extract32(insn, 21, 1);
1616 op0 = extract32(insn, 19, 2);
1617 op1 = extract32(insn, 16, 3);
1618 crn = extract32(insn, 12, 4);
1619 crm = extract32(insn, 8, 4);
1620 op2 = extract32(insn, 5, 3);
1621 rt = extract32(insn, 0, 5);
1623 if (op0 == 0) {
1624 if (l || rt != 31) {
1625 unallocated_encoding(s);
1626 return;
1628 switch (crn) {
1629 case 2: /* C5.6.68 HINT */
1630 handle_hint(s, insn, op1, op2, crm);
1631 break;
1632 case 3: /* CLREX, DSB, DMB, ISB */
1633 handle_sync(s, insn, op1, op2, crm);
1634 break;
1635 case 4: /* C5.6.130 MSR (immediate) */
1636 handle_msr_i(s, insn, op1, op2, crm);
1637 break;
1638 default:
1639 unallocated_encoding(s);
1640 break;
1642 return;
1644 handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1647 /* C3.2.3 Exception generation
1649 * 31 24 23 21 20 5 4 2 1 0
1650 * +-----------------+-----+------------------------+-----+----+
1651 * | 1 1 0 1 0 1 0 0 | opc | imm16 | op2 | LL |
1652 * +-----------------------+------------------------+----------+
1654 static void disas_exc(DisasContext *s, uint32_t insn)
1656 int opc = extract32(insn, 21, 3);
1657 int op2_ll = extract32(insn, 0, 5);
1658 int imm16 = extract32(insn, 5, 16);
1659 TCGv_i32 tmp;
1661 switch (opc) {
1662 case 0:
1663 /* For SVC, HVC and SMC we advance the single-step state
1664 * machine before taking the exception. This is architecturally
1665 * mandated, to ensure that single-stepping a system call
1666 * instruction works properly.
1668 switch (op2_ll) {
1669 case 1: /* SVC */
1670 gen_ss_advance(s);
1671 gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
1672 default_exception_el(s));
1673 break;
1674 case 2: /* HVC */
1675 if (s->current_el == 0) {
1676 unallocated_encoding(s);
1677 break;
1679 /* The pre HVC helper handles cases when HVC gets trapped
1680 * as an undefined insn by runtime configuration.
1682 gen_a64_set_pc_im(s->pc - 4);
1683 gen_helper_pre_hvc(cpu_env);
1684 gen_ss_advance(s);
1685 gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
1686 break;
1687 case 3: /* SMC */
1688 if (s->current_el == 0) {
1689 unallocated_encoding(s);
1690 break;
1692 gen_a64_set_pc_im(s->pc - 4);
1693 tmp = tcg_const_i32(syn_aa64_smc(imm16));
1694 gen_helper_pre_smc(cpu_env, tmp);
1695 tcg_temp_free_i32(tmp);
1696 gen_ss_advance(s);
1697 gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
1698 break;
1699 default:
1700 unallocated_encoding(s);
1701 break;
1703 break;
1704 case 1:
1705 if (op2_ll != 0) {
1706 unallocated_encoding(s);
1707 break;
1709 /* BRK */
1710 gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16),
1711 default_exception_el(s));
1712 break;
1713 case 2:
1714 if (op2_ll != 0) {
1715 unallocated_encoding(s);
1716 break;
1718 /* HLT. This has two purposes.
1719 * Architecturally, it is an external halting debug instruction.
1720 * Since QEMU doesn't implement external debug, we treat this as
1721 * it is required for halting debug disabled: it will UNDEF.
1722 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
1724 if (semihosting_enabled() && imm16 == 0xf000) {
1725 #ifndef CONFIG_USER_ONLY
1726 /* In system mode, don't allow userspace access to semihosting,
1727 * to provide some semblance of security (and for consistency
1728 * with our 32-bit semihosting).
1730 if (s->current_el == 0) {
1731 unsupported_encoding(s, insn);
1732 break;
1734 #endif
1735 gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
1736 } else {
1737 unsupported_encoding(s, insn);
1739 break;
1740 case 5:
1741 if (op2_ll < 1 || op2_ll > 3) {
1742 unallocated_encoding(s);
1743 break;
1745 /* DCPS1, DCPS2, DCPS3 */
1746 unsupported_encoding(s, insn);
1747 break;
1748 default:
1749 unallocated_encoding(s);
1750 break;
1754 /* C3.2.7 Unconditional branch (register)
1755 * 31 25 24 21 20 16 15 10 9 5 4 0
1756 * +---------------+-------+-------+-------+------+-------+
1757 * | 1 1 0 1 0 1 1 | opc | op2 | op3 | Rn | op4 |
1758 * +---------------+-------+-------+-------+------+-------+
1760 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1762 unsigned int opc, op2, op3, rn, op4;
1764 opc = extract32(insn, 21, 4);
1765 op2 = extract32(insn, 16, 5);
1766 op3 = extract32(insn, 10, 6);
1767 rn = extract32(insn, 5, 5);
1768 op4 = extract32(insn, 0, 5);
1770 if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1771 unallocated_encoding(s);
1772 return;
1775 switch (opc) {
1776 case 0: /* BR */
1777 case 1: /* BLR */
1778 case 2: /* RET */
1779 gen_a64_set_pc(s, cpu_reg(s, rn));
1780 /* BLR also needs to load return address */
1781 if (opc == 1) {
1782 tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1784 break;
1785 case 4: /* ERET */
1786 if (s->current_el == 0) {
1787 unallocated_encoding(s);
1788 return;
1790 gen_helper_exception_return(cpu_env);
1791 /* Must exit loop to check un-masked IRQs */
1792 s->is_jmp = DISAS_EXIT;
1793 return;
1794 case 5: /* DRPS */
1795 if (rn != 0x1f) {
1796 unallocated_encoding(s);
1797 } else {
1798 unsupported_encoding(s, insn);
1800 return;
1801 default:
1802 unallocated_encoding(s);
1803 return;
1806 s->is_jmp = DISAS_JUMP;
1809 /* C3.2 Branches, exception generating and system instructions */
1810 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1812 switch (extract32(insn, 25, 7)) {
1813 case 0x0a: case 0x0b:
1814 case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1815 disas_uncond_b_imm(s, insn);
1816 break;
1817 case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1818 disas_comp_b_imm(s, insn);
1819 break;
1820 case 0x1b: case 0x5b: /* Test & branch (immediate) */
1821 disas_test_b_imm(s, insn);
1822 break;
1823 case 0x2a: /* Conditional branch (immediate) */
1824 disas_cond_b_imm(s, insn);
1825 break;
1826 case 0x6a: /* Exception generation / System */
1827 if (insn & (1 << 24)) {
1828 disas_system(s, insn);
1829 } else {
1830 disas_exc(s, insn);
1832 break;
1833 case 0x6b: /* Unconditional branch (register) */
1834 disas_uncond_b_reg(s, insn);
1835 break;
1836 default:
1837 unallocated_encoding(s);
1838 break;
1843 * Load/Store exclusive instructions are implemented by remembering
1844 * the value/address loaded, and seeing if these are the same
1845 * when the store is performed. This is not actually the architecturally
1846 * mandated semantics, but it works for typical guest code sequences
1847 * and avoids having to monitor regular stores.
1849 * The store exclusive uses the atomic cmpxchg primitives to avoid
1850 * races in multi-threaded linux-user and when MTTCG softmmu is
1851 * enabled.
1853 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
1854 TCGv_i64 addr, int size, bool is_pair)
1856 int idx = get_mem_index(s);
1857 TCGMemOp memop = s->be_data;
1859 g_assert(size <= 3);
1860 if (is_pair) {
1861 g_assert(size >= 2);
1862 if (size == 2) {
1863 /* The pair must be single-copy atomic for the doubleword. */
1864 memop |= MO_64 | MO_ALIGN;
1865 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
1866 if (s->be_data == MO_LE) {
1867 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
1868 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
1869 } else {
1870 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
1871 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
1873 } else {
1874 /* The pair must be single-copy atomic for *each* doubleword, not
1875 the entire quadword, however it must be quadword aligned. */
1876 memop |= MO_64;
1877 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
1878 memop | MO_ALIGN_16);
1880 TCGv_i64 addr2 = tcg_temp_new_i64();
1881 tcg_gen_addi_i64(addr2, addr, 8);
1882 tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
1883 tcg_temp_free_i64(addr2);
1885 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
1886 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
1888 } else {
1889 memop |= size | MO_ALIGN;
1890 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
1891 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
1893 tcg_gen_mov_i64(cpu_exclusive_addr, addr);
1896 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
1897 TCGv_i64 inaddr, int size, int is_pair)
1899 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
1900 * && (!is_pair || env->exclusive_high == [addr + datasize])) {
1901 * [addr] = {Rt};
1902 * if (is_pair) {
1903 * [addr + datasize] = {Rt2};
1905 * {Rd} = 0;
1906 * } else {
1907 * {Rd} = 1;
1909 * env->exclusive_addr = -1;
1911 TCGLabel *fail_label = gen_new_label();
1912 TCGLabel *done_label = gen_new_label();
1913 TCGv_i64 addr = tcg_temp_local_new_i64();
1914 TCGv_i64 tmp;
1916 /* Copy input into a local temp so it is not trashed when the
1917 * basic block ends at the branch insn.
1919 tcg_gen_mov_i64(addr, inaddr);
1920 tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
1922 tmp = tcg_temp_new_i64();
1923 if (is_pair) {
1924 if (size == 2) {
1925 if (s->be_data == MO_LE) {
1926 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
1927 } else {
1928 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
1930 tcg_gen_atomic_cmpxchg_i64(tmp, addr, cpu_exclusive_val, tmp,
1931 get_mem_index(s),
1932 MO_64 | MO_ALIGN | s->be_data);
1933 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
1934 } else if (s->be_data == MO_LE) {
1935 gen_helper_paired_cmpxchg64_le(tmp, cpu_env, addr, cpu_reg(s, rt),
1936 cpu_reg(s, rt2));
1937 } else {
1938 gen_helper_paired_cmpxchg64_be(tmp, cpu_env, addr, cpu_reg(s, rt),
1939 cpu_reg(s, rt2));
1941 } else {
1942 TCGv_i64 val = cpu_reg(s, rt);
1943 tcg_gen_atomic_cmpxchg_i64(tmp, addr, cpu_exclusive_val, val,
1944 get_mem_index(s),
1945 size | MO_ALIGN | s->be_data);
1946 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
1949 tcg_temp_free_i64(addr);
1951 tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
1952 tcg_temp_free_i64(tmp);
1953 tcg_gen_br(done_label);
1955 gen_set_label(fail_label);
1956 tcg_gen_movi_i64(cpu_reg(s, rd), 1);
1957 gen_set_label(done_label);
1958 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
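
/* A standalone sketch of the value-comparison model used above for the
 * single-register 64-bit store-exclusive: the store succeeds only if the
 * address still matches the remembered exclusive address and memory still
 * holds the value seen by the matching load-exclusive, checked with a
 * compare-and-swap so it stays safe under MTTCG. This is not the
 * architectural exclusive-monitor semantics, just the approximation the
 * translator implements; types and names are invented for the example.
 */
#if 0   /* illustrative sketch only; single 64-bit register case */
#include <stdatomic.h>
#include <stdint.h>

struct excl_state {
    uint64_t exclusive_addr;          /* -1 when no exclusive access is pending */
    uint64_t exclusive_val;           /* value seen by the last load-exclusive  */
};

/* Returns 0 on success (store performed), 1 on failure, as STXR does. */
static uint64_t store_exclusive64(struct excl_state *st,
                                  _Atomic uint64_t *mem, uint64_t addr,
                                  uint64_t new_val)
{
    uint64_t status = 1;

    if (addr == st->exclusive_addr) {
        uint64_t expected = st->exclusive_val;

        /* cmpxchg: store only if memory still holds the remembered value */
        if (atomic_compare_exchange_strong(mem, &expected, new_val)) {
            status = 0;
        }
    }
    st->exclusive_addr = (uint64_t)-1;  /* the exclusive access is consumed */
    return status;
}
#endif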
1961 /* Update the Sixty-Four bit (SF) register size. This logic is derived
1962 * from the ARMv8 specs for LDR (Shared decode for all encodings).
1964 static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
1966 int opc0 = extract32(opc, 0, 1);
1967 int regsize;
1969 if (is_signed) {
1970 regsize = opc0 ? 32 : 64;
1971 } else {
1972 regsize = size == 3 ? 64 : 32;
1974 return regsize == 64;
1977 /* C3.3.6 Load/store exclusive
1979 * 31 30 29 24 23 22 21 20 16 15 14 10 9 5 4 0
1980 * +-----+-------------+----+---+----+------+----+-------+------+------+
1981 * | sz | 0 0 1 0 0 0 | o2 | L | o1 | Rs | o0 | Rt2 | Rn | Rt |
1982 * +-----+-------------+----+---+----+------+----+-------+------+------+
1984 * sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
1985 * L: 0 -> store, 1 -> load
1986 * o2: 0 -> exclusive, 1 -> not
1987 * o1: 0 -> single register, 1 -> register pair
1988 * o0: 1 -> load-acquire/store-release, 0 -> not
1990 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
1992 int rt = extract32(insn, 0, 5);
1993 int rn = extract32(insn, 5, 5);
1994 int rt2 = extract32(insn, 10, 5);
1995 int is_lasr = extract32(insn, 15, 1);
1996 int rs = extract32(insn, 16, 5);
1997 int is_pair = extract32(insn, 21, 1);
1998 int is_store = !extract32(insn, 22, 1);
1999 int is_excl = !extract32(insn, 23, 1);
2000 int size = extract32(insn, 30, 2);
2001 TCGv_i64 tcg_addr;
2003 if ((!is_excl && !is_pair && !is_lasr) ||
2004 (!is_excl && is_pair) ||
2005 (is_pair && size < 2)) {
2006 unallocated_encoding(s);
2007 return;
2010 if (rn == 31) {
2011 gen_check_sp_alignment(s);
2013 tcg_addr = read_cpu_reg_sp(s, rn, 1);
2015 /* Note that for load-acquire/store-release the required ordering is
2016  * provided by the explicit tcg_gen_mb() barriers emitted below.
2019 if (is_excl) {
2020 if (!is_store) {
2021 s->is_ldex = true;
2022 gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
2023 if (is_lasr) {
2024 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2026 } else {
2027 if (is_lasr) {
2028 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2030 gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
2032 } else {
2033 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2034 bool iss_sf = disas_ldst_compute_iss_sf(size, false, 0);
2036 /* Generate ISS for non-exclusive accesses including LASR. */
2037 if (is_store) {
2038 if (is_lasr) {
2039 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2041 do_gpr_st(s, tcg_rt, tcg_addr, size,
2042 true, rt, iss_sf, is_lasr);
2043 } else {
2044 do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false,
2045 true, rt, iss_sf, is_lasr);
2046 if (is_lasr) {
2047 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2054 * C3.3.5 Load register (literal)
2056 * 31 30 29 27 26 25 24 23 5 4 0
2057 * +-----+-------+---+-----+-------------------+-------+
2058 * | opc | 0 1 1 | V | 0 0 | imm19 | Rt |
2059 * +-----+-------+---+-----+-------------------+-------+
2061 * V: 1 -> vector (simd/fp)
2062 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2063 * 10 -> 32 bit signed, 11 -> prefetch
2064 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2066 static void disas_ld_lit(DisasContext *s, uint32_t insn)
2068 int rt = extract32(insn, 0, 5);
2069 int64_t imm = sextract32(insn, 5, 19) << 2;
2070 bool is_vector = extract32(insn, 26, 1);
2071 int opc = extract32(insn, 30, 2);
2072 bool is_signed = false;
2073 int size = 2;
2074 TCGv_i64 tcg_rt, tcg_addr;
2076 if (is_vector) {
2077 if (opc == 3) {
2078 unallocated_encoding(s);
2079 return;
2081 size = 2 + opc;
2082 if (!fp_access_check(s)) {
2083 return;
2085 } else {
2086 if (opc == 3) {
2087 /* PRFM (literal) : prefetch */
2088 return;
2090 size = 2 + extract32(opc, 0, 1);
2091 is_signed = extract32(opc, 1, 1);
2094 tcg_rt = cpu_reg(s, rt);
2096 tcg_addr = tcg_const_i64((s->pc - 4) + imm);
2097 if (is_vector) {
2098 do_fp_ld(s, rt, tcg_addr, size);
2099 } else {
2100 /* Only unsigned 32bit loads target 32bit registers. */
2101 bool iss_sf = opc != 0;
2103 do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
2104 true, rt, iss_sf, false);
2106 tcg_temp_free_i64(tcg_addr);
2110 * C5.6.80 LDNP (Load Pair - non-temporal hint)
2111 * C5.6.81 LDP (Load Pair - non vector)
2112 * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
2113 * C5.6.176 STNP (Store Pair - non-temporal hint)
2114 * C5.6.177 STP (Store Pair - non vector)
2115 * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
2116 * C6.3.165 LDP (Load Pair of SIMD&FP)
2117 * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
2118 * C6.3.284 STP (Store Pair of SIMD&FP)
2120 * 31 30 29 27 26 25 24 23 22 21 15 14 10 9 5 4 0
2121 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2122 * | opc | 1 0 1 | V | 0 | index | L | imm7 | Rt2 | Rn | Rt |
2123 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2125 * opc: LDP/STP/LDNP/STNP 00 -> 32 bit, 10 -> 64 bit
2126 * LDPSW 01
2127 * LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2128 * V: 0 -> GPR, 1 -> Vector
2129 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2130 * 10 -> signed offset, 11 -> pre-index
2131 * L: 0 -> Store 1 -> Load
2133 * Rt, Rt2 = GPR or SIMD registers to be stored
2134 * Rn = general purpose register containing address
2135 * imm7 = signed offset (multiple of 4 or 8 depending on size)
2137 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2139 int rt = extract32(insn, 0, 5);
2140 int rn = extract32(insn, 5, 5);
2141 int rt2 = extract32(insn, 10, 5);
2142 uint64_t offset = sextract64(insn, 15, 7);
2143 int index = extract32(insn, 23, 2);
2144 bool is_vector = extract32(insn, 26, 1);
2145 bool is_load = extract32(insn, 22, 1);
2146 int opc = extract32(insn, 30, 2);
2148 bool is_signed = false;
2149 bool postindex = false;
2150 bool wback = false;
2152 TCGv_i64 tcg_addr; /* calculated address */
2153 int size;
2155 if (opc == 3) {
2156 unallocated_encoding(s);
2157 return;
2160 if (is_vector) {
2161 size = 2 + opc;
2162 } else {
2163 size = 2 + extract32(opc, 1, 1);
2164 is_signed = extract32(opc, 0, 1);
2165 if (!is_load && is_signed) {
2166 unallocated_encoding(s);
2167 return;
2171 switch (index) {
2172 case 1: /* post-index */
2173 postindex = true;
2174 wback = true;
2175 break;
2176 case 0:
2177 /* signed offset with "non-temporal" hint. Since we don't emulate
2178 * caches we don't care about hints to the cache system about
2179 * data access patterns, and handle this identically to plain
2180 * signed offset.
2182 if (is_signed) {
2183 /* There is no non-temporal-hint version of LDPSW */
2184 unallocated_encoding(s);
2185 return;
2187 postindex = false;
2188 break;
2189 case 2: /* signed offset, rn not updated */
2190 postindex = false;
2191 break;
2192 case 3: /* pre-index */
2193 postindex = false;
2194 wback = true;
2195 break;
2198 if (is_vector && !fp_access_check(s)) {
2199 return;
2202 offset <<= size;
2204 if (rn == 31) {
2205 gen_check_sp_alignment(s);
2208 tcg_addr = read_cpu_reg_sp(s, rn, 1);
2210 if (!postindex) {
2211 tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2214 if (is_vector) {
2215 if (is_load) {
2216 do_fp_ld(s, rt, tcg_addr, size);
2217 } else {
2218 do_fp_st(s, rt, tcg_addr, size);
2220 } else {
2221 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2222 if (is_load) {
2223 do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
2224 false, 0, false, false);
2225 } else {
2226 do_gpr_st(s, tcg_rt, tcg_addr, size,
2227 false, 0, false, false);
2230 tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2231 if (is_vector) {
2232 if (is_load) {
2233 do_fp_ld(s, rt2, tcg_addr, size);
2234 } else {
2235 do_fp_st(s, rt2, tcg_addr, size);
2237 } else {
2238 TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2239 if (is_load) {
2240 do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false,
2241 false, 0, false, false);
2242 } else {
2243 do_gpr_st(s, tcg_rt2, tcg_addr, size,
2244 false, 0, false, false);
2248 if (wback) {
2249 if (postindex) {
2250 tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
2251 } else {
2252 tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
2254 tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
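/*
 * Illustrative: at the writeback point tcg_addr has already been advanced
 * to the second element, so post-index adds (offset - (1 << size)) to end
 * at base + offset, while pre-index just steps back by one element to the
 * base + offset address computed before the transfers.
 */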
2259 * C3.3.8 Load/store (immediate post-indexed)
2260 * C3.3.9 Load/store (immediate pre-indexed)
2261 * C3.3.12 Load/store (unscaled immediate)
2263 * 31 30 29 27 26 25 24 23 22 21 20 12 11 10 9 5 4 0
2264 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2265 * |size| 1 1 1 | V | 0 0 | opc | 0 | imm9 | idx | Rn | Rt |
2266 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2268 * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
2269 *       10 -> unprivileged
2270 * V = 0 -> non-vector
2271 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2272 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2274 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
2275 int opc,
2276 int size,
2277 int rt,
2278 bool is_vector)
2280 int rn = extract32(insn, 5, 5);
2281 int imm9 = sextract32(insn, 12, 9);
2282 int idx = extract32(insn, 10, 2);
2283 bool is_signed = false;
2284 bool is_store = false;
2285 bool is_extended = false;
2286 bool is_unpriv = (idx == 2);
2287 bool iss_valid = !is_vector;
2288 bool post_index;
2289 bool writeback;
2291 TCGv_i64 tcg_addr;
2293 if (is_vector) {
2294 size |= (opc & 2) << 1;
2295 if (size > 4 || is_unpriv) {
2296 unallocated_encoding(s);
2297 return;
2299 is_store = ((opc & 1) == 0);
2300 if (!fp_access_check(s)) {
2301 return;
2303 } else {
2304 if (size == 3 && opc == 2) {
2305 /* PRFM - prefetch */
2306 if (is_unpriv) {
2307 unallocated_encoding(s);
2308 return;
2310 return;
2312 if (opc == 3 && size > 1) {
2313 unallocated_encoding(s);
2314 return;
2316 is_store = (opc == 0);
2317 is_signed = extract32(opc, 1, 1);
2318 is_extended = (size < 3) && extract32(opc, 0, 1);
2321 switch (idx) {
2322 case 0:
2323 case 2:
2324 post_index = false;
2325 writeback = false;
2326 break;
2327 case 1:
2328 post_index = true;
2329 writeback = true;
2330 break;
2331 case 3:
2332 post_index = false;
2333 writeback = true;
2334 break;
2337 if (rn == 31) {
2338 gen_check_sp_alignment(s);
2340 tcg_addr = read_cpu_reg_sp(s, rn, 1);
2342 if (!post_index) {
2343 tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2346 if (is_vector) {
2347 if (is_store) {
2348 do_fp_st(s, rt, tcg_addr, size);
2349 } else {
2350 do_fp_ld(s, rt, tcg_addr, size);
2352 } else {
2353 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2354 int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2355 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2357 if (is_store) {
2358 do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx,
2359 iss_valid, rt, iss_sf, false);
2360 } else {
2361 do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
2362 is_signed, is_extended, memidx,
2363 iss_valid, rt, iss_sf, false);
2367 if (writeback) {
2368 TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2369 if (post_index) {
2370 tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2372 tcg_gen_mov_i64(tcg_rn, tcg_addr);
2377 * C3.3.10 Load/store (register offset)
2379 * 31 30 29 27 26 25 24 23 22 21 20 16 15 13 12 11 10 9 5 4 0
2380 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2381 * |size| 1 1 1 | V | 0 0 | opc | 1 | Rm | opt | S| 1 0 | Rn | Rt |
2382 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2384 * For non-vector:
2385 * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2386 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2387 * For vector:
2388 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2389 * opc<0>: 0 -> store, 1 -> load
2390 * V: 1 -> vector/simd
2391 * opt: extend encoding (see DecodeRegExtend)
2392 * S: if S=1 then scale (essentially index by sizeof(size))
2393 * Rt: register to transfer into/out of
2394 * Rn: address register or SP for base
2395 * Rm: offset register or ZR for offset
2397 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
2398 int opc,
2399 int size,
2400 int rt,
2401 bool is_vector)
2403 int rn = extract32(insn, 5, 5);
2404 int shift = extract32(insn, 12, 1);
2405 int rm = extract32(insn, 16, 5);
2406 int opt = extract32(insn, 13, 3);
2407 bool is_signed = false;
2408 bool is_store = false;
2409 bool is_extended = false;
2411 TCGv_i64 tcg_rm;
2412 TCGv_i64 tcg_addr;
2414 if (extract32(opt, 1, 1) == 0) {
2415 unallocated_encoding(s);
2416 return;
2419 if (is_vector) {
2420 size |= (opc & 2) << 1;
2421 if (size > 4) {
2422 unallocated_encoding(s);
2423 return;
2425 is_store = !extract32(opc, 0, 1);
2426 if (!fp_access_check(s)) {
2427 return;
2429 } else {
2430 if (size == 3 && opc == 2) {
2431 /* PRFM - prefetch */
2432 return;
2434 if (opc == 3 && size > 1) {
2435 unallocated_encoding(s);
2436 return;
2438 is_store = (opc == 0);
2439 is_signed = extract32(opc, 1, 1);
2440 is_extended = (size < 3) && extract32(opc, 0, 1);
2443 if (rn == 31) {
2444 gen_check_sp_alignment(s);
2446 tcg_addr = read_cpu_reg_sp(s, rn, 1);
2448 tcg_rm = read_cpu_reg(s, rm, 1);
2449 ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2451 tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
2453 if (is_vector) {
2454 if (is_store) {
2455 do_fp_st(s, rt, tcg_addr, size);
2456 } else {
2457 do_fp_ld(s, rt, tcg_addr, size);
2459 } else {
2460 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2461 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2462 if (is_store) {
2463 do_gpr_st(s, tcg_rt, tcg_addr, size,
2464 true, rt, iss_sf, false);
2465 } else {
2466 do_gpr_ld(s, tcg_rt, tcg_addr, size,
2467 is_signed, is_extended,
2468 true, rt, iss_sf, false);
2474 * C3.3.13 Load/store (unsigned immediate)
2476 * 31 30 29   27  26 25 24 23 22 21        10 9     5 4    0
2477 * +----+-------+---+-----+-----+------------+-------+------+
2478 * |size| 1 1 1 | V | 0 1 | opc | imm12 | Rn | Rt |
2479 * +----+-------+---+-----+-----+------------+-------+------+
2481 * For non-vector:
2482 * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2483 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2484 * For vector:
2485 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2486 * opc<0>: 0 -> store, 1 -> load
2487 * Rn: base address register (inc SP)
2488 * Rt: target register
2490 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
2491 int opc,
2492 int size,
2493 int rt,
2494 bool is_vector)
2496 int rn = extract32(insn, 5, 5);
2497 unsigned int imm12 = extract32(insn, 10, 12);
2498 unsigned int offset;
2500 TCGv_i64 tcg_addr;
2502 bool is_store;
2503 bool is_signed = false;
2504 bool is_extended = false;
2506 if (is_vector) {
2507 size |= (opc & 2) << 1;
2508 if (size > 4) {
2509 unallocated_encoding(s);
2510 return;
2512 is_store = !extract32(opc, 0, 1);
2513 if (!fp_access_check(s)) {
2514 return;
2516 } else {
2517 if (size == 3 && opc == 2) {
2518 /* PRFM - prefetch */
2519 return;
2521 if (opc == 3 && size > 1) {
2522 unallocated_encoding(s);
2523 return;
2525 is_store = (opc == 0);
2526 is_signed = extract32(opc, 1, 1);
2527 is_extended = (size < 3) && extract32(opc, 0, 1);
2530 if (rn == 31) {
2531 gen_check_sp_alignment(s);
2533 tcg_addr = read_cpu_reg_sp(s, rn, 1);
2534 offset = imm12 << size;
2535 tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2537 if (is_vector) {
2538 if (is_store) {
2539 do_fp_st(s, rt, tcg_addr, size);
2540 } else {
2541 do_fp_ld(s, rt, tcg_addr, size);
2543 } else {
2544 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2545 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2546 if (is_store) {
2547 do_gpr_st(s, tcg_rt, tcg_addr, size,
2548 true, rt, iss_sf, false);
2549 } else {
2550 do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended,
2551 true, rt, iss_sf, false);
2556 /* Load/store register (all forms) */
2557 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2559 int rt = extract32(insn, 0, 5);
2560 int opc = extract32(insn, 22, 2);
2561 bool is_vector = extract32(insn, 26, 1);
2562 int size = extract32(insn, 30, 2);
2564 switch (extract32(insn, 24, 2)) {
2565 case 0:
2566 if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2567 disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
2568 } else {
2569 /* Load/store register (unscaled immediate)
2570 * Load/store immediate pre/post-indexed
2571 * Load/store register unprivileged
2573 disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
2575 break;
2576 case 1:
2577 disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
2578 break;
2579 default:
2580 unallocated_encoding(s);
2581 break;
2585 /* C3.3.1 AdvSIMD load/store multiple structures
2587 * 31 30 29 23 22 21 16 15 12 11 10 9 5 4 0
2588 * +---+---+---------------+---+-------------+--------+------+------+------+
2589 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size | Rn | Rt |
2590 * +---+---+---------------+---+-------------+--------+------+------+------+
2592 * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
2594 * 31 30 29 23 22 21 20 16 15 12 11 10 9 5 4 0
2595 * +---+---+---------------+---+---+---------+--------+------+------+------+
2596 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 | Rm | opcode | size | Rn | Rt |
2597 * +---+---+---------------+---+---+---------+--------+------+------+------+
2599 * Rt: first (or only) SIMD&FP register to be transferred
2600 * Rn: base address or SP
2601 * Rm (post-index only): post-index register (when !31) or size dependent #imm
2603 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
2605 int rt = extract32(insn, 0, 5);
2606 int rn = extract32(insn, 5, 5);
2607 int size = extract32(insn, 10, 2);
2608 int opcode = extract32(insn, 12, 4);
2609 bool is_store = !extract32(insn, 22, 1);
2610 bool is_postidx = extract32(insn, 23, 1);
2611 bool is_q = extract32(insn, 30, 1);
2612 TCGv_i64 tcg_addr, tcg_rn;
2614 int ebytes = 1 << size;
2615 int elements = (is_q ? 128 : 64) / (8 << size);
2616 int rpt; /* num iterations */
2617 int selem; /* structure elements */
2618 int r;
2620 if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
2621 unallocated_encoding(s);
2622 return;
2625 /* From the shared decode logic */
2626 switch (opcode) {
2627 case 0x0:
2628 rpt = 1;
2629 selem = 4;
2630 break;
2631 case 0x2:
2632 rpt = 4;
2633 selem = 1;
2634 break;
2635 case 0x4:
2636 rpt = 1;
2637 selem = 3;
2638 break;
2639 case 0x6:
2640 rpt = 3;
2641 selem = 1;
2642 break;
2643 case 0x7:
2644 rpt = 1;
2645 selem = 1;
2646 break;
2647 case 0x8:
2648 rpt = 1;
2649 selem = 2;
2650 break;
2651 case 0xa:
2652 rpt = 2;
2653 selem = 1;
2654 break;
2655 default:
2656 unallocated_encoding(s);
2657 return;
2660 if (size == 3 && !is_q && selem != 1) {
2661 /* reserved */
2662 unallocated_encoding(s);
2663 return;
2666 if (!fp_access_check(s)) {
2667 return;
2670 if (rn == 31) {
2671 gen_check_sp_alignment(s);
2674 tcg_rn = cpu_reg_sp(s, rn);
2675 tcg_addr = tcg_temp_new_i64();
2676 tcg_gen_mov_i64(tcg_addr, tcg_rn);
2678 for (r = 0; r < rpt; r++) {
2679 int e;
2680 for (e = 0; e < elements; e++) {
2681 int tt = (rt + r) % 32;
2682 int xs;
2683 for (xs = 0; xs < selem; xs++) {
2684 if (is_store) {
2685 do_vec_st(s, tt, e, tcg_addr, size);
2686 } else {
2687 do_vec_ld(s, tt, e, tcg_addr, size);
2689 /* For non-quad operations, setting a slice of the low
2690 * 64 bits of the register clears the high 64 bits (in
2691 * the ARM ARM pseudocode this is implicit in the fact
2692 * that 'rval' is a 64 bit wide variable). We optimize
2693 * by noticing that we only need to do this the first
2694 * time we touch a register.
2696 if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
2697 clear_vec_high(s, tt);
2700 tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2701 tt = (tt + 1) % 32;
2706 if (is_postidx) {
2707 int rm = extract32(insn, 16, 5);
2708 if (rm == 31) {
2709 tcg_gen_mov_i64(tcg_rn, tcg_addr);
2710 } else {
2711 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2714 tcg_temp_free_i64(tcg_addr);
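/*
 * Illustrative: opcode 0x0 (LD4/ST4) is one structure of four elements per
 * element index (rpt == 1, selem == 4), while opcode 0x2 (LD1/ST1 with four
 * registers) is four independent one-element transfers (rpt == 4,
 * selem == 1); the r/e/xs loops above walk those counts in that order.
 */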
2717 /* C3.3.3 AdvSIMD load/store single structure
2719 * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0
2720 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2721 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size | Rn | Rt |
2722 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2724 * C3.3.4 AdvSIMD load/store single structure (post-indexed)
2726 * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0
2727 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2728 * | 0 | Q | 0 0 1 1 0 1 1 | L R | Rm | opc | S | size | Rn | Rt |
2729 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2731 * Rt: first (or only) SIMD&FP register to be transferred
2732 * Rn: base address or SP
2733 * Rm (post-index only): post-index register (when !31) or size dependent #imm
2734 * index = encoded in Q:S:size dependent on size
2736 * lane_size = encoded in R, opc
2737 * transfer width = encoded in opc, S, size
2739 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
2741 int rt = extract32(insn, 0, 5);
2742 int rn = extract32(insn, 5, 5);
2743 int size = extract32(insn, 10, 2);
2744 int S = extract32(insn, 12, 1);
2745 int opc = extract32(insn, 13, 3);
2746 int R = extract32(insn, 21, 1);
2747 int is_load = extract32(insn, 22, 1);
2748 int is_postidx = extract32(insn, 23, 1);
2749 int is_q = extract32(insn, 30, 1);
2751 int scale = extract32(opc, 1, 2);
2752 int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
2753 bool replicate = false;
2754 int index = is_q << 3 | S << 2 | size;
2755 int ebytes, xs;
2756 TCGv_i64 tcg_addr, tcg_rn;
2758 switch (scale) {
2759 case 3:
2760 if (!is_load || S) {
2761 unallocated_encoding(s);
2762 return;
2764 scale = size;
2765 replicate = true;
2766 break;
2767 case 0:
2768 break;
2769 case 1:
2770 if (extract32(size, 0, 1)) {
2771 unallocated_encoding(s);
2772 return;
2774 index >>= 1;
2775 break;
2776 case 2:
2777 if (extract32(size, 1, 1)) {
2778 unallocated_encoding(s);
2779 return;
2781 if (!extract32(size, 0, 1)) {
2782 index >>= 2;
2783 } else {
2784 if (S) {
2785 unallocated_encoding(s);
2786 return;
2788 index >>= 3;
2789 scale = 3;
2791 break;
2792 default:
2793 g_assert_not_reached();
2796 if (!fp_access_check(s)) {
2797 return;
2800 ebytes = 1 << scale;
2802 if (rn == 31) {
2803 gen_check_sp_alignment(s);
2806 tcg_rn = cpu_reg_sp(s, rn);
2807 tcg_addr = tcg_temp_new_i64();
2808 tcg_gen_mov_i64(tcg_addr, tcg_rn);
2810 for (xs = 0; xs < selem; xs++) {
2811 if (replicate) {
2812 /* Load and replicate to all elements */
2813 uint64_t mulconst;
2814 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
2816 tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
2817 get_mem_index(s), s->be_data + scale);
2818 switch (scale) {
2819 case 0:
2820 mulconst = 0x0101010101010101ULL;
2821 break;
2822 case 1:
2823 mulconst = 0x0001000100010001ULL;
2824 break;
2825 case 2:
2826 mulconst = 0x0000000100000001ULL;
2827 break;
2828 case 3:
2829 mulconst = 0;
2830 break;
2831 default:
2832 g_assert_not_reached();
2834 if (mulconst) {
2835 tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
2837 write_vec_element(s, tcg_tmp, rt, 0, MO_64);
2838 if (is_q) {
2839 write_vec_element(s, tcg_tmp, rt, 1, MO_64);
2840 } else {
2841 clear_vec_high(s, rt);
2843 tcg_temp_free_i64(tcg_tmp);
2844 } else {
2845 /* Load/store one element per register */
2846 if (is_load) {
2847 do_vec_ld(s, rt, index, tcg_addr, scale);
2848 } else {
2849 do_vec_st(s, rt, index, tcg_addr, scale);
2852 tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2853 rt = (rt + 1) % 32;
2856 if (is_postidx) {
2857 int rm = extract32(insn, 16, 5);
2858 if (rm == 31) {
2859 tcg_gen_mov_i64(tcg_rn, tcg_addr);
2860 } else {
2861 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2864 tcg_temp_free_i64(tcg_addr);
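/*
 * Illustrative: for the replicating forms (LD1R etc.) a single loaded byte
 * is spread across the 64-bit result by multiplying with
 * 0x0101010101010101 (and similarly for halfwords and words), then written
 * to one or both 64-bit halves of the destination vector register.
 */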
2867 /* C3.3 Loads and stores */
2868 static void disas_ldst(DisasContext *s, uint32_t insn)
2870 switch (extract32(insn, 24, 6)) {
2871 case 0x08: /* Load/store exclusive */
2872 disas_ldst_excl(s, insn);
2873 break;
2874 case 0x18: case 0x1c: /* Load register (literal) */
2875 disas_ld_lit(s, insn);
2876 break;
2877 case 0x28: case 0x29:
2878 case 0x2c: case 0x2d: /* Load/store pair (all forms) */
2879 disas_ldst_pair(s, insn);
2880 break;
2881 case 0x38: case 0x39:
2882 case 0x3c: case 0x3d: /* Load/store register (all forms) */
2883 disas_ldst_reg(s, insn);
2884 break;
2885 case 0x0c: /* AdvSIMD load/store multiple structures */
2886 disas_ldst_multiple_struct(s, insn);
2887 break;
2888 case 0x0d: /* AdvSIMD load/store single structure */
2889 disas_ldst_single_struct(s, insn);
2890 break;
2891 default:
2892 unallocated_encoding(s);
2893 break;
2897 /* C3.4.6 PC-rel. addressing
2898 * 31 30 29 28 24 23 5 4 0
2899 * +----+-------+-----------+-------------------+------+
2900 * | op | immlo | 1 0 0 0 0 | immhi | Rd |
2901 * +----+-------+-----------+-------------------+------+
2903 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
2905 unsigned int page, rd;
2906 uint64_t base;
2907 uint64_t offset;
2909 page = extract32(insn, 31, 1);
2910 /* SignExtend(immhi:immlo) -> offset */
2911 offset = sextract64(insn, 5, 19);
2912 offset = offset << 2 | extract32(insn, 29, 2);
2913 rd = extract32(insn, 0, 5);
2914 base = s->pc - 4;
2916 if (page) {
2917 /* ADRP (page based) */
2918 base &= ~0xfff;
2919 offset <<= 12;
2922 tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
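/*
 * Illustrative: ADR computes (PC of this insn) + simm21, which is why the
 * code uses s->pc - 4 (s->pc has already advanced past the instruction);
 * ADRP instead clears the low 12 bits of PC and scales the immediate by
 * 4KB, so an offset of 1 addresses the start of the next page.
 */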
2926 * C3.4.1 Add/subtract (immediate)
2928 * 31 30 29 28 24 23 22 21 10 9 5 4 0
2929 * +--+--+--+-----------+-----+-------------+-----+-----+
2930 * |sf|op| S| 1 0 0 0 1 |shift| imm12 | Rn | Rd |
2931 * +--+--+--+-----------+-----+-------------+-----+-----+
2933 * sf: 0 -> 32bit, 1 -> 64bit
2934 * op: 0 -> add , 1 -> sub
2935 * S: 1 -> set flags
2936 * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
2938 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
2940 int rd = extract32(insn, 0, 5);
2941 int rn = extract32(insn, 5, 5);
2942 uint64_t imm = extract32(insn, 10, 12);
2943 int shift = extract32(insn, 22, 2);
2944 bool setflags = extract32(insn, 29, 1);
2945 bool sub_op = extract32(insn, 30, 1);
2946 bool is_64bit = extract32(insn, 31, 1);
2948 TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2949 TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
2950 TCGv_i64 tcg_result;
2952 switch (shift) {
2953 case 0x0:
2954 break;
2955 case 0x1:
2956 imm <<= 12;
2957 break;
2958 default:
2959 unallocated_encoding(s);
2960 return;
2963 tcg_result = tcg_temp_new_i64();
2964 if (!setflags) {
2965 if (sub_op) {
2966 tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
2967 } else {
2968 tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
2970 } else {
2971 TCGv_i64 tcg_imm = tcg_const_i64(imm);
2972 if (sub_op) {
2973 gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2974 } else {
2975 gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2977 tcg_temp_free_i64(tcg_imm);
2980 if (is_64bit) {
2981 tcg_gen_mov_i64(tcg_rd, tcg_result);
2982 } else {
2983 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
2986 tcg_temp_free_i64(tcg_result);
2989 /* The input should be a value in the bottom e bits (with higher
2990 * bits zero); returns that value replicated into every element
2991 * of size e in a 64 bit integer.
2993 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
2995 assert(e != 0);
2996 while (e < 64) {
2997 mask |= mask << e;
2998 e *= 2;
3000 return mask;
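/*
 * Worked example (illustrative): bitfield_replicate(0x3, 8) doubles the
 * element to 16, 32 and then 64 bits, producing 0x0303030303030303ULL.
 */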
3003 /* Return a value with the bottom len bits set (where 0 < len <= 64) */
3004 static inline uint64_t bitmask64(unsigned int length)
3006 assert(length > 0 && length <= 64);
3007 return ~0ULL >> (64 - length);
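/* Illustrative: bitmask64(3) == 0x7 and bitmask64(64) == ~0ULL. */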
3010 /* Simplified variant of pseudocode DecodeBitMasks() for the case where we
3011 * only require the wmask. Returns false if the imms/immr/immn are a reserved
3012 * value (ie should cause a guest UNDEF exception), and true if they are
3013 * valid, in which case the decoded bit pattern is written to result.
3015 static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
3016 unsigned int imms, unsigned int immr)
3018 uint64_t mask;
3019 unsigned e, levels, s, r;
3020 int len;
3022 assert(immn < 2 && imms < 64 && immr < 64);
3024 /* The bit patterns we create here are 64 bit patterns which
3025 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
3026 * 64 bits each. Each element contains the same value: a run
3027 * of between 1 and e-1 non-zero bits, rotated within the
3028 * element by between 0 and e-1 bits.
3030 * The element size and run length are encoded into immn (1 bit)
3031 * and imms (6 bits) as follows:
3032 * 64 bit elements: immn = 1, imms = <length of run - 1>
3033 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
3034 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
3035 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1>
3036 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
3037 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
3038 * Notice that immn = 0, imms = 11111x is the only combination
3039 * not covered by one of the above options; this is reserved.
3040 * Further, <length of run - 1> all-ones is a reserved pattern.
3042 * In all cases the rotation is by immr % e (and immr is 6 bits).
3045 /* First determine the element size */
3046 len = 31 - clz32((immn << 6) | (~imms & 0x3f));
3047 if (len < 1) {
3048 /* This is the immn == 0, imms == 11111x case */
3049 return false;
3051 e = 1 << len;
3053 levels = e - 1;
3054 s = imms & levels;
3055 r = immr & levels;
3057 if (s == levels) {
3058 /* <length of run - 1> mustn't be all-ones. */
3059 return false;
3062 /* Create the value of one element: s+1 set bits rotated
3063 * by r within the element (which is e bits wide)...
3065 mask = bitmask64(s + 1);
3066 if (r) {
3067 mask = (mask >> r) | (mask << (e - r));
3068 mask &= bitmask64(e);
3070 /* ...then replicate the element over the whole 64 bit value */
3071 mask = bitfield_replicate(mask, e);
3072 *result = mask;
3073 return true;
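/*
 * Worked example (illustrative): immn=1, imms=7, immr=0 selects 64-bit
 * elements containing an 8-bit run, so wmask == 0x00000000000000ff (as
 * used by e.g. AND x0, x1, #0xff); with immr=4 the run is rotated right
 * by four bits, giving 0xf00000000000000f.
 */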
3076 /* C3.4.4 Logical (immediate)
3077 * 31 30 29 28 23 22 21 16 15 10 9 5 4 0
3078 * +----+-----+-------------+---+------+------+------+------+
3079 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms | Rn | Rd |
3080 * +----+-----+-------------+---+------+------+------+------+
3082 static void disas_logic_imm(DisasContext *s, uint32_t insn)
3084 unsigned int sf, opc, is_n, immr, imms, rn, rd;
3085 TCGv_i64 tcg_rd, tcg_rn;
3086 uint64_t wmask;
3087 bool is_and = false;
3089 sf = extract32(insn, 31, 1);
3090 opc = extract32(insn, 29, 2);
3091 is_n = extract32(insn, 22, 1);
3092 immr = extract32(insn, 16, 6);
3093 imms = extract32(insn, 10, 6);
3094 rn = extract32(insn, 5, 5);
3095 rd = extract32(insn, 0, 5);
3097 if (!sf && is_n) {
3098 unallocated_encoding(s);
3099 return;
3102 if (opc == 0x3) { /* ANDS */
3103 tcg_rd = cpu_reg(s, rd);
3104 } else {
3105 tcg_rd = cpu_reg_sp(s, rd);
3107 tcg_rn = cpu_reg(s, rn);
3109 if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
3110 /* some immediate field values are reserved */
3111 unallocated_encoding(s);
3112 return;
3115 if (!sf) {
3116 wmask &= 0xffffffff;
3119 switch (opc) {
3120 case 0x3: /* ANDS */
3121 case 0x0: /* AND */
3122 tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
3123 is_and = true;
3124 break;
3125 case 0x1: /* ORR */
3126 tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
3127 break;
3128 case 0x2: /* EOR */
3129 tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
3130 break;
3131 default:
3132 assert(FALSE); /* must handle all above */
3133 break;
3136 if (!sf && !is_and) {
3137 /* zero extend final result; we know we can skip this for AND
3138 * since the immediate had the high 32 bits clear.
3140 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3143 if (opc == 3) { /* ANDS */
3144 gen_logic_CC(sf, tcg_rd);
3149 * C3.4.5 Move wide (immediate)
3151 * 31 30 29 28 23 22 21 20 5 4 0
3152 * +--+-----+-------------+-----+----------------+------+
3153 * |sf| opc | 1 0 0 1 0 1 | hw | imm16 | Rd |
3154 * +--+-----+-------------+-----+----------------+------+
3156 * sf: 0 -> 32 bit, 1 -> 64 bit
3157 * opc: 00 -> N, 10 -> Z, 11 -> K
3158 * hw: shift/16 (0 or 16; 32 and 48 are valid only when sf=1)
3160 static void disas_movw_imm(DisasContext *s, uint32_t insn)
3162 int rd = extract32(insn, 0, 5);
3163 uint64_t imm = extract32(insn, 5, 16);
3164 int sf = extract32(insn, 31, 1);
3165 int opc = extract32(insn, 29, 2);
3166 int pos = extract32(insn, 21, 2) << 4;
3167 TCGv_i64 tcg_rd = cpu_reg(s, rd);
3168 TCGv_i64 tcg_imm;
3170 if (!sf && (pos >= 32)) {
3171 unallocated_encoding(s);
3172 return;
3175 switch (opc) {
3176 case 0: /* MOVN */
3177 case 2: /* MOVZ */
3178 imm <<= pos;
3179 if (opc == 0) {
3180 imm = ~imm;
3182 if (!sf) {
3183 imm &= 0xffffffffu;
3185 tcg_gen_movi_i64(tcg_rd, imm);
3186 break;
3187 case 3: /* MOVK */
3188 tcg_imm = tcg_const_i64(imm);
3189 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
3190 tcg_temp_free_i64(tcg_imm);
3191 if (!sf) {
3192 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3194 break;
3195 default:
3196 unallocated_encoding(s);
3197 break;
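/*
 * Illustrative: MOVK only rewrites the 16-bit lane selected by hw, e.g.
 * MOVK x0, #0xbeef, lsl #16 deposits imm16 at bits [31:16] and leaves the
 * remaining lanes of x0 unchanged; MOVZ/MOVN build the whole value from
 * the (possibly inverted) shifted immediate.
 */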
3201 /* C3.4.2 Bitfield
3202 * 31 30 29 28 23 22 21 16 15 10 9 5 4 0
3203 * +----+-----+-------------+---+------+------+------+------+
3204 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms | Rn | Rd |
3205 * +----+-----+-------------+---+------+------+------+------+
3207 static void disas_bitfield(DisasContext *s, uint32_t insn)
3209 unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
3210 TCGv_i64 tcg_rd, tcg_tmp;
3212 sf = extract32(insn, 31, 1);
3213 opc = extract32(insn, 29, 2);
3214 n = extract32(insn, 22, 1);
3215 ri = extract32(insn, 16, 6);
3216 si = extract32(insn, 10, 6);
3217 rn = extract32(insn, 5, 5);
3218 rd = extract32(insn, 0, 5);
3219 bitsize = sf ? 64 : 32;
3221 if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3222 unallocated_encoding(s);
3223 return;
3226 tcg_rd = cpu_reg(s, rd);
3228 /* Suppress the zero-extend for !sf. Since RI and SI are constrained
3229 to be smaller than bitsize, we'll never reference data outside the
3230 low 32-bits anyway. */
3231 tcg_tmp = read_cpu_reg(s, rn, 1);
3233 /* Recognize simple(r) extractions. */
3234 if (si >= ri) {
3235 /* Wd<s-r:0> = Wn<s:r> */
3236 len = (si - ri) + 1;
3237 if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */
3238 tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
3239 goto done;
3240 } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */
3241 tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
3242 return;
3244 /* opc == 1, BFXIL fall through to deposit */
3245 tcg_gen_extract_i64(tcg_tmp, tcg_tmp, ri, len);
3246 pos = 0;
3247 } else {
3248 /* Handle the ri > si case with a deposit
3249 * Wd<32+s-r,32-r> = Wn<s:0>
3251 len = si + 1;
3252 pos = (bitsize - ri) & (bitsize - 1);
3255 if (opc == 0 && len < ri) {
3256 /* SBFM: sign extend the destination field from len to fill
3257 the balance of the word. Let the deposit below insert all
3258 of those sign bits. */
3259 tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
3260 len = ri;
3263 if (opc == 1) { /* BFM, BFXIL */
3264 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
3265 } else {
3266 /* SBFM or UBFM: We start with zero, and we haven't modified
3267 any bits outside bitsize, therefore the zero-extension
3268 below is unneeded. */
3269 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
3270 return;
3273 done:
3274 if (!sf) { /* zero extend final result */
3275 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3279 /* C3.4.3 Extract
3280 * 31 30 29 28 23 22 21 20 16 15 10 9 5 4 0
3281 * +----+------+-------------+---+----+------+--------+------+------+
3282 * | sf | op21 | 1 0 0 1 1 1 | N | o0 | Rm | imms | Rn | Rd |
3283 * +----+------+-------------+---+----+------+--------+------+------+
3285 static void disas_extract(DisasContext *s, uint32_t insn)
3287 unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3289 sf = extract32(insn, 31, 1);
3290 n = extract32(insn, 22, 1);
3291 rm = extract32(insn, 16, 5);
3292 imm = extract32(insn, 10, 6);
3293 rn = extract32(insn, 5, 5);
3294 rd = extract32(insn, 0, 5);
3295 op21 = extract32(insn, 29, 2);
3296 op0 = extract32(insn, 21, 1);
3297 bitsize = sf ? 64 : 32;
3299 if (sf != n || op21 || op0 || imm >= bitsize) {
3300 unallocated_encoding(s);
3301 } else {
3302 TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3304 tcg_rd = cpu_reg(s, rd);
3306 if (unlikely(imm == 0)) {
3307 /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3308 * so an extract from bit 0 is a special case.
3310 if (sf) {
3311 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3312 } else {
3313 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3315 } else if (rm == rn) { /* ROR */
3316 tcg_rm = cpu_reg(s, rm);
3317 if (sf) {
3318 tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
3319 } else {
3320 TCGv_i32 tmp = tcg_temp_new_i32();
3321 tcg_gen_extrl_i64_i32(tmp, tcg_rm);
3322 tcg_gen_rotri_i32(tmp, tmp, imm);
3323 tcg_gen_extu_i32_i64(tcg_rd, tmp);
3324 tcg_temp_free_i32(tmp);
3326 } else {
3327 tcg_rm = read_cpu_reg(s, rm, sf);
3328 tcg_rn = read_cpu_reg(s, rn, sf);
3329 tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
3330 tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
3331 tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
3332 if (!sf) {
3333 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
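/*
 * Illustrative: the general EXTR case above forms (Rn:Rm) and takes the
 * datasize bits starting at bit imms, so EXTR x0, x1, x1, #8 (Rn == Rm)
 * is simply a rotate of x1 right by 8, which is why that ROR alias is
 * special-cased.
 */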
3339 /* C3.4 Data processing - immediate */
3340 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3342 switch (extract32(insn, 23, 6)) {
3343 case 0x20: case 0x21: /* PC-rel. addressing */
3344 disas_pc_rel_adr(s, insn);
3345 break;
3346 case 0x22: case 0x23: /* Add/subtract (immediate) */
3347 disas_add_sub_imm(s, insn);
3348 break;
3349 case 0x24: /* Logical (immediate) */
3350 disas_logic_imm(s, insn);
3351 break;
3352 case 0x25: /* Move wide (immediate) */
3353 disas_movw_imm(s, insn);
3354 break;
3355 case 0x26: /* Bitfield */
3356 disas_bitfield(s, insn);
3357 break;
3358 case 0x27: /* Extract */
3359 disas_extract(s, insn);
3360 break;
3361 default:
3362 unallocated_encoding(s);
3363 break;
3367 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
3368 * Note that it is the caller's responsibility to ensure that the
3369 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
3370 * mandated semantics for out of range shifts.
3372 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
3373 enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3375 switch (shift_type) {
3376 case A64_SHIFT_TYPE_LSL:
3377 tcg_gen_shl_i64(dst, src, shift_amount);
3378 break;
3379 case A64_SHIFT_TYPE_LSR:
3380 tcg_gen_shr_i64(dst, src, shift_amount);
3381 break;
3382 case A64_SHIFT_TYPE_ASR:
3383 if (!sf) {
3384 tcg_gen_ext32s_i64(dst, src);
3386 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
3387 break;
3388 case A64_SHIFT_TYPE_ROR:
3389 if (sf) {
3390 tcg_gen_rotr_i64(dst, src, shift_amount);
3391 } else {
3392 TCGv_i32 t0, t1;
3393 t0 = tcg_temp_new_i32();
3394 t1 = tcg_temp_new_i32();
3395 tcg_gen_extrl_i64_i32(t0, src);
3396 tcg_gen_extrl_i64_i32(t1, shift_amount);
3397 tcg_gen_rotr_i32(t0, t0, t1);
3398 tcg_gen_extu_i32_i64(dst, t0);
3399 tcg_temp_free_i32(t0);
3400 tcg_temp_free_i32(t1);
3402 break;
3403 default:
3404 assert(FALSE); /* all shift types should be handled */
3405 break;
3408 if (!sf) { /* zero extend final result */
3409 tcg_gen_ext32u_i64(dst, dst);
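/*
 * Illustrative: in the 32-bit ASR case the source is first sign-extended
 * to 64 bits so that tcg_gen_sar_i64 shifts in copies of bit 31, and the
 * final ext32u gives the architecturally zero-extended W-register result.
 */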
3413 /* Shift a TCGv src by immediate, put result in dst.
3414 * The shift amount must be in range (this should always be true as the
3415 * relevant instructions will UNDEF on bad shift immediates).
3417 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
3418 enum a64_shift_type shift_type, unsigned int shift_i)
3420 assert(shift_i < (sf ? 64 : 32));
3422 if (shift_i == 0) {
3423 tcg_gen_mov_i64(dst, src);
3424 } else {
3425 TCGv_i64 shift_const;
3427 shift_const = tcg_const_i64(shift_i);
3428 shift_reg(dst, src, sf, shift_type, shift_const);
3429 tcg_temp_free_i64(shift_const);
3433 /* C3.5.10 Logical (shifted register)
3434 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0
3435 * +----+-----+-----------+-------+---+------+--------+------+------+
3436 * | sf | opc | 0 1 0 1 0 | shift | N | Rm | imm6 | Rn | Rd |
3437 * +----+-----+-----------+-------+---+------+--------+------+------+
3439 static void disas_logic_reg(DisasContext *s, uint32_t insn)
3441 TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3442 unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3444 sf = extract32(insn, 31, 1);
3445 opc = extract32(insn, 29, 2);
3446 shift_type = extract32(insn, 22, 2);
3447 invert = extract32(insn, 21, 1);
3448 rm = extract32(insn, 16, 5);
3449 shift_amount = extract32(insn, 10, 6);
3450 rn = extract32(insn, 5, 5);
3451 rd = extract32(insn, 0, 5);
3453 if (!sf && (shift_amount & (1 << 5))) {
3454 unallocated_encoding(s);
3455 return;
3458 tcg_rd = cpu_reg(s, rd);
3460 if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3461 /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3462 * register-register MOV and MVN, so it is worth special casing.
3464 tcg_rm = cpu_reg(s, rm);
3465 if (invert) {
3466 tcg_gen_not_i64(tcg_rd, tcg_rm);
3467 if (!sf) {
3468 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3470 } else {
3471 if (sf) {
3472 tcg_gen_mov_i64(tcg_rd, tcg_rm);
3473 } else {
3474 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
3477 return;
3480 tcg_rm = read_cpu_reg(s, rm, sf);
3482 if (shift_amount) {
3483 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3486 tcg_rn = cpu_reg(s, rn);
3488 switch (opc | (invert << 2)) {
3489 case 0: /* AND */
3490 case 3: /* ANDS */
3491 tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3492 break;
3493 case 1: /* ORR */
3494 tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3495 break;
3496 case 2: /* EOR */
3497 tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3498 break;
3499 case 4: /* BIC */
3500 case 7: /* BICS */
3501 tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3502 break;
3503 case 5: /* ORN */
3504 tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3505 break;
3506 case 6: /* EON */
3507 tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3508 break;
3509 default:
3510 assert(FALSE);
3511 break;
3514 if (!sf) {
3515 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3518 if (opc == 3) {
3519 gen_logic_CC(sf, tcg_rd);
3524 * C3.5.1 Add/subtract (extended register)
3526 * 31|30|29|28 24|23 22|21|20 16|15 13|12 10|9 5|4 0|
3527 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3528 * |sf|op| S| 0 1 0 1 1 | opt | 1| Rm |option| imm3 | Rn | Rd |
3529 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3531 * sf: 0 -> 32bit, 1 -> 64bit
3532 * op: 0 -> add , 1 -> sub
3533 * S: 1 -> set flags
3534 * opt: 00
3535 * option: extension type (see DecodeRegExtend)
3536 * imm3: optional shift to Rm
3538 * Rd = Rn + LSL(extend(Rm), amount)
3540 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3542 int rd = extract32(insn, 0, 5);
3543 int rn = extract32(insn, 5, 5);
3544 int imm3 = extract32(insn, 10, 3);
3545 int option = extract32(insn, 13, 3);
3546 int rm = extract32(insn, 16, 5);
3547 bool setflags = extract32(insn, 29, 1);
3548 bool sub_op = extract32(insn, 30, 1);
3549 bool sf = extract32(insn, 31, 1);
3551 TCGv_i64 tcg_rm, tcg_rn; /* temps */
3552 TCGv_i64 tcg_rd;
3553 TCGv_i64 tcg_result;
3555 if (imm3 > 4) {
3556 unallocated_encoding(s);
3557 return;
3560 /* non-flag setting ops may use SP */
3561 if (!setflags) {
3562 tcg_rd = cpu_reg_sp(s, rd);
3563 } else {
3564 tcg_rd = cpu_reg(s, rd);
3566 tcg_rn = read_cpu_reg_sp(s, rn, sf);
3568 tcg_rm = read_cpu_reg(s, rm, sf);
3569 ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3571 tcg_result = tcg_temp_new_i64();
3573 if (!setflags) {
3574 if (sub_op) {
3575 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3576 } else {
3577 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3579 } else {
3580 if (sub_op) {
3581 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3582 } else {
3583 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3587 if (sf) {
3588 tcg_gen_mov_i64(tcg_rd, tcg_result);
3589 } else {
3590 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3593 tcg_temp_free_i64(tcg_result);
3597 * C3.5.2 Add/subtract (shifted register)
3599 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0
3600 * +--+--+--+-----------+-----+--+-------+---------+------+------+
3601 * |sf|op| S| 0 1 0 1 1 |shift| 0| Rm | imm6 | Rn | Rd |
3602 * +--+--+--+-----------+-----+--+-------+---------+------+------+
3604 * sf: 0 -> 32bit, 1 -> 64bit
3605 * op: 0 -> add , 1 -> sub
3606 * S: 1 -> set flags
3607 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3608 * imm6: Shift amount to apply to Rm before the add/sub
3610 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3612 int rd = extract32(insn, 0, 5);
3613 int rn = extract32(insn, 5, 5);
3614 int imm6 = extract32(insn, 10, 6);
3615 int rm = extract32(insn, 16, 5);
3616 int shift_type = extract32(insn, 22, 2);
3617 bool setflags = extract32(insn, 29, 1);
3618 bool sub_op = extract32(insn, 30, 1);
3619 bool sf = extract32(insn, 31, 1);
3621 TCGv_i64 tcg_rd = cpu_reg(s, rd);
3622 TCGv_i64 tcg_rn, tcg_rm;
3623 TCGv_i64 tcg_result;
3625 if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3626 unallocated_encoding(s);
3627 return;
3630 tcg_rn = read_cpu_reg(s, rn, sf);
3631 tcg_rm = read_cpu_reg(s, rm, sf);
3633 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
3635 tcg_result = tcg_temp_new_i64();
3637 if (!setflags) {
3638 if (sub_op) {
3639 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3640 } else {
3641 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3643 } else {
3644 if (sub_op) {
3645 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3646 } else {
3647 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3651 if (sf) {
3652 tcg_gen_mov_i64(tcg_rd, tcg_result);
3653 } else {
3654 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3657 tcg_temp_free_i64(tcg_result);
3660 /* C3.5.9 Data-processing (3 source)
3662 31 30 29 28 24 23 21 20 16 15 14 10 9 5 4 0
3663 +--+------+-----------+------+------+----+------+------+------+
3664 |sf| op54 | 1 1 0 1 1 | op31 | Rm | o0 | Ra | Rn | Rd |
3665 +--+------+-----------+------+------+----+------+------+------+
3668 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
3670 int rd = extract32(insn, 0, 5);
3671 int rn = extract32(insn, 5, 5);
3672 int ra = extract32(insn, 10, 5);
3673 int rm = extract32(insn, 16, 5);
3674 int op_id = (extract32(insn, 29, 3) << 4) |
3675 (extract32(insn, 21, 3) << 1) |
3676 extract32(insn, 15, 1);
3677 bool sf = extract32(insn, 31, 1);
3678 bool is_sub = extract32(op_id, 0, 1);
3679 bool is_high = extract32(op_id, 2, 1);
3680 bool is_signed = false;
3681 TCGv_i64 tcg_op1;
3682 TCGv_i64 tcg_op2;
3683 TCGv_i64 tcg_tmp;
3685 /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
3686 switch (op_id) {
3687 case 0x42: /* SMADDL */
3688 case 0x43: /* SMSUBL */
3689 case 0x44: /* SMULH */
3690 is_signed = true;
3691 break;
3692 case 0x0: /* MADD (32bit) */
3693 case 0x1: /* MSUB (32bit) */
3694 case 0x40: /* MADD (64bit) */
3695 case 0x41: /* MSUB (64bit) */
3696 case 0x4a: /* UMADDL */
3697 case 0x4b: /* UMSUBL */
3698 case 0x4c: /* UMULH */
3699 break;
3700 default:
3701 unallocated_encoding(s);
3702 return;
3705 if (is_high) {
3706 TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
3707 TCGv_i64 tcg_rd = cpu_reg(s, rd);
3708 TCGv_i64 tcg_rn = cpu_reg(s, rn);
3709 TCGv_i64 tcg_rm = cpu_reg(s, rm);
3711 if (is_signed) {
3712 tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3713 } else {
3714 tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3717 tcg_temp_free_i64(low_bits);
3718 return;
3721 tcg_op1 = tcg_temp_new_i64();
3722 tcg_op2 = tcg_temp_new_i64();
3723 tcg_tmp = tcg_temp_new_i64();
3725 if (op_id < 0x42) {
3726 tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
3727 tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
3728 } else {
3729 if (is_signed) {
3730 tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
3731 tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
3732 } else {
3733 tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
3734 tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
3738 if (ra == 31 && !is_sub) {
3739 /* Special-case MADD with rA == XZR; it is the standard MUL alias */
3740 tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
3741 } else {
3742 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
3743 if (is_sub) {
3744 tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3745 } else {
3746 tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3750 if (!sf) {
3751 tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
3754 tcg_temp_free_i64(tcg_op1);
3755 tcg_temp_free_i64(tcg_op2);
3756 tcg_temp_free_i64(tcg_tmp);
3759 /* C3.5.3 - Add/subtract (with carry)
3760 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 10 9 5 4 0
3761 * +--+--+--+------------------------+------+---------+------+-----+
3762 * |sf|op| S| 1 1 0 1 0 0 0 0 | rm | opcode2 | Rn | Rd |
3763 * +--+--+--+------------------------+------+---------+------+-----+
3764 * [000000]
3767 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
3769 unsigned int sf, op, setflags, rm, rn, rd;
3770 TCGv_i64 tcg_y, tcg_rn, tcg_rd;
3772 if (extract32(insn, 10, 6) != 0) {
3773 unallocated_encoding(s);
3774 return;
3777 sf = extract32(insn, 31, 1);
3778 op = extract32(insn, 30, 1);
3779 setflags = extract32(insn, 29, 1);
3780 rm = extract32(insn, 16, 5);
3781 rn = extract32(insn, 5, 5);
3782 rd = extract32(insn, 0, 5);
3784 tcg_rd = cpu_reg(s, rd);
3785 tcg_rn = cpu_reg(s, rn);
3787 if (op) {
3788 tcg_y = new_tmp_a64(s);
3789 tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
3790 } else {
3791 tcg_y = cpu_reg(s, rm);
3794 if (setflags) {
3795 gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
3796 } else {
3797 gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
3801 /* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
3802 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0
3803 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3804 * |sf|op| S| 1 1 0 1 0 0 1 0 |imm5/rm | cond |i/r |o2| Rn |o3|nzcv |
3805 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3806 * [1] y [0] [0]
3808 static void disas_cc(DisasContext *s, uint32_t insn)
3810 unsigned int sf, op, y, cond, rn, nzcv, is_imm;
3811 TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
3812 TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
3813 DisasCompare c;
3815 if (!extract32(insn, 29, 1)) {
3816 unallocated_encoding(s);
3817 return;
3819 if (insn & (1 << 10 | 1 << 4)) {
3820 unallocated_encoding(s);
3821 return;
3823 sf = extract32(insn, 31, 1);
3824 op = extract32(insn, 30, 1);
3825 is_imm = extract32(insn, 11, 1);
3826 y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
3827 cond = extract32(insn, 12, 4);
3828 rn = extract32(insn, 5, 5);
3829 nzcv = extract32(insn, 0, 4);
3831 /* Set T0 = !COND. */
3832 tcg_t0 = tcg_temp_new_i32();
3833 arm_test_cc(&c, cond);
3834 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
3835 arm_free_cc(&c);
3837 /* Load the arguments for the new comparison. */
3838 if (is_imm) {
3839 tcg_y = new_tmp_a64(s);
3840 tcg_gen_movi_i64(tcg_y, y);
3841 } else {
3842 tcg_y = cpu_reg(s, y);
3844 tcg_rn = cpu_reg(s, rn);
3846 /* Set the flags for the new comparison. */
3847 tcg_tmp = tcg_temp_new_i64();
3848 if (op) {
3849 gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3850 } else {
3851 gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3853 tcg_temp_free_i64(tcg_tmp);
3855 /* If COND was false, force the flags to #nzcv. Compute two masks
3856 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
3857 * For tcg hosts that support ANDC, we can make do with just T1.
3858 * In either case, allow the tcg optimizer to delete any unused mask.
3860 tcg_t1 = tcg_temp_new_i32();
3861 tcg_t2 = tcg_temp_new_i32();
3862 tcg_gen_neg_i32(tcg_t1, tcg_t0);
3863 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
3865 if (nzcv & 8) { /* N */
3866 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
3867 } else {
3868 if (TCG_TARGET_HAS_andc_i32) {
3869 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
3870 } else {
3871 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
3874 if (nzcv & 4) { /* Z */
3875 if (TCG_TARGET_HAS_andc_i32) {
3876 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
3877 } else {
3878 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
3880 } else {
3881 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
3883 if (nzcv & 2) { /* C */
3884 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
3885 } else {
3886 if (TCG_TARGET_HAS_andc_i32) {
3887 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
3888 } else {
3889 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
3892 if (nzcv & 1) { /* V */
3893 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
3894 } else {
3895 if (TCG_TARGET_HAS_andc_i32) {
3896 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
3897 } else {
3898 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
3901 tcg_temp_free_i32(tcg_t0);
3902 tcg_temp_free_i32(tcg_t1);
3903 tcg_temp_free_i32(tcg_t2);
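/*
 * Illustrative: when COND is false, t0 == 1, t1 == -1 and t2 == 0, so the
 * OR operations force a flag variable to a known non-zero value and the
 * AND/ANDC operations force it to zero, driving NZCV to the #nzcv
 * immediate; when COND is true all of the masks are identities and the
 * flags from the comparison above are kept.
 */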
3906 /* C3.5.6 Conditional select
3907 * 31 30 29 28 21 20 16 15 12 11 10 9 5 4 0
3908 * +----+----+---+-----------------+------+------+-----+------+------+
3909 * | sf | op | S | 1 1 0 1 0 1 0 0 | Rm | cond | op2 | Rn | Rd |
3910 * +----+----+---+-----------------+------+------+-----+------+------+
3912 static void disas_cond_select(DisasContext *s, uint32_t insn)
3914 unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
3915 TCGv_i64 tcg_rd, zero;
3916 DisasCompare64 c;
3918 if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
3919 /* S == 1 or op2<1> == 1 */
3920 unallocated_encoding(s);
3921 return;
3923 sf = extract32(insn, 31, 1);
3924 else_inv = extract32(insn, 30, 1);
3925 rm = extract32(insn, 16, 5);
3926 cond = extract32(insn, 12, 4);
3927 else_inc = extract32(insn, 10, 1);
3928 rn = extract32(insn, 5, 5);
3929 rd = extract32(insn, 0, 5);
3931 tcg_rd = cpu_reg(s, rd);
3933 a64_test_cc(&c, cond);
3934 zero = tcg_const_i64(0);
3936 if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
3937 /* CSET & CSETM. */
3938 tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
3939 if (else_inv) {
3940 tcg_gen_neg_i64(tcg_rd, tcg_rd);
3942 } else {
3943 TCGv_i64 t_true = cpu_reg(s, rn);
3944 TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
3945 if (else_inv && else_inc) {
3946 tcg_gen_neg_i64(t_false, t_false);
3947 } else if (else_inv) {
3948 tcg_gen_not_i64(t_false, t_false);
3949 } else if (else_inc) {
3950 tcg_gen_addi_i64(t_false, t_false, 1);
3952 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
3955 tcg_temp_free_i64(zero);
3956 a64_free_cc(&c);
3958 if (!sf) {
3959 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3963 static void handle_clz(DisasContext *s, unsigned int sf,
3964 unsigned int rn, unsigned int rd)
3966 TCGv_i64 tcg_rd, tcg_rn;
3967 tcg_rd = cpu_reg(s, rd);
3968 tcg_rn = cpu_reg(s, rn);
3970 if (sf) {
3971 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
3972 } else {
3973 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3974 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3975 tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
3976 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3977 tcg_temp_free_i32(tcg_tmp32);
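/*
 * Illustrative: the final argument of tcg_gen_clzi_i64/_i32 is the result
 * returned for a zero input, so CLZ of 0 correctly yields 64 (or 32 for
 * the W form) without any extra branch.
 */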
3981 static void handle_cls(DisasContext *s, unsigned int sf,
3982 unsigned int rn, unsigned int rd)
3984 TCGv_i64 tcg_rd, tcg_rn;
3985 tcg_rd = cpu_reg(s, rd);
3986 tcg_rn = cpu_reg(s, rn);
3988 if (sf) {
3989 tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
3990 } else {
3991 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3992 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3993 tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
3994 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3995 tcg_temp_free_i32(tcg_tmp32);
3999 static void handle_rbit(DisasContext *s, unsigned int sf,
4000 unsigned int rn, unsigned int rd)
4002 TCGv_i64 tcg_rd, tcg_rn;
4003 tcg_rd = cpu_reg(s, rd);
4004 tcg_rn = cpu_reg(s, rn);
4006 if (sf) {
4007 gen_helper_rbit64(tcg_rd, tcg_rn);
4008 } else {
4009 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4010 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4011 gen_helper_rbit(tcg_tmp32, tcg_tmp32);
4012 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4013 tcg_temp_free_i32(tcg_tmp32);
4017 /* C5.6.149 REV with sf==1, opcode==3 ("REV64") */
4018 static void handle_rev64(DisasContext *s, unsigned int sf,
4019 unsigned int rn, unsigned int rd)
4021 if (!sf) {
4022 unallocated_encoding(s);
4023 return;
4025 tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
4028 /* C5.6.149 REV with sf==0, opcode==2
4029 * C5.6.151 REV32 (sf==1, opcode==2)
4031 static void handle_rev32(DisasContext *s, unsigned int sf,
4032 unsigned int rn, unsigned int rd)
4034 TCGv_i64 tcg_rd = cpu_reg(s, rd);
4036 if (sf) {
4037 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4038 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4040 /* bswap32_i64 requires zero high word */
4041 tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
4042 tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
4043 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
4044 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
4045 tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
4047 tcg_temp_free_i64(tcg_tmp);
4048 } else {
4049 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
4050 tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
4054 /* C5.6.150 REV16 (opcode==1) */
4055 static void handle_rev16(DisasContext *s, unsigned int sf,
4056 unsigned int rn, unsigned int rd)
4058 TCGv_i64 tcg_rd = cpu_reg(s, rd);
4059 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4060 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4061 TCGv_i64 mask = tcg_const_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
4063 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
4064 tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
4065 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
4066 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
4067 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
4069 tcg_temp_free_i64(mask);
4070 tcg_temp_free_i64(tcg_tmp);
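/*
 * Worked example (illustrative): the mask/shift sequence above swaps the
 * bytes within each 16-bit lane, e.g. 0x1122334455667788 becomes
 * 0x2211443366558877 for the 64-bit form.
 */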
4073 /* C3.5.7 Data-processing (1 source)
4074 * 31 30 29 28 21 20 16 15 10 9 5 4 0
4075 * +----+---+---+-----------------+---------+--------+------+------+
4076 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode | Rn | Rd |
4077 * +----+---+---+-----------------+---------+--------+------+------+
4079 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
4081 unsigned int sf, opcode, rn, rd;
4083 if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
4084 unallocated_encoding(s);
4085 return;
4088 sf = extract32(insn, 31, 1);
4089 opcode = extract32(insn, 10, 6);
4090 rn = extract32(insn, 5, 5);
4091 rd = extract32(insn, 0, 5);
4093 switch (opcode) {
4094 case 0: /* RBIT */
4095 handle_rbit(s, sf, rn, rd);
4096 break;
4097 case 1: /* REV16 */
4098 handle_rev16(s, sf, rn, rd);
4099 break;
4100 case 2: /* REV32 */
4101 handle_rev32(s, sf, rn, rd);
4102 break;
4103 case 3: /* REV64 */
4104 handle_rev64(s, sf, rn, rd);
4105 break;
4106 case 4: /* CLZ */
4107 handle_clz(s, sf, rn, rd);
4108 break;
4109 case 5: /* CLS */
4110 handle_cls(s, sf, rn, rd);
4111 break;
4115 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
4116 unsigned int rm, unsigned int rn, unsigned int rd)
4118 TCGv_i64 tcg_n, tcg_m, tcg_rd;
4119 tcg_rd = cpu_reg(s, rd);
4121 if (!sf && is_signed) {
4122 tcg_n = new_tmp_a64(s);
4123 tcg_m = new_tmp_a64(s);
4124 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
4125 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
4126 } else {
4127 tcg_n = read_cpu_reg(s, rn, sf);
4128 tcg_m = read_cpu_reg(s, rm, sf);
4131 if (is_signed) {
4132 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
4133 } else {
4134 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
4137 if (!sf) { /* zero extend final result */
4138 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4142 /* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */
4143 static void handle_shift_reg(DisasContext *s,
4144 enum a64_shift_type shift_type, unsigned int sf,
4145 unsigned int rm, unsigned int rn, unsigned int rd)
4147 TCGv_i64 tcg_shift = tcg_temp_new_i64();
4148 TCGv_i64 tcg_rd = cpu_reg(s, rd);
4149 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4151 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
4152 shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
4153 tcg_temp_free_i64(tcg_shift);
4156 /* CRC32[BHWX], CRC32C[BHWX] */
4157 static void handle_crc32(DisasContext *s,
4158 unsigned int sf, unsigned int sz, bool crc32c,
4159 unsigned int rm, unsigned int rn, unsigned int rd)
4161 TCGv_i64 tcg_acc, tcg_val;
4162 TCGv_i32 tcg_bytes;
4164 if (!arm_dc_feature(s, ARM_FEATURE_CRC)
4165 || (sf == 1 && sz != 3)
4166 || (sf == 0 && sz == 3)) {
4167 unallocated_encoding(s);
4168 return;
4171 if (sz == 3) {
4172 tcg_val = cpu_reg(s, rm);
4173 } else {
4174 uint64_t mask;
4175 switch (sz) {
4176 case 0:
4177 mask = 0xFF;
4178 break;
4179 case 1:
4180 mask = 0xFFFF;
4181 break;
4182 case 2:
4183 mask = 0xFFFFFFFF;
4184 break;
4185 default:
4186 g_assert_not_reached();
4188 tcg_val = new_tmp_a64(s);
4189 tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
4192 tcg_acc = cpu_reg(s, rn);
4193 tcg_bytes = tcg_const_i32(1 << sz);
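/* For reference: tcg_bytes (1 << sz) is the number of input bytes consumed
 * per instruction: sz == 0 -> 1 (CRC32B/CRC32CB), 1 -> 2 (H), 2 -> 4 (W),
 * 3 -> 8 (X).
 */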
4195 if (crc32c) {
4196 gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4197 } else {
4198 gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4201 tcg_temp_free_i32(tcg_bytes);
4204 /* C3.5.8 Data-processing (2 source)
4205 * 31 30 29 28 21 20 16 15 10 9 5 4 0
4206 * +----+---+---+-----------------+------+--------+------+------+
4207 * | sf | 0 | S | 1 1 0 1 0 1 1 0 | Rm | opcode | Rn | Rd |
4208 * +----+---+---+-----------------+------+--------+------+------+
4210 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
4212 unsigned int sf, rm, opcode, rn, rd;
4213 sf = extract32(insn, 31, 1);
4214 rm = extract32(insn, 16, 5);
4215 opcode = extract32(insn, 10, 6);
4216 rn = extract32(insn, 5, 5);
4217 rd = extract32(insn, 0, 5);
4219 if (extract32(insn, 29, 1)) {
4220 unallocated_encoding(s);
4221 return;
4224 switch (opcode) {
4225 case 2: /* UDIV */
4226 handle_div(s, false, sf, rm, rn, rd);
4227 break;
4228 case 3: /* SDIV */
4229 handle_div(s, true, sf, rm, rn, rd);
4230 break;
4231 case 8: /* LSLV */
4232 handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
4233 break;
4234 case 9: /* LSRV */
4235 handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
4236 break;
4237 case 10: /* ASRV */
4238 handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
4239 break;
4240 case 11: /* RORV */
4241 handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
4242 break;
4243 case 16:
4244 case 17:
4245 case 18:
4246 case 19:
4247 case 20:
4248 case 21:
4249 case 22:
4250 case 23: /* CRC32 */
4252 int sz = extract32(opcode, 0, 2);
4253 bool crc32c = extract32(opcode, 2, 1);
4254 handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
4255 break;
4257 default:
4258 unallocated_encoding(s);
4259 break;
4263 /* C3.5 Data processing - register */
4264 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
4266 switch (extract32(insn, 24, 5)) {
4267 case 0x0a: /* Logical (shifted register) */
4268 disas_logic_reg(s, insn);
4269 break;
4270 case 0x0b: /* Add/subtract */
4271 if (insn & (1 << 21)) { /* (extended register) */
4272 disas_add_sub_ext_reg(s, insn);
4273 } else {
4274 disas_add_sub_reg(s, insn);
4276 break;
4277 case 0x1b: /* Data-processing (3 source) */
4278 disas_data_proc_3src(s, insn);
4279 break;
4280 case 0x1a:
4281 switch (extract32(insn, 21, 3)) {
4282 case 0x0: /* Add/subtract (with carry) */
4283 disas_adc_sbc(s, insn);
4284 break;
4285 case 0x2: /* Conditional compare */
4286 disas_cc(s, insn); /* both imm and reg forms */
4287 break;
4288 case 0x4: /* Conditional select */
4289 disas_cond_select(s, insn);
4290 break;
4291 case 0x6: /* Data-processing */
4292 if (insn & (1 << 30)) { /* (1 source) */
4293 disas_data_proc_1src(s, insn);
4294 } else { /* (2 source) */
4295 disas_data_proc_2src(s, insn);
4297 break;
4298 default:
4299 unallocated_encoding(s);
4300 break;
4302 break;
4303 default:
4304 unallocated_encoding(s);
4305 break;
4309 static void handle_fp_compare(DisasContext *s, bool is_double,
4310 unsigned int rn, unsigned int rm,
4311 bool cmp_with_zero, bool signal_all_nans)
4313 TCGv_i64 tcg_flags = tcg_temp_new_i64();
4314 TCGv_ptr fpst = get_fpstatus_ptr();
4316 if (is_double) {
4317 TCGv_i64 tcg_vn, tcg_vm;
4319 tcg_vn = read_fp_dreg(s, rn);
4320 if (cmp_with_zero) {
4321 tcg_vm = tcg_const_i64(0);
4322 } else {
4323 tcg_vm = read_fp_dreg(s, rm);
4325 if (signal_all_nans) {
4326 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4327 } else {
4328 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4330 tcg_temp_free_i64(tcg_vn);
4331 tcg_temp_free_i64(tcg_vm);
4332 } else {
4333 TCGv_i32 tcg_vn, tcg_vm;
4335 tcg_vn = read_fp_sreg(s, rn);
4336 if (cmp_with_zero) {
4337 tcg_vm = tcg_const_i32(0);
4338 } else {
4339 tcg_vm = read_fp_sreg(s, rm);
4341 if (signal_all_nans) {
4342 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4343 } else {
4344 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4346 tcg_temp_free_i32(tcg_vn);
4347 tcg_temp_free_i32(tcg_vm);
4350 tcg_temp_free_ptr(fpst);
4352 gen_set_nzcv(tcg_flags);
4354 tcg_temp_free_i64(tcg_flags);
4357 /* C3.6.22 Floating point compare
4358 * 31 30 29 28 24 23 22 21 20 16 15 14 13 10 9 5 4 0
4359 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4360 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | op | 1 0 0 0 | Rn | op2 |
4361 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4363 static void disas_fp_compare(DisasContext *s, uint32_t insn)
4365 unsigned int mos, type, rm, op, rn, opc, op2r;
4367 mos = extract32(insn, 29, 3);
4368 type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4369 rm = extract32(insn, 16, 5);
4370 op = extract32(insn, 14, 2);
4371 rn = extract32(insn, 5, 5);
4372 opc = extract32(insn, 3, 2);
4373 op2r = extract32(insn, 0, 3);
4375 if (mos || op || op2r || type > 1) {
4376 unallocated_encoding(s);
4377 return;
4380 if (!fp_access_check(s)) {
4381 return;
4384 handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
4387 /* C3.6.23 Floating point conditional compare
4388 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0
4389 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4390 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 0 1 | Rn | op | nzcv |
4391 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4393 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
4395 unsigned int mos, type, rm, cond, rn, op, nzcv;
4396 TCGv_i64 tcg_flags;
4397 TCGLabel *label_continue = NULL;
4399 mos = extract32(insn, 29, 3);
4400 type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4401 rm = extract32(insn, 16, 5);
4402 cond = extract32(insn, 12, 4);
4403 rn = extract32(insn, 5, 5);
4404 op = extract32(insn, 4, 1);
4405 nzcv = extract32(insn, 0, 4);
4407 if (mos || type > 1) {
4408 unallocated_encoding(s);
4409 return;
4412 if (!fp_access_check(s)) {
4413 return;
4416 if (cond < 0x0e) { /* not always */
4417 TCGLabel *label_match = gen_new_label();
4418 label_continue = gen_new_label();
4419 arm_gen_test_cc(cond, label_match);
4420 /* nomatch: */
4421 tcg_flags = tcg_const_i64(nzcv << 28);
4422 gen_set_nzcv(tcg_flags);
4423 tcg_temp_free_i64(tcg_flags);
4424 tcg_gen_br(label_continue);
4425 gen_set_label(label_match);
4428 handle_fp_compare(s, type, rn, rm, false, op);
4430 if (cond < 0x0e) {
4431 gen_set_label(label_continue);
4435 /* C3.6.24 Floating point conditional select
4436 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
4437 * +---+---+---+-----------+------+---+------+------+-----+------+------+
4438 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 1 1 | Rn | Rd |
4439 * +---+---+---+-----------+------+---+------+------+-----+------+------+
4441 static void disas_fp_csel(DisasContext *s, uint32_t insn)
4443 unsigned int mos, type, rm, cond, rn, rd;
4444 TCGv_i64 t_true, t_false, t_zero;
4445 DisasCompare64 c;
4447 mos = extract32(insn, 29, 3);
4448 type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4449 rm = extract32(insn, 16, 5);
4450 cond = extract32(insn, 12, 4);
4451 rn = extract32(insn, 5, 5);
4452 rd = extract32(insn, 0, 5);
4454 if (mos || type > 1) {
4455 unallocated_encoding(s);
4456 return;
4459 if (!fp_access_check(s)) {
4460 return;
4463 /* Zero extend sreg inputs to 64 bits now. */
4464 t_true = tcg_temp_new_i64();
4465 t_false = tcg_temp_new_i64();
4466 read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
4467 read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);
4469 a64_test_cc(&c, cond);
4470 t_zero = tcg_const_i64(0);
4471 tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
4472 tcg_temp_free_i64(t_zero);
4473 tcg_temp_free_i64(t_false);
4474 a64_free_cc(&c);
4476 /* Note that sregs write back zeros to the high bits,
4477 and we've already done the zero-extension. */
4478 write_fp_dreg(s, rd, t_true);
4479 tcg_temp_free_i64(t_true);
4482 /* C3.6.25 Floating-point data-processing (1 source) - single precision */
4483 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
4485 TCGv_ptr fpst;
4486 TCGv_i32 tcg_op;
4487 TCGv_i32 tcg_res;
4489 fpst = get_fpstatus_ptr();
4490 tcg_op = read_fp_sreg(s, rn);
4491 tcg_res = tcg_temp_new_i32();
4493 switch (opcode) {
4494 case 0x0: /* FMOV */
4495 tcg_gen_mov_i32(tcg_res, tcg_op);
4496 break;
4497 case 0x1: /* FABS */
4498 gen_helper_vfp_abss(tcg_res, tcg_op);
4499 break;
4500 case 0x2: /* FNEG */
4501 gen_helper_vfp_negs(tcg_res, tcg_op);
4502 break;
4503 case 0x3: /* FSQRT */
4504 gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
4505 break;
4506 case 0x8: /* FRINTN */
4507 case 0x9: /* FRINTP */
4508 case 0xa: /* FRINTM */
4509 case 0xb: /* FRINTZ */
4510 case 0xc: /* FRINTA */
4512 TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4514 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4515 gen_helper_rints(tcg_res, tcg_op, fpst);
4517 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4518 tcg_temp_free_i32(tcg_rmode);
4519 break;
4521 case 0xe: /* FRINTX */
4522 gen_helper_rints_exact(tcg_res, tcg_op, fpst);
4523 break;
4524 case 0xf: /* FRINTI */
4525 gen_helper_rints(tcg_res, tcg_op, fpst);
4526 break;
4527 default:
4528 abort();
4531 write_fp_sreg(s, rd, tcg_res);
4533 tcg_temp_free_ptr(fpst);
4534 tcg_temp_free_i32(tcg_op);
4535 tcg_temp_free_i32(tcg_res);
4538 /* C3.6.25 Floating-point data-processing (1 source) - double precision */
4539 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
4541 TCGv_ptr fpst;
4542 TCGv_i64 tcg_op;
4543 TCGv_i64 tcg_res;
4545 fpst = get_fpstatus_ptr();
4546 tcg_op = read_fp_dreg(s, rn);
4547 tcg_res = tcg_temp_new_i64();
4549 switch (opcode) {
4550 case 0x0: /* FMOV */
4551 tcg_gen_mov_i64(tcg_res, tcg_op);
4552 break;
4553 case 0x1: /* FABS */
4554 gen_helper_vfp_absd(tcg_res, tcg_op);
4555 break;
4556 case 0x2: /* FNEG */
4557 gen_helper_vfp_negd(tcg_res, tcg_op);
4558 break;
4559 case 0x3: /* FSQRT */
4560 gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
4561 break;
4562 case 0x8: /* FRINTN */
4563 case 0x9: /* FRINTP */
4564 case 0xa: /* FRINTM */
4565 case 0xb: /* FRINTZ */
4566 case 0xc: /* FRINTA */
4568 TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4570 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4571 gen_helper_rintd(tcg_res, tcg_op, fpst);
4573 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4574 tcg_temp_free_i32(tcg_rmode);
4575 break;
4577 case 0xe: /* FRINTX */
4578 gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
4579 break;
4580 case 0xf: /* FRINTI */
4581 gen_helper_rintd(tcg_res, tcg_op, fpst);
4582 break;
4583 default:
4584 abort();
4587 write_fp_dreg(s, rd, tcg_res);
4589 tcg_temp_free_ptr(fpst);
4590 tcg_temp_free_i64(tcg_op);
4591 tcg_temp_free_i64(tcg_res);
4594 static void handle_fp_fcvt(DisasContext *s, int opcode,
4595 int rd, int rn, int dtype, int ntype)
4597 switch (ntype) {
4598 case 0x0:
4600 TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4601 if (dtype == 1) {
4602 /* Single to double */
4603 TCGv_i64 tcg_rd = tcg_temp_new_i64();
4604 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
4605 write_fp_dreg(s, rd, tcg_rd);
4606 tcg_temp_free_i64(tcg_rd);
4607 } else {
4608 /* Single to half */
4609 TCGv_i32 tcg_rd = tcg_temp_new_i32();
4610 gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
4611 /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4612 write_fp_sreg(s, rd, tcg_rd);
4613 tcg_temp_free_i32(tcg_rd);
4615 tcg_temp_free_i32(tcg_rn);
4616 break;
4618 case 0x1:
4620 TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
4621 TCGv_i32 tcg_rd = tcg_temp_new_i32();
4622 if (dtype == 0) {
4623 /* Double to single */
4624 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
4625 } else {
4626 /* Double to half */
4627 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
4628 /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4630 write_fp_sreg(s, rd, tcg_rd);
4631 tcg_temp_free_i32(tcg_rd);
4632 tcg_temp_free_i64(tcg_rn);
4633 break;
4635 case 0x3:
4637 TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4638 tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
4639 if (dtype == 0) {
4640 /* Half to single */
4641 TCGv_i32 tcg_rd = tcg_temp_new_i32();
4642 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
4643 write_fp_sreg(s, rd, tcg_rd);
4644 tcg_temp_free_i32(tcg_rd);
4645 } else {
4646 /* Half to double */
4647 TCGv_i64 tcg_rd = tcg_temp_new_i64();
4648 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
4649 write_fp_dreg(s, rd, tcg_rd);
4650 tcg_temp_free_i64(tcg_rd);
4652 tcg_temp_free_i32(tcg_rn);
4653 break;
4655 default:
4656 abort();
4660 /* C3.6.25 Floating point data-processing (1 source)
4661 * 31 30 29 28 24 23 22 21 20 15 14 10 9 5 4 0
4662 * +---+---+---+-----------+------+---+--------+-----------+------+------+
4663 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 | Rn | Rd |
4664 * +---+---+---+-----------+------+---+--------+-----------+------+------+
4666 static void disas_fp_1src(DisasContext *s, uint32_t insn)
4668 int type = extract32(insn, 22, 2);
4669 int opcode = extract32(insn, 15, 6);
4670 int rn = extract32(insn, 5, 5);
4671 int rd = extract32(insn, 0, 5);
4673 switch (opcode) {
4674 case 0x4: case 0x5: case 0x7:
4676 /* FCVT between half, single and double precision */
4677 int dtype = extract32(opcode, 0, 2);
4678 if (type == 2 || dtype == type) {
4679 unallocated_encoding(s);
4680 return;
4682 if (!fp_access_check(s)) {
4683 return;
4686 handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
4687 break;
4689 case 0x0 ... 0x3:
4690 case 0x8 ... 0xc:
4691 case 0xe ... 0xf:
4692 /* 32-to-32 and 64-to-64 ops */
4693 switch (type) {
4694 case 0:
4695 if (!fp_access_check(s)) {
4696 return;
4699 handle_fp_1src_single(s, opcode, rd, rn);
4700 break;
4701 case 1:
4702 if (!fp_access_check(s)) {
4703 return;
4706 handle_fp_1src_double(s, opcode, rd, rn);
4707 break;
4708 default:
4709 unallocated_encoding(s);
4711 break;
4712 default:
4713 unallocated_encoding(s);
4714 break;
4718 /* C3.6.26 Floating-point data-processing (2 source) - single precision */
4719 static void handle_fp_2src_single(DisasContext *s, int opcode,
4720 int rd, int rn, int rm)
4722 TCGv_i32 tcg_op1;
4723 TCGv_i32 tcg_op2;
4724 TCGv_i32 tcg_res;
4725 TCGv_ptr fpst;
4727 tcg_res = tcg_temp_new_i32();
4728 fpst = get_fpstatus_ptr();
4729 tcg_op1 = read_fp_sreg(s, rn);
4730 tcg_op2 = read_fp_sreg(s, rm);
4732 switch (opcode) {
4733 case 0x0: /* FMUL */
4734 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4735 break;
4736 case 0x1: /* FDIV */
4737 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
4738 break;
4739 case 0x2: /* FADD */
4740 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
4741 break;
4742 case 0x3: /* FSUB */
4743 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
4744 break;
4745 case 0x4: /* FMAX */
4746 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
4747 break;
4748 case 0x5: /* FMIN */
4749 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
4750 break;
4751 case 0x6: /* FMAXNM */
4752 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
4753 break;
4754 case 0x7: /* FMINNM */
4755 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
4756 break;
4757 case 0x8: /* FNMUL */
4758 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4759 gen_helper_vfp_negs(tcg_res, tcg_res);
4760 break;
4763 write_fp_sreg(s, rd, tcg_res);
4765 tcg_temp_free_ptr(fpst);
4766 tcg_temp_free_i32(tcg_op1);
4767 tcg_temp_free_i32(tcg_op2);
4768 tcg_temp_free_i32(tcg_res);
4771 /* C3.6.26 Floating-point data-processing (2 source) - double precision */
4772 static void handle_fp_2src_double(DisasContext *s, int opcode,
4773 int rd, int rn, int rm)
4775 TCGv_i64 tcg_op1;
4776 TCGv_i64 tcg_op2;
4777 TCGv_i64 tcg_res;
4778 TCGv_ptr fpst;
4780 tcg_res = tcg_temp_new_i64();
4781 fpst = get_fpstatus_ptr();
4782 tcg_op1 = read_fp_dreg(s, rn);
4783 tcg_op2 = read_fp_dreg(s, rm);
4785 switch (opcode) {
4786 case 0x0: /* FMUL */
4787 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4788 break;
4789 case 0x1: /* FDIV */
4790 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
4791 break;
4792 case 0x2: /* FADD */
4793 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
4794 break;
4795 case 0x3: /* FSUB */
4796 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
4797 break;
4798 case 0x4: /* FMAX */
4799 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
4800 break;
4801 case 0x5: /* FMIN */
4802 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
4803 break;
4804 case 0x6: /* FMAXNM */
4805 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4806 break;
4807 case 0x7: /* FMINNM */
4808 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4809 break;
4810 case 0x8: /* FNMUL */
4811 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4812 gen_helper_vfp_negd(tcg_res, tcg_res);
4813 break;
4816 write_fp_dreg(s, rd, tcg_res);
4818 tcg_temp_free_ptr(fpst);
4819 tcg_temp_free_i64(tcg_op1);
4820 tcg_temp_free_i64(tcg_op2);
4821 tcg_temp_free_i64(tcg_res);
4824 /* C3.6.26 Floating point data-processing (2 source)
4825 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
4826 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4827 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | opcode | 1 0 | Rn | Rd |
4828 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4830 static void disas_fp_2src(DisasContext *s, uint32_t insn)
4832 int type = extract32(insn, 22, 2);
4833 int rd = extract32(insn, 0, 5);
4834 int rn = extract32(insn, 5, 5);
4835 int rm = extract32(insn, 16, 5);
4836 int opcode = extract32(insn, 12, 4);
4838 if (opcode > 8) {
4839 unallocated_encoding(s);
4840 return;
4843 switch (type) {
4844 case 0:
4845 if (!fp_access_check(s)) {
4846 return;
4848 handle_fp_2src_single(s, opcode, rd, rn, rm);
4849 break;
4850 case 1:
4851 if (!fp_access_check(s)) {
4852 return;
4854 handle_fp_2src_double(s, opcode, rd, rn, rm);
4855 break;
4856 default:
4857 unallocated_encoding(s);
4861 /* C3.6.27 Floating-point data-processing (3 source) - single precision */
4862 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
4863 int rd, int rn, int rm, int ra)
4865 TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
4866 TCGv_i32 tcg_res = tcg_temp_new_i32();
4867 TCGv_ptr fpst = get_fpstatus_ptr();
4869 tcg_op1 = read_fp_sreg(s, rn);
4870 tcg_op2 = read_fp_sreg(s, rm);
4871 tcg_op3 = read_fp_sreg(s, ra);
4873 /* These are fused multiply-add, and must be done as one
4874 * floating point operation with no rounding between the
4875 * multiplication and addition steps.
4876 * NB that doing the negations here as separate steps is
4877 * correct: an input NaN should come out with its sign bit
4878 * flipped if it is a negated input.
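/* Illustrative summary of how the o1/o0 negations below map to the A64
 * mnemonics (rd, rn, rm, ra in the usual operand order):
 *   o1 == 0, o0 == 0: FMADD   rd =  ra + rn * rm
 *   o1 == 0, o0 == 1: FMSUB   rd =  ra - rn * rm   (rn negated)
 *   o1 == 1, o0 == 0: FNMADD  rd = -ra - rn * rm   (ra and rn negated)
 *   o1 == 1, o0 == 1: FNMSUB  rd = -ra + rn * rm   (ra negated)
 * each evaluated as a single fused multiply-add of the (possibly negated)
 * inputs.
 */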
4880 if (o1 == true) {
4881 gen_helper_vfp_negs(tcg_op3, tcg_op3);
4884 if (o0 != o1) {
4885 gen_helper_vfp_negs(tcg_op1, tcg_op1);
4888 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4890 write_fp_sreg(s, rd, tcg_res);
4892 tcg_temp_free_ptr(fpst);
4893 tcg_temp_free_i32(tcg_op1);
4894 tcg_temp_free_i32(tcg_op2);
4895 tcg_temp_free_i32(tcg_op3);
4896 tcg_temp_free_i32(tcg_res);
4899 /* C3.6.27 Floating-point data-processing (3 source) - double precision */
4900 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
4901 int rd, int rn, int rm, int ra)
4903 TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
4904 TCGv_i64 tcg_res = tcg_temp_new_i64();
4905 TCGv_ptr fpst = get_fpstatus_ptr();
4907 tcg_op1 = read_fp_dreg(s, rn);
4908 tcg_op2 = read_fp_dreg(s, rm);
4909 tcg_op3 = read_fp_dreg(s, ra);
4911 /* These are fused multiply-add, and must be done as one
4912 * floating point operation with no rounding between the
4913 * multiplication and addition steps.
4914 * NB that doing the negations here as separate steps is
4915 * correct: an input NaN should come out with its sign bit
4916 * flipped if it is a negated input.
4918 if (o1 == true) {
4919 gen_helper_vfp_negd(tcg_op3, tcg_op3);
4922 if (o0 != o1) {
4923 gen_helper_vfp_negd(tcg_op1, tcg_op1);
4926 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4928 write_fp_dreg(s, rd, tcg_res);
4930 tcg_temp_free_ptr(fpst);
4931 tcg_temp_free_i64(tcg_op1);
4932 tcg_temp_free_i64(tcg_op2);
4933 tcg_temp_free_i64(tcg_op3);
4934 tcg_temp_free_i64(tcg_res);
4937 /* C3.6.27 Floating point data-processing (3 source)
4938 * 31 30 29 28 24 23 22 21 20 16 15 14 10 9 5 4 0
4939 * +---+---+---+-----------+------+----+------+----+------+------+------+
4940 * | M | 0 | S | 1 1 1 1 1 | type | o1 | Rm | o0 | Ra | Rn | Rd |
4941 * +---+---+---+-----------+------+----+------+----+------+------+------+
4943 static void disas_fp_3src(DisasContext *s, uint32_t insn)
4945 int type = extract32(insn, 22, 2);
4946 int rd = extract32(insn, 0, 5);
4947 int rn = extract32(insn, 5, 5);
4948 int ra = extract32(insn, 10, 5);
4949 int rm = extract32(insn, 16, 5);
4950 bool o0 = extract32(insn, 15, 1);
4951 bool o1 = extract32(insn, 21, 1);
4953 switch (type) {
4954 case 0:
4955 if (!fp_access_check(s)) {
4956 return;
4958 handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
4959 break;
4960 case 1:
4961 if (!fp_access_check(s)) {
4962 return;
4964 handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
4965 break;
4966 default:
4967 unallocated_encoding(s);
4971 /* C3.6.28 Floating point immediate
4972 * 31 30 29 28 24 23 22 21 20 13 12 10 9 5 4 0
4973 * +---+---+---+-----------+------+---+------------+-------+------+------+
4974 * | M | 0 | S | 1 1 1 1 0 | type | 1 | imm8 | 1 0 0 | imm5 | Rd |
4975 * +---+---+---+-----------+------+---+------------+-------+------+------+
4977 static void disas_fp_imm(DisasContext *s, uint32_t insn)
4979 int rd = extract32(insn, 0, 5);
4980 int imm8 = extract32(insn, 13, 8);
4981 int is_double = extract32(insn, 22, 2);
4982 uint64_t imm;
4983 TCGv_i64 tcg_res;
4985 if (is_double > 1) {
4986 unallocated_encoding(s);
4987 return;
4990 if (!fp_access_check(s)) {
4991 return;
4994 /* The imm8 encodes the sign bit, enough bits to represent
4995 * an exponent in the range 01....1xx to 10....0xx,
4996 * and the most significant 4 bits of the mantissa; see
4997 * VFPExpandImm() in the v8 ARM ARM.
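/* Worked example (illustrative): imm8 = 0x70 expands to
 *   double: (0x3fc0 | 0x30) << 48        = 0x3ff0000000000000  (1.0)
 *   single: (0x3e00 | (0x30 << 3)) << 16 = 0x3f800000          (1.0)
 */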
4999 if (is_double) {
5000 imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5001 (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
5002 extract32(imm8, 0, 6);
5003 imm <<= 48;
5004 } else {
5005 imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5006 (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
5007 (extract32(imm8, 0, 6) << 3);
5008 imm <<= 16;
5011 tcg_res = tcg_const_i64(imm);
5012 write_fp_dreg(s, rd, tcg_res);
5013 tcg_temp_free_i64(tcg_res);
5016 /* Handle floating point <=> fixed point conversions. Note that we can
5017 * also deal with fp <=> integer conversions as a special case (scale == 64)
5018 * OPTME: consider handling that special case specially or at least skipping
5019 * the call to scalbn in the helpers for zero shifts.
5021 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
5022 bool itof, int rmode, int scale, int sf, int type)
5024 bool is_signed = !(opcode & 1);
5025 bool is_double = type;
5026 TCGv_ptr tcg_fpstatus;
5027 TCGv_i32 tcg_shift;
5029 tcg_fpstatus = get_fpstatus_ptr();
5031 tcg_shift = tcg_const_i32(64 - scale);
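/* tcg_shift is the number of fractional bits in the fixed-point value.
 * Illustrative example: SCVTF Sd, Wn, #8 encodes scale = 56, so
 * tcg_shift = 64 - 56 = 8 and the helper scales the integer by 2^-8;
 * the pure integer conversions pass scale = 64, giving a shift of 0.
 */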
5033 if (itof) {
5034 TCGv_i64 tcg_int = cpu_reg(s, rn);
5035 if (!sf) {
5036 TCGv_i64 tcg_extend = new_tmp_a64(s);
5038 if (is_signed) {
5039 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
5040 } else {
5041 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
5044 tcg_int = tcg_extend;
5047 if (is_double) {
5048 TCGv_i64 tcg_double = tcg_temp_new_i64();
5049 if (is_signed) {
5050 gen_helper_vfp_sqtod(tcg_double, tcg_int,
5051 tcg_shift, tcg_fpstatus);
5052 } else {
5053 gen_helper_vfp_uqtod(tcg_double, tcg_int,
5054 tcg_shift, tcg_fpstatus);
5056 write_fp_dreg(s, rd, tcg_double);
5057 tcg_temp_free_i64(tcg_double);
5058 } else {
5059 TCGv_i32 tcg_single = tcg_temp_new_i32();
5060 if (is_signed) {
5061 gen_helper_vfp_sqtos(tcg_single, tcg_int,
5062 tcg_shift, tcg_fpstatus);
5063 } else {
5064 gen_helper_vfp_uqtos(tcg_single, tcg_int,
5065 tcg_shift, tcg_fpstatus);
5067 write_fp_sreg(s, rd, tcg_single);
5068 tcg_temp_free_i32(tcg_single);
5070 } else {
5071 TCGv_i64 tcg_int = cpu_reg(s, rd);
5072 TCGv_i32 tcg_rmode;
5074 if (extract32(opcode, 2, 1)) {
5075 /* There are too many rounding modes to all fit into rmode,
5076 * so FCVTA[US] is a special case.
5078 rmode = FPROUNDING_TIEAWAY;
5081 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
5083 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
5085 if (is_double) {
5086 TCGv_i64 tcg_double = read_fp_dreg(s, rn);
5087 if (is_signed) {
5088 if (!sf) {
5089 gen_helper_vfp_tosld(tcg_int, tcg_double,
5090 tcg_shift, tcg_fpstatus);
5091 } else {
5092 gen_helper_vfp_tosqd(tcg_int, tcg_double,
5093 tcg_shift, tcg_fpstatus);
5095 } else {
5096 if (!sf) {
5097 gen_helper_vfp_tould(tcg_int, tcg_double,
5098 tcg_shift, tcg_fpstatus);
5099 } else {
5100 gen_helper_vfp_touqd(tcg_int, tcg_double,
5101 tcg_shift, tcg_fpstatus);
5104 tcg_temp_free_i64(tcg_double);
5105 } else {
5106 TCGv_i32 tcg_single = read_fp_sreg(s, rn);
5107 if (sf) {
5108 if (is_signed) {
5109 gen_helper_vfp_tosqs(tcg_int, tcg_single,
5110 tcg_shift, tcg_fpstatus);
5111 } else {
5112 gen_helper_vfp_touqs(tcg_int, tcg_single,
5113 tcg_shift, tcg_fpstatus);
5115 } else {
5116 TCGv_i32 tcg_dest = tcg_temp_new_i32();
5117 if (is_signed) {
5118 gen_helper_vfp_tosls(tcg_dest, tcg_single,
5119 tcg_shift, tcg_fpstatus);
5120 } else {
5121 gen_helper_vfp_touls(tcg_dest, tcg_single,
5122 tcg_shift, tcg_fpstatus);
5124 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
5125 tcg_temp_free_i32(tcg_dest);
5127 tcg_temp_free_i32(tcg_single);
5130 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
5131 tcg_temp_free_i32(tcg_rmode);
5133 if (!sf) {
5134 tcg_gen_ext32u_i64(tcg_int, tcg_int);
5138 tcg_temp_free_ptr(tcg_fpstatus);
5139 tcg_temp_free_i32(tcg_shift);
5142 /* C3.6.29 Floating point <-> fixed point conversions
5143 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0
5144 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5145 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale | Rn | Rd |
5146 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5148 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
5150 int rd = extract32(insn, 0, 5);
5151 int rn = extract32(insn, 5, 5);
5152 int scale = extract32(insn, 10, 6);
5153 int opcode = extract32(insn, 16, 3);
5154 int rmode = extract32(insn, 19, 2);
5155 int type = extract32(insn, 22, 2);
5156 bool sbit = extract32(insn, 29, 1);
5157 bool sf = extract32(insn, 31, 1);
5158 bool itof;
5160 if (sbit || (type > 1)
5161 || (!sf && scale < 32)) {
5162 unallocated_encoding(s);
5163 return;
5166 switch ((rmode << 3) | opcode) {
5167 case 0x2: /* SCVTF */
5168 case 0x3: /* UCVTF */
5169 itof = true;
5170 break;
5171 case 0x18: /* FCVTZS */
5172 case 0x19: /* FCVTZU */
5173 itof = false;
5174 break;
5175 default:
5176 unallocated_encoding(s);
5177 return;
5180 if (!fp_access_check(s)) {
5181 return;
5184 handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
5187 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
5189 /* FMOV: gpr to or from float, double, or top half of quad fp reg,
5190 * without conversion.
5193 if (itof) {
5194 TCGv_i64 tcg_rn = cpu_reg(s, rn);
5196 switch (type) {
5197 case 0:
5199 /* 32 bit */
5200 TCGv_i64 tmp = tcg_temp_new_i64();
5201 tcg_gen_ext32u_i64(tmp, tcg_rn);
5202 tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
5203 tcg_gen_movi_i64(tmp, 0);
5204 tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5205 tcg_temp_free_i64(tmp);
5206 break;
5208 case 1:
5210 /* 64 bit */
5211 TCGv_i64 tmp = tcg_const_i64(0);
5212 tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
5213 tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5214 tcg_temp_free_i64(tmp);
5215 break;
5217 case 2:
5218 /* 64 bit to top half. */
5219 tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
5220 break;
5222 } else {
5223 TCGv_i64 tcg_rd = cpu_reg(s, rd);
5225 switch (type) {
5226 case 0:
5227 /* 32 bit */
5228 tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
5229 break;
5230 case 1:
5231 /* 64 bit */
5232 tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
5233 break;
5234 case 2:
5235 /* 64 bits from top half */
5236 tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
5237 break;
5242 /* C3.6.30 Floating point <-> integer conversions
5243 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0
5244 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5245 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
5246 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5248 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
5250 int rd = extract32(insn, 0, 5);
5251 int rn = extract32(insn, 5, 5);
5252 int opcode = extract32(insn, 16, 3);
5253 int rmode = extract32(insn, 19, 2);
5254 int type = extract32(insn, 22, 2);
5255 bool sbit = extract32(insn, 29, 1);
5256 bool sf = extract32(insn, 31, 1);
5258 if (sbit) {
5259 unallocated_encoding(s);
5260 return;
5263 if (opcode > 5) {
5264 /* FMOV */
5265 bool itof = opcode & 1;
5267 if (rmode >= 2) {
5268 unallocated_encoding(s);
5269 return;
5272 switch (sf << 3 | type << 1 | rmode) {
5273 case 0x0: /* 32 bit */
5274 case 0xa: /* 64 bit */
5275 case 0xd: /* 64 bit to top half of quad */
5276 break;
5277 default:
5278 /* all other sf/type/rmode combinations are invalid */
5279 unallocated_encoding(s);
5280 break;
5283 if (!fp_access_check(s)) {
5284 return;
5286 handle_fmov(s, rd, rn, type, itof);
5287 } else {
5288 /* actual FP conversions */
5289 bool itof = extract32(opcode, 1, 1);
5291 if (type > 1 || (rmode != 0 && opcode > 1)) {
5292 unallocated_encoding(s);
5293 return;
5296 if (!fp_access_check(s)) {
5297 return;
5299 handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
5303 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
5304 * 31 30 29 28 25 24 0
5305 * +---+---+---+---------+-----------------------------+
5306 * | | 0 | | 1 1 1 1 | |
5307 * +---+---+---+---------+-----------------------------+
5309 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
5311 if (extract32(insn, 24, 1)) {
5312 /* Floating point data-processing (3 source) */
5313 disas_fp_3src(s, insn);
5314 } else if (extract32(insn, 21, 1) == 0) {
5315 /* Floating point to fixed point conversions */
5316 disas_fp_fixed_conv(s, insn);
5317 } else {
5318 switch (extract32(insn, 10, 2)) {
5319 case 1:
5320 /* Floating point conditional compare */
5321 disas_fp_ccomp(s, insn);
5322 break;
5323 case 2:
5324 /* Floating point data-processing (2 source) */
5325 disas_fp_2src(s, insn);
5326 break;
5327 case 3:
5328 /* Floating point conditional select */
5329 disas_fp_csel(s, insn);
5330 break;
5331 case 0:
5332 switch (ctz32(extract32(insn, 12, 4))) {
5333 case 0: /* [15:12] == xxx1 */
5334 /* Floating point immediate */
5335 disas_fp_imm(s, insn);
5336 break;
5337 case 1: /* [15:12] == xx10 */
5338 /* Floating point compare */
5339 disas_fp_compare(s, insn);
5340 break;
5341 case 2: /* [15:12] == x100 */
5342 /* Floating point data-processing (1 source) */
5343 disas_fp_1src(s, insn);
5344 break;
5345 case 3: /* [15:12] == 1000 */
5346 unallocated_encoding(s);
5347 break;
5348 default: /* [15:12] == 0000 */
5349 /* Floating point <-> integer conversions */
5350 disas_fp_int_conv(s, insn);
5351 break;
5353 break;
5358 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
5359 int pos)
5361 /* Extract 64 bits from the middle of two concatenated 64 bit
5362 * vector register slices left:right. The extracted bits start
5363 * at 'pos' bits into the right (least significant) side.
5364 * We return the result in tcg_right, and guarantee not to
5365 * trash tcg_left.
5367 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5368 assert(pos > 0 && pos < 64);
5370 tcg_gen_shri_i64(tcg_right, tcg_right, pos);
5371 tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
5372 tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
5374 tcg_temp_free_i64(tcg_tmp);
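/* Worked example (illustrative): for pos == 16 the sequence above computes
 *   result = (right >> 16) | (left << 48)
 * so with left = 0xaabbccddeeff0011 and right = 0x2233445566778899,
 * tcg_right becomes 0x0011223344556677, i.e. bits [79:16] of left:right.
 */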
5377 /* C3.6.1 EXT
5378 * 31 30 29 24 23 22 21 20 16 15 14 11 10 9 5 4 0
5379 * +---+---+-------------+-----+---+------+---+------+---+------+------+
5380 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 | Rm | 0 | imm4 | 0 | Rn | Rd |
5381 * +---+---+-------------+-----+---+------+---+------+---+------+------+
5383 static void disas_simd_ext(DisasContext *s, uint32_t insn)
5385 int is_q = extract32(insn, 30, 1);
5386 int op2 = extract32(insn, 22, 2);
5387 int imm4 = extract32(insn, 11, 4);
5388 int rm = extract32(insn, 16, 5);
5389 int rn = extract32(insn, 5, 5);
5390 int rd = extract32(insn, 0, 5);
5391 int pos = imm4 << 3;
5392 TCGv_i64 tcg_resl, tcg_resh;
5394 if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
5395 unallocated_encoding(s);
5396 return;
5399 if (!fp_access_check(s)) {
5400 return;
5403 tcg_resh = tcg_temp_new_i64();
5404 tcg_resl = tcg_temp_new_i64();
5406 /* Vd gets bits starting at pos bits into Vm:Vn. This is
5407 * either extracting 128 bits from a 128:128 concatenation, or
5408 * extracting 64 bits from a 64:64 concatenation.
5410 if (!is_q) {
5411 read_vec_element(s, tcg_resl, rn, 0, MO_64);
5412 if (pos != 0) {
5413 read_vec_element(s, tcg_resh, rm, 0, MO_64);
5414 do_ext64(s, tcg_resh, tcg_resl, pos);
5416 tcg_gen_movi_i64(tcg_resh, 0);
5417 } else {
5418 TCGv_i64 tcg_hh;
5419 typedef struct {
5420 int reg;
5421 int elt;
5422 } EltPosns;
5423 EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
5424 EltPosns *elt = eltposns;
5426 if (pos >= 64) {
5427 elt++;
5428 pos -= 64;
5431 read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
5432 elt++;
5433 read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
5434 elt++;
5435 if (pos != 0) {
5436 do_ext64(s, tcg_resh, tcg_resl, pos);
5437 tcg_hh = tcg_temp_new_i64();
5438 read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
5439 do_ext64(s, tcg_hh, tcg_resh, pos);
5440 tcg_temp_free_i64(tcg_hh);
5444 write_vec_element(s, tcg_resl, rd, 0, MO_64);
5445 tcg_temp_free_i64(tcg_resl);
5446 write_vec_element(s, tcg_resh, rd, 1, MO_64);
5447 tcg_temp_free_i64(tcg_resh);
5450 /* C3.6.2 TBL/TBX
5451 * 31 30 29 24 23 22 21 20 16 15 14 13 12 11 10 9 5 4 0
5452 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5453 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 | Rm | 0 | len | op | 0 0 | Rn | Rd |
5454 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5456 static void disas_simd_tb(DisasContext *s, uint32_t insn)
5458 int op2 = extract32(insn, 22, 2);
5459 int is_q = extract32(insn, 30, 1);
5460 int rm = extract32(insn, 16, 5);
5461 int rn = extract32(insn, 5, 5);
5462 int rd = extract32(insn, 0, 5);
5463 int is_tblx = extract32(insn, 12, 1);
5464 int len = extract32(insn, 13, 2);
5465 TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
5466 TCGv_i32 tcg_regno, tcg_numregs;
5468 if (op2 != 0) {
5469 unallocated_encoding(s);
5470 return;
5473 if (!fp_access_check(s)) {
5474 return;
5477 /* This does a table lookup: for every byte element in the input
5478 * we index into a table formed from up to four vector registers,
5479 * and then the output is the result of the lookups. Our helper
5480 * function does the lookup operation for a single 64 bit part of
5481 * the input.
5483 tcg_resl = tcg_temp_new_i64();
5484 tcg_resh = tcg_temp_new_i64();
5486 if (is_tblx) {
5487 read_vec_element(s, tcg_resl, rd, 0, MO_64);
5488 } else {
5489 tcg_gen_movi_i64(tcg_resl, 0);
5491 if (is_tblx && is_q) {
5492 read_vec_element(s, tcg_resh, rd, 1, MO_64);
5493 } else {
5494 tcg_gen_movi_i64(tcg_resh, 0);
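/* The helper only overwrites result bytes whose index byte is within range
 * of the table, so the initialisation above gives the architectural
 * behaviour: out-of-range indices produce 0 for TBL (result pre-zeroed)
 * and leave the old Vd byte unchanged for TBX.
 */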
5497 tcg_idx = tcg_temp_new_i64();
5498 tcg_regno = tcg_const_i32(rn);
5499 tcg_numregs = tcg_const_i32(len + 1);
5500 read_vec_element(s, tcg_idx, rm, 0, MO_64);
5501 gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
5502 tcg_regno, tcg_numregs);
5503 if (is_q) {
5504 read_vec_element(s, tcg_idx, rm, 1, MO_64);
5505 gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
5506 tcg_regno, tcg_numregs);
5508 tcg_temp_free_i64(tcg_idx);
5509 tcg_temp_free_i32(tcg_regno);
5510 tcg_temp_free_i32(tcg_numregs);
5512 write_vec_element(s, tcg_resl, rd, 0, MO_64);
5513 tcg_temp_free_i64(tcg_resl);
5514 write_vec_element(s, tcg_resh, rd, 1, MO_64);
5515 tcg_temp_free_i64(tcg_resh);
5518 /* C3.6.3 ZIP/UZP/TRN
5519 * 31 30 29 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0
5520 * +---+---+-------------+------+---+------+---+------------------+------+
5521 * | 0 | Q | 0 0 1 1 1 0 | size | 0 | Rm | 0 | opc | 1 0 | Rn | Rd |
5522 * +---+---+-------------+------+---+------+---+------------------+------+
5524 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
5526 int rd = extract32(insn, 0, 5);
5527 int rn = extract32(insn, 5, 5);
5528 int rm = extract32(insn, 16, 5);
5529 int size = extract32(insn, 22, 2);
5530 /* opc field bits [1:0] indicate ZIP/UZP/TRN;
5531 * bit 2 indicates 1 vs 2 variant of the insn.
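/* Illustrative examples for .8B elements with part == 0 (the "1" variants),
 * listing result elements from index 0 upwards:
 *   UZP1: {Vn[0], Vn[2], Vn[4], Vn[6], Vm[0], Vm[2], Vm[4], Vm[6]}
 *   TRN1: {Vn[0], Vm[0], Vn[2], Vm[2], Vn[4], Vm[4], Vn[6], Vm[6]}
 *   ZIP1: {Vn[0], Vm[0], Vn[1], Vm[1], Vn[2], Vm[2], Vn[3], Vm[3]}
 */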
5533 int opcode = extract32(insn, 12, 2);
5534 bool part = extract32(insn, 14, 1);
5535 bool is_q = extract32(insn, 30, 1);
5536 int esize = 8 << size;
5537 int i, ofs;
5538 int datasize = is_q ? 128 : 64;
5539 int elements = datasize / esize;
5540 TCGv_i64 tcg_res, tcg_resl, tcg_resh;
5542 if (opcode == 0 || (size == 3 && !is_q)) {
5543 unallocated_encoding(s);
5544 return;
5547 if (!fp_access_check(s)) {
5548 return;
5551 tcg_resl = tcg_const_i64(0);
5552 tcg_resh = tcg_const_i64(0);
5553 tcg_res = tcg_temp_new_i64();
5555 for (i = 0; i < elements; i++) {
5556 switch (opcode) {
5557 case 1: /* UZP1/2 */
5559 int midpoint = elements / 2;
5560 if (i < midpoint) {
5561 read_vec_element(s, tcg_res, rn, 2 * i + part, size);
5562 } else {
5563 read_vec_element(s, tcg_res, rm,
5564 2 * (i - midpoint) + part, size);
5566 break;
5568 case 2: /* TRN1/2 */
5569 if (i & 1) {
5570 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
5571 } else {
5572 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
5574 break;
5575 case 3: /* ZIP1/2 */
5577 int base = part * elements / 2;
5578 if (i & 1) {
5579 read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
5580 } else {
5581 read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
5583 break;
5585 default:
5586 g_assert_not_reached();
5589 ofs = i * esize;
5590 if (ofs < 64) {
5591 tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
5592 tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
5593 } else {
5594 tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
5595 tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
5599 tcg_temp_free_i64(tcg_res);
5601 write_vec_element(s, tcg_resl, rd, 0, MO_64);
5602 tcg_temp_free_i64(tcg_resl);
5603 write_vec_element(s, tcg_resh, rd, 1, MO_64);
5604 tcg_temp_free_i64(tcg_resh);
5607 static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
5608 int opc, bool is_min, TCGv_ptr fpst)
5610 /* Helper function for disas_simd_across_lanes: do a single precision
5611 * min/max operation on the specified two inputs,
5612 * and return the result in tcg_elt1.
5614 if (opc == 0xc) {
5615 if (is_min) {
5616 gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5617 } else {
5618 gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5620 } else {
5621 assert(opc == 0xf);
5622 if (is_min) {
5623 gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5624 } else {
5625 gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5630 /* C3.6.4 AdvSIMD across lanes
5631 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
5632 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5633 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
5634 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5636 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
5638 int rd = extract32(insn, 0, 5);
5639 int rn = extract32(insn, 5, 5);
5640 int size = extract32(insn, 22, 2);
5641 int opcode = extract32(insn, 12, 5);
5642 bool is_q = extract32(insn, 30, 1);
5643 bool is_u = extract32(insn, 29, 1);
5644 bool is_fp = false;
5645 bool is_min = false;
5646 int esize;
5647 int elements;
5648 int i;
5649 TCGv_i64 tcg_res, tcg_elt;
5651 switch (opcode) {
5652 case 0x1b: /* ADDV */
5653 if (is_u) {
5654 unallocated_encoding(s);
5655 return;
5657 /* fall through */
5658 case 0x3: /* SADDLV, UADDLV */
5659 case 0xa: /* SMAXV, UMAXV */
5660 case 0x1a: /* SMINV, UMINV */
5661 if (size == 3 || (size == 2 && !is_q)) {
5662 unallocated_encoding(s);
5663 return;
5665 break;
5666 case 0xc: /* FMAXNMV, FMINNMV */
5667 case 0xf: /* FMAXV, FMINV */
5668 if (!is_u || !is_q || extract32(size, 0, 1)) {
5669 unallocated_encoding(s);
5670 return;
5672 /* Bit 1 of size field encodes min vs max, and actual size is always
5673 * 32 bits: adjust the size variable so following code can rely on it
5675 is_min = extract32(size, 1, 1);
5676 is_fp = true;
5677 size = 2;
5678 break;
5679 default:
5680 unallocated_encoding(s);
5681 return;
5684 if (!fp_access_check(s)) {
5685 return;
5688 esize = 8 << size;
5689 elements = (is_q ? 128 : 64) / esize;
5691 tcg_res = tcg_temp_new_i64();
5692 tcg_elt = tcg_temp_new_i64();
5694 /* These instructions operate across all lanes of a vector
5695 * to produce a single result. We can guarantee that a 64
5696 * bit intermediate is sufficient:
5697 * + for [US]ADDLV the maximum element size is 32 bits, and
5698 * the result type is 64 bits
5699 * + for FMAX*V, FMIN*V, ADDV the intermediate type is the
5700 * same as the element size, which is 32 bits at most
5701 * For the integer operations we can choose to work at 64
5702 * or 32 bits and truncate at the end; for simplicity
5703 * we use 64 bits always. The floating point
5704 * ops do require 32 bit intermediates, though.
5706 if (!is_fp) {
5707 read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
5709 for (i = 1; i < elements; i++) {
5710 read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
5712 switch (opcode) {
5713 case 0x03: /* SADDLV / UADDLV */
5714 case 0x1b: /* ADDV */
5715 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
5716 break;
5717 case 0x0a: /* SMAXV / UMAXV */
5718 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
5719 tcg_res,
5720 tcg_res, tcg_elt, tcg_res, tcg_elt);
5721 break;
5722 case 0x1a: /* SMINV / UMINV */
5723 tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
5724 tcg_res,
5725 tcg_res, tcg_elt, tcg_res, tcg_elt);
5726 break;
5728 default:
5729 g_assert_not_reached();
5733 } else {
5734 /* Floating point ops which work on 32 bit (single) intermediates.
5735 * Note that correct NaN propagation requires that we do these
5736 * operations in exactly the order specified by the pseudocode.
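/* Concretely, the sequence below evaluates
 *   op(op(elt[0], elt[1]), op(elt[2], elt[3]))
 * i.e. a pairwise reduction of the four 32-bit lanes.
 */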
5738 TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
5739 TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
5740 TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
5741 TCGv_ptr fpst = get_fpstatus_ptr();
5743 assert(esize == 32);
5744 assert(elements == 4);
5746 read_vec_element(s, tcg_elt, rn, 0, MO_32);
5747 tcg_gen_extrl_i64_i32(tcg_elt1, tcg_elt);
5748 read_vec_element(s, tcg_elt, rn, 1, MO_32);
5749 tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5751 do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5753 read_vec_element(s, tcg_elt, rn, 2, MO_32);
5754 tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5755 read_vec_element(s, tcg_elt, rn, 3, MO_32);
5756 tcg_gen_extrl_i64_i32(tcg_elt3, tcg_elt);
5758 do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
5760 do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5762 tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
5763 tcg_temp_free_i32(tcg_elt1);
5764 tcg_temp_free_i32(tcg_elt2);
5765 tcg_temp_free_i32(tcg_elt3);
5766 tcg_temp_free_ptr(fpst);
5769 tcg_temp_free_i64(tcg_elt);
5771 /* Now truncate the result to the width required for the final output */
5772 if (opcode == 0x03) {
5773 /* SADDLV, UADDLV: result is 2*esize */
5774 size++;
5777 switch (size) {
5778 case 0:
5779 tcg_gen_ext8u_i64(tcg_res, tcg_res);
5780 break;
5781 case 1:
5782 tcg_gen_ext16u_i64(tcg_res, tcg_res);
5783 break;
5784 case 2:
5785 tcg_gen_ext32u_i64(tcg_res, tcg_res);
5786 break;
5787 case 3:
5788 break;
5789 default:
5790 g_assert_not_reached();
5793 write_fp_dreg(s, rd, tcg_res);
5794 tcg_temp_free_i64(tcg_res);
5797 /* C6.3.31 DUP (Element, Vector)
5799 * 31 30 29 21 20 16 15 10 9 5 4 0
5800 * +---+---+-------------------+--------+-------------+------+------+
5801 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd |
5802 * +---+---+-------------------+--------+-------------+------+------+
5804 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5806 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
5807 int imm5)
5809 int size = ctz32(imm5);
5810 int esize = 8 << size;
5811 int elements = (is_q ? 128 : 64) / esize;
5812 int index, i;
5813 TCGv_i64 tmp;
5815 if (size > 3 || (size == 3 && !is_q)) {
5816 unallocated_encoding(s);
5817 return;
5820 if (!fp_access_check(s)) {
5821 return;
5824 index = imm5 >> (size + 1);
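/* Illustrative example: imm5 = 0b01010 gives size = 1 (16-bit elements)
 * and index = imm5 >> 2 = 2, i.e. DUP Vd.<T>, Vn.H[2].
 */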
5826 tmp = tcg_temp_new_i64();
5827 read_vec_element(s, tmp, rn, index, size);
5829 for (i = 0; i < elements; i++) {
5830 write_vec_element(s, tmp, rd, i, size);
5833 if (!is_q) {
5834 clear_vec_high(s, rd);
5837 tcg_temp_free_i64(tmp);
5840 /* C6.3.31 DUP (element, scalar)
5841 * 31 21 20 16 15 10 9 5 4 0
5842 * +-----------------------+--------+-------------+------+------+
5843 * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd |
5844 * +-----------------------+--------+-------------+------+------+
5846 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
5847 int imm5)
5849 int size = ctz32(imm5);
5850 int index;
5851 TCGv_i64 tmp;
5853 if (size > 3) {
5854 unallocated_encoding(s);
5855 return;
5858 if (!fp_access_check(s)) {
5859 return;
5862 index = imm5 >> (size + 1);
5864 /* This instruction just extracts the specified element and
5865 * zero-extends it into the bottom of the destination register.
5867 tmp = tcg_temp_new_i64();
5868 read_vec_element(s, tmp, rn, index, size);
5869 write_fp_dreg(s, rd, tmp);
5870 tcg_temp_free_i64(tmp);
5873 /* C6.3.32 DUP (General)
5875 * 31 30 29 21 20 16 15 10 9 5 4 0
5876 * +---+---+-------------------+--------+-------------+------+------+
5877 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 1 1 | Rn | Rd |
5878 * +---+---+-------------------+--------+-------------+------+------+
5880 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5882 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
5883 int imm5)
5885 int size = ctz32(imm5);
5886 int esize = 8 << size;
5887 int elements = (is_q ? 128 : 64)/esize;
5888 int i = 0;
5890 if (size > 3 || ((size == 3) && !is_q)) {
5891 unallocated_encoding(s);
5892 return;
5895 if (!fp_access_check(s)) {
5896 return;
5899 for (i = 0; i < elements; i++) {
5900 write_vec_element(s, cpu_reg(s, rn), rd, i, size);
5902 if (!is_q) {
5903 clear_vec_high(s, rd);
5907 /* C6.3.150 INS (Element)
5909 * 31 21 20 16 15 14 11 10 9 5 4 0
5910 * +-----------------------+--------+------------+---+------+------+
5911 * | 0 1 1 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
5912 * +-----------------------+--------+------------+---+------+------+
5914 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5915 * index: encoded in imm5<4:size+1>
5917 static void handle_simd_inse(DisasContext *s, int rd, int rn,
5918 int imm4, int imm5)
5920 int size = ctz32(imm5);
5921 int src_index, dst_index;
5922 TCGv_i64 tmp;
5924 if (size > 3) {
5925 unallocated_encoding(s);
5926 return;
5929 if (!fp_access_check(s)) {
5930 return;
5933 dst_index = extract32(imm5, 1+size, 5);
5934 src_index = extract32(imm4, size, 4);
5936 tmp = tcg_temp_new_i64();
5938 read_vec_element(s, tmp, rn, src_index, size);
5939 write_vec_element(s, tmp, rd, dst_index, size);
5941 tcg_temp_free_i64(tmp);
5945 /* C6.3.151 INS (General)
5947 * 31 21 20 16 15 10 9 5 4 0
5948 * +-----------------------+--------+-------------+------+------+
5949 * | 0 1 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 1 1 1 | Rn | Rd |
5950 * +-----------------------+--------+-------------+------+------+
5952 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5953 * index: encoded in imm5<4:size+1>
5955 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
5957 int size = ctz32(imm5);
5958 int idx;
5960 if (size > 3) {
5961 unallocated_encoding(s);
5962 return;
5965 if (!fp_access_check(s)) {
5966 return;
5969 idx = extract32(imm5, 1 + size, 4 - size);
5970 write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
5974 * C6.3.321 UMOV (General)
5975 * C6.3.237 SMOV (General)
5977 * 31 30 29 21 20 16 15 12 10 9 5 4 0
5978 * +---+---+-------------------+--------+-------------+------+------+
5979 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 1 U 1 1 | Rn | Rd |
5980 * +---+---+-------------------+--------+-------------+------+------+
5982 * U: unsigned when set
5983 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5985 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
5986 int rn, int rd, int imm5)
5988 int size = ctz32(imm5);
5989 int element;
5990 TCGv_i64 tcg_rd;
5992 /* Check for UnallocatedEncodings */
5993 if (is_signed) {
5994 if (size > 2 || (size == 2 && !is_q)) {
5995 unallocated_encoding(s);
5996 return;
5998 } else {
5999 if (size > 3
6000 || (size < 3 && is_q)
6001 || (size == 3 && !is_q)) {
6002 unallocated_encoding(s);
6003 return;
6007 if (!fp_access_check(s)) {
6008 return;
6011 element = extract32(imm5, 1+size, 4);
6013 tcg_rd = cpu_reg(s, rd);
6014 read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
6015 if (is_signed && !is_q) {
6016 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
6020 /* C3.6.5 AdvSIMD copy
6021 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0
6022 * +---+---+----+-----------------+------+---+------+---+------+------+
6023 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
6024 * +---+---+----+-----------------+------+---+------+---+------+------+
6026 static void disas_simd_copy(DisasContext *s, uint32_t insn)
6028 int rd = extract32(insn, 0, 5);
6029 int rn = extract32(insn, 5, 5);
6030 int imm4 = extract32(insn, 11, 4);
6031 int op = extract32(insn, 29, 1);
6032 int is_q = extract32(insn, 30, 1);
6033 int imm5 = extract32(insn, 16, 5);
6035 if (op) {
6036 if (is_q) {
6037 /* INS (element) */
6038 handle_simd_inse(s, rd, rn, imm4, imm5);
6039 } else {
6040 unallocated_encoding(s);
6042 } else {
6043 switch (imm4) {
6044 case 0:
6045 /* DUP (element - vector) */
6046 handle_simd_dupe(s, is_q, rd, rn, imm5);
6047 break;
6048 case 1:
6049 /* DUP (general) */
6050 handle_simd_dupg(s, is_q, rd, rn, imm5);
6051 break;
6052 case 3:
6053 if (is_q) {
6054 /* INS (general) */
6055 handle_simd_insg(s, rd, rn, imm5);
6056 } else {
6057 unallocated_encoding(s);
6059 break;
6060 case 5:
6061 case 7:
6062 /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
6063 handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
6064 break;
6065 default:
6066 unallocated_encoding(s);
6067 break;
6072 /* C3.6.6 AdvSIMD modified immediate
6073 * 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0
6074 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6075 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh | Rd |
6076 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6078 * There are a number of operations that can be carried out here:
6079 * MOVI - move (shifted) imm into register
6080 * MVNI - move inverted (shifted) imm into register
6081 * ORR - bitwise OR of (shifted) imm with register
6082 * BIC - bitwise clear of (shifted) imm with register
6084 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
6086 int rd = extract32(insn, 0, 5);
6087 int cmode = extract32(insn, 12, 4);
6088 int cmode_3_1 = extract32(cmode, 1, 3);
6089 int cmode_0 = extract32(cmode, 0, 1);
6090 int o2 = extract32(insn, 11, 1);
6091 uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
6092 bool is_neg = extract32(insn, 29, 1);
6093 bool is_q = extract32(insn, 30, 1);
6094 uint64_t imm = 0;
6095 TCGv_i64 tcg_rd, tcg_imm;
6096 int i;
6098 if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
6099 unallocated_encoding(s);
6100 return;
6103 if (!fp_access_check(s)) {
6104 return;
6107 /* See AdvSIMDExpandImm() in ARM ARM */
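/* Illustrative example: cmode = 0b0010 (cmode_3_1 == 1) with abcdefgh = 0x45
 * gives imm = Replicate(0x00004500, 2) = 0x0000450000004500; when is_neg is
 * set (MVNI/BIC) the value is additionally inverted further down.
 */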
6108 switch (cmode_3_1) {
6109 case 0: /* Replicate(Zeros(24):imm8, 2) */
6110 case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
6111 case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
6112 case 3: /* Replicate(imm8:Zeros(24), 2) */
6114 int shift = cmode_3_1 * 8;
6115 imm = bitfield_replicate(abcdefgh << shift, 32);
6116 break;
6118 case 4: /* Replicate(Zeros(8):imm8, 4) */
6119 case 5: /* Replicate(imm8:Zeros(8), 4) */
6121 int shift = (cmode_3_1 & 0x1) * 8;
6122 imm = bitfield_replicate(abcdefgh << shift, 16);
6123 break;
6125 case 6:
6126 if (cmode_0) {
6127 /* Replicate(Zeros(8):imm8:Ones(16), 2) */
6128 imm = (abcdefgh << 16) | 0xffff;
6129 } else {
6130 /* Replicate(Zeros(16):imm8:Ones(8), 2) */
6131 imm = (abcdefgh << 8) | 0xff;
6133 imm = bitfield_replicate(imm, 32);
6134 break;
6135 case 7:
6136 if (!cmode_0 && !is_neg) {
6137 imm = bitfield_replicate(abcdefgh, 8);
6138 } else if (!cmode_0 && is_neg) {
6139 int i;
6140 imm = 0;
6141 for (i = 0; i < 8; i++) {
6142 if ((abcdefgh) & (1 << i)) {
6143 imm |= 0xffULL << (i * 8);
6146 } else if (cmode_0) {
6147 if (is_neg) {
6148 imm = (abcdefgh & 0x3f) << 48;
6149 if (abcdefgh & 0x80) {
6150 imm |= 0x8000000000000000ULL;
6152 if (abcdefgh & 0x40) {
6153 imm |= 0x3fc0000000000000ULL;
6154 } else {
6155 imm |= 0x4000000000000000ULL;
6157 } else {
6158 imm = (abcdefgh & 0x3f) << 19;
6159 if (abcdefgh & 0x80) {
6160 imm |= 0x80000000;
6162 if (abcdefgh & 0x40) {
6163 imm |= 0x3e000000;
6164 } else {
6165 imm |= 0x40000000;
6167 imm |= (imm << 32);
6170 break;
6173 if (cmode_3_1 != 7 && is_neg) {
6174 imm = ~imm;
6177 tcg_imm = tcg_const_i64(imm);
6178 tcg_rd = new_tmp_a64(s);
6180 for (i = 0; i < 2; i++) {
6181 int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64);
6183 if (i == 1 && !is_q) {
6184 /* non-quad ops clear high half of vector */
6185 tcg_gen_movi_i64(tcg_rd, 0);
6186 } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
6187 tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
6188 if (is_neg) {
6189 /* AND (BIC) */
6190 tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
6191 } else {
6192 /* ORR */
6193 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
6195 } else {
6196 /* MOVI */
6197 tcg_gen_mov_i64(tcg_rd, tcg_imm);
6199 tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
6202 tcg_temp_free_i64(tcg_imm);
6205 /* C3.6.7 AdvSIMD scalar copy
6206 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0
6207 * +-----+----+-----------------+------+---+------+---+------+------+
6208 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
6209 * +-----+----+-----------------+------+---+------+---+------+------+
6211 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
6213 int rd = extract32(insn, 0, 5);
6214 int rn = extract32(insn, 5, 5);
6215 int imm4 = extract32(insn, 11, 4);
6216 int imm5 = extract32(insn, 16, 5);
6217 int op = extract32(insn, 29, 1);
6219 if (op != 0 || imm4 != 0) {
6220 unallocated_encoding(s);
6221 return;
6224 /* DUP (element, scalar) */
6225 handle_simd_dupes(s, rd, rn, imm5);
6228 /* C3.6.8 AdvSIMD scalar pairwise
6229 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
6230 * +-----+---+-----------+------+-----------+--------+-----+------+------+
6231 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
6232 * +-----+---+-----------+------+-----------+--------+-----+------+------+
6234 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
6236 int u = extract32(insn, 29, 1);
6237 int size = extract32(insn, 22, 2);
6238 int opcode = extract32(insn, 12, 5);
6239 int rn = extract32(insn, 5, 5);
6240 int rd = extract32(insn, 0, 5);
6241 TCGv_ptr fpst;
6243 /* For some ops (the FP ones), size[1] is part of the encoding.
6244 * For ADDP strictly it is not but size[1] is always 1 for valid
6245 * encodings.
6247 opcode |= (extract32(size, 1, 1) << 5);
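/* e.g. ADDP (U == 0, size == 0b11, opcode == 0b11011) becomes 0x3b below,
 * and FMINNMP (size == 0b1x, opcode == 0b01100) becomes 0x2c; U itself is
 * checked separately in each case.
 */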
6249 switch (opcode) {
6250 case 0x3b: /* ADDP */
6251 if (u || size != 3) {
6252 unallocated_encoding(s);
6253 return;
6255 if (!fp_access_check(s)) {
6256 return;
6259 TCGV_UNUSED_PTR(fpst);
6260 break;
6261 case 0xc: /* FMAXNMP */
6262 case 0xd: /* FADDP */
6263 case 0xf: /* FMAXP */
6264 case 0x2c: /* FMINNMP */
6265 case 0x2f: /* FMINP */
6266 /* FP op, size[0] is 32 or 64 bit */
6267 if (!u) {
6268 unallocated_encoding(s);
6269 return;
6271 if (!fp_access_check(s)) {
6272 return;
6275 size = extract32(size, 0, 1) ? 3 : 2;
6276 fpst = get_fpstatus_ptr();
6277 break;
6278 default:
6279 unallocated_encoding(s);
6280 return;
6283 if (size == 3) {
6284 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6285 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6286 TCGv_i64 tcg_res = tcg_temp_new_i64();
6288 read_vec_element(s, tcg_op1, rn, 0, MO_64);
6289 read_vec_element(s, tcg_op2, rn, 1, MO_64);
6291 switch (opcode) {
6292 case 0x3b: /* ADDP */
6293 tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
6294 break;
6295 case 0xc: /* FMAXNMP */
6296 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6297 break;
6298 case 0xd: /* FADDP */
6299 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6300 break;
6301 case 0xf: /* FMAXP */
6302 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6303 break;
6304 case 0x2c: /* FMINNMP */
6305 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6306 break;
6307 case 0x2f: /* FMINP */
6308 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6309 break;
6310 default:
6311 g_assert_not_reached();
6314 write_fp_dreg(s, rd, tcg_res);
6316 tcg_temp_free_i64(tcg_op1);
6317 tcg_temp_free_i64(tcg_op2);
6318 tcg_temp_free_i64(tcg_res);
6319 } else {
6320 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6321 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6322 TCGv_i32 tcg_res = tcg_temp_new_i32();
6324 read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
6325 read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);
6327 switch (opcode) {
6328 case 0xc: /* FMAXNMP */
6329 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6330 break;
6331 case 0xd: /* FADDP */
6332 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6333 break;
6334 case 0xf: /* FMAXP */
6335 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6336 break;
6337 case 0x2c: /* FMINNMP */
6338 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6339 break;
6340 case 0x2f: /* FMINP */
6341 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6342 break;
6343 default:
6344 g_assert_not_reached();
6347 write_fp_sreg(s, rd, tcg_res);
6349 tcg_temp_free_i32(tcg_op1);
6350 tcg_temp_free_i32(tcg_op2);
6351 tcg_temp_free_i32(tcg_res);
6354 if (!TCGV_IS_UNUSED_PTR(fpst)) {
6355 tcg_temp_free_ptr(fpst);
6360 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
6362  * This code handles the common right-shift logic and is used by both
6363 * the vector and scalar code.
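 *
 * As an illustration: with rounding, 1 << (shift - 1) is added to the
 * source before shifting, so URSHR #4 on 0x18 yields (0x18 + 0x8) >> 4 = 2,
 * i.e. 24/16 rounded to nearest.  For 64-bit elements that addition can
 * carry out of 64 bits, which is why the size == 3 rounding case keeps a
 * high word (tcg_src_hi) and effectively works at 128-bit precision.
 */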
6365 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6366 TCGv_i64 tcg_rnd, bool accumulate,
6367 bool is_u, int size, int shift)
6369 bool extended_result = false;
6370 bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
6371 int ext_lshift = 0;
6372 TCGv_i64 tcg_src_hi;
6374 if (round && size == 3) {
6375 extended_result = true;
6376 ext_lshift = 64 - shift;
6377 tcg_src_hi = tcg_temp_new_i64();
6378 } else if (shift == 64) {
6379 if (!accumulate && is_u) {
6380 /* result is zero */
6381 tcg_gen_movi_i64(tcg_res, 0);
6382 return;
6386 /* Deal with the rounding step */
6387 if (round) {
6388 if (extended_result) {
6389 TCGv_i64 tcg_zero = tcg_const_i64(0);
6390 if (!is_u) {
6391 /* take care of sign extending tcg_res */
6392 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
6393 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6394 tcg_src, tcg_src_hi,
6395 tcg_rnd, tcg_zero);
6396 } else {
6397 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6398 tcg_src, tcg_zero,
6399 tcg_rnd, tcg_zero);
6401 tcg_temp_free_i64(tcg_zero);
6402 } else {
6403 tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
6407 /* Now do the shift right */
6408 if (round && extended_result) {
6409 /* extended case, >64 bit precision required */
6410 if (ext_lshift == 0) {
6411 /* special case, only high bits matter */
6412 tcg_gen_mov_i64(tcg_src, tcg_src_hi);
6413 } else {
6414 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6415 tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
6416 tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
6418 } else {
6419 if (is_u) {
6420 if (shift == 64) {
6421 /* essentially shifting in 64 zeros */
6422 tcg_gen_movi_i64(tcg_src, 0);
6423 } else {
6424 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6426 } else {
6427 if (shift == 64) {
6428 /* effectively extending the sign-bit */
6429 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
6430 } else {
6431 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
6436 if (accumulate) {
6437 tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
6438 } else {
6439 tcg_gen_mov_i64(tcg_res, tcg_src);
6442 if (extended_result) {
6443 tcg_temp_free_i64(tcg_src_hi);
6447 /* Common SHL/SLI - Shift left with an optional insert */
6448 static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6449 bool insert, int shift)
6451 if (insert) { /* SLI */
6452 tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
6453 } else { /* SHL */
6454 tcg_gen_shli_i64(tcg_res, tcg_src, shift);
6458 /* SRI: shift right with insert */
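/* For example, SRI #8 on a 32-bit element: with tcg_res = 0xaabbccdd and
 * tcg_src = 0x11223344, the source is shifted down to 0x00112233 and
 * deposited into bits [23:0], giving 0xaa112233; the top 'shift' bits of
 * the destination element are preserved.
 */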
6459 static void handle_shri_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6460 int size, int shift)
6462 int esize = 8 << size;
6464 /* shift count same as element size is valid but does nothing;
6465 * special case to avoid potential shift by 64.
6467 if (shift != esize) {
6468 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6469 tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, 0, esize - shift);
6473 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
6474 static void handle_scalar_simd_shri(DisasContext *s,
6475 bool is_u, int immh, int immb,
6476 int opcode, int rn, int rd)
6478 const int size = 3;
6479 int immhb = immh << 3 | immb;
6480 int shift = 2 * (8 << size) - immhb;
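/* For the scalar case immh[3] is known to be set (checked below), so
 * immhb is in [64, 127] and shift = 128 - immhb gives a shift count of
 * 1..64, e.g. immh:immb = 1000:000 encodes a shift of 64.
 */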
6481 bool accumulate = false;
6482 bool round = false;
6483 bool insert = false;
6484 TCGv_i64 tcg_rn;
6485 TCGv_i64 tcg_rd;
6486 TCGv_i64 tcg_round;
6488 if (!extract32(immh, 3, 1)) {
6489 unallocated_encoding(s);
6490 return;
6493 if (!fp_access_check(s)) {
6494 return;
6497 switch (opcode) {
6498 case 0x02: /* SSRA / USRA (accumulate) */
6499 accumulate = true;
6500 break;
6501 case 0x04: /* SRSHR / URSHR (rounding) */
6502 round = true;
6503 break;
6504 case 0x06: /* SRSRA / URSRA (accum + rounding) */
6505 accumulate = round = true;
6506 break;
6507 case 0x08: /* SRI */
6508 insert = true;
6509 break;
6512 if (round) {
6513 uint64_t round_const = 1ULL << (shift - 1);
6514 tcg_round = tcg_const_i64(round_const);
6515 } else {
6516 TCGV_UNUSED_I64(tcg_round);
6519 tcg_rn = read_fp_dreg(s, rn);
6520 tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6522 if (insert) {
6523 handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
6524 } else {
6525 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6526 accumulate, is_u, size, shift);
6529 write_fp_dreg(s, rd, tcg_rd);
6531 tcg_temp_free_i64(tcg_rn);
6532 tcg_temp_free_i64(tcg_rd);
6533 if (round) {
6534 tcg_temp_free_i64(tcg_round);
6538 /* SHL/SLI - Scalar shift left */
6539 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
6540 int immh, int immb, int opcode,
6541 int rn, int rd)
6543 int size = 32 - clz32(immh) - 1;
6544 int immhb = immh << 3 | immb;
6545 int shift = immhb - (8 << size);
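/* The left-shift count is immhb - esize, e.g. immh:immb = 1000:010 is a
 * 64-bit element shifted left by 2.
 */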
6546 TCGv_i64 tcg_rn = new_tmp_a64(s);
6547 TCGv_i64 tcg_rd = new_tmp_a64(s);
6549 if (!extract32(immh, 3, 1)) {
6550 unallocated_encoding(s);
6551 return;
6554 if (!fp_access_check(s)) {
6555 return;
6558 tcg_rn = read_fp_dreg(s, rn);
6559 tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6561 handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
6563 write_fp_dreg(s, rd, tcg_rd);
6565 tcg_temp_free_i64(tcg_rn);
6566 tcg_temp_free_i64(tcg_rd);
6569 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
6570 * (signed/unsigned) narrowing */
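/* Worked example: SQSHRN #3 with immh:immb = 0001:101 gives size 0 (byte
 * results), esize 8 and shift = 16 - 13 = 3; in the vector form eight
 * 16-bit source elements are each shifted right by 3, saturated to 8 bits
 * and packed into one 64-bit half of the destination.
 */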
6571 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
6572 bool is_u_shift, bool is_u_narrow,
6573 int immh, int immb, int opcode,
6574 int rn, int rd)
6576 int immhb = immh << 3 | immb;
6577 int size = 32 - clz32(immh) - 1;
6578 int esize = 8 << size;
6579 int shift = (2 * esize) - immhb;
6580 int elements = is_scalar ? 1 : (64 / esize);
6581 bool round = extract32(opcode, 0, 1);
6582 TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
6583 TCGv_i64 tcg_rn, tcg_rd, tcg_round;
6584 TCGv_i32 tcg_rd_narrowed;
6585 TCGv_i64 tcg_final;
6587 static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
6588 { gen_helper_neon_narrow_sat_s8,
6589 gen_helper_neon_unarrow_sat8 },
6590 { gen_helper_neon_narrow_sat_s16,
6591 gen_helper_neon_unarrow_sat16 },
6592 { gen_helper_neon_narrow_sat_s32,
6593 gen_helper_neon_unarrow_sat32 },
6594 { NULL, NULL },
6596 static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
6597 gen_helper_neon_narrow_sat_u8,
6598 gen_helper_neon_narrow_sat_u16,
6599 gen_helper_neon_narrow_sat_u32,
6600 NULL
6602 NeonGenNarrowEnvFn *narrowfn;
6604 int i;
6606 assert(size < 4);
6608 if (extract32(immh, 3, 1)) {
6609 unallocated_encoding(s);
6610 return;
6613 if (!fp_access_check(s)) {
6614 return;
6617 if (is_u_shift) {
6618 narrowfn = unsigned_narrow_fns[size];
6619 } else {
6620 narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
6623 tcg_rn = tcg_temp_new_i64();
6624 tcg_rd = tcg_temp_new_i64();
6625 tcg_rd_narrowed = tcg_temp_new_i32();
6626 tcg_final = tcg_const_i64(0);
6628 if (round) {
6629 uint64_t round_const = 1ULL << (shift - 1);
6630 tcg_round = tcg_const_i64(round_const);
6631 } else {
6632 TCGV_UNUSED_I64(tcg_round);
6635 for (i = 0; i < elements; i++) {
6636 read_vec_element(s, tcg_rn, rn, i, ldop);
6637 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6638 false, is_u_shift, size+1, shift);
6639 narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
6640 tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
6641 tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
6644 if (!is_q) {
6645 clear_vec_high(s, rd);
6646 write_vec_element(s, tcg_final, rd, 0, MO_64);
6647 } else {
6648 write_vec_element(s, tcg_final, rd, 1, MO_64);
6651 if (round) {
6652 tcg_temp_free_i64(tcg_round);
6654 tcg_temp_free_i64(tcg_rn);
6655 tcg_temp_free_i64(tcg_rd);
6656 tcg_temp_free_i32(tcg_rd_narrowed);
6657 tcg_temp_free_i64(tcg_final);
6658 return;
6661 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
6662 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
6663 bool src_unsigned, bool dst_unsigned,
6664 int immh, int immb, int rn, int rd)
6666 int immhb = immh << 3 | immb;
6667 int size = 32 - clz32(immh) - 1;
6668 int shift = immhb - (8 << size);
6669 int pass;
6671 assert(immh != 0);
6672 assert(!(scalar && is_q));
6674 if (!scalar) {
6675 if (!is_q && extract32(immh, 3, 1)) {
6676 unallocated_encoding(s);
6677 return;
6680 /* Since we use the variable-shift helpers we must
6681 * replicate the shift count into each element of
6682 * the tcg_shift value.
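 *
 * For example, with 8-bit elements and a shift of 3 the value becomes
 * 0x03030303, so every byte lane of tcg_shift carries the same count.
 */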
6684 switch (size) {
6685 case 0:
6686 shift |= shift << 8;
6687 /* fall through */
6688 case 1:
6689 shift |= shift << 16;
6690 break;
6691 case 2:
6692 case 3:
6693 break;
6694 default:
6695 g_assert_not_reached();
6699 if (!fp_access_check(s)) {
6700 return;
6703 if (size == 3) {
6704 TCGv_i64 tcg_shift = tcg_const_i64(shift);
6705 static NeonGenTwo64OpEnvFn * const fns[2][2] = {
6706 { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
6707 { NULL, gen_helper_neon_qshl_u64 },
6709 NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
6710 int maxpass = is_q ? 2 : 1;
6712 for (pass = 0; pass < maxpass; pass++) {
6713 TCGv_i64 tcg_op = tcg_temp_new_i64();
6715 read_vec_element(s, tcg_op, rn, pass, MO_64);
6716 genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6717 write_vec_element(s, tcg_op, rd, pass, MO_64);
6719 tcg_temp_free_i64(tcg_op);
6721 tcg_temp_free_i64(tcg_shift);
6723 if (!is_q) {
6724 clear_vec_high(s, rd);
6726 } else {
6727 TCGv_i32 tcg_shift = tcg_const_i32(shift);
6728 static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
6730 { gen_helper_neon_qshl_s8,
6731 gen_helper_neon_qshl_s16,
6732 gen_helper_neon_qshl_s32 },
6733 { gen_helper_neon_qshlu_s8,
6734 gen_helper_neon_qshlu_s16,
6735 gen_helper_neon_qshlu_s32 }
6736 }, {
6737 { NULL, NULL, NULL },
6738 { gen_helper_neon_qshl_u8,
6739 gen_helper_neon_qshl_u16,
6740 gen_helper_neon_qshl_u32 }
6743 NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
6744 TCGMemOp memop = scalar ? size : MO_32;
6745 int maxpass = scalar ? 1 : is_q ? 4 : 2;
6747 for (pass = 0; pass < maxpass; pass++) {
6748 TCGv_i32 tcg_op = tcg_temp_new_i32();
6750 read_vec_element_i32(s, tcg_op, rn, pass, memop);
6751 genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6752 if (scalar) {
6753 switch (size) {
6754 case 0:
6755 tcg_gen_ext8u_i32(tcg_op, tcg_op);
6756 break;
6757 case 1:
6758 tcg_gen_ext16u_i32(tcg_op, tcg_op);
6759 break;
6760 case 2:
6761 break;
6762 default:
6763 g_assert_not_reached();
6765 write_fp_sreg(s, rd, tcg_op);
6766 } else {
6767 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6770 tcg_temp_free_i32(tcg_op);
6772 tcg_temp_free_i32(tcg_shift);
6774 if (!is_q && !scalar) {
6775 clear_vec_high(s, rd);
6780 /* Common vector code for handling integer to FP conversion */
6781 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
6782 int elements, int is_signed,
6783 int fracbits, int size)
6785 bool is_double = size == 3 ? true : false;
6786 TCGv_ptr tcg_fpst = get_fpstatus_ptr();
6787 TCGv_i32 tcg_shift = tcg_const_i32(fracbits);
6788 TCGv_i64 tcg_int = tcg_temp_new_i64();
6789 TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
6790 int pass;
6792 for (pass = 0; pass < elements; pass++) {
6793 read_vec_element(s, tcg_int, rn, pass, mop);
6795 if (is_double) {
6796 TCGv_i64 tcg_double = tcg_temp_new_i64();
6797 if (is_signed) {
6798 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6799 tcg_shift, tcg_fpst);
6800 } else {
6801 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6802 tcg_shift, tcg_fpst);
6804 if (elements == 1) {
6805 write_fp_dreg(s, rd, tcg_double);
6806 } else {
6807 write_vec_element(s, tcg_double, rd, pass, MO_64);
6809 tcg_temp_free_i64(tcg_double);
6810 } else {
6811 TCGv_i32 tcg_single = tcg_temp_new_i32();
6812 if (is_signed) {
6813 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6814 tcg_shift, tcg_fpst);
6815 } else {
6816 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6817 tcg_shift, tcg_fpst);
6819 if (elements == 1) {
6820 write_fp_sreg(s, rd, tcg_single);
6821 } else {
6822 write_vec_element_i32(s, tcg_single, rd, pass, MO_32);
6824 tcg_temp_free_i32(tcg_single);
6828 if (!is_double && elements == 2) {
6829 clear_vec_high(s, rd);
6832 tcg_temp_free_i64(tcg_int);
6833 tcg_temp_free_ptr(tcg_fpst);
6834 tcg_temp_free_i32(tcg_shift);
6837 /* UCVTF/SCVTF - Integer to FP conversion */
6838 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
6839 bool is_q, bool is_u,
6840 int immh, int immb, int opcode,
6841 int rn, int rd)
6843 bool is_double = extract32(immh, 3, 1);
6844 int size = is_double ? MO_64 : MO_32;
6845 int elements;
6846 int immhb = immh << 3 | immb;
6847 int fracbits = (is_double ? 128 : 64) - immhb;
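/* e.g. a double-precision SCVTF with immh:immb = 1111:001 (immhb 121)
 * converts with fracbits = 128 - 121 = 7 fractional bits.
 */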
6849 if (!extract32(immh, 2, 2)) {
6850 unallocated_encoding(s);
6851 return;
6854 if (is_scalar) {
6855 elements = 1;
6856 } else {
6857 elements = is_double ? 2 : is_q ? 4 : 2;
6858 if (is_double && !is_q) {
6859 unallocated_encoding(s);
6860 return;
6864 if (!fp_access_check(s)) {
6865 return;
6868 /* immh == 0 would be a failure of the decode logic */
6869 g_assert(immh);
6871 handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
6874 /* FCVTZS, FCVTZU - FP to fixed-point conversion */
6875 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
6876 bool is_q, bool is_u,
6877 int immh, int immb, int rn, int rd)
6879 bool is_double = extract32(immh, 3, 1);
6880 int immhb = immh << 3 | immb;
6881 int fracbits = (is_double ? 128 : 64) - immhb;
6882 int pass;
6883 TCGv_ptr tcg_fpstatus;
6884 TCGv_i32 tcg_rmode, tcg_shift;
6886 if (!extract32(immh, 2, 2)) {
6887 unallocated_encoding(s);
6888 return;
6891 if (!is_scalar && !is_q && is_double) {
6892 unallocated_encoding(s);
6893 return;
6896 if (!fp_access_check(s)) {
6897 return;
6900 assert(!(is_scalar && is_q));
6902 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
6903 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6904 tcg_fpstatus = get_fpstatus_ptr();
6905 tcg_shift = tcg_const_i32(fracbits);
6907 if (is_double) {
6908 int maxpass = is_scalar ? 1 : 2;
6910 for (pass = 0; pass < maxpass; pass++) {
6911 TCGv_i64 tcg_op = tcg_temp_new_i64();
6913 read_vec_element(s, tcg_op, rn, pass, MO_64);
6914 if (is_u) {
6915 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6916 } else {
6917 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6919 write_vec_element(s, tcg_op, rd, pass, MO_64);
6920 tcg_temp_free_i64(tcg_op);
6922 if (!is_q) {
6923 clear_vec_high(s, rd);
6925 } else {
6926 int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
6927 for (pass = 0; pass < maxpass; pass++) {
6928 TCGv_i32 tcg_op = tcg_temp_new_i32();
6930 read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
6931 if (is_u) {
6932 gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6933 } else {
6934 gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6936 if (is_scalar) {
6937 write_fp_sreg(s, rd, tcg_op);
6938 } else {
6939 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6941 tcg_temp_free_i32(tcg_op);
6943 if (!is_q && !is_scalar) {
6944 clear_vec_high(s, rd);
6948 tcg_temp_free_ptr(tcg_fpstatus);
6949 tcg_temp_free_i32(tcg_shift);
6950 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6951 tcg_temp_free_i32(tcg_rmode);
6954 /* C3.6.9 AdvSIMD scalar shift by immediate
6955 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
6956 * +-----+---+-------------+------+------+--------+---+------+------+
6957 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
6958 * +-----+---+-------------+------+------+--------+---+------+------+
6960  * This is the scalar version so it works on a fixed size register
6962 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
6964 int rd = extract32(insn, 0, 5);
6965 int rn = extract32(insn, 5, 5);
6966 int opcode = extract32(insn, 11, 5);
6967 int immb = extract32(insn, 16, 3);
6968 int immh = extract32(insn, 19, 4);
6969 bool is_u = extract32(insn, 29, 1);
6971 if (immh == 0) {
6972 unallocated_encoding(s);
6973 return;
6976 switch (opcode) {
6977 case 0x08: /* SRI */
6978 if (!is_u) {
6979 unallocated_encoding(s);
6980 return;
6982 /* fall through */
6983 case 0x00: /* SSHR / USHR */
6984 case 0x02: /* SSRA / USRA */
6985 case 0x04: /* SRSHR / URSHR */
6986 case 0x06: /* SRSRA / URSRA */
6987 handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
6988 break;
6989 case 0x0a: /* SHL / SLI */
6990 handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
6991 break;
6992 case 0x1c: /* SCVTF, UCVTF */
6993 handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
6994 opcode, rn, rd);
6995 break;
6996 case 0x10: /* SQSHRUN, SQSHRUN2 */
6997 case 0x11: /* SQRSHRUN, SQRSHRUN2 */
6998 if (!is_u) {
6999 unallocated_encoding(s);
7000 return;
7002 handle_vec_simd_sqshrn(s, true, false, false, true,
7003 immh, immb, opcode, rn, rd);
7004 break;
7005 case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
7006 case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
7007 handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
7008 immh, immb, opcode, rn, rd);
7009 break;
7010 case 0xc: /* SQSHLU */
7011 if (!is_u) {
7012 unallocated_encoding(s);
7013 return;
7015 handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
7016 break;
7017 case 0xe: /* SQSHL, UQSHL */
7018 handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
7019 break;
7020 case 0x1f: /* FCVTZS, FCVTZU */
7021 handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
7022 break;
7023 default:
7024 unallocated_encoding(s);
7025 break;
7029 /* C3.6.10 AdvSIMD scalar three different
7030 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
7031 * +-----+---+-----------+------+---+------+--------+-----+------+------+
7032 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd |
7033 * +-----+---+-----------+------+---+------+--------+-----+------+------+
7035 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
7037 bool is_u = extract32(insn, 29, 1);
7038 int size = extract32(insn, 22, 2);
7039 int opcode = extract32(insn, 12, 4);
7040 int rm = extract32(insn, 16, 5);
7041 int rn = extract32(insn, 5, 5);
7042 int rd = extract32(insn, 0, 5);
7044 if (is_u) {
7045 unallocated_encoding(s);
7046 return;
7049 switch (opcode) {
7050 case 0x9: /* SQDMLAL, SQDMLAL2 */
7051 case 0xb: /* SQDMLSL, SQDMLSL2 */
7052 case 0xd: /* SQDMULL, SQDMULL2 */
7053 if (size == 0 || size == 3) {
7054 unallocated_encoding(s);
7055 return;
7057 break;
7058 default:
7059 unallocated_encoding(s);
7060 return;
7063 if (!fp_access_check(s)) {
7064 return;
7067 if (size == 2) {
7068 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7069 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7070 TCGv_i64 tcg_res = tcg_temp_new_i64();
7072 read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
7073 read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
7075 tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
7076 gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
7078 switch (opcode) {
7079 case 0xd: /* SQDMULL, SQDMULL2 */
7080 break;
7081 case 0xb: /* SQDMLSL, SQDMLSL2 */
7082 tcg_gen_neg_i64(tcg_res, tcg_res);
7083 /* fall through */
7084 case 0x9: /* SQDMLAL, SQDMLAL2 */
7085 read_vec_element(s, tcg_op1, rd, 0, MO_64);
7086 gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
7087 tcg_res, tcg_op1);
7088 break;
7089 default:
7090 g_assert_not_reached();
7093 write_fp_dreg(s, rd, tcg_res);
7095 tcg_temp_free_i64(tcg_op1);
7096 tcg_temp_free_i64(tcg_op2);
7097 tcg_temp_free_i64(tcg_res);
7098 } else {
7099 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7100 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7101 TCGv_i64 tcg_res = tcg_temp_new_i64();
7103 read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
7104 read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
7106 gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
7107 gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
7109 switch (opcode) {
7110 case 0xd: /* SQDMULL, SQDMULL2 */
7111 break;
7112 case 0xb: /* SQDMLSL, SQDMLSL2 */
7113 gen_helper_neon_negl_u32(tcg_res, tcg_res);
7114 /* fall through */
7115 case 0x9: /* SQDMLAL, SQDMLAL2 */
7117 TCGv_i64 tcg_op3 = tcg_temp_new_i64();
7118 read_vec_element(s, tcg_op3, rd, 0, MO_32);
7119 gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
7120 tcg_res, tcg_op3);
7121 tcg_temp_free_i64(tcg_op3);
7122 break;
7124 default:
7125 g_assert_not_reached();
7128 tcg_gen_ext32u_i64(tcg_res, tcg_res);
7129 write_fp_dreg(s, rd, tcg_res);
7131 tcg_temp_free_i32(tcg_op1);
7132 tcg_temp_free_i32(tcg_op2);
7133 tcg_temp_free_i64(tcg_res);
7137 static void handle_3same_64(DisasContext *s, int opcode, bool u,
7138 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
7140 /* Handle 64x64->64 opcodes which are shared between the scalar
7141 * and vector 3-same groups. We cover every opcode where size == 3
7142 * is valid in either the three-reg-same (integer, not pairwise)
7143 * or scalar-three-reg-same groups. (Some opcodes are not yet
7144 * implemented.)
7146 TCGCond cond;
7148 switch (opcode) {
7149 case 0x1: /* SQADD */
7150 if (u) {
7151 gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7152 } else {
7153 gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7155 break;
7156 case 0x5: /* SQSUB */
7157 if (u) {
7158 gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7159 } else {
7160 gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7162 break;
7163 case 0x6: /* CMGT, CMHI */
7164 /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
7165 * We implement this using setcond (test) and then negating.
7167 cond = u ? TCG_COND_GTU : TCG_COND_GT;
7168 do_cmop:
7169 tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
7170 tcg_gen_neg_i64(tcg_rd, tcg_rd);
7171 break;
7172 case 0x7: /* CMGE, CMHS */
7173 cond = u ? TCG_COND_GEU : TCG_COND_GE;
7174 goto do_cmop;
7175 case 0x11: /* CMTST, CMEQ */
7176 if (u) {
7177 cond = TCG_COND_EQ;
7178 goto do_cmop;
7180 /* CMTST : test is "if (X & Y != 0)". */
7181 tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
7182 tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
7183 tcg_gen_neg_i64(tcg_rd, tcg_rd);
7184 break;
7185 case 0x8: /* SSHL, USHL */
7186 if (u) {
7187 gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
7188 } else {
7189 gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
7191 break;
7192 case 0x9: /* SQSHL, UQSHL */
7193 if (u) {
7194 gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7195 } else {
7196 gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7198 break;
7199 case 0xa: /* SRSHL, URSHL */
7200 if (u) {
7201 gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
7202 } else {
7203 gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
7205 break;
7206 case 0xb: /* SQRSHL, UQRSHL */
7207 if (u) {
7208 gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7209 } else {
7210 gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7212 break;
7213 case 0x10: /* ADD, SUB */
7214 if (u) {
7215 tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
7216 } else {
7217 tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
7219 break;
7220 default:
7221 g_assert_not_reached();
7225 /* Handle the 3-same-operands float operations; shared by the scalar
7226 * and vector encodings. The caller must filter out any encodings
7227 * not allocated for the encoding it is dealing with.
7229 static void handle_3same_float(DisasContext *s, int size, int elements,
7230 int fpopcode, int rd, int rn, int rm)
7232 int pass;
7233 TCGv_ptr fpst = get_fpstatus_ptr();
7235 for (pass = 0; pass < elements; pass++) {
7236 if (size) {
7237 /* Double */
7238 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7239 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7240 TCGv_i64 tcg_res = tcg_temp_new_i64();
7242 read_vec_element(s, tcg_op1, rn, pass, MO_64);
7243 read_vec_element(s, tcg_op2, rm, pass, MO_64);
7245 switch (fpopcode) {
7246 case 0x39: /* FMLS */
7247 /* As usual for ARM, separate negation for fused multiply-add */
7248 gen_helper_vfp_negd(tcg_op1, tcg_op1);
7249 /* fall through */
7250 case 0x19: /* FMLA */
7251 read_vec_element(s, tcg_res, rd, pass, MO_64);
7252 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
7253 tcg_res, fpst);
7254 break;
7255 case 0x18: /* FMAXNM */
7256 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7257 break;
7258 case 0x1a: /* FADD */
7259 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7260 break;
7261 case 0x1b: /* FMULX */
7262 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
7263 break;
7264 case 0x1c: /* FCMEQ */
7265 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7266 break;
7267 case 0x1e: /* FMAX */
7268 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7269 break;
7270 case 0x1f: /* FRECPS */
7271 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7272 break;
7273 case 0x38: /* FMINNM */
7274 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7275 break;
7276 case 0x3a: /* FSUB */
7277 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7278 break;
7279 case 0x3e: /* FMIN */
7280 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7281 break;
7282 case 0x3f: /* FRSQRTS */
7283 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7284 break;
7285 case 0x5b: /* FMUL */
7286 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
7287 break;
7288 case 0x5c: /* FCMGE */
7289 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7290 break;
7291 case 0x5d: /* FACGE */
7292 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7293 break;
7294 case 0x5f: /* FDIV */
7295 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
7296 break;
7297 case 0x7a: /* FABD */
7298 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7299 gen_helper_vfp_absd(tcg_res, tcg_res);
7300 break;
7301 case 0x7c: /* FCMGT */
7302 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7303 break;
7304 case 0x7d: /* FACGT */
7305 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7306 break;
7307 default:
7308 g_assert_not_reached();
7311 write_vec_element(s, tcg_res, rd, pass, MO_64);
7313 tcg_temp_free_i64(tcg_res);
7314 tcg_temp_free_i64(tcg_op1);
7315 tcg_temp_free_i64(tcg_op2);
7316 } else {
7317 /* Single */
7318 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7319 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7320 TCGv_i32 tcg_res = tcg_temp_new_i32();
7322 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
7323 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
7325 switch (fpopcode) {
7326 case 0x39: /* FMLS */
7327 /* As usual for ARM, separate negation for fused multiply-add */
7328 gen_helper_vfp_negs(tcg_op1, tcg_op1);
7329 /* fall through */
7330 case 0x19: /* FMLA */
7331 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7332 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
7333 tcg_res, fpst);
7334 break;
7335 case 0x1a: /* FADD */
7336 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7337 break;
7338 case 0x1b: /* FMULX */
7339 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
7340 break;
7341 case 0x1c: /* FCMEQ */
7342 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7343 break;
7344 case 0x1e: /* FMAX */
7345 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7346 break;
7347 case 0x1f: /* FRECPS */
7348 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7349 break;
7350 case 0x18: /* FMAXNM */
7351 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7352 break;
7353 case 0x38: /* FMINNM */
7354 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7355 break;
7356 case 0x3a: /* FSUB */
7357 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7358 break;
7359 case 0x3e: /* FMIN */
7360 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7361 break;
7362 case 0x3f: /* FRSQRTS */
7363 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7364 break;
7365 case 0x5b: /* FMUL */
7366 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
7367 break;
7368 case 0x5c: /* FCMGE */
7369 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7370 break;
7371 case 0x5d: /* FACGE */
7372 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7373 break;
7374 case 0x5f: /* FDIV */
7375 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
7376 break;
7377 case 0x7a: /* FABD */
7378 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7379 gen_helper_vfp_abss(tcg_res, tcg_res);
7380 break;
7381 case 0x7c: /* FCMGT */
7382 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7383 break;
7384 case 0x7d: /* FACGT */
7385 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7386 break;
7387 default:
7388 g_assert_not_reached();
7391 if (elements == 1) {
7392 /* scalar single so clear high part */
7393 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7395 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
7396 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
7397 tcg_temp_free_i64(tcg_tmp);
7398 } else {
7399 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7402 tcg_temp_free_i32(tcg_res);
7403 tcg_temp_free_i32(tcg_op1);
7404 tcg_temp_free_i32(tcg_op2);
7408 tcg_temp_free_ptr(fpst);
7410 if ((elements << size) < 4) {
7411 /* scalar, or non-quad vector op */
7412 clear_vec_high(s, rd);
7416 /* C3.6.11 AdvSIMD scalar three same
7417 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
7418 * +-----+---+-----------+------+---+------+--------+---+------+------+
7419 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd |
7420 * +-----+---+-----------+------+---+------+--------+---+------+------+
7422 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
7424 int rd = extract32(insn, 0, 5);
7425 int rn = extract32(insn, 5, 5);
7426 int opcode = extract32(insn, 11, 5);
7427 int rm = extract32(insn, 16, 5);
7428 int size = extract32(insn, 22, 2);
7429 bool u = extract32(insn, 29, 1);
7430 TCGv_i64 tcg_rd;
7432 if (opcode >= 0x18) {
7433 /* Floating point: U, size[1] and opcode indicate operation */
7434 int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
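/* e.g. FABD is opcode 0b11010 with U == 1 and size == 1x, giving fpopcode
 * 0x7a below; FMULX (U == 0, size == 0x) stays at 0x1b.
 */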
7435 switch (fpopcode) {
7436 case 0x1b: /* FMULX */
7437 case 0x1f: /* FRECPS */
7438 case 0x3f: /* FRSQRTS */
7439 case 0x5d: /* FACGE */
7440 case 0x7d: /* FACGT */
7441 case 0x1c: /* FCMEQ */
7442 case 0x5c: /* FCMGE */
7443 case 0x7c: /* FCMGT */
7444 case 0x7a: /* FABD */
7445 break;
7446 default:
7447 unallocated_encoding(s);
7448 return;
7451 if (!fp_access_check(s)) {
7452 return;
7455 handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
7456 return;
7459 switch (opcode) {
7460 case 0x1: /* SQADD, UQADD */
7461 case 0x5: /* SQSUB, UQSUB */
7462 case 0x9: /* SQSHL, UQSHL */
7463 case 0xb: /* SQRSHL, UQRSHL */
7464 break;
7465 case 0x8: /* SSHL, USHL */
7466 case 0xa: /* SRSHL, URSHL */
7467 case 0x6: /* CMGT, CMHI */
7468 case 0x7: /* CMGE, CMHS */
7469 case 0x11: /* CMTST, CMEQ */
7470 case 0x10: /* ADD, SUB (vector) */
7471 if (size != 3) {
7472 unallocated_encoding(s);
7473 return;
7475 break;
7476 case 0x16: /* SQDMULH, SQRDMULH (vector) */
7477 if (size != 1 && size != 2) {
7478 unallocated_encoding(s);
7479 return;
7481 break;
7482 default:
7483 unallocated_encoding(s);
7484 return;
7487 if (!fp_access_check(s)) {
7488 return;
7491 tcg_rd = tcg_temp_new_i64();
7493 if (size == 3) {
7494 TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
7495 TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
7497 handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
7498 tcg_temp_free_i64(tcg_rn);
7499 tcg_temp_free_i64(tcg_rm);
7500 } else {
7501 /* Do a single operation on the lowest element in the vector.
7502 * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
7503 * no side effects for all these operations.
7504 * OPTME: special-purpose helpers would avoid doing some
7505 * unnecessary work in the helper for the 8 and 16 bit cases.
7507 NeonGenTwoOpEnvFn *genenvfn;
7508 TCGv_i32 tcg_rn = tcg_temp_new_i32();
7509 TCGv_i32 tcg_rm = tcg_temp_new_i32();
7510 TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
7512 read_vec_element_i32(s, tcg_rn, rn, 0, size);
7513 read_vec_element_i32(s, tcg_rm, rm, 0, size);
7515 switch (opcode) {
7516 case 0x1: /* SQADD, UQADD */
7518 static NeonGenTwoOpEnvFn * const fns[3][2] = {
7519 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
7520 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
7521 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
7523 genenvfn = fns[size][u];
7524 break;
7526 case 0x5: /* SQSUB, UQSUB */
7528 static NeonGenTwoOpEnvFn * const fns[3][2] = {
7529 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
7530 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
7531 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
7533 genenvfn = fns[size][u];
7534 break;
7536 case 0x9: /* SQSHL, UQSHL */
7538 static NeonGenTwoOpEnvFn * const fns[3][2] = {
7539 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
7540 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
7541 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
7543 genenvfn = fns[size][u];
7544 break;
7546 case 0xb: /* SQRSHL, UQRSHL */
7548 static NeonGenTwoOpEnvFn * const fns[3][2] = {
7549 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
7550 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
7551 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
7553 genenvfn = fns[size][u];
7554 break;
7556 case 0x16: /* SQDMULH, SQRDMULH */
7558 static NeonGenTwoOpEnvFn * const fns[2][2] = {
7559 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
7560 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
7562 assert(size == 1 || size == 2);
7563 genenvfn = fns[size - 1][u];
7564 break;
7566 default:
7567 g_assert_not_reached();
7570 genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
7571 tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
7572 tcg_temp_free_i32(tcg_rd32);
7573 tcg_temp_free_i32(tcg_rn);
7574 tcg_temp_free_i32(tcg_rm);
7577 write_fp_dreg(s, rd, tcg_rd);
7579 tcg_temp_free_i64(tcg_rd);
7582 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
7583 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
7584 TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
7586 /* Handle 64->64 opcodes which are shared between the scalar and
7587 * vector 2-reg-misc groups. We cover every integer opcode where size == 3
7588 * is valid in either group and also the double-precision fp ops.
7589  * The caller need only provide tcg_rmode and tcg_fpstatus if the op
7590 * requires them.
7592 TCGCond cond;
7594 switch (opcode) {
7595 case 0x4: /* CLS, CLZ */
7596 if (u) {
7597 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
7598 } else {
7599 tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
7601 break;
7602 case 0x5: /* NOT */
7603 /* This opcode is shared with CNT and RBIT but we have earlier
7604 * enforced that size == 3 if and only if this is the NOT insn.
7606 tcg_gen_not_i64(tcg_rd, tcg_rn);
7607 break;
7608 case 0x7: /* SQABS, SQNEG */
7609 if (u) {
7610 gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
7611 } else {
7612 gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
7614 break;
7615 case 0xa: /* CMLT */
7616 /* 64 bit integer comparison against zero, result is
7617  * test ? (2^64 - 1) : 0. We implement this using setcond (test)
7618  * and then negating, as for the three-reg-same comparisons above.
7620 cond = TCG_COND_LT;
7621 do_cmop:
7622 tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
7623 tcg_gen_neg_i64(tcg_rd, tcg_rd);
7624 break;
7625 case 0x8: /* CMGT, CMGE */
7626 cond = u ? TCG_COND_GE : TCG_COND_GT;
7627 goto do_cmop;
7628 case 0x9: /* CMEQ, CMLE */
7629 cond = u ? TCG_COND_LE : TCG_COND_EQ;
7630 goto do_cmop;
7631 case 0xb: /* ABS, NEG */
7632 if (u) {
7633 tcg_gen_neg_i64(tcg_rd, tcg_rn);
7634 } else {
7635 TCGv_i64 tcg_zero = tcg_const_i64(0);
7636 tcg_gen_neg_i64(tcg_rd, tcg_rn);
7637 tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
7638 tcg_rn, tcg_rd);
7639 tcg_temp_free_i64(tcg_zero);
7641 break;
7642 case 0x2f: /* FABS */
7643 gen_helper_vfp_absd(tcg_rd, tcg_rn);
7644 break;
7645 case 0x6f: /* FNEG */
7646 gen_helper_vfp_negd(tcg_rd, tcg_rn);
7647 break;
7648 case 0x7f: /* FSQRT */
7649 gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
7650 break;
7651 case 0x1a: /* FCVTNS */
7652 case 0x1b: /* FCVTMS */
7653 case 0x1c: /* FCVTAS */
7654 case 0x3a: /* FCVTPS */
7655 case 0x3b: /* FCVTZS */
7657 TCGv_i32 tcg_shift = tcg_const_i32(0);
7658 gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7659 tcg_temp_free_i32(tcg_shift);
7660 break;
7662 case 0x5a: /* FCVTNU */
7663 case 0x5b: /* FCVTMU */
7664 case 0x5c: /* FCVTAU */
7665 case 0x7a: /* FCVTPU */
7666 case 0x7b: /* FCVTZU */
7668 TCGv_i32 tcg_shift = tcg_const_i32(0);
7669 gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7670 tcg_temp_free_i32(tcg_shift);
7671 break;
7673 case 0x18: /* FRINTN */
7674 case 0x19: /* FRINTM */
7675 case 0x38: /* FRINTP */
7676 case 0x39: /* FRINTZ */
7677 case 0x58: /* FRINTA */
7678 case 0x79: /* FRINTI */
7679 gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
7680 break;
7681 case 0x59: /* FRINTX */
7682 gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
7683 break;
7684 default:
7685 g_assert_not_reached();
7689 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
7690 bool is_scalar, bool is_u, bool is_q,
7691 int size, int rn, int rd)
7693 bool is_double = (size == 3);
7694 TCGv_ptr fpst;
7696 if (!fp_access_check(s)) {
7697 return;
7700 fpst = get_fpstatus_ptr();
7702 if (is_double) {
7703 TCGv_i64 tcg_op = tcg_temp_new_i64();
7704 TCGv_i64 tcg_zero = tcg_const_i64(0);
7705 TCGv_i64 tcg_res = tcg_temp_new_i64();
7706 NeonGenTwoDoubleOPFn *genfn;
7707 bool swap = false;
7708 int pass;
7710 switch (opcode) {
7711 case 0x2e: /* FCMLT (zero) */
7712 swap = true;
7713 /* fallthrough */
7714 case 0x2c: /* FCMGT (zero) */
7715 genfn = gen_helper_neon_cgt_f64;
7716 break;
7717 case 0x2d: /* FCMEQ (zero) */
7718 genfn = gen_helper_neon_ceq_f64;
7719 break;
7720 case 0x6d: /* FCMLE (zero) */
7721 swap = true;
7722 /* fall through */
7723 case 0x6c: /* FCMGE (zero) */
7724 genfn = gen_helper_neon_cge_f64;
7725 break;
7726 default:
7727 g_assert_not_reached();
7730 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7731 read_vec_element(s, tcg_op, rn, pass, MO_64);
7732 if (swap) {
7733 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7734 } else {
7735 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7737 write_vec_element(s, tcg_res, rd, pass, MO_64);
7739 if (is_scalar) {
7740 clear_vec_high(s, rd);
7743 tcg_temp_free_i64(tcg_res);
7744 tcg_temp_free_i64(tcg_zero);
7745 tcg_temp_free_i64(tcg_op);
7746 } else {
7747 TCGv_i32 tcg_op = tcg_temp_new_i32();
7748 TCGv_i32 tcg_zero = tcg_const_i32(0);
7749 TCGv_i32 tcg_res = tcg_temp_new_i32();
7750 NeonGenTwoSingleOPFn *genfn;
7751 bool swap = false;
7752 int pass, maxpasses;
7754 switch (opcode) {
7755 case 0x2e: /* FCMLT (zero) */
7756 swap = true;
7757 /* fall through */
7758 case 0x2c: /* FCMGT (zero) */
7759 genfn = gen_helper_neon_cgt_f32;
7760 break;
7761 case 0x2d: /* FCMEQ (zero) */
7762 genfn = gen_helper_neon_ceq_f32;
7763 break;
7764 case 0x6d: /* FCMLE (zero) */
7765 swap = true;
7766 /* fall through */
7767 case 0x6c: /* FCMGE (zero) */
7768 genfn = gen_helper_neon_cge_f32;
7769 break;
7770 default:
7771 g_assert_not_reached();
7774 if (is_scalar) {
7775 maxpasses = 1;
7776 } else {
7777 maxpasses = is_q ? 4 : 2;
7780 for (pass = 0; pass < maxpasses; pass++) {
7781 read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7782 if (swap) {
7783 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7784 } else {
7785 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7787 if (is_scalar) {
7788 write_fp_sreg(s, rd, tcg_res);
7789 } else {
7790 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7793 tcg_temp_free_i32(tcg_res);
7794 tcg_temp_free_i32(tcg_zero);
7795 tcg_temp_free_i32(tcg_op);
7796 if (!is_q && !is_scalar) {
7797 clear_vec_high(s, rd);
7801 tcg_temp_free_ptr(fpst);
7804 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
7805 bool is_scalar, bool is_u, bool is_q,
7806 int size, int rn, int rd)
7808 bool is_double = (size == 3);
7809 TCGv_ptr fpst = get_fpstatus_ptr();
7811 if (is_double) {
7812 TCGv_i64 tcg_op = tcg_temp_new_i64();
7813 TCGv_i64 tcg_res = tcg_temp_new_i64();
7814 int pass;
7816 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7817 read_vec_element(s, tcg_op, rn, pass, MO_64);
7818 switch (opcode) {
7819 case 0x3d: /* FRECPE */
7820 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
7821 break;
7822 case 0x3f: /* FRECPX */
7823 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
7824 break;
7825 case 0x7d: /* FRSQRTE */
7826 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
7827 break;
7828 default:
7829 g_assert_not_reached();
7831 write_vec_element(s, tcg_res, rd, pass, MO_64);
7833 if (is_scalar) {
7834 clear_vec_high(s, rd);
7837 tcg_temp_free_i64(tcg_res);
7838 tcg_temp_free_i64(tcg_op);
7839 } else {
7840 TCGv_i32 tcg_op = tcg_temp_new_i32();
7841 TCGv_i32 tcg_res = tcg_temp_new_i32();
7842 int pass, maxpasses;
7844 if (is_scalar) {
7845 maxpasses = 1;
7846 } else {
7847 maxpasses = is_q ? 4 : 2;
7850 for (pass = 0; pass < maxpasses; pass++) {
7851 read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7853 switch (opcode) {
7854 case 0x3c: /* URECPE */
7855 gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
7856 break;
7857 case 0x3d: /* FRECPE */
7858 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
7859 break;
7860 case 0x3f: /* FRECPX */
7861 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
7862 break;
7863 case 0x7d: /* FRSQRTE */
7864 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
7865 break;
7866 default:
7867 g_assert_not_reached();
7870 if (is_scalar) {
7871 write_fp_sreg(s, rd, tcg_res);
7872 } else {
7873 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7876 tcg_temp_free_i32(tcg_res);
7877 tcg_temp_free_i32(tcg_op);
7878 if (!is_q && !is_scalar) {
7879 clear_vec_high(s, rd);
7882 tcg_temp_free_ptr(fpst);
7885 static void handle_2misc_narrow(DisasContext *s, bool scalar,
7886 int opcode, bool u, bool is_q,
7887 int size, int rn, int rd)
7889 /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
7890 * in the source becomes a size element in the destination).
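 *
 * For example XTN (opcode 0x12, size 1): each 64-bit pass of the source
 * narrows to 32 bits of result (two 16-bit elements), and the two 32-bit
 * results land in either the low half of Vd (destelt == 0) or, for the
 * "2" forms with is_q set, the high half (destelt == 2).
 */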
7892 int pass;
7893 TCGv_i32 tcg_res[2];
7894 int destelt = is_q ? 2 : 0;
7895 int passes = scalar ? 1 : 2;
7897 if (scalar) {
7898 tcg_res[1] = tcg_const_i32(0);
7901 for (pass = 0; pass < passes; pass++) {
7902 TCGv_i64 tcg_op = tcg_temp_new_i64();
7903 NeonGenNarrowFn *genfn = NULL;
7904 NeonGenNarrowEnvFn *genenvfn = NULL;
7906 if (scalar) {
7907 read_vec_element(s, tcg_op, rn, pass, size + 1);
7908 } else {
7909 read_vec_element(s, tcg_op, rn, pass, MO_64);
7911 tcg_res[pass] = tcg_temp_new_i32();
7913 switch (opcode) {
7914 case 0x12: /* XTN, SQXTUN */
7916 static NeonGenNarrowFn * const xtnfns[3] = {
7917 gen_helper_neon_narrow_u8,
7918 gen_helper_neon_narrow_u16,
7919 tcg_gen_extrl_i64_i32,
7921 static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
7922 gen_helper_neon_unarrow_sat8,
7923 gen_helper_neon_unarrow_sat16,
7924 gen_helper_neon_unarrow_sat32,
7926 if (u) {
7927 genenvfn = sqxtunfns[size];
7928 } else {
7929 genfn = xtnfns[size];
7931 break;
7933 case 0x14: /* SQXTN, UQXTN */
7935 static NeonGenNarrowEnvFn * const fns[3][2] = {
7936 { gen_helper_neon_narrow_sat_s8,
7937 gen_helper_neon_narrow_sat_u8 },
7938 { gen_helper_neon_narrow_sat_s16,
7939 gen_helper_neon_narrow_sat_u16 },
7940 { gen_helper_neon_narrow_sat_s32,
7941 gen_helper_neon_narrow_sat_u32 },
7943 genenvfn = fns[size][u];
7944 break;
7946 case 0x16: /* FCVTN, FCVTN2 */
7947 /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
7948 if (size == 2) {
7949 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
7950 } else {
7951 TCGv_i32 tcg_lo = tcg_temp_new_i32();
7952 TCGv_i32 tcg_hi = tcg_temp_new_i32();
7953 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
7954 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
7955 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
7956 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
7957 tcg_temp_free_i32(tcg_lo);
7958 tcg_temp_free_i32(tcg_hi);
7960 break;
7961 case 0x56: /* FCVTXN, FCVTXN2 */
7962 /* 64 bit to 32 bit float conversion
7963 * with von Neumann rounding (round to odd)
7965 assert(size == 2);
7966 gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
7967 break;
7968 default:
7969 g_assert_not_reached();
7972 if (genfn) {
7973 genfn(tcg_res[pass], tcg_op);
7974 } else if (genenvfn) {
7975 genenvfn(tcg_res[pass], cpu_env, tcg_op);
7978 tcg_temp_free_i64(tcg_op);
7981 for (pass = 0; pass < 2; pass++) {
7982 write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
7983 tcg_temp_free_i32(tcg_res[pass]);
7985 if (!is_q) {
7986 clear_vec_high(s, rd);
7990 /* Remaining saturating accumulating ops */
7991 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
7992 bool is_q, int size, int rn, int rd)
7994 bool is_double = (size == 3);
7996 if (is_double) {
7997 TCGv_i64 tcg_rn = tcg_temp_new_i64();
7998 TCGv_i64 tcg_rd = tcg_temp_new_i64();
7999 int pass;
8001 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
8002 read_vec_element(s, tcg_rn, rn, pass, MO_64);
8003 read_vec_element(s, tcg_rd, rd, pass, MO_64);
8005 if (is_u) { /* USQADD */
8006 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8007 } else { /* SUQADD */
8008 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8010 write_vec_element(s, tcg_rd, rd, pass, MO_64);
8012 if (is_scalar) {
8013 clear_vec_high(s, rd);
8016 tcg_temp_free_i64(tcg_rd);
8017 tcg_temp_free_i64(tcg_rn);
8018 } else {
8019 TCGv_i32 tcg_rn = tcg_temp_new_i32();
8020 TCGv_i32 tcg_rd = tcg_temp_new_i32();
8021 int pass, maxpasses;
8023 if (is_scalar) {
8024 maxpasses = 1;
8025 } else {
8026 maxpasses = is_q ? 4 : 2;
8029 for (pass = 0; pass < maxpasses; pass++) {
8030 if (is_scalar) {
8031 read_vec_element_i32(s, tcg_rn, rn, pass, size);
8032 read_vec_element_i32(s, tcg_rd, rd, pass, size);
8033 } else {
8034 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
8035 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
8038 if (is_u) { /* USQADD */
8039 switch (size) {
8040 case 0:
8041 gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8042 break;
8043 case 1:
8044 gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8045 break;
8046 case 2:
8047 gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8048 break;
8049 default:
8050 g_assert_not_reached();
8052 } else { /* SUQADD */
8053 switch (size) {
8054 case 0:
8055 gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8056 break;
8057 case 1:
8058 gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8059 break;
8060 case 2:
8061 gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8062 break;
8063 default:
8064 g_assert_not_reached();
8068 if (is_scalar) {
8069 TCGv_i64 tcg_zero = tcg_const_i64(0);
8070 write_vec_element(s, tcg_zero, rd, 0, MO_64);
8071 tcg_temp_free_i64(tcg_zero);
8073 write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
8076 if (!is_q) {
8077 clear_vec_high(s, rd);
8080 tcg_temp_free_i32(tcg_rd);
8081 tcg_temp_free_i32(tcg_rn);
8085 /* C3.6.12 AdvSIMD scalar two reg misc
8086 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
8087 * +-----+---+-----------+------+-----------+--------+-----+------+------+
8088 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
8089 * +-----+---+-----------+------+-----------+--------+-----+------+------+
8091 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
8093 int rd = extract32(insn, 0, 5);
8094 int rn = extract32(insn, 5, 5);
8095 int opcode = extract32(insn, 12, 5);
8096 int size = extract32(insn, 22, 2);
8097 bool u = extract32(insn, 29, 1);
8098 bool is_fcvt = false;
8099 int rmode;
8100 TCGv_i32 tcg_rmode;
8101 TCGv_ptr tcg_fpstatus;
8103 switch (opcode) {
8104     case 0x3: /* USQADD / SUQADD */
8105 if (!fp_access_check(s)) {
8106 return;
8108 handle_2misc_satacc(s, true, u, false, size, rn, rd);
8109 return;
8110 case 0x7: /* SQABS / SQNEG */
8111 break;
8112 case 0xa: /* CMLT */
8113 if (u) {
8114 unallocated_encoding(s);
8115 return;
8117 /* fall through */
8118 case 0x8: /* CMGT, CMGE */
8119 case 0x9: /* CMEQ, CMLE */
8120 case 0xb: /* ABS, NEG */
8121 if (size != 3) {
8122 unallocated_encoding(s);
8123 return;
8125 break;
8126 case 0x12: /* SQXTUN */
8127 if (!u) {
8128 unallocated_encoding(s);
8129 return;
8131 /* fall through */
8132 case 0x14: /* SQXTN, UQXTN */
8133 if (size == 3) {
8134 unallocated_encoding(s);
8135 return;
8137 if (!fp_access_check(s)) {
8138 return;
8140 handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
8141 return;
8142 case 0xc ... 0xf:
8143 case 0x16 ... 0x1d:
8144 case 0x1f:
8145 /* Floating point: U, size[1] and opcode indicate operation;
8146 * size[0] indicates single or double precision.
8148 opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
8149 size = extract32(size, 0, 1) ? 3 : 2;
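/* e.g. FCVTZU on a double: U == 1, size == 11, opcode field 0b11011, so
 * the merged opcode is 0x7b and size becomes 3 (MO_64).
 */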
8150 switch (opcode) {
8151 case 0x2c: /* FCMGT (zero) */
8152 case 0x2d: /* FCMEQ (zero) */
8153 case 0x2e: /* FCMLT (zero) */
8154 case 0x6c: /* FCMGE (zero) */
8155 case 0x6d: /* FCMLE (zero) */
8156 handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
8157 return;
8158 case 0x1d: /* SCVTF */
8159 case 0x5d: /* UCVTF */
8161 bool is_signed = (opcode == 0x1d);
8162 if (!fp_access_check(s)) {
8163 return;
8165 handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
8166 return;
8168 case 0x3d: /* FRECPE */
8169 case 0x3f: /* FRECPX */
8170 case 0x7d: /* FRSQRTE */
8171 if (!fp_access_check(s)) {
8172 return;
8174 handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
8175 return;
8176 case 0x1a: /* FCVTNS */
8177 case 0x1b: /* FCVTMS */
8178 case 0x3a: /* FCVTPS */
8179 case 0x3b: /* FCVTZS */
8180 case 0x5a: /* FCVTNU */
8181 case 0x5b: /* FCVTMU */
8182 case 0x7a: /* FCVTPU */
8183 case 0x7b: /* FCVTZU */
8184 is_fcvt = true;
8185 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
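/* e.g. FCVTZS (merged opcode 0x3b) gives rmode 3 and FCVTPS (0x3a) gives
 * rmode 1; this relies on the FPROUNDING_* values matching the
 * architectural FPRounding encoding (TIEEVEN 0, POSINF 1, NEGINF 2,
 * ZERO 3).
 */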
8186 break;
8187 case 0x1c: /* FCVTAS */
8188 case 0x5c: /* FCVTAU */
8189 /* TIEAWAY doesn't fit in the usual rounding mode encoding */
8190 is_fcvt = true;
8191 rmode = FPROUNDING_TIEAWAY;
8192 break;
8193 case 0x56: /* FCVTXN, FCVTXN2 */
8194 if (size == 2) {
8195 unallocated_encoding(s);
8196 return;
8198 if (!fp_access_check(s)) {
8199 return;
8201 handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
8202 return;
8203 default:
8204 unallocated_encoding(s);
8205 return;
8207 break;
8208 default:
8209 unallocated_encoding(s);
8210 return;
8213 if (!fp_access_check(s)) {
8214 return;
8217 if (is_fcvt) {
8218 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
8219 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8220 tcg_fpstatus = get_fpstatus_ptr();
8221 } else {
8222 TCGV_UNUSED_I32(tcg_rmode);
8223 TCGV_UNUSED_PTR(tcg_fpstatus);
8226 if (size == 3) {
8227 TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8228 TCGv_i64 tcg_rd = tcg_temp_new_i64();
8230 handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
8231 write_fp_dreg(s, rd, tcg_rd);
8232 tcg_temp_free_i64(tcg_rd);
8233 tcg_temp_free_i64(tcg_rn);
8234 } else {
8235 TCGv_i32 tcg_rn = tcg_temp_new_i32();
8236 TCGv_i32 tcg_rd = tcg_temp_new_i32();
8238 read_vec_element_i32(s, tcg_rn, rn, 0, size);
8240 switch (opcode) {
8241 case 0x7: /* SQABS, SQNEG */
8243 NeonGenOneOpEnvFn *genfn;
8244 static NeonGenOneOpEnvFn * const fns[3][2] = {
8245 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
8246 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
8247 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
8249 genfn = fns[size][u];
8250 genfn(tcg_rd, cpu_env, tcg_rn);
8251 break;
8253 case 0x1a: /* FCVTNS */
8254 case 0x1b: /* FCVTMS */
8255 case 0x1c: /* FCVTAS */
8256 case 0x3a: /* FCVTPS */
8257 case 0x3b: /* FCVTZS */
8259 TCGv_i32 tcg_shift = tcg_const_i32(0);
8260 gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8261 tcg_temp_free_i32(tcg_shift);
8262 break;
8264 case 0x5a: /* FCVTNU */
8265 case 0x5b: /* FCVTMU */
8266 case 0x5c: /* FCVTAU */
8267 case 0x7a: /* FCVTPU */
8268 case 0x7b: /* FCVTZU */
8270 TCGv_i32 tcg_shift = tcg_const_i32(0);
8271 gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8272 tcg_temp_free_i32(tcg_shift);
8273 break;
8275 default:
8276 g_assert_not_reached();
8279 write_fp_sreg(s, rd, tcg_rd);
8280 tcg_temp_free_i32(tcg_rd);
8281 tcg_temp_free_i32(tcg_rn);
8284 if (is_fcvt) {
8285 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8286 tcg_temp_free_i32(tcg_rmode);
8287 tcg_temp_free_ptr(tcg_fpstatus);
8291 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
8292 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
8293 int immh, int immb, int opcode, int rn, int rd)
8295 int size = 32 - clz32(immh) - 1;
8296 int immhb = immh << 3 | immb;
8297 int shift = 2 * (8 << size) - immhb;
8298 bool accumulate = false;
8299 bool round = false;
8300 bool insert = false;
8301 int dsize = is_q ? 128 : 64;
8302 int esize = 8 << size;
8303 int elements = dsize/esize;
8304 TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
8305 TCGv_i64 tcg_rn = new_tmp_a64(s);
8306 TCGv_i64 tcg_rd = new_tmp_a64(s);
8307 TCGv_i64 tcg_round;
8308 int i;
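/* The element size comes from the position of the leading set bit of immh,
 * and the shift amount is encoded as (2 * esize) - immh:immb. For example
 * immh=0001, immb=010 gives size 0 (byte elements), immhb = 10 and
 * shift = 16 - 10 = 6.
 */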
8310 if (extract32(immh, 3, 1) && !is_q) {
8311 unallocated_encoding(s);
8312 return;
8315 if (size > 3 && !is_q) {
8316 unallocated_encoding(s);
8317 return;
8320 if (!fp_access_check(s)) {
8321 return;
8324 switch (opcode) {
8325 case 0x02: /* SSRA / USRA (accumulate) */
8326 accumulate = true;
8327 break;
8328 case 0x04: /* SRSHR / URSHR (rounding) */
8329 round = true;
8330 break;
8331 case 0x06: /* SRSRA / URSRA (accum + rounding) */
8332 accumulate = round = true;
8333 break;
8334 case 0x08: /* SRI */
8335 insert = true;
8336 break;
8339 if (round) {
8340 uint64_t round_const = 1ULL << (shift - 1);
8341 tcg_round = tcg_const_i64(round_const);
8342 } else {
8343 TCGV_UNUSED_I64(tcg_round);
8346 for (i = 0; i < elements; i++) {
8347 read_vec_element(s, tcg_rn, rn, i, memop);
8348 if (accumulate || insert) {
8349 read_vec_element(s, tcg_rd, rd, i, memop);
8352 if (insert) {
8353 handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
8354 } else {
8355 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8356 accumulate, is_u, size, shift);
8359 write_vec_element(s, tcg_rd, rd, i, size);
8362 if (!is_q) {
8363 clear_vec_high(s, rd);
8366 if (round) {
8367 tcg_temp_free_i64(tcg_round);
8371 /* SHL/SLI - Vector shift left */
8372 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
8373 int immh, int immb, int opcode, int rn, int rd)
8375 int size = 32 - clz32(immh) - 1;
8376 int immhb = immh << 3 | immb;
8377 int shift = immhb - (8 << size);
8378 int dsize = is_q ? 128 : 64;
8379 int esize = 8 << size;
8380 int elements = dsize/esize;
8381 TCGv_i64 tcg_rn = new_tmp_a64(s);
8382 TCGv_i64 tcg_rd = new_tmp_a64(s);
8383 int i;
8385 if (extract32(immh, 3, 1) && !is_q) {
8386 unallocated_encoding(s);
8387 return;
8390 if (size > 3 && !is_q) {
8391 unallocated_encoding(s);
8392 return;
8395 if (!fp_access_check(s)) {
8396 return;
8399 for (i = 0; i < elements; i++) {
8400 read_vec_element(s, tcg_rn, rn, i, size);
8401 if (insert) {
8402 read_vec_element(s, tcg_rd, rd, i, size);
8405 handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
8407 write_vec_element(s, tcg_rd, rd, i, size);
8410 if (!is_q) {
8411 clear_vec_high(s, rd);
8415 /* USHLL/SHLL - Vector shift left with widening */
8416 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
8417 int immh, int immb, int opcode, int rn, int rd)
8419 int size = 32 - clz32(immh) - 1;
8420 int immhb = immh << 3 | immb;
8421 int shift = immhb - (8 << size);
8422 int dsize = 64;
8423 int esize = 8 << size;
8424 int elements = dsize/esize;
8425 TCGv_i64 tcg_rn = new_tmp_a64(s);
8426 TCGv_i64 tcg_rd = new_tmp_a64(s);
8427 int i;
8429 if (size >= 3) {
8430 unallocated_encoding(s);
8431 return;
8434 if (!fp_access_check(s)) {
8435 return;
8438 /* For the LL variants the store is larger than the load,
8439 * so if rd == rn we would overwrite parts of our input.
8440 * So load everything right now and use shifts in the main loop.
8442 read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
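/* Each element is then picked out of the 64-bit input by shifting right by
 * i * esize, sign- or zero-extended from esize bits by ext_and_shift_reg()
 * (size | (!is_u << 2) selects the signed extend variants), and only then
 * shifted left by the immediate and written back double-width.
 */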
8444 for (i = 0; i < elements; i++) {
8445 tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
8446 ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
8447 tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
8448 write_vec_element(s, tcg_rd, rd, i, size + 1);
8452 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
8453 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
8454 int immh, int immb, int opcode, int rn, int rd)
8456 int immhb = immh << 3 | immb;
8457 int size = 32 - clz32(immh) - 1;
8458 int dsize = 64;
8459 int esize = 8 << size;
8460 int elements = dsize/esize;
8461 int shift = (2 * esize) - immhb;
8462 bool round = extract32(opcode, 0, 1);
8463 TCGv_i64 tcg_rn, tcg_rd, tcg_final;
8464 TCGv_i64 tcg_round;
8465 int i;
8467 if (extract32(immh, 3, 1)) {
8468 unallocated_encoding(s);
8469 return;
8472 if (!fp_access_check(s)) {
8473 return;
8476 tcg_rn = tcg_temp_new_i64();
8477 tcg_rd = tcg_temp_new_i64();
8478 tcg_final = tcg_temp_new_i64();
8479 read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
8481 if (round) {
8482 uint64_t round_const = 1ULL << (shift - 1);
8483 tcg_round = tcg_const_i64(round_const);
8484 } else {
8485 TCGV_UNUSED_I64(tcg_round);
8488 for (i = 0; i < elements; i++) {
8489 read_vec_element(s, tcg_rn, rn, i, size+1);
8490 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8491 false, true, size+1, shift);
8493 tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8496 if (!is_q) {
8497 clear_vec_high(s, rd);
8498 write_vec_element(s, tcg_final, rd, 0, MO_64);
8499 } else {
8500 write_vec_element(s, tcg_final, rd, 1, MO_64);
8503 if (round) {
8504 tcg_temp_free_i64(tcg_round);
8506 tcg_temp_free_i64(tcg_rn);
8507 tcg_temp_free_i64(tcg_rd);
8508 tcg_temp_free_i64(tcg_final);
8509 return;
8513 /* C3.6.14 AdvSIMD shift by immediate
8514 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
8515 * +---+---+---+-------------+------+------+--------+---+------+------+
8516 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
8517 * +---+---+---+-------------+------+------+--------+---+------+------+
8519 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
8521 int rd = extract32(insn, 0, 5);
8522 int rn = extract32(insn, 5, 5);
8523 int opcode = extract32(insn, 11, 5);
8524 int immb = extract32(insn, 16, 3);
8525 int immh = extract32(insn, 19, 4);
8526 bool is_u = extract32(insn, 29, 1);
8527 bool is_q = extract32(insn, 30, 1);
8529 switch (opcode) {
8530 case 0x08: /* SRI */
8531 if (!is_u) {
8532 unallocated_encoding(s);
8533 return;
8535 /* fall through */
8536 case 0x00: /* SSHR / USHR */
8537 case 0x02: /* SSRA / USRA (accumulate) */
8538 case 0x04: /* SRSHR / URSHR (rounding) */
8539 case 0x06: /* SRSRA / URSRA (accum + rounding) */
8540 handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
8541 break;
8542 case 0x0a: /* SHL / SLI */
8543 handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8544 break;
8545 case 0x10: /* SHRN */
8546 case 0x11: /* RSHRN / SQRSHRUN */
8547 if (is_u) {
8548 handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
8549 opcode, rn, rd);
8550 } else {
8551 handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
8553 break;
8554 case 0x12: /* SQSHRN / UQSHRN */
8555 case 0x13: /* SQRSHRN / UQRSHRN */
8556 handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
8557 opcode, rn, rd);
8558 break;
8559 case 0x14: /* SSHLL / USHLL */
8560 handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8561 break;
8562 case 0x1c: /* SCVTF / UCVTF */
8563 handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
8564 opcode, rn, rd);
8565 break;
8566 case 0xc: /* SQSHLU */
8567 if (!is_u) {
8568 unallocated_encoding(s);
8569 return;
8571 handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
8572 break;
8573 case 0xe: /* SQSHL, UQSHL */
8574 handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
8575 break;
8576 case 0x1f: /* FCVTZS/ FCVTZU */
8577 handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
8578 return;
8579 default:
8580 unallocated_encoding(s);
8581 return;
8585 /* Generate code to do a "long" addition or subtraction, ie one done in
8586 * TCGv_i64 on vector lanes twice the width specified by size.
8588 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
8589 TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
8591 static NeonGenTwo64OpFn * const fns[3][2] = {
8592 { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
8593 { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
8594 { tcg_gen_add_i64, tcg_gen_sub_i64 },
8596 NeonGenTwo64OpFn *genfn;
8597 assert(size < 3);
8599 genfn = fns[size][is_sub];
8600 genfn(tcg_res, tcg_op1, tcg_op2);
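/* So for size 0 the i64 operands hold four 16-bit lanes (widened bytes) and
 * a lane-wise helper is used; for size 1 they hold two 32-bit lanes; for
 * size 2 there is a single lane and a plain 64-bit add/sub suffices.
 */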
8603 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
8604 int opcode, int rd, int rn, int rm)
8606 /* 3-reg-different widening insns: 64 x 64 -> 128 */
8607 TCGv_i64 tcg_res[2];
8608 int pass, accop;
8610 tcg_res[0] = tcg_temp_new_i64();
8611 tcg_res[1] = tcg_temp_new_i64();
8613 /* Does this op do an adding accumulate, a subtracting accumulate,
8614 * or no accumulate at all?
8616 switch (opcode) {
8617 case 5:
8618 case 8:
8619 case 9:
8620 accop = 1;
8621 break;
8622 case 10:
8623 case 11:
8624 accop = -1;
8625 break;
8626 default:
8627 accop = 0;
8628 break;
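/* i.e. SABAL (5), SMLAL/UMLAL (8) and SQDMLAL (9) accumulate by addition,
 * SMLSL/UMLSL (10) and SQDMLSL (11) accumulate by subtraction, and all
 * other opcodes simply overwrite Rd.
 */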
8631 if (accop != 0) {
8632 read_vec_element(s, tcg_res[0], rd, 0, MO_64);
8633 read_vec_element(s, tcg_res[1], rd, 1, MO_64);
8636 /* size == 2 means two 32x32->64 operations; this is worth special
8637 * casing because we can generally handle it inline.
8639 if (size == 2) {
8640 for (pass = 0; pass < 2; pass++) {
8641 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8642 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8643 TCGv_i64 tcg_passres;
8644 TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
8646 int elt = pass + is_q * 2;
8648 read_vec_element(s, tcg_op1, rn, elt, memop);
8649 read_vec_element(s, tcg_op2, rm, elt, memop);
8651 if (accop == 0) {
8652 tcg_passres = tcg_res[pass];
8653 } else {
8654 tcg_passres = tcg_temp_new_i64();
8657 switch (opcode) {
8658 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8659 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
8660 break;
8661 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8662 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
8663 break;
8664 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8665 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8667 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
8668 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
8670 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
8671 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
8672 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
8673 tcg_passres,
8674 tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
8675 tcg_temp_free_i64(tcg_tmp1);
8676 tcg_temp_free_i64(tcg_tmp2);
8677 break;
8679 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8680 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8681 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8682 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8683 break;
8684 case 9: /* SQDMLAL, SQDMLAL2 */
8685 case 11: /* SQDMLSL, SQDMLSL2 */
8686 case 13: /* SQDMULL, SQDMULL2 */
8687 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8688 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
8689 tcg_passres, tcg_passres);
8690 break;
8691 default:
8692 g_assert_not_reached();
8695 if (opcode == 9 || opcode == 11) {
8696 /* saturating accumulate ops */
8697 if (accop < 0) {
8698 tcg_gen_neg_i64(tcg_passres, tcg_passres);
8700 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
8701 tcg_res[pass], tcg_passres);
8702 } else if (accop > 0) {
8703 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8704 } else if (accop < 0) {
8705 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8708 if (accop != 0) {
8709 tcg_temp_free_i64(tcg_passres);
8712 tcg_temp_free_i64(tcg_op1);
8713 tcg_temp_free_i64(tcg_op2);
8715 } else {
8716 /* size 0 or 1, generally helper functions */
8717 for (pass = 0; pass < 2; pass++) {
8718 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8719 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8720 TCGv_i64 tcg_passres;
8721 int elt = pass + is_q * 2;
8723 read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
8724 read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
8726 if (accop == 0) {
8727 tcg_passres = tcg_res[pass];
8728 } else {
8729 tcg_passres = tcg_temp_new_i64();
8732 switch (opcode) {
8733 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8734 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8736 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
8737 static NeonGenWidenFn * const widenfns[2][2] = {
8738 { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8739 { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8741 NeonGenWidenFn *widenfn = widenfns[size][is_u];
8743 widenfn(tcg_op2_64, tcg_op2);
8744 widenfn(tcg_passres, tcg_op1);
8745 gen_neon_addl(size, (opcode == 2), tcg_passres,
8746 tcg_passres, tcg_op2_64);
8747 tcg_temp_free_i64(tcg_op2_64);
8748 break;
8750 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8751 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8752 if (size == 0) {
8753 if (is_u) {
8754 gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
8755 } else {
8756 gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
8758 } else {
8759 if (is_u) {
8760 gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
8761 } else {
8762 gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
8765 break;
8766 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8767 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8768 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8769 if (size == 0) {
8770 if (is_u) {
8771 gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
8772 } else {
8773 gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
8775 } else {
8776 if (is_u) {
8777 gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
8778 } else {
8779 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8782 break;
8783 case 9: /* SQDMLAL, SQDMLAL2 */
8784 case 11: /* SQDMLSL, SQDMLSL2 */
8785 case 13: /* SQDMULL, SQDMULL2 */
8786 assert(size == 1);
8787 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8788 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
8789 tcg_passres, tcg_passres);
8790 break;
8791 case 14: /* PMULL */
8792 assert(size == 0);
8793 gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
8794 break;
8795 default:
8796 g_assert_not_reached();
8798 tcg_temp_free_i32(tcg_op1);
8799 tcg_temp_free_i32(tcg_op2);
8801 if (accop != 0) {
8802 if (opcode == 9 || opcode == 11) {
8803 /* saturating accumulate ops */
8804 if (accop < 0) {
8805 gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
8807 gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
8808 tcg_res[pass],
8809 tcg_passres);
8810 } else {
8811 gen_neon_addl(size, (accop < 0), tcg_res[pass],
8812 tcg_res[pass], tcg_passres);
8814 tcg_temp_free_i64(tcg_passres);
8819 write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8820 write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8821 tcg_temp_free_i64(tcg_res[0]);
8822 tcg_temp_free_i64(tcg_res[1]);
8825 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
8826 int opcode, int rd, int rn, int rm)
8828 TCGv_i64 tcg_res[2];
8829 int part = is_q ? 2 : 0;
8830 int pass;
8832 for (pass = 0; pass < 2; pass++) {
8833 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8834 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8835 TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
8836 static NeonGenWidenFn * const widenfns[3][2] = {
8837 { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8838 { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8839 { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
8841 NeonGenWidenFn *widenfn = widenfns[size][is_u];
8843 read_vec_element(s, tcg_op1, rn, pass, MO_64);
8844 read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
8845 widenfn(tcg_op2_wide, tcg_op2);
8846 tcg_temp_free_i32(tcg_op2);
8847 tcg_res[pass] = tcg_temp_new_i64();
8848 gen_neon_addl(size, (opcode == 3),
8849 tcg_res[pass], tcg_op1, tcg_op2_wide);
8850 tcg_temp_free_i64(tcg_op1);
8851 tcg_temp_free_i64(tcg_op2_wide);
8854 for (pass = 0; pass < 2; pass++) {
8855 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8856 tcg_temp_free_i64(tcg_res[pass]);
8860 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
8862 tcg_gen_addi_i64(in, in, 1U << 31);
8863 tcg_gen_extrh_i64_i32(res, in);
8866 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
8867 int opcode, int rd, int rn, int rm)
8869 TCGv_i32 tcg_res[2];
8870 int part = is_q ? 2 : 0;
8871 int pass;
8873 for (pass = 0; pass < 2; pass++) {
8874 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8875 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8876 TCGv_i64 tcg_wideres = tcg_temp_new_i64();
8877 static NeonGenNarrowFn * const narrowfns[3][2] = {
8878 { gen_helper_neon_narrow_high_u8,
8879 gen_helper_neon_narrow_round_high_u8 },
8880 { gen_helper_neon_narrow_high_u16,
8881 gen_helper_neon_narrow_round_high_u16 },
8882 { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
8884 NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
8886 read_vec_element(s, tcg_op1, rn, pass, MO_64);
8887 read_vec_element(s, tcg_op2, rm, pass, MO_64);
8889 gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
8891 tcg_temp_free_i64(tcg_op1);
8892 tcg_temp_free_i64(tcg_op2);
8894 tcg_res[pass] = tcg_temp_new_i32();
8895 gennarrow(tcg_res[pass], tcg_wideres);
8896 tcg_temp_free_i64(tcg_wideres);
8899 for (pass = 0; pass < 2; pass++) {
8900 write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
8901 tcg_temp_free_i32(tcg_res[pass]);
8903 if (!is_q) {
8904 clear_vec_high(s, rd);
8908 static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
8910 /* PMULL of 64 x 64 -> 128 is an odd special case because it
8911 * is the only three-reg-diff instruction which produces a
8912 * 128-bit wide result from a single operation. However since
8913 * it's possible to calculate the two halves more or less
8914 * separately, we just use two helper calls.
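/* A carry-less 64x64 multiply is the XOR of op1 shifted left by each set
 * bit position of op2, so the low and high 64 bits of the 128-bit product
 * can be generated independently by the _lo and _hi helpers below.
 */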
8916 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8917 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8918 TCGv_i64 tcg_res = tcg_temp_new_i64();
8920 read_vec_element(s, tcg_op1, rn, is_q, MO_64);
8921 read_vec_element(s, tcg_op2, rm, is_q, MO_64);
8922 gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
8923 write_vec_element(s, tcg_res, rd, 0, MO_64);
8924 gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
8925 write_vec_element(s, tcg_res, rd, 1, MO_64);
8927 tcg_temp_free_i64(tcg_op1);
8928 tcg_temp_free_i64(tcg_op2);
8929 tcg_temp_free_i64(tcg_res);
8932 /* C3.6.15 AdvSIMD three different
8933 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
8934 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8935 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd |
8936 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8938 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
8940 /* Instructions in this group fall into three basic classes
8941 * (in each case with the operation working on each element in
8942 * the input vectors):
8943 * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
8944 * 128 bit input)
8945 * (2) wide 64 x 128 -> 128
8946 * (3) narrowing 128 x 128 -> 64
8947 * Here we do initial decode, catch unallocated cases and
8948 * dispatch to separate functions for each class.
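/* For example SADDL/UADDL (opcode 0) are widening, SADDW/UADDW (opcode 1)
 * are wide and ADDHN/RADDHN (opcode 4) are narrowing; the switch below
 * sorts the opcodes into those three buckets.
 */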
8950 int is_q = extract32(insn, 30, 1);
8951 int is_u = extract32(insn, 29, 1);
8952 int size = extract32(insn, 22, 2);
8953 int opcode = extract32(insn, 12, 4);
8954 int rm = extract32(insn, 16, 5);
8955 int rn = extract32(insn, 5, 5);
8956 int rd = extract32(insn, 0, 5);
8958 switch (opcode) {
8959 case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
8960 case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
8961 /* 64 x 128 -> 128 */
8962 if (size == 3) {
8963 unallocated_encoding(s);
8964 return;
8966 if (!fp_access_check(s)) {
8967 return;
8969 handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
8970 break;
8971 case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
8972 case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
8973 /* 128 x 128 -> 64 */
8974 if (size == 3) {
8975 unallocated_encoding(s);
8976 return;
8978 if (!fp_access_check(s)) {
8979 return;
8981 handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
8982 break;
8983 case 14: /* PMULL, PMULL2 */
8984 if (is_u || size == 1 || size == 2) {
8985 unallocated_encoding(s);
8986 return;
8988 if (size == 3) {
8989 if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
8990 unallocated_encoding(s);
8991 return;
8993 if (!fp_access_check(s)) {
8994 return;
8996 handle_pmull_64(s, is_q, rd, rn, rm);
8997 return;
8999 goto is_widening;
9000 case 9: /* SQDMLAL, SQDMLAL2 */
9001 case 11: /* SQDMLSL, SQDMLSL2 */
9002 case 13: /* SQDMULL, SQDMULL2 */
9003 if (is_u || size == 0) {
9004 unallocated_encoding(s);
9005 return;
9007 /* fall through */
9008 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
9009 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
9010 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
9011 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
9012 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
9013 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
9014 case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
9015 /* 64 x 64 -> 128 */
9016 if (size == 3) {
9017 unallocated_encoding(s);
9018 return;
9020 is_widening:
9021 if (!fp_access_check(s)) {
9022 return;
9025 handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
9026 break;
9027 default:
9028 /* opcode 15 not allocated */
9029 unallocated_encoding(s);
9030 break;
9034 /* Logic op (opcode == 3) subgroup of C3.6.16. */
9035 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
9037 int rd = extract32(insn, 0, 5);
9038 int rn = extract32(insn, 5, 5);
9039 int rm = extract32(insn, 16, 5);
9040 int size = extract32(insn, 22, 2);
9041 bool is_u = extract32(insn, 29, 1);
9042 bool is_q = extract32(insn, 30, 1);
9043 TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
9044 int pass;
9046 if (!fp_access_check(s)) {
9047 return;
9050 tcg_op1 = tcg_temp_new_i64();
9051 tcg_op2 = tcg_temp_new_i64();
9052 tcg_res[0] = tcg_temp_new_i64();
9053 tcg_res[1] = tcg_temp_new_i64();
9055 for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
9056 read_vec_element(s, tcg_op1, rn, pass, MO_64);
9057 read_vec_element(s, tcg_op2, rm, pass, MO_64);
9059 if (!is_u) {
9060 switch (size) {
9061 case 0: /* AND */
9062 tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
9063 break;
9064 case 1: /* BIC */
9065 tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
9066 break;
9067 case 2: /* ORR */
9068 tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
9069 break;
9070 case 3: /* ORN */
9071 tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
9072 break;
9074 } else {
9075 if (size != 0) {
9076 /* B* ops need res loaded to operate on */
9077 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9080 switch (size) {
9081 case 0: /* EOR */
9082 tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
9083 break;
9084 case 1: /* BSL bitwise select */
9085 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2);
9086 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]);
9087 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1);
9088 break;
9089 case 2: /* BIT, bitwise insert if true */
9090 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
9091 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2);
9092 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9093 break;
9094 case 3: /* BIF, bitwise insert if false */
9095 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
9096 tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2);
9097 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9098 break;
9103 write_vec_element(s, tcg_res[0], rd, 0, MO_64);
9104 if (!is_q) {
9105 tcg_gen_movi_i64(tcg_res[1], 0);
9107 write_vec_element(s, tcg_res[1], rd, 1, MO_64);
9109 tcg_temp_free_i64(tcg_op1);
9110 tcg_temp_free_i64(tcg_op2);
9111 tcg_temp_free_i64(tcg_res[0]);
9112 tcg_temp_free_i64(tcg_res[1]);
9115 /* Helper functions for 32 bit comparisons */
9116 static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9118 tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
9121 static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9123 tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
9126 static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9128 tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
9131 static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9133 tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
9136 /* Pairwise op subgroup of C3.6.16.
9138 * This is called directly, or from disas_simd_3same_float() for the float
9139 * pairwise operations, where the opcode and size are calculated differently.
9141 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
9142 int size, int rn, int rm, int rd)
9144 TCGv_ptr fpst;
9145 int pass;
9147 /* Floating point operations need fpst */
9148 if (opcode >= 0x58) {
9149 fpst = get_fpstatus_ptr();
9150 } else {
9151 TCGV_UNUSED_PTR(fpst);
9154 if (!fp_access_check(s)) {
9155 return;
9158 /* These operations work on the concatenated rm:rn, with each pair of
9159 * adjacent elements being operated on to produce an element in the result.
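/* For example, with 32-bit elements and Q set:
 *   Rd[0] = op(Rn[0], Rn[1])   Rd[1] = op(Rn[2], Rn[3])
 *   Rd[2] = op(Rm[0], Rm[1])   Rd[3] = op(Rm[2], Rm[3])
 */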
9161 if (size == 3) {
9162 TCGv_i64 tcg_res[2];
9164 for (pass = 0; pass < 2; pass++) {
9165 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9166 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9167 int passreg = (pass == 0) ? rn : rm;
9169 read_vec_element(s, tcg_op1, passreg, 0, MO_64);
9170 read_vec_element(s, tcg_op2, passreg, 1, MO_64);
9171 tcg_res[pass] = tcg_temp_new_i64();
9173 switch (opcode) {
9174 case 0x17: /* ADDP */
9175 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9176 break;
9177 case 0x58: /* FMAXNMP */
9178 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9179 break;
9180 case 0x5a: /* FADDP */
9181 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9182 break;
9183 case 0x5e: /* FMAXP */
9184 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9185 break;
9186 case 0x78: /* FMINNMP */
9187 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9188 break;
9189 case 0x7e: /* FMINP */
9190 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9191 break;
9192 default:
9193 g_assert_not_reached();
9196 tcg_temp_free_i64(tcg_op1);
9197 tcg_temp_free_i64(tcg_op2);
9200 for (pass = 0; pass < 2; pass++) {
9201 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9202 tcg_temp_free_i64(tcg_res[pass]);
9204 } else {
9205 int maxpass = is_q ? 4 : 2;
9206 TCGv_i32 tcg_res[4];
9208 for (pass = 0; pass < maxpass; pass++) {
9209 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9210 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9211 NeonGenTwoOpFn *genfn = NULL;
9212 int passreg = pass < (maxpass / 2) ? rn : rm;
9213 int passelt = (is_q && (pass & 1)) ? 2 : 0;
9215 read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
9216 read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
9217 tcg_res[pass] = tcg_temp_new_i32();
9219 switch (opcode) {
9220 case 0x17: /* ADDP */
9222 static NeonGenTwoOpFn * const fns[3] = {
9223 gen_helper_neon_padd_u8,
9224 gen_helper_neon_padd_u16,
9225 tcg_gen_add_i32,
9227 genfn = fns[size];
9228 break;
9230 case 0x14: /* SMAXP, UMAXP */
9232 static NeonGenTwoOpFn * const fns[3][2] = {
9233 { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
9234 { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
9235 { gen_max_s32, gen_max_u32 },
9237 genfn = fns[size][u];
9238 break;
9240 case 0x15: /* SMINP, UMINP */
9242 static NeonGenTwoOpFn * const fns[3][2] = {
9243 { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
9244 { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
9245 { gen_min_s32, gen_min_u32 },
9247 genfn = fns[size][u];
9248 break;
9250 /* The FP operations are all on single floats (32 bit) */
9251 case 0x58: /* FMAXNMP */
9252 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9253 break;
9254 case 0x5a: /* FADDP */
9255 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9256 break;
9257 case 0x5e: /* FMAXP */
9258 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9259 break;
9260 case 0x78: /* FMINNMP */
9261 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9262 break;
9263 case 0x7e: /* FMINP */
9264 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9265 break;
9266 default:
9267 g_assert_not_reached();
9270 /* The FP ops above were generated directly; for the integer ops call the helper now */
9271 if (genfn) {
9272 genfn(tcg_res[pass], tcg_op1, tcg_op2);
9275 tcg_temp_free_i32(tcg_op1);
9276 tcg_temp_free_i32(tcg_op2);
9279 for (pass = 0; pass < maxpass; pass++) {
9280 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9281 tcg_temp_free_i32(tcg_res[pass]);
9283 if (!is_q) {
9284 clear_vec_high(s, rd);
9288 if (!TCGV_IS_UNUSED_PTR(fpst)) {
9289 tcg_temp_free_ptr(fpst);
9293 /* Floating point op subgroup of C3.6.16. */
9294 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
9296 /* For floating point ops, the U, size[1] and opcode bits
9297 * together indicate the operation. size[0] indicates single
9298 * or double.
9300 int fpopcode = extract32(insn, 11, 5)
9301 | (extract32(insn, 23, 1) << 5)
9302 | (extract32(insn, 29, 1) << 6);
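/* e.g. FCMGT ends up as fpopcode 0x7c: U = 1 (bit 6), size[1] = 1 (bit 5)
 * and an opcode field of 0x1c from insn[15:11].
 */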
9303 int is_q = extract32(insn, 30, 1);
9304 int size = extract32(insn, 22, 1);
9305 int rm = extract32(insn, 16, 5);
9306 int rn = extract32(insn, 5, 5);
9307 int rd = extract32(insn, 0, 5);
9309 int datasize = is_q ? 128 : 64;
9310 int esize = 32 << size;
9311 int elements = datasize / esize;
9313 if (size == 1 && !is_q) {
9314 unallocated_encoding(s);
9315 return;
9318 switch (fpopcode) {
9319 case 0x58: /* FMAXNMP */
9320 case 0x5a: /* FADDP */
9321 case 0x5e: /* FMAXP */
9322 case 0x78: /* FMINNMP */
9323 case 0x7e: /* FMINP */
9324 if (size && !is_q) {
9325 unallocated_encoding(s);
9326 return;
9328 handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
9329 rn, rm, rd);
9330 return;
9331 case 0x1b: /* FMULX */
9332 case 0x1f: /* FRECPS */
9333 case 0x3f: /* FRSQRTS */
9334 case 0x5d: /* FACGE */
9335 case 0x7d: /* FACGT */
9336 case 0x19: /* FMLA */
9337 case 0x39: /* FMLS */
9338 case 0x18: /* FMAXNM */
9339 case 0x1a: /* FADD */
9340 case 0x1c: /* FCMEQ */
9341 case 0x1e: /* FMAX */
9342 case 0x38: /* FMINNM */
9343 case 0x3a: /* FSUB */
9344 case 0x3e: /* FMIN */
9345 case 0x5b: /* FMUL */
9346 case 0x5c: /* FCMGE */
9347 case 0x5f: /* FDIV */
9348 case 0x7a: /* FABD */
9349 case 0x7c: /* FCMGT */
9350 if (!fp_access_check(s)) {
9351 return;
9354 handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
9355 return;
9356 default:
9357 unallocated_encoding(s);
9358 return;
9362 /* Integer op subgroup of C3.6.16. */
9363 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
9365 int is_q = extract32(insn, 30, 1);
9366 int u = extract32(insn, 29, 1);
9367 int size = extract32(insn, 22, 2);
9368 int opcode = extract32(insn, 11, 5);
9369 int rm = extract32(insn, 16, 5);
9370 int rn = extract32(insn, 5, 5);
9371 int rd = extract32(insn, 0, 5);
9372 int pass;
9374 switch (opcode) {
9375 case 0x13: /* MUL, PMUL */
9376 if (u && size != 0) {
9377 unallocated_encoding(s);
9378 return;
9380 /* fall through */
9381 case 0x0: /* SHADD, UHADD */
9382 case 0x2: /* SRHADD, URHADD */
9383 case 0x4: /* SHSUB, UHSUB */
9384 case 0xc: /* SMAX, UMAX */
9385 case 0xd: /* SMIN, UMIN */
9386 case 0xe: /* SABD, UABD */
9387 case 0xf: /* SABA, UABA */
9388 case 0x12: /* MLA, MLS */
9389 if (size == 3) {
9390 unallocated_encoding(s);
9391 return;
9393 break;
9394 case 0x16: /* SQDMULH, SQRDMULH */
9395 if (size == 0 || size == 3) {
9396 unallocated_encoding(s);
9397 return;
9399 break;
9400 default:
9401 if (size == 3 && !is_q) {
9402 unallocated_encoding(s);
9403 return;
9405 break;
9408 if (!fp_access_check(s)) {
9409 return;
9412 if (size == 3) {
9413 assert(is_q);
9414 for (pass = 0; pass < 2; pass++) {
9415 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9416 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9417 TCGv_i64 tcg_res = tcg_temp_new_i64();
9419 read_vec_element(s, tcg_op1, rn, pass, MO_64);
9420 read_vec_element(s, tcg_op2, rm, pass, MO_64);
9422 handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
9424 write_vec_element(s, tcg_res, rd, pass, MO_64);
9426 tcg_temp_free_i64(tcg_res);
9427 tcg_temp_free_i64(tcg_op1);
9428 tcg_temp_free_i64(tcg_op2);
9430 } else {
9431 for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
9432 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9433 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9434 TCGv_i32 tcg_res = tcg_temp_new_i32();
9435 NeonGenTwoOpFn *genfn = NULL;
9436 NeonGenTwoOpEnvFn *genenvfn = NULL;
9438 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9439 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9441 switch (opcode) {
9442 case 0x0: /* SHADD, UHADD */
9444 static NeonGenTwoOpFn * const fns[3][2] = {
9445 { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
9446 { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
9447 { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
9449 genfn = fns[size][u];
9450 break;
9452 case 0x1: /* SQADD, UQADD */
9454 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9455 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9456 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9457 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9459 genenvfn = fns[size][u];
9460 break;
9462 case 0x2: /* SRHADD, URHADD */
9464 static NeonGenTwoOpFn * const fns[3][2] = {
9465 { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
9466 { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
9467 { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
9469 genfn = fns[size][u];
9470 break;
9472 case 0x4: /* SHSUB, UHSUB */
9474 static NeonGenTwoOpFn * const fns[3][2] = {
9475 { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
9476 { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
9477 { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
9479 genfn = fns[size][u];
9480 break;
9482 case 0x5: /* SQSUB, UQSUB */
9484 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9485 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9486 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9487 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9489 genenvfn = fns[size][u];
9490 break;
9492 case 0x6: /* CMGT, CMHI */
9494 static NeonGenTwoOpFn * const fns[3][2] = {
9495 { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
9496 { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
9497 { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
9499 genfn = fns[size][u];
9500 break;
9502 case 0x7: /* CMGE, CMHS */
9504 static NeonGenTwoOpFn * const fns[3][2] = {
9505 { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
9506 { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
9507 { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
9509 genfn = fns[size][u];
9510 break;
9512 case 0x8: /* SSHL, USHL */
9514 static NeonGenTwoOpFn * const fns[3][2] = {
9515 { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
9516 { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
9517 { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
9519 genfn = fns[size][u];
9520 break;
9522 case 0x9: /* SQSHL, UQSHL */
9524 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9525 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9526 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9527 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9529 genenvfn = fns[size][u];
9530 break;
9532 case 0xa: /* SRSHL, URSHL */
9534 static NeonGenTwoOpFn * const fns[3][2] = {
9535 { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
9536 { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
9537 { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
9539 genfn = fns[size][u];
9540 break;
9542 case 0xb: /* SQRSHL, UQRSHL */
9544 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9545 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9546 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9547 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9549 genenvfn = fns[size][u];
9550 break;
9552 case 0xc: /* SMAX, UMAX */
9554 static NeonGenTwoOpFn * const fns[3][2] = {
9555 { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
9556 { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
9557 { gen_max_s32, gen_max_u32 },
9559 genfn = fns[size][u];
9560 break;
9563 case 0xd: /* SMIN, UMIN */
9565 static NeonGenTwoOpFn * const fns[3][2] = {
9566 { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
9567 { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
9568 { gen_min_s32, gen_min_u32 },
9570 genfn = fns[size][u];
9571 break;
9573 case 0xe: /* SABD, UABD */
9574 case 0xf: /* SABA, UABA */
9576 static NeonGenTwoOpFn * const fns[3][2] = {
9577 { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
9578 { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
9579 { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
9581 genfn = fns[size][u];
9582 break;
9584 case 0x10: /* ADD, SUB */
9586 static NeonGenTwoOpFn * const fns[3][2] = {
9587 { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9588 { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9589 { tcg_gen_add_i32, tcg_gen_sub_i32 },
9591 genfn = fns[size][u];
9592 break;
9594 case 0x11: /* CMTST, CMEQ */
9596 static NeonGenTwoOpFn * const fns[3][2] = {
9597 { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
9598 { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
9599 { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
9601 genfn = fns[size][u];
9602 break;
9604 case 0x13: /* MUL, PMUL */
9605 if (u) {
9606 /* PMUL */
9607 assert(size == 0);
9608 genfn = gen_helper_neon_mul_p8;
9609 break;
9611 /* fall through : MUL */
9612 case 0x12: /* MLA, MLS */
9614 static NeonGenTwoOpFn * const fns[3] = {
9615 gen_helper_neon_mul_u8,
9616 gen_helper_neon_mul_u16,
9617 tcg_gen_mul_i32,
9619 genfn = fns[size];
9620 break;
9622 case 0x16: /* SQDMULH, SQRDMULH */
9624 static NeonGenTwoOpEnvFn * const fns[2][2] = {
9625 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9626 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9628 assert(size == 1 || size == 2);
9629 genenvfn = fns[size - 1][u];
9630 break;
9632 default:
9633 g_assert_not_reached();
9636 if (genenvfn) {
9637 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
9638 } else {
9639 genfn(tcg_res, tcg_op1, tcg_op2);
9642 if (opcode == 0xf || opcode == 0x12) {
9643 /* SABA, UABA, MLA, MLS: accumulating ops */
9644 static NeonGenTwoOpFn * const fns[3][2] = {
9645 { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9646 { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9647 { tcg_gen_add_i32, tcg_gen_sub_i32 },
9649 bool is_sub = (opcode == 0x12 && u); /* MLS */
9651 genfn = fns[size][is_sub];
9652 read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
9653 genfn(tcg_res, tcg_op1, tcg_res);
9656 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9658 tcg_temp_free_i32(tcg_res);
9659 tcg_temp_free_i32(tcg_op1);
9660 tcg_temp_free_i32(tcg_op2);
9664 if (!is_q) {
9665 clear_vec_high(s, rd);
9669 /* C3.6.16 AdvSIMD three same
9670 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
9671 * +---+---+---+-----------+------+---+------+--------+---+------+------+
9672 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd |
9673 * +---+---+---+-----------+------+---+------+--------+---+------+------+
9675 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
9677 int opcode = extract32(insn, 11, 5);
9679 switch (opcode) {
9680 case 0x3: /* logic ops */
9681 disas_simd_3same_logic(s, insn);
9682 break;
9683 case 0x17: /* ADDP */
9684 case 0x14: /* SMAXP, UMAXP */
9685 case 0x15: /* SMINP, UMINP */
9687 /* Pairwise operations */
9688 int is_q = extract32(insn, 30, 1);
9689 int u = extract32(insn, 29, 1);
9690 int size = extract32(insn, 22, 2);
9691 int rm = extract32(insn, 16, 5);
9692 int rn = extract32(insn, 5, 5);
9693 int rd = extract32(insn, 0, 5);
9694 if (opcode == 0x17) {
9695 if (u || (size == 3 && !is_q)) {
9696 unallocated_encoding(s);
9697 return;
9699 } else {
9700 if (size == 3) {
9701 unallocated_encoding(s);
9702 return;
9705 handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
9706 break;
9708 case 0x18 ... 0x31:
9709 /* floating point ops, sz[1] and U are part of opcode */
9710 disas_simd_3same_float(s, insn);
9711 break;
9712 default:
9713 disas_simd_3same_int(s, insn);
9714 break;
9718 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
9719 int size, int rn, int rd)
9721 /* Handle 2-reg-misc ops which are widening (so each size element
9722 * in the source becomes a 2*size element in the destination).
9723 * The only instruction like this is FCVTL.
9725 int pass;
9727 if (size == 3) {
9728 /* 32 -> 64 bit fp conversion */
9729 TCGv_i64 tcg_res[2];
9730 int srcelt = is_q ? 2 : 0;
9732 for (pass = 0; pass < 2; pass++) {
9733 TCGv_i32 tcg_op = tcg_temp_new_i32();
9734 tcg_res[pass] = tcg_temp_new_i64();
9736 read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
9737 gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
9738 tcg_temp_free_i32(tcg_op);
9740 for (pass = 0; pass < 2; pass++) {
9741 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9742 tcg_temp_free_i64(tcg_res[pass]);
9744 } else {
9745 /* 16 -> 32 bit fp conversion */
9746 int srcelt = is_q ? 4 : 0;
9747 TCGv_i32 tcg_res[4];
9749 for (pass = 0; pass < 4; pass++) {
9750 tcg_res[pass] = tcg_temp_new_i32();
9752 read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
9753 gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
9754 cpu_env);
9756 for (pass = 0; pass < 4; pass++) {
9757 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9758 tcg_temp_free_i32(tcg_res[pass]);
9763 static void handle_rev(DisasContext *s, int opcode, bool u,
9764 bool is_q, int size, int rn, int rd)
9766 int op = (opcode << 1) | u;
9767 int opsz = op + size;
9768 int grp_size = 3 - opsz;
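/* op is 0 for REV64, 1 for REV32 and 2 for REV16, and grp_size is the log2
 * of the number of elements in each reversal group: e.g. REV32 on 16-bit
 * elements gives opsz 2 and grp_size 1, so pairs of halfwords are swapped
 * within each 32-bit group. opsz >= 3 rejects the combinations the
 * architecture does not provide (REV16 on anything but bytes, REV64 on
 * doublewords, and so on).
 */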
9769 int dsize = is_q ? 128 : 64;
9770 int i;
9772 if (opsz >= 3) {
9773 unallocated_encoding(s);
9774 return;
9777 if (!fp_access_check(s)) {
9778 return;
9781 if (size == 0) {
9782 /* Special case bytes, use bswap op on each group of elements */
9783 int groups = dsize / (8 << grp_size);
9785 for (i = 0; i < groups; i++) {
9786 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9788 read_vec_element(s, tcg_tmp, rn, i, grp_size);
9789 switch (grp_size) {
9790 case MO_16:
9791 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
9792 break;
9793 case MO_32:
9794 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
9795 break;
9796 case MO_64:
9797 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
9798 break;
9799 default:
9800 g_assert_not_reached();
9802 write_vec_element(s, tcg_tmp, rd, i, grp_size);
9803 tcg_temp_free_i64(tcg_tmp);
9805 if (!is_q) {
9806 clear_vec_high(s, rd);
9808 } else {
9809 int revmask = (1 << grp_size) - 1;
9810 int esize = 8 << size;
9811 int elements = dsize / esize;
9812 TCGv_i64 tcg_rn = tcg_temp_new_i64();
9813 TCGv_i64 tcg_rd = tcg_const_i64(0);
9814 TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
9816 for (i = 0; i < elements; i++) {
9817 int e_rev = (i & 0xf) ^ revmask;
9818 int off = e_rev * esize;
9819 read_vec_element(s, tcg_rn, rn, i, size);
9820 if (off >= 64) {
9821 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
9822 tcg_rn, off - 64, esize);
9823 } else {
9824 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
9827 write_vec_element(s, tcg_rd, rd, 0, MO_64);
9828 write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
9830 tcg_temp_free_i64(tcg_rd_hi);
9831 tcg_temp_free_i64(tcg_rd);
9832 tcg_temp_free_i64(tcg_rn);
9836 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
9837 bool is_q, int size, int rn, int rd)
9839 /* Implement the pairwise operations from 2-misc:
9840 * SADDLP, UADDLP, SADALP, UADALP.
9841 * These all add pairs of elements in the input to produce a
9842 * double-width result element in the output (possibly accumulating).
9844 bool accum = (opcode == 0x6);
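/* e.g. SADDLP with byte elements computes Rd.H[i] = Rn.B[2*i] + Rn.B[2*i+1]
 * (sign-extended); the ADALP forms additionally add that sum into the
 * existing destination element.
 */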
9845 int maxpass = is_q ? 2 : 1;
9846 int pass;
9847 TCGv_i64 tcg_res[2];
9849 if (size == 2) {
9850 /* 32 + 32 -> 64 op */
9851 TCGMemOp memop = size + (u ? 0 : MO_SIGN);
9853 for (pass = 0; pass < maxpass; pass++) {
9854 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9855 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9857 tcg_res[pass] = tcg_temp_new_i64();
9859 read_vec_element(s, tcg_op1, rn, pass * 2, memop);
9860 read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
9861 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9862 if (accum) {
9863 read_vec_element(s, tcg_op1, rd, pass, MO_64);
9864 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9867 tcg_temp_free_i64(tcg_op1);
9868 tcg_temp_free_i64(tcg_op2);
9870 } else {
9871 for (pass = 0; pass < maxpass; pass++) {
9872 TCGv_i64 tcg_op = tcg_temp_new_i64();
9873 NeonGenOneOpFn *genfn;
9874 static NeonGenOneOpFn * const fns[2][2] = {
9875 { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 },
9876 { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 },
9879 genfn = fns[size][u];
9881 tcg_res[pass] = tcg_temp_new_i64();
9883 read_vec_element(s, tcg_op, rn, pass, MO_64);
9884 genfn(tcg_res[pass], tcg_op);
9886 if (accum) {
9887 read_vec_element(s, tcg_op, rd, pass, MO_64);
9888 if (size == 0) {
9889 gen_helper_neon_addl_u16(tcg_res[pass],
9890 tcg_res[pass], tcg_op);
9891 } else {
9892 gen_helper_neon_addl_u32(tcg_res[pass],
9893 tcg_res[pass], tcg_op);
9896 tcg_temp_free_i64(tcg_op);
9899 if (!is_q) {
9900 tcg_res[1] = tcg_const_i64(0);
9902 for (pass = 0; pass < 2; pass++) {
9903 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9904 tcg_temp_free_i64(tcg_res[pass]);
9908 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
9910 /* Implement SHLL and SHLL2 */
9911 int pass;
9912 int part = is_q ? 2 : 0;
9913 TCGv_i64 tcg_res[2];
9915 for (pass = 0; pass < 2; pass++) {
9916 static NeonGenWidenFn * const widenfns[3] = {
9917 gen_helper_neon_widen_u8,
9918 gen_helper_neon_widen_u16,
9919 tcg_gen_extu_i32_i64,
9921 NeonGenWidenFn *widenfn = widenfns[size];
9922 TCGv_i32 tcg_op = tcg_temp_new_i32();
9924 read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
9925 tcg_res[pass] = tcg_temp_new_i64();
9926 widenfn(tcg_res[pass], tcg_op);
9927 tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
9929 tcg_temp_free_i32(tcg_op);
9932 for (pass = 0; pass < 2; pass++) {
9933 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9934 tcg_temp_free_i64(tcg_res[pass]);
9938 /* C3.6.17 AdvSIMD two reg misc
9939 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
9940 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9941 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
9942 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9944 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
9946 int size = extract32(insn, 22, 2);
9947 int opcode = extract32(insn, 12, 5);
9948 bool u = extract32(insn, 29, 1);
9949 bool is_q = extract32(insn, 30, 1);
9950 int rn = extract32(insn, 5, 5);
9951 int rd = extract32(insn, 0, 5);
9952 bool need_fpstatus = false;
9953 bool need_rmode = false;
9954 int rmode = -1;
9955 TCGv_i32 tcg_rmode;
9956 TCGv_ptr tcg_fpstatus;
9958 switch (opcode) {
9959 case 0x0: /* REV64, REV32 */
9960 case 0x1: /* REV16 */
9961 handle_rev(s, opcode, u, is_q, size, rn, rd);
9962 return;
9963 case 0x5: /* CNT, NOT, RBIT */
9964 if (u && size == 0) {
9965 /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
9966 size = 3;
9967 break;
9968 } else if (u && size == 1) {
9969 /* RBIT */
9970 break;
9971 } else if (!u && size == 0) {
9972 /* CNT */
9973 break;
9975 unallocated_encoding(s);
9976 return;
9977 case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
9978 case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
9979 if (size == 3) {
9980 unallocated_encoding(s);
9981 return;
9983 if (!fp_access_check(s)) {
9984 return;
9987 handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
9988 return;
9989 case 0x4: /* CLS, CLZ */
9990 if (size == 3) {
9991 unallocated_encoding(s);
9992 return;
9994 break;
9995 case 0x2: /* SADDLP, UADDLP */
9996 case 0x6: /* SADALP, UADALP */
9997 if (size == 3) {
9998 unallocated_encoding(s);
9999 return;
10001 if (!fp_access_check(s)) {
10002 return;
10004 handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
10005 return;
10006 case 0x13: /* SHLL, SHLL2 */
10007 if (u == 0 || size == 3) {
10008 unallocated_encoding(s);
10009 return;
10011 if (!fp_access_check(s)) {
10012 return;
10014 handle_shll(s, is_q, size, rn, rd);
10015 return;
10016 case 0xa: /* CMLT */
10017 if (u == 1) {
10018 unallocated_encoding(s);
10019 return;
10021 /* fall through */
10022 case 0x8: /* CMGT, CMGE */
10023 case 0x9: /* CMEQ, CMLE */
10024 case 0xb: /* ABS, NEG */
10025 if (size == 3 && !is_q) {
10026 unallocated_encoding(s);
10027 return;
10029 break;
10030 case 0x3: /* SUQADD, USQADD */
10031 if (size == 3 && !is_q) {
10032 unallocated_encoding(s);
10033 return;
10035 if (!fp_access_check(s)) {
10036 return;
10038 handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
10039 return;
10040 case 0x7: /* SQABS, SQNEG */
10041 if (size == 3 && !is_q) {
10042 unallocated_encoding(s);
10043 return;
10045 break;
10046 case 0xc ... 0xf:
10047 case 0x16 ... 0x1d:
10048 case 0x1f:
10050 /* Floating point: U, size[1] and opcode indicate operation;
10051 * size[0] indicates single or double precision.
10053 int is_double = extract32(size, 0, 1);
10054 opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
10055 size = is_double ? 3 : 2;
10056 switch (opcode) {
10057 case 0x2f: /* FABS */
10058 case 0x6f: /* FNEG */
10059 if (size == 3 && !is_q) {
10060 unallocated_encoding(s);
10061 return;
10063 break;
10064 case 0x1d: /* SCVTF */
10065 case 0x5d: /* UCVTF */
10067 bool is_signed = (opcode == 0x1d);
10068 int elements = is_double ? 2 : is_q ? 4 : 2;
10069 if (is_double && !is_q) {
10070 unallocated_encoding(s);
10071 return;
10073 if (!fp_access_check(s)) {
10074 return;
10076 handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
10077 return;
10079 case 0x2c: /* FCMGT (zero) */
10080 case 0x2d: /* FCMEQ (zero) */
10081 case 0x2e: /* FCMLT (zero) */
10082 case 0x6c: /* FCMGE (zero) */
10083 case 0x6d: /* FCMLE (zero) */
10084 if (size == 3 && !is_q) {
10085 unallocated_encoding(s);
10086 return;
10088 handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
10089 return;
10090 case 0x7f: /* FSQRT */
10091 if (size == 3 && !is_q) {
10092 unallocated_encoding(s);
10093 return;
10095 break;
10096 case 0x1a: /* FCVTNS */
10097 case 0x1b: /* FCVTMS */
10098 case 0x3a: /* FCVTPS */
10099 case 0x3b: /* FCVTZS */
10100 case 0x5a: /* FCVTNU */
10101 case 0x5b: /* FCVTMU */
10102 case 0x7a: /* FCVTPU */
10103 case 0x7b: /* FCVTZU */
10104 need_fpstatus = true;
10105 need_rmode = true;
10106 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10107 if (size == 3 && !is_q) {
10108 unallocated_encoding(s);
10109 return;
10111 break;
10112 case 0x5c: /* FCVTAU */
10113 case 0x1c: /* FCVTAS */
10114 need_fpstatus = true;
10115 need_rmode = true;
10116 rmode = FPROUNDING_TIEAWAY;
10117 if (size == 3 && !is_q) {
10118 unallocated_encoding(s);
10119 return;
10121 break;
10122 case 0x3c: /* URECPE */
10123 if (size == 3) {
10124 unallocated_encoding(s);
10125 return;
10127 /* fall through */
10128 case 0x3d: /* FRECPE */
10129 case 0x7d: /* FRSQRTE */
10130 if (size == 3 && !is_q) {
10131 unallocated_encoding(s);
10132 return;
10134 if (!fp_access_check(s)) {
10135 return;
10137 handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
10138 return;
10139 case 0x56: /* FCVTXN, FCVTXN2 */
10140 if (size == 2) {
10141 unallocated_encoding(s);
10142 return;
10144 /* fall through */
10145 case 0x16: /* FCVTN, FCVTN2 */
10146 /* handle_2misc_narrow does a 2*size -> size operation, but these
10147 * instructions encode the source size rather than dest size.
10149 if (!fp_access_check(s)) {
10150 return;
10152 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
10153 return;
10154 case 0x17: /* FCVTL, FCVTL2 */
10155 if (!fp_access_check(s)) {
10156 return;
10158 handle_2misc_widening(s, opcode, is_q, size, rn, rd);
10159 return;
10160 case 0x18: /* FRINTN */
10161 case 0x19: /* FRINTM */
10162 case 0x38: /* FRINTP */
10163 case 0x39: /* FRINTZ */
10164 need_rmode = true;
10165 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10166 /* fall through */
10167 case 0x59: /* FRINTX */
10168 case 0x79: /* FRINTI */
10169 need_fpstatus = true;
10170 if (size == 3 && !is_q) {
10171 unallocated_encoding(s);
10172 return;
10174 break;
10175 case 0x58: /* FRINTA */
10176 need_rmode = true;
10177 rmode = FPROUNDING_TIEAWAY;
10178 need_fpstatus = true;
10179 if (size == 3 && !is_q) {
10180 unallocated_encoding(s);
10181 return;
10183 break;
10184 case 0x7c: /* URSQRTE */
10185 if (size == 3) {
10186 unallocated_encoding(s);
10187 return;
10189 need_fpstatus = true;
10190 break;
10191 default:
10192 unallocated_encoding(s);
10193 return;
10195 break;
10197 default:
10198 unallocated_encoding(s);
10199 return;
10202 if (!fp_access_check(s)) {
10203 return;
10206 if (need_fpstatus) {
10207 tcg_fpstatus = get_fpstatus_ptr();
10208 } else {
10209 TCGV_UNUSED_PTR(tcg_fpstatus);
10211 if (need_rmode) {
10212 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
10213 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10214 } else {
10215 TCGV_UNUSED_I32(tcg_rmode);
10218 if (size == 3) {
10219 /* All 64-bit element operations can be shared with scalar 2misc */
10220 int pass;
10222 for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
10223 TCGv_i64 tcg_op = tcg_temp_new_i64();
10224 TCGv_i64 tcg_res = tcg_temp_new_i64();
10226 read_vec_element(s, tcg_op, rn, pass, MO_64);
10228 handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
10229 tcg_rmode, tcg_fpstatus);
10231 write_vec_element(s, tcg_res, rd, pass, MO_64);
10233 tcg_temp_free_i64(tcg_res);
10234 tcg_temp_free_i64(tcg_op);
10236 } else {
10237 int pass;
10239 for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
10240 TCGv_i32 tcg_op = tcg_temp_new_i32();
10241 TCGv_i32 tcg_res = tcg_temp_new_i32();
10242 TCGCond cond;
10244 read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
10246 if (size == 2) {
10247 /* Special cases for 32 bit elements */
10248 switch (opcode) {
10249 case 0xa: /* CMLT */
10250 /* 32 bit integer comparison against zero, result is
10251 * test ? (2^32 - 1) : 0. We implement via setcond(test)
10252 * and negating.
10254 cond = TCG_COND_LT;
10255 do_cmop:
10256 tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
10257 tcg_gen_neg_i32(tcg_res, tcg_res);
10258 break;
10259 case 0x8: /* CMGT, CMGE */
10260 cond = u ? TCG_COND_GE : TCG_COND_GT;
10261 goto do_cmop;
10262 case 0x9: /* CMEQ, CMLE */
10263 cond = u ? TCG_COND_LE : TCG_COND_EQ;
10264 goto do_cmop;
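/* e.g. for CMGT this computes tcg_res = -(tcg_op > 0): all ones when the
 * element is strictly positive, zero otherwise. The U=1 encodings of the
 * same opcodes select GE and LE instead.
 */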
10265 case 0x4: /* CLS */
10266 if (u) {
10267 tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
10268 } else {
10269 tcg_gen_clrsb_i32(tcg_res, tcg_op);
10271 break;
10272 case 0x7: /* SQABS, SQNEG */
10273 if (u) {
10274 gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
10275 } else {
10276 gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
10278 break;
10279 case 0xb: /* ABS, NEG */
10280 if (u) {
10281 tcg_gen_neg_i32(tcg_res, tcg_op);
10282 } else {
10283 TCGv_i32 tcg_zero = tcg_const_i32(0);
10284 tcg_gen_neg_i32(tcg_res, tcg_op);
10285 tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
10286 tcg_zero, tcg_op, tcg_res);
10287 tcg_temp_free_i32(tcg_zero);
10289 break;
10290 case 0x2f: /* FABS */
10291 gen_helper_vfp_abss(tcg_res, tcg_op);
10292 break;
10293 case 0x6f: /* FNEG */
10294 gen_helper_vfp_negs(tcg_res, tcg_op);
10295 break;
10296 case 0x7f: /* FSQRT */
10297 gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
10298 break;
10299 case 0x1a: /* FCVTNS */
10300 case 0x1b: /* FCVTMS */
10301 case 0x1c: /* FCVTAS */
10302 case 0x3a: /* FCVTPS */
10303 case 0x3b: /* FCVTZS */
10305 TCGv_i32 tcg_shift = tcg_const_i32(0);
10306 gen_helper_vfp_tosls(tcg_res, tcg_op,
10307 tcg_shift, tcg_fpstatus);
10308 tcg_temp_free_i32(tcg_shift);
10309 break;
10311 case 0x5a: /* FCVTNU */
10312 case 0x5b: /* FCVTMU */
10313 case 0x5c: /* FCVTAU */
10314 case 0x7a: /* FCVTPU */
10315 case 0x7b: /* FCVTZU */
10317 TCGv_i32 tcg_shift = tcg_const_i32(0);
10318 gen_helper_vfp_touls(tcg_res, tcg_op,
10319 tcg_shift, tcg_fpstatus);
10320 tcg_temp_free_i32(tcg_shift);
10321 break;
10323 case 0x18: /* FRINTN */
10324 case 0x19: /* FRINTM */
10325 case 0x38: /* FRINTP */
10326 case 0x39: /* FRINTZ */
10327 case 0x58: /* FRINTA */
10328 case 0x79: /* FRINTI */
10329 gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
10330 break;
10331 case 0x59: /* FRINTX */
10332 gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
10333 break;
10334 case 0x7c: /* URSQRTE */
10335 gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
10336 break;
10337 default:
10338 g_assert_not_reached();
10339 }
10340 } else {
10341 /* Use helpers for 8 and 16 bit elements */
10342 switch (opcode) {
10343 case 0x5: /* CNT, RBIT */
10344 /* For these two insns size is part of the opcode specifier
10345 * (handled earlier); they always operate on byte elements.
10346 */
10347 if (u) {
10348 gen_helper_neon_rbit_u8(tcg_res, tcg_op);
10349 } else {
10350 gen_helper_neon_cnt_u8(tcg_res, tcg_op);
10351 }
10352 break;
10353 case 0x7: /* SQABS, SQNEG */
10354 {
10355 NeonGenOneOpEnvFn *genfn;
10356 static NeonGenOneOpEnvFn * const fns[2][2] = {
10357 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10358 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10359 };
10360 genfn = fns[size][u];
10361 genfn(tcg_res, cpu_env, tcg_op);
10362 break;
10363 }
10364 case 0x8: /* CMGT, CMGE */
10365 case 0x9: /* CMEQ, CMLE */
10366 case 0xa: /* CMLT */
10367 {
10368 static NeonGenTwoOpFn * const fns[3][2] = {
10369 { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
10370 { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
10371 { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
10372 };
10373 NeonGenTwoOpFn *genfn;
10374 int comp;
10375 bool reverse;
10376 TCGv_i32 tcg_zero = tcg_const_i32(0);
10378 /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
10379 comp = (opcode - 0x8) * 2 + u;
10380 /* ...but LE, LT are implemented as reverse GE, GT */
10381 reverse = (comp > 2);
10382 if (reverse) {
10383 comp = 4 - comp;
10384 }
10385 genfn = fns[comp][size];
10386 if (reverse) {
10387 genfn(tcg_res, tcg_zero, tcg_op);
10388 } else {
10389 genfn(tcg_res, tcg_op, tcg_zero);
10390 }
10391 tcg_temp_free_i32(tcg_zero);
10392 break;
10393 }
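/* Worked example of the comp/reverse mapping above: CMLE is opcode 0x9
 * with u = 1, so comp = (0x9 - 0x8) * 2 + 1 = 3; since 3 > 2, reverse is
 * true and comp becomes 4 - 3 = 1, i.e. the CGE helper is called with
 * swapped operands, computing 0 >= op, which is op <= 0 as required.
 */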
10394 case 0xb: /* ABS, NEG */
10395 if (u) {
10396 TCGv_i32 tcg_zero = tcg_const_i32(0);
10397 if (size) {
10398 gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
10399 } else {
10400 gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
10401 }
10402 tcg_temp_free_i32(tcg_zero);
10403 } else {
10404 if (size) {
10405 gen_helper_neon_abs_s16(tcg_res, tcg_op);
10406 } else {
10407 gen_helper_neon_abs_s8(tcg_res, tcg_op);
10408 }
10409 }
10410 break;
10411 case 0x4: /* CLS, CLZ */
10412 if (u) {
10413 if (size == 0) {
10414 gen_helper_neon_clz_u8(tcg_res, tcg_op);
10415 } else {
10416 gen_helper_neon_clz_u16(tcg_res, tcg_op);
10417 }
10418 } else {
10419 if (size == 0) {
10420 gen_helper_neon_cls_s8(tcg_res, tcg_op);
10421 } else {
10422 gen_helper_neon_cls_s16(tcg_res, tcg_op);
10423 }
10424 }
10425 break;
10426 default:
10427 g_assert_not_reached();
10428 }
10429 }
10431 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10433 tcg_temp_free_i32(tcg_res);
10434 tcg_temp_free_i32(tcg_op);
10435 }
10436 }
10437 if (!is_q) {
10438 clear_vec_high(s, rd);
10439 }
10441 if (need_rmode) {
10442 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10443 tcg_temp_free_i32(tcg_rmode);
10444 }
10445 if (need_fpstatus) {
10446 tcg_temp_free_ptr(tcg_fpstatus);
10447 }
10448 }
10450 /* C3.6.13 AdvSIMD scalar x indexed element
10451 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
10452 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10453 * | 0 1 | U | 1 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
10454 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10455 * C3.6.18 AdvSIMD vector x indexed element
10456 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
10457 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10458 * | 0 | Q | U | 0 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
10459 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10460 */
10461 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
10462 {
10463 /* This encoding has two kinds of instruction:
10464 * normal, where we perform elt x idxelt => elt for each
10465 * element in the vector
10466 * long, where we perform elt x idxelt and generate a result of
10467 * double the width of the input element
10468 * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
10469 */
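/* Editorial illustration (mnemonics assumed from the ARM ARM, not taken
 * from this file): MUL V0.4S, V1.4S, V2.S[1] is a "normal" op, multiplying
 * each 32-bit element of V1 by element 1 of V2, while
 * SMLAL2 V0.2D, V1.4S, V2.S[1] is a "long" op whose INSN2/"part" form reads
 * the upper two 32-bit elements of V1 and accumulates 64-bit products.
 */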
10470 bool is_scalar = extract32(insn, 28, 1);
10471 bool is_q = extract32(insn, 30, 1);
10472 bool u = extract32(insn, 29, 1);
10473 int size = extract32(insn, 22, 2);
10474 int l = extract32(insn, 21, 1);
10475 int m = extract32(insn, 20, 1);
10476 /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
10477 int rm = extract32(insn, 16, 4);
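/* Reading of the 4-bit Rm note above: for the 16-bit integer ops (size == 1)
 * the index register stays in the range V0-V15 and the M bit contributes to
 * the element index (index = h:l:m below), while for 32/64-bit and FP ops
 * M is prepended to Rm (rm |= m << 4 below) to form the full 5-bit register
 * number.
 */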
10478 int opcode = extract32(insn, 12, 4);
10479 int h = extract32(insn, 11, 1);
10480 int rn = extract32(insn, 5, 5);
10481 int rd = extract32(insn, 0, 5);
10482 bool is_long = false;
10483 bool is_fp = false;
10484 int index;
10485 TCGv_ptr fpst;
10487 switch (opcode) {
10488 case 0x0: /* MLA */
10489 case 0x4: /* MLS */
10490 if (!u || is_scalar) {
10491 unallocated_encoding(s);
10492 return;
10493 }
10494 break;
10495 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10496 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10497 case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
10498 if (is_scalar) {
10499 unallocated_encoding(s);
10500 return;
10501 }
10502 is_long = true;
10503 break;
10504 case 0x3: /* SQDMLAL, SQDMLAL2 */
10505 case 0x7: /* SQDMLSL, SQDMLSL2 */
10506 case 0xb: /* SQDMULL, SQDMULL2 */
10507 is_long = true;
10508 /* fall through */
10509 case 0xc: /* SQDMULH */
10510 case 0xd: /* SQRDMULH */
10511 if (u) {
10512 unallocated_encoding(s);
10513 return;
10514 }
10515 break;
10516 case 0x8: /* MUL */
10517 if (u || is_scalar) {
10518 unallocated_encoding(s);
10519 return;
10520 }
10521 break;
10522 case 0x1: /* FMLA */
10523 case 0x5: /* FMLS */
10524 if (u) {
10525 unallocated_encoding(s);
10526 return;
10527 }
10528 /* fall through */
10529 case 0x9: /* FMUL, FMULX */
10530 if (!extract32(size, 1, 1)) {
10531 unallocated_encoding(s);
10532 return;
10533 }
10534 is_fp = true;
10535 break;
10536 default:
10537 unallocated_encoding(s);
10538 return;
10539 }
10541 if (is_fp) {
10542 /* low bit of size indicates single/double */
10543 size = extract32(size, 0, 1) ? 3 : 2;
10544 if (size == 2) {
10545 index = h << 1 | l;
10546 } else {
10547 if (l || !is_q) {
10548 unallocated_encoding(s);
10549 return;
10550 }
10551 index = h;
10552 }
10553 rm |= (m << 4);
10554 } else {
10555 switch (size) {
10556 case 1:
10557 index = h << 2 | l << 1 | m;
10558 break;
10559 case 2:
10560 index = h << 1 | l;
10561 rm |= (m << 4);
10562 break;
10563 default:
10564 unallocated_encoding(s);
10565 return;
10566 }
10567 }
10569 if (!fp_access_check(s)) {
10570 return;
10571 }
10573 if (is_fp) {
10574 fpst = get_fpstatus_ptr();
10575 } else {
10576 TCGV_UNUSED_PTR(fpst);
10577 }
10579 if (size == 3) {
10580 TCGv_i64 tcg_idx = tcg_temp_new_i64();
10581 int pass;
10583 assert(is_fp && is_q && !is_long);
10585 read_vec_element(s, tcg_idx, rm, index, MO_64);
10587 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10588 TCGv_i64 tcg_op = tcg_temp_new_i64();
10589 TCGv_i64 tcg_res = tcg_temp_new_i64();
10591 read_vec_element(s, tcg_op, rn, pass, MO_64);
10593 switch (opcode) {
10594 case 0x5: /* FMLS */
10595 /* As usual for ARM, separate negation for fused multiply-add */
10596 gen_helper_vfp_negd(tcg_op, tcg_op);
10597 /* fall through */
10598 case 0x1: /* FMLA */
10599 read_vec_element(s, tcg_res, rd, pass, MO_64);
10600 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10601 break;
10602 case 0x9: /* FMUL, FMULX */
10603 if (u) {
10604 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
10605 } else {
10606 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
10607 }
10608 break;
10609 default:
10610 g_assert_not_reached();
10611 }
10613 write_vec_element(s, tcg_res, rd, pass, MO_64);
10614 tcg_temp_free_i64(tcg_op);
10615 tcg_temp_free_i64(tcg_res);
10616 }
10618 if (is_scalar) {
10619 clear_vec_high(s, rd);
10620 }
10622 tcg_temp_free_i64(tcg_idx);
10623 } else if (!is_long) {
10624 /* 32 bit floating point, or 16 or 32 bit integer.
10625 * For the 16 bit scalar case we use the usual Neon helpers and
10626 * rely on the fact that 0 op 0 == 0 with no side effects.
10627 */
10628 TCGv_i32 tcg_idx = tcg_temp_new_i32();
10629 int pass, maxpasses;
10631 if (is_scalar) {
10632 maxpasses = 1;
10633 } else {
10634 maxpasses = is_q ? 4 : 2;
10635 }
10637 read_vec_element_i32(s, tcg_idx, rm, index, size);
10639 if (size == 1 && !is_scalar) {
10640 /* The simplest way to handle the 16x16 indexed ops is to duplicate
10641 * the index into both halves of the 32 bit tcg_idx and then use
10642 * the usual Neon helpers.
10643 */
10644 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10645 }
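/* Illustration of the deposit above: tcg_gen_deposit_i32(idx, idx, idx, 16, 16)
 * copies bits [15:0] of tcg_idx into bits [31:16], so an index value of
 * 0x0000abcd becomes 0xabcdabcd and the 16x16 Neon helpers operate on the
 * same scalar in both halves.
 */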
10647 for (pass = 0; pass < maxpasses; pass++) {
10648 TCGv_i32 tcg_op = tcg_temp_new_i32();
10649 TCGv_i32 tcg_res = tcg_temp_new_i32();
10651 read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
10653 switch (opcode) {
10654 case 0x0: /* MLA */
10655 case 0x4: /* MLS */
10656 case 0x8: /* MUL */
10657 {
10658 static NeonGenTwoOpFn * const fns[2][2] = {
10659 { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
10660 { tcg_gen_add_i32, tcg_gen_sub_i32 },
10661 };
10662 NeonGenTwoOpFn *genfn;
10663 bool is_sub = opcode == 0x4;
10665 if (size == 1) {
10666 gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
10667 } else {
10668 tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
10669 }
10670 if (opcode == 0x8) {
10671 break;
10672 }
10673 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
10674 genfn = fns[size - 1][is_sub];
10675 genfn(tcg_res, tcg_op, tcg_res);
10676 break;
10677 }
10678 case 0x5: /* FMLS */
10679 /* As usual for ARM, separate negation for fused multiply-add */
10680 gen_helper_vfp_negs(tcg_op, tcg_op);
10681 /* fall through */
10682 case 0x1: /* FMLA */
10683 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10684 gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10685 break;
10686 case 0x9: /* FMUL, FMULX */
10687 if (u) {
10688 gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
10689 } else {
10690 gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
10691 }
10692 break;
10693 case 0xc: /* SQDMULH */
10694 if (size == 1) {
10695 gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
10696 tcg_op, tcg_idx);
10697 } else {
10698 gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
10699 tcg_op, tcg_idx);
10700 }
10701 break;
10702 case 0xd: /* SQRDMULH */
10703 if (size == 1) {
10704 gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
10705 tcg_op, tcg_idx);
10706 } else {
10707 gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
10708 tcg_op, tcg_idx);
10709 }
10710 break;
10711 default:
10712 g_assert_not_reached();
10713 }
10715 if (is_scalar) {
10716 write_fp_sreg(s, rd, tcg_res);
10717 } else {
10718 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10719 }
10721 tcg_temp_free_i32(tcg_op);
10722 tcg_temp_free_i32(tcg_res);
10723 }
10725 tcg_temp_free_i32(tcg_idx);
10727 if (!is_q) {
10728 clear_vec_high(s, rd);
10729 }
10730 } else {
10731 /* long ops: 16x16->32 or 32x32->64 */
10732 TCGv_i64 tcg_res[2];
10733 int pass;
10734 bool satop = extract32(opcode, 0, 1);
10735 TCGMemOp memop = MO_32;
10737 if (satop || !u) {
10738 memop |= MO_SIGN;
10739 }
10741 if (size == 2) {
10742 TCGv_i64 tcg_idx = tcg_temp_new_i64();
10744 read_vec_element(s, tcg_idx, rm, index, memop);
10746 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10747 TCGv_i64 tcg_op = tcg_temp_new_i64();
10748 TCGv_i64 tcg_passres;
10749 int passelt;
10751 if (is_scalar) {
10752 passelt = 0;
10753 } else {
10754 passelt = pass + (is_q * 2);
10755 }
10757 read_vec_element(s, tcg_op, rn, passelt, memop);
10759 tcg_res[pass] = tcg_temp_new_i64();
10761 if (opcode == 0xa || opcode == 0xb) {
10762 /* Non-accumulating ops */
10763 tcg_passres = tcg_res[pass];
10764 } else {
10765 tcg_passres = tcg_temp_new_i64();
10766 }
10768 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
10769 tcg_temp_free_i64(tcg_op);
10771 if (satop) {
10772 /* saturating, doubling */
10773 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10774 tcg_passres, tcg_passres);
10775 }
10777 if (opcode == 0xa || opcode == 0xb) {
10778 continue;
10779 }
10781 /* Accumulating op: handle accumulate step */
10782 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10784 switch (opcode) {
10785 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10786 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10787 break;
10788 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10789 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10790 break;
10791 case 0x7: /* SQDMLSL, SQDMLSL2 */
10792 tcg_gen_neg_i64(tcg_passres, tcg_passres);
10793 /* fall through */
10794 case 0x3: /* SQDMLAL, SQDMLAL2 */
10795 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10796 tcg_res[pass],
10797 tcg_passres);
10798 break;
10799 default:
10800 g_assert_not_reached();
10801 }
10802 tcg_temp_free_i64(tcg_passres);
10803 }
10804 tcg_temp_free_i64(tcg_idx);
10806 if (is_scalar) {
10807 clear_vec_high(s, rd);
10808 }
10809 } else {
10810 TCGv_i32 tcg_idx = tcg_temp_new_i32();
10812 assert(size == 1);
10813 read_vec_element_i32(s, tcg_idx, rm, index, size);
10815 if (!is_scalar) {
10816 /* The simplest way to handle the 16x16 indexed ops is to
10817 * duplicate the index into both halves of the 32 bit tcg_idx
10818 * and then use the usual Neon helpers.
10819 */
10820 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10821 }
10823 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10824 TCGv_i32 tcg_op = tcg_temp_new_i32();
10825 TCGv_i64 tcg_passres;
10827 if (is_scalar) {
10828 read_vec_element_i32(s, tcg_op, rn, pass, size);
10829 } else {
10830 read_vec_element_i32(s, tcg_op, rn,
10831 pass + (is_q * 2), MO_32);
10832 }
10834 tcg_res[pass] = tcg_temp_new_i64();
10836 if (opcode == 0xa || opcode == 0xb) {
10837 /* Non-accumulating ops */
10838 tcg_passres = tcg_res[pass];
10839 } else {
10840 tcg_passres = tcg_temp_new_i64();
10841 }
10843 if (memop & MO_SIGN) {
10844 gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
10845 } else {
10846 gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
10847 }
10848 if (satop) {
10849 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10850 tcg_passres, tcg_passres);
10851 }
10852 tcg_temp_free_i32(tcg_op);
10854 if (opcode == 0xa || opcode == 0xb) {
10855 continue;
10856 }
10858 /* Accumulating op: handle accumulate step */
10859 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10861 switch (opcode) {
10862 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10863 gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
10864 tcg_passres);
10865 break;
10866 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10867 gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
10868 tcg_passres);
10869 break;
10870 case 0x7: /* SQDMLSL, SQDMLSL2 */
10871 gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10872 /* fall through */
10873 case 0x3: /* SQDMLAL, SQDMLAL2 */
10874 gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10875 tcg_res[pass],
10876 tcg_passres);
10877 break;
10878 default:
10879 g_assert_not_reached();
10880 }
10881 tcg_temp_free_i64(tcg_passres);
10882 }
10883 tcg_temp_free_i32(tcg_idx);
10885 if (is_scalar) {
10886 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
10887 }
10888 }
10890 if (is_scalar) {
10891 tcg_res[1] = tcg_const_i64(0);
10892 }
10894 for (pass = 0; pass < 2; pass++) {
10895 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10896 tcg_temp_free_i64(tcg_res[pass]);
10897 }
10898 }
10900 if (!TCGV_IS_UNUSED_PTR(fpst)) {
10901 tcg_temp_free_ptr(fpst);
10902 }
10903 }
10905 /* C3.6.19 Crypto AES
10906 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0
10907 * +-----------------+------+-----------+--------+-----+------+------+
10908 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd |
10909 * +-----------------+------+-----------+--------+-----+------+------+
10910 */
10911 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
10912 {
10913 int size = extract32(insn, 22, 2);
10914 int opcode = extract32(insn, 12, 5);
10915 int rn = extract32(insn, 5, 5);
10916 int rd = extract32(insn, 0, 5);
10917 int decrypt;
10918 TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt;
10919 CryptoThreeOpEnvFn *genfn;
10921 if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
10922 || size != 0) {
10923 unallocated_encoding(s);
10924 return;
10925 }
10927 switch (opcode) {
10928 case 0x4: /* AESE */
10929 decrypt = 0;
10930 genfn = gen_helper_crypto_aese;
10931 break;
10932 case 0x6: /* AESMC */
10933 decrypt = 0;
10934 genfn = gen_helper_crypto_aesmc;
10935 break;
10936 case 0x5: /* AESD */
10937 decrypt = 1;
10938 genfn = gen_helper_crypto_aese;
10939 break;
10940 case 0x7: /* AESIMC */
10941 decrypt = 1;
10942 genfn = gen_helper_crypto_aesmc;
10943 break;
10944 default:
10945 unallocated_encoding(s);
10946 return;
10947 }
10949 if (!fp_access_check(s)) {
10950 return;
10951 }
10953 /* Note that we convert the Vx register indexes into the
10954 * index within the vfp.regs[] array, so we can share the
10955 * helper with the AArch32 instructions.
10956 */
10957 tcg_rd_regno = tcg_const_i32(rd << 1);
10958 tcg_rn_regno = tcg_const_i32(rn << 1);
10959 tcg_decrypt = tcg_const_i32(decrypt);
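/* Illustrative reading of the mapping above (assuming the usual QEMU layout
 * where each 128-bit Vn occupies two consecutive 64-bit vfp.regs[] slots):
 * rd = 3 yields tcg_rd_regno = 6, the same index the AArch32 crypto helpers
 * use for Q3.
 */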
10961 genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt);
10963 tcg_temp_free_i32(tcg_rd_regno);
10964 tcg_temp_free_i32(tcg_rn_regno);
10965 tcg_temp_free_i32(tcg_decrypt);
10966 }
10968 /* C3.6.20 Crypto three-reg SHA
10969 * 31 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0
10970 * +-----------------+------+---+------+---+--------+-----+------+------+
10971 * | 0 1 0 1 1 1 1 0 | size | 0 | Rm | 0 | opcode | 0 0 | Rn | Rd |
10972 * +-----------------+------+---+------+---+--------+-----+------+------+
10973 */
10974 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
10975 {
10976 int size = extract32(insn, 22, 2);
10977 int opcode = extract32(insn, 12, 3);
10978 int rm = extract32(insn, 16, 5);
10979 int rn = extract32(insn, 5, 5);
10980 int rd = extract32(insn, 0, 5);
10981 CryptoThreeOpEnvFn *genfn;
10982 TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno;
10983 int feature = ARM_FEATURE_V8_SHA256;
10985 if (size != 0) {
10986 unallocated_encoding(s);
10987 return;
10988 }
10990 switch (opcode) {
10991 case 0: /* SHA1C */
10992 case 1: /* SHA1P */
10993 case 2: /* SHA1M */
10994 case 3: /* SHA1SU0 */
10995 genfn = NULL;
10996 feature = ARM_FEATURE_V8_SHA1;
10997 break;
10998 case 4: /* SHA256H */
10999 genfn = gen_helper_crypto_sha256h;
11000 break;
11001 case 5: /* SHA256H2 */
11002 genfn = gen_helper_crypto_sha256h2;
11003 break;
11004 case 6: /* SHA256SU1 */
11005 genfn = gen_helper_crypto_sha256su1;
11006 break;
11007 default:
11008 unallocated_encoding(s);
11009 return;
11010 }
11012 if (!arm_dc_feature(s, feature)) {
11013 unallocated_encoding(s);
11014 return;
11015 }
11017 if (!fp_access_check(s)) {
11018 return;
11019 }
11021 tcg_rd_regno = tcg_const_i32(rd << 1);
11022 tcg_rn_regno = tcg_const_i32(rn << 1);
11023 tcg_rm_regno = tcg_const_i32(rm << 1);
11025 if (genfn) {
11026 genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno);
11027 } else {
11028 TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
11030 gen_helper_crypto_sha1_3reg(cpu_env, tcg_rd_regno,
11031 tcg_rn_regno, tcg_rm_regno, tcg_opcode);
11032 tcg_temp_free_i32(tcg_opcode);
11033 }
11035 tcg_temp_free_i32(tcg_rd_regno);
11036 tcg_temp_free_i32(tcg_rn_regno);
11037 tcg_temp_free_i32(tcg_rm_regno);
11038 }
11040 /* C3.6.21 Crypto two-reg SHA
11041 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0
11042 * +-----------------+------+-----------+--------+-----+------+------+
11043 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd |
11044 * +-----------------+------+-----------+--------+-----+------+------+
11045 */
11046 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
11047 {
11048 int size = extract32(insn, 22, 2);
11049 int opcode = extract32(insn, 12, 5);
11050 int rn = extract32(insn, 5, 5);
11051 int rd = extract32(insn, 0, 5);
11052 CryptoTwoOpEnvFn *genfn;
11053 int feature;
11054 TCGv_i32 tcg_rd_regno, tcg_rn_regno;
11056 if (size != 0) {
11057 unallocated_encoding(s);
11058 return;
11059 }
11061 switch (opcode) {
11062 case 0: /* SHA1H */
11063 feature = ARM_FEATURE_V8_SHA1;
11064 genfn = gen_helper_crypto_sha1h;
11065 break;
11066 case 1: /* SHA1SU1 */
11067 feature = ARM_FEATURE_V8_SHA1;
11068 genfn = gen_helper_crypto_sha1su1;
11069 break;
11070 case 2: /* SHA256SU0 */
11071 feature = ARM_FEATURE_V8_SHA256;
11072 genfn = gen_helper_crypto_sha256su0;
11073 break;
11074 default:
11075 unallocated_encoding(s);
11076 return;
11077 }
11079 if (!arm_dc_feature(s, feature)) {
11080 unallocated_encoding(s);
11081 return;
11082 }
11084 if (!fp_access_check(s)) {
11085 return;
11086 }
11088 tcg_rd_regno = tcg_const_i32(rd << 1);
11089 tcg_rn_regno = tcg_const_i32(rn << 1);
11091 genfn(cpu_env, tcg_rd_regno, tcg_rn_regno);
11093 tcg_temp_free_i32(tcg_rd_regno);
11094 tcg_temp_free_i32(tcg_rn_regno);
11095 }
11097 /* C3.6 Data processing - SIMD, inc Crypto
11098 *
11099 * As the decode gets a little complex we are using a table based
11100 * approach for this part of the decode.
11101 */
11102 static const AArch64DecodeTable data_proc_simd[] = {
11103 /* pattern , mask , fn */
11104 { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
11105 { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
11106 { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
11107 { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
11108 { 0x0e000400, 0x9fe08400, disas_simd_copy },
11109 { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
11110 /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
11111 { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
11112 { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
11113 { 0x0e000000, 0xbf208c00, disas_simd_tb },
11114 { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
11115 { 0x2e000000, 0xbf208400, disas_simd_ext },
11116 { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
11117 { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
11118 { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
11119 { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
11120 { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
11121 { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
11122 { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
11123 { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
11124 { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
11125 { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
11126 { 0x00000000, 0x00000000, NULL }
11127 };
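/* Sketch of how this table is consumed (lookup_disas_fn() is defined
 * earlier in this file; this is only an approximation of its first-match
 * behaviour, stopping at the all-zero terminator entry above):
 *
 *     for (tptr = table; tptr->mask != 0; tptr++) {
 *         if ((insn & tptr->mask) == tptr->pattern) {
 *             return tptr->disas_fn;
 *         }
 *     }
 *     return NULL;   // caller then calls unallocated_encoding()
 *
 * First match wins, which is why simd_mod_imm is listed before the more
 * general simd_shift_imm pattern.
 */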
11129 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
11130 {
11131 /* Note that this is called with all non-FP cases from
11132 * table C3-6 so it must UNDEF for entries not specifically
11133 * allocated to instructions in that table.
11134 */
11135 AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
11136 if (fn) {
11137 fn(s, insn);
11138 } else {
11139 unallocated_encoding(s);
11140 }
11141 }
11143 /* C3.6 Data processing - SIMD and floating point */
11144 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
11145 {
11146 if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
11147 disas_data_proc_fp(s, insn);
11148 } else {
11149 /* SIMD, including crypto */
11150 disas_data_proc_simd(s, insn);
11151 }
11152 }
11154 /* C3.1 A64 instruction index by encoding */
11155 static void disas_a64_insn(CPUARMState *env, DisasContext *s)
11156 {
11157 uint32_t insn;
11159 insn = arm_ldl_code(env, s->pc, s->sctlr_b);
11160 s->insn = insn;
11161 s->pc += 4;
11163 s->fp_access_checked = false;
11165 switch (extract32(insn, 25, 4)) {
11166 case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
11167 unallocated_encoding(s);
11168 break;
11169 case 0x8: case 0x9: /* Data processing - immediate */
11170 disas_data_proc_imm(s, insn);
11171 break;
11172 case 0xa: case 0xb: /* Branch, exception generation and system insns */
11173 disas_b_exc_sys(s, insn);
11174 break;
11175 case 0x4:
11176 case 0x6:
11177 case 0xc:
11178 case 0xe: /* Loads and stores */
11179 disas_ldst(s, insn);
11180 break;
11181 case 0x5:
11182 case 0xd: /* Data processing - register */
11183 disas_data_proc_reg(s, insn);
11184 break;
11185 case 0x7:
11186 case 0xf: /* Data processing - SIMD and floating point */
11187 disas_data_proc_simd_fp(s, insn);
11188 break;
11189 default:
11190 assert(FALSE); /* all 15 cases should be handled above */
11191 break;
11192 }
11194 /* if we allocated any temporaries, free them here */
11195 free_tmp_a64(s);
11196 }
11198 void gen_intermediate_code_a64(CPUState *cs, TranslationBlock *tb)
11199 {
11200 CPUARMState *env = cs->env_ptr;
11201 ARMCPU *cpu = arm_env_get_cpu(env);
11202 DisasContext dc1, *dc = &dc1;
11203 target_ulong pc_start;
11204 target_ulong next_page_start;
11205 int num_insns;
11206 int max_insns;
11208 pc_start = tb->pc;
11210 dc->tb = tb;
11212 dc->is_jmp = DISAS_NEXT;
11213 dc->pc = pc_start;
11214 dc->singlestep_enabled = cs->singlestep_enabled;
11215 dc->condjmp = 0;
11217 dc->aarch64 = 1;
11218 /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
11219 * there is no secure EL1, so we route exceptions to EL3.
11220 */
11221 dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
11222 !arm_el_is_aa64(env, 3);
11223 dc->thumb = 0;
11224 dc->sctlr_b = 0;
11225 dc->be_data = ARM_TBFLAG_BE_DATA(tb->flags) ? MO_BE : MO_LE;
11226 dc->condexec_mask = 0;
11227 dc->condexec_cond = 0;
11228 dc->mmu_idx = core_to_arm_mmu_idx(env, ARM_TBFLAG_MMUIDX(tb->flags));
11229 dc->tbi0 = ARM_TBFLAG_TBI0(tb->flags);
11230 dc->tbi1 = ARM_TBFLAG_TBI1(tb->flags);
11231 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
11232 #if !defined(CONFIG_USER_ONLY)
11233 dc->user = (dc->current_el == 0);
11234 #endif
11235 dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(tb->flags);
11236 dc->vec_len = 0;
11237 dc->vec_stride = 0;
11238 dc->cp_regs = cpu->cp_regs;
11239 dc->features = env->features;
11241 /* Single step state. The code-generation logic here is:
11242 * SS_ACTIVE == 0:
11243 * generate code with no special handling for single-stepping (except
11244 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
11245 * this happens anyway because those changes are all system register or
11246 * PSTATE writes).
11247 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
11248 * emit code for one insn
11249 * emit code to clear PSTATE.SS
11250 * emit code to generate software step exception for completed step
11251 * end TB (as usual for having generated an exception)
11252 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
11253 * emit code to generate a software step exception
11254 * end the TB
11255 */
11256 dc->ss_active = ARM_TBFLAG_SS_ACTIVE(tb->flags);
11257 dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(tb->flags);
11258 dc->is_ldex = false;
11259 dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);
11261 init_tmp_a64_array(dc);
11263 next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
11264 num_insns = 0;
11265 max_insns = tb->cflags & CF_COUNT_MASK;
11266 if (max_insns == 0) {
11267 max_insns = CF_COUNT_MASK;
11268 }
11269 if (max_insns > TCG_MAX_INSNS) {
11270 max_insns = TCG_MAX_INSNS;
11271 }
11273 gen_tb_start(tb);
11275 tcg_clear_temp_count();
11277 do {
11278 dc->insn_start_idx = tcg_op_buf_count();
11279 tcg_gen_insn_start(dc->pc, 0, 0);
11280 num_insns++;
11282 if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
11283 CPUBreakpoint *bp;
11284 QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
11285 if (bp->pc == dc->pc) {
11286 if (bp->flags & BP_CPU) {
11287 gen_a64_set_pc_im(dc->pc);
11288 gen_helper_check_breakpoints(cpu_env);
11289 /* End the TB early; it likely won't be executed */
11290 dc->is_jmp = DISAS_UPDATE;
11291 } else {
11292 gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
11293 /* The address covered by the breakpoint must be
11294 included in [tb->pc, tb->pc + tb->size) in order
11295 for it to be properly cleared -- thus we
11296 increment the PC here so that the logic setting
11297 tb->size below does the right thing. */
11298 dc->pc += 4;
11299 goto done_generating;
11300 }
11301 break;
11302 }
11303 }
11304 }
11306 if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
11307 gen_io_start();
11308 }
11310 if (dc->ss_active && !dc->pstate_ss) {
11311 /* Singlestep state is Active-pending.
11312 * If we're in this state at the start of a TB then either
11313 * a) we just took an exception to an EL which is being debugged
11314 * and this is the first insn in the exception handler
11315 * b) debug exceptions were masked and we just unmasked them
11316 * without changing EL (eg by clearing PSTATE.D)
11317 * In either case we're going to take a swstep exception in the
11318 * "did not step an insn" case, and so the syndrome ISV and EX
11319 * bits should be zero.
11320 */
11321 assert(num_insns == 1);
11322 gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
11323 default_exception_el(dc));
11324 dc->is_jmp = DISAS_EXC;
11325 break;
11326 }
11328 disas_a64_insn(env, dc);
11330 if (tcg_check_temp_count()) {
11331 fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
11332 dc->pc);
11333 }
11335 /* Translation stops when a conditional branch is encountered.
11336 * Otherwise the subsequent code could get translated several times.
11337 * Also stop translation when a page boundary is reached. This
11338 * ensures prefetch aborts occur at the right place.
11339 */
11340 } while (!dc->is_jmp && !tcg_op_buf_full() &&
11341 !cs->singlestep_enabled &&
11342 !singlestep &&
11343 !dc->ss_active &&
11344 dc->pc < next_page_start &&
11345 num_insns < max_insns);
11347 if (tb->cflags & CF_LAST_IO) {
11348 gen_io_end();
11349 }
11351 if (unlikely(cs->singlestep_enabled || dc->ss_active)
11352 && dc->is_jmp != DISAS_EXC) {
11353 /* Note that this means single stepping WFI doesn't halt the CPU.
11354 * For conditional branch insns this is harmless unreachable code as
11355 * gen_goto_tb() has already handled emitting the debug exception
11356 * (and thus a tb-jump is not possible when singlestepping).
11357 */
11358 assert(dc->is_jmp != DISAS_TB_JUMP);
11359 if (dc->is_jmp != DISAS_JUMP) {
11360 gen_a64_set_pc_im(dc->pc);
11361 }
11362 if (cs->singlestep_enabled) {
11363 gen_exception_internal(EXCP_DEBUG);
11364 } else {
11365 gen_step_complete_exception(dc);
11366 }
11367 } else {
11368 switch (dc->is_jmp) {
11369 case DISAS_NEXT:
11370 gen_goto_tb(dc, 1, dc->pc);
11371 break;
11372 case DISAS_JUMP:
11373 tcg_gen_lookup_and_goto_ptr(cpu_pc);
11374 break;
11375 case DISAS_TB_JUMP:
11376 case DISAS_EXC:
11377 case DISAS_SWI:
11378 break;
11379 case DISAS_WFE:
11380 gen_a64_set_pc_im(dc->pc);
11381 gen_helper_wfe(cpu_env);
11382 break;
11383 case DISAS_YIELD:
11384 gen_a64_set_pc_im(dc->pc);
11385 gen_helper_yield(cpu_env);
11386 break;
11387 case DISAS_WFI:
11388 /* This is a special case because we don't want to just halt the CPU
11389 * if trying to debug across a WFI.
11390 */
11391 gen_a64_set_pc_im(dc->pc);
11392 gen_helper_wfi(cpu_env);
11393 /* The helper doesn't necessarily throw an exception, but we
11394 * must go back to the main loop to check for interrupts anyway.
11395 */
11396 tcg_gen_exit_tb(0);
11397 break;
11398 case DISAS_UPDATE:
11399 gen_a64_set_pc_im(dc->pc);
11400 /* fall through */
11401 case DISAS_EXIT:
11402 default:
11403 tcg_gen_exit_tb(0);
11404 break;
11405 }
11406 }
11408 done_generating:
11409 gen_tb_end(tb, num_insns);
11411 #ifdef DEBUG_DISAS
11412 if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) &&
11413 qemu_log_in_addr_range(pc_start)) {
11414 qemu_log_lock();
11415 qemu_log("----------------\n");
11416 qemu_log("IN: %s\n", lookup_symbol(pc_start));
11417 log_target_disas(cs, pc_start, dc->pc - pc_start,
11418 4 | (bswap_code(dc->sctlr_b) ? 2 : 0));
11419 qemu_log("\n");
11420 qemu_log_unlock();
11421 }
11422 #endif
11423 tb->size = dc->pc - pc_start;
11424 tb->icount = num_insns;