target/arm/translate-a64.c

   1 /*
   2  *  AArch64 translation
   3  *
   4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include "qemu/osdep.h"
  20
  21 #include "cpu.h"
  22 #include "exec/exec-all.h"
  23 #include "tcg-op.h"
  24 #include "tcg-op-gvec.h"
  25 #include "qemu/log.h"
  26 #include "arm_ldst.h"
  27 #include "translate.h"
  28 #include "internals.h"
  29 #include "qemu/host-utils.h"
  30
  31 #include "exec/semihost.h"
  32 #include "exec/gen-icount.h"
  33
  34 #include "exec/helper-proto.h"
  35 #include "exec/helper-gen.h"
  36 #include "exec/log.h"
  37
  38 #include "trace-tcg.h"
  39
  40 static TCGv_i64 cpu_X[32];
  41 static TCGv_i64 cpu_pc;
  42
  43 /* Load/store exclusive handling */
  44 static TCGv_i64 cpu_exclusive_high;
  45 static TCGv_i64 cpu_reg(DisasContext *s, int reg);
  46
  47 static const char *regnames[] = {
  48     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
  49     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
  50     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
  51     "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
  52 };
  53
  54 enum a64_shift_type {
  55     A64_SHIFT_TYPE_LSL = 0,
  56     A64_SHIFT_TYPE_LSR = 1,
  57     A64_SHIFT_TYPE_ASR = 2,
  58     A64_SHIFT_TYPE_ROR = 3
  59 };
  60
  61 /* Table based decoder typedefs - used when the relevant bits for decode
  62  * are too awkwardly scattered across the instruction (eg SIMD).
  63  */
  64 typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);
  65
  66 typedef struct AArch64DecodeTable {
  67     uint32_t pattern;
  68     uint32_t mask;
  69     AArch64DecodeFn *disas_fn;
  70 } AArch64DecodeTable;
  71
  72 /* Function prototype for gen_ functions for calling Neon helpers */
  73 typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
  74 typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
  75 typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
  76 typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
  77 typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
  78 typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
  79 typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
  80 typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
  81 typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
  82 typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
  83 typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
  84 typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
  85 typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
  86 typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
  87
  88 /* Note that the gvec expanders operate on offsets + sizes.  */
  89 typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
  90                         uint32_t, uint32_t, uint32_t);
  91
  92 /* initialize TCG globals.  */
  93 void a64_translate_init(void)
  94 {
  95     int i;
  96
  97     cpu_pc = tcg_global_mem_new_i64(cpu_env,
  98                                     offsetof(CPUARMState, pc),
  99                                     "pc");
 100     for (i = 0; i < 32; i++) {
 101         cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
 102                                           offsetof(CPUARMState, xregs[i]),
 103                                           regnames[i]);
 104     }
 105
 106     cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
 107         offsetof(CPUARMState, exclusive_high), "exclusive_high");
 108 }
 109
 110 static inline int get_a64_user_mem_index(DisasContext *s)
 111 {
 112     /* Return the core mmu_idx to use for A64 "unprivileged load/store" insns:
 113      *  if EL1, access as if EL0; otherwise access at current EL
 114      */
 115     ARMMMUIdx useridx;
 116
 117     switch (s->mmu_idx) {
 118     case ARMMMUIdx_S12NSE1:
 119         useridx = ARMMMUIdx_S12NSE0;
 120         break;
 121     case ARMMMUIdx_S1SE1:
 122         useridx = ARMMMUIdx_S1SE0;
 123         break;
 124     case ARMMMUIdx_S2NS:
 125         g_assert_not_reached();
 126     default:
 127         useridx = s->mmu_idx;
 128         break;
 129     }
 130     return arm_to_core_mmu_idx(useridx);
 131 }
 132
 133 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
 134                             fprintf_function cpu_fprintf, int flags)
 135 {
 136     ARMCPU *cpu = ARM_CPU(cs);
 137     CPUARMState *env = &cpu->env;
 138     uint32_t psr = pstate_read(env);
 139     int i;
 140     int el = arm_current_el(env);
 141     const char *ns_status;
 142
 143     cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
 144             env->pc, env->xregs[31]);
 145     for (i = 0; i < 31; i++) {
 146         cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
 147         if ((i % 4) == 3) {
 148             cpu_fprintf(f, "\n");
 149         } else {
 150             cpu_fprintf(f, " ");
 151         }
 152     }
 153
 154     if (arm_feature(env, ARM_FEATURE_EL3) && el != 3) {
 155         ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
 156     } else {
 157         ns_status = "";
 158     }
 159
 160     cpu_fprintf(f, "\nPSTATE=%08x %c%c%c%c %sEL%d%c\n",
 161                 psr,
 162                 psr & PSTATE_N ? 'N' : '-',
 163                 psr & PSTATE_Z ? 'Z' : '-',
 164                 psr & PSTATE_C ? 'C' : '-',
 165                 psr & PSTATE_V ? 'V' : '-',
 166                 ns_status,
 167                 el,
 168                 psr & PSTATE_SP ? 'h' : 't');
 169
 170     if (flags & CPU_DUMP_FPU) {
 171         int numvfpregs = 32;
 172         for (i = 0; i < numvfpregs; i++) {
 173             uint64_t *q = aa64_vfp_qreg(env, i);
 174             uint64_t vlo = q[0];
 175             uint64_t vhi = q[1];
 176             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "%c",
 177                         i, vhi, vlo, (i & 1 ? '\n' : ' '));
 178         }
 179         cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
 180                     vfp_get_fpcr(env), vfp_get_fpsr(env));
 181     }
 182 }
 183
 184 void gen_a64_set_pc_im(uint64_t val)
 185 {
 186     tcg_gen_movi_i64(cpu_pc, val);
 187 }
 188
 189 /* Load the PC from a generic TCG variable.
 190  *
 191  * If address tagging is enabled via the TCR TBI bits, then loading
 192  * an address into the PC will clear out any tag in the it:
 193  *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 194  *    then the address is zero-extended, clearing bits [63:56]
 195  *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 196  *    and TBI1 controls addressses with bit 55 == 1.
 197  *    If the appropriate TBI bit is set for the address then
 198  *    the address is sign-extended from bit 55 into bits [63:56]
 199  *
 200  * We can avoid doing this for relative-branches, because the
 201  * PC + offset can never overflow into the tag bits (assuming
 202  * that virtual addresses are less than 56 bits wide, as they
 203  * are currently), but we must handle it for branch-to-register.
 204  */
 205 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
 206 {
 207
 208     if (s->current_el <= 1) {
 209         /* Test if NEITHER or BOTH TBI values are set.  If so, no need to
 210          * examine bit 55 of address, can just generate code.
 211          * If mixed, then test via generated code
 212          */
 213         if (s->tbi0 && s->tbi1) {
 214             TCGv_i64 tmp_reg = tcg_temp_new_i64();
 215             /* Both bits set, sign extension from bit 55 into [63:56] will
 216              * cover both cases
 217              */
 218             tcg_gen_shli_i64(tmp_reg, src, 8);
 219             tcg_gen_sari_i64(cpu_pc, tmp_reg, 8);
 220             tcg_temp_free_i64(tmp_reg);
 221         } else if (!s->tbi0 && !s->tbi1) {
 222             /* Neither bit set, just load it as-is */
 223             tcg_gen_mov_i64(cpu_pc, src);
 224         } else {
 225             TCGv_i64 tcg_tmpval = tcg_temp_new_i64();
 226             TCGv_i64 tcg_bit55  = tcg_temp_new_i64();
 227             TCGv_i64 tcg_zero   = tcg_const_i64(0);
 228
 229             tcg_gen_andi_i64(tcg_bit55, src, (1ull << 55));
 230
 231             if (s->tbi0) {
 232                 /* tbi0==1, tbi1==0, so 0-fill upper byte if bit 55 = 0 */
 233                 tcg_gen_andi_i64(tcg_tmpval, src,
 234                                  0x00FFFFFFFFFFFFFFull);
 235                 tcg_gen_movcond_i64(TCG_COND_EQ, cpu_pc, tcg_bit55, tcg_zero,
 236                                     tcg_tmpval, src);
 237             } else {
 238                 /* tbi0==0, tbi1==1, so 1-fill upper byte if bit 55 = 1 */
 239                 tcg_gen_ori_i64(tcg_tmpval, src,
 240                                 0xFF00000000000000ull);
 241                 tcg_gen_movcond_i64(TCG_COND_NE, cpu_pc, tcg_bit55, tcg_zero,
 242                                     tcg_tmpval, src);
 243             }
 244             tcg_temp_free_i64(tcg_zero);
 245             tcg_temp_free_i64(tcg_bit55);
 246             tcg_temp_free_i64(tcg_tmpval);
 247         }
 248     } else {  /* EL > 1 */
 249         if (s->tbi0) {
 250             /* Force tag byte to all zero */
 251             tcg_gen_andi_i64(cpu_pc, src, 0x00FFFFFFFFFFFFFFull);
 252         } else {
 253             /* Load unmodified address */
 254             tcg_gen_mov_i64(cpu_pc, src);
 255         }
 256     }
 257 }
 258
 259 typedef struct DisasCompare64 {
 260     TCGCond cond;
 261     TCGv_i64 value;
 262 } DisasCompare64;
 263
 264 static void a64_test_cc(DisasCompare64 *c64, int cc)
 265 {
 266     DisasCompare c32;
 267
 268     arm_test_cc(&c32, cc);
 269
 270     /* Sign-extend the 32-bit value so that the GE/LT comparisons work
 271        * properly.  The NE/EQ comparisons are also fine with this choice.  */
 272     c64->cond = c32.cond;
 273     c64->value = tcg_temp_new_i64();
 274     tcg_gen_ext_i32_i64(c64->value, c32.value);
 275
 276     arm_free_cc(&c32);
 277 }
 278
 279 static void a64_free_cc(DisasCompare64 *c64)
 280 {
 281     tcg_temp_free_i64(c64->value);
 282 }
 283
 284 static void gen_exception_internal(int excp)
 285 {
 286     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 287
 288     assert(excp_is_internal(excp));
 289     gen_helper_exception_internal(cpu_env, tcg_excp);
 290     tcg_temp_free_i32(tcg_excp);
 291 }
 292
 293 static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
 294 {
 295     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 296     TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
 297     TCGv_i32 tcg_el = tcg_const_i32(target_el);
 298
 299     gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
 300                                        tcg_syn, tcg_el);
 301     tcg_temp_free_i32(tcg_el);
 302     tcg_temp_free_i32(tcg_syn);
 303     tcg_temp_free_i32(tcg_excp);
 304 }
 305
 306 static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
 307 {
 308     gen_a64_set_pc_im(s->pc - offset);
 309     gen_exception_internal(excp);
 310     s->base.is_jmp = DISAS_NORETURN;
 311 }
 312
 313 static void gen_exception_insn(DisasContext *s, int offset, int excp,
 314                                uint32_t syndrome, uint32_t target_el)
 315 {
 316     gen_a64_set_pc_im(s->pc - offset);
 317     gen_exception(excp, syndrome, target_el);
 318     s->base.is_jmp = DISAS_NORETURN;
 319 }
 320
 321 static void gen_ss_advance(DisasContext *s)
 322 {
 323     /* If the singlestep state is Active-not-pending, advance to
 324      * Active-pending.
 325      */
 326     if (s->ss_active) {
 327         s->pstate_ss = 0;
 328         gen_helper_clear_pstate_ss(cpu_env);
 329     }
 330 }
 331
 332 static void gen_step_complete_exception(DisasContext *s)
 333 {
 334     /* We just completed step of an insn. Move from Active-not-pending
 335      * to Active-pending, and then also take the swstep exception.
 336      * This corresponds to making the (IMPDEF) choice to prioritize
 337      * swstep exceptions over asynchronous exceptions taken to an exception
 338      * level where debug is disabled. This choice has the advantage that
 339      * we do not need to maintain internal state corresponding to the
 340      * ISV/EX syndrome bits between completion of the step and generation
 341      * of the exception, and our syndrome information is always correct.
 342      */
 343     gen_ss_advance(s);
 344     gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
 345                   default_exception_el(s));
 346     s->base.is_jmp = DISAS_NORETURN;
 347 }
 348
 349 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
 350 {
 351     /* No direct tb linking with singlestep (either QEMU's or the ARM
 352      * debug architecture kind) or deterministic io
 353      */
 354     if (s->base.singlestep_enabled || s->ss_active ||
 355         (tb_cflags(s->base.tb) & CF_LAST_IO)) {
 356         return false;
 357     }
 358
 359 #ifndef CONFIG_USER_ONLY
 360     /* Only link tbs from inside the same guest page */
 361     if ((s->base.tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
 362         return false;
 363     }
 364 #endif
 365
 366     return true;
 367 }
 368
 369 static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
 370 {
 371     TranslationBlock *tb;
 372
 373     tb = s->base.tb;
 374     if (use_goto_tb(s, n, dest)) {
 375         tcg_gen_goto_tb(n);
 376         gen_a64_set_pc_im(dest);
 377         tcg_gen_exit_tb((intptr_t)tb + n);
 378         s->base.is_jmp = DISAS_NORETURN;
 379     } else {
 380         gen_a64_set_pc_im(dest);
 381         if (s->ss_active) {
 382             gen_step_complete_exception(s);
 383         } else if (s->base.singlestep_enabled) {
 384             gen_exception_internal(EXCP_DEBUG);
 385         } else {
 386             tcg_gen_lookup_and_goto_ptr();
 387             s->base.is_jmp = DISAS_NORETURN;
 388         }
 389     }
 390 }
 391
 392 static void unallocated_encoding(DisasContext *s)
 393 {
 394     /* Unallocated and reserved encodings are uncategorized */
 395     gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
 396                        default_exception_el(s));
 397 }
 398
 399 #define unsupported_encoding(s, insn)                                    \
 400     do {                                                                 \
 401         qemu_log_mask(LOG_UNIMP,                                         \
 402                       "%s:%d: unsupported instruction encoding 0x%08x "  \
 403                       "at pc=%016" PRIx64 "\n",                          \
 404                       __FILE__, __LINE__, insn, s->pc - 4);              \
 405         unallocated_encoding(s);                                         \
 406     } while (0)
 407
 408 static void init_tmp_a64_array(DisasContext *s)
 409 {
 410 #ifdef CONFIG_DEBUG_TCG
 411     memset(s->tmp_a64, 0, sizeof(s->tmp_a64));
 412 #endif
 413     s->tmp_a64_count = 0;
 414 }
 415
 416 static void free_tmp_a64(DisasContext *s)
 417 {
 418     int i;
 419     for (i = 0; i < s->tmp_a64_count; i++) {
 420         tcg_temp_free_i64(s->tmp_a64[i]);
 421     }
 422     init_tmp_a64_array(s);
 423 }
 424
 425 static TCGv_i64 new_tmp_a64(DisasContext *s)
 426 {
 427     assert(s->tmp_a64_count < TMP_A64_MAX);
 428     return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
 429 }
 430
 431 static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
 432 {
 433     TCGv_i64 t = new_tmp_a64(s);
 434     tcg_gen_movi_i64(t, 0);
 435     return t;
 436 }
 437
 438 /*
 439  * Register access functions
 440  *
 441  * These functions are used for directly accessing a register in where
 442  * changes to the final register value are likely to be made. If you
 443  * need to use a register for temporary calculation (e.g. index type
 444  * operations) use the read_* form.
 445  *
 446  * B1.2.1 Register mappings
 447  *
 448  * In instruction register encoding 31 can refer to ZR (zero register) or
 449  * the SP (stack pointer) depending on context. In QEMU's case we map SP
 450  * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 451  * This is the point of the _sp forms.
 452  */
 453 static TCGv_i64 cpu_reg(DisasContext *s, int reg)
 454 {
 455     if (reg == 31) {
 456         return new_tmp_a64_zero(s);
 457     } else {
 458         return cpu_X[reg];
 459     }
 460 }
 461
 462 /* register access for when 31 == SP */
 463 static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
 464 {
 465     return cpu_X[reg];
 466 }
 467
 468 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 469  * representing the register contents. This TCGv is an auto-freed
 470  * temporary so it need not be explicitly freed, and may be modified.
 471  */
 472 static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
 473 {
 474     TCGv_i64 v = new_tmp_a64(s);
 475     if (reg != 31) {
 476         if (sf) {
 477             tcg_gen_mov_i64(v, cpu_X[reg]);
 478         } else {
 479             tcg_gen_ext32u_i64(v, cpu_X[reg]);
 480         }
 481     } else {
 482         tcg_gen_movi_i64(v, 0);
 483     }
 484     return v;
 485 }
 486
 487 static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
 488 {
 489     TCGv_i64 v = new_tmp_a64(s);
 490     if (sf) {
 491         tcg_gen_mov_i64(v, cpu_X[reg]);
 492     } else {
 493         tcg_gen_ext32u_i64(v, cpu_X[reg]);
 494     }
 495     return v;
 496 }
 497
 498 /* We should have at some point before trying to access an FP register
 499  * done the necessary access check, so assert that
 500  * (a) we did the check and
 501  * (b) we didn't then just plough ahead anyway if it failed.
 502  * Print the instruction pattern in the abort message so we can figure
 503  * out what we need to fix if a user encounters this problem in the wild.
 504  */
 505 static inline void assert_fp_access_checked(DisasContext *s)
 506 {
 507 #ifdef CONFIG_DEBUG_TCG
 508     if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
 509         fprintf(stderr, "target-arm: FP access check missing for "
 510                 "instruction 0x%08x\n", s->insn);
 511         abort();
 512     }
 513 #endif
 514 }
 515
 516 /* Return the offset into CPUARMState of an element of specified
 517  * size, 'element' places in from the least significant end of
 518  * the FP/vector register Qn.
 519  */
 520 static inline int vec_reg_offset(DisasContext *s, int regno,
 521                                  int element, TCGMemOp size)
 522 {
 523     int offs = 0;
 524 #ifdef HOST_WORDS_BIGENDIAN
 525     /* This is complicated slightly because vfp.regs[2n] is
 526      * still the low half and  vfp.regs[2n+1] the high half
 527      * of the 128 bit vector, even on big endian systems.
 528      * Calculate the offset assuming a fully bigendian 128 bits,
 529      * then XOR to account for the order of the two 64 bit halves.
 530      */
 531     offs += (16 - ((element + 1) * (1 << size)));
 532     offs ^= 8;
 533 #else
 534     offs += element * (1 << size);
 535 #endif
 536     offs += offsetof(CPUARMState, vfp.regs[regno * 2]);
 537     assert_fp_access_checked(s);
 538     return offs;
 539 }
 540
 541 /* Return the offset info CPUARMState of the "whole" vector register Qn.  */
 542 static inline int vec_full_reg_offset(DisasContext *s, int regno)
 543 {
 544     assert_fp_access_checked(s);
 545     return offsetof(CPUARMState, vfp.regs[regno * 2]);
 546 }
 547
 548 /* Return a newly allocated pointer to the vector register.  */
 549 static TCGv_ptr vec_full_reg_ptr(DisasContext *s, int regno)
 550 {
 551     TCGv_ptr ret = tcg_temp_new_ptr();
 552     tcg_gen_addi_ptr(ret, cpu_env, vec_full_reg_offset(s, regno));
 553     return ret;
 554 }
 555
 556 /* Return the byte size of the "whole" vector register, VL / 8.  */
 557 static inline int vec_full_reg_size(DisasContext *s)
 558 {
 559     /* FIXME SVE: We should put the composite ZCR_EL* value into tb->flags.
 560        In the meantime this is just the AdvSIMD length of 128.  */
 561     return 128 / 8;
 562 }
 563
 564 /* Return the offset into CPUARMState of a slice (from
 565  * the least significant end) of FP register Qn (ie
 566  * Dn, Sn, Hn or Bn).
 567  * (Note that this is not the same mapping as for A32; see cpu.h)
 568  */
 569 static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
 570 {
 571     return vec_reg_offset(s, regno, 0, size);
 572 }
 573
 574 /* Offset of the high half of the 128 bit vector Qn */
 575 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
 576 {
 577     return vec_reg_offset(s, regno, 1, MO_64);
 578 }
 579
 580 /* Convenience accessors for reading and writing single and double
 581  * FP registers. Writing clears the upper parts of the associated
 582  * 128 bit vector register, as required by the architecture.
 583  * Note that unlike the GP register accessors, the values returned
 584  * by the read functions must be manually freed.
 585  */
 586 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
 587 {
 588     TCGv_i64 v = tcg_temp_new_i64();
 589
 590     tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 591     return v;
 592 }
 593
 594 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
 595 {
 596     TCGv_i32 v = tcg_temp_new_i32();
 597
 598     tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
 599     return v;
 600 }
 601
 602 static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
 603 {
 604     TCGv_i64 tcg_zero = tcg_const_i64(0);
 605
 606     tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 607     tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg));
 608     tcg_temp_free_i64(tcg_zero);
 609 }
 610
 611 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
 612 {
 613     TCGv_i64 tmp = tcg_temp_new_i64();
 614
 615     tcg_gen_extu_i32_i64(tmp, v);
 616     write_fp_dreg(s, reg, tmp);
 617     tcg_temp_free_i64(tmp);
 618 }
 619
 620 static TCGv_ptr get_fpstatus_ptr(void)
 621 {
 622     TCGv_ptr statusptr = tcg_temp_new_ptr();
 623     int offset;
 624
 625     /* In A64 all instructions (both FP and Neon) use the FPCR;
 626      * there is no equivalent of the A32 Neon "standard FPSCR value"
 627      * and all operations use vfp.fp_status.
 628      */
 629     offset = offsetof(CPUARMState, vfp.fp_status);
 630     tcg_gen_addi_ptr(statusptr, cpu_env, offset);
 631     return statusptr;
 632 }
 633
 634 /* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
 635 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
 636                          GVecGen3Fn *gvec_fn, int vece)
 637 {
 638     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 639             vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
 640 }
 641
 642 /* Expand a 3-operand AdvSIMD vector operation using an op descriptor.  */
 643 static void gen_gvec_op3(DisasContext *s, bool is_q, int rd,
 644                          int rn, int rm, const GVecGen3 *gvec_op)
 645 {
 646     tcg_gen_gvec_3(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 647                    vec_full_reg_offset(s, rm), is_q ? 16 : 8,
 648                    vec_full_reg_size(s), gvec_op);
 649 }
 650
 651 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 652  * than the 32 bit equivalent.
 653  */
 654 static inline void gen_set_NZ64(TCGv_i64 result)
 655 {
 656     tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
 657     tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
 658 }
 659
 660 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
 661 static inline void gen_logic_CC(int sf, TCGv_i64 result)
 662 {
 663     if (sf) {
 664         gen_set_NZ64(result);
 665     } else {
 666         tcg_gen_extrl_i64_i32(cpu_ZF, result);
 667         tcg_gen_mov_i32(cpu_NF, cpu_ZF);
 668     }
 669     tcg_gen_movi_i32(cpu_CF, 0);
 670     tcg_gen_movi_i32(cpu_VF, 0);
 671 }
 672
 673 /* dest = T0 + T1; compute C, N, V and Z flags */
 674 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 675 {
 676     if (sf) {
 677         TCGv_i64 result, flag, tmp;
 678         result = tcg_temp_new_i64();
 679         flag = tcg_temp_new_i64();
 680         tmp = tcg_temp_new_i64();
 681
 682         tcg_gen_movi_i64(tmp, 0);
 683         tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
 684
 685         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 686
 687         gen_set_NZ64(result);
 688
 689         tcg_gen_xor_i64(flag, result, t0);
 690         tcg_gen_xor_i64(tmp, t0, t1);
 691         tcg_gen_andc_i64(flag, flag, tmp);
 692         tcg_temp_free_i64(tmp);
 693         tcg_gen_extrh_i64_i32(cpu_VF, flag);
 694
 695         tcg_gen_mov_i64(dest, result);
 696         tcg_temp_free_i64(result);
 697         tcg_temp_free_i64(flag);
 698     } else {
 699         /* 32 bit arithmetic */
 700         TCGv_i32 t0_32 = tcg_temp_new_i32();
 701         TCGv_i32 t1_32 = tcg_temp_new_i32();
 702         TCGv_i32 tmp = tcg_temp_new_i32();
 703
 704         tcg_gen_movi_i32(tmp, 0);
 705         tcg_gen_extrl_i64_i32(t0_32, t0);
 706         tcg_gen_extrl_i64_i32(t1_32, t1);
 707         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
 708         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 709         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 710         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 711         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 712         tcg_gen_extu_i32_i64(dest, cpu_NF);
 713
 714         tcg_temp_free_i32(tmp);
 715         tcg_temp_free_i32(t0_32);
 716         tcg_temp_free_i32(t1_32);
 717     }
 718 }
 719
 720 /* dest = T0 - T1; compute C, N, V and Z flags */
 721 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 722 {
 723     if (sf) {
 724         /* 64 bit arithmetic */
 725         TCGv_i64 result, flag, tmp;
 726
 727         result = tcg_temp_new_i64();
 728         flag = tcg_temp_new_i64();
 729         tcg_gen_sub_i64(result, t0, t1);
 730
 731         gen_set_NZ64(result);
 732
 733         tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
 734         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 735
 736         tcg_gen_xor_i64(flag, result, t0);
 737         tmp = tcg_temp_new_i64();
 738         tcg_gen_xor_i64(tmp, t0, t1);
 739         tcg_gen_and_i64(flag, flag, tmp);
 740         tcg_temp_free_i64(tmp);
 741         tcg_gen_extrh_i64_i32(cpu_VF, flag);
 742         tcg_gen_mov_i64(dest, result);
 743         tcg_temp_free_i64(flag);
 744         tcg_temp_free_i64(result);
 745     } else {
 746         /* 32 bit arithmetic */
 747         TCGv_i32 t0_32 = tcg_temp_new_i32();
 748         TCGv_i32 t1_32 = tcg_temp_new_i32();
 749         TCGv_i32 tmp;
 750
 751         tcg_gen_extrl_i64_i32(t0_32, t0);
 752         tcg_gen_extrl_i64_i32(t1_32, t1);
 753         tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
 754         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 755         tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
 756         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 757         tmp = tcg_temp_new_i32();
 758         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 759         tcg_temp_free_i32(t0_32);
 760         tcg_temp_free_i32(t1_32);
 761         tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 762         tcg_temp_free_i32(tmp);
 763         tcg_gen_extu_i32_i64(dest, cpu_NF);
 764     }
 765 }
 766
 767 /* dest = T0 + T1 + CF; do not compute flags. */
 768 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 769 {
 770     TCGv_i64 flag = tcg_temp_new_i64();
 771     tcg_gen_extu_i32_i64(flag, cpu_CF);
 772     tcg_gen_add_i64(dest, t0, t1);
 773     tcg_gen_add_i64(dest, dest, flag);
 774     tcg_temp_free_i64(flag);
 775
 776     if (!sf) {
 777         tcg_gen_ext32u_i64(dest, dest);
 778     }
 779 }
 780
 781 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
 782 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 783 {
 784     if (sf) {
 785         TCGv_i64 result, cf_64, vf_64, tmp;
 786         result = tcg_temp_new_i64();
 787         cf_64 = tcg_temp_new_i64();
 788         vf_64 = tcg_temp_new_i64();
 789         tmp = tcg_const_i64(0);
 790
 791         tcg_gen_extu_i32_i64(cf_64, cpu_CF);
 792         tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
 793         tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
 794         tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
 795         gen_set_NZ64(result);
 796
 797         tcg_gen_xor_i64(vf_64, result, t0);
 798         tcg_gen_xor_i64(tmp, t0, t1);
 799         tcg_gen_andc_i64(vf_64, vf_64, tmp);
 800         tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
 801
 802         tcg_gen_mov_i64(dest, result);
 803
 804         tcg_temp_free_i64(tmp);
 805         tcg_temp_free_i64(vf_64);
 806         tcg_temp_free_i64(cf_64);
 807         tcg_temp_free_i64(result);
 808     } else {
 809         TCGv_i32 t0_32, t1_32, tmp;
 810         t0_32 = tcg_temp_new_i32();
 811         t1_32 = tcg_temp_new_i32();
 812         tmp = tcg_const_i32(0);
 813
 814         tcg_gen_extrl_i64_i32(t0_32, t0);
 815         tcg_gen_extrl_i64_i32(t1_32, t1);
 816         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
 817         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);
 818
 819         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 820         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 821         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 822         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 823         tcg_gen_extu_i32_i64(dest, cpu_NF);
 824
 825         tcg_temp_free_i32(tmp);
 826         tcg_temp_free_i32(t1_32);
 827         tcg_temp_free_i32(t0_32);
 828     }
 829 }
 830
 831 /*
 832  * Load/Store generators
 833  */
 834
 835 /*
 836  * Store from GPR register to memory.
 837  */
 838 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
 839                              TCGv_i64 tcg_addr, int size, int memidx,
 840                              bool iss_valid,
 841                              unsigned int iss_srt,
 842                              bool iss_sf, bool iss_ar)
 843 {
 844     g_assert(size <= 3);
 845     tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);
 846
 847     if (iss_valid) {
 848         uint32_t syn;
 849
 850         syn = syn_data_abort_with_iss(0,
 851                                       size,
 852                                       false,
 853                                       iss_srt,
 854                                       iss_sf,
 855                                       iss_ar,
 856                                       0, 0, 0, 0, 0, false);
 857         disas_set_insn_syndrome(s, syn);
 858     }
 859 }
 860
 861 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
 862                       TCGv_i64 tcg_addr, int size,
 863                       bool iss_valid,
 864                       unsigned int iss_srt,
 865                       bool iss_sf, bool iss_ar)
 866 {
 867     do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
 868                      iss_valid, iss_srt, iss_sf, iss_ar);
 869 }
 870
 871 /*
 872  * Load from memory to GPR register
 873  */
 874 static void do_gpr_ld_memidx(DisasContext *s,
 875                              TCGv_i64 dest, TCGv_i64 tcg_addr,
 876                              int size, bool is_signed,
 877                              bool extend, int memidx,
 878                              bool iss_valid, unsigned int iss_srt,
 879                              bool iss_sf, bool iss_ar)
 880 {
 881     TCGMemOp memop = s->be_data + size;
 882
 883     g_assert(size <= 3);
 884
 885     if (is_signed) {
 886         memop += MO_SIGN;
 887     }
 888
 889     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
 890
 891     if (extend && is_signed) {
 892         g_assert(size < 3);
 893         tcg_gen_ext32u_i64(dest, dest);
 894     }
 895
 896     if (iss_valid) {
 897         uint32_t syn;
 898
 899         syn = syn_data_abort_with_iss(0,
 900                                       size,
 901                                       is_signed,
 902                                       iss_srt,
 903                                       iss_sf,
 904                                       iss_ar,
 905                                       0, 0, 0, 0, 0, false);
 906         disas_set_insn_syndrome(s, syn);
 907     }
 908 }
 909
 910 static void do_gpr_ld(DisasContext *s,
 911                       TCGv_i64 dest, TCGv_i64 tcg_addr,
 912                       int size, bool is_signed, bool extend,
 913                       bool iss_valid, unsigned int iss_srt,
 914                       bool iss_sf, bool iss_ar)
 915 {
 916     do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
 917                      get_mem_index(s),
 918                      iss_valid, iss_srt, iss_sf, iss_ar);
 919 }
 920
 921 /*
 922  * Store from FP register to memory
 923  */
 924 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
 925 {
 926     /* This writes the bottom N bits of a 128 bit wide vector to memory */
 927     TCGv_i64 tmp = tcg_temp_new_i64();
 928     tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
 929     if (size < 4) {
 930         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
 931                             s->be_data + size);
 932     } else {
 933         bool be = s->be_data == MO_BE;
 934         TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
 935
 936         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 937         tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
 938                             s->be_data | MO_Q);
 939         tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
 940         tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
 941                             s->be_data | MO_Q);
 942         tcg_temp_free_i64(tcg_hiaddr);
 943     }
 944
 945     tcg_temp_free_i64(tmp);
 946 }
 947
 948 /*
 949  * Load from memory to FP register
 950  */
 951 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
 952 {
 953     /* This always zero-extends and writes to a full 128 bit wide vector */
 954     TCGv_i64 tmplo = tcg_temp_new_i64();
 955     TCGv_i64 tmphi;
 956
 957     if (size < 4) {
 958         TCGMemOp memop = s->be_data + size;
 959         tmphi = tcg_const_i64(0);
 960         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
 961     } else {
 962         bool be = s->be_data == MO_BE;
 963         TCGv_i64 tcg_hiaddr;
 964
 965         tmphi = tcg_temp_new_i64();
 966         tcg_hiaddr = tcg_temp_new_i64();
 967
 968         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 969         tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
 970                             s->be_data | MO_Q);
 971         tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
 972                             s->be_data | MO_Q);
 973         tcg_temp_free_i64(tcg_hiaddr);
 974     }
 975
 976     tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
 977     tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
 978
 979     tcg_temp_free_i64(tmplo);
 980     tcg_temp_free_i64(tmphi);
 981 }
 982
 983 /*
 984  * Vector load/store helpers.
 985  *
 986  * The principal difference between this and a FP load is that we don't
 987  * zero extend as we are filling a partial chunk of the vector register.
 988  * These functions don't support 128 bit loads/stores, which would be
 989  * normal load/store operations.
 990  *
 991  * The _i32 versions are useful when operating on 32 bit quantities
 992  * (eg for floating point single or using Neon helper functions).
 993  */
 994
 995 /* Get value of an element within a vector register */
 996 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
 997                              int element, TCGMemOp memop)
 998 {
 999     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1000     switch (memop) {
1001     case MO_8:
1002         tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
1003         break;
1004     case MO_16:
1005         tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
1006         break;
1007     case MO_32:
1008         tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
1009         break;
1010     case MO_8|MO_SIGN:
1011         tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
1012         break;
1013     case MO_16|MO_SIGN:
1014         tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
1015         break;
1016     case MO_32|MO_SIGN:
1017         tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
1018         break;
1019     case MO_64:
1020     case MO_64|MO_SIGN:
1021         tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
1022         break;
1023     default:
1024         g_assert_not_reached();
1025     }
1026 }
1027
1028 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1029                                  int element, TCGMemOp memop)
1030 {
1031     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1032     switch (memop) {
1033     case MO_8:
1034         tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
1035         break;
1036     case MO_16:
1037         tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
1038         break;
1039     case MO_8|MO_SIGN:
1040         tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
1041         break;
1042     case MO_16|MO_SIGN:
1043         tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
1044         break;
1045     case MO_32:
1046     case MO_32|MO_SIGN:
1047         tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
1048         break;
1049     default:
1050         g_assert_not_reached();
1051     }
1052 }
1053
1054 /* Set value of an element within a vector register */
1055 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1056                               int element, TCGMemOp memop)
1057 {
1058     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1059     switch (memop) {
1060     case MO_8:
1061         tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
1062         break;
1063     case MO_16:
1064         tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
1065         break;
1066     case MO_32:
1067         tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
1068         break;
1069     case MO_64:
1070         tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
1071         break;
1072     default:
1073         g_assert_not_reached();
1074     }
1075 }
1076
1077 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1078                                   int destidx, int element, TCGMemOp memop)
1079 {
1080     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1081     switch (memop) {
1082     case MO_8:
1083         tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
1084         break;
1085     case MO_16:
1086         tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
1087         break;
1088     case MO_32:
1089         tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
1090         break;
1091     default:
1092         g_assert_not_reached();
1093     }
1094 }
1095
1096 /* Clear the high 64 bits of a 128 bit vector (in general non-quad
1097  * vector ops all need to do this).
1098  */
1099 static void clear_vec_high(DisasContext *s, int rd)
1100 {
1101     TCGv_i64 tcg_zero = tcg_const_i64(0);
1102
1103     write_vec_element(s, tcg_zero, rd, 1, MO_64);
1104     tcg_temp_free_i64(tcg_zero);
1105 }
1106
1107 /* Store from vector register to memory */
1108 static void do_vec_st(DisasContext *s, int srcidx, int element,
1109                       TCGv_i64 tcg_addr, int size)
1110 {
1111     TCGMemOp memop = s->be_data + size;
1112     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1113
1114     read_vec_element(s, tcg_tmp, srcidx, element, size);
1115     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
1116
1117     tcg_temp_free_i64(tcg_tmp);
1118 }
1119
1120 /* Load from memory to vector register */
1121 static void do_vec_ld(DisasContext *s, int destidx, int element,
1122                       TCGv_i64 tcg_addr, int size)
1123 {
1124     TCGMemOp memop = s->be_data + size;
1125     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1126
1127     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
1128     write_vec_element(s, tcg_tmp, destidx, element, size);
1129
1130     tcg_temp_free_i64(tcg_tmp);
1131 }
1132
1133 /* Check that FP/Neon access is enabled. If it is, return
1134  * true. If not, emit code to generate an appropriate exception,
1135  * and return false; the caller should not emit any code for
1136  * the instruction. Note that this check must happen after all
1137  * unallocated-encoding checks (otherwise the syndrome information
1138  * for the resulting exception will be incorrect).
1139  */
1140 static inline bool fp_access_check(DisasContext *s)
1141 {
1142     assert(!s->fp_access_checked);
1143     s->fp_access_checked = true;
1144
1145     if (!s->fp_excp_el) {
1146         return true;
1147     }
1148
1149     gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
1150                        s->fp_excp_el);
1151     return false;
1152 }
1153
1154 /*
1155  * This utility function is for doing register extension with an
1156  * optional shift. You will likely want to pass a temporary for the
1157  * destination register. See DecodeRegExtend() in the ARM ARM.
1158  */
1159 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1160                               int option, unsigned int shift)
1161 {
1162     int extsize = extract32(option, 0, 2);
1163     bool is_signed = extract32(option, 2, 1);
1164
1165     if (is_signed) {
1166         switch (extsize) {
1167         case 0:
1168             tcg_gen_ext8s_i64(tcg_out, tcg_in);
1169             break;
1170         case 1:
1171             tcg_gen_ext16s_i64(tcg_out, tcg_in);
1172             break;
1173         case 2:
1174             tcg_gen_ext32s_i64(tcg_out, tcg_in);
1175             break;
1176         case 3:
1177             tcg_gen_mov_i64(tcg_out, tcg_in);
1178             break;
1179         }
1180     } else {
1181         switch (extsize) {
1182         case 0:
1183             tcg_gen_ext8u_i64(tcg_out, tcg_in);
1184             break;
1185         case 1:
1186             tcg_gen_ext16u_i64(tcg_out, tcg_in);
1187             break;
1188         case 2:
1189             tcg_gen_ext32u_i64(tcg_out, tcg_in);
1190             break;
1191         case 3:
1192             tcg_gen_mov_i64(tcg_out, tcg_in);
1193             break;
1194         }
1195     }
1196
1197     if (shift) {
1198         tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1199     }
1200 }
1201
1202 static inline void gen_check_sp_alignment(DisasContext *s)
1203 {
1204     /* The AArch64 architecture mandates that (if enabled via PSTATE
1205      * or SCTLR bits) there is a check that SP is 16-aligned on every
1206      * SP-relative load or store (with an exception generated if it is not).
1207      * In line with general QEMU practice regarding misaligned accesses,
1208      * we omit these checks for the sake of guest program performance.
1209      * This function is provided as a hook so we can more easily add these
1210      * checks in future (possibly as a "favour catching guest program bugs
1211      * over speed" user selectable option).
1212      */
1213 }
1214
1215 /*
1216  * This provides a simple table based table lookup decoder. It is
1217  * intended to be used when the relevant bits for decode are too
1218  * awkwardly placed and switch/if based logic would be confusing and
1219  * deeply nested. Since it's a linear search through the table, tables
1220  * should be kept small.
1221  *
1222  * It returns the first handler where insn & mask == pattern, or
1223  * NULL if there is no match.
1224  * The table is terminated by an empty mask (i.e. 0)
1225  */
1226 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1227                                                uint32_t insn)
1228 {
1229     const AArch64DecodeTable *tptr = table;
1230
1231     while (tptr->mask) {
1232         if ((insn & tptr->mask) == tptr->pattern) {
1233             return tptr->disas_fn;
1234         }
1235         tptr++;
1236     }
1237     return NULL;
1238 }
1239
1240 /*
1241  * The instruction disassembly implemented here matches
1242  * the instruction encoding classifications in chapter C4
1243  * of the ARM Architecture Reference Manual (DDI0487B_a);
1244  * classification names and decode diagrams here should generally
1245  * match up with those in the manual.
1246  */
1247
1248 /* Unconditional branch (immediate)
1249  *   31  30       26 25                                  0
1250  * +----+-----------+-------------------------------------+
1251  * | op | 0 0 1 0 1 |                 imm26               |
1252  * +----+-----------+-------------------------------------+
1253  */
1254 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1255 {
1256     uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
1257
1258     if (insn & (1U << 31)) {
1259         /* BL Branch with link */
1260         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1261     }
1262
1263     /* B Branch / BL Branch with link */
1264     gen_goto_tb(s, 0, addr);
1265 }
1266
1267 /* Compare and branch (immediate)
1268  *   31  30         25  24  23                  5 4      0
1269  * +----+-------------+----+---------------------+--------+
1270  * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
1271  * +----+-------------+----+---------------------+--------+
1272  */
1273 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1274 {
1275     unsigned int sf, op, rt;
1276     uint64_t addr;
1277     TCGLabel *label_match;
1278     TCGv_i64 tcg_cmp;
1279
1280     sf = extract32(insn, 31, 1);
1281     op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1282     rt = extract32(insn, 0, 5);
1283     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1284
1285     tcg_cmp = read_cpu_reg(s, rt, sf);
1286     label_match = gen_new_label();
1287
1288     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1289                         tcg_cmp, 0, label_match);
1290
1291     gen_goto_tb(s, 0, s->pc);
1292     gen_set_label(label_match);
1293     gen_goto_tb(s, 1, addr);
1294 }
1295
1296 /* Test and branch (immediate)
1297  *   31  30         25  24  23   19 18          5 4    0
1298  * +----+-------------+----+-------+-------------+------+
1299  * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
1300  * +----+-------------+----+-------+-------------+------+
1301  */
1302 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1303 {
1304     unsigned int bit_pos, op, rt;
1305     uint64_t addr;
1306     TCGLabel *label_match;
1307     TCGv_i64 tcg_cmp;
1308
1309     bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1310     op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1311     addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
1312     rt = extract32(insn, 0, 5);
1313
1314     tcg_cmp = tcg_temp_new_i64();
1315     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1316     label_match = gen_new_label();
1317     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1318                         tcg_cmp, 0, label_match);
1319     tcg_temp_free_i64(tcg_cmp);
1320     gen_goto_tb(s, 0, s->pc);
1321     gen_set_label(label_match);
1322     gen_goto_tb(s, 1, addr);
1323 }
1324
1325 /* Conditional branch (immediate)
1326  *  31           25  24  23                  5   4  3    0
1327  * +---------------+----+---------------------+----+------+
1328  * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
1329  * +---------------+----+---------------------+----+------+
1330  */
1331 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1332 {
1333     unsigned int cond;
1334     uint64_t addr;
1335
1336     if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1337         unallocated_encoding(s);
1338         return;
1339     }
1340     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1341     cond = extract32(insn, 0, 4);
1342
1343     if (cond < 0x0e) {
1344         /* genuinely conditional branches */
1345         TCGLabel *label_match = gen_new_label();
1346         arm_gen_test_cc(cond, label_match);
1347         gen_goto_tb(s, 0, s->pc);
1348         gen_set_label(label_match);
1349         gen_goto_tb(s, 1, addr);
1350     } else {
1351         /* 0xe and 0xf are both "always" conditions */
1352         gen_goto_tb(s, 0, addr);
1353     }
1354 }
1355
1356 /* HINT instruction group, including various allocated HINTs */
1357 static void handle_hint(DisasContext *s, uint32_t insn,
1358                         unsigned int op1, unsigned int op2, unsigned int crm)
1359 {
1360     unsigned int selector = crm << 3 | op2;
1361
1362     if (op1 != 3) {
1363         unallocated_encoding(s);
1364         return;
1365     }
1366
1367     switch (selector) {
1368     case 0: /* NOP */
1369         return;
1370     case 3: /* WFI */
1371         s->base.is_jmp = DISAS_WFI;
1372         return;
1373         /* When running in MTTCG we don't generate jumps to the yield and
1374          * WFE helpers as it won't affect the scheduling of other vCPUs.
1375          * If we wanted to more completely model WFE/SEV so we don't busy
1376          * spin unnecessarily we would need to do something more involved.
1377          */
1378     case 1: /* YIELD */
1379         if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1380             s->base.is_jmp = DISAS_YIELD;
1381         }
1382         return;
1383     case 2: /* WFE */
1384         if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1385             s->base.is_jmp = DISAS_WFE;
1386         }
1387         return;
1388     case 4: /* SEV */
1389     case 5: /* SEVL */
1390         /* we treat all as NOP at least for now */
1391         return;
1392     default:
1393         /* default specified as NOP equivalent */
1394         return;
1395     }
1396 }
1397
1398 static void gen_clrex(DisasContext *s, uint32_t insn)
1399 {
1400     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1401 }
1402
1403 /* CLREX, DSB, DMB, ISB */
1404 static void handle_sync(DisasContext *s, uint32_t insn,
1405                         unsigned int op1, unsigned int op2, unsigned int crm)
1406 {
1407     TCGBar bar;
1408
1409     if (op1 != 3) {
1410         unallocated_encoding(s);
1411         return;
1412     }
1413
1414     switch (op2) {
1415     case 2: /* CLREX */
1416         gen_clrex(s, insn);
1417         return;
1418     case 4: /* DSB */
1419     case 5: /* DMB */
1420         switch (crm & 3) {
1421         case 1: /* MBReqTypes_Reads */
1422             bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1423             break;
1424         case 2: /* MBReqTypes_Writes */
1425             bar = TCG_BAR_SC | TCG_MO_ST_ST;
1426             break;
1427         default: /* MBReqTypes_All */
1428             bar = TCG_BAR_SC | TCG_MO_ALL;
1429             break;
1430         }
1431         tcg_gen_mb(bar);
1432         return;
1433     case 6: /* ISB */
1434         /* We need to break the TB after this insn to execute
1435          * a self-modified code correctly and also to take
1436          * any pending interrupts immediately.
1437          */
1438         gen_goto_tb(s, 0, s->pc);
1439         return;
1440     default:
1441         unallocated_encoding(s);
1442         return;
1443     }
1444 }
1445
1446 /* MSR (immediate) - move immediate to processor state field */
1447 static void handle_msr_i(DisasContext *s, uint32_t insn,
1448                          unsigned int op1, unsigned int op2, unsigned int crm)
1449 {
1450     int op = op1 << 3 | op2;
1451     switch (op) {
1452     case 0x05: /* SPSel */
1453         if (s->current_el == 0) {
1454             unallocated_encoding(s);
1455             return;
1456         }
1457         /* fall through */
1458     case 0x1e: /* DAIFSet */
1459     case 0x1f: /* DAIFClear */
1460     {
1461         TCGv_i32 tcg_imm = tcg_const_i32(crm);
1462         TCGv_i32 tcg_op = tcg_const_i32(op);
1463         gen_a64_set_pc_im(s->pc - 4);
1464         gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
1465         tcg_temp_free_i32(tcg_imm);
1466         tcg_temp_free_i32(tcg_op);
1467         /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs.  */
1468         gen_a64_set_pc_im(s->pc);
1469         s->base.is_jmp = (op == 0x1f ? DISAS_EXIT : DISAS_JUMP);
1470         break;
1471     }
1472     default:
1473         unallocated_encoding(s);
1474         return;
1475     }
1476 }
1477
1478 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1479 {
1480     TCGv_i32 tmp = tcg_temp_new_i32();
1481     TCGv_i32 nzcv = tcg_temp_new_i32();
1482
1483     /* build bit 31, N */
1484     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1485     /* build bit 30, Z */
1486     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1487     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1488     /* build bit 29, C */
1489     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1490     /* build bit 28, V */
1491     tcg_gen_shri_i32(tmp, cpu_VF, 31);
1492     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1493     /* generate result */
1494     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1495
1496     tcg_temp_free_i32(nzcv);
1497     tcg_temp_free_i32(tmp);
1498 }
1499
1500 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1501
1502 {
1503     TCGv_i32 nzcv = tcg_temp_new_i32();
1504
1505     /* take NZCV from R[t] */
1506     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1507
1508     /* bit 31, N */
1509     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1510     /* bit 30, Z */
1511     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1512     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1513     /* bit 29, C */
1514     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1515     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1516     /* bit 28, V */
1517     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1518     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1519     tcg_temp_free_i32(nzcv);
1520 }
1521
1522 /* MRS - move from system register
1523  * MSR (register) - move to system register
1524  * SYS
1525  * SYSL
1526  * These are all essentially the same insn in 'read' and 'write'
1527  * versions, with varying op0 fields.
1528  */
1529 static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1530                        unsigned int op0, unsigned int op1, unsigned int op2,
1531                        unsigned int crn, unsigned int crm, unsigned int rt)
1532 {
1533     const ARMCPRegInfo *ri;
1534     TCGv_i64 tcg_rt;
1535
1536     ri = get_arm_cp_reginfo(s->cp_regs,
1537                             ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1538                                                crn, crm, op0, op1, op2));
1539
1540     if (!ri) {
1541         /* Unknown register; this might be a guest error or a QEMU
1542          * unimplemented feature.
1543          */
1544         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1545                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1546                       isread ? "read" : "write", op0, op1, crn, crm, op2);
1547         unallocated_encoding(s);
1548         return;
1549     }
1550
1551     /* Check access permissions */
1552     if (!cp_access_ok(s->current_el, ri, isread)) {
1553         unallocated_encoding(s);
1554         return;
1555     }
1556
1557     if (ri->accessfn) {
1558         /* Emit code to perform further access permissions checks at
1559          * runtime; this may result in an exception.
1560          */
1561         TCGv_ptr tmpptr;
1562         TCGv_i32 tcg_syn, tcg_isread;
1563         uint32_t syndrome;
1564
1565         gen_a64_set_pc_im(s->pc - 4);
1566         tmpptr = tcg_const_ptr(ri);
1567         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1568         tcg_syn = tcg_const_i32(syndrome);
1569         tcg_isread = tcg_const_i32(isread);
1570         gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
1571         tcg_temp_free_ptr(tmpptr);
1572         tcg_temp_free_i32(tcg_syn);
1573         tcg_temp_free_i32(tcg_isread);
1574     }
1575
1576     /* Handle special cases first */
1577     switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
1578     case ARM_CP_NOP:
1579         return;
1580     case ARM_CP_NZCV:
1581         tcg_rt = cpu_reg(s, rt);
1582         if (isread) {
1583             gen_get_nzcv(tcg_rt);
1584         } else {
1585             gen_set_nzcv(tcg_rt);
1586         }
1587         return;
1588     case ARM_CP_CURRENTEL:
1589         /* Reads as current EL value from pstate, which is
1590          * guaranteed to be constant by the tb flags.
1591          */
1592         tcg_rt = cpu_reg(s, rt);
1593         tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
1594         return;
1595     case ARM_CP_DC_ZVA:
1596         /* Writes clear the aligned block of memory which rt points into. */
1597         tcg_rt = cpu_reg(s, rt);
1598         gen_helper_dc_zva(cpu_env, tcg_rt);
1599         return;
1600     default:
1601         break;
1602     }
1603
1604     if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1605         gen_io_start();
1606     }
1607
1608     tcg_rt = cpu_reg(s, rt);
1609
1610     if (isread) {
1611         if (ri->type & ARM_CP_CONST) {
1612             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
1613         } else if (ri->readfn) {
1614             TCGv_ptr tmpptr;
1615             tmpptr = tcg_const_ptr(ri);
1616             gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
1617             tcg_temp_free_ptr(tmpptr);
1618         } else {
1619             tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
1620         }
1621     } else {
1622         if (ri->type & ARM_CP_CONST) {
1623             /* If not forbidden by access permissions, treat as WI */
1624             return;
1625         } else if (ri->writefn) {
1626             TCGv_ptr tmpptr;
1627             tmpptr = tcg_const_ptr(ri);
1628             gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
1629             tcg_temp_free_ptr(tmpptr);
1630         } else {
1631             tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
1632         }
1633     }
1634
1635     if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1636         /* I/O operations must end the TB here (whether read or write) */
1637         gen_io_end();
1638         s->base.is_jmp = DISAS_UPDATE;
1639     } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
1640         /* We default to ending the TB on a coprocessor register write,
1641          * but allow this to be suppressed by the register definition
1642          * (usually only necessary to work around guest bugs).
1643          */
1644         s->base.is_jmp = DISAS_UPDATE;
1645     }
1646 }
1647
1648 /* System
1649  *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1650  * +---------------------+---+-----+-----+-------+-------+-----+------+
1651  * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1652  * +---------------------+---+-----+-----+-------+-------+-----+------+
1653  */
1654 static void disas_system(DisasContext *s, uint32_t insn)
1655 {
1656     unsigned int l, op0, op1, crn, crm, op2, rt;
1657     l = extract32(insn, 21, 1);
1658     op0 = extract32(insn, 19, 2);
1659     op1 = extract32(insn, 16, 3);
1660     crn = extract32(insn, 12, 4);
1661     crm = extract32(insn, 8, 4);
1662     op2 = extract32(insn, 5, 3);
1663     rt = extract32(insn, 0, 5);
1664
1665     if (op0 == 0) {
1666         if (l || rt != 31) {
1667             unallocated_encoding(s);
1668             return;
1669         }
1670         switch (crn) {
1671         case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
1672             handle_hint(s, insn, op1, op2, crm);
1673             break;
1674         case 3: /* CLREX, DSB, DMB, ISB */
1675             handle_sync(s, insn, op1, op2, crm);
1676             break;
1677         case 4: /* MSR (immediate) */
1678             handle_msr_i(s, insn, op1, op2, crm);
1679             break;
1680         default:
1681             unallocated_encoding(s);
1682             break;
1683         }
1684         return;
1685     }
1686     handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1687 }
1688
1689 /* Exception generation
1690  *
1691  *  31             24 23 21 20                     5 4   2 1  0
1692  * +-----------------+-----+------------------------+-----+----+
1693  * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
1694  * +-----------------------+------------------------+----------+
1695  */
1696 static void disas_exc(DisasContext *s, uint32_t insn)
1697 {
1698     int opc = extract32(insn, 21, 3);
1699     int op2_ll = extract32(insn, 0, 5);
1700     int imm16 = extract32(insn, 5, 16);
1701     TCGv_i32 tmp;
1702
1703     switch (opc) {
1704     case 0:
1705         /* For SVC, HVC and SMC we advance the single-step state
1706          * machine before taking the exception. This is architecturally
1707          * mandated, to ensure that single-stepping a system call
1708          * instruction works properly.
1709          */
1710         switch (op2_ll) {
1711         case 1:                                                     /* SVC */
1712             gen_ss_advance(s);
1713             gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
1714                                default_exception_el(s));
1715             break;
1716         case 2:                                                     /* HVC */
1717             if (s->current_el == 0) {
1718                 unallocated_encoding(s);
1719                 break;
1720             }
1721             /* The pre HVC helper handles cases when HVC gets trapped
1722              * as an undefined insn by runtime configuration.
1723              */
1724             gen_a64_set_pc_im(s->pc - 4);
1725             gen_helper_pre_hvc(cpu_env);
1726             gen_ss_advance(s);
1727             gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
1728             break;
1729         case 3:                                                     /* SMC */
1730             if (s->current_el == 0) {
1731                 unallocated_encoding(s);
1732                 break;
1733             }
1734             gen_a64_set_pc_im(s->pc - 4);
1735             tmp = tcg_const_i32(syn_aa64_smc(imm16));
1736             gen_helper_pre_smc(cpu_env, tmp);
1737             tcg_temp_free_i32(tmp);
1738             gen_ss_advance(s);
1739             gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
1740             break;
1741         default:
1742             unallocated_encoding(s);
1743             break;
1744         }
1745         break;
1746     case 1:
1747         if (op2_ll != 0) {
1748             unallocated_encoding(s);
1749             break;
1750         }
1751         /* BRK */
1752         gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16),
1753                            default_exception_el(s));
1754         break;
1755     case 2:
1756         if (op2_ll != 0) {
1757             unallocated_encoding(s);
1758             break;
1759         }
1760         /* HLT. This has two purposes.
1761          * Architecturally, it is an external halting debug instruction.
1762          * Since QEMU doesn't implement external debug, we treat this as
1763          * it is required for halting debug disabled: it will UNDEF.
1764          * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
1765          */
1766         if (semihosting_enabled() && imm16 == 0xf000) {
1767 #ifndef CONFIG_USER_ONLY
1768             /* In system mode, don't allow userspace access to semihosting,
1769              * to provide some semblance of security (and for consistency
1770              * with our 32-bit semihosting).
1771              */
1772             if (s->current_el == 0) {
1773                 unsupported_encoding(s, insn);
1774                 break;
1775             }
1776 #endif
1777             gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
1778         } else {
1779             unsupported_encoding(s, insn);
1780         }
1781         break;
1782     case 5:
1783         if (op2_ll < 1 || op2_ll > 3) {
1784             unallocated_encoding(s);
1785             break;
1786         }
1787         /* DCPS1, DCPS2, DCPS3 */
1788         unsupported_encoding(s, insn);
1789         break;
1790     default:
1791         unallocated_encoding(s);
1792         break;
1793     }
1794 }
1795
1796 /* Unconditional branch (register)
1797  *  31           25 24   21 20   16 15   10 9    5 4     0
1798  * +---------------+-------+-------+-------+------+-------+
1799  * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1800  * +---------------+-------+-------+-------+------+-------+
1801  */
1802 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1803 {
1804     unsigned int opc, op2, op3, rn, op4;
1805
1806     opc = extract32(insn, 21, 4);
1807     op2 = extract32(insn, 16, 5);
1808     op3 = extract32(insn, 10, 6);
1809     rn = extract32(insn, 5, 5);
1810     op4 = extract32(insn, 0, 5);
1811
1812     if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1813         unallocated_encoding(s);
1814         return;
1815     }
1816
1817     switch (opc) {
1818     case 0: /* BR */
1819     case 1: /* BLR */
1820     case 2: /* RET */
1821         gen_a64_set_pc(s, cpu_reg(s, rn));
1822         /* BLR also needs to load return address */
1823         if (opc == 1) {
1824             tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1825         }
1826         break;
1827     case 4: /* ERET */
1828         if (s->current_el == 0) {
1829             unallocated_encoding(s);
1830             return;
1831         }
1832         gen_helper_exception_return(cpu_env);
1833         /* Must exit loop to check un-masked IRQs */
1834         s->base.is_jmp = DISAS_EXIT;
1835         return;
1836     case 5: /* DRPS */
1837         if (rn != 0x1f) {
1838             unallocated_encoding(s);
1839         } else {
1840             unsupported_encoding(s, insn);
1841         }
1842         return;
1843     default:
1844         unallocated_encoding(s);
1845         return;
1846     }
1847
1848     s->base.is_jmp = DISAS_JUMP;
1849 }
1850
1851 /* Branches, exception generating and system instructions */
1852 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1853 {
1854     switch (extract32(insn, 25, 7)) {
1855     case 0x0a: case 0x0b:
1856     case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1857         disas_uncond_b_imm(s, insn);
1858         break;
1859     case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1860         disas_comp_b_imm(s, insn);
1861         break;
1862     case 0x1b: case 0x5b: /* Test & branch (immediate) */
1863         disas_test_b_imm(s, insn);
1864         break;
1865     case 0x2a: /* Conditional branch (immediate) */
1866         disas_cond_b_imm(s, insn);
1867         break;
1868     case 0x6a: /* Exception generation / System */
1869         if (insn & (1 << 24)) {
1870             disas_system(s, insn);
1871         } else {
1872             disas_exc(s, insn);
1873         }
1874         break;
1875     case 0x6b: /* Unconditional branch (register) */
1876         disas_uncond_b_reg(s, insn);
1877         break;
1878     default:
1879         unallocated_encoding(s);
1880         break;
1881     }
1882 }
1883
1884 /*
1885  * Load/Store exclusive instructions are implemented by remembering
1886  * the value/address loaded, and seeing if these are the same
1887  * when the store is performed. This is not actually the architecturally
1888  * mandated semantics, but it works for typical guest code sequences
1889  * and avoids having to monitor regular stores.
1890  *
1891  * The store exclusive uses the atomic cmpxchg primitives to avoid
1892  * races in multi-threaded linux-user and when MTTCG softmmu is
1893  * enabled.
1894  */
1895 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
1896                                TCGv_i64 addr, int size, bool is_pair)
1897 {
1898     int idx = get_mem_index(s);
1899     TCGMemOp memop = s->be_data;
1900
1901     g_assert(size <= 3);
1902     if (is_pair) {
1903         g_assert(size >= 2);
1904         if (size == 2) {
1905             /* The pair must be single-copy atomic for the doubleword.  */
1906             memop |= MO_64 | MO_ALIGN;
1907             tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
1908             if (s->be_data == MO_LE) {
1909                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
1910                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
1911             } else {
1912                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
1913                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
1914             }
1915         } else {
1916             /* The pair must be single-copy atomic for *each* doubleword, not
1917                the entire quadword, however it must be quadword aligned.  */
1918             memop |= MO_64;
1919             tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
1920                                 memop | MO_ALIGN_16);
1921
1922             TCGv_i64 addr2 = tcg_temp_new_i64();
1923             tcg_gen_addi_i64(addr2, addr, 8);
1924             tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
1925             tcg_temp_free_i64(addr2);
1926
1927             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
1928             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
1929         }
1930     } else {
1931         memop |= size | MO_ALIGN;
1932         tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
1933         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
1934     }
1935     tcg_gen_mov_i64(cpu_exclusive_addr, addr);
1936 }
1937
1938 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
1939                                 TCGv_i64 addr, int size, int is_pair)
1940 {
1941     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
1942      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
1943      *     [addr] = {Rt};
1944      *     if (is_pair) {
1945      *         [addr + datasize] = {Rt2};
1946      *     }
1947      *     {Rd} = 0;
1948      * } else {
1949      *     {Rd} = 1;
1950      * }
1951      * env->exclusive_addr = -1;
1952      */
1953     TCGLabel *fail_label = gen_new_label();
1954     TCGLabel *done_label = gen_new_label();
1955     TCGv_i64 tmp;
1956
1957     tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
1958
1959     tmp = tcg_temp_new_i64();
1960     if (is_pair) {
1961         if (size == 2) {
1962             if (s->be_data == MO_LE) {
1963                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
1964             } else {
1965                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
1966             }
1967             tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
1968                                        cpu_exclusive_val, tmp,
1969                                        get_mem_index(s),
1970                                        MO_64 | MO_ALIGN | s->be_data);
1971             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
1972         } else if (s->be_data == MO_LE) {
1973             if (tb_cflags(s->base.tb) & CF_PARALLEL) {
1974                 gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
1975                                                         cpu_exclusive_addr,
1976                                                         cpu_reg(s, rt),
1977                                                         cpu_reg(s, rt2));
1978             } else {
1979                 gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
1980                                                cpu_reg(s, rt), cpu_reg(s, rt2));
1981             }
1982         } else {
1983             if (tb_cflags(s->base.tb) & CF_PARALLEL) {
1984                 gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
1985                                                         cpu_exclusive_addr,
1986                                                         cpu_reg(s, rt),
1987                                                         cpu_reg(s, rt2));
1988             } else {
1989                 gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
1990                                                cpu_reg(s, rt), cpu_reg(s, rt2));
1991             }
1992         }
1993     } else {
1994         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
1995                                    cpu_reg(s, rt), get_mem_index(s),
1996                                    size | MO_ALIGN | s->be_data);
1997         tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
1998     }
1999     tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2000     tcg_temp_free_i64(tmp);
2001     tcg_gen_br(done_label);
2002
2003     gen_set_label(fail_label);
2004     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2005     gen_set_label(done_label);
2006     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2007 }
2008
2009 /* Update the Sixty-Four bit (SF) registersize. This logic is derived
2010  * from the ARMv8 specs for LDR (Shared decode for all encodings).
2011  */
2012 static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
2013 {
2014     int opc0 = extract32(opc, 0, 1);
2015     int regsize;
2016
2017     if (is_signed) {
2018         regsize = opc0 ? 32 : 64;
2019     } else {
2020         regsize = size == 3 ? 64 : 32;
2021     }
2022     return regsize == 64;
2023 }
2024
2025 /* Load/store exclusive
2026  *
2027  *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
2028  * +-----+-------------+----+---+----+------+----+-------+------+------+
2029  * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
2030  * +-----+-------------+----+---+----+------+----+-------+------+------+
2031  *
2032  *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2033  *   L: 0 -> store, 1 -> load
2034  *  o2: 0 -> exclusive, 1 -> not
2035  *  o1: 0 -> single register, 1 -> register pair
2036  *  o0: 1 -> load-acquire/store-release, 0 -> not
2037  */
2038 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
2039 {
2040     int rt = extract32(insn, 0, 5);
2041     int rn = extract32(insn, 5, 5);
2042     int rt2 = extract32(insn, 10, 5);
2043     int is_lasr = extract32(insn, 15, 1);
2044     int rs = extract32(insn, 16, 5);
2045     int is_pair = extract32(insn, 21, 1);
2046     int is_store = !extract32(insn, 22, 1);
2047     int is_excl = !extract32(insn, 23, 1);
2048     int size = extract32(insn, 30, 2);
2049     TCGv_i64 tcg_addr;
2050
2051     if ((!is_excl && !is_pair && !is_lasr) ||
2052         (!is_excl && is_pair) ||
2053         (is_pair && size < 2)) {
2054         unallocated_encoding(s);
2055         return;
2056     }
2057
2058     if (rn == 31) {
2059         gen_check_sp_alignment(s);
2060     }
2061     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2062
2063     /* Note that since TCG is single threaded load-acquire/store-release
2064      * semantics require no extra if (is_lasr) { ... } handling.
2065      */
2066
2067     if (is_excl) {
2068         if (!is_store) {
2069             s->is_ldex = true;
2070             gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
2071             if (is_lasr) {
2072                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2073             }
2074         } else {
2075             if (is_lasr) {
2076                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2077             }
2078             gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
2079         }
2080     } else {
2081         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2082         bool iss_sf = disas_ldst_compute_iss_sf(size, false, 0);
2083
2084         /* Generate ISS for non-exclusive accesses including LASR.  */
2085         if (is_store) {
2086             if (is_lasr) {
2087                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2088             }
2089             do_gpr_st(s, tcg_rt, tcg_addr, size,
2090                       true, rt, iss_sf, is_lasr);
2091         } else {
2092             do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false,
2093                       true, rt, iss_sf, is_lasr);
2094             if (is_lasr) {
2095                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2096             }
2097         }
2098     }
2099 }
2100
2101 /*
2102  * Load register (literal)
2103  *
2104  *  31 30 29   27  26 25 24 23                5 4     0
2105  * +-----+-------+---+-----+-------------------+-------+
2106  * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
2107  * +-----+-------+---+-----+-------------------+-------+
2108  *
2109  * V: 1 -> vector (simd/fp)
2110  * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2111  *                   10-> 32 bit signed, 11 -> prefetch
2112  * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2113  */
2114 static void disas_ld_lit(DisasContext *s, uint32_t insn)
2115 {
2116     int rt = extract32(insn, 0, 5);
2117     int64_t imm = sextract32(insn, 5, 19) << 2;
2118     bool is_vector = extract32(insn, 26, 1);
2119     int opc = extract32(insn, 30, 2);
2120     bool is_signed = false;
2121     int size = 2;
2122     TCGv_i64 tcg_rt, tcg_addr;
2123
2124     if (is_vector) {
2125         if (opc == 3) {
2126             unallocated_encoding(s);
2127             return;
2128         }
2129         size = 2 + opc;
2130         if (!fp_access_check(s)) {
2131             return;
2132         }
2133     } else {
2134         if (opc == 3) {
2135             /* PRFM (literal) : prefetch */
2136             return;
2137         }
2138         size = 2 + extract32(opc, 0, 1);
2139         is_signed = extract32(opc, 1, 1);
2140     }
2141
2142     tcg_rt = cpu_reg(s, rt);
2143
2144     tcg_addr = tcg_const_i64((s->pc - 4) + imm);
2145     if (is_vector) {
2146         do_fp_ld(s, rt, tcg_addr, size);
2147     } else {
2148         /* Only unsigned 32bit loads target 32bit registers.  */
2149         bool iss_sf = opc != 0;
2150
2151         do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
2152                   true, rt, iss_sf, false);
2153     }
2154     tcg_temp_free_i64(tcg_addr);
2155 }
2156
2157 /*
2158  * LDNP (Load Pair - non-temporal hint)
2159  * LDP (Load Pair - non vector)
2160  * LDPSW (Load Pair Signed Word - non vector)
2161  * STNP (Store Pair - non-temporal hint)
2162  * STP (Store Pair - non vector)
2163  * LDNP (Load Pair of SIMD&FP - non-temporal hint)
2164  * LDP (Load Pair of SIMD&FP)
2165  * STNP (Store Pair of SIMD&FP - non-temporal hint)
2166  * STP (Store Pair of SIMD&FP)
2167  *
2168  *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
2169  * +-----+-------+---+---+-------+---+-----------------------------+
2170  * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
2171  * +-----+-------+---+---+-------+---+-------+-------+------+------+
2172  *
2173  * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
2174  *      LDPSW                    01
2175  *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2176  *   V: 0 -> GPR, 1 -> Vector
2177  * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2178  *      10 -> signed offset, 11 -> pre-index
2179  *   L: 0 -> Store 1 -> Load
2180  *
2181  * Rt, Rt2 = GPR or SIMD registers to be stored
2182  * Rn = general purpose register containing address
2183  * imm7 = signed offset (multiple of 4 or 8 depending on size)
2184  */
2185 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2186 {
2187     int rt = extract32(insn, 0, 5);
2188     int rn = extract32(insn, 5, 5);
2189     int rt2 = extract32(insn, 10, 5);
2190     uint64_t offset = sextract64(insn, 15, 7);
2191     int index = extract32(insn, 23, 2);
2192     bool is_vector = extract32(insn, 26, 1);
2193     bool is_load = extract32(insn, 22, 1);
2194     int opc = extract32(insn, 30, 2);
2195
2196     bool is_signed = false;
2197     bool postindex = false;
2198     bool wback = false;
2199
2200     TCGv_i64 tcg_addr; /* calculated address */
2201     int size;
2202
2203     if (opc == 3) {
2204         unallocated_encoding(s);
2205         return;
2206     }
2207
2208     if (is_vector) {
2209         size = 2 + opc;
2210     } else {
2211         size = 2 + extract32(opc, 1, 1);
2212         is_signed = extract32(opc, 0, 1);
2213         if (!is_load && is_signed) {
2214             unallocated_encoding(s);
2215             return;
2216         }
2217     }
2218
2219     switch (index) {
2220     case 1: /* post-index */
2221         postindex = true;
2222         wback = true;
2223         break;
2224     case 0:
2225         /* signed offset with "non-temporal" hint. Since we don't emulate
2226          * caches we don't care about hints to the cache system about
2227          * data access patterns, and handle this identically to plain
2228          * signed offset.
2229          */
2230         if (is_signed) {
2231             /* There is no non-temporal-hint version of LDPSW */
2232             unallocated_encoding(s);
2233             return;
2234         }
2235         postindex = false;
2236         break;
2237     case 2: /* signed offset, rn not updated */
2238         postindex = false;
2239         break;
2240     case 3: /* pre-index */
2241         postindex = false;
2242         wback = true;
2243         break;
2244     }
2245
2246     if (is_vector && !fp_access_check(s)) {
2247         return;
2248     }
2249
2250     offset <<= size;
2251
2252     if (rn == 31) {
2253         gen_check_sp_alignment(s);
2254     }
2255
2256     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2257
2258     if (!postindex) {
2259         tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2260     }
2261
2262     if (is_vector) {
2263         if (is_load) {
2264             do_fp_ld(s, rt, tcg_addr, size);
2265         } else {
2266             do_fp_st(s, rt, tcg_addr, size);
2267         }
2268         tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2269         if (is_load) {
2270             do_fp_ld(s, rt2, tcg_addr, size);
2271         } else {
2272             do_fp_st(s, rt2, tcg_addr, size);
2273         }
2274     } else {
2275         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2276         TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2277
2278         if (is_load) {
2279             TCGv_i64 tmp = tcg_temp_new_i64();
2280
2281             /* Do not modify tcg_rt before recognizing any exception
2282              * from the second load.
2283              */
2284             do_gpr_ld(s, tmp, tcg_addr, size, is_signed, false,
2285                       false, 0, false, false);
2286             tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2287             do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false,
2288                       false, 0, false, false);
2289
2290             tcg_gen_mov_i64(tcg_rt, tmp);
2291             tcg_temp_free_i64(tmp);
2292         } else {
2293             do_gpr_st(s, tcg_rt, tcg_addr, size,
2294                       false, 0, false, false);
2295             tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2296             do_gpr_st(s, tcg_rt2, tcg_addr, size,
2297                       false, 0, false, false);
2298         }
2299     }
2300
2301     if (wback) {
2302         if (postindex) {
2303             tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
2304         } else {
2305             tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
2306         }
2307         tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
2308     }
2309 }
2310
2311 /*
2312  * Load/store (immediate post-indexed)
2313  * Load/store (immediate pre-indexed)
2314  * Load/store (unscaled immediate)
2315  *
2316  * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2317  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2318  * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2319  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2320  *
2321  * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
2322          10 -> unprivileged
2323  * V = 0 -> non-vector
2324  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2325  * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2326  */
2327 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
2328                                 int opc,
2329                                 int size,
2330                                 int rt,
2331                                 bool is_vector)
2332 {
2333     int rn = extract32(insn, 5, 5);
2334     int imm9 = sextract32(insn, 12, 9);
2335     int idx = extract32(insn, 10, 2);
2336     bool is_signed = false;
2337     bool is_store = false;
2338     bool is_extended = false;
2339     bool is_unpriv = (idx == 2);
2340     bool iss_valid = !is_vector;
2341     bool post_index;
2342     bool writeback;
2343
2344     TCGv_i64 tcg_addr;
2345
2346     if (is_vector) {
2347         size |= (opc & 2) << 1;
2348         if (size > 4 || is_unpriv) {
2349             unallocated_encoding(s);
2350             return;
2351         }
2352         is_store = ((opc & 1) == 0);
2353         if (!fp_access_check(s)) {
2354             return;
2355         }
2356     } else {
2357         if (size == 3 && opc == 2) {
2358             /* PRFM - prefetch */
2359             if (is_unpriv) {
2360                 unallocated_encoding(s);
2361                 return;
2362             }
2363             return;
2364         }
2365         if (opc == 3 && size > 1) {
2366             unallocated_encoding(s);
2367             return;
2368         }
2369         is_store = (opc == 0);
2370         is_signed = extract32(opc, 1, 1);
2371         is_extended = (size < 3) && extract32(opc, 0, 1);
2372     }
2373
2374     switch (idx) {
2375     case 0:
2376     case 2:
2377         post_index = false;
2378         writeback = false;
2379         break;
2380     case 1:
2381         post_index = true;
2382         writeback = true;
2383         break;
2384     case 3:
2385         post_index = false;
2386         writeback = true;
2387         break;
2388     default:
2389         g_assert_not_reached();
2390     }
2391
2392     if (rn == 31) {
2393         gen_check_sp_alignment(s);
2394     }
2395     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2396
2397     if (!post_index) {
2398         tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2399     }
2400
2401     if (is_vector) {
2402         if (is_store) {
2403             do_fp_st(s, rt, tcg_addr, size);
2404         } else {
2405             do_fp_ld(s, rt, tcg_addr, size);
2406         }
2407     } else {
2408         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2409         int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2410         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2411
2412         if (is_store) {
2413             do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx,
2414                              iss_valid, rt, iss_sf, false);
2415         } else {
2416             do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
2417                              is_signed, is_extended, memidx,
2418                              iss_valid, rt, iss_sf, false);
2419         }
2420     }
2421
2422     if (writeback) {
2423         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2424         if (post_index) {
2425             tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2426         }
2427         tcg_gen_mov_i64(tcg_rn, tcg_addr);
2428     }
2429 }
2430
2431 /*
2432  * Load/store (register offset)
2433  *
2434  * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2435  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2436  * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2437  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2438  *
2439  * For non-vector:
2440  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2441  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2442  * For vector:
2443  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2444  *   opc<0>: 0 -> store, 1 -> load
2445  * V: 1 -> vector/simd
2446  * opt: extend encoding (see DecodeRegExtend)
2447  * S: if S=1 then scale (essentially index by sizeof(size))
2448  * Rt: register to transfer into/out of
2449  * Rn: address register or SP for base
2450  * Rm: offset register or ZR for offset
2451  */
2452 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
2453                                    int opc,
2454                                    int size,
2455                                    int rt,
2456                                    bool is_vector)
2457 {
2458     int rn = extract32(insn, 5, 5);
2459     int shift = extract32(insn, 12, 1);
2460     int rm = extract32(insn, 16, 5);
2461     int opt = extract32(insn, 13, 3);
2462     bool is_signed = false;
2463     bool is_store = false;
2464     bool is_extended = false;
2465
2466     TCGv_i64 tcg_rm;
2467     TCGv_i64 tcg_addr;
2468
2469     if (extract32(opt, 1, 1) == 0) {
2470         unallocated_encoding(s);
2471         return;
2472     }
2473
2474     if (is_vector) {
2475         size |= (opc & 2) << 1;
2476         if (size > 4) {
2477             unallocated_encoding(s);
2478             return;
2479         }
2480         is_store = !extract32(opc, 0, 1);
2481         if (!fp_access_check(s)) {
2482             return;
2483         }
2484     } else {
2485         if (size == 3 && opc == 2) {
2486             /* PRFM - prefetch */
2487             return;
2488         }
2489         if (opc == 3 && size > 1) {
2490             unallocated_encoding(s);
2491             return;
2492         }
2493         is_store = (opc == 0);
2494         is_signed = extract32(opc, 1, 1);
2495         is_extended = (size < 3) && extract32(opc, 0, 1);
2496     }
2497
2498     if (rn == 31) {
2499         gen_check_sp_alignment(s);
2500     }
2501     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2502
2503     tcg_rm = read_cpu_reg(s, rm, 1);
2504     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2505
2506     tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
2507
2508     if (is_vector) {
2509         if (is_store) {
2510             do_fp_st(s, rt, tcg_addr, size);
2511         } else {
2512             do_fp_ld(s, rt, tcg_addr, size);
2513         }
2514     } else {
2515         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2516         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2517         if (is_store) {
2518             do_gpr_st(s, tcg_rt, tcg_addr, size,
2519                       true, rt, iss_sf, false);
2520         } else {
2521             do_gpr_ld(s, tcg_rt, tcg_addr, size,
2522                       is_signed, is_extended,
2523                       true, rt, iss_sf, false);
2524         }
2525     }
2526 }
2527
2528 /*
2529  * Load/store (unsigned immediate)
2530  *
2531  * 31 30 29   27  26 25 24 23 22 21        10 9     5
2532  * +----+-------+---+-----+-----+------------+-------+------+
2533  * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
2534  * +----+-------+---+-----+-----+------------+-------+------+
2535  *
2536  * For non-vector:
2537  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2538  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2539  * For vector:
2540  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2541  *   opc<0>: 0 -> store, 1 -> load
2542  * Rn: base address register (inc SP)
2543  * Rt: target register
2544  */
2545 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
2546                                         int opc,
2547                                         int size,
2548                                         int rt,
2549                                         bool is_vector)
2550 {
2551     int rn = extract32(insn, 5, 5);
2552     unsigned int imm12 = extract32(insn, 10, 12);
2553     unsigned int offset;
2554
2555     TCGv_i64 tcg_addr;
2556
2557     bool is_store;
2558     bool is_signed = false;
2559     bool is_extended = false;
2560
2561     if (is_vector) {
2562         size |= (opc & 2) << 1;
2563         if (size > 4) {
2564             unallocated_encoding(s);
2565             return;
2566         }
2567         is_store = !extract32(opc, 0, 1);
2568         if (!fp_access_check(s)) {
2569             return;
2570         }
2571     } else {
2572         if (size == 3 && opc == 2) {
2573             /* PRFM - prefetch */
2574             return;
2575         }
2576         if (opc == 3 && size > 1) {
2577             unallocated_encoding(s);
2578             return;
2579         }
2580         is_store = (opc == 0);
2581         is_signed = extract32(opc, 1, 1);
2582         is_extended = (size < 3) && extract32(opc, 0, 1);
2583     }
2584
2585     if (rn == 31) {
2586         gen_check_sp_alignment(s);
2587     }
2588     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2589     offset = imm12 << size;
2590     tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2591
2592     if (is_vector) {
2593         if (is_store) {
2594             do_fp_st(s, rt, tcg_addr, size);
2595         } else {
2596             do_fp_ld(s, rt, tcg_addr, size);
2597         }
2598     } else {
2599         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2600         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2601         if (is_store) {
2602             do_gpr_st(s, tcg_rt, tcg_addr, size,
2603                       true, rt, iss_sf, false);
2604         } else {
2605             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended,
2606                       true, rt, iss_sf, false);
2607         }
2608     }
2609 }
2610
2611 /* Load/store register (all forms) */
2612 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2613 {
2614     int rt = extract32(insn, 0, 5);
2615     int opc = extract32(insn, 22, 2);
2616     bool is_vector = extract32(insn, 26, 1);
2617     int size = extract32(insn, 30, 2);
2618
2619     switch (extract32(insn, 24, 2)) {
2620     case 0:
2621         if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2622             disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
2623         } else {
2624             /* Load/store register (unscaled immediate)
2625              * Load/store immediate pre/post-indexed
2626              * Load/store register unprivileged
2627              */
2628             disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
2629         }
2630         break;
2631     case 1:
2632         disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
2633         break;
2634     default:
2635         unallocated_encoding(s);
2636         break;
2637     }
2638 }
2639
2640 /* AdvSIMD load/store multiple structures
2641  *
2642  *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
2643  * +---+---+---------------+---+-------------+--------+------+------+------+
2644  * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
2645  * +---+---+---------------+---+-------------+--------+------+------+------+
2646  *
2647  * AdvSIMD load/store multiple structures (post-indexed)
2648  *
2649  *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
2650  * +---+---+---------------+---+---+---------+--------+------+------+------+
2651  * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
2652  * +---+---+---------------+---+---+---------+--------+------+------+------+
2653  *
2654  * Rt: first (or only) SIMD&FP register to be transferred
2655  * Rn: base address or SP
2656  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2657  */
2658 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
2659 {
2660     int rt = extract32(insn, 0, 5);
2661     int rn = extract32(insn, 5, 5);
2662     int size = extract32(insn, 10, 2);
2663     int opcode = extract32(insn, 12, 4);
2664     bool is_store = !extract32(insn, 22, 1);
2665     bool is_postidx = extract32(insn, 23, 1);
2666     bool is_q = extract32(insn, 30, 1);
2667     TCGv_i64 tcg_addr, tcg_rn;
2668
2669     int ebytes = 1 << size;
2670     int elements = (is_q ? 128 : 64) / (8 << size);
2671     int rpt;    /* num iterations */
2672     int selem;  /* structure elements */
2673     int r;
2674
2675     if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
2676         unallocated_encoding(s);
2677         return;
2678     }
2679
2680     /* From the shared decode logic */
2681     switch (opcode) {
2682     case 0x0:
2683         rpt = 1;
2684         selem = 4;
2685         break;
2686     case 0x2:
2687         rpt = 4;
2688         selem = 1;
2689         break;
2690     case 0x4:
2691         rpt = 1;
2692         selem = 3;
2693         break;
2694     case 0x6:
2695         rpt = 3;
2696         selem = 1;
2697         break;
2698     case 0x7:
2699         rpt = 1;
2700         selem = 1;
2701         break;
2702     case 0x8:
2703         rpt = 1;
2704         selem = 2;
2705         break;
2706     case 0xa:
2707         rpt = 2;
2708         selem = 1;
2709         break;
2710     default:
2711         unallocated_encoding(s);
2712         return;
2713     }
2714
2715     if (size == 3 && !is_q && selem != 1) {
2716         /* reserved */
2717         unallocated_encoding(s);
2718         return;
2719     }
2720
2721     if (!fp_access_check(s)) {
2722         return;
2723     }
2724
2725     if (rn == 31) {
2726         gen_check_sp_alignment(s);
2727     }
2728
2729     tcg_rn = cpu_reg_sp(s, rn);
2730     tcg_addr = tcg_temp_new_i64();
2731     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2732
2733     for (r = 0; r < rpt; r++) {
2734         int e;
2735         for (e = 0; e < elements; e++) {
2736             int tt = (rt + r) % 32;
2737             int xs;
2738             for (xs = 0; xs < selem; xs++) {
2739                 if (is_store) {
2740                     do_vec_st(s, tt, e, tcg_addr, size);
2741                 } else {
2742                     do_vec_ld(s, tt, e, tcg_addr, size);
2743
2744                     /* For non-quad operations, setting a slice of the low
2745                      * 64 bits of the register clears the high 64 bits (in
2746                      * the ARM ARM pseudocode this is implicit in the fact
2747                      * that 'rval' is a 64 bit wide variable). We optimize
2748                      * by noticing that we only need to do this the first
2749                      * time we touch a register.
2750                      */
2751                     if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
2752                         clear_vec_high(s, tt);
2753                     }
2754                 }
2755                 tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2756                 tt = (tt + 1) % 32;
2757             }
2758         }
2759     }
2760
2761     if (is_postidx) {
2762         int rm = extract32(insn, 16, 5);
2763         if (rm == 31) {
2764             tcg_gen_mov_i64(tcg_rn, tcg_addr);
2765         } else {
2766             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2767         }
2768     }
2769     tcg_temp_free_i64(tcg_addr);
2770 }
2771
2772 /* AdvSIMD load/store single structure
2773  *
2774  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2775  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2776  * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
2777  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2778  *
2779  * AdvSIMD load/store single structure (post-indexed)
2780  *
2781  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2782  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2783  * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
2784  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2785  *
2786  * Rt: first (or only) SIMD&FP register to be transferred
2787  * Rn: base address or SP
2788  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2789  * index = encoded in Q:S:size dependent on size
2790  *
2791  * lane_size = encoded in R, opc
2792  * transfer width = encoded in opc, S, size
2793  */
2794 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
2795 {
2796     int rt = extract32(insn, 0, 5);
2797     int rn = extract32(insn, 5, 5);
2798     int size = extract32(insn, 10, 2);
2799     int S = extract32(insn, 12, 1);
2800     int opc = extract32(insn, 13, 3);
2801     int R = extract32(insn, 21, 1);
2802     int is_load = extract32(insn, 22, 1);
2803     int is_postidx = extract32(insn, 23, 1);
2804     int is_q = extract32(insn, 30, 1);
2805
2806     int scale = extract32(opc, 1, 2);
2807     int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
2808     bool replicate = false;
2809     int index = is_q << 3 | S << 2 | size;
2810     int ebytes, xs;
2811     TCGv_i64 tcg_addr, tcg_rn;
2812
2813     switch (scale) {
2814     case 3:
2815         if (!is_load || S) {
2816             unallocated_encoding(s);
2817             return;
2818         }
2819         scale = size;
2820         replicate = true;
2821         break;
2822     case 0:
2823         break;
2824     case 1:
2825         if (extract32(size, 0, 1)) {
2826             unallocated_encoding(s);
2827             return;
2828         }
2829         index >>= 1;
2830         break;
2831     case 2:
2832         if (extract32(size, 1, 1)) {
2833             unallocated_encoding(s);
2834             return;
2835         }
2836         if (!extract32(size, 0, 1)) {
2837             index >>= 2;
2838         } else {
2839             if (S) {
2840                 unallocated_encoding(s);
2841                 return;
2842             }
2843             index >>= 3;
2844             scale = 3;
2845         }
2846         break;
2847     default:
2848         g_assert_not_reached();
2849     }
2850
2851     if (!fp_access_check(s)) {
2852         return;
2853     }
2854
2855     ebytes = 1 << scale;
2856
2857     if (rn == 31) {
2858         gen_check_sp_alignment(s);
2859     }
2860
2861     tcg_rn = cpu_reg_sp(s, rn);
2862     tcg_addr = tcg_temp_new_i64();
2863     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2864
2865     for (xs = 0; xs < selem; xs++) {
2866         if (replicate) {
2867             /* Load and replicate to all elements */
2868             uint64_t mulconst;
2869             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
2870
2871             tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
2872                                 get_mem_index(s), s->be_data + scale);
2873             switch (scale) {
2874             case 0:
2875                 mulconst = 0x0101010101010101ULL;
2876                 break;
2877             case 1:
2878                 mulconst = 0x0001000100010001ULL;
2879                 break;
2880             case 2:
2881                 mulconst = 0x0000000100000001ULL;
2882                 break;
2883             case 3:
2884                 mulconst = 0;
2885                 break;
2886             default:
2887                 g_assert_not_reached();
2888             }
2889             if (mulconst) {
2890                 tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
2891             }
2892             write_vec_element(s, tcg_tmp, rt, 0, MO_64);
2893             if (is_q) {
2894                 write_vec_element(s, tcg_tmp, rt, 1, MO_64);
2895             } else {
2896                 clear_vec_high(s, rt);
2897             }
2898             tcg_temp_free_i64(tcg_tmp);
2899         } else {
2900             /* Load/store one element per register */
2901             if (is_load) {
2902                 do_vec_ld(s, rt, index, tcg_addr, scale);
2903             } else {
2904                 do_vec_st(s, rt, index, tcg_addr, scale);
2905             }
2906         }
2907         tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2908         rt = (rt + 1) % 32;
2909     }
2910
2911     if (is_postidx) {
2912         int rm = extract32(insn, 16, 5);
2913         if (rm == 31) {
2914             tcg_gen_mov_i64(tcg_rn, tcg_addr);
2915         } else {
2916             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2917         }
2918     }
2919     tcg_temp_free_i64(tcg_addr);
2920 }
2921
2922 /* Loads and stores */
2923 static void disas_ldst(DisasContext *s, uint32_t insn)
2924 {
2925     switch (extract32(insn, 24, 6)) {
2926     case 0x08: /* Load/store exclusive */
2927         disas_ldst_excl(s, insn);
2928         break;
2929     case 0x18: case 0x1c: /* Load register (literal) */
2930         disas_ld_lit(s, insn);
2931         break;
2932     case 0x28: case 0x29:
2933     case 0x2c: case 0x2d: /* Load/store pair (all forms) */
2934         disas_ldst_pair(s, insn);
2935         break;
2936     case 0x38: case 0x39:
2937     case 0x3c: case 0x3d: /* Load/store register (all forms) */
2938         disas_ldst_reg(s, insn);
2939         break;
2940     case 0x0c: /* AdvSIMD load/store multiple structures */
2941         disas_ldst_multiple_struct(s, insn);
2942         break;
2943     case 0x0d: /* AdvSIMD load/store single structure */
2944         disas_ldst_single_struct(s, insn);
2945         break;
2946     default:
2947         unallocated_encoding(s);
2948         break;
2949     }
2950 }
2951
2952 /* PC-rel. addressing
2953  *   31  30   29 28       24 23                5 4    0
2954  * +----+-------+-----------+-------------------+------+
2955  * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
2956  * +----+-------+-----------+-------------------+------+
2957  */
2958 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
2959 {
2960     unsigned int page, rd;
2961     uint64_t base;
2962     uint64_t offset;
2963
2964     page = extract32(insn, 31, 1);
2965     /* SignExtend(immhi:immlo) -> offset */
2966     offset = sextract64(insn, 5, 19);
2967     offset = offset << 2 | extract32(insn, 29, 2);
2968     rd = extract32(insn, 0, 5);
2969     base = s->pc - 4;
2970
2971     if (page) {
2972         /* ADRP (page based) */
2973         base &= ~0xfff;
2974         offset <<= 12;
2975     }
2976
2977     tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
2978 }
2979
2980 /*
2981  * Add/subtract (immediate)
2982  *
2983  *  31 30 29 28       24 23 22 21         10 9   5 4   0
2984  * +--+--+--+-----------+-----+-------------+-----+-----+
2985  * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
2986  * +--+--+--+-----------+-----+-------------+-----+-----+
2987  *
2988  *    sf: 0 -> 32bit, 1 -> 64bit
2989  *    op: 0 -> add  , 1 -> sub
2990  *     S: 1 -> set flags
2991  * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
2992  */
2993 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
2994 {
2995     int rd = extract32(insn, 0, 5);
2996     int rn = extract32(insn, 5, 5);
2997     uint64_t imm = extract32(insn, 10, 12);
2998     int shift = extract32(insn, 22, 2);
2999     bool setflags = extract32(insn, 29, 1);
3000     bool sub_op = extract32(insn, 30, 1);
3001     bool is_64bit = extract32(insn, 31, 1);
3002
3003     TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
3004     TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
3005     TCGv_i64 tcg_result;
3006
3007     switch (shift) {
3008     case 0x0:
3009         break;
3010     case 0x1:
3011         imm <<= 12;
3012         break;
3013     default:
3014         unallocated_encoding(s);
3015         return;
3016     }
3017
3018     tcg_result = tcg_temp_new_i64();
3019     if (!setflags) {
3020         if (sub_op) {
3021             tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
3022         } else {
3023             tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
3024         }
3025     } else {
3026         TCGv_i64 tcg_imm = tcg_const_i64(imm);
3027         if (sub_op) {
3028             gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
3029         } else {
3030             gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
3031         }
3032         tcg_temp_free_i64(tcg_imm);
3033     }
3034
3035     if (is_64bit) {
3036         tcg_gen_mov_i64(tcg_rd, tcg_result);
3037     } else {
3038         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3039     }
3040
3041     tcg_temp_free_i64(tcg_result);
3042 }
3043
3044 /* The input should be a value in the bottom e bits (with higher
3045  * bits zero); returns that value replicated into every element
3046  * of size e in a 64 bit integer.
3047  */
3048 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
3049 {
3050     assert(e != 0);
3051     while (e < 64) {
3052         mask |= mask << e;
3053         e *= 2;
3054     }
3055     return mask;
3056 }
3057
3058 /* Return a value with the bottom len bits set (where 0 < len <= 64) */
3059 static inline uint64_t bitmask64(unsigned int length)
3060 {
3061     assert(length > 0 && length <= 64);
3062     return ~0ULL >> (64 - length);
3063 }
3064
3065 /* Simplified variant of pseudocode DecodeBitMasks() for the case where we
3066  * only require the wmask. Returns false if the imms/immr/immn are a reserved
3067  * value (ie should cause a guest UNDEF exception), and true if they are
3068  * valid, in which case the decoded bit pattern is written to result.
3069  */
3070 static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
3071                                    unsigned int imms, unsigned int immr)
3072 {
3073     uint64_t mask;
3074     unsigned e, levels, s, r;
3075     int len;
3076
3077     assert(immn < 2 && imms < 64 && immr < 64);
3078
3079     /* The bit patterns we create here are 64 bit patterns which
3080      * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
3081      * 64 bits each. Each element contains the same value: a run
3082      * of between 1 and e-1 non-zero bits, rotated within the
3083      * element by between 0 and e-1 bits.
3084      *
3085      * The element size and run length are encoded into immn (1 bit)
3086      * and imms (6 bits) as follows:
3087      * 64 bit elements: immn = 1, imms = <length of run - 1>
3088      * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
3089      * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
3090      *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
3091      *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
3092      *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
3093      * Notice that immn = 0, imms = 11111x is the only combination
3094      * not covered by one of the above options; this is reserved.
3095      * Further, <length of run - 1> all-ones is a reserved pattern.
3096      *
3097      * In all cases the rotation is by immr % e (and immr is 6 bits).
3098      */
3099
3100     /* First determine the element size */
3101     len = 31 - clz32((immn << 6) | (~imms & 0x3f));
3102     if (len < 1) {
3103         /* This is the immn == 0, imms == 0x11111x case */
3104         return false;
3105     }
3106     e = 1 << len;
3107
3108     levels = e - 1;
3109     s = imms & levels;
3110     r = immr & levels;
3111
3112     if (s == levels) {
3113         /* <length of run - 1> mustn't be all-ones. */
3114         return false;
3115     }
3116
3117     /* Create the value of one element: s+1 set bits rotated
3118      * by r within the element (which is e bits wide)...
3119      */
3120     mask = bitmask64(s + 1);
3121     if (r) {
3122         mask = (mask >> r) | (mask << (e - r));
3123         mask &= bitmask64(e);
3124     }
3125     /* ...then replicate the element over the whole 64 bit value */
3126     mask = bitfield_replicate(mask, e);
3127     *result = mask;
3128     return true;
3129 }
3130
3131 /* Logical (immediate)
3132  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3133  * +----+-----+-------------+---+------+------+------+------+
3134  * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
3135  * +----+-----+-------------+---+------+------+------+------+
3136  */
3137 static void disas_logic_imm(DisasContext *s, uint32_t insn)
3138 {
3139     unsigned int sf, opc, is_n, immr, imms, rn, rd;
3140     TCGv_i64 tcg_rd, tcg_rn;
3141     uint64_t wmask;
3142     bool is_and = false;
3143
3144     sf = extract32(insn, 31, 1);
3145     opc = extract32(insn, 29, 2);
3146     is_n = extract32(insn, 22, 1);
3147     immr = extract32(insn, 16, 6);
3148     imms = extract32(insn, 10, 6);
3149     rn = extract32(insn, 5, 5);
3150     rd = extract32(insn, 0, 5);
3151
3152     if (!sf && is_n) {
3153         unallocated_encoding(s);
3154         return;
3155     }
3156
3157     if (opc == 0x3) { /* ANDS */
3158         tcg_rd = cpu_reg(s, rd);
3159     } else {
3160         tcg_rd = cpu_reg_sp(s, rd);
3161     }
3162     tcg_rn = cpu_reg(s, rn);
3163
3164     if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
3165         /* some immediate field values are reserved */
3166         unallocated_encoding(s);
3167         return;
3168     }
3169
3170     if (!sf) {
3171         wmask &= 0xffffffff;
3172     }
3173
3174     switch (opc) {
3175     case 0x3: /* ANDS */
3176     case 0x0: /* AND */
3177         tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
3178         is_and = true;
3179         break;
3180     case 0x1: /* ORR */
3181         tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
3182         break;
3183     case 0x2: /* EOR */
3184         tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
3185         break;
3186     default:
3187         assert(FALSE); /* must handle all above */
3188         break;
3189     }
3190
3191     if (!sf && !is_and) {
3192         /* zero extend final result; we know we can skip this for AND
3193          * since the immediate had the high 32 bits clear.
3194          */
3195         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3196     }
3197
3198     if (opc == 3) { /* ANDS */
3199         gen_logic_CC(sf, tcg_rd);
3200     }
3201 }
3202
3203 /*
3204  * Move wide (immediate)
3205  *
3206  *  31 30 29 28         23 22 21 20             5 4    0
3207  * +--+-----+-------------+-----+----------------+------+
3208  * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
3209  * +--+-----+-------------+-----+----------------+------+
3210  *
3211  * sf: 0 -> 32 bit, 1 -> 64 bit
3212  * opc: 00 -> N, 10 -> Z, 11 -> K
3213  * hw: shift/16 (0,16, and sf only 32, 48)
3214  */
3215 static void disas_movw_imm(DisasContext *s, uint32_t insn)
3216 {
3217     int rd = extract32(insn, 0, 5);
3218     uint64_t imm = extract32(insn, 5, 16);
3219     int sf = extract32(insn, 31, 1);
3220     int opc = extract32(insn, 29, 2);
3221     int pos = extract32(insn, 21, 2) << 4;
3222     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3223     TCGv_i64 tcg_imm;
3224
3225     if (!sf && (pos >= 32)) {
3226         unallocated_encoding(s);
3227         return;
3228     }
3229
3230     switch (opc) {
3231     case 0: /* MOVN */
3232     case 2: /* MOVZ */
3233         imm <<= pos;
3234         if (opc == 0) {
3235             imm = ~imm;
3236         }
3237         if (!sf) {
3238             imm &= 0xffffffffu;
3239         }
3240         tcg_gen_movi_i64(tcg_rd, imm);
3241         break;
3242     case 3: /* MOVK */
3243         tcg_imm = tcg_const_i64(imm);
3244         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
3245         tcg_temp_free_i64(tcg_imm);
3246         if (!sf) {
3247             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3248         }
3249         break;
3250     default:
3251         unallocated_encoding(s);
3252         break;
3253     }
3254 }
3255
3256 /* Bitfield
3257  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3258  * +----+-----+-------------+---+------+------+------+------+
3259  * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
3260  * +----+-----+-------------+---+------+------+------+------+
3261  */
3262 static void disas_bitfield(DisasContext *s, uint32_t insn)
3263 {
3264     unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
3265     TCGv_i64 tcg_rd, tcg_tmp;
3266
3267     sf = extract32(insn, 31, 1);
3268     opc = extract32(insn, 29, 2);
3269     n = extract32(insn, 22, 1);
3270     ri = extract32(insn, 16, 6);
3271     si = extract32(insn, 10, 6);
3272     rn = extract32(insn, 5, 5);
3273     rd = extract32(insn, 0, 5);
3274     bitsize = sf ? 64 : 32;
3275
3276     if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3277         unallocated_encoding(s);
3278         return;
3279     }
3280
3281     tcg_rd = cpu_reg(s, rd);
3282
3283     /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
3284        to be smaller than bitsize, we'll never reference data outside the
3285        low 32-bits anyway.  */
3286     tcg_tmp = read_cpu_reg(s, rn, 1);
3287
3288     /* Recognize simple(r) extractions.  */
3289     if (si >= ri) {
3290         /* Wd<s-r:0> = Wn<s:r> */
3291         len = (si - ri) + 1;
3292         if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */
3293             tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
3294             goto done;
3295         } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */
3296             tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
3297             return;
3298         }
3299         /* opc == 1, BXFIL fall through to deposit */
3300         tcg_gen_extract_i64(tcg_tmp, tcg_tmp, ri, len);
3301         pos = 0;
3302     } else {
3303         /* Handle the ri > si case with a deposit
3304          * Wd<32+s-r,32-r> = Wn<s:0>
3305          */
3306         len = si + 1;
3307         pos = (bitsize - ri) & (bitsize - 1);
3308     }
3309
3310     if (opc == 0 && len < ri) {
3311         /* SBFM: sign extend the destination field from len to fill
3312            the balance of the word.  Let the deposit below insert all
3313            of those sign bits.  */
3314         tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
3315         len = ri;
3316     }
3317
3318     if (opc == 1) { /* BFM, BXFIL */
3319         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
3320     } else {
3321         /* SBFM or UBFM: We start with zero, and we haven't modified
3322            any bits outside bitsize, therefore the zero-extension
3323            below is unneeded.  */
3324         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
3325         return;
3326     }
3327
3328  done:
3329     if (!sf) { /* zero extend final result */
3330         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3331     }
3332 }
3333
3334 /* Extract
3335  *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3336  * +----+------+-------------+---+----+------+--------+------+------+
3337  * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3338  * +----+------+-------------+---+----+------+--------+------+------+
3339  */
3340 static void disas_extract(DisasContext *s, uint32_t insn)
3341 {
3342     unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3343
3344     sf = extract32(insn, 31, 1);
3345     n = extract32(insn, 22, 1);
3346     rm = extract32(insn, 16, 5);
3347     imm = extract32(insn, 10, 6);
3348     rn = extract32(insn, 5, 5);
3349     rd = extract32(insn, 0, 5);
3350     op21 = extract32(insn, 29, 2);
3351     op0 = extract32(insn, 21, 1);
3352     bitsize = sf ? 64 : 32;
3353
3354     if (sf != n || op21 || op0 || imm >= bitsize) {
3355         unallocated_encoding(s);
3356     } else {
3357         TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3358
3359         tcg_rd = cpu_reg(s, rd);
3360
3361         if (unlikely(imm == 0)) {
3362             /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3363              * so an extract from bit 0 is a special case.
3364              */
3365             if (sf) {
3366                 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3367             } else {
3368                 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3369             }
3370         } else if (rm == rn) { /* ROR */
3371             tcg_rm = cpu_reg(s, rm);
3372             if (sf) {
3373                 tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
3374             } else {
3375                 TCGv_i32 tmp = tcg_temp_new_i32();
3376                 tcg_gen_extrl_i64_i32(tmp, tcg_rm);
3377                 tcg_gen_rotri_i32(tmp, tmp, imm);
3378                 tcg_gen_extu_i32_i64(tcg_rd, tmp);
3379                 tcg_temp_free_i32(tmp);
3380             }
3381         } else {
3382             tcg_rm = read_cpu_reg(s, rm, sf);
3383             tcg_rn = read_cpu_reg(s, rn, sf);
3384             tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
3385             tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
3386             tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
3387             if (!sf) {
3388                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3389             }
3390         }
3391     }
3392 }
3393
3394 /* Data processing - immediate */
3395 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3396 {
3397     switch (extract32(insn, 23, 6)) {
3398     case 0x20: case 0x21: /* PC-rel. addressing */
3399         disas_pc_rel_adr(s, insn);
3400         break;
3401     case 0x22: case 0x23: /* Add/subtract (immediate) */
3402         disas_add_sub_imm(s, insn);
3403         break;
3404     case 0x24: /* Logical (immediate) */
3405         disas_logic_imm(s, insn);
3406         break;
3407     case 0x25: /* Move wide (immediate) */
3408         disas_movw_imm(s, insn);
3409         break;
3410     case 0x26: /* Bitfield */
3411         disas_bitfield(s, insn);
3412         break;
3413     case 0x27: /* Extract */
3414         disas_extract(s, insn);
3415         break;
3416     default:
3417         unallocated_encoding(s);
3418         break;
3419     }
3420 }
3421
3422 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
3423  * Note that it is the caller's responsibility to ensure that the
3424  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
3425  * mandated semantics for out of range shifts.
3426  */
3427 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
3428                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3429 {
3430     switch (shift_type) {
3431     case A64_SHIFT_TYPE_LSL:
3432         tcg_gen_shl_i64(dst, src, shift_amount);
3433         break;
3434     case A64_SHIFT_TYPE_LSR:
3435         tcg_gen_shr_i64(dst, src, shift_amount);
3436         break;
3437     case A64_SHIFT_TYPE_ASR:
3438         if (!sf) {
3439             tcg_gen_ext32s_i64(dst, src);
3440         }
3441         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
3442         break;
3443     case A64_SHIFT_TYPE_ROR:
3444         if (sf) {
3445             tcg_gen_rotr_i64(dst, src, shift_amount);
3446         } else {
3447             TCGv_i32 t0, t1;
3448             t0 = tcg_temp_new_i32();
3449             t1 = tcg_temp_new_i32();
3450             tcg_gen_extrl_i64_i32(t0, src);
3451             tcg_gen_extrl_i64_i32(t1, shift_amount);
3452             tcg_gen_rotr_i32(t0, t0, t1);
3453             tcg_gen_extu_i32_i64(dst, t0);
3454             tcg_temp_free_i32(t0);
3455             tcg_temp_free_i32(t1);
3456         }
3457         break;
3458     default:
3459         assert(FALSE); /* all shift types should be handled */
3460         break;
3461     }
3462
3463     if (!sf) { /* zero extend final result */
3464         tcg_gen_ext32u_i64(dst, dst);
3465     }
3466 }
3467
3468 /* Shift a TCGv src by immediate, put result in dst.
3469  * The shift amount must be in range (this should always be true as the
3470  * relevant instructions will UNDEF on bad shift immediates).
3471  */
3472 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
3473                           enum a64_shift_type shift_type, unsigned int shift_i)
3474 {
3475     assert(shift_i < (sf ? 64 : 32));
3476
3477     if (shift_i == 0) {
3478         tcg_gen_mov_i64(dst, src);
3479     } else {
3480         TCGv_i64 shift_const;
3481
3482         shift_const = tcg_const_i64(shift_i);
3483         shift_reg(dst, src, sf, shift_type, shift_const);
3484         tcg_temp_free_i64(shift_const);
3485     }
3486 }
3487
3488 /* Logical (shifted register)
3489  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
3490  * +----+-----+-----------+-------+---+------+--------+------+------+
3491  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
3492  * +----+-----+-----------+-------+---+------+--------+------+------+
3493  */
3494 static void disas_logic_reg(DisasContext *s, uint32_t insn)
3495 {
3496     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3497     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3498
3499     sf = extract32(insn, 31, 1);
3500     opc = extract32(insn, 29, 2);
3501     shift_type = extract32(insn, 22, 2);
3502     invert = extract32(insn, 21, 1);
3503     rm = extract32(insn, 16, 5);
3504     shift_amount = extract32(insn, 10, 6);
3505     rn = extract32(insn, 5, 5);
3506     rd = extract32(insn, 0, 5);
3507
3508     if (!sf && (shift_amount & (1 << 5))) {
3509         unallocated_encoding(s);
3510         return;
3511     }
3512
3513     tcg_rd = cpu_reg(s, rd);
3514
3515     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3516         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3517          * register-register MOV and MVN, so it is worth special casing.
3518          */
3519         tcg_rm = cpu_reg(s, rm);
3520         if (invert) {
3521             tcg_gen_not_i64(tcg_rd, tcg_rm);
3522             if (!sf) {
3523                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3524             }
3525         } else {
3526             if (sf) {
3527                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
3528             } else {
3529                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
3530             }
3531         }
3532         return;
3533     }
3534
3535     tcg_rm = read_cpu_reg(s, rm, sf);
3536
3537     if (shift_amount) {
3538         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3539     }
3540
3541     tcg_rn = cpu_reg(s, rn);
3542
3543     switch (opc | (invert << 2)) {
3544     case 0: /* AND */
3545     case 3: /* ANDS */
3546         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3547         break;
3548     case 1: /* ORR */
3549         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3550         break;
3551     case 2: /* EOR */
3552         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3553         break;
3554     case 4: /* BIC */
3555     case 7: /* BICS */
3556         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3557         break;
3558     case 5: /* ORN */
3559         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3560         break;
3561     case 6: /* EON */
3562         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3563         break;
3564     default:
3565         assert(FALSE);
3566         break;
3567     }
3568
3569     if (!sf) {
3570         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3571     }
3572
3573     if (opc == 3) {
3574         gen_logic_CC(sf, tcg_rd);
3575     }
3576 }
3577
3578 /*
3579  * Add/subtract (extended register)
3580  *
3581  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
3582  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3583  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
3584  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3585  *
3586  *  sf: 0 -> 32bit, 1 -> 64bit
3587  *  op: 0 -> add  , 1 -> sub
3588  *   S: 1 -> set flags
3589  * opt: 00
3590  * option: extension type (see DecodeRegExtend)
3591  * imm3: optional shift to Rm
3592  *
3593  * Rd = Rn + LSL(extend(Rm), amount)
3594  */
3595 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3596 {
3597     int rd = extract32(insn, 0, 5);
3598     int rn = extract32(insn, 5, 5);
3599     int imm3 = extract32(insn, 10, 3);
3600     int option = extract32(insn, 13, 3);
3601     int rm = extract32(insn, 16, 5);
3602     bool setflags = extract32(insn, 29, 1);
3603     bool sub_op = extract32(insn, 30, 1);
3604     bool sf = extract32(insn, 31, 1);
3605
3606     TCGv_i64 tcg_rm, tcg_rn; /* temps */
3607     TCGv_i64 tcg_rd;
3608     TCGv_i64 tcg_result;
3609
3610     if (imm3 > 4) {
3611         unallocated_encoding(s);
3612         return;
3613     }
3614
3615     /* non-flag setting ops may use SP */
3616     if (!setflags) {
3617         tcg_rd = cpu_reg_sp(s, rd);
3618     } else {
3619         tcg_rd = cpu_reg(s, rd);
3620     }
3621     tcg_rn = read_cpu_reg_sp(s, rn, sf);
3622
3623     tcg_rm = read_cpu_reg(s, rm, sf);
3624     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3625
3626     tcg_result = tcg_temp_new_i64();
3627
3628     if (!setflags) {
3629         if (sub_op) {
3630             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3631         } else {
3632             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3633         }
3634     } else {
3635         if (sub_op) {
3636             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3637         } else {
3638             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3639         }
3640     }
3641
3642     if (sf) {
3643         tcg_gen_mov_i64(tcg_rd, tcg_result);
3644     } else {
3645         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3646     }
3647
3648     tcg_temp_free_i64(tcg_result);
3649 }
3650
3651 /*
3652  * Add/subtract (shifted register)
3653  *
3654  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
3655  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3656  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
3657  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3658  *
3659  *    sf: 0 -> 32bit, 1 -> 64bit
3660  *    op: 0 -> add  , 1 -> sub
3661  *     S: 1 -> set flags
3662  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3663  *  imm6: Shift amount to apply to Rm before the add/sub
3664  */
3665 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3666 {
3667     int rd = extract32(insn, 0, 5);
3668     int rn = extract32(insn, 5, 5);
3669     int imm6 = extract32(insn, 10, 6);
3670     int rm = extract32(insn, 16, 5);
3671     int shift_type = extract32(insn, 22, 2);
3672     bool setflags = extract32(insn, 29, 1);
3673     bool sub_op = extract32(insn, 30, 1);
3674     bool sf = extract32(insn, 31, 1);
3675
3676     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3677     TCGv_i64 tcg_rn, tcg_rm;
3678     TCGv_i64 tcg_result;
3679
3680     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3681         unallocated_encoding(s);
3682         return;
3683     }
3684
3685     tcg_rn = read_cpu_reg(s, rn, sf);
3686     tcg_rm = read_cpu_reg(s, rm, sf);
3687
3688     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
3689
3690     tcg_result = tcg_temp_new_i64();
3691
3692     if (!setflags) {
3693         if (sub_op) {
3694             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3695         } else {
3696             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3697         }
3698     } else {
3699         if (sub_op) {
3700             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3701         } else {
3702             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3703         }
3704     }
3705
3706     if (sf) {
3707         tcg_gen_mov_i64(tcg_rd, tcg_result);
3708     } else {
3709         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3710     }
3711
3712     tcg_temp_free_i64(tcg_result);
3713 }
3714
3715 /* Data-processing (3 source)
3716  *
3717  *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
3718  *  +--+------+-----------+------+------+----+------+------+------+
3719  *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
3720  *  +--+------+-----------+------+------+----+------+------+------+
3721  */
3722 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
3723 {
3724     int rd = extract32(insn, 0, 5);
3725     int rn = extract32(insn, 5, 5);
3726     int ra = extract32(insn, 10, 5);
3727     int rm = extract32(insn, 16, 5);
3728     int op_id = (extract32(insn, 29, 3) << 4) |
3729         (extract32(insn, 21, 3) << 1) |
3730         extract32(insn, 15, 1);
3731     bool sf = extract32(insn, 31, 1);
3732     bool is_sub = extract32(op_id, 0, 1);
3733     bool is_high = extract32(op_id, 2, 1);
3734     bool is_signed = false;
3735     TCGv_i64 tcg_op1;
3736     TCGv_i64 tcg_op2;
3737     TCGv_i64 tcg_tmp;
3738
3739     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
3740     switch (op_id) {
3741     case 0x42: /* SMADDL */
3742     case 0x43: /* SMSUBL */
3743     case 0x44: /* SMULH */
3744         is_signed = true;
3745         break;
3746     case 0x0: /* MADD (32bit) */
3747     case 0x1: /* MSUB (32bit) */
3748     case 0x40: /* MADD (64bit) */
3749     case 0x41: /* MSUB (64bit) */
3750     case 0x4a: /* UMADDL */
3751     case 0x4b: /* UMSUBL */
3752     case 0x4c: /* UMULH */
3753         break;
3754     default:
3755         unallocated_encoding(s);
3756         return;
3757     }
3758
3759     if (is_high) {
3760         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
3761         TCGv_i64 tcg_rd = cpu_reg(s, rd);
3762         TCGv_i64 tcg_rn = cpu_reg(s, rn);
3763         TCGv_i64 tcg_rm = cpu_reg(s, rm);
3764
3765         if (is_signed) {
3766             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3767         } else {
3768             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3769         }
3770
3771         tcg_temp_free_i64(low_bits);
3772         return;
3773     }
3774
3775     tcg_op1 = tcg_temp_new_i64();
3776     tcg_op2 = tcg_temp_new_i64();
3777     tcg_tmp = tcg_temp_new_i64();
3778
3779     if (op_id < 0x42) {
3780         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
3781         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
3782     } else {
3783         if (is_signed) {
3784             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
3785             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
3786         } else {
3787             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
3788             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
3789         }
3790     }
3791
3792     if (ra == 31 && !is_sub) {
3793         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
3794         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
3795     } else {
3796         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
3797         if (is_sub) {
3798             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3799         } else {
3800             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3801         }
3802     }
3803
3804     if (!sf) {
3805         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
3806     }
3807
3808     tcg_temp_free_i64(tcg_op1);
3809     tcg_temp_free_i64(tcg_op2);
3810     tcg_temp_free_i64(tcg_tmp);
3811 }
3812
3813 /* Add/subtract (with carry)
3814  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
3815  * +--+--+--+------------------------+------+---------+------+-----+
3816  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
3817  * +--+--+--+------------------------+------+---------+------+-----+
3818  *                                            [000000]
3819  */
3820
3821 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
3822 {
3823     unsigned int sf, op, setflags, rm, rn, rd;
3824     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
3825
3826     if (extract32(insn, 10, 6) != 0) {
3827         unallocated_encoding(s);
3828         return;
3829     }
3830
3831     sf = extract32(insn, 31, 1);
3832     op = extract32(insn, 30, 1);
3833     setflags = extract32(insn, 29, 1);
3834     rm = extract32(insn, 16, 5);
3835     rn = extract32(insn, 5, 5);
3836     rd = extract32(insn, 0, 5);
3837
3838     tcg_rd = cpu_reg(s, rd);
3839     tcg_rn = cpu_reg(s, rn);
3840
3841     if (op) {
3842         tcg_y = new_tmp_a64(s);
3843         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
3844     } else {
3845         tcg_y = cpu_reg(s, rm);
3846     }
3847
3848     if (setflags) {
3849         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
3850     } else {
3851         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
3852     }
3853 }
3854
3855 /* Conditional compare (immediate / register)
3856  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
3857  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3858  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
3859  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3860  *        [1]                             y                [0]       [0]
3861  */
3862 static void disas_cc(DisasContext *s, uint32_t insn)
3863 {
3864     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
3865     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
3866     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
3867     DisasCompare c;
3868
3869     if (!extract32(insn, 29, 1)) {
3870         unallocated_encoding(s);
3871         return;
3872     }
3873     if (insn & (1 << 10 | 1 << 4)) {
3874         unallocated_encoding(s);
3875         return;
3876     }
3877     sf = extract32(insn, 31, 1);
3878     op = extract32(insn, 30, 1);
3879     is_imm = extract32(insn, 11, 1);
3880     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
3881     cond = extract32(insn, 12, 4);
3882     rn = extract32(insn, 5, 5);
3883     nzcv = extract32(insn, 0, 4);
3884
3885     /* Set T0 = !COND.  */
3886     tcg_t0 = tcg_temp_new_i32();
3887     arm_test_cc(&c, cond);
3888     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
3889     arm_free_cc(&c);
3890
3891     /* Load the arguments for the new comparison.  */
3892     if (is_imm) {
3893         tcg_y = new_tmp_a64(s);
3894         tcg_gen_movi_i64(tcg_y, y);
3895     } else {
3896         tcg_y = cpu_reg(s, y);
3897     }
3898     tcg_rn = cpu_reg(s, rn);
3899
3900     /* Set the flags for the new comparison.  */
3901     tcg_tmp = tcg_temp_new_i64();
3902     if (op) {
3903         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3904     } else {
3905         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3906     }
3907     tcg_temp_free_i64(tcg_tmp);
3908
3909     /* If COND was false, force the flags to #nzcv.  Compute two masks
3910      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
3911      * For tcg hosts that support ANDC, we can make do with just T1.
3912      * In either case, allow the tcg optimizer to delete any unused mask.
3913      */
3914     tcg_t1 = tcg_temp_new_i32();
3915     tcg_t2 = tcg_temp_new_i32();
3916     tcg_gen_neg_i32(tcg_t1, tcg_t0);
3917     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
3918
3919     if (nzcv & 8) { /* N */
3920         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
3921     } else {
3922         if (TCG_TARGET_HAS_andc_i32) {
3923             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
3924         } else {
3925             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
3926         }
3927     }
3928     if (nzcv & 4) { /* Z */
3929         if (TCG_TARGET_HAS_andc_i32) {
3930             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
3931         } else {
3932             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
3933         }
3934     } else {
3935         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
3936     }
3937     if (nzcv & 2) { /* C */
3938         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
3939     } else {
3940         if (TCG_TARGET_HAS_andc_i32) {
3941             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
3942         } else {
3943             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
3944         }
3945     }
3946     if (nzcv & 1) { /* V */
3947         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
3948     } else {
3949         if (TCG_TARGET_HAS_andc_i32) {
3950             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
3951         } else {
3952             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
3953         }
3954     }
3955     tcg_temp_free_i32(tcg_t0);
3956     tcg_temp_free_i32(tcg_t1);
3957     tcg_temp_free_i32(tcg_t2);
3958 }
3959
3960 /* Conditional select
3961  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
3962  * +----+----+---+-----------------+------+------+-----+------+------+
3963  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
3964  * +----+----+---+-----------------+------+------+-----+------+------+
3965  */
3966 static void disas_cond_select(DisasContext *s, uint32_t insn)
3967 {
3968     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
3969     TCGv_i64 tcg_rd, zero;
3970     DisasCompare64 c;
3971
3972     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
3973         /* S == 1 or op2<1> == 1 */
3974         unallocated_encoding(s);
3975         return;
3976     }
3977     sf = extract32(insn, 31, 1);
3978     else_inv = extract32(insn, 30, 1);
3979     rm = extract32(insn, 16, 5);
3980     cond = extract32(insn, 12, 4);
3981     else_inc = extract32(insn, 10, 1);
3982     rn = extract32(insn, 5, 5);
3983     rd = extract32(insn, 0, 5);
3984
3985     tcg_rd = cpu_reg(s, rd);
3986
3987     a64_test_cc(&c, cond);
3988     zero = tcg_const_i64(0);
3989
3990     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
3991         /* CSET & CSETM.  */
3992         tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
3993         if (else_inv) {
3994             tcg_gen_neg_i64(tcg_rd, tcg_rd);
3995         }
3996     } else {
3997         TCGv_i64 t_true = cpu_reg(s, rn);
3998         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
3999         if (else_inv && else_inc) {
4000             tcg_gen_neg_i64(t_false, t_false);
4001         } else if (else_inv) {
4002             tcg_gen_not_i64(t_false, t_false);
4003         } else if (else_inc) {
4004             tcg_gen_addi_i64(t_false, t_false, 1);
4005         }
4006         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
4007     }
4008
4009     tcg_temp_free_i64(zero);
4010     a64_free_cc(&c);
4011
4012     if (!sf) {
4013         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4014     }
4015 }
4016
4017 static void handle_clz(DisasContext *s, unsigned int sf,
4018                        unsigned int rn, unsigned int rd)
4019 {
4020     TCGv_i64 tcg_rd, tcg_rn;
4021     tcg_rd = cpu_reg(s, rd);
4022     tcg_rn = cpu_reg(s, rn);
4023
4024     if (sf) {
4025         tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
4026     } else {
4027         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4028         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4029         tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
4030         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4031         tcg_temp_free_i32(tcg_tmp32);
4032     }
4033 }
4034
4035 static void handle_cls(DisasContext *s, unsigned int sf,
4036                        unsigned int rn, unsigned int rd)
4037 {
4038     TCGv_i64 tcg_rd, tcg_rn;
4039     tcg_rd = cpu_reg(s, rd);
4040     tcg_rn = cpu_reg(s, rn);
4041
4042     if (sf) {
4043         tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
4044     } else {
4045         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4046         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4047         tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
4048         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4049         tcg_temp_free_i32(tcg_tmp32);
4050     }
4051 }
4052
4053 static void handle_rbit(DisasContext *s, unsigned int sf,
4054                         unsigned int rn, unsigned int rd)
4055 {
4056     TCGv_i64 tcg_rd, tcg_rn;
4057     tcg_rd = cpu_reg(s, rd);
4058     tcg_rn = cpu_reg(s, rn);
4059
4060     if (sf) {
4061         gen_helper_rbit64(tcg_rd, tcg_rn);
4062     } else {
4063         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4064         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4065         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
4066         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4067         tcg_temp_free_i32(tcg_tmp32);
4068     }
4069 }
4070
4071 /* REV with sf==1, opcode==3 ("REV64") */
4072 static void handle_rev64(DisasContext *s, unsigned int sf,
4073                          unsigned int rn, unsigned int rd)
4074 {
4075     if (!sf) {
4076         unallocated_encoding(s);
4077         return;
4078     }
4079     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
4080 }
4081
4082 /* REV with sf==0, opcode==2
4083  * REV32 (sf==1, opcode==2)
4084  */
4085 static void handle_rev32(DisasContext *s, unsigned int sf,
4086                          unsigned int rn, unsigned int rd)
4087 {
4088     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4089
4090     if (sf) {
4091         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4092         TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4093
4094         /* bswap32_i64 requires zero high word */
4095         tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
4096         tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
4097         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
4098         tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
4099         tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
4100
4101         tcg_temp_free_i64(tcg_tmp);
4102     } else {
4103         tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
4104         tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
4105     }
4106 }
4107
4108 /* REV16 (opcode==1) */
4109 static void handle_rev16(DisasContext *s, unsigned int sf,
4110                          unsigned int rn, unsigned int rd)
4111 {
4112     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4113     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4114     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4115     TCGv_i64 mask = tcg_const_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
4116
4117     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
4118     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
4119     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
4120     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
4121     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
4122
4123     tcg_temp_free_i64(mask);
4124     tcg_temp_free_i64(tcg_tmp);
4125 }
4126
4127 /* Data-processing (1 source)
4128  *   31  30  29  28             21 20     16 15    10 9    5 4    0
4129  * +----+---+---+-----------------+---------+--------+------+------+
4130  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
4131  * +----+---+---+-----------------+---------+--------+------+------+
4132  */
4133 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
4134 {
4135     unsigned int sf, opcode, rn, rd;
4136
4137     if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
4138         unallocated_encoding(s);
4139         return;
4140     }
4141
4142     sf = extract32(insn, 31, 1);
4143     opcode = extract32(insn, 10, 6);
4144     rn = extract32(insn, 5, 5);
4145     rd = extract32(insn, 0, 5);
4146
4147     switch (opcode) {
4148     case 0: /* RBIT */
4149         handle_rbit(s, sf, rn, rd);
4150         break;
4151     case 1: /* REV16 */
4152         handle_rev16(s, sf, rn, rd);
4153         break;
4154     case 2: /* REV32 */
4155         handle_rev32(s, sf, rn, rd);
4156         break;
4157     case 3: /* REV64 */
4158         handle_rev64(s, sf, rn, rd);
4159         break;
4160     case 4: /* CLZ */
4161         handle_clz(s, sf, rn, rd);
4162         break;
4163     case 5: /* CLS */
4164         handle_cls(s, sf, rn, rd);
4165         break;
4166     }
4167 }
4168
4169 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
4170                        unsigned int rm, unsigned int rn, unsigned int rd)
4171 {
4172     TCGv_i64 tcg_n, tcg_m, tcg_rd;
4173     tcg_rd = cpu_reg(s, rd);
4174
4175     if (!sf && is_signed) {
4176         tcg_n = new_tmp_a64(s);
4177         tcg_m = new_tmp_a64(s);
4178         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
4179         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
4180     } else {
4181         tcg_n = read_cpu_reg(s, rn, sf);
4182         tcg_m = read_cpu_reg(s, rm, sf);
4183     }
4184
4185     if (is_signed) {
4186         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
4187     } else {
4188         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
4189     }
4190
4191     if (!sf) { /* zero extend final result */
4192         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4193     }
4194 }
4195
4196 /* LSLV, LSRV, ASRV, RORV */
4197 static void handle_shift_reg(DisasContext *s,
4198                              enum a64_shift_type shift_type, unsigned int sf,
4199                              unsigned int rm, unsigned int rn, unsigned int rd)
4200 {
4201     TCGv_i64 tcg_shift = tcg_temp_new_i64();
4202     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4203     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4204
4205     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
4206     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
4207     tcg_temp_free_i64(tcg_shift);
4208 }
4209
4210 /* CRC32[BHWX], CRC32C[BHWX] */
4211 static void handle_crc32(DisasContext *s,
4212                          unsigned int sf, unsigned int sz, bool crc32c,
4213                          unsigned int rm, unsigned int rn, unsigned int rd)
4214 {
4215     TCGv_i64 tcg_acc, tcg_val;
4216     TCGv_i32 tcg_bytes;
4217
4218     if (!arm_dc_feature(s, ARM_FEATURE_CRC)
4219         || (sf == 1 && sz != 3)
4220         || (sf == 0 && sz == 3)) {
4221         unallocated_encoding(s);
4222         return;
4223     }
4224
4225     if (sz == 3) {
4226         tcg_val = cpu_reg(s, rm);
4227     } else {
4228         uint64_t mask;
4229         switch (sz) {
4230         case 0:
4231             mask = 0xFF;
4232             break;
4233         case 1:
4234             mask = 0xFFFF;
4235             break;
4236         case 2:
4237             mask = 0xFFFFFFFF;
4238             break;
4239         default:
4240             g_assert_not_reached();
4241         }
4242         tcg_val = new_tmp_a64(s);
4243         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
4244     }
4245
4246     tcg_acc = cpu_reg(s, rn);
4247     tcg_bytes = tcg_const_i32(1 << sz);
4248
4249     if (crc32c) {
4250         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4251     } else {
4252         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4253     }
4254
4255     tcg_temp_free_i32(tcg_bytes);
4256 }
4257
4258 /* Data-processing (2 source)
4259  *   31   30  29 28             21 20  16 15    10 9    5 4    0
4260  * +----+---+---+-----------------+------+--------+------+------+
4261  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
4262  * +----+---+---+-----------------+------+--------+------+------+
4263  */
4264 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
4265 {
4266     unsigned int sf, rm, opcode, rn, rd;
4267     sf = extract32(insn, 31, 1);
4268     rm = extract32(insn, 16, 5);
4269     opcode = extract32(insn, 10, 6);
4270     rn = extract32(insn, 5, 5);
4271     rd = extract32(insn, 0, 5);
4272
4273     if (extract32(insn, 29, 1)) {
4274         unallocated_encoding(s);
4275         return;
4276     }
4277
4278     switch (opcode) {
4279     case 2: /* UDIV */
4280         handle_div(s, false, sf, rm, rn, rd);
4281         break;
4282     case 3: /* SDIV */
4283         handle_div(s, true, sf, rm, rn, rd);
4284         break;
4285     case 8: /* LSLV */
4286         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
4287         break;
4288     case 9: /* LSRV */
4289         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
4290         break;
4291     case 10: /* ASRV */
4292         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
4293         break;
4294     case 11: /* RORV */
4295         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
4296         break;
4297     case 16:
4298     case 17:
4299     case 18:
4300     case 19:
4301     case 20:
4302     case 21:
4303     case 22:
4304     case 23: /* CRC32 */
4305     {
4306         int sz = extract32(opcode, 0, 2);
4307         bool crc32c = extract32(opcode, 2, 1);
4308         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
4309         break;
4310     }
4311     default:
4312         unallocated_encoding(s);
4313         break;
4314     }
4315 }
4316
4317 /* Data processing - register */
4318 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
4319 {
4320     switch (extract32(insn, 24, 5)) {
4321     case 0x0a: /* Logical (shifted register) */
4322         disas_logic_reg(s, insn);
4323         break;
4324     case 0x0b: /* Add/subtract */
4325         if (insn & (1 << 21)) { /* (extended register) */
4326             disas_add_sub_ext_reg(s, insn);
4327         } else {
4328             disas_add_sub_reg(s, insn);
4329         }
4330         break;
4331     case 0x1b: /* Data-processing (3 source) */
4332         disas_data_proc_3src(s, insn);
4333         break;
4334     case 0x1a:
4335         switch (extract32(insn, 21, 3)) {
4336         case 0x0: /* Add/subtract (with carry) */
4337             disas_adc_sbc(s, insn);
4338             break;
4339         case 0x2: /* Conditional compare */
4340             disas_cc(s, insn); /* both imm and reg forms */
4341             break;
4342         case 0x4: /* Conditional select */
4343             disas_cond_select(s, insn);
4344             break;
4345         case 0x6: /* Data-processing */
4346             if (insn & (1 << 30)) { /* (1 source) */
4347                 disas_data_proc_1src(s, insn);
4348             } else {            /* (2 source) */
4349                 disas_data_proc_2src(s, insn);
4350             }
4351             break;
4352         default:
4353             unallocated_encoding(s);
4354             break;
4355         }
4356         break;
4357     default:
4358         unallocated_encoding(s);
4359         break;
4360     }
4361 }
4362
4363 static void handle_fp_compare(DisasContext *s, bool is_double,
4364                               unsigned int rn, unsigned int rm,
4365                               bool cmp_with_zero, bool signal_all_nans)
4366 {
4367     TCGv_i64 tcg_flags = tcg_temp_new_i64();
4368     TCGv_ptr fpst = get_fpstatus_ptr();
4369
4370     if (is_double) {
4371         TCGv_i64 tcg_vn, tcg_vm;
4372
4373         tcg_vn = read_fp_dreg(s, rn);
4374         if (cmp_with_zero) {
4375             tcg_vm = tcg_const_i64(0);
4376         } else {
4377             tcg_vm = read_fp_dreg(s, rm);
4378         }
4379         if (signal_all_nans) {
4380             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4381         } else {
4382             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4383         }
4384         tcg_temp_free_i64(tcg_vn);
4385         tcg_temp_free_i64(tcg_vm);
4386     } else {
4387         TCGv_i32 tcg_vn, tcg_vm;
4388
4389         tcg_vn = read_fp_sreg(s, rn);
4390         if (cmp_with_zero) {
4391             tcg_vm = tcg_const_i32(0);
4392         } else {
4393             tcg_vm = read_fp_sreg(s, rm);
4394         }
4395         if (signal_all_nans) {
4396             gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4397         } else {
4398             gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4399         }
4400         tcg_temp_free_i32(tcg_vn);
4401         tcg_temp_free_i32(tcg_vm);
4402     }
4403
4404     tcg_temp_free_ptr(fpst);
4405
4406     gen_set_nzcv(tcg_flags);
4407
4408     tcg_temp_free_i64(tcg_flags);
4409 }
4410
4411 /* Floating point compare
4412  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
4413  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4414  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
4415  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4416  */
4417 static void disas_fp_compare(DisasContext *s, uint32_t insn)
4418 {
4419     unsigned int mos, type, rm, op, rn, opc, op2r;
4420
4421     mos = extract32(insn, 29, 3);
4422     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4423     rm = extract32(insn, 16, 5);
4424     op = extract32(insn, 14, 2);
4425     rn = extract32(insn, 5, 5);
4426     opc = extract32(insn, 3, 2);
4427     op2r = extract32(insn, 0, 3);
4428
4429     if (mos || op || op2r || type > 1) {
4430         unallocated_encoding(s);
4431         return;
4432     }
4433
4434     if (!fp_access_check(s)) {
4435         return;
4436     }
4437
4438     handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
4439 }
4440
4441 /* Floating point conditional compare
4442  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
4443  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4444  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
4445  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4446  */
4447 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
4448 {
4449     unsigned int mos, type, rm, cond, rn, op, nzcv;
4450     TCGv_i64 tcg_flags;
4451     TCGLabel *label_continue = NULL;
4452
4453     mos = extract32(insn, 29, 3);
4454     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4455     rm = extract32(insn, 16, 5);
4456     cond = extract32(insn, 12, 4);
4457     rn = extract32(insn, 5, 5);
4458     op = extract32(insn, 4, 1);
4459     nzcv = extract32(insn, 0, 4);
4460
4461     if (mos || type > 1) {
4462         unallocated_encoding(s);
4463         return;
4464     }
4465
4466     if (!fp_access_check(s)) {
4467         return;
4468     }
4469
4470     if (cond < 0x0e) { /* not always */
4471         TCGLabel *label_match = gen_new_label();
4472         label_continue = gen_new_label();
4473         arm_gen_test_cc(cond, label_match);
4474         /* nomatch: */
4475         tcg_flags = tcg_const_i64(nzcv << 28);
4476         gen_set_nzcv(tcg_flags);
4477         tcg_temp_free_i64(tcg_flags);
4478         tcg_gen_br(label_continue);
4479         gen_set_label(label_match);
4480     }
4481
4482     handle_fp_compare(s, type, rn, rm, false, op);
4483
4484     if (cond < 0x0e) {
4485         gen_set_label(label_continue);
4486     }
4487 }
4488
4489 /* Floating point conditional select
4490  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
4491  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4492  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
4493  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4494  */
4495 static void disas_fp_csel(DisasContext *s, uint32_t insn)
4496 {
4497     unsigned int mos, type, rm, cond, rn, rd;
4498     TCGv_i64 t_true, t_false, t_zero;
4499     DisasCompare64 c;
4500
4501     mos = extract32(insn, 29, 3);
4502     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4503     rm = extract32(insn, 16, 5);
4504     cond = extract32(insn, 12, 4);
4505     rn = extract32(insn, 5, 5);
4506     rd = extract32(insn, 0, 5);
4507
4508     if (mos || type > 1) {
4509         unallocated_encoding(s);
4510         return;
4511     }
4512
4513     if (!fp_access_check(s)) {
4514         return;
4515     }
4516
4517     /* Zero extend sreg inputs to 64 bits now.  */
4518     t_true = tcg_temp_new_i64();
4519     t_false = tcg_temp_new_i64();
4520     read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
4521     read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);
4522
4523     a64_test_cc(&c, cond);
4524     t_zero = tcg_const_i64(0);
4525     tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
4526     tcg_temp_free_i64(t_zero);
4527     tcg_temp_free_i64(t_false);
4528     a64_free_cc(&c);
4529
4530     /* Note that sregs write back zeros to the high bits,
4531        and we've already done the zero-extension.  */
4532     write_fp_dreg(s, rd, t_true);
4533     tcg_temp_free_i64(t_true);
4534 }
4535
4536 /* Floating-point data-processing (1 source) - single precision */
4537 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
4538 {
4539     TCGv_ptr fpst;
4540     TCGv_i32 tcg_op;
4541     TCGv_i32 tcg_res;
4542
4543     fpst = get_fpstatus_ptr();
4544     tcg_op = read_fp_sreg(s, rn);
4545     tcg_res = tcg_temp_new_i32();
4546
4547     switch (opcode) {
4548     case 0x0: /* FMOV */
4549         tcg_gen_mov_i32(tcg_res, tcg_op);
4550         break;
4551     case 0x1: /* FABS */
4552         gen_helper_vfp_abss(tcg_res, tcg_op);
4553         break;
4554     case 0x2: /* FNEG */
4555         gen_helper_vfp_negs(tcg_res, tcg_op);
4556         break;
4557     case 0x3: /* FSQRT */
4558         gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
4559         break;
4560     case 0x8: /* FRINTN */
4561     case 0x9: /* FRINTP */
4562     case 0xa: /* FRINTM */
4563     case 0xb: /* FRINTZ */
4564     case 0xc: /* FRINTA */
4565     {
4566         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4567
4568         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4569         gen_helper_rints(tcg_res, tcg_op, fpst);
4570
4571         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4572         tcg_temp_free_i32(tcg_rmode);
4573         break;
4574     }
4575     case 0xe: /* FRINTX */
4576         gen_helper_rints_exact(tcg_res, tcg_op, fpst);
4577         break;
4578     case 0xf: /* FRINTI */
4579         gen_helper_rints(tcg_res, tcg_op, fpst);
4580         break;
4581     default:
4582         abort();
4583     }
4584
4585     write_fp_sreg(s, rd, tcg_res);
4586
4587     tcg_temp_free_ptr(fpst);
4588     tcg_temp_free_i32(tcg_op);
4589     tcg_temp_free_i32(tcg_res);
4590 }
4591
4592 /* Floating-point data-processing (1 source) - double precision */
4593 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
4594 {
4595     TCGv_ptr fpst;
4596     TCGv_i64 tcg_op;
4597     TCGv_i64 tcg_res;
4598
4599     fpst = get_fpstatus_ptr();
4600     tcg_op = read_fp_dreg(s, rn);
4601     tcg_res = tcg_temp_new_i64();
4602
4603     switch (opcode) {
4604     case 0x0: /* FMOV */
4605         tcg_gen_mov_i64(tcg_res, tcg_op);
4606         break;
4607     case 0x1: /* FABS */
4608         gen_helper_vfp_absd(tcg_res, tcg_op);
4609         break;
4610     case 0x2: /* FNEG */
4611         gen_helper_vfp_negd(tcg_res, tcg_op);
4612         break;
4613     case 0x3: /* FSQRT */
4614         gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
4615         break;
4616     case 0x8: /* FRINTN */
4617     case 0x9: /* FRINTP */
4618     case 0xa: /* FRINTM */
4619     case 0xb: /* FRINTZ */
4620     case 0xc: /* FRINTA */
4621     {
4622         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4623
4624         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4625         gen_helper_rintd(tcg_res, tcg_op, fpst);
4626
4627         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4628         tcg_temp_free_i32(tcg_rmode);
4629         break;
4630     }
4631     case 0xe: /* FRINTX */
4632         gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
4633         break;
4634     case 0xf: /* FRINTI */
4635         gen_helper_rintd(tcg_res, tcg_op, fpst);
4636         break;
4637     default:
4638         abort();
4639     }
4640
4641     write_fp_dreg(s, rd, tcg_res);
4642
4643     tcg_temp_free_ptr(fpst);
4644     tcg_temp_free_i64(tcg_op);
4645     tcg_temp_free_i64(tcg_res);
4646 }
4647
4648 static void handle_fp_fcvt(DisasContext *s, int opcode,
4649                            int rd, int rn, int dtype, int ntype)
4650 {
4651     switch (ntype) {
4652     case 0x0:
4653     {
4654         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4655         if (dtype == 1) {
4656             /* Single to double */
4657             TCGv_i64 tcg_rd = tcg_temp_new_i64();
4658             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
4659             write_fp_dreg(s, rd, tcg_rd);
4660             tcg_temp_free_i64(tcg_rd);
4661         } else {
4662             /* Single to half */
4663             TCGv_i32 tcg_rd = tcg_temp_new_i32();
4664             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
4665             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4666             write_fp_sreg(s, rd, tcg_rd);
4667             tcg_temp_free_i32(tcg_rd);
4668         }
4669         tcg_temp_free_i32(tcg_rn);
4670         break;
4671     }
4672     case 0x1:
4673     {
4674         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
4675         TCGv_i32 tcg_rd = tcg_temp_new_i32();
4676         if (dtype == 0) {
4677             /* Double to single */
4678             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
4679         } else {
4680             /* Double to half */
4681             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
4682             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4683         }
4684         write_fp_sreg(s, rd, tcg_rd);
4685         tcg_temp_free_i32(tcg_rd);
4686         tcg_temp_free_i64(tcg_rn);
4687         break;
4688     }
4689     case 0x3:
4690     {
4691         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4692         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
4693         if (dtype == 0) {
4694             /* Half to single */
4695             TCGv_i32 tcg_rd = tcg_temp_new_i32();
4696             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
4697             write_fp_sreg(s, rd, tcg_rd);
4698             tcg_temp_free_i32(tcg_rd);
4699         } else {
4700             /* Half to double */
4701             TCGv_i64 tcg_rd = tcg_temp_new_i64();
4702             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
4703             write_fp_dreg(s, rd, tcg_rd);
4704             tcg_temp_free_i64(tcg_rd);
4705         }
4706         tcg_temp_free_i32(tcg_rn);
4707         break;
4708     }
4709     default:
4710         abort();
4711     }
4712 }
4713
4714 /* Floating point data-processing (1 source)
4715  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
4716  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4717  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
4718  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4719  */
4720 static void disas_fp_1src(DisasContext *s, uint32_t insn)
4721 {
4722     int type = extract32(insn, 22, 2);
4723     int opcode = extract32(insn, 15, 6);
4724     int rn = extract32(insn, 5, 5);
4725     int rd = extract32(insn, 0, 5);
4726
4727     switch (opcode) {
4728     case 0x4: case 0x5: case 0x7:
4729     {
4730         /* FCVT between half, single and double precision */
4731         int dtype = extract32(opcode, 0, 2);
4732         if (type == 2 || dtype == type) {
4733             unallocated_encoding(s);
4734             return;
4735         }
4736         if (!fp_access_check(s)) {
4737             return;
4738         }
4739
4740         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
4741         break;
4742     }
4743     case 0x0 ... 0x3:
4744     case 0x8 ... 0xc:
4745     case 0xe ... 0xf:
4746         /* 32-to-32 and 64-to-64 ops */
4747         switch (type) {
4748         case 0:
4749             if (!fp_access_check(s)) {
4750                 return;
4751             }
4752
4753             handle_fp_1src_single(s, opcode, rd, rn);
4754             break;
4755         case 1:
4756             if (!fp_access_check(s)) {
4757                 return;
4758             }
4759
4760             handle_fp_1src_double(s, opcode, rd, rn);
4761             break;
4762         default:
4763             unallocated_encoding(s);
4764         }
4765         break;
4766     default:
4767         unallocated_encoding(s);
4768         break;
4769     }
4770 }
4771
4772 /* Floating-point data-processing (2 source) - single precision */
4773 static void handle_fp_2src_single(DisasContext *s, int opcode,
4774                                   int rd, int rn, int rm)
4775 {
4776     TCGv_i32 tcg_op1;
4777     TCGv_i32 tcg_op2;
4778     TCGv_i32 tcg_res;
4779     TCGv_ptr fpst;
4780
4781     tcg_res = tcg_temp_new_i32();
4782     fpst = get_fpstatus_ptr();
4783     tcg_op1 = read_fp_sreg(s, rn);
4784     tcg_op2 = read_fp_sreg(s, rm);
4785
4786     switch (opcode) {
4787     case 0x0: /* FMUL */
4788         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4789         break;
4790     case 0x1: /* FDIV */
4791         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
4792         break;
4793     case 0x2: /* FADD */
4794         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
4795         break;
4796     case 0x3: /* FSUB */
4797         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
4798         break;
4799     case 0x4: /* FMAX */
4800         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
4801         break;
4802     case 0x5: /* FMIN */
4803         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
4804         break;
4805     case 0x6: /* FMAXNM */
4806         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
4807         break;
4808     case 0x7: /* FMINNM */
4809         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
4810         break;
4811     case 0x8: /* FNMUL */
4812         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4813         gen_helper_vfp_negs(tcg_res, tcg_res);
4814         break;
4815     }
4816
4817     write_fp_sreg(s, rd, tcg_res);
4818
4819     tcg_temp_free_ptr(fpst);
4820     tcg_temp_free_i32(tcg_op1);
4821     tcg_temp_free_i32(tcg_op2);
4822     tcg_temp_free_i32(tcg_res);
4823 }
4824
4825 /* Floating-point data-processing (2 source) - double precision */
4826 static void handle_fp_2src_double(DisasContext *s, int opcode,
4827                                   int rd, int rn, int rm)
4828 {
4829     TCGv_i64 tcg_op1;
4830     TCGv_i64 tcg_op2;
4831     TCGv_i64 tcg_res;
4832     TCGv_ptr fpst;
4833
4834     tcg_res = tcg_temp_new_i64();
4835     fpst = get_fpstatus_ptr();
4836     tcg_op1 = read_fp_dreg(s, rn);
4837     tcg_op2 = read_fp_dreg(s, rm);
4838
4839     switch (opcode) {
4840     case 0x0: /* FMUL */
4841         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4842         break;
4843     case 0x1: /* FDIV */
4844         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
4845         break;
4846     case 0x2: /* FADD */
4847         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
4848         break;
4849     case 0x3: /* FSUB */
4850         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
4851         break;
4852     case 0x4: /* FMAX */
4853         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
4854         break;
4855     case 0x5: /* FMIN */
4856         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
4857         break;
4858     case 0x6: /* FMAXNM */
4859         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4860         break;
4861     case 0x7: /* FMINNM */
4862         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4863         break;
4864     case 0x8: /* FNMUL */
4865         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4866         gen_helper_vfp_negd(tcg_res, tcg_res);
4867         break;
4868     }
4869
4870     write_fp_dreg(s, rd, tcg_res);
4871
4872     tcg_temp_free_ptr(fpst);
4873     tcg_temp_free_i64(tcg_op1);
4874     tcg_temp_free_i64(tcg_op2);
4875     tcg_temp_free_i64(tcg_res);
4876 }
4877
4878 /* Floating point data-processing (2 source)
4879  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
4880  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4881  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
4882  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4883  */
4884 static void disas_fp_2src(DisasContext *s, uint32_t insn)
4885 {
4886     int type = extract32(insn, 22, 2);
4887     int rd = extract32(insn, 0, 5);
4888     int rn = extract32(insn, 5, 5);
4889     int rm = extract32(insn, 16, 5);
4890     int opcode = extract32(insn, 12, 4);
4891
4892     if (opcode > 8) {
4893         unallocated_encoding(s);
4894         return;
4895     }
4896
4897     switch (type) {
4898     case 0:
4899         if (!fp_access_check(s)) {
4900             return;
4901         }
4902         handle_fp_2src_single(s, opcode, rd, rn, rm);
4903         break;
4904     case 1:
4905         if (!fp_access_check(s)) {
4906             return;
4907         }
4908         handle_fp_2src_double(s, opcode, rd, rn, rm);
4909         break;
4910     default:
4911         unallocated_encoding(s);
4912     }
4913 }
4914
4915 /* Floating-point data-processing (3 source) - single precision */
4916 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
4917                                   int rd, int rn, int rm, int ra)
4918 {
4919     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
4920     TCGv_i32 tcg_res = tcg_temp_new_i32();
4921     TCGv_ptr fpst = get_fpstatus_ptr();
4922
4923     tcg_op1 = read_fp_sreg(s, rn);
4924     tcg_op2 = read_fp_sreg(s, rm);
4925     tcg_op3 = read_fp_sreg(s, ra);
4926
4927     /* These are fused multiply-add, and must be done as one
4928      * floating point operation with no rounding between the
4929      * multiplication and addition steps.
4930      * NB that doing the negations here as separate steps is
4931      * correct : an input NaN should come out with its sign bit
4932      * flipped if it is a negated-input.
4933      */
4934     if (o1 == true) {
4935         gen_helper_vfp_negs(tcg_op3, tcg_op3);
4936     }
4937
4938     if (o0 != o1) {
4939         gen_helper_vfp_negs(tcg_op1, tcg_op1);
4940     }
4941
4942     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4943
4944     write_fp_sreg(s, rd, tcg_res);
4945
4946     tcg_temp_free_ptr(fpst);
4947     tcg_temp_free_i32(tcg_op1);
4948     tcg_temp_free_i32(tcg_op2);
4949     tcg_temp_free_i32(tcg_op3);
4950     tcg_temp_free_i32(tcg_res);
4951 }
4952
4953 /* Floating-point data-processing (3 source) - double precision */
4954 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
4955                                   int rd, int rn, int rm, int ra)
4956 {
4957     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
4958     TCGv_i64 tcg_res = tcg_temp_new_i64();
4959     TCGv_ptr fpst = get_fpstatus_ptr();
4960
4961     tcg_op1 = read_fp_dreg(s, rn);
4962     tcg_op2 = read_fp_dreg(s, rm);
4963     tcg_op3 = read_fp_dreg(s, ra);
4964
4965     /* These are fused multiply-add, and must be done as one
4966      * floating point operation with no rounding between the
4967      * multiplication and addition steps.
4968      * NB that doing the negations here as separate steps is
4969      * correct : an input NaN should come out with its sign bit
4970      * flipped if it is a negated-input.
4971      */
4972     if (o1 == true) {
4973         gen_helper_vfp_negd(tcg_op3, tcg_op3);
4974     }
4975
4976     if (o0 != o1) {
4977         gen_helper_vfp_negd(tcg_op1, tcg_op1);
4978     }
4979
4980     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4981
4982     write_fp_dreg(s, rd, tcg_res);
4983
4984     tcg_temp_free_ptr(fpst);
4985     tcg_temp_free_i64(tcg_op1);
4986     tcg_temp_free_i64(tcg_op2);
4987     tcg_temp_free_i64(tcg_op3);
4988     tcg_temp_free_i64(tcg_res);
4989 }
4990
4991 /* Floating point data-processing (3 source)
4992  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
4993  * +---+---+---+-----------+------+----+------+----+------+------+------+
4994  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4995  * +---+---+---+-----------+------+----+------+----+------+------+------+
4996  */
4997 static void disas_fp_3src(DisasContext *s, uint32_t insn)
4998 {
4999     int type = extract32(insn, 22, 2);
5000     int rd = extract32(insn, 0, 5);
5001     int rn = extract32(insn, 5, 5);
5002     int ra = extract32(insn, 10, 5);
5003     int rm = extract32(insn, 16, 5);
5004     bool o0 = extract32(insn, 15, 1);
5005     bool o1 = extract32(insn, 21, 1);
5006
5007     switch (type) {
5008     case 0:
5009         if (!fp_access_check(s)) {
5010             return;
5011         }
5012         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
5013         break;
5014     case 1:
5015         if (!fp_access_check(s)) {
5016             return;
5017         }
5018         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
5019         break;
5020     default:
5021         unallocated_encoding(s);
5022     }
5023 }
5024
5025 /* The imm8 encodes the sign bit, enough bits to represent an exponent in
5026  * the range 01....1xx to 10....0xx, and the most significant 4 bits of
5027  * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
5028  */
5029 static uint64_t vfp_expand_imm(int size, uint8_t imm8)
5030 {
5031     uint64_t imm;
5032
5033     switch (size) {
5034     case MO_64:
5035         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5036             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
5037             extract32(imm8, 0, 6);
5038         imm <<= 48;
5039         break;
5040     case MO_32:
5041         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5042             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
5043             (extract32(imm8, 0, 6) << 3);
5044         imm <<= 16;
5045         break;
5046     case MO_16:
5047         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5048             (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
5049             (extract32(imm8, 0, 6) << 6);
5050         break;
5051     default:
5052         g_assert_not_reached();
5053     }
5054     return imm;
5055 }
5056
5057 /* Floating point immediate
5058  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
5059  * +---+---+---+-----------+------+---+------------+-------+------+------+
5060  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
5061  * +---+---+---+-----------+------+---+------------+-------+------+------+
5062  */
5063 static void disas_fp_imm(DisasContext *s, uint32_t insn)
5064 {
5065     int rd = extract32(insn, 0, 5);
5066     int imm8 = extract32(insn, 13, 8);
5067     int is_double = extract32(insn, 22, 2);
5068     uint64_t imm;
5069     TCGv_i64 tcg_res;
5070
5071     if (is_double > 1) {
5072         unallocated_encoding(s);
5073         return;
5074     }
5075
5076     if (!fp_access_check(s)) {
5077         return;
5078     }
5079
5080     imm = vfp_expand_imm(MO_32 + is_double, imm8);
5081
5082     tcg_res = tcg_const_i64(imm);
5083     write_fp_dreg(s, rd, tcg_res);
5084     tcg_temp_free_i64(tcg_res);
5085 }
5086
5087 /* Handle floating point <=> fixed point conversions. Note that we can
5088  * also deal with fp <=> integer conversions as a special case (scale == 64)
5089  * OPTME: consider handling that special case specially or at least skipping
5090  * the call to scalbn in the helpers for zero shifts.
5091  */
5092 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
5093                            bool itof, int rmode, int scale, int sf, int type)
5094 {
5095     bool is_signed = !(opcode & 1);
5096     bool is_double = type;
5097     TCGv_ptr tcg_fpstatus;
5098     TCGv_i32 tcg_shift;
5099
5100     tcg_fpstatus = get_fpstatus_ptr();
5101
5102     tcg_shift = tcg_const_i32(64 - scale);
5103
5104     if (itof) {
5105         TCGv_i64 tcg_int = cpu_reg(s, rn);
5106         if (!sf) {
5107             TCGv_i64 tcg_extend = new_tmp_a64(s);
5108
5109             if (is_signed) {
5110                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
5111             } else {
5112                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
5113             }
5114
5115             tcg_int = tcg_extend;
5116         }
5117
5118         if (is_double) {
5119             TCGv_i64 tcg_double = tcg_temp_new_i64();
5120             if (is_signed) {
5121                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
5122                                      tcg_shift, tcg_fpstatus);
5123             } else {
5124                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
5125                                      tcg_shift, tcg_fpstatus);
5126             }
5127             write_fp_dreg(s, rd, tcg_double);
5128             tcg_temp_free_i64(tcg_double);
5129         } else {
5130             TCGv_i32 tcg_single = tcg_temp_new_i32();
5131             if (is_signed) {
5132                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
5133                                      tcg_shift, tcg_fpstatus);
5134             } else {
5135                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
5136                                      tcg_shift, tcg_fpstatus);
5137             }
5138             write_fp_sreg(s, rd, tcg_single);
5139             tcg_temp_free_i32(tcg_single);
5140         }
5141     } else {
5142         TCGv_i64 tcg_int = cpu_reg(s, rd);
5143         TCGv_i32 tcg_rmode;
5144
5145         if (extract32(opcode, 2, 1)) {
5146             /* There are too many rounding modes to all fit into rmode,
5147              * so FCVTA[US] is a special case.
5148              */
5149             rmode = FPROUNDING_TIEAWAY;
5150         }
5151
5152         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
5153
5154         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
5155
5156         if (is_double) {
5157             TCGv_i64 tcg_double = read_fp_dreg(s, rn);
5158             if (is_signed) {
5159                 if (!sf) {
5160                     gen_helper_vfp_tosld(tcg_int, tcg_double,
5161                                          tcg_shift, tcg_fpstatus);
5162                 } else {
5163                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
5164                                          tcg_shift, tcg_fpstatus);
5165                 }
5166             } else {
5167                 if (!sf) {
5168                     gen_helper_vfp_tould(tcg_int, tcg_double,
5169                                          tcg_shift, tcg_fpstatus);
5170                 } else {
5171                     gen_helper_vfp_touqd(tcg_int, tcg_double,
5172                                          tcg_shift, tcg_fpstatus);
5173                 }
5174             }
5175             tcg_temp_free_i64(tcg_double);
5176         } else {
5177             TCGv_i32 tcg_single = read_fp_sreg(s, rn);
5178             if (sf) {
5179                 if (is_signed) {
5180                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
5181                                          tcg_shift, tcg_fpstatus);
5182                 } else {
5183                     gen_helper_vfp_touqs(tcg_int, tcg_single,
5184                                          tcg_shift, tcg_fpstatus);
5185                 }
5186             } else {
5187                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
5188                 if (is_signed) {
5189                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
5190                                          tcg_shift, tcg_fpstatus);
5191                 } else {
5192                     gen_helper_vfp_touls(tcg_dest, tcg_single,
5193                                          tcg_shift, tcg_fpstatus);
5194                 }
5195                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
5196                 tcg_temp_free_i32(tcg_dest);
5197             }
5198             tcg_temp_free_i32(tcg_single);
5199         }
5200
5201         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
5202         tcg_temp_free_i32(tcg_rmode);
5203
5204         if (!sf) {
5205             tcg_gen_ext32u_i64(tcg_int, tcg_int);
5206         }
5207     }
5208
5209     tcg_temp_free_ptr(tcg_fpstatus);
5210     tcg_temp_free_i32(tcg_shift);
5211 }
5212
5213 /* Floating point <-> fixed point conversions
5214  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
5215  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5216  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
5217  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5218  */
5219 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
5220 {
5221     int rd = extract32(insn, 0, 5);
5222     int rn = extract32(insn, 5, 5);
5223     int scale = extract32(insn, 10, 6);
5224     int opcode = extract32(insn, 16, 3);
5225     int rmode = extract32(insn, 19, 2);
5226     int type = extract32(insn, 22, 2);
5227     bool sbit = extract32(insn, 29, 1);
5228     bool sf = extract32(insn, 31, 1);
5229     bool itof;
5230
5231     if (sbit || (type > 1)
5232         || (!sf && scale < 32)) {
5233         unallocated_encoding(s);
5234         return;
5235     }
5236
5237     switch ((rmode << 3) | opcode) {
5238     case 0x2: /* SCVTF */
5239     case 0x3: /* UCVTF */
5240         itof = true;
5241         break;
5242     case 0x18: /* FCVTZS */
5243     case 0x19: /* FCVTZU */
5244         itof = false;
5245         break;
5246     default:
5247         unallocated_encoding(s);
5248         return;
5249     }
5250
5251     if (!fp_access_check(s)) {
5252         return;
5253     }
5254
5255     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
5256 }
5257
5258 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
5259 {
5260     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
5261      * without conversion.
5262      */
5263
5264     if (itof) {
5265         TCGv_i64 tcg_rn = cpu_reg(s, rn);
5266
5267         switch (type) {
5268         case 0:
5269         {
5270             /* 32 bit */
5271             TCGv_i64 tmp = tcg_temp_new_i64();
5272             tcg_gen_ext32u_i64(tmp, tcg_rn);
5273             tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
5274             tcg_gen_movi_i64(tmp, 0);
5275             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5276             tcg_temp_free_i64(tmp);
5277             break;
5278         }
5279         case 1:
5280         {
5281             /* 64 bit */
5282             TCGv_i64 tmp = tcg_const_i64(0);
5283             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
5284             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5285             tcg_temp_free_i64(tmp);
5286             break;
5287         }
5288         case 2:
5289             /* 64 bit to top half. */
5290             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
5291             break;
5292         }
5293     } else {
5294         TCGv_i64 tcg_rd = cpu_reg(s, rd);
5295
5296         switch (type) {
5297         case 0:
5298             /* 32 bit */
5299             tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
5300             break;
5301         case 1:
5302             /* 64 bit */
5303             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
5304             break;
5305         case 2:
5306             /* 64 bits from top half */
5307             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
5308             break;
5309         }
5310     }
5311 }
5312
5313 /* Floating point <-> integer conversions
5314  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
5315  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5316  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
5317  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5318  */
5319 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
5320 {
5321     int rd = extract32(insn, 0, 5);
5322     int rn = extract32(insn, 5, 5);
5323     int opcode = extract32(insn, 16, 3);
5324     int rmode = extract32(insn, 19, 2);
5325     int type = extract32(insn, 22, 2);
5326     bool sbit = extract32(insn, 29, 1);
5327     bool sf = extract32(insn, 31, 1);
5328
5329     if (sbit) {
5330         unallocated_encoding(s);
5331         return;
5332     }
5333
5334     if (opcode > 5) {
5335         /* FMOV */
5336         bool itof = opcode & 1;
5337
5338         if (rmode >= 2) {
5339             unallocated_encoding(s);
5340             return;
5341         }
5342
5343         switch (sf << 3 | type << 1 | rmode) {
5344         case 0x0: /* 32 bit */
5345         case 0xa: /* 64 bit */
5346         case 0xd: /* 64 bit to top half of quad */
5347             break;
5348         default:
5349             /* all other sf/type/rmode combinations are invalid */
5350             unallocated_encoding(s);
5351             break;
5352         }
5353
5354         if (!fp_access_check(s)) {
5355             return;
5356         }
5357         handle_fmov(s, rd, rn, type, itof);
5358     } else {
5359         /* actual FP conversions */
5360         bool itof = extract32(opcode, 1, 1);
5361
5362         if (type > 1 || (rmode != 0 && opcode > 1)) {
5363             unallocated_encoding(s);
5364             return;
5365         }
5366
5367         if (!fp_access_check(s)) {
5368             return;
5369         }
5370         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
5371     }
5372 }
5373
5374 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
5375  *   31  30  29 28     25 24                          0
5376  * +---+---+---+---------+-----------------------------+
5377  * |   | 0 |   | 1 1 1 1 |                             |
5378  * +---+---+---+---------+-----------------------------+
5379  */
5380 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
5381 {
5382     if (extract32(insn, 24, 1)) {
5383         /* Floating point data-processing (3 source) */
5384         disas_fp_3src(s, insn);
5385     } else if (extract32(insn, 21, 1) == 0) {
5386         /* Floating point to fixed point conversions */
5387         disas_fp_fixed_conv(s, insn);
5388     } else {
5389         switch (extract32(insn, 10, 2)) {
5390         case 1:
5391             /* Floating point conditional compare */
5392             disas_fp_ccomp(s, insn);
5393             break;
5394         case 2:
5395             /* Floating point data-processing (2 source) */
5396             disas_fp_2src(s, insn);
5397             break;
5398         case 3:
5399             /* Floating point conditional select */
5400             disas_fp_csel(s, insn);
5401             break;
5402         case 0:
5403             switch (ctz32(extract32(insn, 12, 4))) {
5404             case 0: /* [15:12] == xxx1 */
5405                 /* Floating point immediate */
5406                 disas_fp_imm(s, insn);
5407                 break;
5408             case 1: /* [15:12] == xx10 */
5409                 /* Floating point compare */
5410                 disas_fp_compare(s, insn);
5411                 break;
5412             case 2: /* [15:12] == x100 */
5413                 /* Floating point data-processing (1 source) */
5414                 disas_fp_1src(s, insn);
5415                 break;
5416             case 3: /* [15:12] == 1000 */
5417                 unallocated_encoding(s);
5418                 break;
5419             default: /* [15:12] == 0000 */
5420                 /* Floating point <-> integer conversions */
5421                 disas_fp_int_conv(s, insn);
5422                 break;
5423             }
5424             break;
5425         }
5426     }
5427 }
5428
5429 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
5430                      int pos)
5431 {
5432     /* Extract 64 bits from the middle of two concatenated 64 bit
5433      * vector register slices left:right. The extracted bits start
5434      * at 'pos' bits into the right (least significant) side.
5435      * We return the result in tcg_right, and guarantee not to
5436      * trash tcg_left.
5437      */
5438     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5439     assert(pos > 0 && pos < 64);
5440
5441     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
5442     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
5443     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
5444
5445     tcg_temp_free_i64(tcg_tmp);
5446 }
5447
5448 /* EXT
5449  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
5450  * +---+---+-------------+-----+---+------+---+------+---+------+------+
5451  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
5452  * +---+---+-------------+-----+---+------+---+------+---+------+------+
5453  */
5454 static void disas_simd_ext(DisasContext *s, uint32_t insn)
5455 {
5456     int is_q = extract32(insn, 30, 1);
5457     int op2 = extract32(insn, 22, 2);
5458     int imm4 = extract32(insn, 11, 4);
5459     int rm = extract32(insn, 16, 5);
5460     int rn = extract32(insn, 5, 5);
5461     int rd = extract32(insn, 0, 5);
5462     int pos = imm4 << 3;
5463     TCGv_i64 tcg_resl, tcg_resh;
5464
5465     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
5466         unallocated_encoding(s);
5467         return;
5468     }
5469
5470     if (!fp_access_check(s)) {
5471         return;
5472     }
5473
5474     tcg_resh = tcg_temp_new_i64();
5475     tcg_resl = tcg_temp_new_i64();
5476
5477     /* Vd gets bits starting at pos bits into Vm:Vn. This is
5478      * either extracting 128 bits from a 128:128 concatenation, or
5479      * extracting 64 bits from a 64:64 concatenation.
5480      */
5481     if (!is_q) {
5482         read_vec_element(s, tcg_resl, rn, 0, MO_64);
5483         if (pos != 0) {
5484             read_vec_element(s, tcg_resh, rm, 0, MO_64);
5485             do_ext64(s, tcg_resh, tcg_resl, pos);
5486         }
5487         tcg_gen_movi_i64(tcg_resh, 0);
5488     } else {
5489         TCGv_i64 tcg_hh;
5490         typedef struct {
5491             int reg;
5492             int elt;
5493         } EltPosns;
5494         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
5495         EltPosns *elt = eltposns;
5496
5497         if (pos >= 64) {
5498             elt++;
5499             pos -= 64;
5500         }
5501
5502         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
5503         elt++;
5504         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
5505         elt++;
5506         if (pos != 0) {
5507             do_ext64(s, tcg_resh, tcg_resl, pos);
5508             tcg_hh = tcg_temp_new_i64();
5509             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
5510             do_ext64(s, tcg_hh, tcg_resh, pos);
5511             tcg_temp_free_i64(tcg_hh);
5512         }
5513     }
5514
5515     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5516     tcg_temp_free_i64(tcg_resl);
5517     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5518     tcg_temp_free_i64(tcg_resh);
5519 }
5520
5521 /* TBL/TBX
5522  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
5523  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5524  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
5525  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5526  */
5527 static void disas_simd_tb(DisasContext *s, uint32_t insn)
5528 {
5529     int op2 = extract32(insn, 22, 2);
5530     int is_q = extract32(insn, 30, 1);
5531     int rm = extract32(insn, 16, 5);
5532     int rn = extract32(insn, 5, 5);
5533     int rd = extract32(insn, 0, 5);
5534     int is_tblx = extract32(insn, 12, 1);
5535     int len = extract32(insn, 13, 2);
5536     TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
5537     TCGv_i32 tcg_regno, tcg_numregs;
5538
5539     if (op2 != 0) {
5540         unallocated_encoding(s);
5541         return;
5542     }
5543
5544     if (!fp_access_check(s)) {
5545         return;
5546     }
5547
5548     /* This does a table lookup: for every byte element in the input
5549      * we index into a table formed from up to four vector registers,
5550      * and then the output is the result of the lookups. Our helper
5551      * function does the lookup operation for a single 64 bit part of
5552      * the input.
5553      */
5554     tcg_resl = tcg_temp_new_i64();
5555     tcg_resh = tcg_temp_new_i64();
5556
5557     if (is_tblx) {
5558         read_vec_element(s, tcg_resl, rd, 0, MO_64);
5559     } else {
5560         tcg_gen_movi_i64(tcg_resl, 0);
5561     }
5562     if (is_tblx && is_q) {
5563         read_vec_element(s, tcg_resh, rd, 1, MO_64);
5564     } else {
5565         tcg_gen_movi_i64(tcg_resh, 0);
5566     }
5567
5568     tcg_idx = tcg_temp_new_i64();
5569     tcg_regno = tcg_const_i32(rn);
5570     tcg_numregs = tcg_const_i32(len + 1);
5571     read_vec_element(s, tcg_idx, rm, 0, MO_64);
5572     gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
5573                         tcg_regno, tcg_numregs);
5574     if (is_q) {
5575         read_vec_element(s, tcg_idx, rm, 1, MO_64);
5576         gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
5577                             tcg_regno, tcg_numregs);
5578     }
5579     tcg_temp_free_i64(tcg_idx);
5580     tcg_temp_free_i32(tcg_regno);
5581     tcg_temp_free_i32(tcg_numregs);
5582
5583     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5584     tcg_temp_free_i64(tcg_resl);
5585     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5586     tcg_temp_free_i64(tcg_resh);
5587 }
5588
5589 /* ZIP/UZP/TRN
5590  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
5591  * +---+---+-------------+------+---+------+---+------------------+------+
5592  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
5593  * +---+---+-------------+------+---+------+---+------------------+------+
5594  */
5595 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
5596 {
5597     int rd = extract32(insn, 0, 5);
5598     int rn = extract32(insn, 5, 5);
5599     int rm = extract32(insn, 16, 5);
5600     int size = extract32(insn, 22, 2);
5601     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
5602      * bit 2 indicates 1 vs 2 variant of the insn.
5603      */
5604     int opcode = extract32(insn, 12, 2);
5605     bool part = extract32(insn, 14, 1);
5606     bool is_q = extract32(insn, 30, 1);
5607     int esize = 8 << size;
5608     int i, ofs;
5609     int datasize = is_q ? 128 : 64;
5610     int elements = datasize / esize;
5611     TCGv_i64 tcg_res, tcg_resl, tcg_resh;
5612
5613     if (opcode == 0 || (size == 3 && !is_q)) {
5614         unallocated_encoding(s);
5615         return;
5616     }
5617
5618     if (!fp_access_check(s)) {
5619         return;
5620     }
5621
5622     tcg_resl = tcg_const_i64(0);
5623     tcg_resh = tcg_const_i64(0);
5624     tcg_res = tcg_temp_new_i64();
5625
5626     for (i = 0; i < elements; i++) {
5627         switch (opcode) {
5628         case 1: /* UZP1/2 */
5629         {
5630             int midpoint = elements / 2;
5631             if (i < midpoint) {
5632                 read_vec_element(s, tcg_res, rn, 2 * i + part, size);
5633             } else {
5634                 read_vec_element(s, tcg_res, rm,
5635                                  2 * (i - midpoint) + part, size);
5636             }
5637             break;
5638         }
5639         case 2: /* TRN1/2 */
5640             if (i & 1) {
5641                 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
5642             } else {
5643                 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
5644             }
5645             break;
5646         case 3: /* ZIP1/2 */
5647         {
5648             int base = part * elements / 2;
5649             if (i & 1) {
5650                 read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
5651             } else {
5652                 read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
5653             }
5654             break;
5655         }
5656         default:
5657             g_assert_not_reached();
5658         }
5659
5660         ofs = i * esize;
5661         if (ofs < 64) {
5662             tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
5663             tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
5664         } else {
5665             tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
5666             tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
5667         }
5668     }
5669
5670     tcg_temp_free_i64(tcg_res);
5671
5672     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5673     tcg_temp_free_i64(tcg_resl);
5674     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5675     tcg_temp_free_i64(tcg_resh);
5676 }
5677
5678 static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
5679                         int opc, bool is_min, TCGv_ptr fpst)
5680 {
5681     /* Helper function for disas_simd_across_lanes: do a single precision
5682      * min/max operation on the specified two inputs,
5683      * and return the result in tcg_elt1.
5684      */
5685     if (opc == 0xc) {
5686         if (is_min) {
5687             gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5688         } else {
5689             gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5690         }
5691     } else {
5692         assert(opc == 0xf);
5693         if (is_min) {
5694             gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5695         } else {
5696             gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5697         }
5698     }
5699 }
5700
5701 /* AdvSIMD across lanes
5702  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
5703  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5704  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
5705  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5706  */
5707 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
5708 {
5709     int rd = extract32(insn, 0, 5);
5710     int rn = extract32(insn, 5, 5);
5711     int size = extract32(insn, 22, 2);
5712     int opcode = extract32(insn, 12, 5);
5713     bool is_q = extract32(insn, 30, 1);
5714     bool is_u = extract32(insn, 29, 1);
5715     bool is_fp = false;
5716     bool is_min = false;
5717     int esize;
5718     int elements;
5719     int i;
5720     TCGv_i64 tcg_res, tcg_elt;
5721
5722     switch (opcode) {
5723     case 0x1b: /* ADDV */
5724         if (is_u) {
5725             unallocated_encoding(s);
5726             return;
5727         }
5728         /* fall through */
5729     case 0x3: /* SADDLV, UADDLV */
5730     case 0xa: /* SMAXV, UMAXV */
5731     case 0x1a: /* SMINV, UMINV */
5732         if (size == 3 || (size == 2 && !is_q)) {
5733             unallocated_encoding(s);
5734             return;
5735         }
5736         break;
5737     case 0xc: /* FMAXNMV, FMINNMV */
5738     case 0xf: /* FMAXV, FMINV */
5739         if (!is_u || !is_q || extract32(size, 0, 1)) {
5740             unallocated_encoding(s);
5741             return;
5742         }
5743         /* Bit 1 of size field encodes min vs max, and actual size is always
5744          * 32 bits: adjust the size variable so following code can rely on it
5745          */
5746         is_min = extract32(size, 1, 1);
5747         is_fp = true;
5748         size = 2;
5749         break;
5750     default:
5751         unallocated_encoding(s);
5752         return;
5753     }
5754
5755     if (!fp_access_check(s)) {
5756         return;
5757     }
5758
5759     esize = 8 << size;
5760     elements = (is_q ? 128 : 64) / esize;
5761
5762     tcg_res = tcg_temp_new_i64();
5763     tcg_elt = tcg_temp_new_i64();
5764
5765     /* These instructions operate across all lanes of a vector
5766      * to produce a single result. We can guarantee that a 64
5767      * bit intermediate is sufficient:
5768      *  + for [US]ADDLV the maximum element size is 32 bits, and
5769      *    the result type is 64 bits
5770      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
5771      *    same as the element size, which is 32 bits at most
5772      * For the integer operations we can choose to work at 64
5773      * or 32 bits and truncate at the end; for simplicity
5774      * we use 64 bits always. The floating point
5775      * ops do require 32 bit intermediates, though.
5776      */
5777     if (!is_fp) {
5778         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
5779
5780         for (i = 1; i < elements; i++) {
5781             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
5782
5783             switch (opcode) {
5784             case 0x03: /* SADDLV / UADDLV */
5785             case 0x1b: /* ADDV */
5786                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
5787                 break;
5788             case 0x0a: /* SMAXV / UMAXV */
5789                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
5790                                     tcg_res,
5791                                     tcg_res, tcg_elt, tcg_res, tcg_elt);
5792                 break;
5793             case 0x1a: /* SMINV / UMINV */
5794                 tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
5795                                     tcg_res,
5796                                     tcg_res, tcg_elt, tcg_res, tcg_elt);
5797                 break;
5798                 break;
5799             default:
5800                 g_assert_not_reached();
5801             }
5802
5803         }
5804     } else {
5805         /* Floating point ops which work on 32 bit (single) intermediates.
5806          * Note that correct NaN propagation requires that we do these
5807          * operations in exactly the order specified by the pseudocode.
5808          */
5809         TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
5810         TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
5811         TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
5812         TCGv_ptr fpst = get_fpstatus_ptr();
5813
5814         assert(esize == 32);
5815         assert(elements == 4);
5816
5817         read_vec_element(s, tcg_elt, rn, 0, MO_32);
5818         tcg_gen_extrl_i64_i32(tcg_elt1, tcg_elt);
5819         read_vec_element(s, tcg_elt, rn, 1, MO_32);
5820         tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5821
5822         do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5823
5824         read_vec_element(s, tcg_elt, rn, 2, MO_32);
5825         tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5826         read_vec_element(s, tcg_elt, rn, 3, MO_32);
5827         tcg_gen_extrl_i64_i32(tcg_elt3, tcg_elt);
5828
5829         do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
5830
5831         do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5832
5833         tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
5834         tcg_temp_free_i32(tcg_elt1);
5835         tcg_temp_free_i32(tcg_elt2);
5836         tcg_temp_free_i32(tcg_elt3);
5837         tcg_temp_free_ptr(fpst);
5838     }
5839
5840     tcg_temp_free_i64(tcg_elt);
5841
5842     /* Now truncate the result to the width required for the final output */
5843     if (opcode == 0x03) {
5844         /* SADDLV, UADDLV: result is 2*esize */
5845         size++;
5846     }
5847
5848     switch (size) {
5849     case 0:
5850         tcg_gen_ext8u_i64(tcg_res, tcg_res);
5851         break;
5852     case 1:
5853         tcg_gen_ext16u_i64(tcg_res, tcg_res);
5854         break;
5855     case 2:
5856         tcg_gen_ext32u_i64(tcg_res, tcg_res);
5857         break;
5858     case 3:
5859         break;
5860     default:
5861         g_assert_not_reached();
5862     }
5863
5864     write_fp_dreg(s, rd, tcg_res);
5865     tcg_temp_free_i64(tcg_res);
5866 }
5867
5868 /* DUP (Element, Vector)
5869  *
5870  *  31  30   29              21 20    16 15        10  9    5 4    0
5871  * +---+---+-------------------+--------+-------------+------+------+
5872  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5873  * +---+---+-------------------+--------+-------------+------+------+
5874  *
5875  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5876  */
5877 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
5878                              int imm5)
5879 {
5880     int size = ctz32(imm5);
5881     int esize = 8 << size;
5882     int elements = (is_q ? 128 : 64) / esize;
5883     int index, i;
5884     TCGv_i64 tmp;
5885
5886     if (size > 3 || (size == 3 && !is_q)) {
5887         unallocated_encoding(s);
5888         return;
5889     }
5890
5891     if (!fp_access_check(s)) {
5892         return;
5893     }
5894
5895     index = imm5 >> (size + 1);
5896
5897     tmp = tcg_temp_new_i64();
5898     read_vec_element(s, tmp, rn, index, size);
5899
5900     for (i = 0; i < elements; i++) {
5901         write_vec_element(s, tmp, rd, i, size);
5902     }
5903
5904     if (!is_q) {
5905         clear_vec_high(s, rd);
5906     }
5907
5908     tcg_temp_free_i64(tmp);
5909 }
5910
5911 /* DUP (element, scalar)
5912  *  31                   21 20    16 15        10  9    5 4    0
5913  * +-----------------------+--------+-------------+------+------+
5914  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5915  * +-----------------------+--------+-------------+------+------+
5916  */
5917 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
5918                               int imm5)
5919 {
5920     int size = ctz32(imm5);
5921     int index;
5922     TCGv_i64 tmp;
5923
5924     if (size > 3) {
5925         unallocated_encoding(s);
5926         return;
5927     }
5928
5929     if (!fp_access_check(s)) {
5930         return;
5931     }
5932
5933     index = imm5 >> (size + 1);
5934
5935     /* This instruction just extracts the specified element and
5936      * zero-extends it into the bottom of the destination register.
5937      */
5938     tmp = tcg_temp_new_i64();
5939     read_vec_element(s, tmp, rn, index, size);
5940     write_fp_dreg(s, rd, tmp);
5941     tcg_temp_free_i64(tmp);
5942 }
5943
5944 /* DUP (General)
5945  *
5946  *  31  30   29              21 20    16 15        10  9    5 4    0
5947  * +---+---+-------------------+--------+-------------+------+------+
5948  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
5949  * +---+---+-------------------+--------+-------------+------+------+
5950  *
5951  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5952  */
5953 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
5954                              int imm5)
5955 {
5956     int size = ctz32(imm5);
5957     int esize = 8 << size;
5958     int elements = (is_q ? 128 : 64)/esize;
5959     int i = 0;
5960
5961     if (size > 3 || ((size == 3) && !is_q)) {
5962         unallocated_encoding(s);
5963         return;
5964     }
5965
5966     if (!fp_access_check(s)) {
5967         return;
5968     }
5969
5970     for (i = 0; i < elements; i++) {
5971         write_vec_element(s, cpu_reg(s, rn), rd, i, size);
5972     }
5973     if (!is_q) {
5974         clear_vec_high(s, rd);
5975     }
5976 }
5977
5978 /* INS (Element)
5979  *
5980  *  31                   21 20    16 15  14    11  10 9    5 4    0
5981  * +-----------------------+--------+------------+---+------+------+
5982  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
5983  * +-----------------------+--------+------------+---+------+------+
5984  *
5985  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5986  * index: encoded in imm5<4:size+1>
5987  */
5988 static void handle_simd_inse(DisasContext *s, int rd, int rn,
5989                              int imm4, int imm5)
5990 {
5991     int size = ctz32(imm5);
5992     int src_index, dst_index;
5993     TCGv_i64 tmp;
5994
5995     if (size > 3) {
5996         unallocated_encoding(s);
5997         return;
5998     }
5999
6000     if (!fp_access_check(s)) {
6001         return;
6002     }
6003
6004     dst_index = extract32(imm5, 1+size, 5);
6005     src_index = extract32(imm4, size, 4);
6006
6007     tmp = tcg_temp_new_i64();
6008
6009     read_vec_element(s, tmp, rn, src_index, size);
6010     write_vec_element(s, tmp, rd, dst_index, size);
6011
6012     tcg_temp_free_i64(tmp);
6013 }
6014
6015
6016 /* INS (General)
6017  *
6018  *  31                   21 20    16 15        10  9    5 4    0
6019  * +-----------------------+--------+-------------+------+------+
6020  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
6021  * +-----------------------+--------+-------------+------+------+
6022  *
6023  * size: encoded in imm5 (see ARM ARM LowestSetBit())
6024  * index: encoded in imm5<4:size+1>
6025  */
6026 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
6027 {
6028     int size = ctz32(imm5);
6029     int idx;
6030
6031     if (size > 3) {
6032         unallocated_encoding(s);
6033         return;
6034     }
6035
6036     if (!fp_access_check(s)) {
6037         return;
6038     }
6039
6040     idx = extract32(imm5, 1 + size, 4 - size);
6041     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
6042 }
6043
6044 /*
6045  * UMOV (General)
6046  * SMOV (General)
6047  *
6048  *  31  30   29              21 20    16 15    12   10 9    5 4    0
6049  * +---+---+-------------------+--------+-------------+------+------+
6050  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
6051  * +---+---+-------------------+--------+-------------+------+------+
6052  *
6053  * U: unsigned when set
6054  * size: encoded in imm5 (see ARM ARM LowestSetBit())
6055  */
6056 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
6057                                   int rn, int rd, int imm5)
6058 {
6059     int size = ctz32(imm5);
6060     int element;
6061     TCGv_i64 tcg_rd;
6062
6063     /* Check for UnallocatedEncodings */
6064     if (is_signed) {
6065         if (size > 2 || (size == 2 && !is_q)) {
6066             unallocated_encoding(s);
6067             return;
6068         }
6069     } else {
6070         if (size > 3
6071             || (size < 3 && is_q)
6072             || (size == 3 && !is_q)) {
6073             unallocated_encoding(s);
6074             return;
6075         }
6076     }
6077
6078     if (!fp_access_check(s)) {
6079         return;
6080     }
6081
6082     element = extract32(imm5, 1+size, 4);
6083
6084     tcg_rd = cpu_reg(s, rd);
6085     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
6086     if (is_signed && !is_q) {
6087         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
6088     }
6089 }
6090
6091 /* AdvSIMD copy
6092  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
6093  * +---+---+----+-----------------+------+---+------+---+------+------+
6094  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6095  * +---+---+----+-----------------+------+---+------+---+------+------+
6096  */
6097 static void disas_simd_copy(DisasContext *s, uint32_t insn)
6098 {
6099     int rd = extract32(insn, 0, 5);
6100     int rn = extract32(insn, 5, 5);
6101     int imm4 = extract32(insn, 11, 4);
6102     int op = extract32(insn, 29, 1);
6103     int is_q = extract32(insn, 30, 1);
6104     int imm5 = extract32(insn, 16, 5);
6105
6106     if (op) {
6107         if (is_q) {
6108             /* INS (element) */
6109             handle_simd_inse(s, rd, rn, imm4, imm5);
6110         } else {
6111             unallocated_encoding(s);
6112         }
6113     } else {
6114         switch (imm4) {
6115         case 0:
6116             /* DUP (element - vector) */
6117             handle_simd_dupe(s, is_q, rd, rn, imm5);
6118             break;
6119         case 1:
6120             /* DUP (general) */
6121             handle_simd_dupg(s, is_q, rd, rn, imm5);
6122             break;
6123         case 3:
6124             if (is_q) {
6125                 /* INS (general) */
6126                 handle_simd_insg(s, rd, rn, imm5);
6127             } else {
6128                 unallocated_encoding(s);
6129             }
6130             break;
6131         case 5:
6132         case 7:
6133             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
6134             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
6135             break;
6136         default:
6137             unallocated_encoding(s);
6138             break;
6139         }
6140     }
6141 }
6142
6143 /* AdvSIMD modified immediate
6144  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
6145  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6146  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
6147  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6148  *
6149  * There are a number of operations that can be carried out here:
6150  *   MOVI - move (shifted) imm into register
6151  *   MVNI - move inverted (shifted) imm into register
6152  *   ORR  - bitwise OR of (shifted) imm with register
6153  *   BIC  - bitwise clear of (shifted) imm with register
6154  */
6155 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
6156 {
6157     int rd = extract32(insn, 0, 5);
6158     int cmode = extract32(insn, 12, 4);
6159     int cmode_3_1 = extract32(cmode, 1, 3);
6160     int cmode_0 = extract32(cmode, 0, 1);
6161     int o2 = extract32(insn, 11, 1);
6162     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
6163     bool is_neg = extract32(insn, 29, 1);
6164     bool is_q = extract32(insn, 30, 1);
6165     uint64_t imm = 0;
6166     TCGv_i64 tcg_rd, tcg_imm;
6167     int i;
6168
6169     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
6170         unallocated_encoding(s);
6171         return;
6172     }
6173
6174     if (!fp_access_check(s)) {
6175         return;
6176     }
6177
6178     /* See AdvSIMDExpandImm() in ARM ARM */
6179     switch (cmode_3_1) {
6180     case 0: /* Replicate(Zeros(24):imm8, 2) */
6181     case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
6182     case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
6183     case 3: /* Replicate(imm8:Zeros(24), 2) */
6184     {
6185         int shift = cmode_3_1 * 8;
6186         imm = bitfield_replicate(abcdefgh << shift, 32);
6187         break;
6188     }
6189     case 4: /* Replicate(Zeros(8):imm8, 4) */
6190     case 5: /* Replicate(imm8:Zeros(8), 4) */
6191     {
6192         int shift = (cmode_3_1 & 0x1) * 8;
6193         imm = bitfield_replicate(abcdefgh << shift, 16);
6194         break;
6195     }
6196     case 6:
6197         if (cmode_0) {
6198             /* Replicate(Zeros(8):imm8:Ones(16), 2) */
6199             imm = (abcdefgh << 16) | 0xffff;
6200         } else {
6201             /* Replicate(Zeros(16):imm8:Ones(8), 2) */
6202             imm = (abcdefgh << 8) | 0xff;
6203         }
6204         imm = bitfield_replicate(imm, 32);
6205         break;
6206     case 7:
6207         if (!cmode_0 && !is_neg) {
6208             imm = bitfield_replicate(abcdefgh, 8);
6209         } else if (!cmode_0 && is_neg) {
6210             int i;
6211             imm = 0;
6212             for (i = 0; i < 8; i++) {
6213                 if ((abcdefgh) & (1 << i)) {
6214                     imm |= 0xffULL << (i * 8);
6215                 }
6216             }
6217         } else if (cmode_0) {
6218             if (is_neg) {
6219                 imm = (abcdefgh & 0x3f) << 48;
6220                 if (abcdefgh & 0x80) {
6221                     imm |= 0x8000000000000000ULL;
6222                 }
6223                 if (abcdefgh & 0x40) {
6224                     imm |= 0x3fc0000000000000ULL;
6225                 } else {
6226                     imm |= 0x4000000000000000ULL;
6227                 }
6228             } else {
6229                 imm = (abcdefgh & 0x3f) << 19;
6230                 if (abcdefgh & 0x80) {
6231                     imm |= 0x80000000;
6232                 }
6233                 if (abcdefgh & 0x40) {
6234                     imm |= 0x3e000000;
6235                 } else {
6236                     imm |= 0x40000000;
6237                 }
6238                 imm |= (imm << 32);
6239             }
6240         }
6241         break;
6242     }
6243
6244     if (cmode_3_1 != 7 && is_neg) {
6245         imm = ~imm;
6246     }
6247
6248     tcg_imm = tcg_const_i64(imm);
6249     tcg_rd = new_tmp_a64(s);
6250
6251     for (i = 0; i < 2; i++) {
6252         int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64);
6253
6254         if (i == 1 && !is_q) {
6255             /* non-quad ops clear high half of vector */
6256             tcg_gen_movi_i64(tcg_rd, 0);
6257         } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
6258             tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
6259             if (is_neg) {
6260                 /* AND (BIC) */
6261                 tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
6262             } else {
6263                 /* ORR */
6264                 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
6265             }
6266         } else {
6267             /* MOVI */
6268             tcg_gen_mov_i64(tcg_rd, tcg_imm);
6269         }
6270         tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
6271     }
6272
6273     tcg_temp_free_i64(tcg_imm);
6274 }
6275
6276 /* AdvSIMD scalar copy
6277  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
6278  * +-----+----+-----------------+------+---+------+---+------+------+
6279  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6280  * +-----+----+-----------------+------+---+------+---+------+------+
6281  */
6282 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
6283 {
6284     int rd = extract32(insn, 0, 5);
6285     int rn = extract32(insn, 5, 5);
6286     int imm4 = extract32(insn, 11, 4);
6287     int imm5 = extract32(insn, 16, 5);
6288     int op = extract32(insn, 29, 1);
6289
6290     if (op != 0 || imm4 != 0) {
6291         unallocated_encoding(s);
6292         return;
6293     }
6294
6295     /* DUP (element, scalar) */
6296     handle_simd_dupes(s, rd, rn, imm5);
6297 }
6298
6299 /* AdvSIMD scalar pairwise
6300  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
6301  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6302  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6303  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6304  */
6305 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
6306 {
6307     int u = extract32(insn, 29, 1);
6308     int size = extract32(insn, 22, 2);
6309     int opcode = extract32(insn, 12, 5);
6310     int rn = extract32(insn, 5, 5);
6311     int rd = extract32(insn, 0, 5);
6312     TCGv_ptr fpst;
6313
6314     /* For some ops (the FP ones), size[1] is part of the encoding.
6315      * For ADDP strictly it is not but size[1] is always 1 for valid
6316      * encodings.
6317      */
6318     opcode |= (extract32(size, 1, 1) << 5);
6319
6320     switch (opcode) {
6321     case 0x3b: /* ADDP */
6322         if (u || size != 3) {
6323             unallocated_encoding(s);
6324             return;
6325         }
6326         if (!fp_access_check(s)) {
6327             return;
6328         }
6329
6330         fpst = NULL;
6331         break;
6332     case 0xc: /* FMAXNMP */
6333     case 0xd: /* FADDP */
6334     case 0xf: /* FMAXP */
6335     case 0x2c: /* FMINNMP */
6336     case 0x2f: /* FMINP */
6337         /* FP op, size[0] is 32 or 64 bit */
6338         if (!u) {
6339             unallocated_encoding(s);
6340             return;
6341         }
6342         if (!fp_access_check(s)) {
6343             return;
6344         }
6345
6346         size = extract32(size, 0, 1) ? 3 : 2;
6347         fpst = get_fpstatus_ptr();
6348         break;
6349     default:
6350         unallocated_encoding(s);
6351         return;
6352     }
6353
6354     if (size == 3) {
6355         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6356         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6357         TCGv_i64 tcg_res = tcg_temp_new_i64();
6358
6359         read_vec_element(s, tcg_op1, rn, 0, MO_64);
6360         read_vec_element(s, tcg_op2, rn, 1, MO_64);
6361
6362         switch (opcode) {
6363         case 0x3b: /* ADDP */
6364             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
6365             break;
6366         case 0xc: /* FMAXNMP */
6367             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6368             break;
6369         case 0xd: /* FADDP */
6370             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6371             break;
6372         case 0xf: /* FMAXP */
6373             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6374             break;
6375         case 0x2c: /* FMINNMP */
6376             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6377             break;
6378         case 0x2f: /* FMINP */
6379             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6380             break;
6381         default:
6382             g_assert_not_reached();
6383         }
6384
6385         write_fp_dreg(s, rd, tcg_res);
6386
6387         tcg_temp_free_i64(tcg_op1);
6388         tcg_temp_free_i64(tcg_op2);
6389         tcg_temp_free_i64(tcg_res);
6390     } else {
6391         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6392         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6393         TCGv_i32 tcg_res = tcg_temp_new_i32();
6394
6395         read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
6396         read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);
6397
6398         switch (opcode) {
6399         case 0xc: /* FMAXNMP */
6400             gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6401             break;
6402         case 0xd: /* FADDP */
6403             gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6404             break;
6405         case 0xf: /* FMAXP */
6406             gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6407             break;
6408         case 0x2c: /* FMINNMP */
6409             gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6410             break;
6411         case 0x2f: /* FMINP */
6412             gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6413             break;
6414         default:
6415             g_assert_not_reached();
6416         }
6417
6418         write_fp_sreg(s, rd, tcg_res);
6419
6420         tcg_temp_free_i32(tcg_op1);
6421         tcg_temp_free_i32(tcg_op2);
6422         tcg_temp_free_i32(tcg_res);
6423     }
6424
6425     if (fpst) {
6426         tcg_temp_free_ptr(fpst);
6427     }
6428 }
6429
6430 /*
6431  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
6432  *
6433  * This code is handles the common shifting code and is used by both
6434  * the vector and scalar code.
6435  */
6436 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6437                                     TCGv_i64 tcg_rnd, bool accumulate,
6438                                     bool is_u, int size, int shift)
6439 {
6440     bool extended_result = false;
6441     bool round = tcg_rnd != NULL;
6442     int ext_lshift = 0;
6443     TCGv_i64 tcg_src_hi;
6444
6445     if (round && size == 3) {
6446         extended_result = true;
6447         ext_lshift = 64 - shift;
6448         tcg_src_hi = tcg_temp_new_i64();
6449     } else if (shift == 64) {
6450         if (!accumulate && is_u) {
6451             /* result is zero */
6452             tcg_gen_movi_i64(tcg_res, 0);
6453             return;
6454         }
6455     }
6456
6457     /* Deal with the rounding step */
6458     if (round) {
6459         if (extended_result) {
6460             TCGv_i64 tcg_zero = tcg_const_i64(0);
6461             if (!is_u) {
6462                 /* take care of sign extending tcg_res */
6463                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
6464                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6465                                  tcg_src, tcg_src_hi,
6466                                  tcg_rnd, tcg_zero);
6467             } else {
6468                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6469                                  tcg_src, tcg_zero,
6470                                  tcg_rnd, tcg_zero);
6471             }
6472             tcg_temp_free_i64(tcg_zero);
6473         } else {
6474             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
6475         }
6476     }
6477
6478     /* Now do the shift right */
6479     if (round && extended_result) {
6480         /* extended case, >64 bit precision required */
6481         if (ext_lshift == 0) {
6482             /* special case, only high bits matter */
6483             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
6484         } else {
6485             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6486             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
6487             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
6488         }
6489     } else {
6490         if (is_u) {
6491             if (shift == 64) {
6492                 /* essentially shifting in 64 zeros */
6493                 tcg_gen_movi_i64(tcg_src, 0);
6494             } else {
6495                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6496             }
6497         } else {
6498             if (shift == 64) {
6499                 /* effectively extending the sign-bit */
6500                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
6501             } else {
6502                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
6503             }
6504         }
6505     }
6506
6507     if (accumulate) {
6508         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
6509     } else {
6510         tcg_gen_mov_i64(tcg_res, tcg_src);
6511     }
6512
6513     if (extended_result) {
6514         tcg_temp_free_i64(tcg_src_hi);
6515     }
6516 }
6517
6518 /* Common SHL/SLI - Shift left with an optional insert */
6519 static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6520                                  bool insert, int shift)
6521 {
6522     if (insert) { /* SLI */
6523         tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
6524     } else { /* SHL */
6525         tcg_gen_shli_i64(tcg_res, tcg_src, shift);
6526     }
6527 }
6528
6529 /* SRI: shift right with insert */
6530 static void handle_shri_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6531                                  int size, int shift)
6532 {
6533     int esize = 8 << size;
6534
6535     /* shift count same as element size is valid but does nothing;
6536      * special case to avoid potential shift by 64.
6537      */
6538     if (shift != esize) {
6539         tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6540         tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, 0, esize - shift);
6541     }
6542 }
6543
6544 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
6545 static void handle_scalar_simd_shri(DisasContext *s,
6546                                     bool is_u, int immh, int immb,
6547                                     int opcode, int rn, int rd)
6548 {
6549     const int size = 3;
6550     int immhb = immh << 3 | immb;
6551     int shift = 2 * (8 << size) - immhb;
6552     bool accumulate = false;
6553     bool round = false;
6554     bool insert = false;
6555     TCGv_i64 tcg_rn;
6556     TCGv_i64 tcg_rd;
6557     TCGv_i64 tcg_round;
6558
6559     if (!extract32(immh, 3, 1)) {
6560         unallocated_encoding(s);
6561         return;
6562     }
6563
6564     if (!fp_access_check(s)) {
6565         return;
6566     }
6567
6568     switch (opcode) {
6569     case 0x02: /* SSRA / USRA (accumulate) */
6570         accumulate = true;
6571         break;
6572     case 0x04: /* SRSHR / URSHR (rounding) */
6573         round = true;
6574         break;
6575     case 0x06: /* SRSRA / URSRA (accum + rounding) */
6576         accumulate = round = true;
6577         break;
6578     case 0x08: /* SRI */
6579         insert = true;
6580         break;
6581     }
6582
6583     if (round) {
6584         uint64_t round_const = 1ULL << (shift - 1);
6585         tcg_round = tcg_const_i64(round_const);
6586     } else {
6587         tcg_round = NULL;
6588     }
6589
6590     tcg_rn = read_fp_dreg(s, rn);
6591     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6592
6593     if (insert) {
6594         handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
6595     } else {
6596         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6597                                 accumulate, is_u, size, shift);
6598     }
6599
6600     write_fp_dreg(s, rd, tcg_rd);
6601
6602     tcg_temp_free_i64(tcg_rn);
6603     tcg_temp_free_i64(tcg_rd);
6604     if (round) {
6605         tcg_temp_free_i64(tcg_round);
6606     }
6607 }
6608
6609 /* SHL/SLI - Scalar shift left */
6610 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
6611                                     int immh, int immb, int opcode,
6612                                     int rn, int rd)
6613 {
6614     int size = 32 - clz32(immh) - 1;
6615     int immhb = immh << 3 | immb;
6616     int shift = immhb - (8 << size);
6617     TCGv_i64 tcg_rn = new_tmp_a64(s);
6618     TCGv_i64 tcg_rd = new_tmp_a64(s);
6619
6620     if (!extract32(immh, 3, 1)) {
6621         unallocated_encoding(s);
6622         return;
6623     }
6624
6625     if (!fp_access_check(s)) {
6626         return;
6627     }
6628
6629     tcg_rn = read_fp_dreg(s, rn);
6630     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6631
6632     handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
6633
6634     write_fp_dreg(s, rd, tcg_rd);
6635
6636     tcg_temp_free_i64(tcg_rn);
6637     tcg_temp_free_i64(tcg_rd);
6638 }
6639
6640 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
6641  * (signed/unsigned) narrowing */
6642 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
6643                                    bool is_u_shift, bool is_u_narrow,
6644                                    int immh, int immb, int opcode,
6645                                    int rn, int rd)
6646 {
6647     int immhb = immh << 3 | immb;
6648     int size = 32 - clz32(immh) - 1;
6649     int esize = 8 << size;
6650     int shift = (2 * esize) - immhb;
6651     int elements = is_scalar ? 1 : (64 / esize);
6652     bool round = extract32(opcode, 0, 1);
6653     TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
6654     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
6655     TCGv_i32 tcg_rd_narrowed;
6656     TCGv_i64 tcg_final;
6657
6658     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
6659         { gen_helper_neon_narrow_sat_s8,
6660           gen_helper_neon_unarrow_sat8 },
6661         { gen_helper_neon_narrow_sat_s16,
6662           gen_helper_neon_unarrow_sat16 },
6663         { gen_helper_neon_narrow_sat_s32,
6664           gen_helper_neon_unarrow_sat32 },
6665         { NULL, NULL },
6666     };
6667     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
6668         gen_helper_neon_narrow_sat_u8,
6669         gen_helper_neon_narrow_sat_u16,
6670         gen_helper_neon_narrow_sat_u32,
6671         NULL
6672     };
6673     NeonGenNarrowEnvFn *narrowfn;
6674
6675     int i;
6676
6677     assert(size < 4);
6678
6679     if (extract32(immh, 3, 1)) {
6680         unallocated_encoding(s);
6681         return;
6682     }
6683
6684     if (!fp_access_check(s)) {
6685         return;
6686     }
6687
6688     if (is_u_shift) {
6689         narrowfn = unsigned_narrow_fns[size];
6690     } else {
6691         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
6692     }
6693
6694     tcg_rn = tcg_temp_new_i64();
6695     tcg_rd = tcg_temp_new_i64();
6696     tcg_rd_narrowed = tcg_temp_new_i32();
6697     tcg_final = tcg_const_i64(0);
6698
6699     if (round) {
6700         uint64_t round_const = 1ULL << (shift - 1);
6701         tcg_round = tcg_const_i64(round_const);
6702     } else {
6703         tcg_round = NULL;
6704     }
6705
6706     for (i = 0; i < elements; i++) {
6707         read_vec_element(s, tcg_rn, rn, i, ldop);
6708         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6709                                 false, is_u_shift, size+1, shift);
6710         narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
6711         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
6712         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
6713     }
6714
6715     if (!is_q) {
6716         clear_vec_high(s, rd);
6717         write_vec_element(s, tcg_final, rd, 0, MO_64);
6718     } else {
6719         write_vec_element(s, tcg_final, rd, 1, MO_64);
6720     }
6721
6722     if (round) {
6723         tcg_temp_free_i64(tcg_round);
6724     }
6725     tcg_temp_free_i64(tcg_rn);
6726     tcg_temp_free_i64(tcg_rd);
6727     tcg_temp_free_i32(tcg_rd_narrowed);
6728     tcg_temp_free_i64(tcg_final);
6729     return;
6730 }
6731
6732 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
6733 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
6734                              bool src_unsigned, bool dst_unsigned,
6735                              int immh, int immb, int rn, int rd)
6736 {
6737     int immhb = immh << 3 | immb;
6738     int size = 32 - clz32(immh) - 1;
6739     int shift = immhb - (8 << size);
6740     int pass;
6741
6742     assert(immh != 0);
6743     assert(!(scalar && is_q));
6744
6745     if (!scalar) {
6746         if (!is_q && extract32(immh, 3, 1)) {
6747             unallocated_encoding(s);
6748             return;
6749         }
6750
6751         /* Since we use the variable-shift helpers we must
6752          * replicate the shift count into each element of
6753          * the tcg_shift value.
6754          */
6755         switch (size) {
6756         case 0:
6757             shift |= shift << 8;
6758             /* fall through */
6759         case 1:
6760             shift |= shift << 16;
6761             break;
6762         case 2:
6763         case 3:
6764             break;
6765         default:
6766             g_assert_not_reached();
6767         }
6768     }
6769
6770     if (!fp_access_check(s)) {
6771         return;
6772     }
6773
6774     if (size == 3) {
6775         TCGv_i64 tcg_shift = tcg_const_i64(shift);
6776         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
6777             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
6778             { NULL, gen_helper_neon_qshl_u64 },
6779         };
6780         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
6781         int maxpass = is_q ? 2 : 1;
6782
6783         for (pass = 0; pass < maxpass; pass++) {
6784             TCGv_i64 tcg_op = tcg_temp_new_i64();
6785
6786             read_vec_element(s, tcg_op, rn, pass, MO_64);
6787             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6788             write_vec_element(s, tcg_op, rd, pass, MO_64);
6789
6790             tcg_temp_free_i64(tcg_op);
6791         }
6792         tcg_temp_free_i64(tcg_shift);
6793
6794         if (!is_q) {
6795             clear_vec_high(s, rd);
6796         }
6797     } else {
6798         TCGv_i32 tcg_shift = tcg_const_i32(shift);
6799         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
6800             {
6801                 { gen_helper_neon_qshl_s8,
6802                   gen_helper_neon_qshl_s16,
6803                   gen_helper_neon_qshl_s32 },
6804                 { gen_helper_neon_qshlu_s8,
6805                   gen_helper_neon_qshlu_s16,
6806                   gen_helper_neon_qshlu_s32 }
6807             }, {
6808                 { NULL, NULL, NULL },
6809                 { gen_helper_neon_qshl_u8,
6810                   gen_helper_neon_qshl_u16,
6811                   gen_helper_neon_qshl_u32 }
6812             }
6813         };
6814         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
6815         TCGMemOp memop = scalar ? size : MO_32;
6816         int maxpass = scalar ? 1 : is_q ? 4 : 2;
6817
6818         for (pass = 0; pass < maxpass; pass++) {
6819             TCGv_i32 tcg_op = tcg_temp_new_i32();
6820
6821             read_vec_element_i32(s, tcg_op, rn, pass, memop);
6822             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6823             if (scalar) {
6824                 switch (size) {
6825                 case 0:
6826                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
6827                     break;
6828                 case 1:
6829                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
6830                     break;
6831                 case 2:
6832                     break;
6833                 default:
6834                     g_assert_not_reached();
6835                 }
6836                 write_fp_sreg(s, rd, tcg_op);
6837             } else {
6838                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6839             }
6840
6841             tcg_temp_free_i32(tcg_op);
6842         }
6843         tcg_temp_free_i32(tcg_shift);
6844
6845         if (!is_q && !scalar) {
6846             clear_vec_high(s, rd);
6847         }
6848     }
6849 }
6850
6851 /* Common vector code for handling integer to FP conversion */
6852 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
6853                                    int elements, int is_signed,
6854                                    int fracbits, int size)
6855 {
6856     bool is_double = size == 3 ? true : false;
6857     TCGv_ptr tcg_fpst = get_fpstatus_ptr();
6858     TCGv_i32 tcg_shift = tcg_const_i32(fracbits);
6859     TCGv_i64 tcg_int = tcg_temp_new_i64();
6860     TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
6861     int pass;
6862
6863     for (pass = 0; pass < elements; pass++) {
6864         read_vec_element(s, tcg_int, rn, pass, mop);
6865
6866         if (is_double) {
6867             TCGv_i64 tcg_double = tcg_temp_new_i64();
6868             if (is_signed) {
6869                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6870                                      tcg_shift, tcg_fpst);
6871             } else {
6872                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6873                                      tcg_shift, tcg_fpst);
6874             }
6875             if (elements == 1) {
6876                 write_fp_dreg(s, rd, tcg_double);
6877             } else {
6878                 write_vec_element(s, tcg_double, rd, pass, MO_64);
6879             }
6880             tcg_temp_free_i64(tcg_double);
6881         } else {
6882             TCGv_i32 tcg_single = tcg_temp_new_i32();
6883             if (is_signed) {
6884                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6885                                      tcg_shift, tcg_fpst);
6886             } else {
6887                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6888                                      tcg_shift, tcg_fpst);
6889             }
6890             if (elements == 1) {
6891                 write_fp_sreg(s, rd, tcg_single);
6892             } else {
6893                 write_vec_element_i32(s, tcg_single, rd, pass, MO_32);
6894             }
6895             tcg_temp_free_i32(tcg_single);
6896         }
6897     }
6898
6899     if (!is_double && elements == 2) {
6900         clear_vec_high(s, rd);
6901     }
6902
6903     tcg_temp_free_i64(tcg_int);
6904     tcg_temp_free_ptr(tcg_fpst);
6905     tcg_temp_free_i32(tcg_shift);
6906 }
6907
6908 /* UCVTF/SCVTF - Integer to FP conversion */
6909 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
6910                                          bool is_q, bool is_u,
6911                                          int immh, int immb, int opcode,
6912                                          int rn, int rd)
6913 {
6914     bool is_double = extract32(immh, 3, 1);
6915     int size = is_double ? MO_64 : MO_32;
6916     int elements;
6917     int immhb = immh << 3 | immb;
6918     int fracbits = (is_double ? 128 : 64) - immhb;
6919
6920     if (!extract32(immh, 2, 2)) {
6921         unallocated_encoding(s);
6922         return;
6923     }
6924
6925     if (is_scalar) {
6926         elements = 1;
6927     } else {
6928         elements = is_double ? 2 : is_q ? 4 : 2;
6929         if (is_double && !is_q) {
6930             unallocated_encoding(s);
6931             return;
6932         }
6933     }
6934
6935     if (!fp_access_check(s)) {
6936         return;
6937     }
6938
6939     /* immh == 0 would be a failure of the decode logic */
6940     g_assert(immh);
6941
6942     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
6943 }
6944
6945 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */
6946 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
6947                                          bool is_q, bool is_u,
6948                                          int immh, int immb, int rn, int rd)
6949 {
6950     bool is_double = extract32(immh, 3, 1);
6951     int immhb = immh << 3 | immb;
6952     int fracbits = (is_double ? 128 : 64) - immhb;
6953     int pass;
6954     TCGv_ptr tcg_fpstatus;
6955     TCGv_i32 tcg_rmode, tcg_shift;
6956
6957     if (!extract32(immh, 2, 2)) {
6958         unallocated_encoding(s);
6959         return;
6960     }
6961
6962     if (!is_scalar && !is_q && is_double) {
6963         unallocated_encoding(s);
6964         return;
6965     }
6966
6967     if (!fp_access_check(s)) {
6968         return;
6969     }
6970
6971     assert(!(is_scalar && is_q));
6972
6973     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
6974     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6975     tcg_fpstatus = get_fpstatus_ptr();
6976     tcg_shift = tcg_const_i32(fracbits);
6977
6978     if (is_double) {
6979         int maxpass = is_scalar ? 1 : 2;
6980
6981         for (pass = 0; pass < maxpass; pass++) {
6982             TCGv_i64 tcg_op = tcg_temp_new_i64();
6983
6984             read_vec_element(s, tcg_op, rn, pass, MO_64);
6985             if (is_u) {
6986                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6987             } else {
6988                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6989             }
6990             write_vec_element(s, tcg_op, rd, pass, MO_64);
6991             tcg_temp_free_i64(tcg_op);
6992         }
6993         if (!is_q) {
6994             clear_vec_high(s, rd);
6995         }
6996     } else {
6997         int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
6998         for (pass = 0; pass < maxpass; pass++) {
6999             TCGv_i32 tcg_op = tcg_temp_new_i32();
7000
7001             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7002             if (is_u) {
7003                 gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
7004             } else {
7005                 gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
7006             }
7007             if (is_scalar) {
7008                 write_fp_sreg(s, rd, tcg_op);
7009             } else {
7010                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
7011             }
7012             tcg_temp_free_i32(tcg_op);
7013         }
7014         if (!is_q && !is_scalar) {
7015             clear_vec_high(s, rd);
7016         }
7017     }
7018
7019     tcg_temp_free_ptr(tcg_fpstatus);
7020     tcg_temp_free_i32(tcg_shift);
7021     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
7022     tcg_temp_free_i32(tcg_rmode);
7023 }
7024
7025 /* AdvSIMD scalar shift by immediate
7026  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
7027  * +-----+---+-------------+------+------+--------+---+------+------+
7028  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
7029  * +-----+---+-------------+------+------+--------+---+------+------+
7030  *
7031  * This is the scalar version so it works on a fixed sized registers
7032  */
7033 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
7034 {
7035     int rd = extract32(insn, 0, 5);
7036     int rn = extract32(insn, 5, 5);
7037     int opcode = extract32(insn, 11, 5);
7038     int immb = extract32(insn, 16, 3);
7039     int immh = extract32(insn, 19, 4);
7040     bool is_u = extract32(insn, 29, 1);
7041
7042     if (immh == 0) {
7043         unallocated_encoding(s);
7044         return;
7045     }
7046
7047     switch (opcode) {
7048     case 0x08: /* SRI */
7049         if (!is_u) {
7050             unallocated_encoding(s);
7051             return;
7052         }
7053         /* fall through */
7054     case 0x00: /* SSHR / USHR */
7055     case 0x02: /* SSRA / USRA */
7056     case 0x04: /* SRSHR / URSHR */
7057     case 0x06: /* SRSRA / URSRA */
7058         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
7059         break;
7060     case 0x0a: /* SHL / SLI */
7061         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
7062         break;
7063     case 0x1c: /* SCVTF, UCVTF */
7064         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
7065                                      opcode, rn, rd);
7066         break;
7067     case 0x10: /* SQSHRUN, SQSHRUN2 */
7068     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
7069         if (!is_u) {
7070             unallocated_encoding(s);
7071             return;
7072         }
7073         handle_vec_simd_sqshrn(s, true, false, false, true,
7074                                immh, immb, opcode, rn, rd);
7075         break;
7076     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
7077     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
7078         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
7079                                immh, immb, opcode, rn, rd);
7080         break;
7081     case 0xc: /* SQSHLU */
7082         if (!is_u) {
7083             unallocated_encoding(s);
7084             return;
7085         }
7086         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
7087         break;
7088     case 0xe: /* SQSHL, UQSHL */
7089         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
7090         break;
7091     case 0x1f: /* FCVTZS, FCVTZU */
7092         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
7093         break;
7094     default:
7095         unallocated_encoding(s);
7096         break;
7097     }
7098 }
7099
7100 /* AdvSIMD scalar three different
7101  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
7102  * +-----+---+-----------+------+---+------+--------+-----+------+------+
7103  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
7104  * +-----+---+-----------+------+---+------+--------+-----+------+------+
7105  */
7106 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
7107 {
7108     bool is_u = extract32(insn, 29, 1);
7109     int size = extract32(insn, 22, 2);
7110     int opcode = extract32(insn, 12, 4);
7111     int rm = extract32(insn, 16, 5);
7112     int rn = extract32(insn, 5, 5);
7113     int rd = extract32(insn, 0, 5);
7114
7115     if (is_u) {
7116         unallocated_encoding(s);
7117         return;
7118     }
7119
7120     switch (opcode) {
7121     case 0x9: /* SQDMLAL, SQDMLAL2 */
7122     case 0xb: /* SQDMLSL, SQDMLSL2 */
7123     case 0xd: /* SQDMULL, SQDMULL2 */
7124         if (size == 0 || size == 3) {
7125             unallocated_encoding(s);
7126             return;
7127         }
7128         break;
7129     default:
7130         unallocated_encoding(s);
7131         return;
7132     }
7133
7134     if (!fp_access_check(s)) {
7135         return;
7136     }
7137
7138     if (size == 2) {
7139         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7140         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7141         TCGv_i64 tcg_res = tcg_temp_new_i64();
7142
7143         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
7144         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
7145
7146         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
7147         gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
7148
7149         switch (opcode) {
7150         case 0xd: /* SQDMULL, SQDMULL2 */
7151             break;
7152         case 0xb: /* SQDMLSL, SQDMLSL2 */
7153             tcg_gen_neg_i64(tcg_res, tcg_res);
7154             /* fall through */
7155         case 0x9: /* SQDMLAL, SQDMLAL2 */
7156             read_vec_element(s, tcg_op1, rd, 0, MO_64);
7157             gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
7158                                               tcg_res, tcg_op1);
7159             break;
7160         default:
7161             g_assert_not_reached();
7162         }
7163
7164         write_fp_dreg(s, rd, tcg_res);
7165
7166         tcg_temp_free_i64(tcg_op1);
7167         tcg_temp_free_i64(tcg_op2);
7168         tcg_temp_free_i64(tcg_res);
7169     } else {
7170         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7171         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7172         TCGv_i64 tcg_res = tcg_temp_new_i64();
7173
7174         read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
7175         read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
7176
7177         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
7178         gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
7179
7180         switch (opcode) {
7181         case 0xd: /* SQDMULL, SQDMULL2 */
7182             break;
7183         case 0xb: /* SQDMLSL, SQDMLSL2 */
7184             gen_helper_neon_negl_u32(tcg_res, tcg_res);
7185             /* fall through */
7186         case 0x9: /* SQDMLAL, SQDMLAL2 */
7187         {
7188             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
7189             read_vec_element(s, tcg_op3, rd, 0, MO_32);
7190             gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
7191                                               tcg_res, tcg_op3);
7192             tcg_temp_free_i64(tcg_op3);
7193             break;
7194         }
7195         default:
7196             g_assert_not_reached();
7197         }
7198
7199         tcg_gen_ext32u_i64(tcg_res, tcg_res);
7200         write_fp_dreg(s, rd, tcg_res);
7201
7202         tcg_temp_free_i32(tcg_op1);
7203         tcg_temp_free_i32(tcg_op2);
7204         tcg_temp_free_i64(tcg_res);
7205     }
7206 }
7207
7208 static void handle_3same_64(DisasContext *s, int opcode, bool u,
7209                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
7210 {
7211     /* Handle 64x64->64 opcodes which are shared between the scalar
7212      * and vector 3-same groups. We cover every opcode where size == 3
7213      * is valid in either the three-reg-same (integer, not pairwise)
7214      * or scalar-three-reg-same groups. (Some opcodes are not yet
7215      * implemented.)
7216      */
7217     TCGCond cond;
7218
7219     switch (opcode) {
7220     case 0x1: /* SQADD */
7221         if (u) {
7222             gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7223         } else {
7224             gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7225         }
7226         break;
7227     case 0x5: /* SQSUB */
7228         if (u) {
7229             gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7230         } else {
7231             gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7232         }
7233         break;
7234     case 0x6: /* CMGT, CMHI */
7235         /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
7236          * We implement this using setcond (test) and then negating.
7237          */
7238         cond = u ? TCG_COND_GTU : TCG_COND_GT;
7239     do_cmop:
7240         tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
7241         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7242         break;
7243     case 0x7: /* CMGE, CMHS */
7244         cond = u ? TCG_COND_GEU : TCG_COND_GE;
7245         goto do_cmop;
7246     case 0x11: /* CMTST, CMEQ */
7247         if (u) {
7248             cond = TCG_COND_EQ;
7249             goto do_cmop;
7250         }
7251         /* CMTST : test is "if (X & Y != 0)". */
7252         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
7253         tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
7254         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7255         break;
7256     case 0x8: /* SSHL, USHL */
7257         if (u) {
7258             gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
7259         } else {
7260             gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
7261         }
7262         break;
7263     case 0x9: /* SQSHL, UQSHL */
7264         if (u) {
7265             gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7266         } else {
7267             gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7268         }
7269         break;
7270     case 0xa: /* SRSHL, URSHL */
7271         if (u) {
7272             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
7273         } else {
7274             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
7275         }
7276         break;
7277     case 0xb: /* SQRSHL, UQRSHL */
7278         if (u) {
7279             gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7280         } else {
7281             gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7282         }
7283         break;
7284     case 0x10: /* ADD, SUB */
7285         if (u) {
7286             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
7287         } else {
7288             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
7289         }
7290         break;
7291     default:
7292         g_assert_not_reached();
7293     }
7294 }
7295
7296 /* Handle the 3-same-operands float operations; shared by the scalar
7297  * and vector encodings. The caller must filter out any encodings
7298  * not allocated for the encoding it is dealing with.
7299  */
7300 static void handle_3same_float(DisasContext *s, int size, int elements,
7301                                int fpopcode, int rd, int rn, int rm)
7302 {
7303     int pass;
7304     TCGv_ptr fpst = get_fpstatus_ptr();
7305
7306     for (pass = 0; pass < elements; pass++) {
7307         if (size) {
7308             /* Double */
7309             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7310             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7311             TCGv_i64 tcg_res = tcg_temp_new_i64();
7312
7313             read_vec_element(s, tcg_op1, rn, pass, MO_64);
7314             read_vec_element(s, tcg_op2, rm, pass, MO_64);
7315
7316             switch (fpopcode) {
7317             case 0x39: /* FMLS */
7318                 /* As usual for ARM, separate negation for fused multiply-add */
7319                 gen_helper_vfp_negd(tcg_op1, tcg_op1);
7320                 /* fall through */
7321             case 0x19: /* FMLA */
7322                 read_vec_element(s, tcg_res, rd, pass, MO_64);
7323                 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
7324                                        tcg_res, fpst);
7325                 break;
7326             case 0x18: /* FMAXNM */
7327                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7328                 break;
7329             case 0x1a: /* FADD */
7330                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7331                 break;
7332             case 0x1b: /* FMULX */
7333                 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
7334                 break;
7335             case 0x1c: /* FCMEQ */
7336                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7337                 break;
7338             case 0x1e: /* FMAX */
7339                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7340                 break;
7341             case 0x1f: /* FRECPS */
7342                 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7343                 break;
7344             case 0x38: /* FMINNM */
7345                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7346                 break;
7347             case 0x3a: /* FSUB */
7348                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7349                 break;
7350             case 0x3e: /* FMIN */
7351                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7352                 break;
7353             case 0x3f: /* FRSQRTS */
7354                 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7355                 break;
7356             case 0x5b: /* FMUL */
7357                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
7358                 break;
7359             case 0x5c: /* FCMGE */
7360                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7361                 break;
7362             case 0x5d: /* FACGE */
7363                 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7364                 break;
7365             case 0x5f: /* FDIV */
7366                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
7367                 break;
7368             case 0x7a: /* FABD */
7369                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7370                 gen_helper_vfp_absd(tcg_res, tcg_res);
7371                 break;
7372             case 0x7c: /* FCMGT */
7373                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7374                 break;
7375             case 0x7d: /* FACGT */
7376                 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7377                 break;
7378             default:
7379                 g_assert_not_reached();
7380             }
7381
7382             write_vec_element(s, tcg_res, rd, pass, MO_64);
7383
7384             tcg_temp_free_i64(tcg_res);
7385             tcg_temp_free_i64(tcg_op1);
7386             tcg_temp_free_i64(tcg_op2);
7387         } else {
7388             /* Single */
7389             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7390             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7391             TCGv_i32 tcg_res = tcg_temp_new_i32();
7392
7393             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
7394             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
7395
7396             switch (fpopcode) {
7397             case 0x39: /* FMLS */
7398                 /* As usual for ARM, separate negation for fused multiply-add */
7399                 gen_helper_vfp_negs(tcg_op1, tcg_op1);
7400                 /* fall through */
7401             case 0x19: /* FMLA */
7402                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7403                 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
7404                                        tcg_res, fpst);
7405                 break;
7406             case 0x1a: /* FADD */
7407                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7408                 break;
7409             case 0x1b: /* FMULX */
7410                 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
7411                 break;
7412             case 0x1c: /* FCMEQ */
7413                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7414                 break;
7415             case 0x1e: /* FMAX */
7416                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7417                 break;
7418             case 0x1f: /* FRECPS */
7419                 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7420                 break;
7421             case 0x18: /* FMAXNM */
7422                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7423                 break;
7424             case 0x38: /* FMINNM */
7425                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7426                 break;
7427             case 0x3a: /* FSUB */
7428                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7429                 break;
7430             case 0x3e: /* FMIN */
7431                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7432                 break;
7433             case 0x3f: /* FRSQRTS */
7434                 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7435                 break;
7436             case 0x5b: /* FMUL */
7437                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
7438                 break;
7439             case 0x5c: /* FCMGE */
7440                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7441                 break;
7442             case 0x5d: /* FACGE */
7443                 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7444                 break;
7445             case 0x5f: /* FDIV */
7446                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
7447                 break;
7448             case 0x7a: /* FABD */
7449                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7450                 gen_helper_vfp_abss(tcg_res, tcg_res);
7451                 break;
7452             case 0x7c: /* FCMGT */
7453                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7454                 break;
7455             case 0x7d: /* FACGT */
7456                 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7457                 break;
7458             default:
7459                 g_assert_not_reached();
7460             }
7461
7462             if (elements == 1) {
7463                 /* scalar single so clear high part */
7464                 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7465
7466                 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
7467                 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
7468                 tcg_temp_free_i64(tcg_tmp);
7469             } else {
7470                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7471             }
7472
7473             tcg_temp_free_i32(tcg_res);
7474             tcg_temp_free_i32(tcg_op1);
7475             tcg_temp_free_i32(tcg_op2);
7476         }
7477     }
7478
7479     tcg_temp_free_ptr(fpst);
7480
7481     if ((elements << size) < 4) {
7482         /* scalar, or non-quad vector op */
7483         clear_vec_high(s, rd);
7484     }
7485 }
7486
7487 /* AdvSIMD scalar three same
7488  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
7489  * +-----+---+-----------+------+---+------+--------+---+------+------+
7490  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
7491  * +-----+---+-----------+------+---+------+--------+---+------+------+
7492  */
7493 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
7494 {
7495     int rd = extract32(insn, 0, 5);
7496     int rn = extract32(insn, 5, 5);
7497     int opcode = extract32(insn, 11, 5);
7498     int rm = extract32(insn, 16, 5);
7499     int size = extract32(insn, 22, 2);
7500     bool u = extract32(insn, 29, 1);
7501     TCGv_i64 tcg_rd;
7502
7503     if (opcode >= 0x18) {
7504         /* Floating point: U, size[1] and opcode indicate operation */
7505         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
7506         switch (fpopcode) {
7507         case 0x1b: /* FMULX */
7508         case 0x1f: /* FRECPS */
7509         case 0x3f: /* FRSQRTS */
7510         case 0x5d: /* FACGE */
7511         case 0x7d: /* FACGT */
7512         case 0x1c: /* FCMEQ */
7513         case 0x5c: /* FCMGE */
7514         case 0x7c: /* FCMGT */
7515         case 0x7a: /* FABD */
7516             break;
7517         default:
7518             unallocated_encoding(s);
7519             return;
7520         }
7521
7522         if (!fp_access_check(s)) {
7523             return;
7524         }
7525
7526         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
7527         return;
7528     }
7529
7530     switch (opcode) {
7531     case 0x1: /* SQADD, UQADD */
7532     case 0x5: /* SQSUB, UQSUB */
7533     case 0x9: /* SQSHL, UQSHL */
7534     case 0xb: /* SQRSHL, UQRSHL */
7535         break;
7536     case 0x8: /* SSHL, USHL */
7537     case 0xa: /* SRSHL, URSHL */
7538     case 0x6: /* CMGT, CMHI */
7539     case 0x7: /* CMGE, CMHS */
7540     case 0x11: /* CMTST, CMEQ */
7541     case 0x10: /* ADD, SUB (vector) */
7542         if (size != 3) {
7543             unallocated_encoding(s);
7544             return;
7545         }
7546         break;
7547     case 0x16: /* SQDMULH, SQRDMULH (vector) */
7548         if (size != 1 && size != 2) {
7549             unallocated_encoding(s);
7550             return;
7551         }
7552         break;
7553     default:
7554         unallocated_encoding(s);
7555         return;
7556     }
7557
7558     if (!fp_access_check(s)) {
7559         return;
7560     }
7561
7562     tcg_rd = tcg_temp_new_i64();
7563
7564     if (size == 3) {
7565         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
7566         TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
7567
7568         handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
7569         tcg_temp_free_i64(tcg_rn);
7570         tcg_temp_free_i64(tcg_rm);
7571     } else {
7572         /* Do a single operation on the lowest element in the vector.
7573          * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
7574          * no side effects for all these operations.
7575          * OPTME: special-purpose helpers would avoid doing some
7576          * unnecessary work in the helper for the 8 and 16 bit cases.
7577          */
7578         NeonGenTwoOpEnvFn *genenvfn;
7579         TCGv_i32 tcg_rn = tcg_temp_new_i32();
7580         TCGv_i32 tcg_rm = tcg_temp_new_i32();
7581         TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
7582
7583         read_vec_element_i32(s, tcg_rn, rn, 0, size);
7584         read_vec_element_i32(s, tcg_rm, rm, 0, size);
7585
7586         switch (opcode) {
7587         case 0x1: /* SQADD, UQADD */
7588         {
7589             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7590                 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
7591                 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
7592                 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
7593             };
7594             genenvfn = fns[size][u];
7595             break;
7596         }
7597         case 0x5: /* SQSUB, UQSUB */
7598         {
7599             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7600                 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
7601                 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
7602                 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
7603             };
7604             genenvfn = fns[size][u];
7605             break;
7606         }
7607         case 0x9: /* SQSHL, UQSHL */
7608         {
7609             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7610                 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
7611                 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
7612                 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
7613             };
7614             genenvfn = fns[size][u];
7615             break;
7616         }
7617         case 0xb: /* SQRSHL, UQRSHL */
7618         {
7619             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7620                 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
7621                 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
7622                 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
7623             };
7624             genenvfn = fns[size][u];
7625             break;
7626         }
7627         case 0x16: /* SQDMULH, SQRDMULH */
7628         {
7629             static NeonGenTwoOpEnvFn * const fns[2][2] = {
7630                 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
7631                 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
7632             };
7633             assert(size == 1 || size == 2);
7634             genenvfn = fns[size - 1][u];
7635             break;
7636         }
7637         default:
7638             g_assert_not_reached();
7639         }
7640
7641         genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
7642         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
7643         tcg_temp_free_i32(tcg_rd32);
7644         tcg_temp_free_i32(tcg_rn);
7645         tcg_temp_free_i32(tcg_rm);
7646     }
7647
7648     write_fp_dreg(s, rd, tcg_rd);
7649
7650     tcg_temp_free_i64(tcg_rd);
7651 }
7652
7653 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
7654                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
7655                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
7656 {
7657     /* Handle 64->64 opcodes which are shared between the scalar and
7658      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
7659      * is valid in either group and also the double-precision fp ops.
7660      * The caller only need provide tcg_rmode and tcg_fpstatus if the op
7661      * requires them.
7662      */
7663     TCGCond cond;
7664
7665     switch (opcode) {
7666     case 0x4: /* CLS, CLZ */
7667         if (u) {
7668             tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
7669         } else {
7670             tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
7671         }
7672         break;
7673     case 0x5: /* NOT */
7674         /* This opcode is shared with CNT and RBIT but we have earlier
7675          * enforced that size == 3 if and only if this is the NOT insn.
7676          */
7677         tcg_gen_not_i64(tcg_rd, tcg_rn);
7678         break;
7679     case 0x7: /* SQABS, SQNEG */
7680         if (u) {
7681             gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
7682         } else {
7683             gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
7684         }
7685         break;
7686     case 0xa: /* CMLT */
7687         /* 64 bit integer comparison against zero, result is
7688          * test ? (2^64 - 1) : 0. We implement via setcond(!test) and
7689          * subtracting 1.
7690          */
7691         cond = TCG_COND_LT;
7692     do_cmop:
7693         tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
7694         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7695         break;
7696     case 0x8: /* CMGT, CMGE */
7697         cond = u ? TCG_COND_GE : TCG_COND_GT;
7698         goto do_cmop;
7699     case 0x9: /* CMEQ, CMLE */
7700         cond = u ? TCG_COND_LE : TCG_COND_EQ;
7701         goto do_cmop;
7702     case 0xb: /* ABS, NEG */
7703         if (u) {
7704             tcg_gen_neg_i64(tcg_rd, tcg_rn);
7705         } else {
7706             TCGv_i64 tcg_zero = tcg_const_i64(0);
7707             tcg_gen_neg_i64(tcg_rd, tcg_rn);
7708             tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
7709                                 tcg_rn, tcg_rd);
7710             tcg_temp_free_i64(tcg_zero);
7711         }
7712         break;
7713     case 0x2f: /* FABS */
7714         gen_helper_vfp_absd(tcg_rd, tcg_rn);
7715         break;
7716     case 0x6f: /* FNEG */
7717         gen_helper_vfp_negd(tcg_rd, tcg_rn);
7718         break;
7719     case 0x7f: /* FSQRT */
7720         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
7721         break;
7722     case 0x1a: /* FCVTNS */
7723     case 0x1b: /* FCVTMS */
7724     case 0x1c: /* FCVTAS */
7725     case 0x3a: /* FCVTPS */
7726     case 0x3b: /* FCVTZS */
7727     {
7728         TCGv_i32 tcg_shift = tcg_const_i32(0);
7729         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7730         tcg_temp_free_i32(tcg_shift);
7731         break;
7732     }
7733     case 0x5a: /* FCVTNU */
7734     case 0x5b: /* FCVTMU */
7735     case 0x5c: /* FCVTAU */
7736     case 0x7a: /* FCVTPU */
7737     case 0x7b: /* FCVTZU */
7738     {
7739         TCGv_i32 tcg_shift = tcg_const_i32(0);
7740         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7741         tcg_temp_free_i32(tcg_shift);
7742         break;
7743     }
7744     case 0x18: /* FRINTN */
7745     case 0x19: /* FRINTM */
7746     case 0x38: /* FRINTP */
7747     case 0x39: /* FRINTZ */
7748     case 0x58: /* FRINTA */
7749     case 0x79: /* FRINTI */
7750         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
7751         break;
7752     case 0x59: /* FRINTX */
7753         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
7754         break;
7755     default:
7756         g_assert_not_reached();
7757     }
7758 }
7759
7760 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
7761                                    bool is_scalar, bool is_u, bool is_q,
7762                                    int size, int rn, int rd)
7763 {
7764     bool is_double = (size == 3);
7765     TCGv_ptr fpst;
7766
7767     if (!fp_access_check(s)) {
7768         return;
7769     }
7770
7771     fpst = get_fpstatus_ptr();
7772
7773     if (is_double) {
7774         TCGv_i64 tcg_op = tcg_temp_new_i64();
7775         TCGv_i64 tcg_zero = tcg_const_i64(0);
7776         TCGv_i64 tcg_res = tcg_temp_new_i64();
7777         NeonGenTwoDoubleOPFn *genfn;
7778         bool swap = false;
7779         int pass;
7780
7781         switch (opcode) {
7782         case 0x2e: /* FCMLT (zero) */
7783             swap = true;
7784             /* fallthrough */
7785         case 0x2c: /* FCMGT (zero) */
7786             genfn = gen_helper_neon_cgt_f64;
7787             break;
7788         case 0x2d: /* FCMEQ (zero) */
7789             genfn = gen_helper_neon_ceq_f64;
7790             break;
7791         case 0x6d: /* FCMLE (zero) */
7792             swap = true;
7793             /* fall through */
7794         case 0x6c: /* FCMGE (zero) */
7795             genfn = gen_helper_neon_cge_f64;
7796             break;
7797         default:
7798             g_assert_not_reached();
7799         }
7800
7801         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7802             read_vec_element(s, tcg_op, rn, pass, MO_64);
7803             if (swap) {
7804                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7805             } else {
7806                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7807             }
7808             write_vec_element(s, tcg_res, rd, pass, MO_64);
7809         }
7810         if (is_scalar) {
7811             clear_vec_high(s, rd);
7812         }
7813
7814         tcg_temp_free_i64(tcg_res);
7815         tcg_temp_free_i64(tcg_zero);
7816         tcg_temp_free_i64(tcg_op);
7817     } else {
7818         TCGv_i32 tcg_op = tcg_temp_new_i32();
7819         TCGv_i32 tcg_zero = tcg_const_i32(0);
7820         TCGv_i32 tcg_res = tcg_temp_new_i32();
7821         NeonGenTwoSingleOPFn *genfn;
7822         bool swap = false;
7823         int pass, maxpasses;
7824
7825         switch (opcode) {
7826         case 0x2e: /* FCMLT (zero) */
7827             swap = true;
7828             /* fall through */
7829         case 0x2c: /* FCMGT (zero) */
7830             genfn = gen_helper_neon_cgt_f32;
7831             break;
7832         case 0x2d: /* FCMEQ (zero) */
7833             genfn = gen_helper_neon_ceq_f32;
7834             break;
7835         case 0x6d: /* FCMLE (zero) */
7836             swap = true;
7837             /* fall through */
7838         case 0x6c: /* FCMGE (zero) */
7839             genfn = gen_helper_neon_cge_f32;
7840             break;
7841         default:
7842             g_assert_not_reached();
7843         }
7844
7845         if (is_scalar) {
7846             maxpasses = 1;
7847         } else {
7848             maxpasses = is_q ? 4 : 2;
7849         }
7850
7851         for (pass = 0; pass < maxpasses; pass++) {
7852             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7853             if (swap) {
7854                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7855             } else {
7856                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7857             }
7858             if (is_scalar) {
7859                 write_fp_sreg(s, rd, tcg_res);
7860             } else {
7861                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7862             }
7863         }
7864         tcg_temp_free_i32(tcg_res);
7865         tcg_temp_free_i32(tcg_zero);
7866         tcg_temp_free_i32(tcg_op);
7867         if (!is_q && !is_scalar) {
7868             clear_vec_high(s, rd);
7869         }
7870     }
7871
7872     tcg_temp_free_ptr(fpst);
7873 }
7874
7875 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
7876                                     bool is_scalar, bool is_u, bool is_q,
7877                                     int size, int rn, int rd)
7878 {
7879     bool is_double = (size == 3);
7880     TCGv_ptr fpst = get_fpstatus_ptr();
7881
7882     if (is_double) {
7883         TCGv_i64 tcg_op = tcg_temp_new_i64();
7884         TCGv_i64 tcg_res = tcg_temp_new_i64();
7885         int pass;
7886
7887         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7888             read_vec_element(s, tcg_op, rn, pass, MO_64);
7889             switch (opcode) {
7890             case 0x3d: /* FRECPE */
7891                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
7892                 break;
7893             case 0x3f: /* FRECPX */
7894                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
7895                 break;
7896             case 0x7d: /* FRSQRTE */
7897                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
7898                 break;
7899             default:
7900                 g_assert_not_reached();
7901             }
7902             write_vec_element(s, tcg_res, rd, pass, MO_64);
7903         }
7904         if (is_scalar) {
7905             clear_vec_high(s, rd);
7906         }
7907
7908         tcg_temp_free_i64(tcg_res);
7909         tcg_temp_free_i64(tcg_op);
7910     } else {
7911         TCGv_i32 tcg_op = tcg_temp_new_i32();
7912         TCGv_i32 tcg_res = tcg_temp_new_i32();
7913         int pass, maxpasses;
7914
7915         if (is_scalar) {
7916             maxpasses = 1;
7917         } else {
7918             maxpasses = is_q ? 4 : 2;
7919         }
7920
7921         for (pass = 0; pass < maxpasses; pass++) {
7922             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7923
7924             switch (opcode) {
7925             case 0x3c: /* URECPE */
7926                 gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
7927                 break;
7928             case 0x3d: /* FRECPE */
7929                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
7930                 break;
7931             case 0x3f: /* FRECPX */
7932                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
7933                 break;
7934             case 0x7d: /* FRSQRTE */
7935                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
7936                 break;
7937             default:
7938                 g_assert_not_reached();
7939             }
7940
7941             if (is_scalar) {
7942                 write_fp_sreg(s, rd, tcg_res);
7943             } else {
7944                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7945             }
7946         }
7947         tcg_temp_free_i32(tcg_res);
7948         tcg_temp_free_i32(tcg_op);
7949         if (!is_q && !is_scalar) {
7950             clear_vec_high(s, rd);
7951         }
7952     }
7953     tcg_temp_free_ptr(fpst);
7954 }
7955
7956 static void handle_2misc_narrow(DisasContext *s, bool scalar,
7957                                 int opcode, bool u, bool is_q,
7958                                 int size, int rn, int rd)
7959 {
7960     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
7961      * in the source becomes a size element in the destination).
7962      */
7963     int pass;
7964     TCGv_i32 tcg_res[2];
7965     int destelt = is_q ? 2 : 0;
7966     int passes = scalar ? 1 : 2;
7967
7968     if (scalar) {
7969         tcg_res[1] = tcg_const_i32(0);
7970     }
7971
7972     for (pass = 0; pass < passes; pass++) {
7973         TCGv_i64 tcg_op = tcg_temp_new_i64();
7974         NeonGenNarrowFn *genfn = NULL;
7975         NeonGenNarrowEnvFn *genenvfn = NULL;
7976
7977         if (scalar) {
7978             read_vec_element(s, tcg_op, rn, pass, size + 1);
7979         } else {
7980             read_vec_element(s, tcg_op, rn, pass, MO_64);
7981         }
7982         tcg_res[pass] = tcg_temp_new_i32();
7983
7984         switch (opcode) {
7985         case 0x12: /* XTN, SQXTUN */
7986         {
7987             static NeonGenNarrowFn * const xtnfns[3] = {
7988                 gen_helper_neon_narrow_u8,
7989                 gen_helper_neon_narrow_u16,
7990                 tcg_gen_extrl_i64_i32,
7991             };
7992             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
7993                 gen_helper_neon_unarrow_sat8,
7994                 gen_helper_neon_unarrow_sat16,
7995                 gen_helper_neon_unarrow_sat32,
7996             };
7997             if (u) {
7998                 genenvfn = sqxtunfns[size];
7999             } else {
8000                 genfn = xtnfns[size];
8001             }
8002             break;
8003         }
8004         case 0x14: /* SQXTN, UQXTN */
8005         {
8006             static NeonGenNarrowEnvFn * const fns[3][2] = {
8007                 { gen_helper_neon_narrow_sat_s8,
8008                   gen_helper_neon_narrow_sat_u8 },
8009                 { gen_helper_neon_narrow_sat_s16,
8010                   gen_helper_neon_narrow_sat_u16 },
8011                 { gen_helper_neon_narrow_sat_s32,
8012                   gen_helper_neon_narrow_sat_u32 },
8013             };
8014             genenvfn = fns[size][u];
8015             break;
8016         }
8017         case 0x16: /* FCVTN, FCVTN2 */
8018             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
8019             if (size == 2) {
8020                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
8021             } else {
8022                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
8023                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
8024                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
8025                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
8026                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
8027                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
8028                 tcg_temp_free_i32(tcg_lo);
8029                 tcg_temp_free_i32(tcg_hi);
8030             }
8031             break;
8032         case 0x56:  /* FCVTXN, FCVTXN2 */
8033             /* 64 bit to 32 bit float conversion
8034              * with von Neumann rounding (round to odd)
8035              */
8036             assert(size == 2);
8037             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
8038             break;
8039         default:
8040             g_assert_not_reached();
8041         }
8042
8043         if (genfn) {
8044             genfn(tcg_res[pass], tcg_op);
8045         } else if (genenvfn) {
8046             genenvfn(tcg_res[pass], cpu_env, tcg_op);
8047         }
8048
8049         tcg_temp_free_i64(tcg_op);
8050     }
8051
8052     for (pass = 0; pass < 2; pass++) {
8053         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
8054         tcg_temp_free_i32(tcg_res[pass]);
8055     }
8056     if (!is_q) {
8057         clear_vec_high(s, rd);
8058     }
8059 }
8060
8061 /* Remaining saturating accumulating ops */
8062 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
8063                                 bool is_q, int size, int rn, int rd)
8064 {
8065     bool is_double = (size == 3);
8066
8067     if (is_double) {
8068         TCGv_i64 tcg_rn = tcg_temp_new_i64();
8069         TCGv_i64 tcg_rd = tcg_temp_new_i64();
8070         int pass;
8071
8072         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
8073             read_vec_element(s, tcg_rn, rn, pass, MO_64);
8074             read_vec_element(s, tcg_rd, rd, pass, MO_64);
8075
8076             if (is_u) { /* USQADD */
8077                 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8078             } else { /* SUQADD */
8079                 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8080             }
8081             write_vec_element(s, tcg_rd, rd, pass, MO_64);
8082         }
8083         if (is_scalar) {
8084             clear_vec_high(s, rd);
8085         }
8086
8087         tcg_temp_free_i64(tcg_rd);
8088         tcg_temp_free_i64(tcg_rn);
8089     } else {
8090         TCGv_i32 tcg_rn = tcg_temp_new_i32();
8091         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8092         int pass, maxpasses;
8093
8094         if (is_scalar) {
8095             maxpasses = 1;
8096         } else {
8097             maxpasses = is_q ? 4 : 2;
8098         }
8099
8100         for (pass = 0; pass < maxpasses; pass++) {
8101             if (is_scalar) {
8102                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
8103                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
8104             } else {
8105                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
8106                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
8107             }
8108
8109             if (is_u) { /* USQADD */
8110                 switch (size) {
8111                 case 0:
8112                     gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8113                     break;
8114                 case 1:
8115                     gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8116                     break;
8117                 case 2:
8118                     gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8119                     break;
8120                 default:
8121                     g_assert_not_reached();
8122                 }
8123             } else { /* SUQADD */
8124                 switch (size) {
8125                 case 0:
8126                     gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8127                     break;
8128                 case 1:
8129                     gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8130                     break;
8131                 case 2:
8132                     gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8133                     break;
8134                 default:
8135                     g_assert_not_reached();
8136                 }
8137             }
8138
8139             if (is_scalar) {
8140                 TCGv_i64 tcg_zero = tcg_const_i64(0);
8141                 write_vec_element(s, tcg_zero, rd, 0, MO_64);
8142                 tcg_temp_free_i64(tcg_zero);
8143             }
8144             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
8145         }
8146
8147         if (!is_q) {
8148             clear_vec_high(s, rd);
8149         }
8150
8151         tcg_temp_free_i32(tcg_rd);
8152         tcg_temp_free_i32(tcg_rn);
8153     }
8154 }
8155
8156 /* AdvSIMD scalar two reg misc
8157  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
8158  * +-----+---+-----------+------+-----------+--------+-----+------+------+
8159  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
8160  * +-----+---+-----------+------+-----------+--------+-----+------+------+
8161  */
8162 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
8163 {
8164     int rd = extract32(insn, 0, 5);
8165     int rn = extract32(insn, 5, 5);
8166     int opcode = extract32(insn, 12, 5);
8167     int size = extract32(insn, 22, 2);
8168     bool u = extract32(insn, 29, 1);
8169     bool is_fcvt = false;
8170     int rmode;
8171     TCGv_i32 tcg_rmode;
8172     TCGv_ptr tcg_fpstatus;
8173
8174     switch (opcode) {
8175     case 0x3: /* USQADD / SUQADD*/
8176         if (!fp_access_check(s)) {
8177             return;
8178         }
8179         handle_2misc_satacc(s, true, u, false, size, rn, rd);
8180         return;
8181     case 0x7: /* SQABS / SQNEG */
8182         break;
8183     case 0xa: /* CMLT */
8184         if (u) {
8185             unallocated_encoding(s);
8186             return;
8187         }
8188         /* fall through */
8189     case 0x8: /* CMGT, CMGE */
8190     case 0x9: /* CMEQ, CMLE */
8191     case 0xb: /* ABS, NEG */
8192         if (size != 3) {
8193             unallocated_encoding(s);
8194             return;
8195         }
8196         break;
8197     case 0x12: /* SQXTUN */
8198         if (!u) {
8199             unallocated_encoding(s);
8200             return;
8201         }
8202         /* fall through */
8203     case 0x14: /* SQXTN, UQXTN */
8204         if (size == 3) {
8205             unallocated_encoding(s);
8206             return;
8207         }
8208         if (!fp_access_check(s)) {
8209             return;
8210         }
8211         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
8212         return;
8213     case 0xc ... 0xf:
8214     case 0x16 ... 0x1d:
8215     case 0x1f:
8216         /* Floating point: U, size[1] and opcode indicate operation;
8217          * size[0] indicates single or double precision.
8218          */
8219         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
8220         size = extract32(size, 0, 1) ? 3 : 2;
8221         switch (opcode) {
8222         case 0x2c: /* FCMGT (zero) */
8223         case 0x2d: /* FCMEQ (zero) */
8224         case 0x2e: /* FCMLT (zero) */
8225         case 0x6c: /* FCMGE (zero) */
8226         case 0x6d: /* FCMLE (zero) */
8227             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
8228             return;
8229         case 0x1d: /* SCVTF */
8230         case 0x5d: /* UCVTF */
8231         {
8232             bool is_signed = (opcode == 0x1d);
8233             if (!fp_access_check(s)) {
8234                 return;
8235             }
8236             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
8237             return;
8238         }
8239         case 0x3d: /* FRECPE */
8240         case 0x3f: /* FRECPX */
8241         case 0x7d: /* FRSQRTE */
8242             if (!fp_access_check(s)) {
8243                 return;
8244             }
8245             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
8246             return;
8247         case 0x1a: /* FCVTNS */
8248         case 0x1b: /* FCVTMS */
8249         case 0x3a: /* FCVTPS */
8250         case 0x3b: /* FCVTZS */
8251         case 0x5a: /* FCVTNU */
8252         case 0x5b: /* FCVTMU */
8253         case 0x7a: /* FCVTPU */
8254         case 0x7b: /* FCVTZU */
8255             is_fcvt = true;
8256             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
8257             break;
8258         case 0x1c: /* FCVTAS */
8259         case 0x5c: /* FCVTAU */
8260             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
8261             is_fcvt = true;
8262             rmode = FPROUNDING_TIEAWAY;
8263             break;
8264         case 0x56: /* FCVTXN, FCVTXN2 */
8265             if (size == 2) {
8266                 unallocated_encoding(s);
8267                 return;
8268             }
8269             if (!fp_access_check(s)) {
8270                 return;
8271             }
8272             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
8273             return;
8274         default:
8275             unallocated_encoding(s);
8276             return;
8277         }
8278         break;
8279     default:
8280         unallocated_encoding(s);
8281         return;
8282     }
8283
8284     if (!fp_access_check(s)) {
8285         return;
8286     }
8287
8288     if (is_fcvt) {
8289         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
8290         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8291         tcg_fpstatus = get_fpstatus_ptr();
8292     } else {
8293         tcg_rmode = NULL;
8294         tcg_fpstatus = NULL;
8295     }
8296
8297     if (size == 3) {
8298         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8299         TCGv_i64 tcg_rd = tcg_temp_new_i64();
8300
8301         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
8302         write_fp_dreg(s, rd, tcg_rd);
8303         tcg_temp_free_i64(tcg_rd);
8304         tcg_temp_free_i64(tcg_rn);
8305     } else {
8306         TCGv_i32 tcg_rn = tcg_temp_new_i32();
8307         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8308
8309         read_vec_element_i32(s, tcg_rn, rn, 0, size);
8310
8311         switch (opcode) {
8312         case 0x7: /* SQABS, SQNEG */
8313         {
8314             NeonGenOneOpEnvFn *genfn;
8315             static NeonGenOneOpEnvFn * const fns[3][2] = {
8316                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
8317                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
8318                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
8319             };
8320             genfn = fns[size][u];
8321             genfn(tcg_rd, cpu_env, tcg_rn);
8322             break;
8323         }
8324         case 0x1a: /* FCVTNS */
8325         case 0x1b: /* FCVTMS */
8326         case 0x1c: /* FCVTAS */
8327         case 0x3a: /* FCVTPS */
8328         case 0x3b: /* FCVTZS */
8329         {
8330             TCGv_i32 tcg_shift = tcg_const_i32(0);
8331             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8332             tcg_temp_free_i32(tcg_shift);
8333             break;
8334         }
8335         case 0x5a: /* FCVTNU */
8336         case 0x5b: /* FCVTMU */
8337         case 0x5c: /* FCVTAU */
8338         case 0x7a: /* FCVTPU */
8339         case 0x7b: /* FCVTZU */
8340         {
8341             TCGv_i32 tcg_shift = tcg_const_i32(0);
8342             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8343             tcg_temp_free_i32(tcg_shift);
8344             break;
8345         }
8346         default:
8347             g_assert_not_reached();
8348         }
8349
8350         write_fp_sreg(s, rd, tcg_rd);
8351         tcg_temp_free_i32(tcg_rd);
8352         tcg_temp_free_i32(tcg_rn);
8353     }
8354
8355     if (is_fcvt) {
8356         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8357         tcg_temp_free_i32(tcg_rmode);
8358         tcg_temp_free_ptr(tcg_fpstatus);
8359     }
8360 }
8361
8362 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
8363 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
8364                                  int immh, int immb, int opcode, int rn, int rd)
8365 {
8366     int size = 32 - clz32(immh) - 1;
8367     int immhb = immh << 3 | immb;
8368     int shift = 2 * (8 << size) - immhb;
8369     bool accumulate = false;
8370     bool round = false;
8371     bool insert = false;
8372     int dsize = is_q ? 128 : 64;
8373     int esize = 8 << size;
8374     int elements = dsize/esize;
8375     TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
8376     TCGv_i64 tcg_rn = new_tmp_a64(s);
8377     TCGv_i64 tcg_rd = new_tmp_a64(s);
8378     TCGv_i64 tcg_round;
8379     int i;
8380
8381     if (extract32(immh, 3, 1) && !is_q) {
8382         unallocated_encoding(s);
8383         return;
8384     }
8385
8386     if (size > 3 && !is_q) {
8387         unallocated_encoding(s);
8388         return;
8389     }
8390
8391     if (!fp_access_check(s)) {
8392         return;
8393     }
8394
8395     switch (opcode) {
8396     case 0x02: /* SSRA / USRA (accumulate) */
8397         accumulate = true;
8398         break;
8399     case 0x04: /* SRSHR / URSHR (rounding) */
8400         round = true;
8401         break;
8402     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8403         accumulate = round = true;
8404         break;
8405     case 0x08: /* SRI */
8406         insert = true;
8407         break;
8408     }
8409
8410     if (round) {
8411         uint64_t round_const = 1ULL << (shift - 1);
8412         tcg_round = tcg_const_i64(round_const);
8413     } else {
8414         tcg_round = NULL;
8415     }
8416
8417     for (i = 0; i < elements; i++) {
8418         read_vec_element(s, tcg_rn, rn, i, memop);
8419         if (accumulate || insert) {
8420             read_vec_element(s, tcg_rd, rd, i, memop);
8421         }
8422
8423         if (insert) {
8424             handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
8425         } else {
8426             handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8427                                     accumulate, is_u, size, shift);
8428         }
8429
8430         write_vec_element(s, tcg_rd, rd, i, size);
8431     }
8432
8433     if (!is_q) {
8434         clear_vec_high(s, rd);
8435     }
8436
8437     if (round) {
8438         tcg_temp_free_i64(tcg_round);
8439     }
8440 }
8441
8442 /* SHL/SLI - Vector shift left */
8443 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
8444                                 int immh, int immb, int opcode, int rn, int rd)
8445 {
8446     int size = 32 - clz32(immh) - 1;
8447     int immhb = immh << 3 | immb;
8448     int shift = immhb - (8 << size);
8449     int dsize = is_q ? 128 : 64;
8450     int esize = 8 << size;
8451     int elements = dsize/esize;
8452     TCGv_i64 tcg_rn = new_tmp_a64(s);
8453     TCGv_i64 tcg_rd = new_tmp_a64(s);
8454     int i;
8455
8456     if (extract32(immh, 3, 1) && !is_q) {
8457         unallocated_encoding(s);
8458         return;
8459     }
8460
8461     if (size > 3 && !is_q) {
8462         unallocated_encoding(s);
8463         return;
8464     }
8465
8466     if (!fp_access_check(s)) {
8467         return;
8468     }
8469
8470     for (i = 0; i < elements; i++) {
8471         read_vec_element(s, tcg_rn, rn, i, size);
8472         if (insert) {
8473             read_vec_element(s, tcg_rd, rd, i, size);
8474         }
8475
8476         handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
8477
8478         write_vec_element(s, tcg_rd, rd, i, size);
8479     }
8480
8481     if (!is_q) {
8482         clear_vec_high(s, rd);
8483     }
8484 }
8485
8486 /* USHLL/SHLL - Vector shift left with widening */
8487 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
8488                                  int immh, int immb, int opcode, int rn, int rd)
8489 {
8490     int size = 32 - clz32(immh) - 1;
8491     int immhb = immh << 3 | immb;
8492     int shift = immhb - (8 << size);
8493     int dsize = 64;
8494     int esize = 8 << size;
8495     int elements = dsize/esize;
8496     TCGv_i64 tcg_rn = new_tmp_a64(s);
8497     TCGv_i64 tcg_rd = new_tmp_a64(s);
8498     int i;
8499
8500     if (size >= 3) {
8501         unallocated_encoding(s);
8502         return;
8503     }
8504
8505     if (!fp_access_check(s)) {
8506         return;
8507     }
8508
8509     /* For the LL variants the store is larger than the load,
8510      * so if rd == rn we would overwrite parts of our input.
8511      * So load everything right now and use shifts in the main loop.
8512      */
8513     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
8514
8515     for (i = 0; i < elements; i++) {
8516         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
8517         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
8518         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
8519         write_vec_element(s, tcg_rd, rd, i, size + 1);
8520     }
8521 }
8522
8523 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
8524 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
8525                                  int immh, int immb, int opcode, int rn, int rd)
8526 {
8527     int immhb = immh << 3 | immb;
8528     int size = 32 - clz32(immh) - 1;
8529     int dsize = 64;
8530     int esize = 8 << size;
8531     int elements = dsize/esize;
8532     int shift = (2 * esize) - immhb;
8533     bool round = extract32(opcode, 0, 1);
8534     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
8535     TCGv_i64 tcg_round;
8536     int i;
8537
8538     if (extract32(immh, 3, 1)) {
8539         unallocated_encoding(s);
8540         return;
8541     }
8542
8543     if (!fp_access_check(s)) {
8544         return;
8545     }
8546
8547     tcg_rn = tcg_temp_new_i64();
8548     tcg_rd = tcg_temp_new_i64();
8549     tcg_final = tcg_temp_new_i64();
8550     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
8551
8552     if (round) {
8553         uint64_t round_const = 1ULL << (shift - 1);
8554         tcg_round = tcg_const_i64(round_const);
8555     } else {
8556         tcg_round = NULL;
8557     }
8558
8559     for (i = 0; i < elements; i++) {
8560         read_vec_element(s, tcg_rn, rn, i, size+1);
8561         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8562                                 false, true, size+1, shift);
8563
8564         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8565     }
8566
8567     if (!is_q) {
8568         clear_vec_high(s, rd);
8569         write_vec_element(s, tcg_final, rd, 0, MO_64);
8570     } else {
8571         write_vec_element(s, tcg_final, rd, 1, MO_64);
8572     }
8573
8574     if (round) {
8575         tcg_temp_free_i64(tcg_round);
8576     }
8577     tcg_temp_free_i64(tcg_rn);
8578     tcg_temp_free_i64(tcg_rd);
8579     tcg_temp_free_i64(tcg_final);
8580     return;
8581 }
8582
8583
8584 /* AdvSIMD shift by immediate
8585  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
8586  * +---+---+---+-------------+------+------+--------+---+------+------+
8587  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8588  * +---+---+---+-------------+------+------+--------+---+------+------+
8589  */
8590 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
8591 {
8592     int rd = extract32(insn, 0, 5);
8593     int rn = extract32(insn, 5, 5);
8594     int opcode = extract32(insn, 11, 5);
8595     int immb = extract32(insn, 16, 3);
8596     int immh = extract32(insn, 19, 4);
8597     bool is_u = extract32(insn, 29, 1);
8598     bool is_q = extract32(insn, 30, 1);
8599
8600     switch (opcode) {
8601     case 0x08: /* SRI */
8602         if (!is_u) {
8603             unallocated_encoding(s);
8604             return;
8605         }
8606         /* fall through */
8607     case 0x00: /* SSHR / USHR */
8608     case 0x02: /* SSRA / USRA (accumulate) */
8609     case 0x04: /* SRSHR / URSHR (rounding) */
8610     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8611         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
8612         break;
8613     case 0x0a: /* SHL / SLI */
8614         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8615         break;
8616     case 0x10: /* SHRN */
8617     case 0x11: /* RSHRN / SQRSHRUN */
8618         if (is_u) {
8619             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
8620                                    opcode, rn, rd);
8621         } else {
8622             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
8623         }
8624         break;
8625     case 0x12: /* SQSHRN / UQSHRN */
8626     case 0x13: /* SQRSHRN / UQRSHRN */
8627         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
8628                                opcode, rn, rd);
8629         break;
8630     case 0x14: /* SSHLL / USHLL */
8631         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8632         break;
8633     case 0x1c: /* SCVTF / UCVTF */
8634         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
8635                                      opcode, rn, rd);
8636         break;
8637     case 0xc: /* SQSHLU */
8638         if (!is_u) {
8639             unallocated_encoding(s);
8640             return;
8641         }
8642         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
8643         break;
8644     case 0xe: /* SQSHL, UQSHL */
8645         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
8646         break;
8647     case 0x1f: /* FCVTZS/ FCVTZU */
8648         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
8649         return;
8650     default:
8651         unallocated_encoding(s);
8652         return;
8653     }
8654 }
8655
8656 /* Generate code to do a "long" addition or subtraction, ie one done in
8657  * TCGv_i64 on vector lanes twice the width specified by size.
8658  */
8659 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
8660                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
8661 {
8662     static NeonGenTwo64OpFn * const fns[3][2] = {
8663         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
8664         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
8665         { tcg_gen_add_i64, tcg_gen_sub_i64 },
8666     };
8667     NeonGenTwo64OpFn *genfn;
8668     assert(size < 3);
8669
8670     genfn = fns[size][is_sub];
8671     genfn(tcg_res, tcg_op1, tcg_op2);
8672 }
8673
8674 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
8675                                 int opcode, int rd, int rn, int rm)
8676 {
8677     /* 3-reg-different widening insns: 64 x 64 -> 128 */
8678     TCGv_i64 tcg_res[2];
8679     int pass, accop;
8680
8681     tcg_res[0] = tcg_temp_new_i64();
8682     tcg_res[1] = tcg_temp_new_i64();
8683
8684     /* Does this op do an adding accumulate, a subtracting accumulate,
8685      * or no accumulate at all?
8686      */
8687     switch (opcode) {
8688     case 5:
8689     case 8:
8690     case 9:
8691         accop = 1;
8692         break;
8693     case 10:
8694     case 11:
8695         accop = -1;
8696         break;
8697     default:
8698         accop = 0;
8699         break;
8700     }
8701
8702     if (accop != 0) {
8703         read_vec_element(s, tcg_res[0], rd, 0, MO_64);
8704         read_vec_element(s, tcg_res[1], rd, 1, MO_64);
8705     }
8706
8707     /* size == 2 means two 32x32->64 operations; this is worth special
8708      * casing because we can generally handle it inline.
8709      */
8710     if (size == 2) {
8711         for (pass = 0; pass < 2; pass++) {
8712             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8713             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8714             TCGv_i64 tcg_passres;
8715             TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
8716
8717             int elt = pass + is_q * 2;
8718
8719             read_vec_element(s, tcg_op1, rn, elt, memop);
8720             read_vec_element(s, tcg_op2, rm, elt, memop);
8721
8722             if (accop == 0) {
8723                 tcg_passres = tcg_res[pass];
8724             } else {
8725                 tcg_passres = tcg_temp_new_i64();
8726             }
8727
8728             switch (opcode) {
8729             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8730                 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
8731                 break;
8732             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8733                 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
8734                 break;
8735             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8736             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8737             {
8738                 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
8739                 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
8740
8741                 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
8742                 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
8743                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
8744                                     tcg_passres,
8745                                     tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
8746                 tcg_temp_free_i64(tcg_tmp1);
8747                 tcg_temp_free_i64(tcg_tmp2);
8748                 break;
8749             }
8750             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8751             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8752             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8753                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8754                 break;
8755             case 9: /* SQDMLAL, SQDMLAL2 */
8756             case 11: /* SQDMLSL, SQDMLSL2 */
8757             case 13: /* SQDMULL, SQDMULL2 */
8758                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8759                 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
8760                                                   tcg_passres, tcg_passres);
8761                 break;
8762             default:
8763                 g_assert_not_reached();
8764             }
8765
8766             if (opcode == 9 || opcode == 11) {
8767                 /* saturating accumulate ops */
8768                 if (accop < 0) {
8769                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
8770                 }
8771                 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
8772                                                   tcg_res[pass], tcg_passres);
8773             } else if (accop > 0) {
8774                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8775             } else if (accop < 0) {
8776                 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8777             }
8778
8779             if (accop != 0) {
8780                 tcg_temp_free_i64(tcg_passres);
8781             }
8782
8783             tcg_temp_free_i64(tcg_op1);
8784             tcg_temp_free_i64(tcg_op2);
8785         }
8786     } else {
8787         /* size 0 or 1, generally helper functions */
8788         for (pass = 0; pass < 2; pass++) {
8789             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8790             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8791             TCGv_i64 tcg_passres;
8792             int elt = pass + is_q * 2;
8793
8794             read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
8795             read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
8796
8797             if (accop == 0) {
8798                 tcg_passres = tcg_res[pass];
8799             } else {
8800                 tcg_passres = tcg_temp_new_i64();
8801             }
8802
8803             switch (opcode) {
8804             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8805             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8806             {
8807                 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
8808                 static NeonGenWidenFn * const widenfns[2][2] = {
8809                     { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8810                     { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8811                 };
8812                 NeonGenWidenFn *widenfn = widenfns[size][is_u];
8813
8814                 widenfn(tcg_op2_64, tcg_op2);
8815                 widenfn(tcg_passres, tcg_op1);
8816                 gen_neon_addl(size, (opcode == 2), tcg_passres,
8817                               tcg_passres, tcg_op2_64);
8818                 tcg_temp_free_i64(tcg_op2_64);
8819                 break;
8820             }
8821             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8822             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8823                 if (size == 0) {
8824                     if (is_u) {
8825                         gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
8826                     } else {
8827                         gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
8828                     }
8829                 } else {
8830                     if (is_u) {
8831                         gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
8832                     } else {
8833                         gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
8834                     }
8835                 }
8836                 break;
8837             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8838             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8839             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8840                 if (size == 0) {
8841                     if (is_u) {
8842                         gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
8843                     } else {
8844                         gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
8845                     }
8846                 } else {
8847                     if (is_u) {
8848                         gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
8849                     } else {
8850                         gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8851                     }
8852                 }
8853                 break;
8854             case 9: /* SQDMLAL, SQDMLAL2 */
8855             case 11: /* SQDMLSL, SQDMLSL2 */
8856             case 13: /* SQDMULL, SQDMULL2 */
8857                 assert(size == 1);
8858                 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8859                 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
8860                                                   tcg_passres, tcg_passres);
8861                 break;
8862             case 14: /* PMULL */
8863                 assert(size == 0);
8864                 gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
8865                 break;
8866             default:
8867                 g_assert_not_reached();
8868             }
8869             tcg_temp_free_i32(tcg_op1);
8870             tcg_temp_free_i32(tcg_op2);
8871
8872             if (accop != 0) {
8873                 if (opcode == 9 || opcode == 11) {
8874                     /* saturating accumulate ops */
8875                     if (accop < 0) {
8876                         gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
8877                     }
8878                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
8879                                                       tcg_res[pass],
8880                                                       tcg_passres);
8881                 } else {
8882                     gen_neon_addl(size, (accop < 0), tcg_res[pass],
8883                                   tcg_res[pass], tcg_passres);
8884                 }
8885                 tcg_temp_free_i64(tcg_passres);
8886             }
8887         }
8888     }
8889
8890     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8891     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8892     tcg_temp_free_i64(tcg_res[0]);
8893     tcg_temp_free_i64(tcg_res[1]);
8894 }
8895
8896 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
8897                             int opcode, int rd, int rn, int rm)
8898 {
8899     TCGv_i64 tcg_res[2];
8900     int part = is_q ? 2 : 0;
8901     int pass;
8902
8903     for (pass = 0; pass < 2; pass++) {
8904         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8905         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8906         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
8907         static NeonGenWidenFn * const widenfns[3][2] = {
8908             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8909             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8910             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
8911         };
8912         NeonGenWidenFn *widenfn = widenfns[size][is_u];
8913
8914         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8915         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
8916         widenfn(tcg_op2_wide, tcg_op2);
8917         tcg_temp_free_i32(tcg_op2);
8918         tcg_res[pass] = tcg_temp_new_i64();
8919         gen_neon_addl(size, (opcode == 3),
8920                       tcg_res[pass], tcg_op1, tcg_op2_wide);
8921         tcg_temp_free_i64(tcg_op1);
8922         tcg_temp_free_i64(tcg_op2_wide);
8923     }
8924
8925     for (pass = 0; pass < 2; pass++) {
8926         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8927         tcg_temp_free_i64(tcg_res[pass]);
8928     }
8929 }
8930
8931 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
8932 {
8933     tcg_gen_addi_i64(in, in, 1U << 31);
8934     tcg_gen_extrh_i64_i32(res, in);
8935 }
8936
8937 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
8938                                  int opcode, int rd, int rn, int rm)
8939 {
8940     TCGv_i32 tcg_res[2];
8941     int part = is_q ? 2 : 0;
8942     int pass;
8943
8944     for (pass = 0; pass < 2; pass++) {
8945         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8946         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8947         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
8948         static NeonGenNarrowFn * const narrowfns[3][2] = {
8949             { gen_helper_neon_narrow_high_u8,
8950               gen_helper_neon_narrow_round_high_u8 },
8951             { gen_helper_neon_narrow_high_u16,
8952               gen_helper_neon_narrow_round_high_u16 },
8953             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
8954         };
8955         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
8956
8957         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8958         read_vec_element(s, tcg_op2, rm, pass, MO_64);
8959
8960         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
8961
8962         tcg_temp_free_i64(tcg_op1);
8963         tcg_temp_free_i64(tcg_op2);
8964
8965         tcg_res[pass] = tcg_temp_new_i32();
8966         gennarrow(tcg_res[pass], tcg_wideres);
8967         tcg_temp_free_i64(tcg_wideres);
8968     }
8969
8970     for (pass = 0; pass < 2; pass++) {
8971         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
8972         tcg_temp_free_i32(tcg_res[pass]);
8973     }
8974     if (!is_q) {
8975         clear_vec_high(s, rd);
8976     }
8977 }
8978
8979 static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
8980 {
8981     /* PMULL of 64 x 64 -> 128 is an odd special case because it
8982      * is the only three-reg-diff instruction which produces a
8983      * 128-bit wide result from a single operation. However since
8984      * it's possible to calculate the two halves more or less
8985      * separately we just use two helper calls.
8986      */
8987     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8988     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8989     TCGv_i64 tcg_res = tcg_temp_new_i64();
8990
8991     read_vec_element(s, tcg_op1, rn, is_q, MO_64);
8992     read_vec_element(s, tcg_op2, rm, is_q, MO_64);
8993     gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
8994     write_vec_element(s, tcg_res, rd, 0, MO_64);
8995     gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
8996     write_vec_element(s, tcg_res, rd, 1, MO_64);
8997
8998     tcg_temp_free_i64(tcg_op1);
8999     tcg_temp_free_i64(tcg_op2);
9000     tcg_temp_free_i64(tcg_res);
9001 }
9002
9003 /* AdvSIMD three different
9004  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
9005  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
9006  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
9007  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
9008  */
9009 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
9010 {
9011     /* Instructions in this group fall into three basic classes
9012      * (in each case with the operation working on each element in
9013      * the input vectors):
9014      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
9015      *     128 bit input)
9016      * (2) wide 64 x 128 -> 128
9017      * (3) narrowing 128 x 128 -> 64
9018      * Here we do initial decode, catch unallocated cases and
9019      * dispatch to separate functions for each class.
9020      */
9021     int is_q = extract32(insn, 30, 1);
9022     int is_u = extract32(insn, 29, 1);
9023     int size = extract32(insn, 22, 2);
9024     int opcode = extract32(insn, 12, 4);
9025     int rm = extract32(insn, 16, 5);
9026     int rn = extract32(insn, 5, 5);
9027     int rd = extract32(insn, 0, 5);
9028
9029     switch (opcode) {
9030     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
9031     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
9032         /* 64 x 128 -> 128 */
9033         if (size == 3) {
9034             unallocated_encoding(s);
9035             return;
9036         }
9037         if (!fp_access_check(s)) {
9038             return;
9039         }
9040         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
9041         break;
9042     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
9043     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
9044         /* 128 x 128 -> 64 */
9045         if (size == 3) {
9046             unallocated_encoding(s);
9047             return;
9048         }
9049         if (!fp_access_check(s)) {
9050             return;
9051         }
9052         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
9053         break;
9054     case 14: /* PMULL, PMULL2 */
9055         if (is_u || size == 1 || size == 2) {
9056             unallocated_encoding(s);
9057             return;
9058         }
9059         if (size == 3) {
9060             if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
9061                 unallocated_encoding(s);
9062                 return;
9063             }
9064             if (!fp_access_check(s)) {
9065                 return;
9066             }
9067             handle_pmull_64(s, is_q, rd, rn, rm);
9068             return;
9069         }
9070         goto is_widening;
9071     case 9: /* SQDMLAL, SQDMLAL2 */
9072     case 11: /* SQDMLSL, SQDMLSL2 */
9073     case 13: /* SQDMULL, SQDMULL2 */
9074         if (is_u || size == 0) {
9075             unallocated_encoding(s);
9076             return;
9077         }
9078         /* fall through */
9079     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
9080     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
9081     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
9082     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
9083     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
9084     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
9085     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
9086         /* 64 x 64 -> 128 */
9087         if (size == 3) {
9088             unallocated_encoding(s);
9089             return;
9090         }
9091     is_widening:
9092         if (!fp_access_check(s)) {
9093             return;
9094         }
9095
9096         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
9097         break;
9098     default:
9099         /* opcode 15 not allocated */
9100         unallocated_encoding(s);
9101         break;
9102     }
9103 }
9104
9105 static void gen_bsl_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
9106 {
9107     tcg_gen_xor_i64(rn, rn, rm);
9108     tcg_gen_and_i64(rn, rn, rd);
9109     tcg_gen_xor_i64(rd, rm, rn);
9110 }
9111
9112 static void gen_bit_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
9113 {
9114     tcg_gen_xor_i64(rn, rn, rd);
9115     tcg_gen_and_i64(rn, rn, rm);
9116     tcg_gen_xor_i64(rd, rd, rn);
9117 }
9118
9119 static void gen_bif_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
9120 {
9121     tcg_gen_xor_i64(rn, rn, rd);
9122     tcg_gen_andc_i64(rn, rn, rm);
9123     tcg_gen_xor_i64(rd, rd, rn);
9124 }
9125
9126 static void gen_bsl_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
9127 {
9128     tcg_gen_xor_vec(vece, rn, rn, rm);
9129     tcg_gen_and_vec(vece, rn, rn, rd);
9130     tcg_gen_xor_vec(vece, rd, rm, rn);
9131 }
9132
9133 static void gen_bit_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
9134 {
9135     tcg_gen_xor_vec(vece, rn, rn, rd);
9136     tcg_gen_and_vec(vece, rn, rn, rm);
9137     tcg_gen_xor_vec(vece, rd, rd, rn);
9138 }
9139
9140 static void gen_bif_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
9141 {
9142     tcg_gen_xor_vec(vece, rn, rn, rd);
9143     tcg_gen_andc_vec(vece, rn, rn, rm);
9144     tcg_gen_xor_vec(vece, rd, rd, rn);
9145 }
9146
9147 /* Logic op (opcode == 3) subgroup of C3.6.16. */
9148 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
9149 {
9150     static const GVecGen3 bsl_op = {
9151         .fni8 = gen_bsl_i64,
9152         .fniv = gen_bsl_vec,
9153         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9154         .load_dest = true
9155     };
9156     static const GVecGen3 bit_op = {
9157         .fni8 = gen_bit_i64,
9158         .fniv = gen_bit_vec,
9159         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9160         .load_dest = true
9161     };
9162     static const GVecGen3 bif_op = {
9163         .fni8 = gen_bif_i64,
9164         .fniv = gen_bif_vec,
9165         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9166         .load_dest = true
9167     };
9168
9169     int rd = extract32(insn, 0, 5);
9170     int rn = extract32(insn, 5, 5);
9171     int rm = extract32(insn, 16, 5);
9172     int size = extract32(insn, 22, 2);
9173     bool is_u = extract32(insn, 29, 1);
9174     bool is_q = extract32(insn, 30, 1);
9175
9176     if (!fp_access_check(s)) {
9177         return;
9178     }
9179
9180     switch (size + 4 * is_u) {
9181     case 0: /* AND */
9182         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
9183         return;
9184     case 1: /* BIC */
9185         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
9186         return;
9187     case 2: /* ORR */
9188         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
9189         return;
9190     case 3: /* ORN */
9191         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
9192         return;
9193     case 4: /* EOR */
9194         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
9195         return;
9196
9197     case 5: /* BSL bitwise select */
9198         gen_gvec_op3(s, is_q, rd, rn, rm, &bsl_op);
9199         return;
9200     case 6: /* BIT, bitwise insert if true */
9201         gen_gvec_op3(s, is_q, rd, rn, rm, &bit_op);
9202         return;
9203     case 7: /* BIF, bitwise insert if false */
9204         gen_gvec_op3(s, is_q, rd, rn, rm, &bif_op);
9205         return;
9206
9207     default:
9208         g_assert_not_reached();
9209     }
9210 }
9211
9212 /* Helper functions for 32 bit comparisons */
9213 static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9214 {
9215     tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
9216 }
9217
9218 static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9219 {
9220     tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
9221 }
9222
9223 static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9224 {
9225     tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
9226 }
9227
9228 static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9229 {
9230     tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
9231 }
9232
9233 /* Pairwise op subgroup of C3.6.16.
9234  *
9235  * This is called directly or via the handle_3same_float for float pairwise
9236  * operations where the opcode and size are calculated differently.
9237  */
9238 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
9239                                    int size, int rn, int rm, int rd)
9240 {
9241     TCGv_ptr fpst;
9242     int pass;
9243
9244     /* Floating point operations need fpst */
9245     if (opcode >= 0x58) {
9246         fpst = get_fpstatus_ptr();
9247     } else {
9248         fpst = NULL;
9249     }
9250
9251     if (!fp_access_check(s)) {
9252         return;
9253     }
9254
9255     /* These operations work on the concatenated rm:rn, with each pair of
9256      * adjacent elements being operated on to produce an element in the result.
9257      */
9258     if (size == 3) {
9259         TCGv_i64 tcg_res[2];
9260
9261         for (pass = 0; pass < 2; pass++) {
9262             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9263             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9264             int passreg = (pass == 0) ? rn : rm;
9265
9266             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
9267             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
9268             tcg_res[pass] = tcg_temp_new_i64();
9269
9270             switch (opcode) {
9271             case 0x17: /* ADDP */
9272                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9273                 break;
9274             case 0x58: /* FMAXNMP */
9275                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9276                 break;
9277             case 0x5a: /* FADDP */
9278                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9279                 break;
9280             case 0x5e: /* FMAXP */
9281                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9282                 break;
9283             case 0x78: /* FMINNMP */
9284                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9285                 break;
9286             case 0x7e: /* FMINP */
9287                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9288                 break;
9289             default:
9290                 g_assert_not_reached();
9291             }
9292
9293             tcg_temp_free_i64(tcg_op1);
9294             tcg_temp_free_i64(tcg_op2);
9295         }
9296
9297         for (pass = 0; pass < 2; pass++) {
9298             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9299             tcg_temp_free_i64(tcg_res[pass]);
9300         }
9301     } else {
9302         int maxpass = is_q ? 4 : 2;
9303         TCGv_i32 tcg_res[4];
9304
9305         for (pass = 0; pass < maxpass; pass++) {
9306             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9307             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9308             NeonGenTwoOpFn *genfn = NULL;
9309             int passreg = pass < (maxpass / 2) ? rn : rm;
9310             int passelt = (is_q && (pass & 1)) ? 2 : 0;
9311
9312             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
9313             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
9314             tcg_res[pass] = tcg_temp_new_i32();
9315
9316             switch (opcode) {
9317             case 0x17: /* ADDP */
9318             {
9319                 static NeonGenTwoOpFn * const fns[3] = {
9320                     gen_helper_neon_padd_u8,
9321                     gen_helper_neon_padd_u16,
9322                     tcg_gen_add_i32,
9323                 };
9324                 genfn = fns[size];
9325                 break;
9326             }
9327             case 0x14: /* SMAXP, UMAXP */
9328             {
9329                 static NeonGenTwoOpFn * const fns[3][2] = {
9330                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
9331                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
9332                     { gen_max_s32, gen_max_u32 },
9333                 };
9334                 genfn = fns[size][u];
9335                 break;
9336             }
9337             case 0x15: /* SMINP, UMINP */
9338             {
9339                 static NeonGenTwoOpFn * const fns[3][2] = {
9340                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
9341                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
9342                     { gen_min_s32, gen_min_u32 },
9343                 };
9344                 genfn = fns[size][u];
9345                 break;
9346             }
9347             /* The FP operations are all on single floats (32 bit) */
9348             case 0x58: /* FMAXNMP */
9349                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9350                 break;
9351             case 0x5a: /* FADDP */
9352                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9353                 break;
9354             case 0x5e: /* FMAXP */
9355                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9356                 break;
9357             case 0x78: /* FMINNMP */
9358                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9359                 break;
9360             case 0x7e: /* FMINP */
9361                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9362                 break;
9363             default:
9364                 g_assert_not_reached();
9365             }
9366
9367             /* FP ops called directly, otherwise call now */
9368             if (genfn) {
9369                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
9370             }
9371
9372             tcg_temp_free_i32(tcg_op1);
9373             tcg_temp_free_i32(tcg_op2);
9374         }
9375
9376         for (pass = 0; pass < maxpass; pass++) {
9377             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9378             tcg_temp_free_i32(tcg_res[pass]);
9379         }
9380         if (!is_q) {
9381             clear_vec_high(s, rd);
9382         }
9383     }
9384
9385     if (fpst) {
9386         tcg_temp_free_ptr(fpst);
9387     }
9388 }
9389
9390 /* Floating point op subgroup of C3.6.16. */
9391 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
9392 {
9393     /* For floating point ops, the U, size[1] and opcode bits
9394      * together indicate the operation. size[0] indicates single
9395      * or double.
9396      */
9397     int fpopcode = extract32(insn, 11, 5)
9398         | (extract32(insn, 23, 1) << 5)
9399         | (extract32(insn, 29, 1) << 6);
9400     int is_q = extract32(insn, 30, 1);
9401     int size = extract32(insn, 22, 1);
9402     int rm = extract32(insn, 16, 5);
9403     int rn = extract32(insn, 5, 5);
9404     int rd = extract32(insn, 0, 5);
9405
9406     int datasize = is_q ? 128 : 64;
9407     int esize = 32 << size;
9408     int elements = datasize / esize;
9409
9410     if (size == 1 && !is_q) {
9411         unallocated_encoding(s);
9412         return;
9413     }
9414
9415     switch (fpopcode) {
9416     case 0x58: /* FMAXNMP */
9417     case 0x5a: /* FADDP */
9418     case 0x5e: /* FMAXP */
9419     case 0x78: /* FMINNMP */
9420     case 0x7e: /* FMINP */
9421         if (size && !is_q) {
9422             unallocated_encoding(s);
9423             return;
9424         }
9425         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
9426                                rn, rm, rd);
9427         return;
9428     case 0x1b: /* FMULX */
9429     case 0x1f: /* FRECPS */
9430     case 0x3f: /* FRSQRTS */
9431     case 0x5d: /* FACGE */
9432     case 0x7d: /* FACGT */
9433     case 0x19: /* FMLA */
9434     case 0x39: /* FMLS */
9435     case 0x18: /* FMAXNM */
9436     case 0x1a: /* FADD */
9437     case 0x1c: /* FCMEQ */
9438     case 0x1e: /* FMAX */
9439     case 0x38: /* FMINNM */
9440     case 0x3a: /* FSUB */
9441     case 0x3e: /* FMIN */
9442     case 0x5b: /* FMUL */
9443     case 0x5c: /* FCMGE */
9444     case 0x5f: /* FDIV */
9445     case 0x7a: /* FABD */
9446     case 0x7c: /* FCMGT */
9447         if (!fp_access_check(s)) {
9448             return;
9449         }
9450
9451         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
9452         return;
9453     default:
9454         unallocated_encoding(s);
9455         return;
9456     }
9457 }
9458
9459 /* Integer op subgroup of C3.6.16. */
9460 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
9461 {
9462     int is_q = extract32(insn, 30, 1);
9463     int u = extract32(insn, 29, 1);
9464     int size = extract32(insn, 22, 2);
9465     int opcode = extract32(insn, 11, 5);
9466     int rm = extract32(insn, 16, 5);
9467     int rn = extract32(insn, 5, 5);
9468     int rd = extract32(insn, 0, 5);
9469     int pass;
9470
9471     switch (opcode) {
9472     case 0x13: /* MUL, PMUL */
9473         if (u && size != 0) {
9474             unallocated_encoding(s);
9475             return;
9476         }
9477         /* fall through */
9478     case 0x0: /* SHADD, UHADD */
9479     case 0x2: /* SRHADD, URHADD */
9480     case 0x4: /* SHSUB, UHSUB */
9481     case 0xc: /* SMAX, UMAX */
9482     case 0xd: /* SMIN, UMIN */
9483     case 0xe: /* SABD, UABD */
9484     case 0xf: /* SABA, UABA */
9485     case 0x12: /* MLA, MLS */
9486         if (size == 3) {
9487             unallocated_encoding(s);
9488             return;
9489         }
9490         break;
9491     case 0x16: /* SQDMULH, SQRDMULH */
9492         if (size == 0 || size == 3) {
9493             unallocated_encoding(s);
9494             return;
9495         }
9496         break;
9497     default:
9498         if (size == 3 && !is_q) {
9499             unallocated_encoding(s);
9500             return;
9501         }
9502         break;
9503     }
9504
9505     if (!fp_access_check(s)) {
9506         return;
9507     }
9508
9509     switch (opcode) {
9510     case 0x10: /* ADD, SUB */
9511         if (u) {
9512             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
9513         } else {
9514             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
9515         }
9516         return;
9517     }
9518
9519     if (size == 3) {
9520         assert(is_q);
9521         for (pass = 0; pass < 2; pass++) {
9522             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9523             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9524             TCGv_i64 tcg_res = tcg_temp_new_i64();
9525
9526             read_vec_element(s, tcg_op1, rn, pass, MO_64);
9527             read_vec_element(s, tcg_op2, rm, pass, MO_64);
9528
9529             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
9530
9531             write_vec_element(s, tcg_res, rd, pass, MO_64);
9532
9533             tcg_temp_free_i64(tcg_res);
9534             tcg_temp_free_i64(tcg_op1);
9535             tcg_temp_free_i64(tcg_op2);
9536         }
9537     } else {
9538         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
9539             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9540             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9541             TCGv_i32 tcg_res = tcg_temp_new_i32();
9542             NeonGenTwoOpFn *genfn = NULL;
9543             NeonGenTwoOpEnvFn *genenvfn = NULL;
9544
9545             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9546             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9547
9548             switch (opcode) {
9549             case 0x0: /* SHADD, UHADD */
9550             {
9551                 static NeonGenTwoOpFn * const fns[3][2] = {
9552                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
9553                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
9554                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
9555                 };
9556                 genfn = fns[size][u];
9557                 break;
9558             }
9559             case 0x1: /* SQADD, UQADD */
9560             {
9561                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9562                     { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9563                     { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9564                     { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9565                 };
9566                 genenvfn = fns[size][u];
9567                 break;
9568             }
9569             case 0x2: /* SRHADD, URHADD */
9570             {
9571                 static NeonGenTwoOpFn * const fns[3][2] = {
9572                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
9573                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
9574                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
9575                 };
9576                 genfn = fns[size][u];
9577                 break;
9578             }
9579             case 0x4: /* SHSUB, UHSUB */
9580             {
9581                 static NeonGenTwoOpFn * const fns[3][2] = {
9582                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
9583                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
9584                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
9585                 };
9586                 genfn = fns[size][u];
9587                 break;
9588             }
9589             case 0x5: /* SQSUB, UQSUB */
9590             {
9591                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9592                     { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9593                     { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9594                     { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9595                 };
9596                 genenvfn = fns[size][u];
9597                 break;
9598             }
9599             case 0x6: /* CMGT, CMHI */
9600             {
9601                 static NeonGenTwoOpFn * const fns[3][2] = {
9602                     { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
9603                     { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
9604                     { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
9605                 };
9606                 genfn = fns[size][u];
9607                 break;
9608             }
9609             case 0x7: /* CMGE, CMHS */
9610             {
9611                 static NeonGenTwoOpFn * const fns[3][2] = {
9612                     { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
9613                     { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
9614                     { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
9615                 };
9616                 genfn = fns[size][u];
9617                 break;
9618             }
9619             case 0x8: /* SSHL, USHL */
9620             {
9621                 static NeonGenTwoOpFn * const fns[3][2] = {
9622                     { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
9623                     { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
9624                     { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
9625                 };
9626                 genfn = fns[size][u];
9627                 break;
9628             }
9629             case 0x9: /* SQSHL, UQSHL */
9630             {
9631                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9632                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9633                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9634                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9635                 };
9636                 genenvfn = fns[size][u];
9637                 break;
9638             }
9639             case 0xa: /* SRSHL, URSHL */
9640             {
9641                 static NeonGenTwoOpFn * const fns[3][2] = {
9642                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
9643                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
9644                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
9645                 };
9646                 genfn = fns[size][u];
9647                 break;
9648             }
9649             case 0xb: /* SQRSHL, UQRSHL */
9650             {
9651                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9652                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9653                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9654                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9655                 };
9656                 genenvfn = fns[size][u];
9657                 break;
9658             }
9659             case 0xc: /* SMAX, UMAX */
9660             {
9661                 static NeonGenTwoOpFn * const fns[3][2] = {
9662                     { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
9663                     { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
9664                     { gen_max_s32, gen_max_u32 },
9665                 };
9666                 genfn = fns[size][u];
9667                 break;
9668             }
9669
9670             case 0xd: /* SMIN, UMIN */
9671             {
9672                 static NeonGenTwoOpFn * const fns[3][2] = {
9673                     { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
9674                     { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
9675                     { gen_min_s32, gen_min_u32 },
9676                 };
9677                 genfn = fns[size][u];
9678                 break;
9679             }
9680             case 0xe: /* SABD, UABD */
9681             case 0xf: /* SABA, UABA */
9682             {
9683                 static NeonGenTwoOpFn * const fns[3][2] = {
9684                     { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
9685                     { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
9686                     { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
9687                 };
9688                 genfn = fns[size][u];
9689                 break;
9690             }
9691             case 0x11: /* CMTST, CMEQ */
9692             {
9693                 static NeonGenTwoOpFn * const fns[3][2] = {
9694                     { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
9695                     { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
9696                     { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
9697                 };
9698                 genfn = fns[size][u];
9699                 break;
9700             }
9701             case 0x13: /* MUL, PMUL */
9702                 if (u) {
9703                     /* PMUL */
9704                     assert(size == 0);
9705                     genfn = gen_helper_neon_mul_p8;
9706                     break;
9707                 }
9708                 /* fall through : MUL */
9709             case 0x12: /* MLA, MLS */
9710             {
9711                 static NeonGenTwoOpFn * const fns[3] = {
9712                     gen_helper_neon_mul_u8,
9713                     gen_helper_neon_mul_u16,
9714                     tcg_gen_mul_i32,
9715                 };
9716                 genfn = fns[size];
9717                 break;
9718             }
9719             case 0x16: /* SQDMULH, SQRDMULH */
9720             {
9721                 static NeonGenTwoOpEnvFn * const fns[2][2] = {
9722                     { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9723                     { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9724                 };
9725                 assert(size == 1 || size == 2);
9726                 genenvfn = fns[size - 1][u];
9727                 break;
9728             }
9729             default:
9730                 g_assert_not_reached();
9731             }
9732
9733             if (genenvfn) {
9734                 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
9735             } else {
9736                 genfn(tcg_res, tcg_op1, tcg_op2);
9737             }
9738
9739             if (opcode == 0xf || opcode == 0x12) {
9740                 /* SABA, UABA, MLA, MLS: accumulating ops */
9741                 static NeonGenTwoOpFn * const fns[3][2] = {
9742                     { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9743                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9744                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
9745                 };
9746                 bool is_sub = (opcode == 0x12 && u); /* MLS */
9747
9748                 genfn = fns[size][is_sub];
9749                 read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
9750                 genfn(tcg_res, tcg_op1, tcg_res);
9751             }
9752
9753             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9754
9755             tcg_temp_free_i32(tcg_res);
9756             tcg_temp_free_i32(tcg_op1);
9757             tcg_temp_free_i32(tcg_op2);
9758         }
9759     }
9760
9761     if (!is_q) {
9762         clear_vec_high(s, rd);
9763     }
9764 }
9765
9766 /* AdvSIMD three same
9767  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
9768  * +---+---+---+-----------+------+---+------+--------+---+------+------+
9769  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9770  * +---+---+---+-----------+------+---+------+--------+---+------+------+
9771  */
9772 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
9773 {
9774     int opcode = extract32(insn, 11, 5);
9775
9776     switch (opcode) {
9777     case 0x3: /* logic ops */
9778         disas_simd_3same_logic(s, insn);
9779         break;
9780     case 0x17: /* ADDP */
9781     case 0x14: /* SMAXP, UMAXP */
9782     case 0x15: /* SMINP, UMINP */
9783     {
9784         /* Pairwise operations */
9785         int is_q = extract32(insn, 30, 1);
9786         int u = extract32(insn, 29, 1);
9787         int size = extract32(insn, 22, 2);
9788         int rm = extract32(insn, 16, 5);
9789         int rn = extract32(insn, 5, 5);
9790         int rd = extract32(insn, 0, 5);
9791         if (opcode == 0x17) {
9792             if (u || (size == 3 && !is_q)) {
9793                 unallocated_encoding(s);
9794                 return;
9795             }
9796         } else {
9797             if (size == 3) {
9798                 unallocated_encoding(s);
9799                 return;
9800             }
9801         }
9802         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
9803         break;
9804     }
9805     case 0x18 ... 0x31:
9806         /* floating point ops, sz[1] and U are part of opcode */
9807         disas_simd_3same_float(s, insn);
9808         break;
9809     default:
9810         disas_simd_3same_int(s, insn);
9811         break;
9812     }
9813 }
9814
9815 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
9816                                   int size, int rn, int rd)
9817 {
9818     /* Handle 2-reg-misc ops which are widening (so each size element
9819      * in the source becomes a 2*size element in the destination.
9820      * The only instruction like this is FCVTL.
9821      */
9822     int pass;
9823
9824     if (size == 3) {
9825         /* 32 -> 64 bit fp conversion */
9826         TCGv_i64 tcg_res[2];
9827         int srcelt = is_q ? 2 : 0;
9828
9829         for (pass = 0; pass < 2; pass++) {
9830             TCGv_i32 tcg_op = tcg_temp_new_i32();
9831             tcg_res[pass] = tcg_temp_new_i64();
9832
9833             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
9834             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
9835             tcg_temp_free_i32(tcg_op);
9836         }
9837         for (pass = 0; pass < 2; pass++) {
9838             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9839             tcg_temp_free_i64(tcg_res[pass]);
9840         }
9841     } else {
9842         /* 16 -> 32 bit fp conversion */
9843         int srcelt = is_q ? 4 : 0;
9844         TCGv_i32 tcg_res[4];
9845
9846         for (pass = 0; pass < 4; pass++) {
9847             tcg_res[pass] = tcg_temp_new_i32();
9848
9849             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
9850             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
9851                                            cpu_env);
9852         }
9853         for (pass = 0; pass < 4; pass++) {
9854             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9855             tcg_temp_free_i32(tcg_res[pass]);
9856         }
9857     }
9858 }
9859
9860 static void handle_rev(DisasContext *s, int opcode, bool u,
9861                        bool is_q, int size, int rn, int rd)
9862 {
9863     int op = (opcode << 1) | u;
9864     int opsz = op + size;
9865     int grp_size = 3 - opsz;
9866     int dsize = is_q ? 128 : 64;
9867     int i;
9868
9869     if (opsz >= 3) {
9870         unallocated_encoding(s);
9871         return;
9872     }
9873
9874     if (!fp_access_check(s)) {
9875         return;
9876     }
9877
9878     if (size == 0) {
9879         /* Special case bytes, use bswap op on each group of elements */
9880         int groups = dsize / (8 << grp_size);
9881
9882         for (i = 0; i < groups; i++) {
9883             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9884
9885             read_vec_element(s, tcg_tmp, rn, i, grp_size);
9886             switch (grp_size) {
9887             case MO_16:
9888                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
9889                 break;
9890             case MO_32:
9891                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
9892                 break;
9893             case MO_64:
9894                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
9895                 break;
9896             default:
9897                 g_assert_not_reached();
9898             }
9899             write_vec_element(s, tcg_tmp, rd, i, grp_size);
9900             tcg_temp_free_i64(tcg_tmp);
9901         }
9902         if (!is_q) {
9903             clear_vec_high(s, rd);
9904         }
9905     } else {
9906         int revmask = (1 << grp_size) - 1;
9907         int esize = 8 << size;
9908         int elements = dsize / esize;
9909         TCGv_i64 tcg_rn = tcg_temp_new_i64();
9910         TCGv_i64 tcg_rd = tcg_const_i64(0);
9911         TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
9912
9913         for (i = 0; i < elements; i++) {
9914             int e_rev = (i & 0xf) ^ revmask;
9915             int off = e_rev * esize;
9916             read_vec_element(s, tcg_rn, rn, i, size);
9917             if (off >= 64) {
9918                 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
9919                                     tcg_rn, off - 64, esize);
9920             } else {
9921                 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
9922             }
9923         }
9924         write_vec_element(s, tcg_rd, rd, 0, MO_64);
9925         write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
9926
9927         tcg_temp_free_i64(tcg_rd_hi);
9928         tcg_temp_free_i64(tcg_rd);
9929         tcg_temp_free_i64(tcg_rn);
9930     }
9931 }
9932
9933 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
9934                                   bool is_q, int size, int rn, int rd)
9935 {
9936     /* Implement the pairwise operations from 2-misc:
9937      * SADDLP, UADDLP, SADALP, UADALP.
9938      * These all add pairs of elements in the input to produce a
9939      * double-width result element in the output (possibly accumulating).
9940      */
9941     bool accum = (opcode == 0x6);
9942     int maxpass = is_q ? 2 : 1;
9943     int pass;
9944     TCGv_i64 tcg_res[2];
9945
9946     if (size == 2) {
9947         /* 32 + 32 -> 64 op */
9948         TCGMemOp memop = size + (u ? 0 : MO_SIGN);
9949
9950         for (pass = 0; pass < maxpass; pass++) {
9951             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9952             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9953
9954             tcg_res[pass] = tcg_temp_new_i64();
9955
9956             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
9957             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
9958             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9959             if (accum) {
9960                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
9961                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9962             }
9963
9964             tcg_temp_free_i64(tcg_op1);
9965             tcg_temp_free_i64(tcg_op2);
9966         }
9967     } else {
9968         for (pass = 0; pass < maxpass; pass++) {
9969             TCGv_i64 tcg_op = tcg_temp_new_i64();
9970             NeonGenOneOpFn *genfn;
9971             static NeonGenOneOpFn * const fns[2][2] = {
9972                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
9973                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
9974             };
9975
9976             genfn = fns[size][u];
9977
9978             tcg_res[pass] = tcg_temp_new_i64();
9979
9980             read_vec_element(s, tcg_op, rn, pass, MO_64);
9981             genfn(tcg_res[pass], tcg_op);
9982
9983             if (accum) {
9984                 read_vec_element(s, tcg_op, rd, pass, MO_64);
9985                 if (size == 0) {
9986                     gen_helper_neon_addl_u16(tcg_res[pass],
9987                                              tcg_res[pass], tcg_op);
9988                 } else {
9989                     gen_helper_neon_addl_u32(tcg_res[pass],
9990                                              tcg_res[pass], tcg_op);
9991                 }
9992             }
9993             tcg_temp_free_i64(tcg_op);
9994         }
9995     }
9996     if (!is_q) {
9997         tcg_res[1] = tcg_const_i64(0);
9998     }
9999     for (pass = 0; pass < 2; pass++) {
10000         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10001         tcg_temp_free_i64(tcg_res[pass]);
10002     }
10003 }
10004
10005 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
10006 {
10007     /* Implement SHLL and SHLL2 */
10008     int pass;
10009     int part = is_q ? 2 : 0;
10010     TCGv_i64 tcg_res[2];
10011
10012     for (pass = 0; pass < 2; pass++) {
10013         static NeonGenWidenFn * const widenfns[3] = {
10014             gen_helper_neon_widen_u8,
10015             gen_helper_neon_widen_u16,
10016             tcg_gen_extu_i32_i64,
10017         };
10018         NeonGenWidenFn *widenfn = widenfns[size];
10019         TCGv_i32 tcg_op = tcg_temp_new_i32();
10020
10021         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
10022         tcg_res[pass] = tcg_temp_new_i64();
10023         widenfn(tcg_res[pass], tcg_op);
10024         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
10025
10026         tcg_temp_free_i32(tcg_op);
10027     }
10028
10029     for (pass = 0; pass < 2; pass++) {
10030         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10031         tcg_temp_free_i64(tcg_res[pass]);
10032     }
10033 }
10034
10035 /* AdvSIMD two reg misc
10036  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
10037  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
10038  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10039  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
10040  */
10041 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
10042 {
10043     int size = extract32(insn, 22, 2);
10044     int opcode = extract32(insn, 12, 5);
10045     bool u = extract32(insn, 29, 1);
10046     bool is_q = extract32(insn, 30, 1);
10047     int rn = extract32(insn, 5, 5);
10048     int rd = extract32(insn, 0, 5);
10049     bool need_fpstatus = false;
10050     bool need_rmode = false;
10051     int rmode = -1;
10052     TCGv_i32 tcg_rmode;
10053     TCGv_ptr tcg_fpstatus;
10054
10055     switch (opcode) {
10056     case 0x0: /* REV64, REV32 */
10057     case 0x1: /* REV16 */
10058         handle_rev(s, opcode, u, is_q, size, rn, rd);
10059         return;
10060     case 0x5: /* CNT, NOT, RBIT */
10061         if (u && size == 0) {
10062             /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
10063             size = 3;
10064             break;
10065         } else if (u && size == 1) {
10066             /* RBIT */
10067             break;
10068         } else if (!u && size == 0) {
10069             /* CNT */
10070             break;
10071         }
10072         unallocated_encoding(s);
10073         return;
10074     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
10075     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
10076         if (size == 3) {
10077             unallocated_encoding(s);
10078             return;
10079         }
10080         if (!fp_access_check(s)) {
10081             return;
10082         }
10083
10084         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
10085         return;
10086     case 0x4: /* CLS, CLZ */
10087         if (size == 3) {
10088             unallocated_encoding(s);
10089             return;
10090         }
10091         break;
10092     case 0x2: /* SADDLP, UADDLP */
10093     case 0x6: /* SADALP, UADALP */
10094         if (size == 3) {
10095             unallocated_encoding(s);
10096             return;
10097         }
10098         if (!fp_access_check(s)) {
10099             return;
10100         }
10101         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
10102         return;
10103     case 0x13: /* SHLL, SHLL2 */
10104         if (u == 0 || size == 3) {
10105             unallocated_encoding(s);
10106             return;
10107         }
10108         if (!fp_access_check(s)) {
10109             return;
10110         }
10111         handle_shll(s, is_q, size, rn, rd);
10112         return;
10113     case 0xa: /* CMLT */
10114         if (u == 1) {
10115             unallocated_encoding(s);
10116             return;
10117         }
10118         /* fall through */
10119     case 0x8: /* CMGT, CMGE */
10120     case 0x9: /* CMEQ, CMLE */
10121     case 0xb: /* ABS, NEG */
10122         if (size == 3 && !is_q) {
10123             unallocated_encoding(s);
10124             return;
10125         }
10126         break;
10127     case 0x3: /* SUQADD, USQADD */
10128         if (size == 3 && !is_q) {
10129             unallocated_encoding(s);
10130             return;
10131         }
10132         if (!fp_access_check(s)) {
10133             return;
10134         }
10135         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
10136         return;
10137     case 0x7: /* SQABS, SQNEG */
10138         if (size == 3 && !is_q) {
10139             unallocated_encoding(s);
10140             return;
10141         }
10142         break;
10143     case 0xc ... 0xf:
10144     case 0x16 ... 0x1d:
10145     case 0x1f:
10146     {
10147         /* Floating point: U, size[1] and opcode indicate operation;
10148          * size[0] indicates single or double precision.
10149          */
10150         int is_double = extract32(size, 0, 1);
10151         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
10152         size = is_double ? 3 : 2;
10153         switch (opcode) {
10154         case 0x2f: /* FABS */
10155         case 0x6f: /* FNEG */
10156             if (size == 3 && !is_q) {
10157                 unallocated_encoding(s);
10158                 return;
10159             }
10160             break;
10161         case 0x1d: /* SCVTF */
10162         case 0x5d: /* UCVTF */
10163         {
10164             bool is_signed = (opcode == 0x1d) ? true : false;
10165             int elements = is_double ? 2 : is_q ? 4 : 2;
10166             if (is_double && !is_q) {
10167                 unallocated_encoding(s);
10168                 return;
10169             }
10170             if (!fp_access_check(s)) {
10171                 return;
10172             }
10173             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
10174             return;
10175         }
10176         case 0x2c: /* FCMGT (zero) */
10177         case 0x2d: /* FCMEQ (zero) */
10178         case 0x2e: /* FCMLT (zero) */
10179         case 0x6c: /* FCMGE (zero) */
10180         case 0x6d: /* FCMLE (zero) */
10181             if (size == 3 && !is_q) {
10182                 unallocated_encoding(s);
10183                 return;
10184             }
10185             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
10186             return;
10187         case 0x7f: /* FSQRT */
10188             if (size == 3 && !is_q) {
10189                 unallocated_encoding(s);
10190                 return;
10191             }
10192             break;
10193         case 0x1a: /* FCVTNS */
10194         case 0x1b: /* FCVTMS */
10195         case 0x3a: /* FCVTPS */
10196         case 0x3b: /* FCVTZS */
10197         case 0x5a: /* FCVTNU */
10198         case 0x5b: /* FCVTMU */
10199         case 0x7a: /* FCVTPU */
10200         case 0x7b: /* FCVTZU */
10201             need_fpstatus = true;
10202             need_rmode = true;
10203             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10204             if (size == 3 && !is_q) {
10205                 unallocated_encoding(s);
10206                 return;
10207             }
10208             break;
10209         case 0x5c: /* FCVTAU */
10210         case 0x1c: /* FCVTAS */
10211             need_fpstatus = true;
10212             need_rmode = true;
10213             rmode = FPROUNDING_TIEAWAY;
10214             if (size == 3 && !is_q) {
10215                 unallocated_encoding(s);
10216                 return;
10217             }
10218             break;
10219         case 0x3c: /* URECPE */
10220             if (size == 3) {
10221                 unallocated_encoding(s);
10222                 return;
10223             }
10224             /* fall through */
10225         case 0x3d: /* FRECPE */
10226         case 0x7d: /* FRSQRTE */
10227             if (size == 3 && !is_q) {
10228                 unallocated_encoding(s);
10229                 return;
10230             }
10231             if (!fp_access_check(s)) {
10232                 return;
10233             }
10234             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
10235             return;
10236         case 0x56: /* FCVTXN, FCVTXN2 */
10237             if (size == 2) {
10238                 unallocated_encoding(s);
10239                 return;
10240             }
10241             /* fall through */
10242         case 0x16: /* FCVTN, FCVTN2 */
10243             /* handle_2misc_narrow does a 2*size -> size operation, but these
10244              * instructions encode the source size rather than dest size.
10245              */
10246             if (!fp_access_check(s)) {
10247                 return;
10248             }
10249             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
10250             return;
10251         case 0x17: /* FCVTL, FCVTL2 */
10252             if (!fp_access_check(s)) {
10253                 return;
10254             }
10255             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
10256             return;
10257         case 0x18: /* FRINTN */
10258         case 0x19: /* FRINTM */
10259         case 0x38: /* FRINTP */
10260         case 0x39: /* FRINTZ */
10261             need_rmode = true;
10262             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10263             /* fall through */
10264         case 0x59: /* FRINTX */
10265         case 0x79: /* FRINTI */
10266             need_fpstatus = true;
10267             if (size == 3 && !is_q) {
10268                 unallocated_encoding(s);
10269                 return;
10270             }
10271             break;
10272         case 0x58: /* FRINTA */
10273             need_rmode = true;
10274             rmode = FPROUNDING_TIEAWAY;
10275             need_fpstatus = true;
10276             if (size == 3 && !is_q) {
10277                 unallocated_encoding(s);
10278                 return;
10279             }
10280             break;
10281         case 0x7c: /* URSQRTE */
10282             if (size == 3) {
10283                 unallocated_encoding(s);
10284                 return;
10285             }
10286             need_fpstatus = true;
10287             break;
10288         default:
10289             unallocated_encoding(s);
10290             return;
10291         }
10292         break;
10293     }
10294     default:
10295         unallocated_encoding(s);
10296         return;
10297     }
10298
10299     if (!fp_access_check(s)) {
10300         return;
10301     }
10302
10303     if (need_fpstatus) {
10304         tcg_fpstatus = get_fpstatus_ptr();
10305     } else {
10306         tcg_fpstatus = NULL;
10307     }
10308     if (need_rmode) {
10309         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
10310         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10311     } else {
10312         tcg_rmode = NULL;
10313     }
10314
10315     if (size == 3) {
10316         /* All 64-bit element operations can be shared with scalar 2misc */
10317         int pass;
10318
10319         for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
10320             TCGv_i64 tcg_op = tcg_temp_new_i64();
10321             TCGv_i64 tcg_res = tcg_temp_new_i64();
10322
10323             read_vec_element(s, tcg_op, rn, pass, MO_64);
10324
10325             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
10326                             tcg_rmode, tcg_fpstatus);
10327
10328             write_vec_element(s, tcg_res, rd, pass, MO_64);
10329
10330             tcg_temp_free_i64(tcg_res);
10331             tcg_temp_free_i64(tcg_op);
10332         }
10333     } else {
10334         int pass;
10335
10336         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
10337             TCGv_i32 tcg_op = tcg_temp_new_i32();
10338             TCGv_i32 tcg_res = tcg_temp_new_i32();
10339             TCGCond cond;
10340
10341             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
10342
10343             if (size == 2) {
10344                 /* Special cases for 32 bit elements */
10345                 switch (opcode) {
10346                 case 0xa: /* CMLT */
10347                     /* 32 bit integer comparison against zero, result is
10348                      * test ? (2^32 - 1) : 0. We implement via setcond(test)
10349                      * and inverting.
10350                      */
10351                     cond = TCG_COND_LT;
10352                 do_cmop:
10353                     tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
10354                     tcg_gen_neg_i32(tcg_res, tcg_res);
10355                     break;
10356                 case 0x8: /* CMGT, CMGE */
10357                     cond = u ? TCG_COND_GE : TCG_COND_GT;
10358                     goto do_cmop;
10359                 case 0x9: /* CMEQ, CMLE */
10360                     cond = u ? TCG_COND_LE : TCG_COND_EQ;
10361                     goto do_cmop;
10362                 case 0x4: /* CLS */
10363                     if (u) {
10364                         tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
10365                     } else {
10366                         tcg_gen_clrsb_i32(tcg_res, tcg_op);
10367                     }
10368                     break;
10369                 case 0x7: /* SQABS, SQNEG */
10370                     if (u) {
10371                         gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
10372                     } else {
10373                         gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
10374                     }
10375                     break;
10376                 case 0xb: /* ABS, NEG */
10377                     if (u) {
10378                         tcg_gen_neg_i32(tcg_res, tcg_op);
10379                     } else {
10380                         TCGv_i32 tcg_zero = tcg_const_i32(0);
10381                         tcg_gen_neg_i32(tcg_res, tcg_op);
10382                         tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
10383                                             tcg_zero, tcg_op, tcg_res);
10384                         tcg_temp_free_i32(tcg_zero);
10385                     }
10386                     break;
10387                 case 0x2f: /* FABS */
10388                     gen_helper_vfp_abss(tcg_res, tcg_op);
10389                     break;
10390                 case 0x6f: /* FNEG */
10391                     gen_helper_vfp_negs(tcg_res, tcg_op);
10392                     break;
10393                 case 0x7f: /* FSQRT */
10394                     gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
10395                     break;
10396                 case 0x1a: /* FCVTNS */
10397                 case 0x1b: /* FCVTMS */
10398                 case 0x1c: /* FCVTAS */
10399                 case 0x3a: /* FCVTPS */
10400                 case 0x3b: /* FCVTZS */
10401                 {
10402                     TCGv_i32 tcg_shift = tcg_const_i32(0);
10403                     gen_helper_vfp_tosls(tcg_res, tcg_op,
10404                                          tcg_shift, tcg_fpstatus);
10405                     tcg_temp_free_i32(tcg_shift);
10406                     break;
10407                 }
10408                 case 0x5a: /* FCVTNU */
10409                 case 0x5b: /* FCVTMU */
10410                 case 0x5c: /* FCVTAU */
10411                 case 0x7a: /* FCVTPU */
10412                 case 0x7b: /* FCVTZU */
10413                 {
10414                     TCGv_i32 tcg_shift = tcg_const_i32(0);
10415                     gen_helper_vfp_touls(tcg_res, tcg_op,
10416                                          tcg_shift, tcg_fpstatus);
10417                     tcg_temp_free_i32(tcg_shift);
10418                     break;
10419                 }
10420                 case 0x18: /* FRINTN */
10421                 case 0x19: /* FRINTM */
10422                 case 0x38: /* FRINTP */
10423                 case 0x39: /* FRINTZ */
10424                 case 0x58: /* FRINTA */
10425                 case 0x79: /* FRINTI */
10426                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
10427                     break;
10428                 case 0x59: /* FRINTX */
10429                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
10430                     break;
10431                 case 0x7c: /* URSQRTE */
10432                     gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
10433                     break;
10434                 default:
10435                     g_assert_not_reached();
10436                 }
10437             } else {
10438                 /* Use helpers for 8 and 16 bit elements */
10439                 switch (opcode) {
10440                 case 0x5: /* CNT, RBIT */
10441                     /* For these two insns size is part of the opcode specifier
10442                      * (handled earlier); they always operate on byte elements.
10443                      */
10444                     if (u) {
10445                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
10446                     } else {
10447                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
10448                     }
10449                     break;
10450                 case 0x7: /* SQABS, SQNEG */
10451                 {
10452                     NeonGenOneOpEnvFn *genfn;
10453                     static NeonGenOneOpEnvFn * const fns[2][2] = {
10454                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10455                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10456                     };
10457                     genfn = fns[size][u];
10458                     genfn(tcg_res, cpu_env, tcg_op);
10459                     break;
10460                 }
10461                 case 0x8: /* CMGT, CMGE */
10462                 case 0x9: /* CMEQ, CMLE */
10463                 case 0xa: /* CMLT */
10464                 {
10465                     static NeonGenTwoOpFn * const fns[3][2] = {
10466                         { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
10467                         { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
10468                         { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
10469                     };
10470                     NeonGenTwoOpFn *genfn;
10471                     int comp;
10472                     bool reverse;
10473                     TCGv_i32 tcg_zero = tcg_const_i32(0);
10474
10475                     /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
10476                     comp = (opcode - 0x8) * 2 + u;
10477                     /* ...but LE, LT are implemented as reverse GE, GT */
10478                     reverse = (comp > 2);
10479                     if (reverse) {
10480                         comp = 4 - comp;
10481                     }
10482                     genfn = fns[comp][size];
10483                     if (reverse) {
10484                         genfn(tcg_res, tcg_zero, tcg_op);
10485                     } else {
10486                         genfn(tcg_res, tcg_op, tcg_zero);
10487                     }
10488                     tcg_temp_free_i32(tcg_zero);
10489                     break;
10490                 }
10491                 case 0xb: /* ABS, NEG */
10492                     if (u) {
10493                         TCGv_i32 tcg_zero = tcg_const_i32(0);
10494                         if (size) {
10495                             gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
10496                         } else {
10497                             gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
10498                         }
10499                         tcg_temp_free_i32(tcg_zero);
10500                     } else {
10501                         if (size) {
10502                             gen_helper_neon_abs_s16(tcg_res, tcg_op);
10503                         } else {
10504                             gen_helper_neon_abs_s8(tcg_res, tcg_op);
10505                         }
10506                     }
10507                     break;
10508                 case 0x4: /* CLS, CLZ */
10509                     if (u) {
10510                         if (size == 0) {
10511                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
10512                         } else {
10513                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
10514                         }
10515                     } else {
10516                         if (size == 0) {
10517                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
10518                         } else {
10519                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
10520                         }
10521                     }
10522                     break;
10523                 default:
10524                     g_assert_not_reached();
10525                 }
10526             }
10527
10528             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10529
10530             tcg_temp_free_i32(tcg_res);
10531             tcg_temp_free_i32(tcg_op);
10532         }
10533     }
10534     if (!is_q) {
10535         clear_vec_high(s, rd);
10536     }
10537
10538     if (need_rmode) {
10539         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10540         tcg_temp_free_i32(tcg_rmode);
10541     }
10542     if (need_fpstatus) {
10543         tcg_temp_free_ptr(tcg_fpstatus);
10544     }
10545 }
10546
10547 /* AdvSIMD scalar x indexed element
10548  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10549  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10550  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10551  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10552  * AdvSIMD vector x indexed element
10553  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10554  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10555  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10556  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10557  */
10558 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
10559 {
10560     /* This encoding has two kinds of instruction:
10561      *  normal, where we perform elt x idxelt => elt for each
10562      *     element in the vector
10563      *  long, where we perform elt x idxelt and generate a result of
10564      *     double the width of the input element
10565      * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
10566      */
10567     bool is_scalar = extract32(insn, 28, 1);
10568     bool is_q = extract32(insn, 30, 1);
10569     bool u = extract32(insn, 29, 1);
10570     int size = extract32(insn, 22, 2);
10571     int l = extract32(insn, 21, 1);
10572     int m = extract32(insn, 20, 1);
10573     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
10574     int rm = extract32(insn, 16, 4);
10575     int opcode = extract32(insn, 12, 4);
10576     int h = extract32(insn, 11, 1);
10577     int rn = extract32(insn, 5, 5);
10578     int rd = extract32(insn, 0, 5);
10579     bool is_long = false;
10580     bool is_fp = false;
10581     int index;
10582     TCGv_ptr fpst;
10583
10584     switch (opcode) {
10585     case 0x0: /* MLA */
10586     case 0x4: /* MLS */
10587         if (!u || is_scalar) {
10588             unallocated_encoding(s);
10589             return;
10590         }
10591         break;
10592     case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10593     case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10594     case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
10595         if (is_scalar) {
10596             unallocated_encoding(s);
10597             return;
10598         }
10599         is_long = true;
10600         break;
10601     case 0x3: /* SQDMLAL, SQDMLAL2 */
10602     case 0x7: /* SQDMLSL, SQDMLSL2 */
10603     case 0xb: /* SQDMULL, SQDMULL2 */
10604         is_long = true;
10605         /* fall through */
10606     case 0xc: /* SQDMULH */
10607     case 0xd: /* SQRDMULH */
10608         if (u) {
10609             unallocated_encoding(s);
10610             return;
10611         }
10612         break;
10613     case 0x8: /* MUL */
10614         if (u || is_scalar) {
10615             unallocated_encoding(s);
10616             return;
10617         }
10618         break;
10619     case 0x1: /* FMLA */
10620     case 0x5: /* FMLS */
10621         if (u) {
10622             unallocated_encoding(s);
10623             return;
10624         }
10625         /* fall through */
10626     case 0x9: /* FMUL, FMULX */
10627         if (!extract32(size, 1, 1)) {
10628             unallocated_encoding(s);
10629             return;
10630         }
10631         is_fp = true;
10632         break;
10633     default:
10634         unallocated_encoding(s);
10635         return;
10636     }
10637
10638     if (is_fp) {
10639         /* low bit of size indicates single/double */
10640         size = extract32(size, 0, 1) ? 3 : 2;
10641         if (size == 2) {
10642             index = h << 1 | l;
10643         } else {
10644             if (l || !is_q) {
10645                 unallocated_encoding(s);
10646                 return;
10647             }
10648             index = h;
10649         }
10650         rm |= (m << 4);
10651     } else {
10652         switch (size) {
10653         case 1:
10654             index = h << 2 | l << 1 | m;
10655             break;
10656         case 2:
10657             index = h << 1 | l;
10658             rm |= (m << 4);
10659             break;
10660         default:
10661             unallocated_encoding(s);
10662             return;
10663         }
10664     }
10665
10666     if (!fp_access_check(s)) {
10667         return;
10668     }
10669
10670     if (is_fp) {
10671         fpst = get_fpstatus_ptr();
10672     } else {
10673         fpst = NULL;
10674     }
10675
10676     if (size == 3) {
10677         TCGv_i64 tcg_idx = tcg_temp_new_i64();
10678         int pass;
10679
10680         assert(is_fp && is_q && !is_long);
10681
10682         read_vec_element(s, tcg_idx, rm, index, MO_64);
10683
10684         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10685             TCGv_i64 tcg_op = tcg_temp_new_i64();
10686             TCGv_i64 tcg_res = tcg_temp_new_i64();
10687
10688             read_vec_element(s, tcg_op, rn, pass, MO_64);
10689
10690             switch (opcode) {
10691             case 0x5: /* FMLS */
10692                 /* As usual for ARM, separate negation for fused multiply-add */
10693                 gen_helper_vfp_negd(tcg_op, tcg_op);
10694                 /* fall through */
10695             case 0x1: /* FMLA */
10696                 read_vec_element(s, tcg_res, rd, pass, MO_64);
10697                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10698                 break;
10699             case 0x9: /* FMUL, FMULX */
10700                 if (u) {
10701                     gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
10702                 } else {
10703                     gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
10704                 }
10705                 break;
10706             default:
10707                 g_assert_not_reached();
10708             }
10709
10710             write_vec_element(s, tcg_res, rd, pass, MO_64);
10711             tcg_temp_free_i64(tcg_op);
10712             tcg_temp_free_i64(tcg_res);
10713         }
10714
10715         if (is_scalar) {
10716             clear_vec_high(s, rd);
10717         }
10718
10719         tcg_temp_free_i64(tcg_idx);
10720     } else if (!is_long) {
10721         /* 32 bit floating point, or 16 or 32 bit integer.
10722          * For the 16 bit scalar case we use the usual Neon helpers and
10723          * rely on the fact that 0 op 0 == 0 with no side effects.
10724          */
10725         TCGv_i32 tcg_idx = tcg_temp_new_i32();
10726         int pass, maxpasses;
10727
10728         if (is_scalar) {
10729             maxpasses = 1;
10730         } else {
10731             maxpasses = is_q ? 4 : 2;
10732         }
10733
10734         read_vec_element_i32(s, tcg_idx, rm, index, size);
10735
10736         if (size == 1 && !is_scalar) {
10737             /* The simplest way to handle the 16x16 indexed ops is to duplicate
10738              * the index into both halves of the 32 bit tcg_idx and then use
10739              * the usual Neon helpers.
10740              */
10741             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10742         }
10743
10744         for (pass = 0; pass < maxpasses; pass++) {
10745             TCGv_i32 tcg_op = tcg_temp_new_i32();
10746             TCGv_i32 tcg_res = tcg_temp_new_i32();
10747
10748             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
10749
10750             switch (opcode) {
10751             case 0x0: /* MLA */
10752             case 0x4: /* MLS */
10753             case 0x8: /* MUL */
10754             {
10755                 static NeonGenTwoOpFn * const fns[2][2] = {
10756                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
10757                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
10758                 };
10759                 NeonGenTwoOpFn *genfn;
10760                 bool is_sub = opcode == 0x4;
10761
10762                 if (size == 1) {
10763                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
10764                 } else {
10765                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
10766                 }
10767                 if (opcode == 0x8) {
10768                     break;
10769                 }
10770                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
10771                 genfn = fns[size - 1][is_sub];
10772                 genfn(tcg_res, tcg_op, tcg_res);
10773                 break;
10774             }
10775             case 0x5: /* FMLS */
10776                 /* As usual for ARM, separate negation for fused multiply-add */
10777                 gen_helper_vfp_negs(tcg_op, tcg_op);
10778                 /* fall through */
10779             case 0x1: /* FMLA */
10780                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10781                 gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10782                 break;
10783             case 0x9: /* FMUL, FMULX */
10784                 if (u) {
10785                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
10786                 } else {
10787                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
10788                 }
10789                 break;
10790             case 0xc: /* SQDMULH */
10791                 if (size == 1) {
10792                     gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
10793                                                tcg_op, tcg_idx);
10794                 } else {
10795                     gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
10796                                                tcg_op, tcg_idx);
10797                 }
10798                 break;
10799             case 0xd: /* SQRDMULH */
10800                 if (size == 1) {
10801                     gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
10802                                                 tcg_op, tcg_idx);
10803                 } else {
10804                     gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
10805                                                 tcg_op, tcg_idx);
10806                 }
10807                 break;
10808             default:
10809                 g_assert_not_reached();
10810             }
10811
10812             if (is_scalar) {
10813                 write_fp_sreg(s, rd, tcg_res);
10814             } else {
10815                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10816             }
10817
10818             tcg_temp_free_i32(tcg_op);
10819             tcg_temp_free_i32(tcg_res);
10820         }
10821
10822         tcg_temp_free_i32(tcg_idx);
10823
10824         if (!is_q) {
10825             clear_vec_high(s, rd);
10826         }
10827     } else {
10828         /* long ops: 16x16->32 or 32x32->64 */
10829         TCGv_i64 tcg_res[2];
10830         int pass;
10831         bool satop = extract32(opcode, 0, 1);
10832         TCGMemOp memop = MO_32;
10833
10834         if (satop || !u) {
10835             memop |= MO_SIGN;
10836         }
10837
10838         if (size == 2) {
10839             TCGv_i64 tcg_idx = tcg_temp_new_i64();
10840
10841             read_vec_element(s, tcg_idx, rm, index, memop);
10842
10843             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10844                 TCGv_i64 tcg_op = tcg_temp_new_i64();
10845                 TCGv_i64 tcg_passres;
10846                 int passelt;
10847
10848                 if (is_scalar) {
10849                     passelt = 0;
10850                 } else {
10851                     passelt = pass + (is_q * 2);
10852                 }
10853
10854                 read_vec_element(s, tcg_op, rn, passelt, memop);
10855
10856                 tcg_res[pass] = tcg_temp_new_i64();
10857
10858                 if (opcode == 0xa || opcode == 0xb) {
10859                     /* Non-accumulating ops */
10860                     tcg_passres = tcg_res[pass];
10861                 } else {
10862                     tcg_passres = tcg_temp_new_i64();
10863                 }
10864
10865                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
10866                 tcg_temp_free_i64(tcg_op);
10867
10868                 if (satop) {
10869                     /* saturating, doubling */
10870                     gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10871                                                       tcg_passres, tcg_passres);
10872                 }
10873
10874                 if (opcode == 0xa || opcode == 0xb) {
10875                     continue;
10876                 }
10877
10878                 /* Accumulating op: handle accumulate step */
10879                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10880
10881                 switch (opcode) {
10882                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10883                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10884                     break;
10885                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10886                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10887                     break;
10888                 case 0x7: /* SQDMLSL, SQDMLSL2 */
10889                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
10890                     /* fall through */
10891                 case 0x3: /* SQDMLAL, SQDMLAL2 */
10892                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10893                                                       tcg_res[pass],
10894                                                       tcg_passres);
10895                     break;
10896                 default:
10897                     g_assert_not_reached();
10898                 }
10899                 tcg_temp_free_i64(tcg_passres);
10900             }
10901             tcg_temp_free_i64(tcg_idx);
10902
10903             if (is_scalar) {
10904                 clear_vec_high(s, rd);
10905             }
10906         } else {
10907             TCGv_i32 tcg_idx = tcg_temp_new_i32();
10908
10909             assert(size == 1);
10910             read_vec_element_i32(s, tcg_idx, rm, index, size);
10911
10912             if (!is_scalar) {
10913                 /* The simplest way to handle the 16x16 indexed ops is to
10914                  * duplicate the index into both halves of the 32 bit tcg_idx
10915                  * and then use the usual Neon helpers.
10916                  */
10917                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10918             }
10919
10920             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10921                 TCGv_i32 tcg_op = tcg_temp_new_i32();
10922                 TCGv_i64 tcg_passres;
10923
10924                 if (is_scalar) {
10925                     read_vec_element_i32(s, tcg_op, rn, pass, size);
10926                 } else {
10927                     read_vec_element_i32(s, tcg_op, rn,
10928                                          pass + (is_q * 2), MO_32);
10929                 }
10930
10931                 tcg_res[pass] = tcg_temp_new_i64();
10932
10933                 if (opcode == 0xa || opcode == 0xb) {
10934                     /* Non-accumulating ops */
10935                     tcg_passres = tcg_res[pass];
10936                 } else {
10937                     tcg_passres = tcg_temp_new_i64();
10938                 }
10939
10940                 if (memop & MO_SIGN) {
10941                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
10942                 } else {
10943                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
10944                 }
10945                 if (satop) {
10946                     gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10947                                                       tcg_passres, tcg_passres);
10948                 }
10949                 tcg_temp_free_i32(tcg_op);
10950
10951                 if (opcode == 0xa || opcode == 0xb) {
10952                     continue;
10953                 }
10954
10955                 /* Accumulating op: handle accumulate step */
10956                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10957
10958                 switch (opcode) {
10959                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10960                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
10961                                              tcg_passres);
10962                     break;
10963                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10964                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
10965                                              tcg_passres);
10966                     break;
10967                 case 0x7: /* SQDMLSL, SQDMLSL2 */
10968                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10969                     /* fall through */
10970                 case 0x3: /* SQDMLAL, SQDMLAL2 */
10971                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10972                                                       tcg_res[pass],
10973                                                       tcg_passres);
10974                     break;
10975                 default:
10976                     g_assert_not_reached();
10977                 }
10978                 tcg_temp_free_i64(tcg_passres);
10979             }
10980             tcg_temp_free_i32(tcg_idx);
10981
10982             if (is_scalar) {
10983                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
10984             }
10985         }
10986
10987         if (is_scalar) {
10988             tcg_res[1] = tcg_const_i64(0);
10989         }
10990
10991         for (pass = 0; pass < 2; pass++) {
10992             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10993             tcg_temp_free_i64(tcg_res[pass]);
10994         }
10995     }
10996
10997     if (fpst) {
10998         tcg_temp_free_ptr(fpst);
10999     }
11000 }
11001
11002 /* Crypto AES
11003  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
11004  * +-----------------+------+-----------+--------+-----+------+------+
11005  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11006  * +-----------------+------+-----------+--------+-----+------+------+
11007  */
11008 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
11009 {
11010     int size = extract32(insn, 22, 2);
11011     int opcode = extract32(insn, 12, 5);
11012     int rn = extract32(insn, 5, 5);
11013     int rd = extract32(insn, 0, 5);
11014     int decrypt;
11015     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
11016     TCGv_i32 tcg_decrypt;
11017     CryptoThreeOpIntFn *genfn;
11018
11019     if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
11020         || size != 0) {
11021         unallocated_encoding(s);
11022         return;
11023     }
11024
11025     switch (opcode) {
11026     case 0x4: /* AESE */
11027         decrypt = 0;
11028         genfn = gen_helper_crypto_aese;
11029         break;
11030     case 0x6: /* AESMC */
11031         decrypt = 0;
11032         genfn = gen_helper_crypto_aesmc;
11033         break;
11034     case 0x5: /* AESD */
11035         decrypt = 1;
11036         genfn = gen_helper_crypto_aese;
11037         break;
11038     case 0x7: /* AESIMC */
11039         decrypt = 1;
11040         genfn = gen_helper_crypto_aesmc;
11041         break;
11042     default:
11043         unallocated_encoding(s);
11044         return;
11045     }
11046
11047     if (!fp_access_check(s)) {
11048         return;
11049     }
11050
11051     tcg_rd_ptr = vec_full_reg_ptr(s, rd);
11052     tcg_rn_ptr = vec_full_reg_ptr(s, rn);
11053     tcg_decrypt = tcg_const_i32(decrypt);
11054
11055     genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_decrypt);
11056
11057     tcg_temp_free_ptr(tcg_rd_ptr);
11058     tcg_temp_free_ptr(tcg_rn_ptr);
11059     tcg_temp_free_i32(tcg_decrypt);
11060 }
11061
11062 /* Crypto three-reg SHA
11063  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
11064  * +-----------------+------+---+------+---+--------+-----+------+------+
11065  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
11066  * +-----------------+------+---+------+---+--------+-----+------+------+
11067  */
11068 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
11069 {
11070     int size = extract32(insn, 22, 2);
11071     int opcode = extract32(insn, 12, 3);
11072     int rm = extract32(insn, 16, 5);
11073     int rn = extract32(insn, 5, 5);
11074     int rd = extract32(insn, 0, 5);
11075     CryptoThreeOpFn *genfn;
11076     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
11077     int feature = ARM_FEATURE_V8_SHA256;
11078
11079     if (size != 0) {
11080         unallocated_encoding(s);
11081         return;
11082     }
11083
11084     switch (opcode) {
11085     case 0: /* SHA1C */
11086     case 1: /* SHA1P */
11087     case 2: /* SHA1M */
11088     case 3: /* SHA1SU0 */
11089         genfn = NULL;
11090         feature = ARM_FEATURE_V8_SHA1;
11091         break;
11092     case 4: /* SHA256H */
11093         genfn = gen_helper_crypto_sha256h;
11094         break;
11095     case 5: /* SHA256H2 */
11096         genfn = gen_helper_crypto_sha256h2;
11097         break;
11098     case 6: /* SHA256SU1 */
11099         genfn = gen_helper_crypto_sha256su1;
11100         break;
11101     default:
11102         unallocated_encoding(s);
11103         return;
11104     }
11105
11106     if (!arm_dc_feature(s, feature)) {
11107         unallocated_encoding(s);
11108         return;
11109     }
11110
11111     if (!fp_access_check(s)) {
11112         return;
11113     }
11114
11115     tcg_rd_ptr = vec_full_reg_ptr(s, rd);
11116     tcg_rn_ptr = vec_full_reg_ptr(s, rn);
11117     tcg_rm_ptr = vec_full_reg_ptr(s, rm);
11118
11119     if (genfn) {
11120         genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
11121     } else {
11122         TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
11123
11124         gen_helper_crypto_sha1_3reg(tcg_rd_ptr, tcg_rn_ptr,
11125                                     tcg_rm_ptr, tcg_opcode);
11126         tcg_temp_free_i32(tcg_opcode);
11127     }
11128
11129     tcg_temp_free_ptr(tcg_rd_ptr);
11130     tcg_temp_free_ptr(tcg_rn_ptr);
11131     tcg_temp_free_ptr(tcg_rm_ptr);
11132 }
11133
11134 /* Crypto two-reg SHA
11135  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
11136  * +-----------------+------+-----------+--------+-----+------+------+
11137  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11138  * +-----------------+------+-----------+--------+-----+------+------+
11139  */
11140 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
11141 {
11142     int size = extract32(insn, 22, 2);
11143     int opcode = extract32(insn, 12, 5);
11144     int rn = extract32(insn, 5, 5);
11145     int rd = extract32(insn, 0, 5);
11146     CryptoTwoOpFn *genfn;
11147     int feature;
11148     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
11149
11150     if (size != 0) {
11151         unallocated_encoding(s);
11152         return;
11153     }
11154
11155     switch (opcode) {
11156     case 0: /* SHA1H */
11157         feature = ARM_FEATURE_V8_SHA1;
11158         genfn = gen_helper_crypto_sha1h;
11159         break;
11160     case 1: /* SHA1SU1 */
11161         feature = ARM_FEATURE_V8_SHA1;
11162         genfn = gen_helper_crypto_sha1su1;
11163         break;
11164     case 2: /* SHA256SU0 */
11165         feature = ARM_FEATURE_V8_SHA256;
11166         genfn = gen_helper_crypto_sha256su0;
11167         break;
11168     default:
11169         unallocated_encoding(s);
11170         return;
11171     }
11172
11173     if (!arm_dc_feature(s, feature)) {
11174         unallocated_encoding(s);
11175         return;
11176     }
11177
11178     if (!fp_access_check(s)) {
11179         return;
11180     }
11181
11182     tcg_rd_ptr = vec_full_reg_ptr(s, rd);
11183     tcg_rn_ptr = vec_full_reg_ptr(s, rn);
11184
11185     genfn(tcg_rd_ptr, tcg_rn_ptr);
11186
11187     tcg_temp_free_ptr(tcg_rd_ptr);
11188     tcg_temp_free_ptr(tcg_rn_ptr);
11189 }
11190
11191 /* C3.6 Data processing - SIMD, inc Crypto
11192  *
11193  * As the decode gets a little complex we are using a table based
11194  * approach for this part of the decode.
11195  */
11196 static const AArch64DecodeTable data_proc_simd[] = {
11197     /* pattern  ,  mask     ,  fn                        */
11198     { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
11199     { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
11200     { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
11201     { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
11202     { 0x0e000400, 0x9fe08400, disas_simd_copy },
11203     { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
11204     /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
11205     { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
11206     { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
11207     { 0x0e000000, 0xbf208c00, disas_simd_tb },
11208     { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
11209     { 0x2e000000, 0xbf208400, disas_simd_ext },
11210     { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
11211     { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
11212     { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
11213     { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
11214     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
11215     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
11216     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
11217     { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
11218     { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
11219     { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
11220     { 0x00000000, 0x00000000, NULL }
11221 };
11222
11223 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
11224 {
11225     /* Note that this is called with all non-FP cases from
11226      * table C3-6 so it must UNDEF for entries not specifically
11227      * allocated to instructions in that table.
11228      */
11229     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
11230     if (fn) {
11231         fn(s, insn);
11232     } else {
11233         unallocated_encoding(s);
11234     }
11235 }
11236
11237 /* C3.6 Data processing - SIMD and floating point */
11238 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
11239 {
11240     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
11241         disas_data_proc_fp(s, insn);
11242     } else {
11243         /* SIMD, including crypto */
11244         disas_data_proc_simd(s, insn);
11245     }
11246 }
11247
11248 /* C3.1 A64 instruction index by encoding */
11249 static void disas_a64_insn(CPUARMState *env, DisasContext *s)
11250 {
11251     uint32_t insn;
11252
11253     insn = arm_ldl_code(env, s->pc, s->sctlr_b);
11254     s->insn = insn;
11255     s->pc += 4;
11256
11257     s->fp_access_checked = false;
11258
11259     switch (extract32(insn, 25, 4)) {
11260     case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
11261         unallocated_encoding(s);
11262         break;
11263     case 0x8: case 0x9: /* Data processing - immediate */
11264         disas_data_proc_imm(s, insn);
11265         break;
11266     case 0xa: case 0xb: /* Branch, exception generation and system insns */
11267         disas_b_exc_sys(s, insn);
11268         break;
11269     case 0x4:
11270     case 0x6:
11271     case 0xc:
11272     case 0xe:      /* Loads and stores */
11273         disas_ldst(s, insn);
11274         break;
11275     case 0x5:
11276     case 0xd:      /* Data processing - register */
11277         disas_data_proc_reg(s, insn);
11278         break;
11279     case 0x7:
11280     case 0xf:      /* Data processing - SIMD and floating point */
11281         disas_data_proc_simd_fp(s, insn);
11282         break;
11283     default:
11284         assert(FALSE); /* all 15 cases should be handled above */
11285         break;
11286     }
11287
11288     /* if we allocated any temporaries, free them here */
11289     free_tmp_a64(s);
11290 }
11291
11292 static int aarch64_tr_init_disas_context(DisasContextBase *dcbase,
11293                                          CPUState *cpu, int max_insns)
11294 {
11295     DisasContext *dc = container_of(dcbase, DisasContext, base);
11296     CPUARMState *env = cpu->env_ptr;
11297     ARMCPU *arm_cpu = arm_env_get_cpu(env);
11298     int bound;
11299
11300     dc->pc = dc->base.pc_first;
11301     dc->condjmp = 0;
11302
11303     dc->aarch64 = 1;
11304     /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
11305      * there is no secure EL1, so we route exceptions to EL3.
11306      */
11307     dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
11308                                !arm_el_is_aa64(env, 3);
11309     dc->thumb = 0;
11310     dc->sctlr_b = 0;
11311     dc->be_data = ARM_TBFLAG_BE_DATA(dc->base.tb->flags) ? MO_BE : MO_LE;
11312     dc->condexec_mask = 0;
11313     dc->condexec_cond = 0;
11314     dc->mmu_idx = core_to_arm_mmu_idx(env, ARM_TBFLAG_MMUIDX(dc->base.tb->flags));
11315     dc->tbi0 = ARM_TBFLAG_TBI0(dc->base.tb->flags);
11316     dc->tbi1 = ARM_TBFLAG_TBI1(dc->base.tb->flags);
11317     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
11318 #if !defined(CONFIG_USER_ONLY)
11319     dc->user = (dc->current_el == 0);
11320 #endif
11321     dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(dc->base.tb->flags);
11322     dc->vec_len = 0;
11323     dc->vec_stride = 0;
11324     dc->cp_regs = arm_cpu->cp_regs;
11325     dc->features = env->features;
11326
11327     /* Single step state. The code-generation logic here is:
11328      *  SS_ACTIVE == 0:
11329      *   generate code with no special handling for single-stepping (except
11330      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
11331      *   this happens anyway because those changes are all system register or
11332      *   PSTATE writes).
11333      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
11334      *   emit code for one insn
11335      *   emit code to clear PSTATE.SS
11336      *   emit code to generate software step exception for completed step
11337      *   end TB (as usual for having generated an exception)
11338      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
11339      *   emit code to generate a software step exception
11340      *   end the TB
11341      */
11342     dc->ss_active = ARM_TBFLAG_SS_ACTIVE(dc->base.tb->flags);
11343     dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(dc->base.tb->flags);
11344     dc->is_ldex = false;
11345     dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);
11346
11347     /* Bound the number of insns to execute to those left on the page.  */
11348     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
11349
11350     /* If architectural single step active, limit to 1.  */
11351     if (dc->ss_active) {
11352         bound = 1;
11353     }
11354     max_insns = MIN(max_insns, bound);
11355
11356     init_tmp_a64_array(dc);
11357
11358     return max_insns;
11359 }
11360
11361 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
11362 {
11363     tcg_clear_temp_count();
11364 }
11365
11366 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
11367 {
11368     DisasContext *dc = container_of(dcbase, DisasContext, base);
11369
11370     tcg_gen_insn_start(dc->pc, 0, 0);
11371     dc->insn_start = tcg_last_op();
11372 }
11373
11374 static bool aarch64_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
11375                                         const CPUBreakpoint *bp)
11376 {
11377     DisasContext *dc = container_of(dcbase, DisasContext, base);
11378
11379     if (bp->flags & BP_CPU) {
11380         gen_a64_set_pc_im(dc->pc);
11381         gen_helper_check_breakpoints(cpu_env);
11382         /* End the TB early; it likely won't be executed */
11383         dc->base.is_jmp = DISAS_TOO_MANY;
11384     } else {
11385         gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
11386         /* The address covered by the breakpoint must be
11387            included in [tb->pc, tb->pc + tb->size) in order
11388            to for it to be properly cleared -- thus we
11389            increment the PC here so that the logic setting
11390            tb->size below does the right thing.  */
11391         dc->pc += 4;
11392         dc->base.is_jmp = DISAS_NORETURN;
11393     }
11394
11395     return true;
11396 }
11397
11398 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
11399 {
11400     DisasContext *dc = container_of(dcbase, DisasContext, base);
11401     CPUARMState *env = cpu->env_ptr;
11402
11403     if (dc->ss_active && !dc->pstate_ss) {
11404         /* Singlestep state is Active-pending.
11405          * If we're in this state at the start of a TB then either
11406          *  a) we just took an exception to an EL which is being debugged
11407          *     and this is the first insn in the exception handler
11408          *  b) debug exceptions were masked and we just unmasked them
11409          *     without changing EL (eg by clearing PSTATE.D)
11410          * In either case we're going to take a swstep exception in the
11411          * "did not step an insn" case, and so the syndrome ISV and EX
11412          * bits should be zero.
11413          */
11414         assert(dc->base.num_insns == 1);
11415         gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
11416                       default_exception_el(dc));
11417         dc->base.is_jmp = DISAS_NORETURN;
11418     } else {
11419         disas_a64_insn(env, dc);
11420     }
11421
11422     dc->base.pc_next = dc->pc;
11423     translator_loop_temp_check(&dc->base);
11424 }
11425
11426 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
11427 {
11428     DisasContext *dc = container_of(dcbase, DisasContext, base);
11429
11430     if (unlikely(dc->base.singlestep_enabled || dc->ss_active)) {
11431         /* Note that this means single stepping WFI doesn't halt the CPU.
11432          * For conditional branch insns this is harmless unreachable code as
11433          * gen_goto_tb() has already handled emitting the debug exception
11434          * (and thus a tb-jump is not possible when singlestepping).
11435          */
11436         switch (dc->base.is_jmp) {
11437         default:
11438             gen_a64_set_pc_im(dc->pc);
11439             /* fall through */
11440         case DISAS_EXIT:
11441         case DISAS_JUMP:
11442             if (dc->base.singlestep_enabled) {
11443                 gen_exception_internal(EXCP_DEBUG);
11444             } else {
11445                 gen_step_complete_exception(dc);
11446             }
11447             break;
11448         case DISAS_NORETURN:
11449             break;
11450         }
11451     } else {
11452         switch (dc->base.is_jmp) {
11453         case DISAS_NEXT:
11454         case DISAS_TOO_MANY:
11455             gen_goto_tb(dc, 1, dc->pc);
11456             break;
11457         default:
11458         case DISAS_UPDATE:
11459             gen_a64_set_pc_im(dc->pc);
11460             /* fall through */
11461         case DISAS_JUMP:
11462             tcg_gen_lookup_and_goto_ptr();
11463             break;
11464         case DISAS_EXIT:
11465             tcg_gen_exit_tb(0);
11466             break;
11467         case DISAS_NORETURN:
11468         case DISAS_SWI:
11469             break;
11470         case DISAS_WFE:
11471             gen_a64_set_pc_im(dc->pc);
11472             gen_helper_wfe(cpu_env);
11473             break;
11474         case DISAS_YIELD:
11475             gen_a64_set_pc_im(dc->pc);
11476             gen_helper_yield(cpu_env);
11477             break;
11478         case DISAS_WFI:
11479         {
11480             /* This is a special case because we don't want to just halt the CPU
11481              * if trying to debug across a WFI.
11482              */
11483             TCGv_i32 tmp = tcg_const_i32(4);
11484
11485             gen_a64_set_pc_im(dc->pc);
11486             gen_helper_wfi(cpu_env, tmp);
11487             tcg_temp_free_i32(tmp);
11488             /* The helper doesn't necessarily throw an exception, but we
11489              * must go back to the main loop to check for interrupts anyway.
11490              */
11491             tcg_gen_exit_tb(0);
11492             break;
11493         }
11494         }
11495     }
11496
11497     /* Functions above can change dc->pc, so re-align db->pc_next */
11498     dc->base.pc_next = dc->pc;
11499 }
11500
11501 static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
11502                                       CPUState *cpu)
11503 {
11504     DisasContext *dc = container_of(dcbase, DisasContext, base);
11505
11506     qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
11507     log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
11508 }
11509
11510 const TranslatorOps aarch64_translator_ops = {
11511     .init_disas_context = aarch64_tr_init_disas_context,
11512     .tb_start           = aarch64_tr_tb_start,
11513     .insn_start         = aarch64_tr_insn_start,
11514     .breakpoint_check   = aarch64_tr_breakpoint_check,
11515     .translate_insn     = aarch64_tr_translate_insn,
11516     .tb_stop            = aarch64_tr_tb_stop,
11517     .disas_log          = aarch64_tr_disas_log,
11518 };