target/arm/translate-a64.c

   1 /*
   2  *  AArch64 translation
   3  *
   4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include "qemu/osdep.h"
  20
  21 #include "cpu.h"
  22 #include "exec/exec-all.h"
  23 #include "tcg-op.h"
  24 #include "qemu/log.h"
  25 #include "arm_ldst.h"
  26 #include "translate.h"
  27 #include "internals.h"
  28 #include "qemu/host-utils.h"
  29
  30 #include "exec/semihost.h"
  31 #include "exec/gen-icount.h"
  32
  33 #include "exec/helper-proto.h"
  34 #include "exec/helper-gen.h"
  35 #include "exec/log.h"
  36
  37 #include "trace-tcg.h"
  38
  39 static TCGv_i64 cpu_X[32];
  40 static TCGv_i64 cpu_pc;
  41
  42 /* Load/store exclusive handling */
  43 static TCGv_i64 cpu_exclusive_high;
  44 static TCGv_i64 cpu_reg(DisasContext *s, int reg);
  45
  46 static const char *regnames[] = {
  47     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
  48     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
  49     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
  50     "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
  51 };
  52
  53 enum a64_shift_type {
  54     A64_SHIFT_TYPE_LSL = 0,
  55     A64_SHIFT_TYPE_LSR = 1,
  56     A64_SHIFT_TYPE_ASR = 2,
  57     A64_SHIFT_TYPE_ROR = 3
  58 };
  59
  60 /* Table based decoder typedefs - used when the relevant bits for decode
  61  * are too awkwardly scattered across the instruction (eg SIMD).
  62  */
  63 typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);
  64
  65 typedef struct AArch64DecodeTable {
  66     uint32_t pattern;
  67     uint32_t mask;
  68     AArch64DecodeFn *disas_fn;
  69 } AArch64DecodeTable;
  70
  71 /* Function prototype for gen_ functions for calling Neon helpers */
  72 typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
  73 typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
  74 typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
  75 typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
  76 typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
  77 typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
  78 typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
  79 typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
  80 typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
  81 typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
  82 typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
  83 typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
  84 typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
  85
  86 /* initialize TCG globals.  */
  87 void a64_translate_init(void)
  88 {
  89     int i;
  90
  91     cpu_pc = tcg_global_mem_new_i64(cpu_env,
  92                                     offsetof(CPUARMState, pc),
  93                                     "pc");
  94     for (i = 0; i < 32; i++) {
  95         cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
  96                                           offsetof(CPUARMState, xregs[i]),
  97                                           regnames[i]);
  98     }
  99
 100     cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
 101         offsetof(CPUARMState, exclusive_high), "exclusive_high");
 102 }
 103
 104 static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s)
 105 {
 106     /* Return the mmu_idx to use for A64 "unprivileged load/store" insns:
 107      *  if EL1, access as if EL0; otherwise access at current EL
 108      */
 109     switch (s->mmu_idx) {
 110     case ARMMMUIdx_S12NSE1:
 111         return ARMMMUIdx_S12NSE0;
 112     case ARMMMUIdx_S1SE1:
 113         return ARMMMUIdx_S1SE0;
 114     case ARMMMUIdx_S2NS:
 115         g_assert_not_reached();
 116     default:
 117         return s->mmu_idx;
 118     }
 119 }
 120
 121 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
 122                             fprintf_function cpu_fprintf, int flags)
 123 {
 124     ARMCPU *cpu = ARM_CPU(cs);
 125     CPUARMState *env = &cpu->env;
 126     uint32_t psr = pstate_read(env);
 127     int i;
 128     int el = arm_current_el(env);
 129     const char *ns_status;
 130
 131     cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
 132             env->pc, env->xregs[31]);
 133     for (i = 0; i < 31; i++) {
 134         cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
 135         if ((i % 4) == 3) {
 136             cpu_fprintf(f, "\n");
 137         } else {
 138             cpu_fprintf(f, " ");
 139         }
 140     }
 141
 142     if (arm_feature(env, ARM_FEATURE_EL3) && el != 3) {
 143         ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
 144     } else {
 145         ns_status = "";
 146     }
 147
 148     cpu_fprintf(f, "\nPSTATE=%08x %c%c%c%c %sEL%d%c\n",
 149                 psr,
 150                 psr & PSTATE_N ? 'N' : '-',
 151                 psr & PSTATE_Z ? 'Z' : '-',
 152                 psr & PSTATE_C ? 'C' : '-',
 153                 psr & PSTATE_V ? 'V' : '-',
 154                 ns_status,
 155                 el,
 156                 psr & PSTATE_SP ? 'h' : 't');
 157
 158     if (flags & CPU_DUMP_FPU) {
 159         int numvfpregs = 32;
 160         for (i = 0; i < numvfpregs; i += 2) {
 161             uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
 162             uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
 163             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
 164                         i, vhi, vlo);
 165             vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
 166             vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
 167             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
 168                         i + 1, vhi, vlo);
 169         }
 170         cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
 171                     vfp_get_fpcr(env), vfp_get_fpsr(env));
 172     }
 173 }
 174
 175 void gen_a64_set_pc_im(uint64_t val)
 176 {
 177     tcg_gen_movi_i64(cpu_pc, val);
 178 }
 179
 180 /* Load the PC from a generic TCG variable.
 181  *
 182  * If address tagging is enabled via the TCR TBI bits, then loading
 183  * an address into the PC will clear out any tag in the it:
 184  *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 185  *    then the address is zero-extended, clearing bits [63:56]
 186  *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 187  *    and TBI1 controls addressses with bit 55 == 1.
 188  *    If the appropriate TBI bit is set for the address then
 189  *    the address is sign-extended from bit 55 into bits [63:56]
 190  *
 191  * We can avoid doing this for relative-branches, because the
 192  * PC + offset can never overflow into the tag bits (assuming
 193  * that virtual addresses are less than 56 bits wide, as they
 194  * are currently), but we must handle it for branch-to-register.
 195  */
 196 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
 197 {
 198
 199     if (s->current_el <= 1) {
 200         /* Test if NEITHER or BOTH TBI values are set.  If so, no need to
 201          * examine bit 55 of address, can just generate code.
 202          * If mixed, then test via generated code
 203          */
 204         if (s->tbi0 && s->tbi1) {
 205             TCGv_i64 tmp_reg = tcg_temp_new_i64();
 206             /* Both bits set, sign extension from bit 55 into [63:56] will
 207              * cover both cases
 208              */
 209             tcg_gen_shli_i64(tmp_reg, src, 8);
 210             tcg_gen_sari_i64(cpu_pc, tmp_reg, 8);
 211             tcg_temp_free_i64(tmp_reg);
 212         } else if (!s->tbi0 && !s->tbi1) {
 213             /* Neither bit set, just load it as-is */
 214             tcg_gen_mov_i64(cpu_pc, src);
 215         } else {
 216             TCGv_i64 tcg_tmpval = tcg_temp_new_i64();
 217             TCGv_i64 tcg_bit55  = tcg_temp_new_i64();
 218             TCGv_i64 tcg_zero   = tcg_const_i64(0);
 219
 220             tcg_gen_andi_i64(tcg_bit55, src, (1ull << 55));
 221
 222             if (s->tbi0) {
 223                 /* tbi0==1, tbi1==0, so 0-fill upper byte if bit 55 = 0 */
 224                 tcg_gen_andi_i64(tcg_tmpval, src,
 225                                  0x00FFFFFFFFFFFFFFull);
 226                 tcg_gen_movcond_i64(TCG_COND_EQ, cpu_pc, tcg_bit55, tcg_zero,
 227                                     tcg_tmpval, src);
 228             } else {
 229                 /* tbi0==0, tbi1==1, so 1-fill upper byte if bit 55 = 1 */
 230                 tcg_gen_ori_i64(tcg_tmpval, src,
 231                                 0xFF00000000000000ull);
 232                 tcg_gen_movcond_i64(TCG_COND_NE, cpu_pc, tcg_bit55, tcg_zero,
 233                                     tcg_tmpval, src);
 234             }
 235             tcg_temp_free_i64(tcg_zero);
 236             tcg_temp_free_i64(tcg_bit55);
 237             tcg_temp_free_i64(tcg_tmpval);
 238         }
 239     } else {  /* EL > 1 */
 240         if (s->tbi0) {
 241             /* Force tag byte to all zero */
 242             tcg_gen_andi_i64(cpu_pc, src, 0x00FFFFFFFFFFFFFFull);
 243         } else {
 244             /* Load unmodified address */
 245             tcg_gen_mov_i64(cpu_pc, src);
 246         }
 247     }
 248 }
 249
 250 typedef struct DisasCompare64 {
 251     TCGCond cond;
 252     TCGv_i64 value;
 253 } DisasCompare64;
 254
 255 static void a64_test_cc(DisasCompare64 *c64, int cc)
 256 {
 257     DisasCompare c32;
 258
 259     arm_test_cc(&c32, cc);
 260
 261     /* Sign-extend the 32-bit value so that the GE/LT comparisons work
 262        * properly.  The NE/EQ comparisons are also fine with this choice.  */
 263     c64->cond = c32.cond;
 264     c64->value = tcg_temp_new_i64();
 265     tcg_gen_ext_i32_i64(c64->value, c32.value);
 266
 267     arm_free_cc(&c32);
 268 }
 269
 270 static void a64_free_cc(DisasCompare64 *c64)
 271 {
 272     tcg_temp_free_i64(c64->value);
 273 }
 274
 275 static void gen_exception_internal(int excp)
 276 {
 277     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 278
 279     assert(excp_is_internal(excp));
 280     gen_helper_exception_internal(cpu_env, tcg_excp);
 281     tcg_temp_free_i32(tcg_excp);
 282 }
 283
 284 static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
 285 {
 286     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 287     TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
 288     TCGv_i32 tcg_el = tcg_const_i32(target_el);
 289
 290     gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
 291                                        tcg_syn, tcg_el);
 292     tcg_temp_free_i32(tcg_el);
 293     tcg_temp_free_i32(tcg_syn);
 294     tcg_temp_free_i32(tcg_excp);
 295 }
 296
 297 static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
 298 {
 299     gen_a64_set_pc_im(s->pc - offset);
 300     gen_exception_internal(excp);
 301     s->is_jmp = DISAS_EXC;
 302 }
 303
 304 static void gen_exception_insn(DisasContext *s, int offset, int excp,
 305                                uint32_t syndrome, uint32_t target_el)
 306 {
 307     gen_a64_set_pc_im(s->pc - offset);
 308     gen_exception(excp, syndrome, target_el);
 309     s->is_jmp = DISAS_EXC;
 310 }
 311
 312 static void gen_ss_advance(DisasContext *s)
 313 {
 314     /* If the singlestep state is Active-not-pending, advance to
 315      * Active-pending.
 316      */
 317     if (s->ss_active) {
 318         s->pstate_ss = 0;
 319         gen_helper_clear_pstate_ss(cpu_env);
 320     }
 321 }
 322
 323 static void gen_step_complete_exception(DisasContext *s)
 324 {
 325     /* We just completed step of an insn. Move from Active-not-pending
 326      * to Active-pending, and then also take the swstep exception.
 327      * This corresponds to making the (IMPDEF) choice to prioritize
 328      * swstep exceptions over asynchronous exceptions taken to an exception
 329      * level where debug is disabled. This choice has the advantage that
 330      * we do not need to maintain internal state corresponding to the
 331      * ISV/EX syndrome bits between completion of the step and generation
 332      * of the exception, and our syndrome information is always correct.
 333      */
 334     gen_ss_advance(s);
 335     gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
 336                   default_exception_el(s));
 337     s->is_jmp = DISAS_EXC;
 338 }
 339
 340 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
 341 {
 342     /* No direct tb linking with singlestep (either QEMU's or the ARM
 343      * debug architecture kind) or deterministic io
 344      */
 345     if (s->singlestep_enabled || s->ss_active || (s->tb->cflags & CF_LAST_IO)) {
 346         return false;
 347     }
 348
 349 #ifndef CONFIG_USER_ONLY
 350     /* Only link tbs from inside the same guest page */
 351     if ((s->tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
 352         return false;
 353     }
 354 #endif
 355
 356     return true;
 357 }
 358
 359 static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
 360 {
 361     TranslationBlock *tb;
 362
 363     tb = s->tb;
 364     if (use_goto_tb(s, n, dest)) {
 365         tcg_gen_goto_tb(n);
 366         gen_a64_set_pc_im(dest);
 367         tcg_gen_exit_tb((intptr_t)tb + n);
 368         s->is_jmp = DISAS_TB_JUMP;
 369     } else {
 370         gen_a64_set_pc_im(dest);
 371         if (s->ss_active) {
 372             gen_step_complete_exception(s);
 373         } else if (s->singlestep_enabled) {
 374             gen_exception_internal(EXCP_DEBUG);
 375         } else {
 376             tcg_gen_exit_tb(0);
 377             s->is_jmp = DISAS_TB_JUMP;
 378         }
 379     }
 380 }
 381
 382 static void unallocated_encoding(DisasContext *s)
 383 {
 384     /* Unallocated and reserved encodings are uncategorized */
 385     gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
 386                        default_exception_el(s));
 387 }
 388
 389 #define unsupported_encoding(s, insn)                                    \
 390     do {                                                                 \
 391         qemu_log_mask(LOG_UNIMP,                                         \
 392                       "%s:%d: unsupported instruction encoding 0x%08x "  \
 393                       "at pc=%016" PRIx64 "\n",                          \
 394                       __FILE__, __LINE__, insn, s->pc - 4);              \
 395         unallocated_encoding(s);                                         \
 396     } while (0);
 397
 398 static void init_tmp_a64_array(DisasContext *s)
 399 {
 400 #ifdef CONFIG_DEBUG_TCG
 401     int i;
 402     for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
 403         TCGV_UNUSED_I64(s->tmp_a64[i]);
 404     }
 405 #endif
 406     s->tmp_a64_count = 0;
 407 }
 408
 409 static void free_tmp_a64(DisasContext *s)
 410 {
 411     int i;
 412     for (i = 0; i < s->tmp_a64_count; i++) {
 413         tcg_temp_free_i64(s->tmp_a64[i]);
 414     }
 415     init_tmp_a64_array(s);
 416 }
 417
 418 static TCGv_i64 new_tmp_a64(DisasContext *s)
 419 {
 420     assert(s->tmp_a64_count < TMP_A64_MAX);
 421     return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
 422 }
 423
 424 static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
 425 {
 426     TCGv_i64 t = new_tmp_a64(s);
 427     tcg_gen_movi_i64(t, 0);
 428     return t;
 429 }
 430
 431 /*
 432  * Register access functions
 433  *
 434  * These functions are used for directly accessing a register where
 435  * changes to the final register value are likely to be made. If you
 436  * need to use a register for temporary calculation (e.g. index type
 437  * operations) use the read_* form.
 438  *
 439  * B1.2.1 Register mappings
 440  *
 441  * In instruction register encoding 31 can refer to ZR (zero register) or
 442  * the SP (stack pointer) depending on context. In QEMU's case we map SP
 443  * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 444  * This is the point of the _sp forms.
 445  */
 446 static TCGv_i64 cpu_reg(DisasContext *s, int reg)
 447 {
 448     if (reg == 31) {
 449         return new_tmp_a64_zero(s);
 450     } else {
 451         return cpu_X[reg];
 452     }
 453 }
 454
 455 /* register access for when 31 == SP */
 456 static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
 457 {
 458     return cpu_X[reg];
 459 }
 460
 461 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 462  * representing the register contents. This TCGv is an auto-freed
 463  * temporary so it need not be explicitly freed, and may be modified.
 464  */
 465 static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
 466 {
 467     TCGv_i64 v = new_tmp_a64(s);
 468     if (reg != 31) {
 469         if (sf) {
 470             tcg_gen_mov_i64(v, cpu_X[reg]);
 471         } else {
 472             tcg_gen_ext32u_i64(v, cpu_X[reg]);
 473         }
 474     } else {
 475         tcg_gen_movi_i64(v, 0);
 476     }
 477     return v;
 478 }
 479
 480 static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
 481 {
 482     TCGv_i64 v = new_tmp_a64(s);
 483     if (sf) {
 484         tcg_gen_mov_i64(v, cpu_X[reg]);
 485     } else {
 486         tcg_gen_ext32u_i64(v, cpu_X[reg]);
 487     }
 488     return v;
 489 }
 490
 491 /* We should have at some point before trying to access an FP register
 492  * done the necessary access check, so assert that
 493  * (a) we did the check and
 494  * (b) we didn't then just plough ahead anyway if it failed.
 495  * Print the instruction pattern in the abort message so we can figure
 496  * out what we need to fix if a user encounters this problem in the wild.
 497  */
 498 static inline void assert_fp_access_checked(DisasContext *s)
 499 {
 500 #ifdef CONFIG_DEBUG_TCG
 501     if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
 502         fprintf(stderr, "target-arm: FP access check missing for "
 503                 "instruction 0x%08x\n", s->insn);
 504         abort();
 505     }
 506 #endif
 507 }
 508
 509 /* Return the offset into CPUARMState of an element of specified
 510  * size, 'element' places in from the least significant end of
 511  * the FP/vector register Qn.
 512  */
 513 static inline int vec_reg_offset(DisasContext *s, int regno,
 514                                  int element, TCGMemOp size)
 515 {
 516     int offs = 0;
 517 #ifdef HOST_WORDS_BIGENDIAN
 518     /* This is complicated slightly because vfp.regs[2n] is
 519      * still the low half and  vfp.regs[2n+1] the high half
 520      * of the 128 bit vector, even on big endian systems.
 521      * Calculate the offset assuming a fully bigendian 128 bits,
 522      * then XOR to account for the order of the two 64 bit halves.
 523      */
 524     offs += (16 - ((element + 1) * (1 << size)));
 525     offs ^= 8;
 526 #else
 527     offs += element * (1 << size);
 528 #endif
 529     offs += offsetof(CPUARMState, vfp.regs[regno * 2]);
 530     assert_fp_access_checked(s);
 531     return offs;
 532 }
 533
 534 /* Return the offset into CPUARMState of a slice (from
 535  * the least significant end) of FP register Qn (ie
 536  * Dn, Sn, Hn or Bn).
 537  * (Note that this is not the same mapping as for A32; see cpu.h)
 538  */
 539 static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
 540 {
 541     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 542 #ifdef HOST_WORDS_BIGENDIAN
 543     offs += (8 - (1 << size));
 544 #endif
 545     assert_fp_access_checked(s);
 546     return offs;
 547 }
 548
 549 /* Offset of the high half of the 128 bit vector Qn */
 550 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
 551 {
 552     assert_fp_access_checked(s);
 553     return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
 554 }
 555
 556 /* Convenience accessors for reading and writing single and double
 557  * FP registers. Writing clears the upper parts of the associated
 558  * 128 bit vector register, as required by the architecture.
 559  * Note that unlike the GP register accessors, the values returned
 560  * by the read functions must be manually freed.
 561  */
 562 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
 563 {
 564     TCGv_i64 v = tcg_temp_new_i64();
 565
 566     tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 567     return v;
 568 }
 569
 570 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
 571 {
 572     TCGv_i32 v = tcg_temp_new_i32();
 573
 574     tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
 575     return v;
 576 }
 577
 578 static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
 579 {
 580     TCGv_i64 tcg_zero = tcg_const_i64(0);
 581
 582     tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 583     tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg));
 584     tcg_temp_free_i64(tcg_zero);
 585 }
 586
 587 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
 588 {
 589     TCGv_i64 tmp = tcg_temp_new_i64();
 590
 591     tcg_gen_extu_i32_i64(tmp, v);
 592     write_fp_dreg(s, reg, tmp);
 593     tcg_temp_free_i64(tmp);
 594 }
 595
 596 static TCGv_ptr get_fpstatus_ptr(void)
 597 {
 598     TCGv_ptr statusptr = tcg_temp_new_ptr();
 599     int offset;
 600
 601     /* In A64 all instructions (both FP and Neon) use the FPCR;
 602      * there is no equivalent of the A32 Neon "standard FPSCR value"
 603      * and all operations use vfp.fp_status.
 604      */
 605     offset = offsetof(CPUARMState, vfp.fp_status);
 606     tcg_gen_addi_ptr(statusptr, cpu_env, offset);
 607     return statusptr;
 608 }
 609
 610 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 611  * than the 32 bit equivalent.
 612  */
 613 static inline void gen_set_NZ64(TCGv_i64 result)
 614 {
 615     tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
 616     tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
 617 }
 618
 619 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
 620 static inline void gen_logic_CC(int sf, TCGv_i64 result)
 621 {
 622     if (sf) {
 623         gen_set_NZ64(result);
 624     } else {
 625         tcg_gen_extrl_i64_i32(cpu_ZF, result);
 626         tcg_gen_mov_i32(cpu_NF, cpu_ZF);
 627     }
 628     tcg_gen_movi_i32(cpu_CF, 0);
 629     tcg_gen_movi_i32(cpu_VF, 0);
 630 }
 631
 632 /* dest = T0 + T1; compute C, N, V and Z flags */
 633 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 634 {
 635     if (sf) {
 636         TCGv_i64 result, flag, tmp;
 637         result = tcg_temp_new_i64();
 638         flag = tcg_temp_new_i64();
 639         tmp = tcg_temp_new_i64();
 640
 641         tcg_gen_movi_i64(tmp, 0);
 642         tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
 643
 644         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 645
 646         gen_set_NZ64(result);
 647
 648         tcg_gen_xor_i64(flag, result, t0);
 649         tcg_gen_xor_i64(tmp, t0, t1);
 650         tcg_gen_andc_i64(flag, flag, tmp);
 651         tcg_temp_free_i64(tmp);
 652         tcg_gen_extrh_i64_i32(cpu_VF, flag);
 653
 654         tcg_gen_mov_i64(dest, result);
 655         tcg_temp_free_i64(result);
 656         tcg_temp_free_i64(flag);
 657     } else {
 658         /* 32 bit arithmetic */
 659         TCGv_i32 t0_32 = tcg_temp_new_i32();
 660         TCGv_i32 t1_32 = tcg_temp_new_i32();
 661         TCGv_i32 tmp = tcg_temp_new_i32();
 662
 663         tcg_gen_movi_i32(tmp, 0);
 664         tcg_gen_extrl_i64_i32(t0_32, t0);
 665         tcg_gen_extrl_i64_i32(t1_32, t1);
 666         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
 667         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 668         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 669         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 670         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 671         tcg_gen_extu_i32_i64(dest, cpu_NF);
 672
 673         tcg_temp_free_i32(tmp);
 674         tcg_temp_free_i32(t0_32);
 675         tcg_temp_free_i32(t1_32);
 676     }
 677 }
 678
 679 /* dest = T0 - T1; compute C, N, V and Z flags */
 680 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 681 {
 682     if (sf) {
 683         /* 64 bit arithmetic */
 684         TCGv_i64 result, flag, tmp;
 685
 686         result = tcg_temp_new_i64();
 687         flag = tcg_temp_new_i64();
 688         tcg_gen_sub_i64(result, t0, t1);
 689
 690         gen_set_NZ64(result);
 691
 692         tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
 693         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 694
 695         tcg_gen_xor_i64(flag, result, t0);
 696         tmp = tcg_temp_new_i64();
 697         tcg_gen_xor_i64(tmp, t0, t1);
 698         tcg_gen_and_i64(flag, flag, tmp);
 699         tcg_temp_free_i64(tmp);
 700         tcg_gen_extrh_i64_i32(cpu_VF, flag);
 701         tcg_gen_mov_i64(dest, result);
 702         tcg_temp_free_i64(flag);
 703         tcg_temp_free_i64(result);
 704     } else {
 705         /* 32 bit arithmetic */
 706         TCGv_i32 t0_32 = tcg_temp_new_i32();
 707         TCGv_i32 t1_32 = tcg_temp_new_i32();
 708         TCGv_i32 tmp;
 709
 710         tcg_gen_extrl_i64_i32(t0_32, t0);
 711         tcg_gen_extrl_i64_i32(t1_32, t1);
 712         tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
 713         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 714         tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
 715         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 716         tmp = tcg_temp_new_i32();
 717         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 718         tcg_temp_free_i32(t0_32);
 719         tcg_temp_free_i32(t1_32);
 720         tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 721         tcg_temp_free_i32(tmp);
 722         tcg_gen_extu_i32_i64(dest, cpu_NF);
 723     }
 724 }
 725
 726 /* dest = T0 + T1 + CF; do not compute flags. */
 727 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 728 {
 729     TCGv_i64 flag = tcg_temp_new_i64();
 730     tcg_gen_extu_i32_i64(flag, cpu_CF);
 731     tcg_gen_add_i64(dest, t0, t1);
 732     tcg_gen_add_i64(dest, dest, flag);
 733     tcg_temp_free_i64(flag);
 734
 735     if (!sf) {
 736         tcg_gen_ext32u_i64(dest, dest);
 737     }
 738 }
 739
 740 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
 741 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 742 {
 743     if (sf) {
 744         TCGv_i64 result, cf_64, vf_64, tmp;
 745         result = tcg_temp_new_i64();
 746         cf_64 = tcg_temp_new_i64();
 747         vf_64 = tcg_temp_new_i64();
 748         tmp = tcg_const_i64(0);
 749
 750         tcg_gen_extu_i32_i64(cf_64, cpu_CF);
 751         tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
 752         tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
 753         tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
 754         gen_set_NZ64(result);
 755
 756         tcg_gen_xor_i64(vf_64, result, t0);
 757         tcg_gen_xor_i64(tmp, t0, t1);
 758         tcg_gen_andc_i64(vf_64, vf_64, tmp);
 759         tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
 760
 761         tcg_gen_mov_i64(dest, result);
 762
 763         tcg_temp_free_i64(tmp);
 764         tcg_temp_free_i64(vf_64);
 765         tcg_temp_free_i64(cf_64);
 766         tcg_temp_free_i64(result);
 767     } else {
 768         TCGv_i32 t0_32, t1_32, tmp;
 769         t0_32 = tcg_temp_new_i32();
 770         t1_32 = tcg_temp_new_i32();
 771         tmp = tcg_const_i32(0);
 772
 773         tcg_gen_extrl_i64_i32(t0_32, t0);
 774         tcg_gen_extrl_i64_i32(t1_32, t1);
 775         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
 776         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);
 777
 778         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 779         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 780         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 781         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 782         tcg_gen_extu_i32_i64(dest, cpu_NF);
 783
 784         tcg_temp_free_i32(tmp);
 785         tcg_temp_free_i32(t1_32);
 786         tcg_temp_free_i32(t0_32);
 787     }
 788 }
 789
 790 /*
 791  * Load/Store generators
 792  */
 793
 794 /*
 795  * Store from GPR register to memory.
 796  */
 797 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
 798                              TCGv_i64 tcg_addr, int size, int memidx,
 799                              bool iss_valid,
 800                              unsigned int iss_srt,
 801                              bool iss_sf, bool iss_ar)
 802 {
 803     g_assert(size <= 3);
 804     tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);
 805
 806     if (iss_valid) {
 807         uint32_t syn;
 808
 809         syn = syn_data_abort_with_iss(0,
 810                                       size,
 811                                       false,
 812                                       iss_srt,
 813                                       iss_sf,
 814                                       iss_ar,
 815                                       0, 0, 0, 0, 0, false);
 816         disas_set_insn_syndrome(s, syn);
 817     }
 818 }
 819
 820 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
 821                       TCGv_i64 tcg_addr, int size,
 822                       bool iss_valid,
 823                       unsigned int iss_srt,
 824                       bool iss_sf, bool iss_ar)
 825 {
 826     do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
 827                      iss_valid, iss_srt, iss_sf, iss_ar);
 828 }
 829
 830 /*
 831  * Load from memory to GPR register
 832  */
 833 static void do_gpr_ld_memidx(DisasContext *s,
 834                              TCGv_i64 dest, TCGv_i64 tcg_addr,
 835                              int size, bool is_signed,
 836                              bool extend, int memidx,
 837                              bool iss_valid, unsigned int iss_srt,
 838                              bool iss_sf, bool iss_ar)
 839 {
 840     TCGMemOp memop = s->be_data + size;
 841
 842     g_assert(size <= 3);
 843
 844     if (is_signed) {
 845         memop += MO_SIGN;
 846     }
 847
 848     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
 849
 850     if (extend && is_signed) {
 851         g_assert(size < 3);
 852         tcg_gen_ext32u_i64(dest, dest);
 853     }
 854
 855     if (iss_valid) {
 856         uint32_t syn;
 857
 858         syn = syn_data_abort_with_iss(0,
 859                                       size,
 860                                       is_signed,
 861                                       iss_srt,
 862                                       iss_sf,
 863                                       iss_ar,
 864                                       0, 0, 0, 0, 0, false);
 865         disas_set_insn_syndrome(s, syn);
 866     }
 867 }
 868
 869 static void do_gpr_ld(DisasContext *s,
 870                       TCGv_i64 dest, TCGv_i64 tcg_addr,
 871                       int size, bool is_signed, bool extend,
 872                       bool iss_valid, unsigned int iss_srt,
 873                       bool iss_sf, bool iss_ar)
 874 {
 875     do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
 876                      get_mem_index(s),
 877                      iss_valid, iss_srt, iss_sf, iss_ar);
 878 }
 879
 880 /*
 881  * Store from FP register to memory
 882  */
 883 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
 884 {
 885     /* This writes the bottom N bits of a 128 bit wide vector to memory */
 886     TCGv_i64 tmp = tcg_temp_new_i64();
 887     tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
 888     if (size < 4) {
 889         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
 890                             s->be_data + size);
 891     } else {
 892         bool be = s->be_data == MO_BE;
 893         TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
 894
 895         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 896         tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
 897                             s->be_data | MO_Q);
 898         tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
 899         tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
 900                             s->be_data | MO_Q);
 901         tcg_temp_free_i64(tcg_hiaddr);
 902     }
 903
 904     tcg_temp_free_i64(tmp);
 905 }
 906
 907 /*
 908  * Load from memory to FP register
 909  */
 910 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
 911 {
 912     /* This always zero-extends and writes to a full 128 bit wide vector */
 913     TCGv_i64 tmplo = tcg_temp_new_i64();
 914     TCGv_i64 tmphi;
 915
 916     if (size < 4) {
 917         TCGMemOp memop = s->be_data + size;
 918         tmphi = tcg_const_i64(0);
 919         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
 920     } else {
 921         bool be = s->be_data == MO_BE;
 922         TCGv_i64 tcg_hiaddr;
 923
 924         tmphi = tcg_temp_new_i64();
 925         tcg_hiaddr = tcg_temp_new_i64();
 926
 927         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 928         tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
 929                             s->be_data | MO_Q);
 930         tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
 931                             s->be_data | MO_Q);
 932         tcg_temp_free_i64(tcg_hiaddr);
 933     }
 934
 935     tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
 936     tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
 937
 938     tcg_temp_free_i64(tmplo);
 939     tcg_temp_free_i64(tmphi);
 940 }
 941
 942 /*
 943  * Vector load/store helpers.
 944  *
 945  * The principal difference between this and a FP load is that we don't
 946  * zero extend as we are filling a partial chunk of the vector register.
 947  * These functions don't support 128 bit loads/stores, which would be
 948  * normal load/store operations.
 949  *
 950  * The _i32 versions are useful when operating on 32 bit quantities
 951  * (eg for floating point single or using Neon helper functions).
 952  */
 953
 954 /* Get value of an element within a vector register */
 955 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
 956                              int element, TCGMemOp memop)
 957 {
 958     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
 959     switch (memop) {
 960     case MO_8:
 961         tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
 962         break;
 963     case MO_16:
 964         tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
 965         break;
 966     case MO_32:
 967         tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
 968         break;
 969     case MO_8|MO_SIGN:
 970         tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
 971         break;
 972     case MO_16|MO_SIGN:
 973         tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
 974         break;
 975     case MO_32|MO_SIGN:
 976         tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
 977         break;
 978     case MO_64:
 979     case MO_64|MO_SIGN:
 980         tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
 981         break;
 982     default:
 983         g_assert_not_reached();
 984     }
 985 }
 986
 987 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
 988                                  int element, TCGMemOp memop)
 989 {
 990     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
 991     switch (memop) {
 992     case MO_8:
 993         tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
 994         break;
 995     case MO_16:
 996         tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
 997         break;
 998     case MO_8|MO_SIGN:
 999         tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
1000         break;
1001     case MO_16|MO_SIGN:
1002         tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
1003         break;
1004     case MO_32:
1005     case MO_32|MO_SIGN:
1006         tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
1007         break;
1008     default:
1009         g_assert_not_reached();
1010     }
1011 }
1012
1013 /* Set value of an element within a vector register */
1014 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1015                               int element, TCGMemOp memop)
1016 {
1017     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1018     switch (memop) {
1019     case MO_8:
1020         tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
1021         break;
1022     case MO_16:
1023         tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
1024         break;
1025     case MO_32:
1026         tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
1027         break;
1028     case MO_64:
1029         tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
1030         break;
1031     default:
1032         g_assert_not_reached();
1033     }
1034 }
1035
1036 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1037                                   int destidx, int element, TCGMemOp memop)
1038 {
1039     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1040     switch (memop) {
1041     case MO_8:
1042         tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
1043         break;
1044     case MO_16:
1045         tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
1046         break;
1047     case MO_32:
1048         tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
1049         break;
1050     default:
1051         g_assert_not_reached();
1052     }
1053 }
1054
1055 /* Clear the high 64 bits of a 128 bit vector (in general non-quad
1056  * vector ops all need to do this).
1057  */
1058 static void clear_vec_high(DisasContext *s, int rd)
1059 {
1060     TCGv_i64 tcg_zero = tcg_const_i64(0);
1061
1062     write_vec_element(s, tcg_zero, rd, 1, MO_64);
1063     tcg_temp_free_i64(tcg_zero);
1064 }
1065
1066 /* Store from vector register to memory */
1067 static void do_vec_st(DisasContext *s, int srcidx, int element,
1068                       TCGv_i64 tcg_addr, int size)
1069 {
1070     TCGMemOp memop = s->be_data + size;
1071     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1072
1073     read_vec_element(s, tcg_tmp, srcidx, element, size);
1074     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
1075
1076     tcg_temp_free_i64(tcg_tmp);
1077 }
1078
1079 /* Load from memory to vector register */
1080 static void do_vec_ld(DisasContext *s, int destidx, int element,
1081                       TCGv_i64 tcg_addr, int size)
1082 {
1083     TCGMemOp memop = s->be_data + size;
1084     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1085
1086     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
1087     write_vec_element(s, tcg_tmp, destidx, element, size);
1088
1089     tcg_temp_free_i64(tcg_tmp);
1090 }
1091
1092 /* Check that FP/Neon access is enabled. If it is, return
1093  * true. If not, emit code to generate an appropriate exception,
1094  * and return false; the caller should not emit any code for
1095  * the instruction. Note that this check must happen after all
1096  * unallocated-encoding checks (otherwise the syndrome information
1097  * for the resulting exception will be incorrect).
1098  */
1099 static inline bool fp_access_check(DisasContext *s)
1100 {
1101     assert(!s->fp_access_checked);
1102     s->fp_access_checked = true;
1103
1104     if (!s->fp_excp_el) {
1105         return true;
1106     }
1107
1108     gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
1109                        s->fp_excp_el);
1110     return false;
1111 }
1112
1113 /*
1114  * This utility function is for doing register extension with an
1115  * optional shift. You will likely want to pass a temporary for the
1116  * destination register. See DecodeRegExtend() in the ARM ARM.
1117  */
1118 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1119                               int option, unsigned int shift)
1120 {
1121     int extsize = extract32(option, 0, 2);
1122     bool is_signed = extract32(option, 2, 1);
1123
1124     if (is_signed) {
1125         switch (extsize) {
1126         case 0:
1127             tcg_gen_ext8s_i64(tcg_out, tcg_in);
1128             break;
1129         case 1:
1130             tcg_gen_ext16s_i64(tcg_out, tcg_in);
1131             break;
1132         case 2:
1133             tcg_gen_ext32s_i64(tcg_out, tcg_in);
1134             break;
1135         case 3:
1136             tcg_gen_mov_i64(tcg_out, tcg_in);
1137             break;
1138         }
1139     } else {
1140         switch (extsize) {
1141         case 0:
1142             tcg_gen_ext8u_i64(tcg_out, tcg_in);
1143             break;
1144         case 1:
1145             tcg_gen_ext16u_i64(tcg_out, tcg_in);
1146             break;
1147         case 2:
1148             tcg_gen_ext32u_i64(tcg_out, tcg_in);
1149             break;
1150         case 3:
1151             tcg_gen_mov_i64(tcg_out, tcg_in);
1152             break;
1153         }
1154     }
1155
1156     if (shift) {
1157         tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1158     }
1159 }
1160
/*
 * Hook for the architectural SP alignment check.  It currently emits
 * no code at all; the comment in the body explains why.
 */
static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}
1173
1174 /*
1175  * This provides a simple table based table lookup decoder. It is
1176  * intended to be used when the relevant bits for decode are too
1177  * awkwardly placed and switch/if based logic would be confusing and
1178  * deeply nested. Since it's a linear search through the table, tables
1179  * should be kept small.
1180  *
1181  * It returns the first handler where insn & mask == pattern, or
1182  * NULL if there is no match.
1183  * The table is terminated by an empty mask (i.e. 0)
1184  */
1185 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1186                                                uint32_t insn)
1187 {
1188     const AArch64DecodeTable *tptr = table;
1189
1190     while (tptr->mask) {
1191         if ((insn & tptr->mask) == tptr->pattern) {
1192             return tptr->disas_fn;
1193         }
1194         tptr++;
1195     }
1196     return NULL;
1197 }
1198
/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter 3 (C3)
 * of the ARM Architecture Reference Manual (DDI0487A_a).
 */
1204
1205 /* C3.2.7 Unconditional branch (immediate)
1206  *   31  30       26 25                                  0
1207  * +----+-----------+-------------------------------------+
1208  * | op | 0 0 1 0 1 |                 imm26               |
1209  * +----+-----------+-------------------------------------+
1210  */
1211 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1212 {
1213     uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
1214
1215     if (insn & (1U << 31)) {
1216         /* C5.6.26 BL Branch with link */
1217         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1218     }
1219
1220     /* C5.6.20 B Branch / C5.6.26 BL Branch with link */
1221     gen_goto_tb(s, 0, addr);
1222 }
1223
1224 /* C3.2.1 Compare & branch (immediate)
1225  *   31  30         25  24  23                  5 4      0
1226  * +----+-------------+----+---------------------+--------+
1227  * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
1228  * +----+-------------+----+---------------------+--------+
1229  */
1230 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1231 {
1232     unsigned int sf, op, rt;
1233     uint64_t addr;
1234     TCGLabel *label_match;
1235     TCGv_i64 tcg_cmp;
1236
1237     sf = extract32(insn, 31, 1);
1238     op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1239     rt = extract32(insn, 0, 5);
1240     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1241
1242     tcg_cmp = read_cpu_reg(s, rt, sf);
1243     label_match = gen_new_label();
1244
1245     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1246                         tcg_cmp, 0, label_match);
1247
1248     gen_goto_tb(s, 0, s->pc);
1249     gen_set_label(label_match);
1250     gen_goto_tb(s, 1, addr);
1251 }
1252
1253 /* C3.2.5 Test & branch (immediate)
1254  *   31  30         25  24  23   19 18          5 4    0
1255  * +----+-------------+----+-------+-------------+------+
1256  * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
1257  * +----+-------------+----+-------+-------------+------+
1258  */
1259 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1260 {
1261     unsigned int bit_pos, op, rt;
1262     uint64_t addr;
1263     TCGLabel *label_match;
1264     TCGv_i64 tcg_cmp;
1265
1266     bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1267     op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1268     addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
1269     rt = extract32(insn, 0, 5);
1270
1271     tcg_cmp = tcg_temp_new_i64();
1272     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1273     label_match = gen_new_label();
1274     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1275                         tcg_cmp, 0, label_match);
1276     tcg_temp_free_i64(tcg_cmp);
1277     gen_goto_tb(s, 0, s->pc);
1278     gen_set_label(label_match);
1279     gen_goto_tb(s, 1, addr);
1280 }
1281
1282 /* C3.2.2 / C5.6.19 Conditional branch (immediate)
1283  *  31           25  24  23                  5   4  3    0
1284  * +---------------+----+---------------------+----+------+
1285  * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
1286  * +---------------+----+---------------------+----+------+
1287  */
1288 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1289 {
1290     unsigned int cond;
1291     uint64_t addr;
1292
1293     if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1294         unallocated_encoding(s);
1295         return;
1296     }
1297     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1298     cond = extract32(insn, 0, 4);
1299
1300     if (cond < 0x0e) {
1301         /* genuinely conditional branches */
1302         TCGLabel *label_match = gen_new_label();
1303         arm_gen_test_cc(cond, label_match);
1304         gen_goto_tb(s, 0, s->pc);
1305         gen_set_label(label_match);
1306         gen_goto_tb(s, 1, addr);
1307     } else {
1308         /* 0xe and 0xf are both "always" conditions */
1309         gen_goto_tb(s, 0, addr);
1310     }
1311 }
1312
1313 /* C5.6.68 HINT */
1314 static void handle_hint(DisasContext *s, uint32_t insn,
1315                         unsigned int op1, unsigned int op2, unsigned int crm)
1316 {
1317     unsigned int selector = crm << 3 | op2;
1318
1319     if (op1 != 3) {
1320         unallocated_encoding(s);
1321         return;
1322     }
1323
1324     switch (selector) {
1325     case 0: /* NOP */
1326         return;
1327     case 3: /* WFI */
1328         s->is_jmp = DISAS_WFI;
1329         return;
1330     case 1: /* YIELD */
1331         s->is_jmp = DISAS_YIELD;
1332         return;
1333     case 2: /* WFE */
1334         s->is_jmp = DISAS_WFE;
1335         return;
1336     case 4: /* SEV */
1337     case 5: /* SEVL */
1338         /* we treat all as NOP at least for now */
1339         return;
1340     default:
1341         /* default specified as NOP equivalent */
1342         return;
1343     }
1344 }
1345
/*
 * CLREX: emit a move of the constant -1 into cpu_exclusive_addr.
 * NOTE(review): presumably -1 is the "no exclusive access outstanding"
 * marker that the store-exclusive path compares against -- confirm there.
 * The insn argument is not used.
 */
static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
1350
1351 /* CLREX, DSB, DMB, ISB */
1352 static void handle_sync(DisasContext *s, uint32_t insn,
1353                         unsigned int op1, unsigned int op2, unsigned int crm)
1354 {
1355     TCGBar bar;
1356
1357     if (op1 != 3) {
1358         unallocated_encoding(s);
1359         return;
1360     }
1361
1362     switch (op2) {
1363     case 2: /* CLREX */
1364         gen_clrex(s, insn);
1365         return;
1366     case 4: /* DSB */
1367     case 5: /* DMB */
1368         switch (crm & 3) {
1369         case 1: /* MBReqTypes_Reads */
1370             bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1371             break;
1372         case 2: /* MBReqTypes_Writes */
1373             bar = TCG_BAR_SC | TCG_MO_ST_ST;
1374             break;
1375         default: /* MBReqTypes_All */
1376             bar = TCG_BAR_SC | TCG_MO_ALL;
1377             break;
1378         }
1379         tcg_gen_mb(bar);
1380         return;
1381     case 6: /* ISB */
1382         /* We need to break the TB after this insn to execute
1383          * a self-modified code correctly and also to take
1384          * any pending interrupts immediately.
1385          */
1386         s->is_jmp = DISAS_UPDATE;
1387         return;
1388     default:
1389         unallocated_encoding(s);
1390         return;
1391     }
1392 }
1393
1394 /* C5.6.130 MSR (immediate) - move immediate to processor state field */
1395 static void handle_msr_i(DisasContext *s, uint32_t insn,
1396                          unsigned int op1, unsigned int op2, unsigned int crm)
1397 {
1398     int op = op1 << 3 | op2;
1399     switch (op) {
1400     case 0x05: /* SPSel */
1401         if (s->current_el == 0) {
1402             unallocated_encoding(s);
1403             return;
1404         }
1405         /* fall through */
1406     case 0x1e: /* DAIFSet */
1407     case 0x1f: /* DAIFClear */
1408     {
1409         TCGv_i32 tcg_imm = tcg_const_i32(crm);
1410         TCGv_i32 tcg_op = tcg_const_i32(op);
1411         gen_a64_set_pc_im(s->pc - 4);
1412         gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
1413         tcg_temp_free_i32(tcg_imm);
1414         tcg_temp_free_i32(tcg_op);
1415         s->is_jmp = DISAS_UPDATE;
1416         break;
1417     }
1418     default:
1419         unallocated_encoding(s);
1420         return;
1421     }
1422 }
1423
1424 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1425 {
1426     TCGv_i32 tmp = tcg_temp_new_i32();
1427     TCGv_i32 nzcv = tcg_temp_new_i32();
1428
1429     /* build bit 31, N */
1430     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1431     /* build bit 30, Z */
1432     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1433     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1434     /* build bit 29, C */
1435     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1436     /* build bit 28, V */
1437     tcg_gen_shri_i32(tmp, cpu_VF, 31);
1438     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1439     /* generate result */
1440     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1441
1442     tcg_temp_free_i32(nzcv);
1443     tcg_temp_free_i32(tmp);
1444 }
1445
1446 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1447
1448 {
1449     TCGv_i32 nzcv = tcg_temp_new_i32();
1450
1451     /* take NZCV from R[t] */
1452     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1453
1454     /* bit 31, N */
1455     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1456     /* bit 30, Z */
1457     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1458     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1459     /* bit 29, C */
1460     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1461     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1462     /* bit 28, V */
1463     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1464     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1465     tcg_temp_free_i32(nzcv);
1466 }
1467
/* C5.6.129 MRS - move from system register
 * C5.6.131 MSR (register) - move to system register
 * C5.6.204 SYS
 * C5.6.205 SYSL
 * These are all essentially the same insn in 'read' and 'write'
 * versions, with varying op0 fields.
 *
 * The register is looked up in s->cp_regs by its (crn, crm, op0, op1,
 * op2) encoding.  isread selects the direction of the transfer and rt
 * is the general purpose register that supplies or receives the value.
 * Unknown registers and accesses rejected by cp_access_ok() are treated
 * as unallocated encodings.
 */
static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    const ARMCPRegInfo *ri;
    TCGv_i64 tcg_rt;

    ri = get_arm_cp_reginfo(s->cp_regs,
                            ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                               crn, crm, op0, op1, op2));

    if (!ri) {
        /* Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
                      isread ? "read" : "write", op0, op1, crn, crm, op2);
        unallocated_encoding(s);
        return;
    }

    /* Check access permissions */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        unallocated_encoding(s);
        return;
    }

    if (ri->accessfn) {
        /* Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         */
        TCGv_ptr tmpptr;
        TCGv_i32 tcg_syn, tcg_isread;
        uint32_t syndrome;

        /* Set the PC to this insn's address before the helper call.
         * NOTE(review): presumably so that an exception raised by the
         * helper is reported against this insn -- confirm in the helper.
         */
        gen_a64_set_pc_im(s->pc - 4);
        tmpptr = tcg_const_ptr(ri);
        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
        tcg_syn = tcg_const_i32(syndrome);
        tcg_isread = tcg_const_i32(isread);
        gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
        tcg_temp_free_ptr(tmpptr);
        tcg_temp_free_i32(tcg_syn);
        tcg_temp_free_i32(tcg_isread);
    }

    /* Handle special cases first.  The mask clears every flag bit in
     * ARM_CP_FLAG_MASK other than ARM_CP_SPECIAL before the type is
     * compared against the special-case values.
     */
    switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
    case ARM_CP_NOP:
        return;
    case ARM_CP_NZCV:
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_rt);
        } else {
            gen_set_nzcv(tcg_rt);
        }
        return;
    case ARM_CP_CURRENTEL:
        /* Reads as current EL value from pstate, which is
         * guaranteed to be constant by the tb flags.
         */
        tcg_rt = cpu_reg(s, rt);
        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
        return;
    case ARM_CP_DC_ZVA:
        /* Writes clear the aligned block of memory which rt points into. */
        tcg_rt = cpu_reg(s, rt);
        gen_helper_dc_zva(cpu_env, tcg_rt);
        return;
    default:
        break;
    }

    /* With icount in use, accesses to I/O registers are bracketed by
     * gen_io_start() here and gen_io_end() below.
     */
    if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        gen_io_start();
    }

    tcg_rt = cpu_reg(s, rt);

    if (isread) {
        /* In order of preference: constant value, read helper,
         * direct load from the CPU state field.
         */
        if (ri->type & ARM_CP_CONST) {
            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
        } else if (ri->readfn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
            tcg_temp_free_ptr(tmpptr);
        } else {
            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    } else {
        /* In order of preference: ignore the write, write helper,
         * direct store to the CPU state field.
         */
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            return;
        } else if (ri->writefn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
            tcg_temp_free_ptr(tmpptr);
        } else {
            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    }

    if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        /* I/O operations must end the TB here (whether read or write) */
        gen_io_end();
        s->is_jmp = DISAS_UPDATE;
    } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /* We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        s->is_jmp = DISAS_UPDATE;
    }
}
1593
1594 /* C3.2.4 System
1595  *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1596  * +---------------------+---+-----+-----+-------+-------+-----+------+
1597  * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1598  * +---------------------+---+-----+-----+-------+-------+-----+------+
1599  */
1600 static void disas_system(DisasContext *s, uint32_t insn)
1601 {
1602     unsigned int l, op0, op1, crn, crm, op2, rt;
1603     l = extract32(insn, 21, 1);
1604     op0 = extract32(insn, 19, 2);
1605     op1 = extract32(insn, 16, 3);
1606     crn = extract32(insn, 12, 4);
1607     crm = extract32(insn, 8, 4);
1608     op2 = extract32(insn, 5, 3);
1609     rt = extract32(insn, 0, 5);
1610
1611     if (op0 == 0) {
1612         if (l || rt != 31) {
1613             unallocated_encoding(s);
1614             return;
1615         }
1616         switch (crn) {
1617         case 2: /* C5.6.68 HINT */
1618             handle_hint(s, insn, op1, op2, crm);
1619             break;
1620         case 3: /* CLREX, DSB, DMB, ISB */
1621             handle_sync(s, insn, op1, op2, crm);
1622             break;
1623         case 4: /* C5.6.130 MSR (immediate) */
1624             handle_msr_i(s, insn, op1, op2, crm);
1625             break;
1626         default:
1627             unallocated_encoding(s);
1628             break;
1629         }
1630         return;
1631     }
1632     handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1633 }
1634
/* C3.2.3 Exception generation
 *
 *  31             24 23 21 20                     5 4   2 1  0
 * +-----------------+-----+------------------------+-----+----+
 * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
 * +-----------------+-----+------------------------+-----+----+
 *
 * opc selects the group (0: SVC/HVC/SMC, 1: BRK, 2: HLT, 5: DCPSn)
 * and the combined op2:LL field selects the instruction within it.
 * Every combination not handled below is an unallocated encoding.
 */
static void disas_exc(DisasContext *s, uint32_t insn)
{
    int opc = extract32(insn, 21, 3);
    int op2_ll = extract32(insn, 0, 5);
    int imm16 = extract32(insn, 5, 16);
    TCGv_i32 tmp;

    switch (opc) {
    case 0:
        /* For SVC, HVC and SMC we advance the single-step state
         * machine before taking the exception. This is architecturally
         * mandated, to ensure that single-stepping a system call
         * instruction works properly.
         */
        switch (op2_ll) {
        case 1:                                                     /* SVC */
            gen_ss_advance(s);
            gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
                               default_exception_el(s));
            break;
        case 2:                                                     /* HVC */
            /* HVC is not available at EL0 */
            if (s->current_el == 0) {
                unallocated_encoding(s);
                break;
            }
            /* The pre HVC helper handles cases when HVC gets trapped
             * as an undefined insn by runtime configuration.
             */
            gen_a64_set_pc_im(s->pc - 4);
            gen_helper_pre_hvc(cpu_env);
            gen_ss_advance(s);
            /* The exception is taken to EL2 */
            gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
            break;
        case 3:                                                     /* SMC */
            /* SMC is not available at EL0 */
            if (s->current_el == 0) {
                unallocated_encoding(s);
                break;
            }
            /* The pre SMC helper is called with the SMC syndrome and the
             * PC set to this insn's address.
             */
            gen_a64_set_pc_im(s->pc - 4);
            tmp = tcg_const_i32(syn_aa64_smc(imm16));
            gen_helper_pre_smc(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            gen_ss_advance(s);
            /* The exception is taken to EL3 */
            gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
            break;
        default:
            unallocated_encoding(s);
            break;
        }
        break;
    case 1:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* BRK */
        gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16),
                           default_exception_el(s));
        break;
    case 2:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* HLT. This has two purposes.
         * Architecturally, it is an external halting debug instruction.
         * Since QEMU doesn't implement external debug, we treat this as
         * it is required for halting debug disabled: it will UNDEF.
         * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
         */
        if (semihosting_enabled() && imm16 == 0xf000) {
#ifndef CONFIG_USER_ONLY
            /* In system mode, don't allow userspace access to semihosting,
             * to provide some semblance of security (and for consistency
             * with our 32-bit semihosting).
             */
            if (s->current_el == 0) {
                unsupported_encoding(s, insn);
                break;
            }
#endif
            gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
        } else {
            unsupported_encoding(s, insn);
        }
        break;
    case 5:
        if (op2_ll < 1 || op2_ll > 3) {
            unallocated_encoding(s);
            break;
        }
        /* DCPS1, DCPS2, DCPS3: not implemented */
        unsupported_encoding(s, insn);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}
1741
1742 /* C3.2.7 Unconditional branch (register)
1743  *  31           25 24   21 20   16 15   10 9    5 4     0
1744  * +---------------+-------+-------+-------+------+-------+
1745  * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1746  * +---------------+-------+-------+-------+------+-------+
1747  */
1748 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1749 {
1750     unsigned int opc, op2, op3, rn, op4;
1751
1752     opc = extract32(insn, 21, 4);
1753     op2 = extract32(insn, 16, 5);
1754     op3 = extract32(insn, 10, 6);
1755     rn = extract32(insn, 5, 5);
1756     op4 = extract32(insn, 0, 5);
1757
1758     if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1759         unallocated_encoding(s);
1760         return;
1761     }
1762
1763     switch (opc) {
1764     case 0: /* BR */
1765     case 1: /* BLR */
1766     case 2: /* RET */
1767         gen_a64_set_pc(s, cpu_reg(s, rn));
1768         /* BLR also needs to load return address */
1769         if (opc == 1) {
1770             tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1771         }
1772         break;
1773     case 4: /* ERET */
1774         if (s->current_el == 0) {
1775             unallocated_encoding(s);
1776             return;
1777         }
1778         gen_helper_exception_return(cpu_env);
1779         s->is_jmp = DISAS_JUMP;
1780         return;
1781     case 5: /* DRPS */
1782         if (rn != 0x1f) {
1783             unallocated_encoding(s);
1784         } else {
1785             unsupported_encoding(s, insn);
1786         }
1787         return;
1788     default:
1789         unallocated_encoding(s);
1790         return;
1791     }
1792
1793     s->is_jmp = DISAS_JUMP;
1794 }
1795
1796 /* C3.2 Branches, exception generating and system instructions */
1797 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1798 {
1799     switch (extract32(insn, 25, 7)) {
1800     case 0x0a: case 0x0b:
1801     case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1802         disas_uncond_b_imm(s, insn);
1803         break;
1804     case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1805         disas_comp_b_imm(s, insn);
1806         break;
1807     case 0x1b: case 0x5b: /* Test & branch (immediate) */
1808         disas_test_b_imm(s, insn);
1809         break;
1810     case 0x2a: /* Conditional branch (immediate) */
1811         disas_cond_b_imm(s, insn);
1812         break;
1813     case 0x6a: /* Exception generation / System */
1814         if (insn & (1 << 24)) {
1815             disas_system(s, insn);
1816         } else {
1817             disas_exc(s, insn);
1818         }
1819         break;
1820     case 0x6b: /* Unconditional branch (register) */
1821         disas_uncond_b_reg(s, insn);
1822         break;
1823     default:
1824         unallocated_encoding(s);
1825         break;
1826     }
1827 }
1828
1829 /*
1830  * Load/Store exclusive instructions are implemented by remembering
1831  * the value/address loaded, and seeing if these are the same
1832  * when the store is performed. This is not actually the architecturally
1833  * mandated semantics, but it works for typical guest code sequences
1834  * and avoids having to monitor regular stores.
1835  *
1836  * The store exclusive uses the atomic cmpxchg primitives to avoid
1837  * races in multi-threaded linux-user and when MTTCG softmmu is
1838  * enabled.
1839  */
1840 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
1841                                TCGv_i64 addr, int size, bool is_pair)
1842 {
1843     TCGv_i64 tmp = tcg_temp_new_i64();
1844     TCGMemOp memop = s->be_data + size;
1845
1846     g_assert(size <= 3);
1847     tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);
1848
1849     if (is_pair) {
1850         TCGv_i64 addr2 = tcg_temp_new_i64();
1851         TCGv_i64 hitmp = tcg_temp_new_i64();
1852
1853         g_assert(size >= 2);
1854         tcg_gen_addi_i64(addr2, addr, 1 << size);
1855         tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
1856         tcg_temp_free_i64(addr2);
1857         tcg_gen_mov_i64(cpu_exclusive_high, hitmp);
1858         tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
1859         tcg_temp_free_i64(hitmp);
1860     }
1861
1862     tcg_gen_mov_i64(cpu_exclusive_val, tmp);
1863     tcg_gen_mov_i64(cpu_reg(s, rt), tmp);
1864
1865     tcg_temp_free_i64(tmp);
1866     tcg_gen_mov_i64(cpu_exclusive_addr, addr);
1867 }
1868
1869 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
1870                                 TCGv_i64 inaddr, int size, int is_pair)
1871 {
1872     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
1873      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
1874      *     [addr] = {Rt};
1875      *     if (is_pair) {
1876      *         [addr + datasize] = {Rt2};
1877      *     }
1878      *     {Rd} = 0;
1879      * } else {
1880      *     {Rd} = 1;
1881      * }
1882      * env->exclusive_addr = -1;
1883      */
1884     TCGLabel *fail_label = gen_new_label();
1885     TCGLabel *done_label = gen_new_label();
1886     TCGv_i64 addr = tcg_temp_local_new_i64();
1887     TCGv_i64 tmp;
1888
1889     /* Copy input into a local temp so it is not trashed when the
1890      * basic block ends at the branch insn.
1891      */
1892     tcg_gen_mov_i64(addr, inaddr);
1893     tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
1894
1895     tmp = tcg_temp_new_i64();
1896     if (is_pair) {
1897         if (size == 2) {
1898             TCGv_i64 val = tcg_temp_new_i64();
1899             tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
1900             tcg_gen_concat32_i64(val, cpu_exclusive_val, cpu_exclusive_high);
1901             tcg_gen_atomic_cmpxchg_i64(tmp, addr, val, tmp,
1902                                        get_mem_index(s),
1903                                        size | MO_ALIGN | s->be_data);
1904             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, val);
1905             tcg_temp_free_i64(val);
1906         } else if (s->be_data == MO_LE) {
1907             gen_helper_paired_cmpxchg64_le(tmp, cpu_env, addr, cpu_reg(s, rt),
1908                                            cpu_reg(s, rt2));
1909         } else {
1910             gen_helper_paired_cmpxchg64_be(tmp, cpu_env, addr, cpu_reg(s, rt),
1911                                            cpu_reg(s, rt2));
1912         }
1913     } else {
1914         TCGv_i64 val = cpu_reg(s, rt);
1915         tcg_gen_atomic_cmpxchg_i64(tmp, addr, cpu_exclusive_val, val,
1916                                    get_mem_index(s),
1917                                    size | MO_ALIGN | s->be_data);
1918         tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
1919     }
1920
1921     tcg_temp_free_i64(addr);
1922
1923     tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
1924     tcg_temp_free_i64(tmp);
1925     tcg_gen_br(done_label);
1926
1927     gen_set_label(fail_label);
1928     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
1929     gen_set_label(done_label);
1930     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1931 }
1932
1933 /* Update the Sixty-Four bit (SF) registersize. This logic is derived
1934  * from the ARMv8 specs for LDR (Shared decode for all encodings).
1935  */
/* Return true when the transfer register is 64 bits wide.
 *
 * For sign-extending loads the width is chosen by opc<0> (set -> 32 bit,
 * clear -> 64 bit); otherwise only size == 3 uses a 64-bit register.
 */
static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
{
    if (is_signed) {
        return !extract32(opc, 0, 1);
    }
    return size == 3;
}
1948
1949 /* C3.3.6 Load/store exclusive
1950  *
1951  *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
1952  * +-----+-------------+----+---+----+------+----+-------+------+------+
1953  * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
1954  * +-----+-------------+----+---+----+------+----+-------+------+------+
1955  *
1956  *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
1957  *   L: 0 -> store, 1 -> load
1958  *  o2: 0 -> exclusive, 1 -> not
1959  *  o1: 0 -> single register, 1 -> register pair
1960  *  o0: 1 -> load-acquire/store-release, 0 -> not
1961  */
1962 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
1963 {
1964     int rt = extract32(insn, 0, 5);
1965     int rn = extract32(insn, 5, 5);
1966     int rt2 = extract32(insn, 10, 5);
1967     int is_lasr = extract32(insn, 15, 1);
1968     int rs = extract32(insn, 16, 5);
1969     int is_pair = extract32(insn, 21, 1);
1970     int is_store = !extract32(insn, 22, 1);
1971     int is_excl = !extract32(insn, 23, 1);
1972     int size = extract32(insn, 30, 2);
1973     TCGv_i64 tcg_addr;
1974
1975     if ((!is_excl && !is_pair && !is_lasr) ||
1976         (!is_excl && is_pair) ||
1977         (is_pair && size < 2)) {
1978         unallocated_encoding(s);
1979         return;
1980     }
1981
1982     if (rn == 31) {
1983         gen_check_sp_alignment(s);
1984     }
1985     tcg_addr = read_cpu_reg_sp(s, rn, 1);
1986
1987     /* Note that since TCG is single threaded load-acquire/store-release
1988      * semantics require no extra if (is_lasr) { ... } handling.
1989      */
1990
1991     if (is_excl) {
1992         if (!is_store) {
1993             s->is_ldex = true;
1994             gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
1995             if (is_lasr) {
1996                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
1997             }
1998         } else {
1999             if (is_lasr) {
2000                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2001             }
2002             gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
2003         }
2004     } else {
2005         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2006         bool iss_sf = disas_ldst_compute_iss_sf(size, false, 0);
2007
2008         /* Generate ISS for non-exclusive accesses including LASR.  */
2009         if (is_store) {
2010             if (is_lasr) {
2011                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2012             }
2013             do_gpr_st(s, tcg_rt, tcg_addr, size,
2014                       true, rt, iss_sf, is_lasr);
2015         } else {
2016             do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false,
2017                       true, rt, iss_sf, is_lasr);
2018             if (is_lasr) {
2019                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2020             }
2021         }
2022     }
2023 }
2024
2025 /*
2026  * C3.3.5 Load register (literal)
2027  *
2028  *  31 30 29   27  26 25 24 23                5 4     0
2029  * +-----+-------+---+-----+-------------------+-------+
2030  * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
2031  * +-----+-------+---+-----+-------------------+-------+
2032  *
2033  * V: 1 -> vector (simd/fp)
2034  * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2035  *                   10-> 32 bit signed, 11 -> prefetch
2036  * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2037  */
2038 static void disas_ld_lit(DisasContext *s, uint32_t insn)
2039 {
2040     int rt = extract32(insn, 0, 5);
2041     int64_t imm = sextract32(insn, 5, 19) << 2;
2042     bool is_vector = extract32(insn, 26, 1);
2043     int opc = extract32(insn, 30, 2);
2044     bool is_signed = false;
2045     int size = 2;
2046     TCGv_i64 tcg_rt, tcg_addr;
2047
2048     if (is_vector) {
2049         if (opc == 3) {
2050             unallocated_encoding(s);
2051             return;
2052         }
2053         size = 2 + opc;
2054         if (!fp_access_check(s)) {
2055             return;
2056         }
2057     } else {
2058         if (opc == 3) {
2059             /* PRFM (literal) : prefetch */
2060             return;
2061         }
2062         size = 2 + extract32(opc, 0, 1);
2063         is_signed = extract32(opc, 1, 1);
2064     }
2065
2066     tcg_rt = cpu_reg(s, rt);
2067
2068     tcg_addr = tcg_const_i64((s->pc - 4) + imm);
2069     if (is_vector) {
2070         do_fp_ld(s, rt, tcg_addr, size);
2071     } else {
2072         /* Only unsigned 32bit loads target 32bit registers.  */
2073         bool iss_sf = opc != 0;
2074
2075         do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
2076                   true, rt, iss_sf, false);
2077     }
2078     tcg_temp_free_i64(tcg_addr);
2079 }
2080
2081 /*
2082  * C5.6.80 LDNP (Load Pair - non-temporal hint)
2083  * C5.6.81 LDP (Load Pair - non vector)
2084  * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
2085  * C5.6.176 STNP (Store Pair - non-temporal hint)
2086  * C5.6.177 STP (Store Pair - non vector)
2087  * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
2088  * C6.3.165 LDP (Load Pair of SIMD&FP)
2089  * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
2090  * C6.3.284 STP (Store Pair of SIMD&FP)
2091  *
2092  *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
2093  * +-----+-------+---+---+-------+---+-----------------------------+
2094  * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
2095  * +-----+-------+---+---+-------+---+-------+-------+------+------+
2096  *
2097  * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
2098  *      LDPSW                    01
2099  *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2100  *   V: 0 -> GPR, 1 -> Vector
2101  * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2102  *      10 -> signed offset, 11 -> pre-index
2103  *   L: 0 -> Store 1 -> Load
2104  *
2105  * Rt, Rt2 = GPR or SIMD registers to be stored
2106  * Rn = general purpose register containing address
2107  * imm7 = signed offset (multiple of 4 or 8 depending on size)
2108  */
2109 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2110 {
2111     int rt = extract32(insn, 0, 5);
2112     int rn = extract32(insn, 5, 5);
2113     int rt2 = extract32(insn, 10, 5);
2114     uint64_t offset = sextract64(insn, 15, 7);
2115     int index = extract32(insn, 23, 2);
2116     bool is_vector = extract32(insn, 26, 1);
2117     bool is_load = extract32(insn, 22, 1);
2118     int opc = extract32(insn, 30, 2);
2119
2120     bool is_signed = false;
2121     bool postindex = false;
2122     bool wback = false;
2123
2124     TCGv_i64 tcg_addr; /* calculated address */
2125     int size;
2126
2127     if (opc == 3) {
2128         unallocated_encoding(s);
2129         return;
2130     }
2131
2132     if (is_vector) {
2133         size = 2 + opc;
2134     } else {
2135         size = 2 + extract32(opc, 1, 1);
2136         is_signed = extract32(opc, 0, 1);
2137         if (!is_load && is_signed) {
2138             unallocated_encoding(s);
2139             return;
2140         }
2141     }
2142
2143     switch (index) {
2144     case 1: /* post-index */
2145         postindex = true;
2146         wback = true;
2147         break;
2148     case 0:
2149         /* signed offset with "non-temporal" hint. Since we don't emulate
2150          * caches we don't care about hints to the cache system about
2151          * data access patterns, and handle this identically to plain
2152          * signed offset.
2153          */
2154         if (is_signed) {
2155             /* There is no non-temporal-hint version of LDPSW */
2156             unallocated_encoding(s);
2157             return;
2158         }
2159         postindex = false;
2160         break;
2161     case 2: /* signed offset, rn not updated */
2162         postindex = false;
2163         break;
2164     case 3: /* pre-index */
2165         postindex = false;
2166         wback = true;
2167         break;
2168     }
2169
2170     if (is_vector && !fp_access_check(s)) {
2171         return;
2172     }
2173
2174     offset <<= size;
2175
2176     if (rn == 31) {
2177         gen_check_sp_alignment(s);
2178     }
2179
2180     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2181
2182     if (!postindex) {
2183         tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2184     }
2185
2186     if (is_vector) {
2187         if (is_load) {
2188             do_fp_ld(s, rt, tcg_addr, size);
2189         } else {
2190             do_fp_st(s, rt, tcg_addr, size);
2191         }
2192     } else {
2193         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2194         if (is_load) {
2195             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
2196                       false, 0, false, false);
2197         } else {
2198             do_gpr_st(s, tcg_rt, tcg_addr, size,
2199                       false, 0, false, false);
2200         }
2201     }
2202     tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2203     if (is_vector) {
2204         if (is_load) {
2205             do_fp_ld(s, rt2, tcg_addr, size);
2206         } else {
2207             do_fp_st(s, rt2, tcg_addr, size);
2208         }
2209     } else {
2210         TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2211         if (is_load) {
2212             do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false,
2213                       false, 0, false, false);
2214         } else {
2215             do_gpr_st(s, tcg_rt2, tcg_addr, size,
2216                       false, 0, false, false);
2217         }
2218     }
2219
2220     if (wback) {
2221         if (postindex) {
2222             tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
2223         } else {
2224             tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
2225         }
2226         tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
2227     }
2228 }
2229
2230 /*
2231  * C3.3.8 Load/store (immediate post-indexed)
2232  * C3.3.9 Load/store (immediate pre-indexed)
2233  * C3.3.12 Load/store (unscaled immediate)
2234  *
2235  * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2236  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2237  * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2238  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2239  *
2240  * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
2241  *       10 -> unprivileged
2242  * V = 0 -> non-vector
2243  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2244  * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2245  */
2246 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
2247                                 int opc,
2248                                 int size,
2249                                 int rt,
2250                                 bool is_vector)
2251 {
2252     int rn = extract32(insn, 5, 5);
2253     int imm9 = sextract32(insn, 12, 9);
2254     int idx = extract32(insn, 10, 2);
2255     bool is_signed = false;
2256     bool is_store = false;
2257     bool is_extended = false;
2258     bool is_unpriv = (idx == 2);
2259     bool iss_valid = !is_vector;
2260     bool post_index;
2261     bool writeback;
2262
2263     TCGv_i64 tcg_addr;
2264
2265     if (is_vector) {
2266         size |= (opc & 2) << 1;
2267         if (size > 4 || is_unpriv) {
2268             unallocated_encoding(s);
2269             return;
2270         }
2271         is_store = ((opc & 1) == 0);
2272         if (!fp_access_check(s)) {
2273             return;
2274         }
2275     } else {
2276         if (size == 3 && opc == 2) {
2277             /* PRFM - prefetch */
2278             if (is_unpriv) {
2279                 unallocated_encoding(s);
2280                 return;
2281             }
2282             return;
2283         }
2284         if (opc == 3 && size > 1) {
2285             unallocated_encoding(s);
2286             return;
2287         }
2288         is_store = (opc == 0);
2289         is_signed = extract32(opc, 1, 1);
2290         is_extended = (size < 3) && extract32(opc, 0, 1);
2291     }
2292
2293     switch (idx) {
2294     case 0:
2295     case 2:
2296         post_index = false;
2297         writeback = false;
2298         break;
2299     case 1:
2300         post_index = true;
2301         writeback = true;
2302         break;
2303     case 3:
2304         post_index = false;
2305         writeback = true;
2306         break;
2307     }
2308
2309     if (rn == 31) {
2310         gen_check_sp_alignment(s);
2311     }
2312     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2313
2314     if (!post_index) {
2315         tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2316     }
2317
2318     if (is_vector) {
2319         if (is_store) {
2320             do_fp_st(s, rt, tcg_addr, size);
2321         } else {
2322             do_fp_ld(s, rt, tcg_addr, size);
2323         }
2324     } else {
2325         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2326         int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2327         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2328
2329         if (is_store) {
2330             do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx,
2331                              iss_valid, rt, iss_sf, false);
2332         } else {
2333             do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
2334                              is_signed, is_extended, memidx,
2335                              iss_valid, rt, iss_sf, false);
2336         }
2337     }
2338
2339     if (writeback) {
2340         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2341         if (post_index) {
2342             tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2343         }
2344         tcg_gen_mov_i64(tcg_rn, tcg_addr);
2345     }
2346 }
2347
2348 /*
2349  * C3.3.10 Load/store (register offset)
2350  *
2351  * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2352  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2353  * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2354  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2355  *
2356  * For non-vector:
2357  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2358  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2359  * For vector:
2360  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2361  *   opc<0>: 0 -> store, 1 -> load
2362  * V: 1 -> vector/simd
2363  * opt: extend encoding (see DecodeRegExtend)
2364  * S: if S=1 then scale (essentially index by sizeof(size))
2365  * Rt: register to transfer into/out of
2366  * Rn: address register or SP for base
2367  * Rm: offset register or ZR for offset
2368  */
2369 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
2370                                    int opc,
2371                                    int size,
2372                                    int rt,
2373                                    bool is_vector)
2374 {
2375     int rn = extract32(insn, 5, 5);
2376     int shift = extract32(insn, 12, 1);
2377     int rm = extract32(insn, 16, 5);
2378     int opt = extract32(insn, 13, 3);
2379     bool is_signed = false;
2380     bool is_store = false;
2381     bool is_extended = false;
2382
2383     TCGv_i64 tcg_rm;
2384     TCGv_i64 tcg_addr;
2385
2386     if (extract32(opt, 1, 1) == 0) {
2387         unallocated_encoding(s);
2388         return;
2389     }
2390
2391     if (is_vector) {
2392         size |= (opc & 2) << 1;
2393         if (size > 4) {
2394             unallocated_encoding(s);
2395             return;
2396         }
2397         is_store = !extract32(opc, 0, 1);
2398         if (!fp_access_check(s)) {
2399             return;
2400         }
2401     } else {
2402         if (size == 3 && opc == 2) {
2403             /* PRFM - prefetch */
2404             return;
2405         }
2406         if (opc == 3 && size > 1) {
2407             unallocated_encoding(s);
2408             return;
2409         }
2410         is_store = (opc == 0);
2411         is_signed = extract32(opc, 1, 1);
2412         is_extended = (size < 3) && extract32(opc, 0, 1);
2413     }
2414
2415     if (rn == 31) {
2416         gen_check_sp_alignment(s);
2417     }
2418     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2419
2420     tcg_rm = read_cpu_reg(s, rm, 1);
2421     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2422
2423     tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
2424
2425     if (is_vector) {
2426         if (is_store) {
2427             do_fp_st(s, rt, tcg_addr, size);
2428         } else {
2429             do_fp_ld(s, rt, tcg_addr, size);
2430         }
2431     } else {
2432         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2433         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2434         if (is_store) {
2435             do_gpr_st(s, tcg_rt, tcg_addr, size,
2436                       true, rt, iss_sf, false);
2437         } else {
2438             do_gpr_ld(s, tcg_rt, tcg_addr, size,
2439                       is_signed, is_extended,
2440                       true, rt, iss_sf, false);
2441         }
2442     }
2443 }
2444
2445 /*
2446  * C3.3.13 Load/store (unsigned immediate)
2447  *
2448  * 31 30 29   27  26 25 24 23 22 21        10 9     5
2449  * +----+-------+---+-----+-----+------------+-------+------+
2450  * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
2451  * +----+-------+---+-----+-----+------------+-------+------+
2452  *
2453  * For non-vector:
2454  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2455  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2456  * For vector:
2457  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2458  *   opc<0>: 0 -> store, 1 -> load
2459  * Rn: base address register (inc SP)
2460  * Rt: target register
2461  */
2462 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
2463                                         int opc,
2464                                         int size,
2465                                         int rt,
2466                                         bool is_vector)
2467 {
2468     int rn = extract32(insn, 5, 5);
2469     unsigned int imm12 = extract32(insn, 10, 12);
2470     unsigned int offset;
2471
2472     TCGv_i64 tcg_addr;
2473
2474     bool is_store;
2475     bool is_signed = false;
2476     bool is_extended = false;
2477
2478     if (is_vector) {
2479         size |= (opc & 2) << 1;
2480         if (size > 4) {
2481             unallocated_encoding(s);
2482             return;
2483         }
2484         is_store = !extract32(opc, 0, 1);
2485         if (!fp_access_check(s)) {
2486             return;
2487         }
2488     } else {
2489         if (size == 3 && opc == 2) {
2490             /* PRFM - prefetch */
2491             return;
2492         }
2493         if (opc == 3 && size > 1) {
2494             unallocated_encoding(s);
2495             return;
2496         }
2497         is_store = (opc == 0);
2498         is_signed = extract32(opc, 1, 1);
2499         is_extended = (size < 3) && extract32(opc, 0, 1);
2500     }
2501
2502     if (rn == 31) {
2503         gen_check_sp_alignment(s);
2504     }
2505     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2506     offset = imm12 << size;
2507     tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2508
2509     if (is_vector) {
2510         if (is_store) {
2511             do_fp_st(s, rt, tcg_addr, size);
2512         } else {
2513             do_fp_ld(s, rt, tcg_addr, size);
2514         }
2515     } else {
2516         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2517         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2518         if (is_store) {
2519             do_gpr_st(s, tcg_rt, tcg_addr, size,
2520                       true, rt, iss_sf, false);
2521         } else {
2522             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended,
2523                       true, rt, iss_sf, false);
2524         }
2525     }
2526 }
2527
2528 /* Load/store register (all forms) */
2529 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2530 {
2531     int rt = extract32(insn, 0, 5);
2532     int opc = extract32(insn, 22, 2);
2533     bool is_vector = extract32(insn, 26, 1);
2534     int size = extract32(insn, 30, 2);
2535
2536     switch (extract32(insn, 24, 2)) {
2537     case 0:
2538         if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2539             disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
2540         } else {
2541             /* Load/store register (unscaled immediate)
2542              * Load/store immediate pre/post-indexed
2543              * Load/store register unprivileged
2544              */
2545             disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
2546         }
2547         break;
2548     case 1:
2549         disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
2550         break;
2551     default:
2552         unallocated_encoding(s);
2553         break;
2554     }
2555 }
2556
2557 /* C3.3.1 AdvSIMD load/store multiple structures
2558  *
2559  *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
2560  * +---+---+---------------+---+-------------+--------+------+------+------+
2561  * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
2562  * +---+---+---------------+---+-------------+--------+------+------+------+
2563  *
2564  * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
2565  *
2566  *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
2567  * +---+---+---------------+---+---+---------+--------+------+------+------+
2568  * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
2569  * +---+---+---------------+---+---+---------+--------+------+------+------+
2570  *
2571  * Rt: first (or only) SIMD&FP register to be transferred
2572  * Rn: base address or SP
2573  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2574  */
2575 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
2576 {
2577     int rt = extract32(insn, 0, 5);
2578     int rn = extract32(insn, 5, 5);
2579     int size = extract32(insn, 10, 2);
2580     int opcode = extract32(insn, 12, 4);
2581     bool is_store = !extract32(insn, 22, 1);
2582     bool is_postidx = extract32(insn, 23, 1);
2583     bool is_q = extract32(insn, 30, 1);
2584     TCGv_i64 tcg_addr, tcg_rn;
2585
2586     int ebytes = 1 << size;
2587     int elements = (is_q ? 128 : 64) / (8 << size);
2588     int rpt;    /* num iterations */
2589     int selem;  /* structure elements */
2590     int r;
2591
2592     if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
2593         unallocated_encoding(s);
2594         return;
2595     }
2596
2597     /* From the shared decode logic */
2598     switch (opcode) {
2599     case 0x0:
2600         rpt = 1;
2601         selem = 4;
2602         break;
2603     case 0x2:
2604         rpt = 4;
2605         selem = 1;
2606         break;
2607     case 0x4:
2608         rpt = 1;
2609         selem = 3;
2610         break;
2611     case 0x6:
2612         rpt = 3;
2613         selem = 1;
2614         break;
2615     case 0x7:
2616         rpt = 1;
2617         selem = 1;
2618         break;
2619     case 0x8:
2620         rpt = 1;
2621         selem = 2;
2622         break;
2623     case 0xa:
2624         rpt = 2;
2625         selem = 1;
2626         break;
2627     default:
2628         unallocated_encoding(s);
2629         return;
2630     }
2631
2632     if (size == 3 && !is_q && selem != 1) {
2633         /* reserved */
2634         unallocated_encoding(s);
2635         return;
2636     }
2637
2638     if (!fp_access_check(s)) {
2639         return;
2640     }
2641
2642     if (rn == 31) {
2643         gen_check_sp_alignment(s);
2644     }
2645
2646     tcg_rn = cpu_reg_sp(s, rn);
2647     tcg_addr = tcg_temp_new_i64();
2648     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2649
2650     for (r = 0; r < rpt; r++) {
2651         int e;
2652         for (e = 0; e < elements; e++) {
2653             int tt = (rt + r) % 32;
2654             int xs;
2655             for (xs = 0; xs < selem; xs++) {
2656                 if (is_store) {
2657                     do_vec_st(s, tt, e, tcg_addr, size);
2658                 } else {
2659                     do_vec_ld(s, tt, e, tcg_addr, size);
2660
2661                     /* For non-quad operations, setting a slice of the low
2662                      * 64 bits of the register clears the high 64 bits (in
2663                      * the ARM ARM pseudocode this is implicit in the fact
2664                      * that 'rval' is a 64 bit wide variable). We optimize
2665                      * by noticing that we only need to do this the first
2666                      * time we touch a register.
2667                      */
2668                     if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
2669                         clear_vec_high(s, tt);
2670                     }
2671                 }
2672                 tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2673                 tt = (tt + 1) % 32;
2674             }
2675         }
2676     }
2677
2678     if (is_postidx) {
2679         int rm = extract32(insn, 16, 5);
2680         if (rm == 31) {
2681             tcg_gen_mov_i64(tcg_rn, tcg_addr);
2682         } else {
2683             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2684         }
2685     }
2686     tcg_temp_free_i64(tcg_addr);
2687 }
2688
2689 /* C3.3.3 AdvSIMD load/store single structure
2690  *
2691  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2692  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2693  * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
2694  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2695  *
2696  * C3.3.4 AdvSIMD load/store single structure (post-indexed)
2697  *
2698  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2699  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2700  * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
2701  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2702  *
2703  * Rt: first (or only) SIMD&FP register to be transferred
2704  * Rn: base address or SP
2705  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2706  * index = encoded in Q:S:size dependent on size
2707  *
2708  * lane_size = encoded in R, opc
2709  * transfer width = encoded in opc, S, size
2710  */
2711 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
2712 {
2713     int rt = extract32(insn, 0, 5);
2714     int rn = extract32(insn, 5, 5);
2715     int size = extract32(insn, 10, 2);
2716     int S = extract32(insn, 12, 1);
2717     int opc = extract32(insn, 13, 3);
2718     int R = extract32(insn, 21, 1);
2719     int is_load = extract32(insn, 22, 1);
2720     int is_postidx = extract32(insn, 23, 1);
2721     int is_q = extract32(insn, 30, 1);
2722
2723     int scale = extract32(opc, 1, 2);
2724     int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
2725     bool replicate = false;
2726     int index = is_q << 3 | S << 2 | size;
2727     int ebytes, xs;
2728     TCGv_i64 tcg_addr, tcg_rn;
2729
2730     switch (scale) {
2731     case 3:
2732         if (!is_load || S) {
2733             unallocated_encoding(s);
2734             return;
2735         }
2736         scale = size;
2737         replicate = true;
2738         break;
2739     case 0:
2740         break;
2741     case 1:
2742         if (extract32(size, 0, 1)) {
2743             unallocated_encoding(s);
2744             return;
2745         }
2746         index >>= 1;
2747         break;
2748     case 2:
2749         if (extract32(size, 1, 1)) {
2750             unallocated_encoding(s);
2751             return;
2752         }
2753         if (!extract32(size, 0, 1)) {
2754             index >>= 2;
2755         } else {
2756             if (S) {
2757                 unallocated_encoding(s);
2758                 return;
2759             }
2760             index >>= 3;
2761             scale = 3;
2762         }
2763         break;
2764     default:
2765         g_assert_not_reached();
2766     }
2767
2768     if (!fp_access_check(s)) {
2769         return;
2770     }
2771
2772     ebytes = 1 << scale;
2773
2774     if (rn == 31) {
2775         gen_check_sp_alignment(s);
2776     }
2777
2778     tcg_rn = cpu_reg_sp(s, rn);
2779     tcg_addr = tcg_temp_new_i64();
2780     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2781
2782     for (xs = 0; xs < selem; xs++) {
2783         if (replicate) {
2784             /* Load and replicate to all elements */
2785             uint64_t mulconst;
2786             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
2787
2788             tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
2789                                 get_mem_index(s), s->be_data + scale);
2790             switch (scale) {
2791             case 0:
2792                 mulconst = 0x0101010101010101ULL;
2793                 break;
2794             case 1:
2795                 mulconst = 0x0001000100010001ULL;
2796                 break;
2797             case 2:
2798                 mulconst = 0x0000000100000001ULL;
2799                 break;
2800             case 3:
2801                 mulconst = 0;
2802                 break;
2803             default:
2804                 g_assert_not_reached();
2805             }
2806             if (mulconst) {
2807                 tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
2808             }
2809             write_vec_element(s, tcg_tmp, rt, 0, MO_64);
2810             if (is_q) {
2811                 write_vec_element(s, tcg_tmp, rt, 1, MO_64);
2812             } else {
2813                 clear_vec_high(s, rt);
2814             }
2815             tcg_temp_free_i64(tcg_tmp);
2816         } else {
2817             /* Load/store one element per register */
2818             if (is_load) {
2819                 do_vec_ld(s, rt, index, tcg_addr, scale);
2820             } else {
2821                 do_vec_st(s, rt, index, tcg_addr, scale);
2822             }
2823         }
2824         tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2825         rt = (rt + 1) % 32;
2826     }
2827
2828     if (is_postidx) {
2829         int rm = extract32(insn, 16, 5);
2830         if (rm == 31) {
2831             tcg_gen_mov_i64(tcg_rn, tcg_addr);
2832         } else {
2833             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2834         }
2835     }
2836     tcg_temp_free_i64(tcg_addr);
2837 }
2838
2839 /* C3.3 Loads and stores */
2840 static void disas_ldst(DisasContext *s, uint32_t insn)
2841 {
2842     switch (extract32(insn, 24, 6)) {
2843     case 0x08: /* Load/store exclusive */
2844         disas_ldst_excl(s, insn);
2845         break;
2846     case 0x18: case 0x1c: /* Load register (literal) */
2847         disas_ld_lit(s, insn);
2848         break;
2849     case 0x28: case 0x29:
2850     case 0x2c: case 0x2d: /* Load/store pair (all forms) */
2851         disas_ldst_pair(s, insn);
2852         break;
2853     case 0x38: case 0x39:
2854     case 0x3c: case 0x3d: /* Load/store register (all forms) */
2855         disas_ldst_reg(s, insn);
2856         break;
2857     case 0x0c: /* AdvSIMD load/store multiple structures */
2858         disas_ldst_multiple_struct(s, insn);
2859         break;
2860     case 0x0d: /* AdvSIMD load/store single structure */
2861         disas_ldst_single_struct(s, insn);
2862         break;
2863     default:
2864         unallocated_encoding(s);
2865         break;
2866     }
2867 }
2868
2869 /* C3.4.6 PC-rel. addressing
2870  *   31  30   29 28       24 23                5 4    0
2871  * +----+-------+-----------+-------------------+------+
2872  * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
2873  * +----+-------+-----------+-------------------+------+
2874  */
2875 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
2876 {
2877     unsigned int page, rd;
2878     uint64_t base;
2879     uint64_t offset;
2880
2881     page = extract32(insn, 31, 1);
2882     /* SignExtend(immhi:immlo) -> offset */
2883     offset = sextract64(insn, 5, 19);
2884     offset = offset << 2 | extract32(insn, 29, 2);
2885     rd = extract32(insn, 0, 5);
2886     base = s->pc - 4;
2887
2888     if (page) {
2889         /* ADRP (page based) */
2890         base &= ~0xfff;
2891         offset <<= 12;
2892     }
2893
2894     tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
2895 }
2896
2897 /*
2898  * C3.4.1 Add/subtract (immediate)
2899  *
2900  *  31 30 29 28       24 23 22 21         10 9   5 4   0
2901  * +--+--+--+-----------+-----+-------------+-----+-----+
2902  * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
2903  * +--+--+--+-----------+-----+-------------+-----+-----+
2904  *
2905  *    sf: 0 -> 32bit, 1 -> 64bit
2906  *    op: 0 -> add  , 1 -> sub
2907  *     S: 1 -> set flags
2908  * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
2909  */
2910 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
2911 {
2912     int rd = extract32(insn, 0, 5);
2913     int rn = extract32(insn, 5, 5);
2914     uint64_t imm = extract32(insn, 10, 12);
2915     int shift = extract32(insn, 22, 2);
2916     bool setflags = extract32(insn, 29, 1);
2917     bool sub_op = extract32(insn, 30, 1);
2918     bool is_64bit = extract32(insn, 31, 1);
2919
2920     TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2921     TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
2922     TCGv_i64 tcg_result;
2923
2924     switch (shift) {
2925     case 0x0:
2926         break;
2927     case 0x1:
2928         imm <<= 12;
2929         break;
2930     default:
2931         unallocated_encoding(s);
2932         return;
2933     }
2934
2935     tcg_result = tcg_temp_new_i64();
2936     if (!setflags) {
2937         if (sub_op) {
2938             tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
2939         } else {
2940             tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
2941         }
2942     } else {
2943         TCGv_i64 tcg_imm = tcg_const_i64(imm);
2944         if (sub_op) {
2945             gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2946         } else {
2947             gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2948         }
2949         tcg_temp_free_i64(tcg_imm);
2950     }
2951
2952     if (is_64bit) {
2953         tcg_gen_mov_i64(tcg_rd, tcg_result);
2954     } else {
2955         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
2956     }
2957
2958     tcg_temp_free_i64(tcg_result);
2959 }
2960
/* Replicate an element across a 64 bit integer.
 *
 * @mask holds the element value in its bottom @e bits (higher bits
 * must be zero); @e is the element width in bits and must be non-zero.
 * The value is repeatedly OR-ed with a copy of itself shifted up by
 * the current width, doubling that width until it reaches 64.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int width;

    assert(e != 0);
    for (width = e; width < 64; width *= 2) {
        mask |= mask << width;
    }
    return mask;
}
2974
/* Return a value with the bottom 'length' bits set
 * (where 0 < length <= 64).
 */
static inline uint64_t bitmask64(unsigned int length)
{
    unsigned int discard;

    assert(length > 0 && length <= 64);
    /* Start from all-ones and shift out the bits that are not wanted */
    discard = 64 - length;
    return ~0ULL >> discard;
}
2981
/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
 * only require the wmask.
 *
 * Returns false if immn/imms/immr form a reserved value (ie should cause
 * a guest UNDEF exception); *result is then left untouched.  Returns true
 * if they are valid, with the decoded bit pattern written to *result.
 */
static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                                   unsigned int imms, unsigned int immr)
{
    unsigned int esize, esize_mask, run, rot;
    uint64_t element;
    int log2_esize;

    assert(immn < 2 && imms < 64 && immr < 64);

    /* The bit patterns we create here are 64 bit patterns which
     * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
     * 64 bits each. Each element contains the same value: a run
     * of between 1 and e-1 non-zero bits, rotated within the
     * element by between 0 and e-1 bits.
     *
     * The element size and run length are encoded into immn (1 bit)
     * and imms (6 bits) as follows:
     * 64 bit elements: immn = 1, imms = <length of run - 1>
     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
     * Notice that immn = 0, imms = 11111x is the only combination
     * not covered by one of the above options; this is reserved.
     * Further, <length of run - 1> all-ones is a reserved pattern.
     *
     * In all cases the rotation is by immr % e (and immr is 6 bits).
     */

    /* The element size is given by the highest set bit of immn:NOT(imms) */
    log2_esize = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (log2_esize < 1) {
        /* This is the immn == 0, imms == 11111x (binary) case */
        return false;
    }
    esize = 1 << log2_esize;
    esize_mask = esize - 1;

    run = imms & esize_mask;
    rot = immr & esize_mask;
    if (run == esize_mask) {
        /* <length of run - 1> mustn't be all-ones. */
        return false;
    }

    /* Build one element: run+1 set bits, rotated right by rot within
     * the esize-bit wide element...
     */
    element = bitmask64(run + 1);
    if (rot != 0) {
        element = ((element >> rot) | (element << (esize - rot)))
                  & bitmask64(esize);
    }

    /* ...then replicate the element over the whole 64 bit value */
    *result = bitfield_replicate(element, esize);
    return true;
}
3047
3048 /* C3.4.4 Logical (immediate)
3049  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3050  * +----+-----+-------------+---+------+------+------+------+
3051  * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
3052  * +----+-----+-------------+---+------+------+------+------+
3053  */
3054 static void disas_logic_imm(DisasContext *s, uint32_t insn)
3055 {
3056     unsigned int sf, opc, is_n, immr, imms, rn, rd;
3057     TCGv_i64 tcg_rd, tcg_rn;
3058     uint64_t wmask;
3059     bool is_and = false;
3060
3061     sf = extract32(insn, 31, 1);
3062     opc = extract32(insn, 29, 2);
3063     is_n = extract32(insn, 22, 1);
3064     immr = extract32(insn, 16, 6);
3065     imms = extract32(insn, 10, 6);
3066     rn = extract32(insn, 5, 5);
3067     rd = extract32(insn, 0, 5);
3068
3069     if (!sf && is_n) {
3070         unallocated_encoding(s);
3071         return;
3072     }
3073
3074     if (opc == 0x3) { /* ANDS */
3075         tcg_rd = cpu_reg(s, rd);
3076     } else {
3077         tcg_rd = cpu_reg_sp(s, rd);
3078     }
3079     tcg_rn = cpu_reg(s, rn);
3080
3081     if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
3082         /* some immediate field values are reserved */
3083         unallocated_encoding(s);
3084         return;
3085     }
3086
3087     if (!sf) {
3088         wmask &= 0xffffffff;
3089     }
3090
3091     switch (opc) {
3092     case 0x3: /* ANDS */
3093     case 0x0: /* AND */
3094         tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
3095         is_and = true;
3096         break;
3097     case 0x1: /* ORR */
3098         tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
3099         break;
3100     case 0x2: /* EOR */
3101         tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
3102         break;
3103     default:
3104         assert(FALSE); /* must handle all above */
3105         break;
3106     }
3107
3108     if (!sf && !is_and) {
3109         /* zero extend final result; we know we can skip this for AND
3110          * since the immediate had the high 32 bits clear.
3111          */
3112         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3113     }
3114
3115     if (opc == 3) { /* ANDS */
3116         gen_logic_CC(sf, tcg_rd);
3117     }
3118 }
3119
3120 /*
3121  * C3.4.5 Move wide (immediate)
3122  *
3123  *  31 30 29 28         23 22 21 20             5 4    0
3124  * +--+-----+-------------+-----+----------------+------+
3125  * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
3126  * +--+-----+-------------+-----+----------------+------+
3127  *
3128  * sf: 0 -> 32 bit, 1 -> 64 bit
3129  * opc: 00 -> N, 10 -> Z, 11 -> K
3130  * hw: shift/16 (0,16, and sf only 32, 48)
3131  */
3132 static void disas_movw_imm(DisasContext *s, uint32_t insn)
3133 {
3134     int rd = extract32(insn, 0, 5);
3135     uint64_t imm = extract32(insn, 5, 16);
3136     int sf = extract32(insn, 31, 1);
3137     int opc = extract32(insn, 29, 2);
3138     int pos = extract32(insn, 21, 2) << 4;
3139     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3140     TCGv_i64 tcg_imm;
3141
3142     if (!sf && (pos >= 32)) {
3143         unallocated_encoding(s);
3144         return;
3145     }
3146
3147     switch (opc) {
3148     case 0: /* MOVN */
3149     case 2: /* MOVZ */
3150         imm <<= pos;
3151         if (opc == 0) {
3152             imm = ~imm;
3153         }
3154         if (!sf) {
3155             imm &= 0xffffffffu;
3156         }
3157         tcg_gen_movi_i64(tcg_rd, imm);
3158         break;
3159     case 3: /* MOVK */
3160         tcg_imm = tcg_const_i64(imm);
3161         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
3162         tcg_temp_free_i64(tcg_imm);
3163         if (!sf) {
3164             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3165         }
3166         break;
3167     default:
3168         unallocated_encoding(s);
3169         break;
3170     }
3171 }
3172
3173 /* C3.4.2 Bitfield
3174  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3175  * +----+-----+-------------+---+------+------+------+------+
3176  * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
3177  * +----+-----+-------------+---+------+------+------+------+
3178  */
3179 static void disas_bitfield(DisasContext *s, uint32_t insn)
3180 {
3181     unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
3182     TCGv_i64 tcg_rd, tcg_tmp;
3183
3184     sf = extract32(insn, 31, 1);
3185     opc = extract32(insn, 29, 2);
3186     n = extract32(insn, 22, 1);
3187     ri = extract32(insn, 16, 6);
3188     si = extract32(insn, 10, 6);
3189     rn = extract32(insn, 5, 5);
3190     rd = extract32(insn, 0, 5);
3191     bitsize = sf ? 64 : 32;
3192
3193     if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3194         unallocated_encoding(s);
3195         return;
3196     }
3197
3198     tcg_rd = cpu_reg(s, rd);
3199
3200     /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
3201        to be smaller than bitsize, we'll never reference data outside the
3202        low 32-bits anyway.  */
3203     tcg_tmp = read_cpu_reg(s, rn, 1);
3204
3205     /* Recognize simple(r) extractions.  */
3206     if (si >= ri) {
3207         /* Wd<s-r:0> = Wn<s:r> */
3208         len = (si - ri) + 1;
3209         if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */
3210             tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
3211             goto done;
3212         } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */
3213             tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
3214             return;
3215         }
3216         /* opc == 1, BXFIL fall through to deposit */
3217         tcg_gen_extract_i64(tcg_tmp, tcg_tmp, ri, len);
3218         pos = 0;
3219     } else {
3220         /* Handle the ri > si case with a deposit
3221          * Wd<32+s-r,32-r> = Wn<s:0>
3222          */
3223         len = si + 1;
3224         pos = (bitsize - ri) & (bitsize - 1);
3225     }
3226
3227     if (opc == 0 && len < ri) {
3228         /* SBFM: sign extend the destination field from len to fill
3229            the balance of the word.  Let the deposit below insert all
3230            of those sign bits.  */
3231         tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
3232         len = ri;
3233     }
3234
3235     if (opc == 1) { /* BFM, BXFIL */
3236         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
3237     } else {
3238         /* SBFM or UBFM: We start with zero, and we haven't modified
3239            any bits outside bitsize, therefore the zero-extension
3240            below is unneeded.  */
3241         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
3242         return;
3243     }
3244
3245  done:
3246     if (!sf) { /* zero extend final result */
3247         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3248     }
3249 }
3250
3251 /* C3.4.3 Extract
3252  *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3253  * +----+------+-------------+---+----+------+--------+------+------+
3254  * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3255  * +----+------+-------------+---+----+------+--------+------+------+
3256  */
3257 static void disas_extract(DisasContext *s, uint32_t insn)
3258 {
3259     unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3260
3261     sf = extract32(insn, 31, 1);
3262     n = extract32(insn, 22, 1);
3263     rm = extract32(insn, 16, 5);
3264     imm = extract32(insn, 10, 6);
3265     rn = extract32(insn, 5, 5);
3266     rd = extract32(insn, 0, 5);
3267     op21 = extract32(insn, 29, 2);
3268     op0 = extract32(insn, 21, 1);
3269     bitsize = sf ? 64 : 32;
3270
3271     if (sf != n || op21 || op0 || imm >= bitsize) {
3272         unallocated_encoding(s);
3273     } else {
3274         TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3275
3276         tcg_rd = cpu_reg(s, rd);
3277
3278         if (unlikely(imm == 0)) {
3279             /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3280              * so an extract from bit 0 is a special case.
3281              */
3282             if (sf) {
3283                 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3284             } else {
3285                 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3286             }
3287         } else if (rm == rn) { /* ROR */
3288             tcg_rm = cpu_reg(s, rm);
3289             if (sf) {
3290                 tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
3291             } else {
3292                 TCGv_i32 tmp = tcg_temp_new_i32();
3293                 tcg_gen_extrl_i64_i32(tmp, tcg_rm);
3294                 tcg_gen_rotri_i32(tmp, tmp, imm);
3295                 tcg_gen_extu_i32_i64(tcg_rd, tmp);
3296                 tcg_temp_free_i32(tmp);
3297             }
3298         } else {
3299             tcg_rm = read_cpu_reg(s, rm, sf);
3300             tcg_rn = read_cpu_reg(s, rn, sf);
3301             tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
3302             tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
3303             tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
3304             if (!sf) {
3305                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3306             }
3307         }
3308     }
3309 }
3310
3311 /* C3.4 Data processing - immediate */
3312 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3313 {
3314     switch (extract32(insn, 23, 6)) {
3315     case 0x20: case 0x21: /* PC-rel. addressing */
3316         disas_pc_rel_adr(s, insn);
3317         break;
3318     case 0x22: case 0x23: /* Add/subtract (immediate) */
3319         disas_add_sub_imm(s, insn);
3320         break;
3321     case 0x24: /* Logical (immediate) */
3322         disas_logic_imm(s, insn);
3323         break;
3324     case 0x25: /* Move wide (immediate) */
3325         disas_movw_imm(s, insn);
3326         break;
3327     case 0x26: /* Bitfield */
3328         disas_bitfield(s, insn);
3329         break;
3330     case 0x27: /* Extract */
3331         disas_extract(s, insn);
3332         break;
3333     default:
3334         unallocated_encoding(s);
3335         break;
3336     }
3337 }
3338
3339 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
3340  * Note that it is the caller's responsibility to ensure that the
3341  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
3342  * mandated semantics for out of range shifts.
3343  */
3344 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
3345                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3346 {
3347     switch (shift_type) {
3348     case A64_SHIFT_TYPE_LSL:
3349         tcg_gen_shl_i64(dst, src, shift_amount);
3350         break;
3351     case A64_SHIFT_TYPE_LSR:
3352         tcg_gen_shr_i64(dst, src, shift_amount);
3353         break;
3354     case A64_SHIFT_TYPE_ASR:
3355         if (!sf) {
3356             tcg_gen_ext32s_i64(dst, src);
3357         }
3358         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
3359         break;
3360     case A64_SHIFT_TYPE_ROR:
3361         if (sf) {
3362             tcg_gen_rotr_i64(dst, src, shift_amount);
3363         } else {
3364             TCGv_i32 t0, t1;
3365             t0 = tcg_temp_new_i32();
3366             t1 = tcg_temp_new_i32();
3367             tcg_gen_extrl_i64_i32(t0, src);
3368             tcg_gen_extrl_i64_i32(t1, shift_amount);
3369             tcg_gen_rotr_i32(t0, t0, t1);
3370             tcg_gen_extu_i32_i64(dst, t0);
3371             tcg_temp_free_i32(t0);
3372             tcg_temp_free_i32(t1);
3373         }
3374         break;
3375     default:
3376         assert(FALSE); /* all shift types should be handled */
3377         break;
3378     }
3379
3380     if (!sf) { /* zero extend final result */
3381         tcg_gen_ext32u_i64(dst, dst);
3382     }
3383 }
3384
3385 /* Shift a TCGv src by immediate, put result in dst.
3386  * The shift amount must be in range (this should always be true as the
3387  * relevant instructions will UNDEF on bad shift immediates).
3388  */
3389 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
3390                           enum a64_shift_type shift_type, unsigned int shift_i)
3391 {
3392     assert(shift_i < (sf ? 64 : 32));
3393
3394     if (shift_i == 0) {
3395         tcg_gen_mov_i64(dst, src);
3396     } else {
3397         TCGv_i64 shift_const;
3398
3399         shift_const = tcg_const_i64(shift_i);
3400         shift_reg(dst, src, sf, shift_type, shift_const);
3401         tcg_temp_free_i64(shift_const);
3402     }
3403 }
3404
3405 /* C3.5.10 Logical (shifted register)
3406  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
3407  * +----+-----+-----------+-------+---+------+--------+------+------+
3408  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
3409  * +----+-----+-----------+-------+---+------+--------+------+------+
3410  */
3411 static void disas_logic_reg(DisasContext *s, uint32_t insn)
3412 {
3413     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3414     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3415
3416     sf = extract32(insn, 31, 1);
3417     opc = extract32(insn, 29, 2);
3418     shift_type = extract32(insn, 22, 2);
3419     invert = extract32(insn, 21, 1);
3420     rm = extract32(insn, 16, 5);
3421     shift_amount = extract32(insn, 10, 6);
3422     rn = extract32(insn, 5, 5);
3423     rd = extract32(insn, 0, 5);
3424
3425     if (!sf && (shift_amount & (1 << 5))) {
3426         unallocated_encoding(s);
3427         return;
3428     }
3429
3430     tcg_rd = cpu_reg(s, rd);
3431
3432     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3433         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3434          * register-register MOV and MVN, so it is worth special casing.
3435          */
3436         tcg_rm = cpu_reg(s, rm);
3437         if (invert) {
3438             tcg_gen_not_i64(tcg_rd, tcg_rm);
3439             if (!sf) {
3440                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3441             }
3442         } else {
3443             if (sf) {
3444                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
3445             } else {
3446                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
3447             }
3448         }
3449         return;
3450     }
3451
3452     tcg_rm = read_cpu_reg(s, rm, sf);
3453
3454     if (shift_amount) {
3455         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3456     }
3457
3458     tcg_rn = cpu_reg(s, rn);
3459
3460     switch (opc | (invert << 2)) {
3461     case 0: /* AND */
3462     case 3: /* ANDS */
3463         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3464         break;
3465     case 1: /* ORR */
3466         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3467         break;
3468     case 2: /* EOR */
3469         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3470         break;
3471     case 4: /* BIC */
3472     case 7: /* BICS */
3473         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3474         break;
3475     case 5: /* ORN */
3476         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3477         break;
3478     case 6: /* EON */
3479         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3480         break;
3481     default:
3482         assert(FALSE);
3483         break;
3484     }
3485
3486     if (!sf) {
3487         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3488     }
3489
3490     if (opc == 3) {
3491         gen_logic_CC(sf, tcg_rd);
3492     }
3493 }
3494
3495 /*
3496  * C3.5.1 Add/subtract (extended register)
3497  *
3498  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
3499  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3500  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
3501  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3502  *
3503  *  sf: 0 -> 32bit, 1 -> 64bit
3504  *  op: 0 -> add  , 1 -> sub
3505  *   S: 1 -> set flags
3506  * opt: 00
3507  * option: extension type (see DecodeRegExtend)
3508  * imm3: optional shift to Rm
3509  *
3510  * Rd = Rn + LSL(extend(Rm), amount)
3511  */
3512 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3513 {
3514     int rd = extract32(insn, 0, 5);
3515     int rn = extract32(insn, 5, 5);
3516     int imm3 = extract32(insn, 10, 3);
3517     int option = extract32(insn, 13, 3);
3518     int rm = extract32(insn, 16, 5);
3519     bool setflags = extract32(insn, 29, 1);
3520     bool sub_op = extract32(insn, 30, 1);
3521     bool sf = extract32(insn, 31, 1);
3522
3523     TCGv_i64 tcg_rm, tcg_rn; /* temps */
3524     TCGv_i64 tcg_rd;
3525     TCGv_i64 tcg_result;
3526
3527     if (imm3 > 4) {
3528         unallocated_encoding(s);
3529         return;
3530     }
3531
3532     /* non-flag setting ops may use SP */
3533     if (!setflags) {
3534         tcg_rd = cpu_reg_sp(s, rd);
3535     } else {
3536         tcg_rd = cpu_reg(s, rd);
3537     }
3538     tcg_rn = read_cpu_reg_sp(s, rn, sf);
3539
3540     tcg_rm = read_cpu_reg(s, rm, sf);
3541     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3542
3543     tcg_result = tcg_temp_new_i64();
3544
3545     if (!setflags) {
3546         if (sub_op) {
3547             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3548         } else {
3549             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3550         }
3551     } else {
3552         if (sub_op) {
3553             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3554         } else {
3555             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3556         }
3557     }
3558
3559     if (sf) {
3560         tcg_gen_mov_i64(tcg_rd, tcg_result);
3561     } else {
3562         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3563     }
3564
3565     tcg_temp_free_i64(tcg_result);
3566 }
3567
3568 /*
3569  * C3.5.2 Add/subtract (shifted register)
3570  *
3571  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
3572  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3573  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
3574  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3575  *
3576  *    sf: 0 -> 32bit, 1 -> 64bit
3577  *    op: 0 -> add  , 1 -> sub
3578  *     S: 1 -> set flags
3579  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3580  *  imm6: Shift amount to apply to Rm before the add/sub
3581  */
3582 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3583 {
3584     int rd = extract32(insn, 0, 5);
3585     int rn = extract32(insn, 5, 5);
3586     int imm6 = extract32(insn, 10, 6);
3587     int rm = extract32(insn, 16, 5);
3588     int shift_type = extract32(insn, 22, 2);
3589     bool setflags = extract32(insn, 29, 1);
3590     bool sub_op = extract32(insn, 30, 1);
3591     bool sf = extract32(insn, 31, 1);
3592
3593     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3594     TCGv_i64 tcg_rn, tcg_rm;
3595     TCGv_i64 tcg_result;
3596
3597     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3598         unallocated_encoding(s);
3599         return;
3600     }
3601
3602     tcg_rn = read_cpu_reg(s, rn, sf);
3603     tcg_rm = read_cpu_reg(s, rm, sf);
3604
3605     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
3606
3607     tcg_result = tcg_temp_new_i64();
3608
3609     if (!setflags) {
3610         if (sub_op) {
3611             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3612         } else {
3613             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3614         }
3615     } else {
3616         if (sub_op) {
3617             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3618         } else {
3619             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3620         }
3621     }
3622
3623     if (sf) {
3624         tcg_gen_mov_i64(tcg_rd, tcg_result);
3625     } else {
3626         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3627     }
3628
3629     tcg_temp_free_i64(tcg_result);
3630 }
3631
3632 /* C3.5.9 Data-processing (3 source)
3633
3634    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
3635   +--+------+-----------+------+------+----+------+------+------+
3636   |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
3637   +--+------+-----------+------+------+----+------+------+------+
3638
3639  */
3640 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
3641 {
3642     int rd = extract32(insn, 0, 5);
3643     int rn = extract32(insn, 5, 5);
3644     int ra = extract32(insn, 10, 5);
3645     int rm = extract32(insn, 16, 5);
3646     int op_id = (extract32(insn, 29, 3) << 4) |
3647         (extract32(insn, 21, 3) << 1) |
3648         extract32(insn, 15, 1);
3649     bool sf = extract32(insn, 31, 1);
3650     bool is_sub = extract32(op_id, 0, 1);
3651     bool is_high = extract32(op_id, 2, 1);
3652     bool is_signed = false;
3653     TCGv_i64 tcg_op1;
3654     TCGv_i64 tcg_op2;
3655     TCGv_i64 tcg_tmp;
3656
3657     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
3658     switch (op_id) {
3659     case 0x42: /* SMADDL */
3660     case 0x43: /* SMSUBL */
3661     case 0x44: /* SMULH */
3662         is_signed = true;
3663         break;
3664     case 0x0: /* MADD (32bit) */
3665     case 0x1: /* MSUB (32bit) */
3666     case 0x40: /* MADD (64bit) */
3667     case 0x41: /* MSUB (64bit) */
3668     case 0x4a: /* UMADDL */
3669     case 0x4b: /* UMSUBL */
3670     case 0x4c: /* UMULH */
3671         break;
3672     default:
3673         unallocated_encoding(s);
3674         return;
3675     }
3676
3677     if (is_high) {
3678         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
3679         TCGv_i64 tcg_rd = cpu_reg(s, rd);
3680         TCGv_i64 tcg_rn = cpu_reg(s, rn);
3681         TCGv_i64 tcg_rm = cpu_reg(s, rm);
3682
3683         if (is_signed) {
3684             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3685         } else {
3686             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3687         }
3688
3689         tcg_temp_free_i64(low_bits);
3690         return;
3691     }
3692
3693     tcg_op1 = tcg_temp_new_i64();
3694     tcg_op2 = tcg_temp_new_i64();
3695     tcg_tmp = tcg_temp_new_i64();
3696
3697     if (op_id < 0x42) {
3698         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
3699         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
3700     } else {
3701         if (is_signed) {
3702             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
3703             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
3704         } else {
3705             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
3706             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
3707         }
3708     }
3709
3710     if (ra == 31 && !is_sub) {
3711         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
3712         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
3713     } else {
3714         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
3715         if (is_sub) {
3716             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3717         } else {
3718             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3719         }
3720     }
3721
3722     if (!sf) {
3723         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
3724     }
3725
3726     tcg_temp_free_i64(tcg_op1);
3727     tcg_temp_free_i64(tcg_op2);
3728     tcg_temp_free_i64(tcg_tmp);
3729 }
3730
3731 /* C3.5.3 - Add/subtract (with carry)
3732  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
3733  * +--+--+--+------------------------+------+---------+------+-----+
3734  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
3735  * +--+--+--+------------------------+------+---------+------+-----+
3736  *                                            [000000]
3737  */
3738
3739 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
3740 {
3741     unsigned int sf, op, setflags, rm, rn, rd;
3742     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
3743
3744     if (extract32(insn, 10, 6) != 0) {
3745         unallocated_encoding(s);
3746         return;
3747     }
3748
3749     sf = extract32(insn, 31, 1);
3750     op = extract32(insn, 30, 1);
3751     setflags = extract32(insn, 29, 1);
3752     rm = extract32(insn, 16, 5);
3753     rn = extract32(insn, 5, 5);
3754     rd = extract32(insn, 0, 5);
3755
3756     tcg_rd = cpu_reg(s, rd);
3757     tcg_rn = cpu_reg(s, rn);
3758
3759     if (op) {
3760         tcg_y = new_tmp_a64(s);
3761         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
3762     } else {
3763         tcg_y = cpu_reg(s, rm);
3764     }
3765
3766     if (setflags) {
3767         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
3768     } else {
3769         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
3770     }
3771 }
3772
3773 /* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
3774  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
3775  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3776  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
3777  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3778  *        [1]                             y                [0]       [0]
3779  */
3780 static void disas_cc(DisasContext *s, uint32_t insn)
3781 {
3782     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
3783     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
3784     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
3785     DisasCompare c;
3786
3787     if (!extract32(insn, 29, 1)) {
3788         unallocated_encoding(s);
3789         return;
3790     }
3791     if (insn & (1 << 10 | 1 << 4)) {
3792         unallocated_encoding(s);
3793         return;
3794     }
3795     sf = extract32(insn, 31, 1);
3796     op = extract32(insn, 30, 1);
3797     is_imm = extract32(insn, 11, 1);
3798     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
3799     cond = extract32(insn, 12, 4);
3800     rn = extract32(insn, 5, 5);
3801     nzcv = extract32(insn, 0, 4);
3802
3803     /* Set T0 = !COND.  */
3804     tcg_t0 = tcg_temp_new_i32();
3805     arm_test_cc(&c, cond);
3806     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
3807     arm_free_cc(&c);
3808
3809     /* Load the arguments for the new comparison.  */
3810     if (is_imm) {
3811         tcg_y = new_tmp_a64(s);
3812         tcg_gen_movi_i64(tcg_y, y);
3813     } else {
3814         tcg_y = cpu_reg(s, y);
3815     }
3816     tcg_rn = cpu_reg(s, rn);
3817
3818     /* Set the flags for the new comparison.  */
3819     tcg_tmp = tcg_temp_new_i64();
3820     if (op) {
3821         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3822     } else {
3823         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3824     }
3825     tcg_temp_free_i64(tcg_tmp);
3826
3827     /* If COND was false, force the flags to #nzcv.  Compute two masks
3828      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
3829      * For tcg hosts that support ANDC, we can make do with just T1.
3830      * In either case, allow the tcg optimizer to delete any unused mask.
3831      */
3832     tcg_t1 = tcg_temp_new_i32();
3833     tcg_t2 = tcg_temp_new_i32();
3834     tcg_gen_neg_i32(tcg_t1, tcg_t0);
3835     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
3836
3837     if (nzcv & 8) { /* N */
3838         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
3839     } else {
3840         if (TCG_TARGET_HAS_andc_i32) {
3841             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
3842         } else {
3843             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
3844         }
3845     }
3846     if (nzcv & 4) { /* Z */
3847         if (TCG_TARGET_HAS_andc_i32) {
3848             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
3849         } else {
3850             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
3851         }
3852     } else {
3853         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
3854     }
3855     if (nzcv & 2) { /* C */
3856         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
3857     } else {
3858         if (TCG_TARGET_HAS_andc_i32) {
3859             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
3860         } else {
3861             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
3862         }
3863     }
3864     if (nzcv & 1) { /* V */
3865         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
3866     } else {
3867         if (TCG_TARGET_HAS_andc_i32) {
3868             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
3869         } else {
3870             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
3871         }
3872     }
3873     tcg_temp_free_i32(tcg_t0);
3874     tcg_temp_free_i32(tcg_t1);
3875     tcg_temp_free_i32(tcg_t2);
3876 }
3877
3878 /* C3.5.6 Conditional select
3879  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
3880  * +----+----+---+-----------------+------+------+-----+------+------+
3881  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
3882  * +----+----+---+-----------------+------+------+-----+------+------+
3883  */
3884 static void disas_cond_select(DisasContext *s, uint32_t insn)
3885 {
3886     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
3887     TCGv_i64 tcg_rd, zero;
3888     DisasCompare64 c;
3889
3890     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
3891         /* S == 1 or op2<1> == 1 */
3892         unallocated_encoding(s);
3893         return;
3894     }
3895     sf = extract32(insn, 31, 1);
3896     else_inv = extract32(insn, 30, 1);
3897     rm = extract32(insn, 16, 5);
3898     cond = extract32(insn, 12, 4);
3899     else_inc = extract32(insn, 10, 1);
3900     rn = extract32(insn, 5, 5);
3901     rd = extract32(insn, 0, 5);
3902
3903     tcg_rd = cpu_reg(s, rd);
3904
3905     a64_test_cc(&c, cond);
3906     zero = tcg_const_i64(0);
3907
3908     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
3909         /* CSET & CSETM.  */
3910         tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
3911         if (else_inv) {
3912             tcg_gen_neg_i64(tcg_rd, tcg_rd);
3913         }
3914     } else {
3915         TCGv_i64 t_true = cpu_reg(s, rn);
3916         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
3917         if (else_inv && else_inc) {
3918             tcg_gen_neg_i64(t_false, t_false);
3919         } else if (else_inv) {
3920             tcg_gen_not_i64(t_false, t_false);
3921         } else if (else_inc) {
3922             tcg_gen_addi_i64(t_false, t_false, 1);
3923         }
3924         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
3925     }
3926
3927     tcg_temp_free_i64(zero);
3928     a64_free_cc(&c);
3929
3930     if (!sf) {
3931         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3932     }
3933 }
3934
3935 static void handle_clz(DisasContext *s, unsigned int sf,
3936                        unsigned int rn, unsigned int rd)
3937 {
3938     TCGv_i64 tcg_rd, tcg_rn;
3939     tcg_rd = cpu_reg(s, rd);
3940     tcg_rn = cpu_reg(s, rn);
3941
3942     if (sf) {
3943         tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
3944     } else {
3945         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3946         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3947         tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
3948         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3949         tcg_temp_free_i32(tcg_tmp32);
3950     }
3951 }
3952
3953 static void handle_cls(DisasContext *s, unsigned int sf,
3954                        unsigned int rn, unsigned int rd)
3955 {
3956     TCGv_i64 tcg_rd, tcg_rn;
3957     tcg_rd = cpu_reg(s, rd);
3958     tcg_rn = cpu_reg(s, rn);
3959
3960     if (sf) {
3961         tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
3962     } else {
3963         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3964         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3965         tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
3966         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3967         tcg_temp_free_i32(tcg_tmp32);
3968     }
3969 }
3970
3971 static void handle_rbit(DisasContext *s, unsigned int sf,
3972                         unsigned int rn, unsigned int rd)
3973 {
3974     TCGv_i64 tcg_rd, tcg_rn;
3975     tcg_rd = cpu_reg(s, rd);
3976     tcg_rn = cpu_reg(s, rn);
3977
3978     if (sf) {
3979         gen_helper_rbit64(tcg_rd, tcg_rn);
3980     } else {
3981         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3982         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3983         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
3984         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3985         tcg_temp_free_i32(tcg_tmp32);
3986     }
3987 }
3988
3989 /* C5.6.149 REV with sf==1, opcode==3 ("REV64") */
3990 static void handle_rev64(DisasContext *s, unsigned int sf,
3991                          unsigned int rn, unsigned int rd)
3992 {
3993     if (!sf) {
3994         unallocated_encoding(s);
3995         return;
3996     }
3997     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
3998 }
3999
4000 /* C5.6.149 REV with sf==0, opcode==2
4001  * C5.6.151 REV32 (sf==1, opcode==2)
4002  */
4003 static void handle_rev32(DisasContext *s, unsigned int sf,
4004                          unsigned int rn, unsigned int rd)
4005 {
4006     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4007
4008     if (sf) {
4009         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4010         TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4011
4012         /* bswap32_i64 requires zero high word */
4013         tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
4014         tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
4015         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
4016         tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
4017         tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
4018
4019         tcg_temp_free_i64(tcg_tmp);
4020     } else {
4021         tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
4022         tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
4023     }
4024 }
4025
4026 /* C5.6.150 REV16 (opcode==1) */
4027 static void handle_rev16(DisasContext *s, unsigned int sf,
4028                          unsigned int rn, unsigned int rd)
4029 {
4030     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4031     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4032     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4033
4034     tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff);
4035     tcg_gen_bswap16_i64(tcg_rd, tcg_tmp);
4036
4037     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16);
4038     tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
4039     tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
4040     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16);
4041
4042     if (sf) {
4043         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
4044         tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
4045         tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
4046         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16);
4047
4048         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48);
4049         tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
4050         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16);
4051     }
4052
4053     tcg_temp_free_i64(tcg_tmp);
4054 }
4055
4056 /* C3.5.7 Data-processing (1 source)
4057  *   31  30  29  28             21 20     16 15    10 9    5 4    0
4058  * +----+---+---+-----------------+---------+--------+------+------+
4059  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
4060  * +----+---+---+-----------------+---------+--------+------+------+
4061  */
4062 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
4063 {
4064     unsigned int sf, opcode, rn, rd;
4065
4066     if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
4067         unallocated_encoding(s);
4068         return;
4069     }
4070
4071     sf = extract32(insn, 31, 1);
4072     opcode = extract32(insn, 10, 6);
4073     rn = extract32(insn, 5, 5);
4074     rd = extract32(insn, 0, 5);
4075
4076     switch (opcode) {
4077     case 0: /* RBIT */
4078         handle_rbit(s, sf, rn, rd);
4079         break;
4080     case 1: /* REV16 */
4081         handle_rev16(s, sf, rn, rd);
4082         break;
4083     case 2: /* REV32 */
4084         handle_rev32(s, sf, rn, rd);
4085         break;
4086     case 3: /* REV64 */
4087         handle_rev64(s, sf, rn, rd);
4088         break;
4089     case 4: /* CLZ */
4090         handle_clz(s, sf, rn, rd);
4091         break;
4092     case 5: /* CLS */
4093         handle_cls(s, sf, rn, rd);
4094         break;
4095     }
4096 }
4097
4098 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
4099                        unsigned int rm, unsigned int rn, unsigned int rd)
4100 {
4101     TCGv_i64 tcg_n, tcg_m, tcg_rd;
4102     tcg_rd = cpu_reg(s, rd);
4103
4104     if (!sf && is_signed) {
4105         tcg_n = new_tmp_a64(s);
4106         tcg_m = new_tmp_a64(s);
4107         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
4108         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
4109     } else {
4110         tcg_n = read_cpu_reg(s, rn, sf);
4111         tcg_m = read_cpu_reg(s, rm, sf);
4112     }
4113
4114     if (is_signed) {
4115         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
4116     } else {
4117         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
4118     }
4119
4120     if (!sf) { /* zero extend final result */
4121         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4122     }
4123 }
4124
4125 /* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */
4126 static void handle_shift_reg(DisasContext *s,
4127                              enum a64_shift_type shift_type, unsigned int sf,
4128                              unsigned int rm, unsigned int rn, unsigned int rd)
4129 {
4130     TCGv_i64 tcg_shift = tcg_temp_new_i64();
4131     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4132     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4133
4134     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
4135     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
4136     tcg_temp_free_i64(tcg_shift);
4137 }
4138
4139 /* CRC32[BHWX], CRC32C[BHWX] */
4140 static void handle_crc32(DisasContext *s,
4141                          unsigned int sf, unsigned int sz, bool crc32c,
4142                          unsigned int rm, unsigned int rn, unsigned int rd)
4143 {
4144     TCGv_i64 tcg_acc, tcg_val;
4145     TCGv_i32 tcg_bytes;
4146
4147     if (!arm_dc_feature(s, ARM_FEATURE_CRC)
4148         || (sf == 1 && sz != 3)
4149         || (sf == 0 && sz == 3)) {
4150         unallocated_encoding(s);
4151         return;
4152     }
4153
4154     if (sz == 3) {
4155         tcg_val = cpu_reg(s, rm);
4156     } else {
4157         uint64_t mask;
4158         switch (sz) {
4159         case 0:
4160             mask = 0xFF;
4161             break;
4162         case 1:
4163             mask = 0xFFFF;
4164             break;
4165         case 2:
4166             mask = 0xFFFFFFFF;
4167             break;
4168         default:
4169             g_assert_not_reached();
4170         }
4171         tcg_val = new_tmp_a64(s);
4172         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
4173     }
4174
4175     tcg_acc = cpu_reg(s, rn);
4176     tcg_bytes = tcg_const_i32(1 << sz);
4177
4178     if (crc32c) {
4179         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4180     } else {
4181         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4182     }
4183
4184     tcg_temp_free_i32(tcg_bytes);
4185 }
4186
4187 /* C3.5.8 Data-processing (2 source)
4188  *   31   30  29 28             21 20  16 15    10 9    5 4    0
4189  * +----+---+---+-----------------+------+--------+------+------+
4190  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
4191  * +----+---+---+-----------------+------+--------+------+------+
4192  */
4193 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
4194 {
4195     unsigned int sf, rm, opcode, rn, rd;
4196     sf = extract32(insn, 31, 1);
4197     rm = extract32(insn, 16, 5);
4198     opcode = extract32(insn, 10, 6);
4199     rn = extract32(insn, 5, 5);
4200     rd = extract32(insn, 0, 5);
4201
4202     if (extract32(insn, 29, 1)) {
4203         unallocated_encoding(s);
4204         return;
4205     }
4206
4207     switch (opcode) {
4208     case 2: /* UDIV */
4209         handle_div(s, false, sf, rm, rn, rd);
4210         break;
4211     case 3: /* SDIV */
4212         handle_div(s, true, sf, rm, rn, rd);
4213         break;
4214     case 8: /* LSLV */
4215         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
4216         break;
4217     case 9: /* LSRV */
4218         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
4219         break;
4220     case 10: /* ASRV */
4221         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
4222         break;
4223     case 11: /* RORV */
4224         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
4225         break;
4226     case 16:
4227     case 17:
4228     case 18:
4229     case 19:
4230     case 20:
4231     case 21:
4232     case 22:
4233     case 23: /* CRC32 */
4234     {
4235         int sz = extract32(opcode, 0, 2);
4236         bool crc32c = extract32(opcode, 2, 1);
4237         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
4238         break;
4239     }
4240     default:
4241         unallocated_encoding(s);
4242         break;
4243     }
4244 }
4245
4246 /* C3.5 Data processing - register */
4247 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
4248 {
4249     switch (extract32(insn, 24, 5)) {
4250     case 0x0a: /* Logical (shifted register) */
4251         disas_logic_reg(s, insn);
4252         break;
4253     case 0x0b: /* Add/subtract */
4254         if (insn & (1 << 21)) { /* (extended register) */
4255             disas_add_sub_ext_reg(s, insn);
4256         } else {
4257             disas_add_sub_reg(s, insn);
4258         }
4259         break;
4260     case 0x1b: /* Data-processing (3 source) */
4261         disas_data_proc_3src(s, insn);
4262         break;
4263     case 0x1a:
4264         switch (extract32(insn, 21, 3)) {
4265         case 0x0: /* Add/subtract (with carry) */
4266             disas_adc_sbc(s, insn);
4267             break;
4268         case 0x2: /* Conditional compare */
4269             disas_cc(s, insn); /* both imm and reg forms */
4270             break;
4271         case 0x4: /* Conditional select */
4272             disas_cond_select(s, insn);
4273             break;
4274         case 0x6: /* Data-processing */
4275             if (insn & (1 << 30)) { /* (1 source) */
4276                 disas_data_proc_1src(s, insn);
4277             } else {            /* (2 source) */
4278                 disas_data_proc_2src(s, insn);
4279             }
4280             break;
4281         default:
4282             unallocated_encoding(s);
4283             break;
4284         }
4285         break;
4286     default:
4287         unallocated_encoding(s);
4288         break;
4289     }
4290 }
4291
4292 static void handle_fp_compare(DisasContext *s, bool is_double,
4293                               unsigned int rn, unsigned int rm,
4294                               bool cmp_with_zero, bool signal_all_nans)
4295 {
4296     TCGv_i64 tcg_flags = tcg_temp_new_i64();
4297     TCGv_ptr fpst = get_fpstatus_ptr();
4298
4299     if (is_double) {
4300         TCGv_i64 tcg_vn, tcg_vm;
4301
4302         tcg_vn = read_fp_dreg(s, rn);
4303         if (cmp_with_zero) {
4304             tcg_vm = tcg_const_i64(0);
4305         } else {
4306             tcg_vm = read_fp_dreg(s, rm);
4307         }
4308         if (signal_all_nans) {
4309             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4310         } else {
4311             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4312         }
4313         tcg_temp_free_i64(tcg_vn);
4314         tcg_temp_free_i64(tcg_vm);
4315     } else {
4316         TCGv_i32 tcg_vn, tcg_vm;
4317
4318         tcg_vn = read_fp_sreg(s, rn);
4319         if (cmp_with_zero) {
4320             tcg_vm = tcg_const_i32(0);
4321         } else {
4322             tcg_vm = read_fp_sreg(s, rm);
4323         }
4324         if (signal_all_nans) {
4325             gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4326         } else {
4327             gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4328         }
4329         tcg_temp_free_i32(tcg_vn);
4330         tcg_temp_free_i32(tcg_vm);
4331     }
4332
4333     tcg_temp_free_ptr(fpst);
4334
4335     gen_set_nzcv(tcg_flags);
4336
4337     tcg_temp_free_i64(tcg_flags);
4338 }
4339
4340 /* C3.6.22 Floating point compare
4341  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
4342  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4343  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
4344  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4345  */
4346 static void disas_fp_compare(DisasContext *s, uint32_t insn)
4347 {
4348     unsigned int mos, type, rm, op, rn, opc, op2r;
4349
4350     mos = extract32(insn, 29, 3);
4351     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4352     rm = extract32(insn, 16, 5);
4353     op = extract32(insn, 14, 2);
4354     rn = extract32(insn, 5, 5);
4355     opc = extract32(insn, 3, 2);
4356     op2r = extract32(insn, 0, 3);
4357
4358     if (mos || op || op2r || type > 1) {
4359         unallocated_encoding(s);
4360         return;
4361     }
4362
4363     if (!fp_access_check(s)) {
4364         return;
4365     }
4366
4367     handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
4368 }
4369
4370 /* C3.6.23 Floating point conditional compare
4371  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
4372  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4373  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
4374  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4375  */
4376 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
4377 {
4378     unsigned int mos, type, rm, cond, rn, op, nzcv;
4379     TCGv_i64 tcg_flags;
4380     TCGLabel *label_continue = NULL;
4381
4382     mos = extract32(insn, 29, 3);
4383     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4384     rm = extract32(insn, 16, 5);
4385     cond = extract32(insn, 12, 4);
4386     rn = extract32(insn, 5, 5);
4387     op = extract32(insn, 4, 1);
4388     nzcv = extract32(insn, 0, 4);
4389
4390     if (mos || type > 1) {
4391         unallocated_encoding(s);
4392         return;
4393     }
4394
4395     if (!fp_access_check(s)) {
4396         return;
4397     }
4398
4399     if (cond < 0x0e) { /* not always */
4400         TCGLabel *label_match = gen_new_label();
4401         label_continue = gen_new_label();
4402         arm_gen_test_cc(cond, label_match);
4403         /* nomatch: */
4404         tcg_flags = tcg_const_i64(nzcv << 28);
4405         gen_set_nzcv(tcg_flags);
4406         tcg_temp_free_i64(tcg_flags);
4407         tcg_gen_br(label_continue);
4408         gen_set_label(label_match);
4409     }
4410
4411     handle_fp_compare(s, type, rn, rm, false, op);
4412
4413     if (cond < 0x0e) {
4414         gen_set_label(label_continue);
4415     }
4416 }
4417
4418 /* C3.6.24 Floating point conditional select
4419  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
4420  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4421  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
4422  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4423  */
4424 static void disas_fp_csel(DisasContext *s, uint32_t insn)
4425 {
4426     unsigned int mos, type, rm, cond, rn, rd;
4427     TCGv_i64 t_true, t_false, t_zero;
4428     DisasCompare64 c;
4429
4430     mos = extract32(insn, 29, 3);
4431     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4432     rm = extract32(insn, 16, 5);
4433     cond = extract32(insn, 12, 4);
4434     rn = extract32(insn, 5, 5);
4435     rd = extract32(insn, 0, 5);
4436
4437     if (mos || type > 1) {
4438         unallocated_encoding(s);
4439         return;
4440     }
4441
4442     if (!fp_access_check(s)) {
4443         return;
4444     }
4445
4446     /* Zero extend sreg inputs to 64 bits now.  */
4447     t_true = tcg_temp_new_i64();
4448     t_false = tcg_temp_new_i64();
4449     read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
4450     read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);
4451
4452     a64_test_cc(&c, cond);
4453     t_zero = tcg_const_i64(0);
4454     tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
4455     tcg_temp_free_i64(t_zero);
4456     tcg_temp_free_i64(t_false);
4457     a64_free_cc(&c);
4458
4459     /* Note that sregs write back zeros to the high bits,
4460        and we've already done the zero-extension.  */
4461     write_fp_dreg(s, rd, t_true);
4462     tcg_temp_free_i64(t_true);
4463 }
4464
4465 /* C3.6.25 Floating-point data-processing (1 source) - single precision */
4466 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
4467 {
4468     TCGv_ptr fpst;
4469     TCGv_i32 tcg_op;
4470     TCGv_i32 tcg_res;
4471
4472     fpst = get_fpstatus_ptr();
4473     tcg_op = read_fp_sreg(s, rn);
4474     tcg_res = tcg_temp_new_i32();
4475
4476     switch (opcode) {
4477     case 0x0: /* FMOV */
4478         tcg_gen_mov_i32(tcg_res, tcg_op);
4479         break;
4480     case 0x1: /* FABS */
4481         gen_helper_vfp_abss(tcg_res, tcg_op);
4482         break;
4483     case 0x2: /* FNEG */
4484         gen_helper_vfp_negs(tcg_res, tcg_op);
4485         break;
4486     case 0x3: /* FSQRT */
4487         gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
4488         break;
4489     case 0x8: /* FRINTN */
4490     case 0x9: /* FRINTP */
4491     case 0xa: /* FRINTM */
4492     case 0xb: /* FRINTZ */
4493     case 0xc: /* FRINTA */
4494     {
4495         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4496
4497         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4498         gen_helper_rints(tcg_res, tcg_op, fpst);
4499
4500         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4501         tcg_temp_free_i32(tcg_rmode);
4502         break;
4503     }
4504     case 0xe: /* FRINTX */
4505         gen_helper_rints_exact(tcg_res, tcg_op, fpst);
4506         break;
4507     case 0xf: /* FRINTI */
4508         gen_helper_rints(tcg_res, tcg_op, fpst);
4509         break;
4510     default:
4511         abort();
4512     }
4513
4514     write_fp_sreg(s, rd, tcg_res);
4515
4516     tcg_temp_free_ptr(fpst);
4517     tcg_temp_free_i32(tcg_op);
4518     tcg_temp_free_i32(tcg_res);
4519 }
4520
4521 /* C3.6.25 Floating-point data-processing (1 source) - double precision */
4522 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
4523 {
4524     TCGv_ptr fpst;
4525     TCGv_i64 tcg_op;
4526     TCGv_i64 tcg_res;
4527
4528     fpst = get_fpstatus_ptr();
4529     tcg_op = read_fp_dreg(s, rn);
4530     tcg_res = tcg_temp_new_i64();
4531
4532     switch (opcode) {
4533     case 0x0: /* FMOV */
4534         tcg_gen_mov_i64(tcg_res, tcg_op);
4535         break;
4536     case 0x1: /* FABS */
4537         gen_helper_vfp_absd(tcg_res, tcg_op);
4538         break;
4539     case 0x2: /* FNEG */
4540         gen_helper_vfp_negd(tcg_res, tcg_op);
4541         break;
4542     case 0x3: /* FSQRT */
4543         gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
4544         break;
4545     case 0x8: /* FRINTN */
4546     case 0x9: /* FRINTP */
4547     case 0xa: /* FRINTM */
4548     case 0xb: /* FRINTZ */
4549     case 0xc: /* FRINTA */
4550     {
4551         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4552
4553         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4554         gen_helper_rintd(tcg_res, tcg_op, fpst);
4555
4556         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4557         tcg_temp_free_i32(tcg_rmode);
4558         break;
4559     }
4560     case 0xe: /* FRINTX */
4561         gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
4562         break;
4563     case 0xf: /* FRINTI */
4564         gen_helper_rintd(tcg_res, tcg_op, fpst);
4565         break;
4566     default:
4567         abort();
4568     }
4569
4570     write_fp_dreg(s, rd, tcg_res);
4571
4572     tcg_temp_free_ptr(fpst);
4573     tcg_temp_free_i64(tcg_op);
4574     tcg_temp_free_i64(tcg_res);
4575 }
4576
4577 static void handle_fp_fcvt(DisasContext *s, int opcode,
4578                            int rd, int rn, int dtype, int ntype)
4579 {
4580     switch (ntype) {
4581     case 0x0:
4582     {
4583         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4584         if (dtype == 1) {
4585             /* Single to double */
4586             TCGv_i64 tcg_rd = tcg_temp_new_i64();
4587             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
4588             write_fp_dreg(s, rd, tcg_rd);
4589             tcg_temp_free_i64(tcg_rd);
4590         } else {
4591             /* Single to half */
4592             TCGv_i32 tcg_rd = tcg_temp_new_i32();
4593             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
4594             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4595             write_fp_sreg(s, rd, tcg_rd);
4596             tcg_temp_free_i32(tcg_rd);
4597         }
4598         tcg_temp_free_i32(tcg_rn);
4599         break;
4600     }
4601     case 0x1:
4602     {
4603         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
4604         TCGv_i32 tcg_rd = tcg_temp_new_i32();
4605         if (dtype == 0) {
4606             /* Double to single */
4607             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
4608         } else {
4609             /* Double to half */
4610             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
4611             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4612         }
4613         write_fp_sreg(s, rd, tcg_rd);
4614         tcg_temp_free_i32(tcg_rd);
4615         tcg_temp_free_i64(tcg_rn);
4616         break;
4617     }
4618     case 0x3:
4619     {
4620         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4621         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
4622         if (dtype == 0) {
4623             /* Half to single */
4624             TCGv_i32 tcg_rd = tcg_temp_new_i32();
4625             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
4626             write_fp_sreg(s, rd, tcg_rd);
4627             tcg_temp_free_i32(tcg_rd);
4628         } else {
4629             /* Half to double */
4630             TCGv_i64 tcg_rd = tcg_temp_new_i64();
4631             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
4632             write_fp_dreg(s, rd, tcg_rd);
4633             tcg_temp_free_i64(tcg_rd);
4634         }
4635         tcg_temp_free_i32(tcg_rn);
4636         break;
4637     }
4638     default:
4639         abort();
4640     }
4641 }
4642
4643 /* C3.6.25 Floating point data-processing (1 source)
4644  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
4645  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4646  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
4647  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4648  */
4649 static void disas_fp_1src(DisasContext *s, uint32_t insn)
4650 {
4651     int type = extract32(insn, 22, 2);
4652     int opcode = extract32(insn, 15, 6);
4653     int rn = extract32(insn, 5, 5);
4654     int rd = extract32(insn, 0, 5);
4655
4656     switch (opcode) {
4657     case 0x4: case 0x5: case 0x7:
4658     {
4659         /* FCVT between half, single and double precision */
4660         int dtype = extract32(opcode, 0, 2);
4661         if (type == 2 || dtype == type) {
4662             unallocated_encoding(s);
4663             return;
4664         }
4665         if (!fp_access_check(s)) {
4666             return;
4667         }
4668
4669         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
4670         break;
4671     }
4672     case 0x0 ... 0x3:
4673     case 0x8 ... 0xc:
4674     case 0xe ... 0xf:
4675         /* 32-to-32 and 64-to-64 ops */
4676         switch (type) {
4677         case 0:
4678             if (!fp_access_check(s)) {
4679                 return;
4680             }
4681
4682             handle_fp_1src_single(s, opcode, rd, rn);
4683             break;
4684         case 1:
4685             if (!fp_access_check(s)) {
4686                 return;
4687             }
4688
4689             handle_fp_1src_double(s, opcode, rd, rn);
4690             break;
4691         default:
4692             unallocated_encoding(s);
4693         }
4694         break;
4695     default:
4696         unallocated_encoding(s);
4697         break;
4698     }
4699 }
4700
4701 /* C3.6.26 Floating-point data-processing (2 source) - single precision */
4702 static void handle_fp_2src_single(DisasContext *s, int opcode,
4703                                   int rd, int rn, int rm)
4704 {
4705     TCGv_i32 tcg_op1;
4706     TCGv_i32 tcg_op2;
4707     TCGv_i32 tcg_res;
4708     TCGv_ptr fpst;
4709
4710     tcg_res = tcg_temp_new_i32();
4711     fpst = get_fpstatus_ptr();
4712     tcg_op1 = read_fp_sreg(s, rn);
4713     tcg_op2 = read_fp_sreg(s, rm);
4714
4715     switch (opcode) {
4716     case 0x0: /* FMUL */
4717         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4718         break;
4719     case 0x1: /* FDIV */
4720         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
4721         break;
4722     case 0x2: /* FADD */
4723         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
4724         break;
4725     case 0x3: /* FSUB */
4726         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
4727         break;
4728     case 0x4: /* FMAX */
4729         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
4730         break;
4731     case 0x5: /* FMIN */
4732         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
4733         break;
4734     case 0x6: /* FMAXNM */
4735         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
4736         break;
4737     case 0x7: /* FMINNM */
4738         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
4739         break;
4740     case 0x8: /* FNMUL */
4741         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4742         gen_helper_vfp_negs(tcg_res, tcg_res);
4743         break;
4744     }
4745
4746     write_fp_sreg(s, rd, tcg_res);
4747
4748     tcg_temp_free_ptr(fpst);
4749     tcg_temp_free_i32(tcg_op1);
4750     tcg_temp_free_i32(tcg_op2);
4751     tcg_temp_free_i32(tcg_res);
4752 }
4753
4754 /* C3.6.26 Floating-point data-processing (2 source) - double precision */
4755 static void handle_fp_2src_double(DisasContext *s, int opcode,
4756                                   int rd, int rn, int rm)
4757 {
4758     TCGv_i64 tcg_op1;
4759     TCGv_i64 tcg_op2;
4760     TCGv_i64 tcg_res;
4761     TCGv_ptr fpst;
4762
4763     tcg_res = tcg_temp_new_i64();
4764     fpst = get_fpstatus_ptr();
4765     tcg_op1 = read_fp_dreg(s, rn);
4766     tcg_op2 = read_fp_dreg(s, rm);
4767
4768     switch (opcode) {
4769     case 0x0: /* FMUL */
4770         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4771         break;
4772     case 0x1: /* FDIV */
4773         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
4774         break;
4775     case 0x2: /* FADD */
4776         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
4777         break;
4778     case 0x3: /* FSUB */
4779         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
4780         break;
4781     case 0x4: /* FMAX */
4782         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
4783         break;
4784     case 0x5: /* FMIN */
4785         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
4786         break;
4787     case 0x6: /* FMAXNM */
4788         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4789         break;
4790     case 0x7: /* FMINNM */
4791         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4792         break;
4793     case 0x8: /* FNMUL */
4794         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4795         gen_helper_vfp_negd(tcg_res, tcg_res);
4796         break;
4797     }
4798
4799     write_fp_dreg(s, rd, tcg_res);
4800
4801     tcg_temp_free_ptr(fpst);
4802     tcg_temp_free_i64(tcg_op1);
4803     tcg_temp_free_i64(tcg_op2);
4804     tcg_temp_free_i64(tcg_res);
4805 }
4806
4807 /* C3.6.26 Floating point data-processing (2 source)
4808  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
4809  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4810  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
4811  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4812  */
4813 static void disas_fp_2src(DisasContext *s, uint32_t insn)
4814 {
4815     int type = extract32(insn, 22, 2);
4816     int rd = extract32(insn, 0, 5);
4817     int rn = extract32(insn, 5, 5);
4818     int rm = extract32(insn, 16, 5);
4819     int opcode = extract32(insn, 12, 4);
4820
4821     if (opcode > 8) {
4822         unallocated_encoding(s);
4823         return;
4824     }
4825
4826     switch (type) {
4827     case 0:
4828         if (!fp_access_check(s)) {
4829             return;
4830         }
4831         handle_fp_2src_single(s, opcode, rd, rn, rm);
4832         break;
4833     case 1:
4834         if (!fp_access_check(s)) {
4835             return;
4836         }
4837         handle_fp_2src_double(s, opcode, rd, rn, rm);
4838         break;
4839     default:
4840         unallocated_encoding(s);
4841     }
4842 }
4843
4844 /* C3.6.27 Floating-point data-processing (3 source) - single precision */
4845 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
4846                                   int rd, int rn, int rm, int ra)
4847 {
4848     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
4849     TCGv_i32 tcg_res = tcg_temp_new_i32();
4850     TCGv_ptr fpst = get_fpstatus_ptr();
4851
4852     tcg_op1 = read_fp_sreg(s, rn);
4853     tcg_op2 = read_fp_sreg(s, rm);
4854     tcg_op3 = read_fp_sreg(s, ra);
4855
4856     /* These are fused multiply-add, and must be done as one
4857      * floating point operation with no rounding between the
4858      * multiplication and addition steps.
4859      * NB that doing the negations here as separate steps is
4860      * correct : an input NaN should come out with its sign bit
4861      * flipped if it is a negated-input.
4862      */
4863     if (o1 == true) {
4864         gen_helper_vfp_negs(tcg_op3, tcg_op3);
4865     }
4866
4867     if (o0 != o1) {
4868         gen_helper_vfp_negs(tcg_op1, tcg_op1);
4869     }
4870
4871     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4872
4873     write_fp_sreg(s, rd, tcg_res);
4874
4875     tcg_temp_free_ptr(fpst);
4876     tcg_temp_free_i32(tcg_op1);
4877     tcg_temp_free_i32(tcg_op2);
4878     tcg_temp_free_i32(tcg_op3);
4879     tcg_temp_free_i32(tcg_res);
4880 }
4881
4882 /* C3.6.27 Floating-point data-processing (3 source) - double precision */
4883 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
4884                                   int rd, int rn, int rm, int ra)
4885 {
4886     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
4887     TCGv_i64 tcg_res = tcg_temp_new_i64();
4888     TCGv_ptr fpst = get_fpstatus_ptr();
4889
4890     tcg_op1 = read_fp_dreg(s, rn);
4891     tcg_op2 = read_fp_dreg(s, rm);
4892     tcg_op3 = read_fp_dreg(s, ra);
4893
4894     /* These are fused multiply-add, and must be done as one
4895      * floating point operation with no rounding between the
4896      * multiplication and addition steps.
4897      * NB that doing the negations here as separate steps is
4898      * correct : an input NaN should come out with its sign bit
4899      * flipped if it is a negated-input.
4900      */
4901     if (o1 == true) {
4902         gen_helper_vfp_negd(tcg_op3, tcg_op3);
4903     }
4904
4905     if (o0 != o1) {
4906         gen_helper_vfp_negd(tcg_op1, tcg_op1);
4907     }
4908
4909     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4910
4911     write_fp_dreg(s, rd, tcg_res);
4912
4913     tcg_temp_free_ptr(fpst);
4914     tcg_temp_free_i64(tcg_op1);
4915     tcg_temp_free_i64(tcg_op2);
4916     tcg_temp_free_i64(tcg_op3);
4917     tcg_temp_free_i64(tcg_res);
4918 }
4919
4920 /* C3.6.27 Floating point data-processing (3 source)
4921  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
4922  * +---+---+---+-----------+------+----+------+----+------+------+------+
4923  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4924  * +---+---+---+-----------+------+----+------+----+------+------+------+
4925  */
4926 static void disas_fp_3src(DisasContext *s, uint32_t insn)
4927 {
4928     int type = extract32(insn, 22, 2);
4929     int rd = extract32(insn, 0, 5);
4930     int rn = extract32(insn, 5, 5);
4931     int ra = extract32(insn, 10, 5);
4932     int rm = extract32(insn, 16, 5);
4933     bool o0 = extract32(insn, 15, 1);
4934     bool o1 = extract32(insn, 21, 1);
4935
4936     switch (type) {
4937     case 0:
4938         if (!fp_access_check(s)) {
4939             return;
4940         }
4941         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
4942         break;
4943     case 1:
4944         if (!fp_access_check(s)) {
4945             return;
4946         }
4947         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
4948         break;
4949     default:
4950         unallocated_encoding(s);
4951     }
4952 }
4953
4954 /* C3.6.28 Floating point immediate
4955  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
4956  * +---+---+---+-----------+------+---+------------+-------+------+------+
4957  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
4958  * +---+---+---+-----------+------+---+------------+-------+------+------+
4959  */
4960 static void disas_fp_imm(DisasContext *s, uint32_t insn)
4961 {
4962     int rd = extract32(insn, 0, 5);
4963     int imm8 = extract32(insn, 13, 8);
4964     int is_double = extract32(insn, 22, 2);
4965     uint64_t imm;
4966     TCGv_i64 tcg_res;
4967
4968     if (is_double > 1) {
4969         unallocated_encoding(s);
4970         return;
4971     }
4972
4973     if (!fp_access_check(s)) {
4974         return;
4975     }
4976
4977     /* The imm8 encodes the sign bit, enough bits to represent
4978      * an exponent in the range 01....1xx to 10....0xx,
4979      * and the most significant 4 bits of the mantissa; see
4980      * VFPExpandImm() in the v8 ARM ARM.
4981      */
4982     if (is_double) {
4983         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4984             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
4985             extract32(imm8, 0, 6);
4986         imm <<= 48;
4987     } else {
4988         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4989             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
4990             (extract32(imm8, 0, 6) << 3);
4991         imm <<= 16;
4992     }
4993
4994     tcg_res = tcg_const_i64(imm);
4995     write_fp_dreg(s, rd, tcg_res);
4996     tcg_temp_free_i64(tcg_res);
4997 }
4998
4999 /* Handle floating point <=> fixed point conversions. Note that we can
5000  * also deal with fp <=> integer conversions as a special case (scale == 64)
5001  * OPTME: consider handling that special case specially or at least skipping
5002  * the call to scalbn in the helpers for zero shifts.
5003  */
5004 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
5005                            bool itof, int rmode, int scale, int sf, int type)
5006 {
5007     bool is_signed = !(opcode & 1);
5008     bool is_double = type;
5009     TCGv_ptr tcg_fpstatus;
5010     TCGv_i32 tcg_shift;
5011
5012     tcg_fpstatus = get_fpstatus_ptr();
5013
5014     tcg_shift = tcg_const_i32(64 - scale);
5015
5016     if (itof) {
5017         TCGv_i64 tcg_int = cpu_reg(s, rn);
5018         if (!sf) {
5019             TCGv_i64 tcg_extend = new_tmp_a64(s);
5020
5021             if (is_signed) {
5022                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
5023             } else {
5024                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
5025             }
5026
5027             tcg_int = tcg_extend;
5028         }
5029
5030         if (is_double) {
5031             TCGv_i64 tcg_double = tcg_temp_new_i64();
5032             if (is_signed) {
5033                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
5034                                      tcg_shift, tcg_fpstatus);
5035             } else {
5036                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
5037                                      tcg_shift, tcg_fpstatus);
5038             }
5039             write_fp_dreg(s, rd, tcg_double);
5040             tcg_temp_free_i64(tcg_double);
5041         } else {
5042             TCGv_i32 tcg_single = tcg_temp_new_i32();
5043             if (is_signed) {
5044                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
5045                                      tcg_shift, tcg_fpstatus);
5046             } else {
5047                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
5048                                      tcg_shift, tcg_fpstatus);
5049             }
5050             write_fp_sreg(s, rd, tcg_single);
5051             tcg_temp_free_i32(tcg_single);
5052         }
5053     } else {
5054         TCGv_i64 tcg_int = cpu_reg(s, rd);
5055         TCGv_i32 tcg_rmode;
5056
5057         if (extract32(opcode, 2, 1)) {
5058             /* There are too many rounding modes to all fit into rmode,
5059              * so FCVTA[US] is a special case.
5060              */
5061             rmode = FPROUNDING_TIEAWAY;
5062         }
5063
5064         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
5065
5066         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
5067
5068         if (is_double) {
5069             TCGv_i64 tcg_double = read_fp_dreg(s, rn);
5070             if (is_signed) {
5071                 if (!sf) {
5072                     gen_helper_vfp_tosld(tcg_int, tcg_double,
5073                                          tcg_shift, tcg_fpstatus);
5074                 } else {
5075                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
5076                                          tcg_shift, tcg_fpstatus);
5077                 }
5078             } else {
5079                 if (!sf) {
5080                     gen_helper_vfp_tould(tcg_int, tcg_double,
5081                                          tcg_shift, tcg_fpstatus);
5082                 } else {
5083                     gen_helper_vfp_touqd(tcg_int, tcg_double,
5084                                          tcg_shift, tcg_fpstatus);
5085                 }
5086             }
5087             tcg_temp_free_i64(tcg_double);
5088         } else {
5089             TCGv_i32 tcg_single = read_fp_sreg(s, rn);
5090             if (sf) {
5091                 if (is_signed) {
5092                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
5093                                          tcg_shift, tcg_fpstatus);
5094                 } else {
5095                     gen_helper_vfp_touqs(tcg_int, tcg_single,
5096                                          tcg_shift, tcg_fpstatus);
5097                 }
5098             } else {
5099                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
5100                 if (is_signed) {
5101                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
5102                                          tcg_shift, tcg_fpstatus);
5103                 } else {
5104                     gen_helper_vfp_touls(tcg_dest, tcg_single,
5105                                          tcg_shift, tcg_fpstatus);
5106                 }
5107                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
5108                 tcg_temp_free_i32(tcg_dest);
5109             }
5110             tcg_temp_free_i32(tcg_single);
5111         }
5112
5113         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
5114         tcg_temp_free_i32(tcg_rmode);
5115
5116         if (!sf) {
5117             tcg_gen_ext32u_i64(tcg_int, tcg_int);
5118         }
5119     }
5120
5121     tcg_temp_free_ptr(tcg_fpstatus);
5122     tcg_temp_free_i32(tcg_shift);
5123 }
5124
5125 /* C3.6.29 Floating point <-> fixed point conversions
5126  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
5127  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5128  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
5129  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5130  */
5131 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
5132 {
5133     int rd = extract32(insn, 0, 5);
5134     int rn = extract32(insn, 5, 5);
5135     int scale = extract32(insn, 10, 6);
5136     int opcode = extract32(insn, 16, 3);
5137     int rmode = extract32(insn, 19, 2);
5138     int type = extract32(insn, 22, 2);
5139     bool sbit = extract32(insn, 29, 1);
5140     bool sf = extract32(insn, 31, 1);
5141     bool itof;
5142
5143     if (sbit || (type > 1)
5144         || (!sf && scale < 32)) {
5145         unallocated_encoding(s);
5146         return;
5147     }
5148
5149     switch ((rmode << 3) | opcode) {
5150     case 0x2: /* SCVTF */
5151     case 0x3: /* UCVTF */
5152         itof = true;
5153         break;
5154     case 0x18: /* FCVTZS */
5155     case 0x19: /* FCVTZU */
5156         itof = false;
5157         break;
5158     default:
5159         unallocated_encoding(s);
5160         return;
5161     }
5162
5163     if (!fp_access_check(s)) {
5164         return;
5165     }
5166
5167     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
5168 }
5169
5170 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
5171 {
5172     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
5173      * without conversion.
5174      */
5175
5176     if (itof) {
5177         TCGv_i64 tcg_rn = cpu_reg(s, rn);
5178
5179         switch (type) {
5180         case 0:
5181         {
5182             /* 32 bit */
5183             TCGv_i64 tmp = tcg_temp_new_i64();
5184             tcg_gen_ext32u_i64(tmp, tcg_rn);
5185             tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
5186             tcg_gen_movi_i64(tmp, 0);
5187             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5188             tcg_temp_free_i64(tmp);
5189             break;
5190         }
5191         case 1:
5192         {
5193             /* 64 bit */
5194             TCGv_i64 tmp = tcg_const_i64(0);
5195             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
5196             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5197             tcg_temp_free_i64(tmp);
5198             break;
5199         }
5200         case 2:
5201             /* 64 bit to top half. */
5202             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
5203             break;
5204         }
5205     } else {
5206         TCGv_i64 tcg_rd = cpu_reg(s, rd);
5207
5208         switch (type) {
5209         case 0:
5210             /* 32 bit */
5211             tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
5212             break;
5213         case 1:
5214             /* 64 bit */
5215             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
5216             break;
5217         case 2:
5218             /* 64 bits from top half */
5219             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
5220             break;
5221         }
5222     }
5223 }
5224
5225 /* C3.6.30 Floating point <-> integer conversions
5226  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
5227  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5228  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
5229  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5230  */
5231 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
5232 {
5233     int rd = extract32(insn, 0, 5);
5234     int rn = extract32(insn, 5, 5);
5235     int opcode = extract32(insn, 16, 3);
5236     int rmode = extract32(insn, 19, 2);
5237     int type = extract32(insn, 22, 2);
5238     bool sbit = extract32(insn, 29, 1);
5239     bool sf = extract32(insn, 31, 1);
5240
5241     if (sbit) {
5242         unallocated_encoding(s);
5243         return;
5244     }
5245
5246     if (opcode > 5) {
5247         /* FMOV */
5248         bool itof = opcode & 1;
5249
5250         if (rmode >= 2) {
5251             unallocated_encoding(s);
5252             return;
5253         }
5254
5255         switch (sf << 3 | type << 1 | rmode) {
5256         case 0x0: /* 32 bit */
5257         case 0xa: /* 64 bit */
5258         case 0xd: /* 64 bit to top half of quad */
5259             break;
5260         default:
5261             /* all other sf/type/rmode combinations are invalid */
5262             unallocated_encoding(s);
5263             break;
5264         }
5265
5266         if (!fp_access_check(s)) {
5267             return;
5268         }
5269         handle_fmov(s, rd, rn, type, itof);
5270     } else {
5271         /* actual FP conversions */
5272         bool itof = extract32(opcode, 1, 1);
5273
5274         if (type > 1 || (rmode != 0 && opcode > 1)) {
5275             unallocated_encoding(s);
5276             return;
5277         }
5278
5279         if (!fp_access_check(s)) {
5280             return;
5281         }
5282         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
5283     }
5284 }
5285
5286 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
5287  *   31  30  29 28     25 24                          0
5288  * +---+---+---+---------+-----------------------------+
5289  * |   | 0 |   | 1 1 1 1 |                             |
5290  * +---+---+---+---------+-----------------------------+
5291  */
5292 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
5293 {
5294     if (extract32(insn, 24, 1)) {
5295         /* Floating point data-processing (3 source) */
5296         disas_fp_3src(s, insn);
5297     } else if (extract32(insn, 21, 1) == 0) {
5298         /* Floating point to fixed point conversions */
5299         disas_fp_fixed_conv(s, insn);
5300     } else {
5301         switch (extract32(insn, 10, 2)) {
5302         case 1:
5303             /* Floating point conditional compare */
5304             disas_fp_ccomp(s, insn);
5305             break;
5306         case 2:
5307             /* Floating point data-processing (2 source) */
5308             disas_fp_2src(s, insn);
5309             break;
5310         case 3:
5311             /* Floating point conditional select */
5312             disas_fp_csel(s, insn);
5313             break;
5314         case 0:
5315             switch (ctz32(extract32(insn, 12, 4))) {
5316             case 0: /* [15:12] == xxx1 */
5317                 /* Floating point immediate */
5318                 disas_fp_imm(s, insn);
5319                 break;
5320             case 1: /* [15:12] == xx10 */
5321                 /* Floating point compare */
5322                 disas_fp_compare(s, insn);
5323                 break;
5324             case 2: /* [15:12] == x100 */
5325                 /* Floating point data-processing (1 source) */
5326                 disas_fp_1src(s, insn);
5327                 break;
5328             case 3: /* [15:12] == 1000 */
5329                 unallocated_encoding(s);
5330                 break;
5331             default: /* [15:12] == 0000 */
5332                 /* Floating point <-> integer conversions */
5333                 disas_fp_int_conv(s, insn);
5334                 break;
5335             }
5336             break;
5337         }
5338     }
5339 }
5340
5341 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
5342                      int pos)
5343 {
5344     /* Extract 64 bits from the middle of two concatenated 64 bit
5345      * vector register slices left:right. The extracted bits start
5346      * at 'pos' bits into the right (least significant) side.
5347      * We return the result in tcg_right, and guarantee not to
5348      * trash tcg_left.
5349      */
5350     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5351     assert(pos > 0 && pos < 64);
5352
5353     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
5354     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
5355     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
5356
5357     tcg_temp_free_i64(tcg_tmp);
5358 }
5359
5360 /* C3.6.1 EXT
5361  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
5362  * +---+---+-------------+-----+---+------+---+------+---+------+------+
5363  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
5364  * +---+---+-------------+-----+---+------+---+------+---+------+------+
5365  */
5366 static void disas_simd_ext(DisasContext *s, uint32_t insn)
5367 {
5368     int is_q = extract32(insn, 30, 1);
5369     int op2 = extract32(insn, 22, 2);
5370     int imm4 = extract32(insn, 11, 4);
5371     int rm = extract32(insn, 16, 5);
5372     int rn = extract32(insn, 5, 5);
5373     int rd = extract32(insn, 0, 5);
5374     int pos = imm4 << 3;
5375     TCGv_i64 tcg_resl, tcg_resh;
5376
5377     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
5378         unallocated_encoding(s);
5379         return;
5380     }
5381
5382     if (!fp_access_check(s)) {
5383         return;
5384     }
5385
5386     tcg_resh = tcg_temp_new_i64();
5387     tcg_resl = tcg_temp_new_i64();
5388
5389     /* Vd gets bits starting at pos bits into Vm:Vn. This is
5390      * either extracting 128 bits from a 128:128 concatenation, or
5391      * extracting 64 bits from a 64:64 concatenation.
5392      */
5393     if (!is_q) {
5394         read_vec_element(s, tcg_resl, rn, 0, MO_64);
5395         if (pos != 0) {
5396             read_vec_element(s, tcg_resh, rm, 0, MO_64);
5397             do_ext64(s, tcg_resh, tcg_resl, pos);
5398         }
5399         tcg_gen_movi_i64(tcg_resh, 0);
5400     } else {
5401         TCGv_i64 tcg_hh;
5402         typedef struct {
5403             int reg;
5404             int elt;
5405         } EltPosns;
5406         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
5407         EltPosns *elt = eltposns;
5408
5409         if (pos >= 64) {
5410             elt++;
5411             pos -= 64;
5412         }
5413
5414         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
5415         elt++;
5416         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
5417         elt++;
5418         if (pos != 0) {
5419             do_ext64(s, tcg_resh, tcg_resl, pos);
5420             tcg_hh = tcg_temp_new_i64();
5421             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
5422             do_ext64(s, tcg_hh, tcg_resh, pos);
5423             tcg_temp_free_i64(tcg_hh);
5424         }
5425     }
5426
5427     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5428     tcg_temp_free_i64(tcg_resl);
5429     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5430     tcg_temp_free_i64(tcg_resh);
5431 }
5432
5433 /* C3.6.2 TBL/TBX
5434  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
5435  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5436  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
5437  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5438  */
5439 static void disas_simd_tb(DisasContext *s, uint32_t insn)
5440 {
5441     int op2 = extract32(insn, 22, 2);
5442     int is_q = extract32(insn, 30, 1);
5443     int rm = extract32(insn, 16, 5);
5444     int rn = extract32(insn, 5, 5);
5445     int rd = extract32(insn, 0, 5);
5446     int is_tblx = extract32(insn, 12, 1);
5447     int len = extract32(insn, 13, 2);
5448     TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
5449     TCGv_i32 tcg_regno, tcg_numregs;
5450
5451     if (op2 != 0) {
5452         unallocated_encoding(s);
5453         return;
5454     }
5455
5456     if (!fp_access_check(s)) {
5457         return;
5458     }
5459
5460     /* This does a table lookup: for every byte element in the input
5461      * we index into a table formed from up to four vector registers,
5462      * and then the output is the result of the lookups. Our helper
5463      * function does the lookup operation for a single 64 bit part of
5464      * the input.
5465      */
5466     tcg_resl = tcg_temp_new_i64();
5467     tcg_resh = tcg_temp_new_i64();
5468
5469     if (is_tblx) {
5470         read_vec_element(s, tcg_resl, rd, 0, MO_64);
5471     } else {
5472         tcg_gen_movi_i64(tcg_resl, 0);
5473     }
5474     if (is_tblx && is_q) {
5475         read_vec_element(s, tcg_resh, rd, 1, MO_64);
5476     } else {
5477         tcg_gen_movi_i64(tcg_resh, 0);
5478     }
5479
5480     tcg_idx = tcg_temp_new_i64();
5481     tcg_regno = tcg_const_i32(rn);
5482     tcg_numregs = tcg_const_i32(len + 1);
5483     read_vec_element(s, tcg_idx, rm, 0, MO_64);
5484     gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
5485                         tcg_regno, tcg_numregs);
5486     if (is_q) {
5487         read_vec_element(s, tcg_idx, rm, 1, MO_64);
5488         gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
5489                             tcg_regno, tcg_numregs);
5490     }
5491     tcg_temp_free_i64(tcg_idx);
5492     tcg_temp_free_i32(tcg_regno);
5493     tcg_temp_free_i32(tcg_numregs);
5494
5495     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5496     tcg_temp_free_i64(tcg_resl);
5497     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5498     tcg_temp_free_i64(tcg_resh);
5499 }
5500
5501 /* C3.6.3 ZIP/UZP/TRN
5502  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
5503  * +---+---+-------------+------+---+------+---+------------------+------+
5504  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
5505  * +---+---+-------------+------+---+------+---+------------------+------+
5506  */
5507 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
5508 {
5509     int rd = extract32(insn, 0, 5);
5510     int rn = extract32(insn, 5, 5);
5511     int rm = extract32(insn, 16, 5);
5512     int size = extract32(insn, 22, 2);
5513     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
5514      * bit 2 indicates 1 vs 2 variant of the insn.
5515      */
5516     int opcode = extract32(insn, 12, 2);
5517     bool part = extract32(insn, 14, 1);
5518     bool is_q = extract32(insn, 30, 1);
5519     int esize = 8 << size;
5520     int i, ofs;
5521     int datasize = is_q ? 128 : 64;
5522     int elements = datasize / esize;
5523     TCGv_i64 tcg_res, tcg_resl, tcg_resh;
5524
5525     if (opcode == 0 || (size == 3 && !is_q)) {
5526         unallocated_encoding(s);
5527         return;
5528     }
5529
5530     if (!fp_access_check(s)) {
5531         return;
5532     }
5533
5534     tcg_resl = tcg_const_i64(0);
5535     tcg_resh = tcg_const_i64(0);
5536     tcg_res = tcg_temp_new_i64();
5537
5538     for (i = 0; i < elements; i++) {
5539         switch (opcode) {
5540         case 1: /* UZP1/2 */
5541         {
5542             int midpoint = elements / 2;
5543             if (i < midpoint) {
5544                 read_vec_element(s, tcg_res, rn, 2 * i + part, size);
5545             } else {
5546                 read_vec_element(s, tcg_res, rm,
5547                                  2 * (i - midpoint) + part, size);
5548             }
5549             break;
5550         }
5551         case 2: /* TRN1/2 */
5552             if (i & 1) {
5553                 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
5554             } else {
5555                 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
5556             }
5557             break;
5558         case 3: /* ZIP1/2 */
5559         {
5560             int base = part * elements / 2;
5561             if (i & 1) {
5562                 read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
5563             } else {
5564                 read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
5565             }
5566             break;
5567         }
5568         default:
5569             g_assert_not_reached();
5570         }
5571
5572         ofs = i * esize;
5573         if (ofs < 64) {
5574             tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
5575             tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
5576         } else {
5577             tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
5578             tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
5579         }
5580     }
5581
5582     tcg_temp_free_i64(tcg_res);
5583
5584     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5585     tcg_temp_free_i64(tcg_resl);
5586     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5587     tcg_temp_free_i64(tcg_resh);
5588 }
5589
5590 static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
5591                         int opc, bool is_min, TCGv_ptr fpst)
5592 {
5593     /* Helper function for disas_simd_across_lanes: do a single precision
5594      * min/max operation on the specified two inputs,
5595      * and return the result in tcg_elt1.
5596      */
5597     if (opc == 0xc) {
5598         if (is_min) {
5599             gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5600         } else {
5601             gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5602         }
5603     } else {
5604         assert(opc == 0xf);
5605         if (is_min) {
5606             gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5607         } else {
5608             gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5609         }
5610     }
5611 }
5612
5613 /* C3.6.4 AdvSIMD across lanes
5614  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
5615  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5616  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
5617  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5618  */
5619 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
5620 {
5621     int rd = extract32(insn, 0, 5);
5622     int rn = extract32(insn, 5, 5);
5623     int size = extract32(insn, 22, 2);
5624     int opcode = extract32(insn, 12, 5);
5625     bool is_q = extract32(insn, 30, 1);
5626     bool is_u = extract32(insn, 29, 1);
5627     bool is_fp = false;
5628     bool is_min = false;
5629     int esize;
5630     int elements;
5631     int i;
5632     TCGv_i64 tcg_res, tcg_elt;
5633
5634     switch (opcode) {
5635     case 0x1b: /* ADDV */
5636         if (is_u) {
5637             unallocated_encoding(s);
5638             return;
5639         }
5640         /* fall through */
5641     case 0x3: /* SADDLV, UADDLV */
5642     case 0xa: /* SMAXV, UMAXV */
5643     case 0x1a: /* SMINV, UMINV */
5644         if (size == 3 || (size == 2 && !is_q)) {
5645             unallocated_encoding(s);
5646             return;
5647         }
5648         break;
5649     case 0xc: /* FMAXNMV, FMINNMV */
5650     case 0xf: /* FMAXV, FMINV */
5651         if (!is_u || !is_q || extract32(size, 0, 1)) {
5652             unallocated_encoding(s);
5653             return;
5654         }
5655         /* Bit 1 of size field encodes min vs max, and actual size is always
5656          * 32 bits: adjust the size variable so following code can rely on it
5657          */
5658         is_min = extract32(size, 1, 1);
5659         is_fp = true;
5660         size = 2;
5661         break;
5662     default:
5663         unallocated_encoding(s);
5664         return;
5665     }
5666
5667     if (!fp_access_check(s)) {
5668         return;
5669     }
5670
5671     esize = 8 << size;
5672     elements = (is_q ? 128 : 64) / esize;
5673
5674     tcg_res = tcg_temp_new_i64();
5675     tcg_elt = tcg_temp_new_i64();
5676
5677     /* These instructions operate across all lanes of a vector
5678      * to produce a single result. We can guarantee that a 64
5679      * bit intermediate is sufficient:
5680      *  + for [US]ADDLV the maximum element size is 32 bits, and
5681      *    the result type is 64 bits
5682      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
5683      *    same as the element size, which is 32 bits at most
5684      * For the integer operations we can choose to work at 64
5685      * or 32 bits and truncate at the end; for simplicity
5686      * we use 64 bits always. The floating point
5687      * ops do require 32 bit intermediates, though.
5688      */
5689     if (!is_fp) {
5690         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
5691
5692         for (i = 1; i < elements; i++) {
5693             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
5694
5695             switch (opcode) {
5696             case 0x03: /* SADDLV / UADDLV */
5697             case 0x1b: /* ADDV */
5698                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
5699                 break;
5700             case 0x0a: /* SMAXV / UMAXV */
5701                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
5702                                     tcg_res,
5703                                     tcg_res, tcg_elt, tcg_res, tcg_elt);
5704                 break;
5705             case 0x1a: /* SMINV / UMINV */
5706                 tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
5707                                     tcg_res,
5708                                     tcg_res, tcg_elt, tcg_res, tcg_elt);
5709                 break;
5710                 break;
5711             default:
5712                 g_assert_not_reached();
5713             }
5714
5715         }
5716     } else {
5717         /* Floating point ops which work on 32 bit (single) intermediates.
5718          * Note that correct NaN propagation requires that we do these
5719          * operations in exactly the order specified by the pseudocode.
5720          */
5721         TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
5722         TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
5723         TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
5724         TCGv_ptr fpst = get_fpstatus_ptr();
5725
5726         assert(esize == 32);
5727         assert(elements == 4);
5728
5729         read_vec_element(s, tcg_elt, rn, 0, MO_32);
5730         tcg_gen_extrl_i64_i32(tcg_elt1, tcg_elt);
5731         read_vec_element(s, tcg_elt, rn, 1, MO_32);
5732         tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5733
5734         do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5735
5736         read_vec_element(s, tcg_elt, rn, 2, MO_32);
5737         tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5738         read_vec_element(s, tcg_elt, rn, 3, MO_32);
5739         tcg_gen_extrl_i64_i32(tcg_elt3, tcg_elt);
5740
5741         do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
5742
5743         do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5744
5745         tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
5746         tcg_temp_free_i32(tcg_elt1);
5747         tcg_temp_free_i32(tcg_elt2);
5748         tcg_temp_free_i32(tcg_elt3);
5749         tcg_temp_free_ptr(fpst);
5750     }
5751
5752     tcg_temp_free_i64(tcg_elt);
5753
5754     /* Now truncate the result to the width required for the final output */
5755     if (opcode == 0x03) {
5756         /* SADDLV, UADDLV: result is 2*esize */
5757         size++;
5758     }
5759
5760     switch (size) {
5761     case 0:
5762         tcg_gen_ext8u_i64(tcg_res, tcg_res);
5763         break;
5764     case 1:
5765         tcg_gen_ext16u_i64(tcg_res, tcg_res);
5766         break;
5767     case 2:
5768         tcg_gen_ext32u_i64(tcg_res, tcg_res);
5769         break;
5770     case 3:
5771         break;
5772     default:
5773         g_assert_not_reached();
5774     }
5775
5776     write_fp_dreg(s, rd, tcg_res);
5777     tcg_temp_free_i64(tcg_res);
5778 }
5779
5780 /* C6.3.31 DUP (Element, Vector)
5781  *
5782  *  31  30   29              21 20    16 15        10  9    5 4    0
5783  * +---+---+-------------------+--------+-------------+------+------+
5784  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5785  * +---+---+-------------------+--------+-------------+------+------+
5786  *
5787  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5788  */
5789 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
5790                              int imm5)
5791 {
5792     int size = ctz32(imm5);
5793     int esize = 8 << size;
5794     int elements = (is_q ? 128 : 64) / esize;
5795     int index, i;
5796     TCGv_i64 tmp;
5797
5798     if (size > 3 || (size == 3 && !is_q)) {
5799         unallocated_encoding(s);
5800         return;
5801     }
5802
5803     if (!fp_access_check(s)) {
5804         return;
5805     }
5806
5807     index = imm5 >> (size + 1);
5808
5809     tmp = tcg_temp_new_i64();
5810     read_vec_element(s, tmp, rn, index, size);
5811
5812     for (i = 0; i < elements; i++) {
5813         write_vec_element(s, tmp, rd, i, size);
5814     }
5815
5816     if (!is_q) {
5817         clear_vec_high(s, rd);
5818     }
5819
5820     tcg_temp_free_i64(tmp);
5821 }
5822
5823 /* C6.3.31 DUP (element, scalar)
5824  *  31                   21 20    16 15        10  9    5 4    0
5825  * +-----------------------+--------+-------------+------+------+
5826  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5827  * +-----------------------+--------+-------------+------+------+
5828  */
5829 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
5830                               int imm5)
5831 {
5832     int size = ctz32(imm5);
5833     int index;
5834     TCGv_i64 tmp;
5835
5836     if (size > 3) {
5837         unallocated_encoding(s);
5838         return;
5839     }
5840
5841     if (!fp_access_check(s)) {
5842         return;
5843     }
5844
5845     index = imm5 >> (size + 1);
5846
5847     /* This instruction just extracts the specified element and
5848      * zero-extends it into the bottom of the destination register.
5849      */
5850     tmp = tcg_temp_new_i64();
5851     read_vec_element(s, tmp, rn, index, size);
5852     write_fp_dreg(s, rd, tmp);
5853     tcg_temp_free_i64(tmp);
5854 }
5855
5856 /* C6.3.32 DUP (General)
5857  *
5858  *  31  30   29              21 20    16 15        10  9    5 4    0
5859  * +---+---+-------------------+--------+-------------+------+------+
5860  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
5861  * +---+---+-------------------+--------+-------------+------+------+
5862  *
5863  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5864  */
5865 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
5866                              int imm5)
5867 {
5868     int size = ctz32(imm5);
5869     int esize = 8 << size;
5870     int elements = (is_q ? 128 : 64)/esize;
5871     int i = 0;
5872
5873     if (size > 3 || ((size == 3) && !is_q)) {
5874         unallocated_encoding(s);
5875         return;
5876     }
5877
5878     if (!fp_access_check(s)) {
5879         return;
5880     }
5881
5882     for (i = 0; i < elements; i++) {
5883         write_vec_element(s, cpu_reg(s, rn), rd, i, size);
5884     }
5885     if (!is_q) {
5886         clear_vec_high(s, rd);
5887     }
5888 }
5889
5890 /* C6.3.150 INS (Element)
5891  *
5892  *  31                   21 20    16 15  14    11  10 9    5 4    0
5893  * +-----------------------+--------+------------+---+------+------+
5894  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
5895  * +-----------------------+--------+------------+---+------+------+
5896  *
5897  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5898  * index: encoded in imm5<4:size+1>
5899  */
5900 static void handle_simd_inse(DisasContext *s, int rd, int rn,
5901                              int imm4, int imm5)
5902 {
5903     int size = ctz32(imm5);
5904     int src_index, dst_index;
5905     TCGv_i64 tmp;
5906
5907     if (size > 3) {
5908         unallocated_encoding(s);
5909         return;
5910     }
5911
5912     if (!fp_access_check(s)) {
5913         return;
5914     }
5915
5916     dst_index = extract32(imm5, 1+size, 5);
5917     src_index = extract32(imm4, size, 4);
5918
5919     tmp = tcg_temp_new_i64();
5920
5921     read_vec_element(s, tmp, rn, src_index, size);
5922     write_vec_element(s, tmp, rd, dst_index, size);
5923
5924     tcg_temp_free_i64(tmp);
5925 }
5926
5927
5928 /* C6.3.151 INS (General)
5929  *
5930  *  31                   21 20    16 15        10  9    5 4    0
5931  * +-----------------------+--------+-------------+------+------+
5932  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
5933  * +-----------------------+--------+-------------+------+------+
5934  *
5935  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5936  * index: encoded in imm5<4:size+1>
5937  */
5938 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
5939 {
5940     int size = ctz32(imm5);
5941     int idx;
5942
5943     if (size > 3) {
5944         unallocated_encoding(s);
5945         return;
5946     }
5947
5948     if (!fp_access_check(s)) {
5949         return;
5950     }
5951
5952     idx = extract32(imm5, 1 + size, 4 - size);
5953     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
5954 }
5955
5956 /*
5957  * C6.3.321 UMOV (General)
5958  * C6.3.237 SMOV (General)
5959  *
5960  *  31  30   29              21 20    16 15    12   10 9    5 4    0
5961  * +---+---+-------------------+--------+-------------+------+------+
5962  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
5963  * +---+---+-------------------+--------+-------------+------+------+
5964  *
5965  * U: unsigned when set
5966  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5967  */
5968 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
5969                                   int rn, int rd, int imm5)
5970 {
5971     int size = ctz32(imm5);
5972     int element;
5973     TCGv_i64 tcg_rd;
5974
5975     /* Check for UnallocatedEncodings */
5976     if (is_signed) {
5977         if (size > 2 || (size == 2 && !is_q)) {
5978             unallocated_encoding(s);
5979             return;
5980         }
5981     } else {
5982         if (size > 3
5983             || (size < 3 && is_q)
5984             || (size == 3 && !is_q)) {
5985             unallocated_encoding(s);
5986             return;
5987         }
5988     }
5989
5990     if (!fp_access_check(s)) {
5991         return;
5992     }
5993
5994     element = extract32(imm5, 1+size, 4);
5995
5996     tcg_rd = cpu_reg(s, rd);
5997     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
5998     if (is_signed && !is_q) {
5999         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
6000     }
6001 }
6002
6003 /* C3.6.5 AdvSIMD copy
6004  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
6005  * +---+---+----+-----------------+------+---+------+---+------+------+
6006  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6007  * +---+---+----+-----------------+------+---+------+---+------+------+
6008  */
6009 static void disas_simd_copy(DisasContext *s, uint32_t insn)
6010 {
6011     int rd = extract32(insn, 0, 5);
6012     int rn = extract32(insn, 5, 5);
6013     int imm4 = extract32(insn, 11, 4);
6014     int op = extract32(insn, 29, 1);
6015     int is_q = extract32(insn, 30, 1);
6016     int imm5 = extract32(insn, 16, 5);
6017
6018     if (op) {
6019         if (is_q) {
6020             /* INS (element) */
6021             handle_simd_inse(s, rd, rn, imm4, imm5);
6022         } else {
6023             unallocated_encoding(s);
6024         }
6025     } else {
6026         switch (imm4) {
6027         case 0:
6028             /* DUP (element - vector) */
6029             handle_simd_dupe(s, is_q, rd, rn, imm5);
6030             break;
6031         case 1:
6032             /* DUP (general) */
6033             handle_simd_dupg(s, is_q, rd, rn, imm5);
6034             break;
6035         case 3:
6036             if (is_q) {
6037                 /* INS (general) */
6038                 handle_simd_insg(s, rd, rn, imm5);
6039             } else {
6040                 unallocated_encoding(s);
6041             }
6042             break;
6043         case 5:
6044         case 7:
6045             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
6046             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
6047             break;
6048         default:
6049             unallocated_encoding(s);
6050             break;
6051         }
6052     }
6053 }
6054
6055 /* C3.6.6 AdvSIMD modified immediate
6056  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
6057  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6058  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
6059  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6060  *
6061  * There are a number of operations that can be carried out here:
6062  *   MOVI - move (shifted) imm into register
6063  *   MVNI - move inverted (shifted) imm into register
6064  *   ORR  - bitwise OR of (shifted) imm with register
6065  *   BIC  - bitwise clear of (shifted) imm with register
6066  */
6067 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
6068 {
6069     int rd = extract32(insn, 0, 5);
6070     int cmode = extract32(insn, 12, 4);
6071     int cmode_3_1 = extract32(cmode, 1, 3);
6072     int cmode_0 = extract32(cmode, 0, 1);
6073     int o2 = extract32(insn, 11, 1);
6074     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
6075     bool is_neg = extract32(insn, 29, 1);
6076     bool is_q = extract32(insn, 30, 1);
6077     uint64_t imm = 0;
6078     TCGv_i64 tcg_rd, tcg_imm;
6079     int i;
6080
6081     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
6082         unallocated_encoding(s);
6083         return;
6084     }
6085
6086     if (!fp_access_check(s)) {
6087         return;
6088     }
6089
6090     /* See AdvSIMDExpandImm() in ARM ARM */
6091     switch (cmode_3_1) {
6092     case 0: /* Replicate(Zeros(24):imm8, 2) */
6093     case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
6094     case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
6095     case 3: /* Replicate(imm8:Zeros(24), 2) */
6096     {
6097         int shift = cmode_3_1 * 8;
6098         imm = bitfield_replicate(abcdefgh << shift, 32);
6099         break;
6100     }
6101     case 4: /* Replicate(Zeros(8):imm8, 4) */
6102     case 5: /* Replicate(imm8:Zeros(8), 4) */
6103     {
6104         int shift = (cmode_3_1 & 0x1) * 8;
6105         imm = bitfield_replicate(abcdefgh << shift, 16);
6106         break;
6107     }
6108     case 6:
6109         if (cmode_0) {
6110             /* Replicate(Zeros(8):imm8:Ones(16), 2) */
6111             imm = (abcdefgh << 16) | 0xffff;
6112         } else {
6113             /* Replicate(Zeros(16):imm8:Ones(8), 2) */
6114             imm = (abcdefgh << 8) | 0xff;
6115         }
6116         imm = bitfield_replicate(imm, 32);
6117         break;
6118     case 7:
6119         if (!cmode_0 && !is_neg) {
6120             imm = bitfield_replicate(abcdefgh, 8);
6121         } else if (!cmode_0 && is_neg) {
6122             int i;
6123             imm = 0;
6124             for (i = 0; i < 8; i++) {
6125                 if ((abcdefgh) & (1 << i)) {
6126                     imm |= 0xffULL << (i * 8);
6127                 }
6128             }
6129         } else if (cmode_0) {
6130             if (is_neg) {
6131                 imm = (abcdefgh & 0x3f) << 48;
6132                 if (abcdefgh & 0x80) {
6133                     imm |= 0x8000000000000000ULL;
6134                 }
6135                 if (abcdefgh & 0x40) {
6136                     imm |= 0x3fc0000000000000ULL;
6137                 } else {
6138                     imm |= 0x4000000000000000ULL;
6139                 }
6140             } else {
6141                 imm = (abcdefgh & 0x3f) << 19;
6142                 if (abcdefgh & 0x80) {
6143                     imm |= 0x80000000;
6144                 }
6145                 if (abcdefgh & 0x40) {
6146                     imm |= 0x3e000000;
6147                 } else {
6148                     imm |= 0x40000000;
6149                 }
6150                 imm |= (imm << 32);
6151             }
6152         }
6153         break;
6154     }
6155
6156     if (cmode_3_1 != 7 && is_neg) {
6157         imm = ~imm;
6158     }
6159
6160     tcg_imm = tcg_const_i64(imm);
6161     tcg_rd = new_tmp_a64(s);
6162
6163     for (i = 0; i < 2; i++) {
6164         int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64);
6165
6166         if (i == 1 && !is_q) {
6167             /* non-quad ops clear high half of vector */
6168             tcg_gen_movi_i64(tcg_rd, 0);
6169         } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
6170             tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
6171             if (is_neg) {
6172                 /* AND (BIC) */
6173                 tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
6174             } else {
6175                 /* ORR */
6176                 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
6177             }
6178         } else {
6179             /* MOVI */
6180             tcg_gen_mov_i64(tcg_rd, tcg_imm);
6181         }
6182         tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
6183     }
6184
6185     tcg_temp_free_i64(tcg_imm);
6186 }
6187
6188 /* C3.6.7 AdvSIMD scalar copy
6189  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
6190  * +-----+----+-----------------+------+---+------+---+------+------+
6191  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6192  * +-----+----+-----------------+------+---+------+---+------+------+
6193  */
6194 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
6195 {
6196     int rd = extract32(insn, 0, 5);
6197     int rn = extract32(insn, 5, 5);
6198     int imm4 = extract32(insn, 11, 4);
6199     int imm5 = extract32(insn, 16, 5);
6200     int op = extract32(insn, 29, 1);
6201
6202     if (op != 0 || imm4 != 0) {
6203         unallocated_encoding(s);
6204         return;
6205     }
6206
6207     /* DUP (element, scalar) */
6208     handle_simd_dupes(s, rd, rn, imm5);
6209 }
6210
6211 /* C3.6.8 AdvSIMD scalar pairwise
6212  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
6213  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6214  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6215  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6216  */
6217 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
6218 {
6219     int u = extract32(insn, 29, 1);
6220     int size = extract32(insn, 22, 2);
6221     int opcode = extract32(insn, 12, 5);
6222     int rn = extract32(insn, 5, 5);
6223     int rd = extract32(insn, 0, 5);
6224     TCGv_ptr fpst;
6225
6226     /* For some ops (the FP ones), size[1] is part of the encoding.
6227      * For ADDP strictly it is not but size[1] is always 1 for valid
6228      * encodings.
6229      */
6230     opcode |= (extract32(size, 1, 1) << 5);
6231
6232     switch (opcode) {
6233     case 0x3b: /* ADDP */
6234         if (u || size != 3) {
6235             unallocated_encoding(s);
6236             return;
6237         }
6238         if (!fp_access_check(s)) {
6239             return;
6240         }
6241
6242         TCGV_UNUSED_PTR(fpst);
6243         break;
6244     case 0xc: /* FMAXNMP */
6245     case 0xd: /* FADDP */
6246     case 0xf: /* FMAXP */
6247     case 0x2c: /* FMINNMP */
6248     case 0x2f: /* FMINP */
6249         /* FP op, size[0] is 32 or 64 bit */
6250         if (!u) {
6251             unallocated_encoding(s);
6252             return;
6253         }
6254         if (!fp_access_check(s)) {
6255             return;
6256         }
6257
6258         size = extract32(size, 0, 1) ? 3 : 2;
6259         fpst = get_fpstatus_ptr();
6260         break;
6261     default:
6262         unallocated_encoding(s);
6263         return;
6264     }
6265
6266     if (size == 3) {
6267         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6268         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6269         TCGv_i64 tcg_res = tcg_temp_new_i64();
6270
6271         read_vec_element(s, tcg_op1, rn, 0, MO_64);
6272         read_vec_element(s, tcg_op2, rn, 1, MO_64);
6273
6274         switch (opcode) {
6275         case 0x3b: /* ADDP */
6276             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
6277             break;
6278         case 0xc: /* FMAXNMP */
6279             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6280             break;
6281         case 0xd: /* FADDP */
6282             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6283             break;
6284         case 0xf: /* FMAXP */
6285             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6286             break;
6287         case 0x2c: /* FMINNMP */
6288             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6289             break;
6290         case 0x2f: /* FMINP */
6291             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6292             break;
6293         default:
6294             g_assert_not_reached();
6295         }
6296
6297         write_fp_dreg(s, rd, tcg_res);
6298
6299         tcg_temp_free_i64(tcg_op1);
6300         tcg_temp_free_i64(tcg_op2);
6301         tcg_temp_free_i64(tcg_res);
6302     } else {
6303         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6304         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6305         TCGv_i32 tcg_res = tcg_temp_new_i32();
6306
6307         read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
6308         read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);
6309
6310         switch (opcode) {
6311         case 0xc: /* FMAXNMP */
6312             gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6313             break;
6314         case 0xd: /* FADDP */
6315             gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6316             break;
6317         case 0xf: /* FMAXP */
6318             gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6319             break;
6320         case 0x2c: /* FMINNMP */
6321             gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6322             break;
6323         case 0x2f: /* FMINP */
6324             gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6325             break;
6326         default:
6327             g_assert_not_reached();
6328         }
6329
6330         write_fp_sreg(s, rd, tcg_res);
6331
6332         tcg_temp_free_i32(tcg_op1);
6333         tcg_temp_free_i32(tcg_op2);
6334         tcg_temp_free_i32(tcg_res);
6335     }
6336
6337     if (!TCGV_IS_UNUSED_PTR(fpst)) {
6338         tcg_temp_free_ptr(fpst);
6339     }
6340 }
6341
6342 /*
6343  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
6344  *
6345  * This code is handles the common shifting code and is used by both
6346  * the vector and scalar code.
6347  */
6348 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6349                                     TCGv_i64 tcg_rnd, bool accumulate,
6350                                     bool is_u, int size, int shift)
6351 {
6352     bool extended_result = false;
6353     bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
6354     int ext_lshift = 0;
6355     TCGv_i64 tcg_src_hi;
6356
6357     if (round && size == 3) {
6358         extended_result = true;
6359         ext_lshift = 64 - shift;
6360         tcg_src_hi = tcg_temp_new_i64();
6361     } else if (shift == 64) {
6362         if (!accumulate && is_u) {
6363             /* result is zero */
6364             tcg_gen_movi_i64(tcg_res, 0);
6365             return;
6366         }
6367     }
6368
6369     /* Deal with the rounding step */
6370     if (round) {
6371         if (extended_result) {
6372             TCGv_i64 tcg_zero = tcg_const_i64(0);
6373             if (!is_u) {
6374                 /* take care of sign extending tcg_res */
6375                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
6376                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6377                                  tcg_src, tcg_src_hi,
6378                                  tcg_rnd, tcg_zero);
6379             } else {
6380                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6381                                  tcg_src, tcg_zero,
6382                                  tcg_rnd, tcg_zero);
6383             }
6384             tcg_temp_free_i64(tcg_zero);
6385         } else {
6386             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
6387         }
6388     }
6389
6390     /* Now do the shift right */
6391     if (round && extended_result) {
6392         /* extended case, >64 bit precision required */
6393         if (ext_lshift == 0) {
6394             /* special case, only high bits matter */
6395             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
6396         } else {
6397             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6398             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
6399             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
6400         }
6401     } else {
6402         if (is_u) {
6403             if (shift == 64) {
6404                 /* essentially shifting in 64 zeros */
6405                 tcg_gen_movi_i64(tcg_src, 0);
6406             } else {
6407                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6408             }
6409         } else {
6410             if (shift == 64) {
6411                 /* effectively extending the sign-bit */
6412                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
6413             } else {
6414                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
6415             }
6416         }
6417     }
6418
6419     if (accumulate) {
6420         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
6421     } else {
6422         tcg_gen_mov_i64(tcg_res, tcg_src);
6423     }
6424
6425     if (extended_result) {
6426         tcg_temp_free_i64(tcg_src_hi);
6427     }
6428 }
6429
6430 /* Common SHL/SLI - Shift left with an optional insert */
6431 static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6432                                  bool insert, int shift)
6433 {
6434     if (insert) { /* SLI */
6435         tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
6436     } else { /* SHL */
6437         tcg_gen_shli_i64(tcg_res, tcg_src, shift);
6438     }
6439 }
6440
6441 /* SRI: shift right with insert */
6442 static void handle_shri_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6443                                  int size, int shift)
6444 {
6445     int esize = 8 << size;
6446
6447     /* shift count same as element size is valid but does nothing;
6448      * special case to avoid potential shift by 64.
6449      */
6450     if (shift != esize) {
6451         tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6452         tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, 0, esize - shift);
6453     }
6454 }
6455
6456 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
6457 static void handle_scalar_simd_shri(DisasContext *s,
6458                                     bool is_u, int immh, int immb,
6459                                     int opcode, int rn, int rd)
6460 {
6461     const int size = 3;
6462     int immhb = immh << 3 | immb;
6463     int shift = 2 * (8 << size) - immhb;
6464     bool accumulate = false;
6465     bool round = false;
6466     bool insert = false;
6467     TCGv_i64 tcg_rn;
6468     TCGv_i64 tcg_rd;
6469     TCGv_i64 tcg_round;
6470
6471     if (!extract32(immh, 3, 1)) {
6472         unallocated_encoding(s);
6473         return;
6474     }
6475
6476     if (!fp_access_check(s)) {
6477         return;
6478     }
6479
6480     switch (opcode) {
6481     case 0x02: /* SSRA / USRA (accumulate) */
6482         accumulate = true;
6483         break;
6484     case 0x04: /* SRSHR / URSHR (rounding) */
6485         round = true;
6486         break;
6487     case 0x06: /* SRSRA / URSRA (accum + rounding) */
6488         accumulate = round = true;
6489         break;
6490     case 0x08: /* SRI */
6491         insert = true;
6492         break;
6493     }
6494
6495     if (round) {
6496         uint64_t round_const = 1ULL << (shift - 1);
6497         tcg_round = tcg_const_i64(round_const);
6498     } else {
6499         TCGV_UNUSED_I64(tcg_round);
6500     }
6501
6502     tcg_rn = read_fp_dreg(s, rn);
6503     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6504
6505     if (insert) {
6506         handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
6507     } else {
6508         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6509                                 accumulate, is_u, size, shift);
6510     }
6511
6512     write_fp_dreg(s, rd, tcg_rd);
6513
6514     tcg_temp_free_i64(tcg_rn);
6515     tcg_temp_free_i64(tcg_rd);
6516     if (round) {
6517         tcg_temp_free_i64(tcg_round);
6518     }
6519 }
6520
6521 /* SHL/SLI - Scalar shift left */
6522 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
6523                                     int immh, int immb, int opcode,
6524                                     int rn, int rd)
6525 {
6526     int size = 32 - clz32(immh) - 1;
6527     int immhb = immh << 3 | immb;
6528     int shift = immhb - (8 << size);
6529     TCGv_i64 tcg_rn = new_tmp_a64(s);
6530     TCGv_i64 tcg_rd = new_tmp_a64(s);
6531
6532     if (!extract32(immh, 3, 1)) {
6533         unallocated_encoding(s);
6534         return;
6535     }
6536
6537     if (!fp_access_check(s)) {
6538         return;
6539     }
6540
6541     tcg_rn = read_fp_dreg(s, rn);
6542     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6543
6544     handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
6545
6546     write_fp_dreg(s, rd, tcg_rd);
6547
6548     tcg_temp_free_i64(tcg_rn);
6549     tcg_temp_free_i64(tcg_rd);
6550 }
6551
6552 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
6553  * (signed/unsigned) narrowing */
6554 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
6555                                    bool is_u_shift, bool is_u_narrow,
6556                                    int immh, int immb, int opcode,
6557                                    int rn, int rd)
6558 {
6559     int immhb = immh << 3 | immb;
6560     int size = 32 - clz32(immh) - 1;
6561     int esize = 8 << size;
6562     int shift = (2 * esize) - immhb;
6563     int elements = is_scalar ? 1 : (64 / esize);
6564     bool round = extract32(opcode, 0, 1);
6565     TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
6566     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
6567     TCGv_i32 tcg_rd_narrowed;
6568     TCGv_i64 tcg_final;
6569
6570     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
6571         { gen_helper_neon_narrow_sat_s8,
6572           gen_helper_neon_unarrow_sat8 },
6573         { gen_helper_neon_narrow_sat_s16,
6574           gen_helper_neon_unarrow_sat16 },
6575         { gen_helper_neon_narrow_sat_s32,
6576           gen_helper_neon_unarrow_sat32 },
6577         { NULL, NULL },
6578     };
6579     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
6580         gen_helper_neon_narrow_sat_u8,
6581         gen_helper_neon_narrow_sat_u16,
6582         gen_helper_neon_narrow_sat_u32,
6583         NULL
6584     };
6585     NeonGenNarrowEnvFn *narrowfn;
6586
6587     int i;
6588
6589     assert(size < 4);
6590
6591     if (extract32(immh, 3, 1)) {
6592         unallocated_encoding(s);
6593         return;
6594     }
6595
6596     if (!fp_access_check(s)) {
6597         return;
6598     }
6599
6600     if (is_u_shift) {
6601         narrowfn = unsigned_narrow_fns[size];
6602     } else {
6603         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
6604     }
6605
6606     tcg_rn = tcg_temp_new_i64();
6607     tcg_rd = tcg_temp_new_i64();
6608     tcg_rd_narrowed = tcg_temp_new_i32();
6609     tcg_final = tcg_const_i64(0);
6610
6611     if (round) {
6612         uint64_t round_const = 1ULL << (shift - 1);
6613         tcg_round = tcg_const_i64(round_const);
6614     } else {
6615         TCGV_UNUSED_I64(tcg_round);
6616     }
6617
6618     for (i = 0; i < elements; i++) {
6619         read_vec_element(s, tcg_rn, rn, i, ldop);
6620         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6621                                 false, is_u_shift, size+1, shift);
6622         narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
6623         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
6624         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
6625     }
6626
6627     if (!is_q) {
6628         clear_vec_high(s, rd);
6629         write_vec_element(s, tcg_final, rd, 0, MO_64);
6630     } else {
6631         write_vec_element(s, tcg_final, rd, 1, MO_64);
6632     }
6633
6634     if (round) {
6635         tcg_temp_free_i64(tcg_round);
6636     }
6637     tcg_temp_free_i64(tcg_rn);
6638     tcg_temp_free_i64(tcg_rd);
6639     tcg_temp_free_i32(tcg_rd_narrowed);
6640     tcg_temp_free_i64(tcg_final);
6641     return;
6642 }
6643
6644 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
6645 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
6646                              bool src_unsigned, bool dst_unsigned,
6647                              int immh, int immb, int rn, int rd)
6648 {
6649     int immhb = immh << 3 | immb;
6650     int size = 32 - clz32(immh) - 1;
6651     int shift = immhb - (8 << size);
6652     int pass;
6653
6654     assert(immh != 0);
6655     assert(!(scalar && is_q));
6656
6657     if (!scalar) {
6658         if (!is_q && extract32(immh, 3, 1)) {
6659             unallocated_encoding(s);
6660             return;
6661         }
6662
6663         /* Since we use the variable-shift helpers we must
6664          * replicate the shift count into each element of
6665          * the tcg_shift value.
6666          */
6667         switch (size) {
6668         case 0:
6669             shift |= shift << 8;
6670             /* fall through */
6671         case 1:
6672             shift |= shift << 16;
6673             break;
6674         case 2:
6675         case 3:
6676             break;
6677         default:
6678             g_assert_not_reached();
6679         }
6680     }
6681
6682     if (!fp_access_check(s)) {
6683         return;
6684     }
6685
6686     if (size == 3) {
6687         TCGv_i64 tcg_shift = tcg_const_i64(shift);
6688         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
6689             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
6690             { NULL, gen_helper_neon_qshl_u64 },
6691         };
6692         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
6693         int maxpass = is_q ? 2 : 1;
6694
6695         for (pass = 0; pass < maxpass; pass++) {
6696             TCGv_i64 tcg_op = tcg_temp_new_i64();
6697
6698             read_vec_element(s, tcg_op, rn, pass, MO_64);
6699             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6700             write_vec_element(s, tcg_op, rd, pass, MO_64);
6701
6702             tcg_temp_free_i64(tcg_op);
6703         }
6704         tcg_temp_free_i64(tcg_shift);
6705
6706         if (!is_q) {
6707             clear_vec_high(s, rd);
6708         }
6709     } else {
6710         TCGv_i32 tcg_shift = tcg_const_i32(shift);
6711         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
6712             {
6713                 { gen_helper_neon_qshl_s8,
6714                   gen_helper_neon_qshl_s16,
6715                   gen_helper_neon_qshl_s32 },
6716                 { gen_helper_neon_qshlu_s8,
6717                   gen_helper_neon_qshlu_s16,
6718                   gen_helper_neon_qshlu_s32 }
6719             }, {
6720                 { NULL, NULL, NULL },
6721                 { gen_helper_neon_qshl_u8,
6722                   gen_helper_neon_qshl_u16,
6723                   gen_helper_neon_qshl_u32 }
6724             }
6725         };
6726         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
6727         TCGMemOp memop = scalar ? size : MO_32;
6728         int maxpass = scalar ? 1 : is_q ? 4 : 2;
6729
6730         for (pass = 0; pass < maxpass; pass++) {
6731             TCGv_i32 tcg_op = tcg_temp_new_i32();
6732
6733             read_vec_element_i32(s, tcg_op, rn, pass, memop);
6734             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6735             if (scalar) {
6736                 switch (size) {
6737                 case 0:
6738                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
6739                     break;
6740                 case 1:
6741                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
6742                     break;
6743                 case 2:
6744                     break;
6745                 default:
6746                     g_assert_not_reached();
6747                 }
6748                 write_fp_sreg(s, rd, tcg_op);
6749             } else {
6750                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6751             }
6752
6753             tcg_temp_free_i32(tcg_op);
6754         }
6755         tcg_temp_free_i32(tcg_shift);
6756
6757         if (!is_q && !scalar) {
6758             clear_vec_high(s, rd);
6759         }
6760     }
6761 }
6762
6763 /* Common vector code for handling integer to FP conversion */
6764 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
6765                                    int elements, int is_signed,
6766                                    int fracbits, int size)
6767 {
6768     bool is_double = size == 3 ? true : false;
6769     TCGv_ptr tcg_fpst = get_fpstatus_ptr();
6770     TCGv_i32 tcg_shift = tcg_const_i32(fracbits);
6771     TCGv_i64 tcg_int = tcg_temp_new_i64();
6772     TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
6773     int pass;
6774
6775     for (pass = 0; pass < elements; pass++) {
6776         read_vec_element(s, tcg_int, rn, pass, mop);
6777
6778         if (is_double) {
6779             TCGv_i64 tcg_double = tcg_temp_new_i64();
6780             if (is_signed) {
6781                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6782                                      tcg_shift, tcg_fpst);
6783             } else {
6784                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6785                                      tcg_shift, tcg_fpst);
6786             }
6787             if (elements == 1) {
6788                 write_fp_dreg(s, rd, tcg_double);
6789             } else {
6790                 write_vec_element(s, tcg_double, rd, pass, MO_64);
6791             }
6792             tcg_temp_free_i64(tcg_double);
6793         } else {
6794             TCGv_i32 tcg_single = tcg_temp_new_i32();
6795             if (is_signed) {
6796                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6797                                      tcg_shift, tcg_fpst);
6798             } else {
6799                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6800                                      tcg_shift, tcg_fpst);
6801             }
6802             if (elements == 1) {
6803                 write_fp_sreg(s, rd, tcg_single);
6804             } else {
6805                 write_vec_element_i32(s, tcg_single, rd, pass, MO_32);
6806             }
6807             tcg_temp_free_i32(tcg_single);
6808         }
6809     }
6810
6811     if (!is_double && elements == 2) {
6812         clear_vec_high(s, rd);
6813     }
6814
6815     tcg_temp_free_i64(tcg_int);
6816     tcg_temp_free_ptr(tcg_fpst);
6817     tcg_temp_free_i32(tcg_shift);
6818 }
6819
6820 /* UCVTF/SCVTF - Integer to FP conversion */
6821 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
6822                                          bool is_q, bool is_u,
6823                                          int immh, int immb, int opcode,
6824                                          int rn, int rd)
6825 {
6826     bool is_double = extract32(immh, 3, 1);
6827     int size = is_double ? MO_64 : MO_32;
6828     int elements;
6829     int immhb = immh << 3 | immb;
6830     int fracbits = (is_double ? 128 : 64) - immhb;
6831
6832     if (!extract32(immh, 2, 2)) {
6833         unallocated_encoding(s);
6834         return;
6835     }
6836
6837     if (is_scalar) {
6838         elements = 1;
6839     } else {
6840         elements = is_double ? 2 : is_q ? 4 : 2;
6841         if (is_double && !is_q) {
6842             unallocated_encoding(s);
6843             return;
6844         }
6845     }
6846
6847     if (!fp_access_check(s)) {
6848         return;
6849     }
6850
6851     /* immh == 0 would be a failure of the decode logic */
6852     g_assert(immh);
6853
6854     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
6855 }
6856
6857 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */
6858 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
6859                                          bool is_q, bool is_u,
6860                                          int immh, int immb, int rn, int rd)
6861 {
6862     bool is_double = extract32(immh, 3, 1);
6863     int immhb = immh << 3 | immb;
6864     int fracbits = (is_double ? 128 : 64) - immhb;
6865     int pass;
6866     TCGv_ptr tcg_fpstatus;
6867     TCGv_i32 tcg_rmode, tcg_shift;
6868
6869     if (!extract32(immh, 2, 2)) {
6870         unallocated_encoding(s);
6871         return;
6872     }
6873
6874     if (!is_scalar && !is_q && is_double) {
6875         unallocated_encoding(s);
6876         return;
6877     }
6878
6879     if (!fp_access_check(s)) {
6880         return;
6881     }
6882
6883     assert(!(is_scalar && is_q));
6884
6885     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
6886     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6887     tcg_fpstatus = get_fpstatus_ptr();
6888     tcg_shift = tcg_const_i32(fracbits);
6889
6890     if (is_double) {
6891         int maxpass = is_scalar ? 1 : 2;
6892
6893         for (pass = 0; pass < maxpass; pass++) {
6894             TCGv_i64 tcg_op = tcg_temp_new_i64();
6895
6896             read_vec_element(s, tcg_op, rn, pass, MO_64);
6897             if (is_u) {
6898                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6899             } else {
6900                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6901             }
6902             write_vec_element(s, tcg_op, rd, pass, MO_64);
6903             tcg_temp_free_i64(tcg_op);
6904         }
6905         if (!is_q) {
6906             clear_vec_high(s, rd);
6907         }
6908     } else {
6909         int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
6910         for (pass = 0; pass < maxpass; pass++) {
6911             TCGv_i32 tcg_op = tcg_temp_new_i32();
6912
6913             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
6914             if (is_u) {
6915                 gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6916             } else {
6917                 gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6918             }
6919             if (is_scalar) {
6920                 write_fp_sreg(s, rd, tcg_op);
6921             } else {
6922                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6923             }
6924             tcg_temp_free_i32(tcg_op);
6925         }
6926         if (!is_q && !is_scalar) {
6927             clear_vec_high(s, rd);
6928         }
6929     }
6930
6931     tcg_temp_free_ptr(tcg_fpstatus);
6932     tcg_temp_free_i32(tcg_shift);
6933     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6934     tcg_temp_free_i32(tcg_rmode);
6935 }
6936
6937 /* C3.6.9 AdvSIMD scalar shift by immediate
6938  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
6939  * +-----+---+-------------+------+------+--------+---+------+------+
6940  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
6941  * +-----+---+-------------+------+------+--------+---+------+------+
6942  *
6943  * This is the scalar version so it works on a fixed sized registers
6944  */
6945 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
6946 {
6947     int rd = extract32(insn, 0, 5);
6948     int rn = extract32(insn, 5, 5);
6949     int opcode = extract32(insn, 11, 5);
6950     int immb = extract32(insn, 16, 3);
6951     int immh = extract32(insn, 19, 4);
6952     bool is_u = extract32(insn, 29, 1);
6953
6954     if (immh == 0) {
6955         unallocated_encoding(s);
6956         return;
6957     }
6958
6959     switch (opcode) {
6960     case 0x08: /* SRI */
6961         if (!is_u) {
6962             unallocated_encoding(s);
6963             return;
6964         }
6965         /* fall through */
6966     case 0x00: /* SSHR / USHR */
6967     case 0x02: /* SSRA / USRA */
6968     case 0x04: /* SRSHR / URSHR */
6969     case 0x06: /* SRSRA / URSRA */
6970         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
6971         break;
6972     case 0x0a: /* SHL / SLI */
6973         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
6974         break;
6975     case 0x1c: /* SCVTF, UCVTF */
6976         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
6977                                      opcode, rn, rd);
6978         break;
6979     case 0x10: /* SQSHRUN, SQSHRUN2 */
6980     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
6981         if (!is_u) {
6982             unallocated_encoding(s);
6983             return;
6984         }
6985         handle_vec_simd_sqshrn(s, true, false, false, true,
6986                                immh, immb, opcode, rn, rd);
6987         break;
6988     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
6989     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
6990         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
6991                                immh, immb, opcode, rn, rd);
6992         break;
6993     case 0xc: /* SQSHLU */
6994         if (!is_u) {
6995             unallocated_encoding(s);
6996             return;
6997         }
6998         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
6999         break;
7000     case 0xe: /* SQSHL, UQSHL */
7001         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
7002         break;
7003     case 0x1f: /* FCVTZS, FCVTZU */
7004         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
7005         break;
7006     default:
7007         unallocated_encoding(s);
7008         break;
7009     }
7010 }
7011
7012 /* C3.6.10 AdvSIMD scalar three different
7013  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
7014  * +-----+---+-----------+------+---+------+--------+-----+------+------+
7015  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
7016  * +-----+---+-----------+------+---+------+--------+-----+------+------+
7017  */
7018 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
7019 {
7020     bool is_u = extract32(insn, 29, 1);
7021     int size = extract32(insn, 22, 2);
7022     int opcode = extract32(insn, 12, 4);
7023     int rm = extract32(insn, 16, 5);
7024     int rn = extract32(insn, 5, 5);
7025     int rd = extract32(insn, 0, 5);
7026
7027     if (is_u) {
7028         unallocated_encoding(s);
7029         return;
7030     }
7031
7032     switch (opcode) {
7033     case 0x9: /* SQDMLAL, SQDMLAL2 */
7034     case 0xb: /* SQDMLSL, SQDMLSL2 */
7035     case 0xd: /* SQDMULL, SQDMULL2 */
7036         if (size == 0 || size == 3) {
7037             unallocated_encoding(s);
7038             return;
7039         }
7040         break;
7041     default:
7042         unallocated_encoding(s);
7043         return;
7044     }
7045
7046     if (!fp_access_check(s)) {
7047         return;
7048     }
7049
7050     if (size == 2) {
7051         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7052         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7053         TCGv_i64 tcg_res = tcg_temp_new_i64();
7054
7055         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
7056         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
7057
7058         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
7059         gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
7060
7061         switch (opcode) {
7062         case 0xd: /* SQDMULL, SQDMULL2 */
7063             break;
7064         case 0xb: /* SQDMLSL, SQDMLSL2 */
7065             tcg_gen_neg_i64(tcg_res, tcg_res);
7066             /* fall through */
7067         case 0x9: /* SQDMLAL, SQDMLAL2 */
7068             read_vec_element(s, tcg_op1, rd, 0, MO_64);
7069             gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
7070                                               tcg_res, tcg_op1);
7071             break;
7072         default:
7073             g_assert_not_reached();
7074         }
7075
7076         write_fp_dreg(s, rd, tcg_res);
7077
7078         tcg_temp_free_i64(tcg_op1);
7079         tcg_temp_free_i64(tcg_op2);
7080         tcg_temp_free_i64(tcg_res);
7081     } else {
7082         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7083         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7084         TCGv_i64 tcg_res = tcg_temp_new_i64();
7085
7086         read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
7087         read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
7088
7089         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
7090         gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
7091
7092         switch (opcode) {
7093         case 0xd: /* SQDMULL, SQDMULL2 */
7094             break;
7095         case 0xb: /* SQDMLSL, SQDMLSL2 */
7096             gen_helper_neon_negl_u32(tcg_res, tcg_res);
7097             /* fall through */
7098         case 0x9: /* SQDMLAL, SQDMLAL2 */
7099         {
7100             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
7101             read_vec_element(s, tcg_op3, rd, 0, MO_32);
7102             gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
7103                                               tcg_res, tcg_op3);
7104             tcg_temp_free_i64(tcg_op3);
7105             break;
7106         }
7107         default:
7108             g_assert_not_reached();
7109         }
7110
7111         tcg_gen_ext32u_i64(tcg_res, tcg_res);
7112         write_fp_dreg(s, rd, tcg_res);
7113
7114         tcg_temp_free_i32(tcg_op1);
7115         tcg_temp_free_i32(tcg_op2);
7116         tcg_temp_free_i64(tcg_res);
7117     }
7118 }
7119
7120 static void handle_3same_64(DisasContext *s, int opcode, bool u,
7121                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
7122 {
7123     /* Handle 64x64->64 opcodes which are shared between the scalar
7124      * and vector 3-same groups. We cover every opcode where size == 3
7125      * is valid in either the three-reg-same (integer, not pairwise)
7126      * or scalar-three-reg-same groups. (Some opcodes are not yet
7127      * implemented.)
7128      */
7129     TCGCond cond;
7130
7131     switch (opcode) {
7132     case 0x1: /* SQADD */
7133         if (u) {
7134             gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7135         } else {
7136             gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7137         }
7138         break;
7139     case 0x5: /* SQSUB */
7140         if (u) {
7141             gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7142         } else {
7143             gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7144         }
7145         break;
7146     case 0x6: /* CMGT, CMHI */
7147         /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
7148          * We implement this using setcond (test) and then negating.
7149          */
7150         cond = u ? TCG_COND_GTU : TCG_COND_GT;
7151     do_cmop:
7152         tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
7153         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7154         break;
7155     case 0x7: /* CMGE, CMHS */
7156         cond = u ? TCG_COND_GEU : TCG_COND_GE;
7157         goto do_cmop;
7158     case 0x11: /* CMTST, CMEQ */
7159         if (u) {
7160             cond = TCG_COND_EQ;
7161             goto do_cmop;
7162         }
7163         /* CMTST : test is "if (X & Y != 0)". */
7164         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
7165         tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
7166         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7167         break;
7168     case 0x8: /* SSHL, USHL */
7169         if (u) {
7170             gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
7171         } else {
7172             gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
7173         }
7174         break;
7175     case 0x9: /* SQSHL, UQSHL */
7176         if (u) {
7177             gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7178         } else {
7179             gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7180         }
7181         break;
7182     case 0xa: /* SRSHL, URSHL */
7183         if (u) {
7184             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
7185         } else {
7186             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
7187         }
7188         break;
7189     case 0xb: /* SQRSHL, UQRSHL */
7190         if (u) {
7191             gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7192         } else {
7193             gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7194         }
7195         break;
7196     case 0x10: /* ADD, SUB */
7197         if (u) {
7198             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
7199         } else {
7200             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
7201         }
7202         break;
7203     default:
7204         g_assert_not_reached();
7205     }
7206 }
7207
7208 /* Handle the 3-same-operands float operations; shared by the scalar
7209  * and vector encodings. The caller must filter out any encodings
7210  * not allocated for the encoding it is dealing with.
7211  */
7212 static void handle_3same_float(DisasContext *s, int size, int elements,
7213                                int fpopcode, int rd, int rn, int rm)
7214 {
7215     int pass;
7216     TCGv_ptr fpst = get_fpstatus_ptr();
7217
7218     for (pass = 0; pass < elements; pass++) {
7219         if (size) {
7220             /* Double */
7221             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7222             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7223             TCGv_i64 tcg_res = tcg_temp_new_i64();
7224
7225             read_vec_element(s, tcg_op1, rn, pass, MO_64);
7226             read_vec_element(s, tcg_op2, rm, pass, MO_64);
7227
7228             switch (fpopcode) {
7229             case 0x39: /* FMLS */
7230                 /* As usual for ARM, separate negation for fused multiply-add */
7231                 gen_helper_vfp_negd(tcg_op1, tcg_op1);
7232                 /* fall through */
7233             case 0x19: /* FMLA */
7234                 read_vec_element(s, tcg_res, rd, pass, MO_64);
7235                 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
7236                                        tcg_res, fpst);
7237                 break;
7238             case 0x18: /* FMAXNM */
7239                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7240                 break;
7241             case 0x1a: /* FADD */
7242                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7243                 break;
7244             case 0x1b: /* FMULX */
7245                 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
7246                 break;
7247             case 0x1c: /* FCMEQ */
7248                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7249                 break;
7250             case 0x1e: /* FMAX */
7251                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7252                 break;
7253             case 0x1f: /* FRECPS */
7254                 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7255                 break;
7256             case 0x38: /* FMINNM */
7257                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7258                 break;
7259             case 0x3a: /* FSUB */
7260                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7261                 break;
7262             case 0x3e: /* FMIN */
7263                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7264                 break;
7265             case 0x3f: /* FRSQRTS */
7266                 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7267                 break;
7268             case 0x5b: /* FMUL */
7269                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
7270                 break;
7271             case 0x5c: /* FCMGE */
7272                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7273                 break;
7274             case 0x5d: /* FACGE */
7275                 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7276                 break;
7277             case 0x5f: /* FDIV */
7278                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
7279                 break;
7280             case 0x7a: /* FABD */
7281                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7282                 gen_helper_vfp_absd(tcg_res, tcg_res);
7283                 break;
7284             case 0x7c: /* FCMGT */
7285                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7286                 break;
7287             case 0x7d: /* FACGT */
7288                 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7289                 break;
7290             default:
7291                 g_assert_not_reached();
7292             }
7293
7294             write_vec_element(s, tcg_res, rd, pass, MO_64);
7295
7296             tcg_temp_free_i64(tcg_res);
7297             tcg_temp_free_i64(tcg_op1);
7298             tcg_temp_free_i64(tcg_op2);
7299         } else {
7300             /* Single */
7301             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7302             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7303             TCGv_i32 tcg_res = tcg_temp_new_i32();
7304
7305             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
7306             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
7307
7308             switch (fpopcode) {
7309             case 0x39: /* FMLS */
7310                 /* As usual for ARM, separate negation for fused multiply-add */
7311                 gen_helper_vfp_negs(tcg_op1, tcg_op1);
7312                 /* fall through */
7313             case 0x19: /* FMLA */
7314                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7315                 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
7316                                        tcg_res, fpst);
7317                 break;
7318             case 0x1a: /* FADD */
7319                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7320                 break;
7321             case 0x1b: /* FMULX */
7322                 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
7323                 break;
7324             case 0x1c: /* FCMEQ */
7325                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7326                 break;
7327             case 0x1e: /* FMAX */
7328                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7329                 break;
7330             case 0x1f: /* FRECPS */
7331                 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7332                 break;
7333             case 0x18: /* FMAXNM */
7334                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7335                 break;
7336             case 0x38: /* FMINNM */
7337                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7338                 break;
7339             case 0x3a: /* FSUB */
7340                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7341                 break;
7342             case 0x3e: /* FMIN */
7343                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7344                 break;
7345             case 0x3f: /* FRSQRTS */
7346                 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7347                 break;
7348             case 0x5b: /* FMUL */
7349                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
7350                 break;
7351             case 0x5c: /* FCMGE */
7352                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7353                 break;
7354             case 0x5d: /* FACGE */
7355                 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7356                 break;
7357             case 0x5f: /* FDIV */
7358                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
7359                 break;
7360             case 0x7a: /* FABD */
7361                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7362                 gen_helper_vfp_abss(tcg_res, tcg_res);
7363                 break;
7364             case 0x7c: /* FCMGT */
7365                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7366                 break;
7367             case 0x7d: /* FACGT */
7368                 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7369                 break;
7370             default:
7371                 g_assert_not_reached();
7372             }
7373
7374             if (elements == 1) {
7375                 /* scalar single so clear high part */
7376                 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7377
7378                 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
7379                 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
7380                 tcg_temp_free_i64(tcg_tmp);
7381             } else {
7382                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7383             }
7384
7385             tcg_temp_free_i32(tcg_res);
7386             tcg_temp_free_i32(tcg_op1);
7387             tcg_temp_free_i32(tcg_op2);
7388         }
7389     }
7390
7391     tcg_temp_free_ptr(fpst);
7392
7393     if ((elements << size) < 4) {
7394         /* scalar, or non-quad vector op */
7395         clear_vec_high(s, rd);
7396     }
7397 }
7398
7399 /* C3.6.11 AdvSIMD scalar three same
7400  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
7401  * +-----+---+-----------+------+---+------+--------+---+------+------+
7402  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
7403  * +-----+---+-----------+------+---+------+--------+---+------+------+
7404  */
7405 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
7406 {
7407     int rd = extract32(insn, 0, 5);
7408     int rn = extract32(insn, 5, 5);
7409     int opcode = extract32(insn, 11, 5);
7410     int rm = extract32(insn, 16, 5);
7411     int size = extract32(insn, 22, 2);
7412     bool u = extract32(insn, 29, 1);
7413     TCGv_i64 tcg_rd;
7414
7415     if (opcode >= 0x18) {
7416         /* Floating point: U, size[1] and opcode indicate operation */
7417         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
7418         switch (fpopcode) {
7419         case 0x1b: /* FMULX */
7420         case 0x1f: /* FRECPS */
7421         case 0x3f: /* FRSQRTS */
7422         case 0x5d: /* FACGE */
7423         case 0x7d: /* FACGT */
7424         case 0x1c: /* FCMEQ */
7425         case 0x5c: /* FCMGE */
7426         case 0x7c: /* FCMGT */
7427         case 0x7a: /* FABD */
7428             break;
7429         default:
7430             unallocated_encoding(s);
7431             return;
7432         }
7433
7434         if (!fp_access_check(s)) {
7435             return;
7436         }
7437
7438         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
7439         return;
7440     }
7441
7442     switch (opcode) {
7443     case 0x1: /* SQADD, UQADD */
7444     case 0x5: /* SQSUB, UQSUB */
7445     case 0x9: /* SQSHL, UQSHL */
7446     case 0xb: /* SQRSHL, UQRSHL */
7447         break;
7448     case 0x8: /* SSHL, USHL */
7449     case 0xa: /* SRSHL, URSHL */
7450     case 0x6: /* CMGT, CMHI */
7451     case 0x7: /* CMGE, CMHS */
7452     case 0x11: /* CMTST, CMEQ */
7453     case 0x10: /* ADD, SUB (vector) */
7454         if (size != 3) {
7455             unallocated_encoding(s);
7456             return;
7457         }
7458         break;
7459     case 0x16: /* SQDMULH, SQRDMULH (vector) */
7460         if (size != 1 && size != 2) {
7461             unallocated_encoding(s);
7462             return;
7463         }
7464         break;
7465     default:
7466         unallocated_encoding(s);
7467         return;
7468     }
7469
7470     if (!fp_access_check(s)) {
7471         return;
7472     }
7473
7474     tcg_rd = tcg_temp_new_i64();
7475
7476     if (size == 3) {
7477         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
7478         TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
7479
7480         handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
7481         tcg_temp_free_i64(tcg_rn);
7482         tcg_temp_free_i64(tcg_rm);
7483     } else {
7484         /* Do a single operation on the lowest element in the vector.
7485          * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
7486          * no side effects for all these operations.
7487          * OPTME: special-purpose helpers would avoid doing some
7488          * unnecessary work in the helper for the 8 and 16 bit cases.
7489          */
7490         NeonGenTwoOpEnvFn *genenvfn;
7491         TCGv_i32 tcg_rn = tcg_temp_new_i32();
7492         TCGv_i32 tcg_rm = tcg_temp_new_i32();
7493         TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
7494
7495         read_vec_element_i32(s, tcg_rn, rn, 0, size);
7496         read_vec_element_i32(s, tcg_rm, rm, 0, size);
7497
7498         switch (opcode) {
7499         case 0x1: /* SQADD, UQADD */
7500         {
7501             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7502                 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
7503                 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
7504                 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
7505             };
7506             genenvfn = fns[size][u];
7507             break;
7508         }
7509         case 0x5: /* SQSUB, UQSUB */
7510         {
7511             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7512                 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
7513                 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
7514                 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
7515             };
7516             genenvfn = fns[size][u];
7517             break;
7518         }
7519         case 0x9: /* SQSHL, UQSHL */
7520         {
7521             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7522                 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
7523                 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
7524                 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
7525             };
7526             genenvfn = fns[size][u];
7527             break;
7528         }
7529         case 0xb: /* SQRSHL, UQRSHL */
7530         {
7531             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7532                 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
7533                 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
7534                 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
7535             };
7536             genenvfn = fns[size][u];
7537             break;
7538         }
7539         case 0x16: /* SQDMULH, SQRDMULH */
7540         {
7541             static NeonGenTwoOpEnvFn * const fns[2][2] = {
7542                 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
7543                 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
7544             };
7545             assert(size == 1 || size == 2);
7546             genenvfn = fns[size - 1][u];
7547             break;
7548         }
7549         default:
7550             g_assert_not_reached();
7551         }
7552
7553         genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
7554         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
7555         tcg_temp_free_i32(tcg_rd32);
7556         tcg_temp_free_i32(tcg_rn);
7557         tcg_temp_free_i32(tcg_rm);
7558     }
7559
7560     write_fp_dreg(s, rd, tcg_rd);
7561
7562     tcg_temp_free_i64(tcg_rd);
7563 }
7564
7565 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
7566                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
7567                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
7568 {
7569     /* Handle 64->64 opcodes which are shared between the scalar and
7570      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
7571      * is valid in either group and also the double-precision fp ops.
7572      * The caller only need provide tcg_rmode and tcg_fpstatus if the op
7573      * requires them.
7574      */
7575     TCGCond cond;
7576
7577     switch (opcode) {
7578     case 0x4: /* CLS, CLZ */
7579         if (u) {
7580             tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
7581         } else {
7582             tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
7583         }
7584         break;
7585     case 0x5: /* NOT */
7586         /* This opcode is shared with CNT and RBIT but we have earlier
7587          * enforced that size == 3 if and only if this is the NOT insn.
7588          */
7589         tcg_gen_not_i64(tcg_rd, tcg_rn);
7590         break;
7591     case 0x7: /* SQABS, SQNEG */
7592         if (u) {
7593             gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
7594         } else {
7595             gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
7596         }
7597         break;
7598     case 0xa: /* CMLT */
7599         /* 64 bit integer comparison against zero, result is
7600          * test ? (2^64 - 1) : 0. We implement via setcond(!test) and
7601          * subtracting 1.
7602          */
7603         cond = TCG_COND_LT;
7604     do_cmop:
7605         tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
7606         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7607         break;
7608     case 0x8: /* CMGT, CMGE */
7609         cond = u ? TCG_COND_GE : TCG_COND_GT;
7610         goto do_cmop;
7611     case 0x9: /* CMEQ, CMLE */
7612         cond = u ? TCG_COND_LE : TCG_COND_EQ;
7613         goto do_cmop;
7614     case 0xb: /* ABS, NEG */
7615         if (u) {
7616             tcg_gen_neg_i64(tcg_rd, tcg_rn);
7617         } else {
7618             TCGv_i64 tcg_zero = tcg_const_i64(0);
7619             tcg_gen_neg_i64(tcg_rd, tcg_rn);
7620             tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
7621                                 tcg_rn, tcg_rd);
7622             tcg_temp_free_i64(tcg_zero);
7623         }
7624         break;
7625     case 0x2f: /* FABS */
7626         gen_helper_vfp_absd(tcg_rd, tcg_rn);
7627         break;
7628     case 0x6f: /* FNEG */
7629         gen_helper_vfp_negd(tcg_rd, tcg_rn);
7630         break;
7631     case 0x7f: /* FSQRT */
7632         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
7633         break;
7634     case 0x1a: /* FCVTNS */
7635     case 0x1b: /* FCVTMS */
7636     case 0x1c: /* FCVTAS */
7637     case 0x3a: /* FCVTPS */
7638     case 0x3b: /* FCVTZS */
7639     {
7640         TCGv_i32 tcg_shift = tcg_const_i32(0);
7641         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7642         tcg_temp_free_i32(tcg_shift);
7643         break;
7644     }
7645     case 0x5a: /* FCVTNU */
7646     case 0x5b: /* FCVTMU */
7647     case 0x5c: /* FCVTAU */
7648     case 0x7a: /* FCVTPU */
7649     case 0x7b: /* FCVTZU */
7650     {
7651         TCGv_i32 tcg_shift = tcg_const_i32(0);
7652         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7653         tcg_temp_free_i32(tcg_shift);
7654         break;
7655     }
7656     case 0x18: /* FRINTN */
7657     case 0x19: /* FRINTM */
7658     case 0x38: /* FRINTP */
7659     case 0x39: /* FRINTZ */
7660     case 0x58: /* FRINTA */
7661     case 0x79: /* FRINTI */
7662         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
7663         break;
7664     case 0x59: /* FRINTX */
7665         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
7666         break;
7667     default:
7668         g_assert_not_reached();
7669     }
7670 }
7671
7672 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
7673                                    bool is_scalar, bool is_u, bool is_q,
7674                                    int size, int rn, int rd)
7675 {
7676     bool is_double = (size == 3);
7677     TCGv_ptr fpst;
7678
7679     if (!fp_access_check(s)) {
7680         return;
7681     }
7682
7683     fpst = get_fpstatus_ptr();
7684
7685     if (is_double) {
7686         TCGv_i64 tcg_op = tcg_temp_new_i64();
7687         TCGv_i64 tcg_zero = tcg_const_i64(0);
7688         TCGv_i64 tcg_res = tcg_temp_new_i64();
7689         NeonGenTwoDoubleOPFn *genfn;
7690         bool swap = false;
7691         int pass;
7692
7693         switch (opcode) {
7694         case 0x2e: /* FCMLT (zero) */
7695             swap = true;
7696             /* fallthrough */
7697         case 0x2c: /* FCMGT (zero) */
7698             genfn = gen_helper_neon_cgt_f64;
7699             break;
7700         case 0x2d: /* FCMEQ (zero) */
7701             genfn = gen_helper_neon_ceq_f64;
7702             break;
7703         case 0x6d: /* FCMLE (zero) */
7704             swap = true;
7705             /* fall through */
7706         case 0x6c: /* FCMGE (zero) */
7707             genfn = gen_helper_neon_cge_f64;
7708             break;
7709         default:
7710             g_assert_not_reached();
7711         }
7712
7713         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7714             read_vec_element(s, tcg_op, rn, pass, MO_64);
7715             if (swap) {
7716                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7717             } else {
7718                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7719             }
7720             write_vec_element(s, tcg_res, rd, pass, MO_64);
7721         }
7722         if (is_scalar) {
7723             clear_vec_high(s, rd);
7724         }
7725
7726         tcg_temp_free_i64(tcg_res);
7727         tcg_temp_free_i64(tcg_zero);
7728         tcg_temp_free_i64(tcg_op);
7729     } else {
7730         TCGv_i32 tcg_op = tcg_temp_new_i32();
7731         TCGv_i32 tcg_zero = tcg_const_i32(0);
7732         TCGv_i32 tcg_res = tcg_temp_new_i32();
7733         NeonGenTwoSingleOPFn *genfn;
7734         bool swap = false;
7735         int pass, maxpasses;
7736
7737         switch (opcode) {
7738         case 0x2e: /* FCMLT (zero) */
7739             swap = true;
7740             /* fall through */
7741         case 0x2c: /* FCMGT (zero) */
7742             genfn = gen_helper_neon_cgt_f32;
7743             break;
7744         case 0x2d: /* FCMEQ (zero) */
7745             genfn = gen_helper_neon_ceq_f32;
7746             break;
7747         case 0x6d: /* FCMLE (zero) */
7748             swap = true;
7749             /* fall through */
7750         case 0x6c: /* FCMGE (zero) */
7751             genfn = gen_helper_neon_cge_f32;
7752             break;
7753         default:
7754             g_assert_not_reached();
7755         }
7756
7757         if (is_scalar) {
7758             maxpasses = 1;
7759         } else {
7760             maxpasses = is_q ? 4 : 2;
7761         }
7762
7763         for (pass = 0; pass < maxpasses; pass++) {
7764             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7765             if (swap) {
7766                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7767             } else {
7768                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7769             }
7770             if (is_scalar) {
7771                 write_fp_sreg(s, rd, tcg_res);
7772             } else {
7773                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7774             }
7775         }
7776         tcg_temp_free_i32(tcg_res);
7777         tcg_temp_free_i32(tcg_zero);
7778         tcg_temp_free_i32(tcg_op);
7779         if (!is_q && !is_scalar) {
7780             clear_vec_high(s, rd);
7781         }
7782     }
7783
7784     tcg_temp_free_ptr(fpst);
7785 }
7786
7787 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
7788                                     bool is_scalar, bool is_u, bool is_q,
7789                                     int size, int rn, int rd)
7790 {
7791     bool is_double = (size == 3);
7792     TCGv_ptr fpst = get_fpstatus_ptr();
7793
7794     if (is_double) {
7795         TCGv_i64 tcg_op = tcg_temp_new_i64();
7796         TCGv_i64 tcg_res = tcg_temp_new_i64();
7797         int pass;
7798
7799         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7800             read_vec_element(s, tcg_op, rn, pass, MO_64);
7801             switch (opcode) {
7802             case 0x3d: /* FRECPE */
7803                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
7804                 break;
7805             case 0x3f: /* FRECPX */
7806                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
7807                 break;
7808             case 0x7d: /* FRSQRTE */
7809                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
7810                 break;
7811             default:
7812                 g_assert_not_reached();
7813             }
7814             write_vec_element(s, tcg_res, rd, pass, MO_64);
7815         }
7816         if (is_scalar) {
7817             clear_vec_high(s, rd);
7818         }
7819
7820         tcg_temp_free_i64(tcg_res);
7821         tcg_temp_free_i64(tcg_op);
7822     } else {
7823         TCGv_i32 tcg_op = tcg_temp_new_i32();
7824         TCGv_i32 tcg_res = tcg_temp_new_i32();
7825         int pass, maxpasses;
7826
7827         if (is_scalar) {
7828             maxpasses = 1;
7829         } else {
7830             maxpasses = is_q ? 4 : 2;
7831         }
7832
7833         for (pass = 0; pass < maxpasses; pass++) {
7834             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7835
7836             switch (opcode) {
7837             case 0x3c: /* URECPE */
7838                 gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
7839                 break;
7840             case 0x3d: /* FRECPE */
7841                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
7842                 break;
7843             case 0x3f: /* FRECPX */
7844                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
7845                 break;
7846             case 0x7d: /* FRSQRTE */
7847                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
7848                 break;
7849             default:
7850                 g_assert_not_reached();
7851             }
7852
7853             if (is_scalar) {
7854                 write_fp_sreg(s, rd, tcg_res);
7855             } else {
7856                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7857             }
7858         }
7859         tcg_temp_free_i32(tcg_res);
7860         tcg_temp_free_i32(tcg_op);
7861         if (!is_q && !is_scalar) {
7862             clear_vec_high(s, rd);
7863         }
7864     }
7865     tcg_temp_free_ptr(fpst);
7866 }
7867
7868 static void handle_2misc_narrow(DisasContext *s, bool scalar,
7869                                 int opcode, bool u, bool is_q,
7870                                 int size, int rn, int rd)
7871 {
7872     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
7873      * in the source becomes a size element in the destination).
7874      */
7875     int pass;
7876     TCGv_i32 tcg_res[2];
7877     int destelt = is_q ? 2 : 0;
7878     int passes = scalar ? 1 : 2;
7879
7880     if (scalar) {
7881         tcg_res[1] = tcg_const_i32(0);
7882     }
7883
7884     for (pass = 0; pass < passes; pass++) {
7885         TCGv_i64 tcg_op = tcg_temp_new_i64();
7886         NeonGenNarrowFn *genfn = NULL;
7887         NeonGenNarrowEnvFn *genenvfn = NULL;
7888
7889         if (scalar) {
7890             read_vec_element(s, tcg_op, rn, pass, size + 1);
7891         } else {
7892             read_vec_element(s, tcg_op, rn, pass, MO_64);
7893         }
7894         tcg_res[pass] = tcg_temp_new_i32();
7895
7896         switch (opcode) {
7897         case 0x12: /* XTN, SQXTUN */
7898         {
7899             static NeonGenNarrowFn * const xtnfns[3] = {
7900                 gen_helper_neon_narrow_u8,
7901                 gen_helper_neon_narrow_u16,
7902                 tcg_gen_extrl_i64_i32,
7903             };
7904             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
7905                 gen_helper_neon_unarrow_sat8,
7906                 gen_helper_neon_unarrow_sat16,
7907                 gen_helper_neon_unarrow_sat32,
7908             };
7909             if (u) {
7910                 genenvfn = sqxtunfns[size];
7911             } else {
7912                 genfn = xtnfns[size];
7913             }
7914             break;
7915         }
7916         case 0x14: /* SQXTN, UQXTN */
7917         {
7918             static NeonGenNarrowEnvFn * const fns[3][2] = {
7919                 { gen_helper_neon_narrow_sat_s8,
7920                   gen_helper_neon_narrow_sat_u8 },
7921                 { gen_helper_neon_narrow_sat_s16,
7922                   gen_helper_neon_narrow_sat_u16 },
7923                 { gen_helper_neon_narrow_sat_s32,
7924                   gen_helper_neon_narrow_sat_u32 },
7925             };
7926             genenvfn = fns[size][u];
7927             break;
7928         }
7929         case 0x16: /* FCVTN, FCVTN2 */
7930             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
7931             if (size == 2) {
7932                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
7933             } else {
7934                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
7935                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
7936                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
7937                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
7938                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
7939                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
7940                 tcg_temp_free_i32(tcg_lo);
7941                 tcg_temp_free_i32(tcg_hi);
7942             }
7943             break;
7944         case 0x56:  /* FCVTXN, FCVTXN2 */
7945             /* 64 bit to 32 bit float conversion
7946              * with von Neumann rounding (round to odd)
7947              */
7948             assert(size == 2);
7949             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
7950             break;
7951         default:
7952             g_assert_not_reached();
7953         }
7954
7955         if (genfn) {
7956             genfn(tcg_res[pass], tcg_op);
7957         } else if (genenvfn) {
7958             genenvfn(tcg_res[pass], cpu_env, tcg_op);
7959         }
7960
7961         tcg_temp_free_i64(tcg_op);
7962     }
7963
7964     for (pass = 0; pass < 2; pass++) {
7965         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
7966         tcg_temp_free_i32(tcg_res[pass]);
7967     }
7968     if (!is_q) {
7969         clear_vec_high(s, rd);
7970     }
7971 }
7972
7973 /* Remaining saturating accumulating ops */
7974 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
7975                                 bool is_q, int size, int rn, int rd)
7976 {
7977     bool is_double = (size == 3);
7978
7979     if (is_double) {
7980         TCGv_i64 tcg_rn = tcg_temp_new_i64();
7981         TCGv_i64 tcg_rd = tcg_temp_new_i64();
7982         int pass;
7983
7984         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7985             read_vec_element(s, tcg_rn, rn, pass, MO_64);
7986             read_vec_element(s, tcg_rd, rd, pass, MO_64);
7987
7988             if (is_u) { /* USQADD */
7989                 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7990             } else { /* SUQADD */
7991                 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7992             }
7993             write_vec_element(s, tcg_rd, rd, pass, MO_64);
7994         }
7995         if (is_scalar) {
7996             clear_vec_high(s, rd);
7997         }
7998
7999         tcg_temp_free_i64(tcg_rd);
8000         tcg_temp_free_i64(tcg_rn);
8001     } else {
8002         TCGv_i32 tcg_rn = tcg_temp_new_i32();
8003         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8004         int pass, maxpasses;
8005
8006         if (is_scalar) {
8007             maxpasses = 1;
8008         } else {
8009             maxpasses = is_q ? 4 : 2;
8010         }
8011
8012         for (pass = 0; pass < maxpasses; pass++) {
8013             if (is_scalar) {
8014                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
8015                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
8016             } else {
8017                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
8018                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
8019             }
8020
8021             if (is_u) { /* USQADD */
8022                 switch (size) {
8023                 case 0:
8024                     gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8025                     break;
8026                 case 1:
8027                     gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8028                     break;
8029                 case 2:
8030                     gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8031                     break;
8032                 default:
8033                     g_assert_not_reached();
8034                 }
8035             } else { /* SUQADD */
8036                 switch (size) {
8037                 case 0:
8038                     gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8039                     break;
8040                 case 1:
8041                     gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8042                     break;
8043                 case 2:
8044                     gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8045                     break;
8046                 default:
8047                     g_assert_not_reached();
8048                 }
8049             }
8050
8051             if (is_scalar) {
8052                 TCGv_i64 tcg_zero = tcg_const_i64(0);
8053                 write_vec_element(s, tcg_zero, rd, 0, MO_64);
8054                 tcg_temp_free_i64(tcg_zero);
8055             }
8056             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
8057         }
8058
8059         if (!is_q) {
8060             clear_vec_high(s, rd);
8061         }
8062
8063         tcg_temp_free_i32(tcg_rd);
8064         tcg_temp_free_i32(tcg_rn);
8065     }
8066 }
8067
8068 /* C3.6.12 AdvSIMD scalar two reg misc
8069  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
8070  * +-----+---+-----------+------+-----------+--------+-----+------+------+
8071  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
8072  * +-----+---+-----------+------+-----------+--------+-----+------+------+
8073  */
8074 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
8075 {
8076     int rd = extract32(insn, 0, 5);
8077     int rn = extract32(insn, 5, 5);
8078     int opcode = extract32(insn, 12, 5);
8079     int size = extract32(insn, 22, 2);
8080     bool u = extract32(insn, 29, 1);
8081     bool is_fcvt = false;
8082     int rmode;
8083     TCGv_i32 tcg_rmode;
8084     TCGv_ptr tcg_fpstatus;
8085
8086     switch (opcode) {
8087     case 0x3: /* USQADD / SUQADD*/
8088         if (!fp_access_check(s)) {
8089             return;
8090         }
8091         handle_2misc_satacc(s, true, u, false, size, rn, rd);
8092         return;
8093     case 0x7: /* SQABS / SQNEG */
8094         break;
8095     case 0xa: /* CMLT */
8096         if (u) {
8097             unallocated_encoding(s);
8098             return;
8099         }
8100         /* fall through */
8101     case 0x8: /* CMGT, CMGE */
8102     case 0x9: /* CMEQ, CMLE */
8103     case 0xb: /* ABS, NEG */
8104         if (size != 3) {
8105             unallocated_encoding(s);
8106             return;
8107         }
8108         break;
8109     case 0x12: /* SQXTUN */
8110         if (!u) {
8111             unallocated_encoding(s);
8112             return;
8113         }
8114         /* fall through */
8115     case 0x14: /* SQXTN, UQXTN */
8116         if (size == 3) {
8117             unallocated_encoding(s);
8118             return;
8119         }
8120         if (!fp_access_check(s)) {
8121             return;
8122         }
8123         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
8124         return;
8125     case 0xc ... 0xf:
8126     case 0x16 ... 0x1d:
8127     case 0x1f:
8128         /* Floating point: U, size[1] and opcode indicate operation;
8129          * size[0] indicates single or double precision.
8130          */
8131         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
8132         size = extract32(size, 0, 1) ? 3 : 2;
8133         switch (opcode) {
8134         case 0x2c: /* FCMGT (zero) */
8135         case 0x2d: /* FCMEQ (zero) */
8136         case 0x2e: /* FCMLT (zero) */
8137         case 0x6c: /* FCMGE (zero) */
8138         case 0x6d: /* FCMLE (zero) */
8139             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
8140             return;
8141         case 0x1d: /* SCVTF */
8142         case 0x5d: /* UCVTF */
8143         {
8144             bool is_signed = (opcode == 0x1d);
8145             if (!fp_access_check(s)) {
8146                 return;
8147             }
8148             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
8149             return;
8150         }
8151         case 0x3d: /* FRECPE */
8152         case 0x3f: /* FRECPX */
8153         case 0x7d: /* FRSQRTE */
8154             if (!fp_access_check(s)) {
8155                 return;
8156             }
8157             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
8158             return;
8159         case 0x1a: /* FCVTNS */
8160         case 0x1b: /* FCVTMS */
8161         case 0x3a: /* FCVTPS */
8162         case 0x3b: /* FCVTZS */
8163         case 0x5a: /* FCVTNU */
8164         case 0x5b: /* FCVTMU */
8165         case 0x7a: /* FCVTPU */
8166         case 0x7b: /* FCVTZU */
8167             is_fcvt = true;
8168             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
8169             break;
8170         case 0x1c: /* FCVTAS */
8171         case 0x5c: /* FCVTAU */
8172             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
8173             is_fcvt = true;
8174             rmode = FPROUNDING_TIEAWAY;
8175             break;
8176         case 0x56: /* FCVTXN, FCVTXN2 */
8177             if (size == 2) {
8178                 unallocated_encoding(s);
8179                 return;
8180             }
8181             if (!fp_access_check(s)) {
8182                 return;
8183             }
8184             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
8185             return;
8186         default:
8187             unallocated_encoding(s);
8188             return;
8189         }
8190         break;
8191     default:
8192         unallocated_encoding(s);
8193         return;
8194     }
8195
8196     if (!fp_access_check(s)) {
8197         return;
8198     }
8199
8200     if (is_fcvt) {
8201         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
8202         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8203         tcg_fpstatus = get_fpstatus_ptr();
8204     } else {
8205         TCGV_UNUSED_I32(tcg_rmode);
8206         TCGV_UNUSED_PTR(tcg_fpstatus);
8207     }
8208
8209     if (size == 3) {
8210         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8211         TCGv_i64 tcg_rd = tcg_temp_new_i64();
8212
8213         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
8214         write_fp_dreg(s, rd, tcg_rd);
8215         tcg_temp_free_i64(tcg_rd);
8216         tcg_temp_free_i64(tcg_rn);
8217     } else {
8218         TCGv_i32 tcg_rn = tcg_temp_new_i32();
8219         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8220
8221         read_vec_element_i32(s, tcg_rn, rn, 0, size);
8222
8223         switch (opcode) {
8224         case 0x7: /* SQABS, SQNEG */
8225         {
8226             NeonGenOneOpEnvFn *genfn;
8227             static NeonGenOneOpEnvFn * const fns[3][2] = {
8228                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
8229                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
8230                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
8231             };
8232             genfn = fns[size][u];
8233             genfn(tcg_rd, cpu_env, tcg_rn);
8234             break;
8235         }
8236         case 0x1a: /* FCVTNS */
8237         case 0x1b: /* FCVTMS */
8238         case 0x1c: /* FCVTAS */
8239         case 0x3a: /* FCVTPS */
8240         case 0x3b: /* FCVTZS */
8241         {
8242             TCGv_i32 tcg_shift = tcg_const_i32(0);
8243             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8244             tcg_temp_free_i32(tcg_shift);
8245             break;
8246         }
8247         case 0x5a: /* FCVTNU */
8248         case 0x5b: /* FCVTMU */
8249         case 0x5c: /* FCVTAU */
8250         case 0x7a: /* FCVTPU */
8251         case 0x7b: /* FCVTZU */
8252         {
8253             TCGv_i32 tcg_shift = tcg_const_i32(0);
8254             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8255             tcg_temp_free_i32(tcg_shift);
8256             break;
8257         }
8258         default:
8259             g_assert_not_reached();
8260         }
8261
8262         write_fp_sreg(s, rd, tcg_rd);
8263         tcg_temp_free_i32(tcg_rd);
8264         tcg_temp_free_i32(tcg_rn);
8265     }
8266
8267     if (is_fcvt) {
8268         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8269         tcg_temp_free_i32(tcg_rmode);
8270         tcg_temp_free_ptr(tcg_fpstatus);
8271     }
8272 }
8273
8274 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
8275 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
8276                                  int immh, int immb, int opcode, int rn, int rd)
8277 {
8278     int size = 32 - clz32(immh) - 1;
8279     int immhb = immh << 3 | immb;
8280     int shift = 2 * (8 << size) - immhb;
8281     bool accumulate = false;
8282     bool round = false;
8283     bool insert = false;
8284     int dsize = is_q ? 128 : 64;
8285     int esize = 8 << size;
8286     int elements = dsize/esize;
8287     TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
8288     TCGv_i64 tcg_rn = new_tmp_a64(s);
8289     TCGv_i64 tcg_rd = new_tmp_a64(s);
8290     TCGv_i64 tcg_round;
8291     int i;
8292
8293     if (extract32(immh, 3, 1) && !is_q) {
8294         unallocated_encoding(s);
8295         return;
8296     }
8297
8298     if (size > 3 && !is_q) {
8299         unallocated_encoding(s);
8300         return;
8301     }
8302
8303     if (!fp_access_check(s)) {
8304         return;
8305     }
8306
8307     switch (opcode) {
8308     case 0x02: /* SSRA / USRA (accumulate) */
8309         accumulate = true;
8310         break;
8311     case 0x04: /* SRSHR / URSHR (rounding) */
8312         round = true;
8313         break;
8314     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8315         accumulate = round = true;
8316         break;
8317     case 0x08: /* SRI */
8318         insert = true;
8319         break;
8320     }
8321
8322     if (round) {
8323         uint64_t round_const = 1ULL << (shift - 1);
8324         tcg_round = tcg_const_i64(round_const);
8325     } else {
8326         TCGV_UNUSED_I64(tcg_round);
8327     }
8328
8329     for (i = 0; i < elements; i++) {
8330         read_vec_element(s, tcg_rn, rn, i, memop);
8331         if (accumulate || insert) {
8332             read_vec_element(s, tcg_rd, rd, i, memop);
8333         }
8334
8335         if (insert) {
8336             handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
8337         } else {
8338             handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8339                                     accumulate, is_u, size, shift);
8340         }
8341
8342         write_vec_element(s, tcg_rd, rd, i, size);
8343     }
8344
8345     if (!is_q) {
8346         clear_vec_high(s, rd);
8347     }
8348
8349     if (round) {
8350         tcg_temp_free_i64(tcg_round);
8351     }
8352 }
8353
8354 /* SHL/SLI - Vector shift left */
8355 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
8356                                 int immh, int immb, int opcode, int rn, int rd)
8357 {
8358     int size = 32 - clz32(immh) - 1;
8359     int immhb = immh << 3 | immb;
8360     int shift = immhb - (8 << size);
8361     int dsize = is_q ? 128 : 64;
8362     int esize = 8 << size;
8363     int elements = dsize/esize;
8364     TCGv_i64 tcg_rn = new_tmp_a64(s);
8365     TCGv_i64 tcg_rd = new_tmp_a64(s);
8366     int i;
8367
8368     if (extract32(immh, 3, 1) && !is_q) {
8369         unallocated_encoding(s);
8370         return;
8371     }
8372
8373     if (size > 3 && !is_q) {
8374         unallocated_encoding(s);
8375         return;
8376     }
8377
8378     if (!fp_access_check(s)) {
8379         return;
8380     }
8381
8382     for (i = 0; i < elements; i++) {
8383         read_vec_element(s, tcg_rn, rn, i, size);
8384         if (insert) {
8385             read_vec_element(s, tcg_rd, rd, i, size);
8386         }
8387
8388         handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
8389
8390         write_vec_element(s, tcg_rd, rd, i, size);
8391     }
8392
8393     if (!is_q) {
8394         clear_vec_high(s, rd);
8395     }
8396 }
8397
8398 /* USHLL/SHLL - Vector shift left with widening */
8399 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
8400                                  int immh, int immb, int opcode, int rn, int rd)
8401 {
8402     int size = 32 - clz32(immh) - 1;
8403     int immhb = immh << 3 | immb;
8404     int shift = immhb - (8 << size);
8405     int dsize = 64;
8406     int esize = 8 << size;
8407     int elements = dsize/esize;
8408     TCGv_i64 tcg_rn = new_tmp_a64(s);
8409     TCGv_i64 tcg_rd = new_tmp_a64(s);
8410     int i;
8411
8412     if (size >= 3) {
8413         unallocated_encoding(s);
8414         return;
8415     }
8416
8417     if (!fp_access_check(s)) {
8418         return;
8419     }
8420
8421     /* For the LL variants the store is larger than the load,
8422      * so if rd == rn we would overwrite parts of our input.
8423      * So load everything right now and use shifts in the main loop.
8424      */
8425     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
8426
8427     for (i = 0; i < elements; i++) {
8428         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
8429         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
8430         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
8431         write_vec_element(s, tcg_rd, rd, i, size + 1);
8432     }
8433 }
8434
8435 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
8436 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
8437                                  int immh, int immb, int opcode, int rn, int rd)
8438 {
8439     int immhb = immh << 3 | immb;
8440     int size = 32 - clz32(immh) - 1;
8441     int dsize = 64;
8442     int esize = 8 << size;
8443     int elements = dsize/esize;
8444     int shift = (2 * esize) - immhb;
8445     bool round = extract32(opcode, 0, 1);
8446     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
8447     TCGv_i64 tcg_round;
8448     int i;
8449
8450     if (extract32(immh, 3, 1)) {
8451         unallocated_encoding(s);
8452         return;
8453     }
8454
8455     if (!fp_access_check(s)) {
8456         return;
8457     }
8458
8459     tcg_rn = tcg_temp_new_i64();
8460     tcg_rd = tcg_temp_new_i64();
8461     tcg_final = tcg_temp_new_i64();
8462     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
8463
8464     if (round) {
8465         uint64_t round_const = 1ULL << (shift - 1);
8466         tcg_round = tcg_const_i64(round_const);
8467     } else {
8468         TCGV_UNUSED_I64(tcg_round);
8469     }
8470
8471     for (i = 0; i < elements; i++) {
8472         read_vec_element(s, tcg_rn, rn, i, size+1);
8473         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8474                                 false, true, size+1, shift);
8475
8476         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8477     }
8478
8479     if (!is_q) {
8480         clear_vec_high(s, rd);
8481         write_vec_element(s, tcg_final, rd, 0, MO_64);
8482     } else {
8483         write_vec_element(s, tcg_final, rd, 1, MO_64);
8484     }
8485
8486     if (round) {
8487         tcg_temp_free_i64(tcg_round);
8488     }
8489     tcg_temp_free_i64(tcg_rn);
8490     tcg_temp_free_i64(tcg_rd);
8491     tcg_temp_free_i64(tcg_final);
8492     return;
8493 }
8494
8495
8496 /* C3.6.14 AdvSIMD shift by immediate
8497  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
8498  * +---+---+---+-------------+------+------+--------+---+------+------+
8499  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8500  * +---+---+---+-------------+------+------+--------+---+------+------+
8501  */
8502 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
8503 {
8504     int rd = extract32(insn, 0, 5);
8505     int rn = extract32(insn, 5, 5);
8506     int opcode = extract32(insn, 11, 5);
8507     int immb = extract32(insn, 16, 3);
8508     int immh = extract32(insn, 19, 4);
8509     bool is_u = extract32(insn, 29, 1);
8510     bool is_q = extract32(insn, 30, 1);
8511
8512     switch (opcode) {
8513     case 0x08: /* SRI */
8514         if (!is_u) {
8515             unallocated_encoding(s);
8516             return;
8517         }
8518         /* fall through */
8519     case 0x00: /* SSHR / USHR */
8520     case 0x02: /* SSRA / USRA (accumulate) */
8521     case 0x04: /* SRSHR / URSHR (rounding) */
8522     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8523         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
8524         break;
8525     case 0x0a: /* SHL / SLI */
8526         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8527         break;
8528     case 0x10: /* SHRN */
8529     case 0x11: /* RSHRN / SQRSHRUN */
8530         if (is_u) {
8531             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
8532                                    opcode, rn, rd);
8533         } else {
8534             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
8535         }
8536         break;
8537     case 0x12: /* SQSHRN / UQSHRN */
8538     case 0x13: /* SQRSHRN / UQRSHRN */
8539         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
8540                                opcode, rn, rd);
8541         break;
8542     case 0x14: /* SSHLL / USHLL */
8543         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8544         break;
8545     case 0x1c: /* SCVTF / UCVTF */
8546         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
8547                                      opcode, rn, rd);
8548         break;
8549     case 0xc: /* SQSHLU */
8550         if (!is_u) {
8551             unallocated_encoding(s);
8552             return;
8553         }
8554         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
8555         break;
8556     case 0xe: /* SQSHL, UQSHL */
8557         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
8558         break;
8559     case 0x1f: /* FCVTZS/ FCVTZU */
8560         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
8561         return;
8562     default:
8563         unallocated_encoding(s);
8564         return;
8565     }
8566 }
8567
8568 /* Generate code to do a "long" addition or subtraction, ie one done in
8569  * TCGv_i64 on vector lanes twice the width specified by size.
8570  */
8571 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
8572                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
8573 {
8574     static NeonGenTwo64OpFn * const fns[3][2] = {
8575         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
8576         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
8577         { tcg_gen_add_i64, tcg_gen_sub_i64 },
8578     };
8579     NeonGenTwo64OpFn *genfn;
8580     assert(size < 3);
8581
8582     genfn = fns[size][is_sub];
8583     genfn(tcg_res, tcg_op1, tcg_op2);
8584 }
8585
8586 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
8587                                 int opcode, int rd, int rn, int rm)
8588 {
8589     /* 3-reg-different widening insns: 64 x 64 -> 128 */
8590     TCGv_i64 tcg_res[2];
8591     int pass, accop;
8592
8593     tcg_res[0] = tcg_temp_new_i64();
8594     tcg_res[1] = tcg_temp_new_i64();
8595
8596     /* Does this op do an adding accumulate, a subtracting accumulate,
8597      * or no accumulate at all?
8598      */
8599     switch (opcode) {
8600     case 5:
8601     case 8:
8602     case 9:
8603         accop = 1;
8604         break;
8605     case 10:
8606     case 11:
8607         accop = -1;
8608         break;
8609     default:
8610         accop = 0;
8611         break;
8612     }
8613
8614     if (accop != 0) {
8615         read_vec_element(s, tcg_res[0], rd, 0, MO_64);
8616         read_vec_element(s, tcg_res[1], rd, 1, MO_64);
8617     }
8618
8619     /* size == 2 means two 32x32->64 operations; this is worth special
8620      * casing because we can generally handle it inline.
8621      */
8622     if (size == 2) {
8623         for (pass = 0; pass < 2; pass++) {
8624             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8625             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8626             TCGv_i64 tcg_passres;
8627             TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
8628
8629             int elt = pass + is_q * 2;
8630
8631             read_vec_element(s, tcg_op1, rn, elt, memop);
8632             read_vec_element(s, tcg_op2, rm, elt, memop);
8633
8634             if (accop == 0) {
8635                 tcg_passres = tcg_res[pass];
8636             } else {
8637                 tcg_passres = tcg_temp_new_i64();
8638             }
8639
8640             switch (opcode) {
8641             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8642                 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
8643                 break;
8644             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8645                 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
8646                 break;
8647             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8648             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8649             {
8650                 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
8651                 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
8652
8653                 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
8654                 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
8655                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
8656                                     tcg_passres,
8657                                     tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
8658                 tcg_temp_free_i64(tcg_tmp1);
8659                 tcg_temp_free_i64(tcg_tmp2);
8660                 break;
8661             }
8662             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8663             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8664             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8665                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8666                 break;
8667             case 9: /* SQDMLAL, SQDMLAL2 */
8668             case 11: /* SQDMLSL, SQDMLSL2 */
8669             case 13: /* SQDMULL, SQDMULL2 */
8670                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8671                 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
8672                                                   tcg_passres, tcg_passres);
8673                 break;
8674             default:
8675                 g_assert_not_reached();
8676             }
8677
8678             if (opcode == 9 || opcode == 11) {
8679                 /* saturating accumulate ops */
8680                 if (accop < 0) {
8681                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
8682                 }
8683                 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
8684                                                   tcg_res[pass], tcg_passres);
8685             } else if (accop > 0) {
8686                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8687             } else if (accop < 0) {
8688                 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8689             }
8690
8691             if (accop != 0) {
8692                 tcg_temp_free_i64(tcg_passres);
8693             }
8694
8695             tcg_temp_free_i64(tcg_op1);
8696             tcg_temp_free_i64(tcg_op2);
8697         }
8698     } else {
8699         /* size 0 or 1, generally helper functions */
8700         for (pass = 0; pass < 2; pass++) {
8701             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8702             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8703             TCGv_i64 tcg_passres;
8704             int elt = pass + is_q * 2;
8705
8706             read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
8707             read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
8708
8709             if (accop == 0) {
8710                 tcg_passres = tcg_res[pass];
8711             } else {
8712                 tcg_passres = tcg_temp_new_i64();
8713             }
8714
8715             switch (opcode) {
8716             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8717             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8718             {
8719                 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
8720                 static NeonGenWidenFn * const widenfns[2][2] = {
8721                     { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8722                     { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8723                 };
8724                 NeonGenWidenFn *widenfn = widenfns[size][is_u];
8725
8726                 widenfn(tcg_op2_64, tcg_op2);
8727                 widenfn(tcg_passres, tcg_op1);
8728                 gen_neon_addl(size, (opcode == 2), tcg_passres,
8729                               tcg_passres, tcg_op2_64);
8730                 tcg_temp_free_i64(tcg_op2_64);
8731                 break;
8732             }
8733             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8734             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8735                 if (size == 0) {
8736                     if (is_u) {
8737                         gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
8738                     } else {
8739                         gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
8740                     }
8741                 } else {
8742                     if (is_u) {
8743                         gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
8744                     } else {
8745                         gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
8746                     }
8747                 }
8748                 break;
8749             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8750             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8751             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8752                 if (size == 0) {
8753                     if (is_u) {
8754                         gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
8755                     } else {
8756                         gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
8757                     }
8758                 } else {
8759                     if (is_u) {
8760                         gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
8761                     } else {
8762                         gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8763                     }
8764                 }
8765                 break;
8766             case 9: /* SQDMLAL, SQDMLAL2 */
8767             case 11: /* SQDMLSL, SQDMLSL2 */
8768             case 13: /* SQDMULL, SQDMULL2 */
8769                 assert(size == 1);
8770                 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8771                 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
8772                                                   tcg_passres, tcg_passres);
8773                 break;
8774             case 14: /* PMULL */
8775                 assert(size == 0);
8776                 gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
8777                 break;
8778             default:
8779                 g_assert_not_reached();
8780             }
8781             tcg_temp_free_i32(tcg_op1);
8782             tcg_temp_free_i32(tcg_op2);
8783
8784             if (accop != 0) {
8785                 if (opcode == 9 || opcode == 11) {
8786                     /* saturating accumulate ops */
8787                     if (accop < 0) {
8788                         gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
8789                     }
8790                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
8791                                                       tcg_res[pass],
8792                                                       tcg_passres);
8793                 } else {
8794                     gen_neon_addl(size, (accop < 0), tcg_res[pass],
8795                                   tcg_res[pass], tcg_passres);
8796                 }
8797                 tcg_temp_free_i64(tcg_passres);
8798             }
8799         }
8800     }
8801
8802     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8803     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8804     tcg_temp_free_i64(tcg_res[0]);
8805     tcg_temp_free_i64(tcg_res[1]);
8806 }
8807
8808 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
8809                             int opcode, int rd, int rn, int rm)
8810 {
8811     TCGv_i64 tcg_res[2];
8812     int part = is_q ? 2 : 0;
8813     int pass;
8814
8815     for (pass = 0; pass < 2; pass++) {
8816         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8817         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8818         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
8819         static NeonGenWidenFn * const widenfns[3][2] = {
8820             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8821             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8822             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
8823         };
8824         NeonGenWidenFn *widenfn = widenfns[size][is_u];
8825
8826         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8827         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
8828         widenfn(tcg_op2_wide, tcg_op2);
8829         tcg_temp_free_i32(tcg_op2);
8830         tcg_res[pass] = tcg_temp_new_i64();
8831         gen_neon_addl(size, (opcode == 3),
8832                       tcg_res[pass], tcg_op1, tcg_op2_wide);
8833         tcg_temp_free_i64(tcg_op1);
8834         tcg_temp_free_i64(tcg_op2_wide);
8835     }
8836
8837     for (pass = 0; pass < 2; pass++) {
8838         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8839         tcg_temp_free_i64(tcg_res[pass]);
8840     }
8841 }
8842
8843 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
8844 {
8845     tcg_gen_addi_i64(in, in, 1U << 31);
8846     tcg_gen_extrh_i64_i32(res, in);
8847 }
8848
8849 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
8850                                  int opcode, int rd, int rn, int rm)
8851 {
8852     TCGv_i32 tcg_res[2];
8853     int part = is_q ? 2 : 0;
8854     int pass;
8855
8856     for (pass = 0; pass < 2; pass++) {
8857         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8858         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8859         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
8860         static NeonGenNarrowFn * const narrowfns[3][2] = {
8861             { gen_helper_neon_narrow_high_u8,
8862               gen_helper_neon_narrow_round_high_u8 },
8863             { gen_helper_neon_narrow_high_u16,
8864               gen_helper_neon_narrow_round_high_u16 },
8865             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
8866         };
8867         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
8868
8869         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8870         read_vec_element(s, tcg_op2, rm, pass, MO_64);
8871
8872         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
8873
8874         tcg_temp_free_i64(tcg_op1);
8875         tcg_temp_free_i64(tcg_op2);
8876
8877         tcg_res[pass] = tcg_temp_new_i32();
8878         gennarrow(tcg_res[pass], tcg_wideres);
8879         tcg_temp_free_i64(tcg_wideres);
8880     }
8881
8882     for (pass = 0; pass < 2; pass++) {
8883         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
8884         tcg_temp_free_i32(tcg_res[pass]);
8885     }
8886     if (!is_q) {
8887         clear_vec_high(s, rd);
8888     }
8889 }
8890
8891 static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
8892 {
8893     /* PMULL of 64 x 64 -> 128 is an odd special case because it
8894      * is the only three-reg-diff instruction which produces a
8895      * 128-bit wide result from a single operation. However since
8896      * it's possible to calculate the two halves more or less
8897      * separately we just use two helper calls.
8898      */
8899     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8900     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8901     TCGv_i64 tcg_res = tcg_temp_new_i64();
8902
8903     read_vec_element(s, tcg_op1, rn, is_q, MO_64);
8904     read_vec_element(s, tcg_op2, rm, is_q, MO_64);
8905     gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
8906     write_vec_element(s, tcg_res, rd, 0, MO_64);
8907     gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
8908     write_vec_element(s, tcg_res, rd, 1, MO_64);
8909
8910     tcg_temp_free_i64(tcg_op1);
8911     tcg_temp_free_i64(tcg_op2);
8912     tcg_temp_free_i64(tcg_res);
8913 }
8914
8915 /* C3.6.15 AdvSIMD three different
8916  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8917  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8918  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8919  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8920  */
8921 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
8922 {
8923     /* Instructions in this group fall into three basic classes
8924      * (in each case with the operation working on each element in
8925      * the input vectors):
8926      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
8927      *     128 bit input)
8928      * (2) wide 64 x 128 -> 128
8929      * (3) narrowing 128 x 128 -> 64
8930      * Here we do initial decode, catch unallocated cases and
8931      * dispatch to separate functions for each class.
8932      */
8933     int is_q = extract32(insn, 30, 1);
8934     int is_u = extract32(insn, 29, 1);
8935     int size = extract32(insn, 22, 2);
8936     int opcode = extract32(insn, 12, 4);
8937     int rm = extract32(insn, 16, 5);
8938     int rn = extract32(insn, 5, 5);
8939     int rd = extract32(insn, 0, 5);
8940
8941     switch (opcode) {
8942     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
8943     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
8944         /* 64 x 128 -> 128 */
8945         if (size == 3) {
8946             unallocated_encoding(s);
8947             return;
8948         }
8949         if (!fp_access_check(s)) {
8950             return;
8951         }
8952         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
8953         break;
8954     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
8955     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
8956         /* 128 x 128 -> 64 */
8957         if (size == 3) {
8958             unallocated_encoding(s);
8959             return;
8960         }
8961         if (!fp_access_check(s)) {
8962             return;
8963         }
8964         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
8965         break;
8966     case 14: /* PMULL, PMULL2 */
8967         if (is_u || size == 1 || size == 2) {
8968             unallocated_encoding(s);
8969             return;
8970         }
8971         if (size == 3) {
8972             if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
8973                 unallocated_encoding(s);
8974                 return;
8975             }
8976             if (!fp_access_check(s)) {
8977                 return;
8978             }
8979             handle_pmull_64(s, is_q, rd, rn, rm);
8980             return;
8981         }
8982         goto is_widening;
8983     case 9: /* SQDMLAL, SQDMLAL2 */
8984     case 11: /* SQDMLSL, SQDMLSL2 */
8985     case 13: /* SQDMULL, SQDMULL2 */
8986         if (is_u || size == 0) {
8987             unallocated_encoding(s);
8988             return;
8989         }
8990         /* fall through */
8991     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8992     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8993     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8994     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8995     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8996     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8997     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
8998         /* 64 x 64 -> 128 */
8999         if (size == 3) {
9000             unallocated_encoding(s);
9001             return;
9002         }
9003     is_widening:
9004         if (!fp_access_check(s)) {
9005             return;
9006         }
9007
9008         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
9009         break;
9010     default:
9011         /* opcode 15 not allocated */
9012         unallocated_encoding(s);
9013         break;
9014     }
9015 }
9016
9017 /* Logic op (opcode == 3) subgroup of C3.6.16. */
9018 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
9019 {
9020     int rd = extract32(insn, 0, 5);
9021     int rn = extract32(insn, 5, 5);
9022     int rm = extract32(insn, 16, 5);
9023     int size = extract32(insn, 22, 2);
9024     bool is_u = extract32(insn, 29, 1);
9025     bool is_q = extract32(insn, 30, 1);
9026     TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
9027     int pass;
9028
9029     if (!fp_access_check(s)) {
9030         return;
9031     }
9032
9033     tcg_op1 = tcg_temp_new_i64();
9034     tcg_op2 = tcg_temp_new_i64();
9035     tcg_res[0] = tcg_temp_new_i64();
9036     tcg_res[1] = tcg_temp_new_i64();
9037
9038     for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
9039         read_vec_element(s, tcg_op1, rn, pass, MO_64);
9040         read_vec_element(s, tcg_op2, rm, pass, MO_64);
9041
9042         if (!is_u) {
9043             switch (size) {
9044             case 0: /* AND */
9045                 tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
9046                 break;
9047             case 1: /* BIC */
9048                 tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
9049                 break;
9050             case 2: /* ORR */
9051                 tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
9052                 break;
9053             case 3: /* ORN */
9054                 tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
9055                 break;
9056             }
9057         } else {
9058             if (size != 0) {
9059                 /* B* ops need res loaded to operate on */
9060                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9061             }
9062
9063             switch (size) {
9064             case 0: /* EOR */
9065                 tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
9066                 break;
9067             case 1: /* BSL bitwise select */
9068                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2);
9069                 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]);
9070                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1);
9071                 break;
9072             case 2: /* BIT, bitwise insert if true */
9073                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
9074                 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2);
9075                 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9076                 break;
9077             case 3: /* BIF, bitwise insert if false */
9078                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
9079                 tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2);
9080                 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9081                 break;
9082             }
9083         }
9084     }
9085
9086     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
9087     if (!is_q) {
9088         tcg_gen_movi_i64(tcg_res[1], 0);
9089     }
9090     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
9091
9092     tcg_temp_free_i64(tcg_op1);
9093     tcg_temp_free_i64(tcg_op2);
9094     tcg_temp_free_i64(tcg_res[0]);
9095     tcg_temp_free_i64(tcg_res[1]);
9096 }
9097
9098 /* Helper functions for 32 bit comparisons */
9099 static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9100 {
9101     tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
9102 }
9103
9104 static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9105 {
9106     tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
9107 }
9108
9109 static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9110 {
9111     tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
9112 }
9113
9114 static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9115 {
9116     tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
9117 }
9118
9119 /* Pairwise op subgroup of C3.6.16.
9120  *
9121  * This is called directly or via the handle_3same_float for float pairwise
9122  * operations where the opcode and size are calculated differently.
9123  */
9124 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
9125                                    int size, int rn, int rm, int rd)
9126 {
9127     TCGv_ptr fpst;
9128     int pass;
9129
9130     /* Floating point operations need fpst */
9131     if (opcode >= 0x58) {
9132         fpst = get_fpstatus_ptr();
9133     } else {
9134         TCGV_UNUSED_PTR(fpst);
9135     }
9136
9137     if (!fp_access_check(s)) {
9138         return;
9139     }
9140
9141     /* These operations work on the concatenated rm:rn, with each pair of
9142      * adjacent elements being operated on to produce an element in the result.
9143      */
9144     if (size == 3) {
9145         TCGv_i64 tcg_res[2];
9146
9147         for (pass = 0; pass < 2; pass++) {
9148             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9149             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9150             int passreg = (pass == 0) ? rn : rm;
9151
9152             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
9153             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
9154             tcg_res[pass] = tcg_temp_new_i64();
9155
9156             switch (opcode) {
9157             case 0x17: /* ADDP */
9158                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9159                 break;
9160             case 0x58: /* FMAXNMP */
9161                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9162                 break;
9163             case 0x5a: /* FADDP */
9164                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9165                 break;
9166             case 0x5e: /* FMAXP */
9167                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9168                 break;
9169             case 0x78: /* FMINNMP */
9170                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9171                 break;
9172             case 0x7e: /* FMINP */
9173                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9174                 break;
9175             default:
9176                 g_assert_not_reached();
9177             }
9178
9179             tcg_temp_free_i64(tcg_op1);
9180             tcg_temp_free_i64(tcg_op2);
9181         }
9182
9183         for (pass = 0; pass < 2; pass++) {
9184             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9185             tcg_temp_free_i64(tcg_res[pass]);
9186         }
9187     } else {
9188         int maxpass = is_q ? 4 : 2;
9189         TCGv_i32 tcg_res[4];
9190
9191         for (pass = 0; pass < maxpass; pass++) {
9192             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9193             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9194             NeonGenTwoOpFn *genfn = NULL;
9195             int passreg = pass < (maxpass / 2) ? rn : rm;
9196             int passelt = (is_q && (pass & 1)) ? 2 : 0;
9197
9198             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
9199             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
9200             tcg_res[pass] = tcg_temp_new_i32();
9201
9202             switch (opcode) {
9203             case 0x17: /* ADDP */
9204             {
9205                 static NeonGenTwoOpFn * const fns[3] = {
9206                     gen_helper_neon_padd_u8,
9207                     gen_helper_neon_padd_u16,
9208                     tcg_gen_add_i32,
9209                 };
9210                 genfn = fns[size];
9211                 break;
9212             }
9213             case 0x14: /* SMAXP, UMAXP */
9214             {
9215                 static NeonGenTwoOpFn * const fns[3][2] = {
9216                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
9217                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
9218                     { gen_max_s32, gen_max_u32 },
9219                 };
9220                 genfn = fns[size][u];
9221                 break;
9222             }
9223             case 0x15: /* SMINP, UMINP */
9224             {
9225                 static NeonGenTwoOpFn * const fns[3][2] = {
9226                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
9227                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
9228                     { gen_min_s32, gen_min_u32 },
9229                 };
9230                 genfn = fns[size][u];
9231                 break;
9232             }
9233             /* The FP operations are all on single floats (32 bit) */
9234             case 0x58: /* FMAXNMP */
9235                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9236                 break;
9237             case 0x5a: /* FADDP */
9238                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9239                 break;
9240             case 0x5e: /* FMAXP */
9241                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9242                 break;
9243             case 0x78: /* FMINNMP */
9244                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9245                 break;
9246             case 0x7e: /* FMINP */
9247                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9248                 break;
9249             default:
9250                 g_assert_not_reached();
9251             }
9252
9253             /* FP ops called directly, otherwise call now */
9254             if (genfn) {
9255                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
9256             }
9257
9258             tcg_temp_free_i32(tcg_op1);
9259             tcg_temp_free_i32(tcg_op2);
9260         }
9261
9262         for (pass = 0; pass < maxpass; pass++) {
9263             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9264             tcg_temp_free_i32(tcg_res[pass]);
9265         }
9266         if (!is_q) {
9267             clear_vec_high(s, rd);
9268         }
9269     }
9270
9271     if (!TCGV_IS_UNUSED_PTR(fpst)) {
9272         tcg_temp_free_ptr(fpst);
9273     }
9274 }
9275
9276 /* Floating point op subgroup of C3.6.16. */
9277 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
9278 {
9279     /* For floating point ops, the U, size[1] and opcode bits
9280      * together indicate the operation. size[0] indicates single
9281      * or double.
9282      */
9283     int fpopcode = extract32(insn, 11, 5)
9284         | (extract32(insn, 23, 1) << 5)
9285         | (extract32(insn, 29, 1) << 6);
9286     int is_q = extract32(insn, 30, 1);
9287     int size = extract32(insn, 22, 1);
9288     int rm = extract32(insn, 16, 5);
9289     int rn = extract32(insn, 5, 5);
9290     int rd = extract32(insn, 0, 5);
9291
9292     int datasize = is_q ? 128 : 64;
9293     int esize = 32 << size;
9294     int elements = datasize / esize;
9295
9296     if (size == 1 && !is_q) {
9297         unallocated_encoding(s);
9298         return;
9299     }
9300
9301     switch (fpopcode) {
9302     case 0x58: /* FMAXNMP */
9303     case 0x5a: /* FADDP */
9304     case 0x5e: /* FMAXP */
9305     case 0x78: /* FMINNMP */
9306     case 0x7e: /* FMINP */
9307         if (size && !is_q) {
9308             unallocated_encoding(s);
9309             return;
9310         }
9311         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
9312                                rn, rm, rd);
9313         return;
9314     case 0x1b: /* FMULX */
9315     case 0x1f: /* FRECPS */
9316     case 0x3f: /* FRSQRTS */
9317     case 0x5d: /* FACGE */
9318     case 0x7d: /* FACGT */
9319     case 0x19: /* FMLA */
9320     case 0x39: /* FMLS */
9321     case 0x18: /* FMAXNM */
9322     case 0x1a: /* FADD */
9323     case 0x1c: /* FCMEQ */
9324     case 0x1e: /* FMAX */
9325     case 0x38: /* FMINNM */
9326     case 0x3a: /* FSUB */
9327     case 0x3e: /* FMIN */
9328     case 0x5b: /* FMUL */
9329     case 0x5c: /* FCMGE */
9330     case 0x5f: /* FDIV */
9331     case 0x7a: /* FABD */
9332     case 0x7c: /* FCMGT */
9333         if (!fp_access_check(s)) {
9334             return;
9335         }
9336
9337         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
9338         return;
9339     default:
9340         unallocated_encoding(s);
9341         return;
9342     }
9343 }
9344
9345 /* Integer op subgroup of C3.6.16. */
9346 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
9347 {
9348     int is_q = extract32(insn, 30, 1);
9349     int u = extract32(insn, 29, 1);
9350     int size = extract32(insn, 22, 2);
9351     int opcode = extract32(insn, 11, 5);
9352     int rm = extract32(insn, 16, 5);
9353     int rn = extract32(insn, 5, 5);
9354     int rd = extract32(insn, 0, 5);
9355     int pass;
9356
9357     switch (opcode) {
9358     case 0x13: /* MUL, PMUL */
9359         if (u && size != 0) {
9360             unallocated_encoding(s);
9361             return;
9362         }
9363         /* fall through */
9364     case 0x0: /* SHADD, UHADD */
9365     case 0x2: /* SRHADD, URHADD */
9366     case 0x4: /* SHSUB, UHSUB */
9367     case 0xc: /* SMAX, UMAX */
9368     case 0xd: /* SMIN, UMIN */
9369     case 0xe: /* SABD, UABD */
9370     case 0xf: /* SABA, UABA */
9371     case 0x12: /* MLA, MLS */
9372         if (size == 3) {
9373             unallocated_encoding(s);
9374             return;
9375         }
9376         break;
9377     case 0x16: /* SQDMULH, SQRDMULH */
9378         if (size == 0 || size == 3) {
9379             unallocated_encoding(s);
9380             return;
9381         }
9382         break;
9383     default:
9384         if (size == 3 && !is_q) {
9385             unallocated_encoding(s);
9386             return;
9387         }
9388         break;
9389     }
9390
9391     if (!fp_access_check(s)) {
9392         return;
9393     }
9394
9395     if (size == 3) {
9396         assert(is_q);
9397         for (pass = 0; pass < 2; pass++) {
9398             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9399             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9400             TCGv_i64 tcg_res = tcg_temp_new_i64();
9401
9402             read_vec_element(s, tcg_op1, rn, pass, MO_64);
9403             read_vec_element(s, tcg_op2, rm, pass, MO_64);
9404
9405             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
9406
9407             write_vec_element(s, tcg_res, rd, pass, MO_64);
9408
9409             tcg_temp_free_i64(tcg_res);
9410             tcg_temp_free_i64(tcg_op1);
9411             tcg_temp_free_i64(tcg_op2);
9412         }
9413     } else {
9414         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
9415             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9416             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9417             TCGv_i32 tcg_res = tcg_temp_new_i32();
9418             NeonGenTwoOpFn *genfn = NULL;
9419             NeonGenTwoOpEnvFn *genenvfn = NULL;
9420
9421             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9422             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9423
9424             switch (opcode) {
9425             case 0x0: /* SHADD, UHADD */
9426             {
9427                 static NeonGenTwoOpFn * const fns[3][2] = {
9428                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
9429                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
9430                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
9431                 };
9432                 genfn = fns[size][u];
9433                 break;
9434             }
9435             case 0x1: /* SQADD, UQADD */
9436             {
9437                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9438                     { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9439                     { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9440                     { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9441                 };
9442                 genenvfn = fns[size][u];
9443                 break;
9444             }
9445             case 0x2: /* SRHADD, URHADD */
9446             {
9447                 static NeonGenTwoOpFn * const fns[3][2] = {
9448                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
9449                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
9450                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
9451                 };
9452                 genfn = fns[size][u];
9453                 break;
9454             }
9455             case 0x4: /* SHSUB, UHSUB */
9456             {
9457                 static NeonGenTwoOpFn * const fns[3][2] = {
9458                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
9459                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
9460                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
9461                 };
9462                 genfn = fns[size][u];
9463                 break;
9464             }
9465             case 0x5: /* SQSUB, UQSUB */
9466             {
9467                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9468                     { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9469                     { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9470                     { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9471                 };
9472                 genenvfn = fns[size][u];
9473                 break;
9474             }
9475             case 0x6: /* CMGT, CMHI */
9476             {
9477                 static NeonGenTwoOpFn * const fns[3][2] = {
9478                     { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
9479                     { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
9480                     { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
9481                 };
9482                 genfn = fns[size][u];
9483                 break;
9484             }
9485             case 0x7: /* CMGE, CMHS */
9486             {
9487                 static NeonGenTwoOpFn * const fns[3][2] = {
9488                     { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
9489                     { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
9490                     { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
9491                 };
9492                 genfn = fns[size][u];
9493                 break;
9494             }
9495             case 0x8: /* SSHL, USHL */
9496             {
9497                 static NeonGenTwoOpFn * const fns[3][2] = {
9498                     { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
9499                     { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
9500                     { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
9501                 };
9502                 genfn = fns[size][u];
9503                 break;
9504             }
9505             case 0x9: /* SQSHL, UQSHL */
9506             {
9507                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9508                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9509                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9510                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9511                 };
9512                 genenvfn = fns[size][u];
9513                 break;
9514             }
9515             case 0xa: /* SRSHL, URSHL */
9516             {
9517                 static NeonGenTwoOpFn * const fns[3][2] = {
9518                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
9519                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
9520                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
9521                 };
9522                 genfn = fns[size][u];
9523                 break;
9524             }
9525             case 0xb: /* SQRSHL, UQRSHL */
9526             {
9527                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9528                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9529                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9530                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9531                 };
9532                 genenvfn = fns[size][u];
9533                 break;
9534             }
9535             case 0xc: /* SMAX, UMAX */
9536             {
9537                 static NeonGenTwoOpFn * const fns[3][2] = {
9538                     { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
9539                     { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
9540                     { gen_max_s32, gen_max_u32 },
9541                 };
9542                 genfn = fns[size][u];
9543                 break;
9544             }
9545
9546             case 0xd: /* SMIN, UMIN */
9547             {
9548                 static NeonGenTwoOpFn * const fns[3][2] = {
9549                     { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
9550                     { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
9551                     { gen_min_s32, gen_min_u32 },
9552                 };
9553                 genfn = fns[size][u];
9554                 break;
9555             }
9556             case 0xe: /* SABD, UABD */
9557             case 0xf: /* SABA, UABA */
9558             {
9559                 static NeonGenTwoOpFn * const fns[3][2] = {
9560                     { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
9561                     { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
9562                     { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
9563                 };
9564                 genfn = fns[size][u];
9565                 break;
9566             }
9567             case 0x10: /* ADD, SUB */
9568             {
9569                 static NeonGenTwoOpFn * const fns[3][2] = {
9570                     { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9571                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9572                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
9573                 };
9574                 genfn = fns[size][u];
9575                 break;
9576             }
9577             case 0x11: /* CMTST, CMEQ */
9578             {
9579                 static NeonGenTwoOpFn * const fns[3][2] = {
9580                     { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
9581                     { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
9582                     { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
9583                 };
9584                 genfn = fns[size][u];
9585                 break;
9586             }
9587             case 0x13: /* MUL, PMUL */
9588                 if (u) {
9589                     /* PMUL */
9590                     assert(size == 0);
9591                     genfn = gen_helper_neon_mul_p8;
9592                     break;
9593                 }
9594                 /* fall through : MUL */
9595             case 0x12: /* MLA, MLS */
9596             {
9597                 static NeonGenTwoOpFn * const fns[3] = {
9598                     gen_helper_neon_mul_u8,
9599                     gen_helper_neon_mul_u16,
9600                     tcg_gen_mul_i32,
9601                 };
9602                 genfn = fns[size];
9603                 break;
9604             }
9605             case 0x16: /* SQDMULH, SQRDMULH */
9606             {
9607                 static NeonGenTwoOpEnvFn * const fns[2][2] = {
9608                     { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9609                     { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9610                 };
9611                 assert(size == 1 || size == 2);
9612                 genenvfn = fns[size - 1][u];
9613                 break;
9614             }
9615             default:
9616                 g_assert_not_reached();
9617             }
9618
9619             if (genenvfn) {
9620                 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
9621             } else {
9622                 genfn(tcg_res, tcg_op1, tcg_op2);
9623             }
9624
9625             if (opcode == 0xf || opcode == 0x12) {
9626                 /* SABA, UABA, MLA, MLS: accumulating ops */
9627                 static NeonGenTwoOpFn * const fns[3][2] = {
9628                     { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9629                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9630                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
9631                 };
9632                 bool is_sub = (opcode == 0x12 && u); /* MLS */
9633
9634                 genfn = fns[size][is_sub];
9635                 read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
9636                 genfn(tcg_res, tcg_op1, tcg_res);
9637             }
9638
9639             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9640
9641             tcg_temp_free_i32(tcg_res);
9642             tcg_temp_free_i32(tcg_op1);
9643             tcg_temp_free_i32(tcg_op2);
9644         }
9645     }
9646
9647     if (!is_q) {
9648         clear_vec_high(s, rd);
9649     }
9650 }
9651
9652 /* C3.6.16 AdvSIMD three same
9653  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
9654  * +---+---+---+-----------+------+---+------+--------+---+------+------+
9655  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9656  * +---+---+---+-----------+------+---+------+--------+---+------+------+
9657  */
9658 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
9659 {
9660     int opcode = extract32(insn, 11, 5);
9661
9662     switch (opcode) {
9663     case 0x3: /* logic ops */
9664         disas_simd_3same_logic(s, insn);
9665         break;
9666     case 0x17: /* ADDP */
9667     case 0x14: /* SMAXP, UMAXP */
9668     case 0x15: /* SMINP, UMINP */
9669     {
9670         /* Pairwise operations */
9671         int is_q = extract32(insn, 30, 1);
9672         int u = extract32(insn, 29, 1);
9673         int size = extract32(insn, 22, 2);
9674         int rm = extract32(insn, 16, 5);
9675         int rn = extract32(insn, 5, 5);
9676         int rd = extract32(insn, 0, 5);
9677         if (opcode == 0x17) {
9678             if (u || (size == 3 && !is_q)) {
9679                 unallocated_encoding(s);
9680                 return;
9681             }
9682         } else {
9683             if (size == 3) {
9684                 unallocated_encoding(s);
9685                 return;
9686             }
9687         }
9688         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
9689         break;
9690     }
9691     case 0x18 ... 0x31:
9692         /* floating point ops, sz[1] and U are part of opcode */
9693         disas_simd_3same_float(s, insn);
9694         break;
9695     default:
9696         disas_simd_3same_int(s, insn);
9697         break;
9698     }
9699 }
9700
9701 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
9702                                   int size, int rn, int rd)
9703 {
9704     /* Handle 2-reg-misc ops which are widening (so each size element
9705      * in the source becomes a 2*size element in the destination.
9706      * The only instruction like this is FCVTL.
9707      */
9708     int pass;
9709
9710     if (size == 3) {
9711         /* 32 -> 64 bit fp conversion */
9712         TCGv_i64 tcg_res[2];
9713         int srcelt = is_q ? 2 : 0;
9714
9715         for (pass = 0; pass < 2; pass++) {
9716             TCGv_i32 tcg_op = tcg_temp_new_i32();
9717             tcg_res[pass] = tcg_temp_new_i64();
9718
9719             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
9720             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
9721             tcg_temp_free_i32(tcg_op);
9722         }
9723         for (pass = 0; pass < 2; pass++) {
9724             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9725             tcg_temp_free_i64(tcg_res[pass]);
9726         }
9727     } else {
9728         /* 16 -> 32 bit fp conversion */
9729         int srcelt = is_q ? 4 : 0;
9730         TCGv_i32 tcg_res[4];
9731
9732         for (pass = 0; pass < 4; pass++) {
9733             tcg_res[pass] = tcg_temp_new_i32();
9734
9735             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
9736             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
9737                                            cpu_env);
9738         }
9739         for (pass = 0; pass < 4; pass++) {
9740             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9741             tcg_temp_free_i32(tcg_res[pass]);
9742         }
9743     }
9744 }
9745
9746 static void handle_rev(DisasContext *s, int opcode, bool u,
9747                        bool is_q, int size, int rn, int rd)
9748 {
9749     int op = (opcode << 1) | u;
9750     int opsz = op + size;
9751     int grp_size = 3 - opsz;
9752     int dsize = is_q ? 128 : 64;
9753     int i;
9754
9755     if (opsz >= 3) {
9756         unallocated_encoding(s);
9757         return;
9758     }
9759
9760     if (!fp_access_check(s)) {
9761         return;
9762     }
9763
9764     if (size == 0) {
9765         /* Special case bytes, use bswap op on each group of elements */
9766         int groups = dsize / (8 << grp_size);
9767
9768         for (i = 0; i < groups; i++) {
9769             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9770
9771             read_vec_element(s, tcg_tmp, rn, i, grp_size);
9772             switch (grp_size) {
9773             case MO_16:
9774                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
9775                 break;
9776             case MO_32:
9777                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
9778                 break;
9779             case MO_64:
9780                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
9781                 break;
9782             default:
9783                 g_assert_not_reached();
9784             }
9785             write_vec_element(s, tcg_tmp, rd, i, grp_size);
9786             tcg_temp_free_i64(tcg_tmp);
9787         }
9788         if (!is_q) {
9789             clear_vec_high(s, rd);
9790         }
9791     } else {
9792         int revmask = (1 << grp_size) - 1;
9793         int esize = 8 << size;
9794         int elements = dsize / esize;
9795         TCGv_i64 tcg_rn = tcg_temp_new_i64();
9796         TCGv_i64 tcg_rd = tcg_const_i64(0);
9797         TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
9798
9799         for (i = 0; i < elements; i++) {
9800             int e_rev = (i & 0xf) ^ revmask;
9801             int off = e_rev * esize;
9802             read_vec_element(s, tcg_rn, rn, i, size);
9803             if (off >= 64) {
9804                 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
9805                                     tcg_rn, off - 64, esize);
9806             } else {
9807                 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
9808             }
9809         }
9810         write_vec_element(s, tcg_rd, rd, 0, MO_64);
9811         write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
9812
9813         tcg_temp_free_i64(tcg_rd_hi);
9814         tcg_temp_free_i64(tcg_rd);
9815         tcg_temp_free_i64(tcg_rn);
9816     }
9817 }
9818
9819 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
9820                                   bool is_q, int size, int rn, int rd)
9821 {
9822     /* Implement the pairwise operations from 2-misc:
9823      * SADDLP, UADDLP, SADALP, UADALP.
9824      * These all add pairs of elements in the input to produce a
9825      * double-width result element in the output (possibly accumulating).
9826      */
9827     bool accum = (opcode == 0x6);
9828     int maxpass = is_q ? 2 : 1;
9829     int pass;
9830     TCGv_i64 tcg_res[2];
9831
9832     if (size == 2) {
9833         /* 32 + 32 -> 64 op */
9834         TCGMemOp memop = size + (u ? 0 : MO_SIGN);
9835
9836         for (pass = 0; pass < maxpass; pass++) {
9837             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9838             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9839
9840             tcg_res[pass] = tcg_temp_new_i64();
9841
9842             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
9843             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
9844             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9845             if (accum) {
9846                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
9847                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9848             }
9849
9850             tcg_temp_free_i64(tcg_op1);
9851             tcg_temp_free_i64(tcg_op2);
9852         }
9853     } else {
9854         for (pass = 0; pass < maxpass; pass++) {
9855             TCGv_i64 tcg_op = tcg_temp_new_i64();
9856             NeonGenOneOpFn *genfn;
9857             static NeonGenOneOpFn * const fns[2][2] = {
9858                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
9859                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
9860             };
9861
9862             genfn = fns[size][u];
9863
9864             tcg_res[pass] = tcg_temp_new_i64();
9865
9866             read_vec_element(s, tcg_op, rn, pass, MO_64);
9867             genfn(tcg_res[pass], tcg_op);
9868
9869             if (accum) {
9870                 read_vec_element(s, tcg_op, rd, pass, MO_64);
9871                 if (size == 0) {
9872                     gen_helper_neon_addl_u16(tcg_res[pass],
9873                                              tcg_res[pass], tcg_op);
9874                 } else {
9875                     gen_helper_neon_addl_u32(tcg_res[pass],
9876                                              tcg_res[pass], tcg_op);
9877                 }
9878             }
9879             tcg_temp_free_i64(tcg_op);
9880         }
9881     }
9882     if (!is_q) {
9883         tcg_res[1] = tcg_const_i64(0);
9884     }
9885     for (pass = 0; pass < 2; pass++) {
9886         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9887         tcg_temp_free_i64(tcg_res[pass]);
9888     }
9889 }
9890
9891 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
9892 {
9893     /* Implement SHLL and SHLL2 */
9894     int pass;
9895     int part = is_q ? 2 : 0;
9896     TCGv_i64 tcg_res[2];
9897
9898     for (pass = 0; pass < 2; pass++) {
9899         static NeonGenWidenFn * const widenfns[3] = {
9900             gen_helper_neon_widen_u8,
9901             gen_helper_neon_widen_u16,
9902             tcg_gen_extu_i32_i64,
9903         };
9904         NeonGenWidenFn *widenfn = widenfns[size];
9905         TCGv_i32 tcg_op = tcg_temp_new_i32();
9906
9907         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
9908         tcg_res[pass] = tcg_temp_new_i64();
9909         widenfn(tcg_res[pass], tcg_op);
9910         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
9911
9912         tcg_temp_free_i32(tcg_op);
9913     }
9914
9915     for (pass = 0; pass < 2; pass++) {
9916         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9917         tcg_temp_free_i64(tcg_res[pass]);
9918     }
9919 }
9920
9921 /* C3.6.17 AdvSIMD two reg misc
9922  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9923  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9924  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9925  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9926  */
9927 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
9928 {
9929     int size = extract32(insn, 22, 2);
9930     int opcode = extract32(insn, 12, 5);
9931     bool u = extract32(insn, 29, 1);
9932     bool is_q = extract32(insn, 30, 1);
9933     int rn = extract32(insn, 5, 5);
9934     int rd = extract32(insn, 0, 5);
9935     bool need_fpstatus = false;
9936     bool need_rmode = false;
9937     int rmode = -1;
9938     TCGv_i32 tcg_rmode;
9939     TCGv_ptr tcg_fpstatus;
9940
9941     switch (opcode) {
9942     case 0x0: /* REV64, REV32 */
9943     case 0x1: /* REV16 */
9944         handle_rev(s, opcode, u, is_q, size, rn, rd);
9945         return;
9946     case 0x5: /* CNT, NOT, RBIT */
9947         if (u && size == 0) {
9948             /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
9949             size = 3;
9950             break;
9951         } else if (u && size == 1) {
9952             /* RBIT */
9953             break;
9954         } else if (!u && size == 0) {
9955             /* CNT */
9956             break;
9957         }
9958         unallocated_encoding(s);
9959         return;
9960     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
9961     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
9962         if (size == 3) {
9963             unallocated_encoding(s);
9964             return;
9965         }
9966         if (!fp_access_check(s)) {
9967             return;
9968         }
9969
9970         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
9971         return;
9972     case 0x4: /* CLS, CLZ */
9973         if (size == 3) {
9974             unallocated_encoding(s);
9975             return;
9976         }
9977         break;
9978     case 0x2: /* SADDLP, UADDLP */
9979     case 0x6: /* SADALP, UADALP */
9980         if (size == 3) {
9981             unallocated_encoding(s);
9982             return;
9983         }
9984         if (!fp_access_check(s)) {
9985             return;
9986         }
9987         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
9988         return;
9989     case 0x13: /* SHLL, SHLL2 */
9990         if (u == 0 || size == 3) {
9991             unallocated_encoding(s);
9992             return;
9993         }
9994         if (!fp_access_check(s)) {
9995             return;
9996         }
9997         handle_shll(s, is_q, size, rn, rd);
9998         return;
9999     case 0xa: /* CMLT */
10000         if (u == 1) {
10001             unallocated_encoding(s);
10002             return;
10003         }
10004         /* fall through */
10005     case 0x8: /* CMGT, CMGE */
10006     case 0x9: /* CMEQ, CMLE */
10007     case 0xb: /* ABS, NEG */
10008         if (size == 3 && !is_q) {
10009             unallocated_encoding(s);
10010             return;
10011         }
10012         break;
10013     case 0x3: /* SUQADD, USQADD */
10014         if (size == 3 && !is_q) {
10015             unallocated_encoding(s);
10016             return;
10017         }
10018         if (!fp_access_check(s)) {
10019             return;
10020         }
10021         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
10022         return;
10023     case 0x7: /* SQABS, SQNEG */
10024         if (size == 3 && !is_q) {
10025             unallocated_encoding(s);
10026             return;
10027         }
10028         break;
10029     case 0xc ... 0xf:
10030     case 0x16 ... 0x1d:
10031     case 0x1f:
10032     {
10033         /* Floating point: U, size[1] and opcode indicate operation;
10034          * size[0] indicates single or double precision.
10035          */
10036         int is_double = extract32(size, 0, 1);
10037         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
10038         size = is_double ? 3 : 2;
10039         switch (opcode) {
10040         case 0x2f: /* FABS */
10041         case 0x6f: /* FNEG */
10042             if (size == 3 && !is_q) {
10043                 unallocated_encoding(s);
10044                 return;
10045             }
10046             break;
10047         case 0x1d: /* SCVTF */
10048         case 0x5d: /* UCVTF */
10049         {
10050             bool is_signed = (opcode == 0x1d) ? true : false;
10051             int elements = is_double ? 2 : is_q ? 4 : 2;
10052             if (is_double && !is_q) {
10053                 unallocated_encoding(s);
10054                 return;
10055             }
10056             if (!fp_access_check(s)) {
10057                 return;
10058             }
10059             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
10060             return;
10061         }
10062         case 0x2c: /* FCMGT (zero) */
10063         case 0x2d: /* FCMEQ (zero) */
10064         case 0x2e: /* FCMLT (zero) */
10065         case 0x6c: /* FCMGE (zero) */
10066         case 0x6d: /* FCMLE (zero) */
10067             if (size == 3 && !is_q) {
10068                 unallocated_encoding(s);
10069                 return;
10070             }
10071             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
10072             return;
10073         case 0x7f: /* FSQRT */
10074             if (size == 3 && !is_q) {
10075                 unallocated_encoding(s);
10076                 return;
10077             }
10078             break;
10079         case 0x1a: /* FCVTNS */
10080         case 0x1b: /* FCVTMS */
10081         case 0x3a: /* FCVTPS */
10082         case 0x3b: /* FCVTZS */
10083         case 0x5a: /* FCVTNU */
10084         case 0x5b: /* FCVTMU */
10085         case 0x7a: /* FCVTPU */
10086         case 0x7b: /* FCVTZU */
10087             need_fpstatus = true;
10088             need_rmode = true;
10089             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10090             if (size == 3 && !is_q) {
10091                 unallocated_encoding(s);
10092                 return;
10093             }
10094             break;
10095         case 0x5c: /* FCVTAU */
10096         case 0x1c: /* FCVTAS */
10097             need_fpstatus = true;
10098             need_rmode = true;
10099             rmode = FPROUNDING_TIEAWAY;
10100             if (size == 3 && !is_q) {
10101                 unallocated_encoding(s);
10102                 return;
10103             }
10104             break;
10105         case 0x3c: /* URECPE */
10106             if (size == 3) {
10107                 unallocated_encoding(s);
10108                 return;
10109             }
10110             /* fall through */
10111         case 0x3d: /* FRECPE */
10112         case 0x7d: /* FRSQRTE */
10113             if (size == 3 && !is_q) {
10114                 unallocated_encoding(s);
10115                 return;
10116             }
10117             if (!fp_access_check(s)) {
10118                 return;
10119             }
10120             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
10121             return;
10122         case 0x56: /* FCVTXN, FCVTXN2 */
10123             if (size == 2) {
10124                 unallocated_encoding(s);
10125                 return;
10126             }
10127             /* fall through */
10128         case 0x16: /* FCVTN, FCVTN2 */
10129             /* handle_2misc_narrow does a 2*size -> size operation, but these
10130              * instructions encode the source size rather than dest size.
10131              */
10132             if (!fp_access_check(s)) {
10133                 return;
10134             }
10135             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
10136             return;
10137         case 0x17: /* FCVTL, FCVTL2 */
10138             if (!fp_access_check(s)) {
10139                 return;
10140             }
10141             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
10142             return;
10143         case 0x18: /* FRINTN */
10144         case 0x19: /* FRINTM */
10145         case 0x38: /* FRINTP */
10146         case 0x39: /* FRINTZ */
10147             need_rmode = true;
10148             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10149             /* fall through */
10150         case 0x59: /* FRINTX */
10151         case 0x79: /* FRINTI */
10152             need_fpstatus = true;
10153             if (size == 3 && !is_q) {
10154                 unallocated_encoding(s);
10155                 return;
10156             }
10157             break;
10158         case 0x58: /* FRINTA */
10159             need_rmode = true;
10160             rmode = FPROUNDING_TIEAWAY;
10161             need_fpstatus = true;
10162             if (size == 3 && !is_q) {
10163                 unallocated_encoding(s);
10164                 return;
10165             }
10166             break;
10167         case 0x7c: /* URSQRTE */
10168             if (size == 3) {
10169                 unallocated_encoding(s);
10170                 return;
10171             }
10172             need_fpstatus = true;
10173             break;
10174         default:
10175             unallocated_encoding(s);
10176             return;
10177         }
10178         break;
10179     }
10180     default:
10181         unallocated_encoding(s);
10182         return;
10183     }
10184
10185     if (!fp_access_check(s)) {
10186         return;
10187     }
10188
10189     if (need_fpstatus) {
10190         tcg_fpstatus = get_fpstatus_ptr();
10191     } else {
10192         TCGV_UNUSED_PTR(tcg_fpstatus);
10193     }
10194     if (need_rmode) {
10195         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
10196         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10197     } else {
10198         TCGV_UNUSED_I32(tcg_rmode);
10199     }
10200
10201     if (size == 3) {
10202         /* All 64-bit element operations can be shared with scalar 2misc */
10203         int pass;
10204
10205         for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
10206             TCGv_i64 tcg_op = tcg_temp_new_i64();
10207             TCGv_i64 tcg_res = tcg_temp_new_i64();
10208
10209             read_vec_element(s, tcg_op, rn, pass, MO_64);
10210
10211             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
10212                             tcg_rmode, tcg_fpstatus);
10213
10214             write_vec_element(s, tcg_res, rd, pass, MO_64);
10215
10216             tcg_temp_free_i64(tcg_res);
10217             tcg_temp_free_i64(tcg_op);
10218         }
10219     } else {
10220         int pass;
10221
10222         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
10223             TCGv_i32 tcg_op = tcg_temp_new_i32();
10224             TCGv_i32 tcg_res = tcg_temp_new_i32();
10225             TCGCond cond;
10226
10227             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
10228
10229             if (size == 2) {
10230                 /* Special cases for 32 bit elements */
10231                 switch (opcode) {
10232                 case 0xa: /* CMLT */
10233                     /* 32 bit integer comparison against zero, result is
10234                      * test ? (2^32 - 1) : 0. We implement via setcond(test)
10235                      * and inverting.
10236                      */
10237                     cond = TCG_COND_LT;
10238                 do_cmop:
10239                     tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
10240                     tcg_gen_neg_i32(tcg_res, tcg_res);
10241                     break;
10242                 case 0x8: /* CMGT, CMGE */
10243                     cond = u ? TCG_COND_GE : TCG_COND_GT;
10244                     goto do_cmop;
10245                 case 0x9: /* CMEQ, CMLE */
10246                     cond = u ? TCG_COND_LE : TCG_COND_EQ;
10247                     goto do_cmop;
10248                 case 0x4: /* CLS */
10249                     if (u) {
10250                         tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
10251                     } else {
10252                         tcg_gen_clrsb_i32(tcg_res, tcg_op);
10253                     }
10254                     break;
10255                 case 0x7: /* SQABS, SQNEG */
10256                     if (u) {
10257                         gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
10258                     } else {
10259                         gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
10260                     }
10261                     break;
10262                 case 0xb: /* ABS, NEG */
10263                     if (u) {
10264                         tcg_gen_neg_i32(tcg_res, tcg_op);
10265                     } else {
10266                         TCGv_i32 tcg_zero = tcg_const_i32(0);
10267                         tcg_gen_neg_i32(tcg_res, tcg_op);
10268                         tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
10269                                             tcg_zero, tcg_op, tcg_res);
10270                         tcg_temp_free_i32(tcg_zero);
10271                     }
10272                     break;
10273                 case 0x2f: /* FABS */
10274                     gen_helper_vfp_abss(tcg_res, tcg_op);
10275                     break;
10276                 case 0x6f: /* FNEG */
10277                     gen_helper_vfp_negs(tcg_res, tcg_op);
10278                     break;
10279                 case 0x7f: /* FSQRT */
10280                     gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
10281                     break;
10282                 case 0x1a: /* FCVTNS */
10283                 case 0x1b: /* FCVTMS */
10284                 case 0x1c: /* FCVTAS */
10285                 case 0x3a: /* FCVTPS */
10286                 case 0x3b: /* FCVTZS */
10287                 {
10288                     TCGv_i32 tcg_shift = tcg_const_i32(0);
10289                     gen_helper_vfp_tosls(tcg_res, tcg_op,
10290                                          tcg_shift, tcg_fpstatus);
10291                     tcg_temp_free_i32(tcg_shift);
10292                     break;
10293                 }
10294                 case 0x5a: /* FCVTNU */
10295                 case 0x5b: /* FCVTMU */
10296                 case 0x5c: /* FCVTAU */
10297                 case 0x7a: /* FCVTPU */
10298                 case 0x7b: /* FCVTZU */
10299                 {
10300                     TCGv_i32 tcg_shift = tcg_const_i32(0);
10301                     gen_helper_vfp_touls(tcg_res, tcg_op,
10302                                          tcg_shift, tcg_fpstatus);
10303                     tcg_temp_free_i32(tcg_shift);
10304                     break;
10305                 }
10306                 case 0x18: /* FRINTN */
10307                 case 0x19: /* FRINTM */
10308                 case 0x38: /* FRINTP */
10309                 case 0x39: /* FRINTZ */
10310                 case 0x58: /* FRINTA */
10311                 case 0x79: /* FRINTI */
10312                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
10313                     break;
10314                 case 0x59: /* FRINTX */
10315                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
10316                     break;
10317                 case 0x7c: /* URSQRTE */
10318                     gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
10319                     break;
10320                 default:
10321                     g_assert_not_reached();
10322                 }
10323             } else {
10324                 /* Use helpers for 8 and 16 bit elements */
10325                 switch (opcode) {
10326                 case 0x5: /* CNT, RBIT */
10327                     /* For these two insns size is part of the opcode specifier
10328                      * (handled earlier); they always operate on byte elements.
10329                      */
10330                     if (u) {
10331                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
10332                     } else {
10333                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
10334                     }
10335                     break;
10336                 case 0x7: /* SQABS, SQNEG */
10337                 {
10338                     NeonGenOneOpEnvFn *genfn;
10339                     static NeonGenOneOpEnvFn * const fns[2][2] = {
10340                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10341                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10342                     };
10343                     genfn = fns[size][u];
10344                     genfn(tcg_res, cpu_env, tcg_op);
10345                     break;
10346                 }
10347                 case 0x8: /* CMGT, CMGE */
10348                 case 0x9: /* CMEQ, CMLE */
10349                 case 0xa: /* CMLT */
10350                 {
10351                     static NeonGenTwoOpFn * const fns[3][2] = {
10352                         { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
10353                         { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
10354                         { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
10355                     };
10356                     NeonGenTwoOpFn *genfn;
10357                     int comp;
10358                     bool reverse;
10359                     TCGv_i32 tcg_zero = tcg_const_i32(0);
10360
10361                     /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
10362                     comp = (opcode - 0x8) * 2 + u;
10363                     /* ...but LE, LT are implemented as reverse GE, GT */
10364                     reverse = (comp > 2);
10365                     if (reverse) {
10366                         comp = 4 - comp;
10367                     }
10368                     genfn = fns[comp][size];
10369                     if (reverse) {
10370                         genfn(tcg_res, tcg_zero, tcg_op);
10371                     } else {
10372                         genfn(tcg_res, tcg_op, tcg_zero);
10373                     }
10374                     tcg_temp_free_i32(tcg_zero);
10375                     break;
10376                 }
10377                 case 0xb: /* ABS, NEG */
10378                     if (u) {
10379                         TCGv_i32 tcg_zero = tcg_const_i32(0);
10380                         if (size) {
10381                             gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
10382                         } else {
10383                             gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
10384                         }
10385                         tcg_temp_free_i32(tcg_zero);
10386                     } else {
10387                         if (size) {
10388                             gen_helper_neon_abs_s16(tcg_res, tcg_op);
10389                         } else {
10390                             gen_helper_neon_abs_s8(tcg_res, tcg_op);
10391                         }
10392                     }
10393                     break;
10394                 case 0x4: /* CLS, CLZ */
10395                     if (u) {
10396                         if (size == 0) {
10397                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
10398                         } else {
10399                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
10400                         }
10401                     } else {
10402                         if (size == 0) {
10403                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
10404                         } else {
10405                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
10406                         }
10407                     }
10408                     break;
10409                 default:
10410                     g_assert_not_reached();
10411                 }
10412             }
10413
10414             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10415
10416             tcg_temp_free_i32(tcg_res);
10417             tcg_temp_free_i32(tcg_op);
10418         }
10419     }
10420     if (!is_q) {
10421         clear_vec_high(s, rd);
10422     }
10423
10424     if (need_rmode) {
10425         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10426         tcg_temp_free_i32(tcg_rmode);
10427     }
10428     if (need_fpstatus) {
10429         tcg_temp_free_ptr(tcg_fpstatus);
10430     }
10431 }
10432
10433 /* C3.6.13 AdvSIMD scalar x indexed element
10434  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10435  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10436  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10437  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10438  * C3.6.18 AdvSIMD vector x indexed element
10439  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10440  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10441  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10442  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10443  */
10444 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
10445 {
10446     /* This encoding has two kinds of instruction:
10447      *  normal, where we perform elt x idxelt => elt for each
10448      *     element in the vector
10449      *  long, where we perform elt x idxelt and generate a result of
10450      *     double the width of the input element
10451      * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
10452      */
10453     bool is_scalar = extract32(insn, 28, 1);
10454     bool is_q = extract32(insn, 30, 1);
10455     bool u = extract32(insn, 29, 1);
10456     int size = extract32(insn, 22, 2);
10457     int l = extract32(insn, 21, 1);
10458     int m = extract32(insn, 20, 1);
10459     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
10460     int rm = extract32(insn, 16, 4);
10461     int opcode = extract32(insn, 12, 4);
10462     int h = extract32(insn, 11, 1);
10463     int rn = extract32(insn, 5, 5);
10464     int rd = extract32(insn, 0, 5);
10465     bool is_long = false;
10466     bool is_fp = false;
10467     int index;
10468     TCGv_ptr fpst;
10469
10470     switch (opcode) {
10471     case 0x0: /* MLA */
10472     case 0x4: /* MLS */
10473         if (!u || is_scalar) {
10474             unallocated_encoding(s);
10475             return;
10476         }
10477         break;
10478     case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10479     case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10480     case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
10481         if (is_scalar) {
10482             unallocated_encoding(s);
10483             return;
10484         }
10485         is_long = true;
10486         break;
10487     case 0x3: /* SQDMLAL, SQDMLAL2 */
10488     case 0x7: /* SQDMLSL, SQDMLSL2 */
10489     case 0xb: /* SQDMULL, SQDMULL2 */
10490         is_long = true;
10491         /* fall through */
10492     case 0xc: /* SQDMULH */
10493     case 0xd: /* SQRDMULH */
10494         if (u) {
10495             unallocated_encoding(s);
10496             return;
10497         }
10498         break;
10499     case 0x8: /* MUL */
10500         if (u || is_scalar) {
10501             unallocated_encoding(s);
10502             return;
10503         }
10504         break;
10505     case 0x1: /* FMLA */
10506     case 0x5: /* FMLS */
10507         if (u) {
10508             unallocated_encoding(s);
10509             return;
10510         }
10511         /* fall through */
10512     case 0x9: /* FMUL, FMULX */
10513         if (!extract32(size, 1, 1)) {
10514             unallocated_encoding(s);
10515             return;
10516         }
10517         is_fp = true;
10518         break;
10519     default:
10520         unallocated_encoding(s);
10521         return;
10522     }
10523
10524     if (is_fp) {
10525         /* low bit of size indicates single/double */
10526         size = extract32(size, 0, 1) ? 3 : 2;
10527         if (size == 2) {
10528             index = h << 1 | l;
10529         } else {
10530             if (l || !is_q) {
10531                 unallocated_encoding(s);
10532                 return;
10533             }
10534             index = h;
10535         }
10536         rm |= (m << 4);
10537     } else {
10538         switch (size) {
10539         case 1:
10540             index = h << 2 | l << 1 | m;
10541             break;
10542         case 2:
10543             index = h << 1 | l;
10544             rm |= (m << 4);
10545             break;
10546         default:
10547             unallocated_encoding(s);
10548             return;
10549         }
10550     }
10551
10552     if (!fp_access_check(s)) {
10553         return;
10554     }
10555
10556     if (is_fp) {
10557         fpst = get_fpstatus_ptr();
10558     } else {
10559         TCGV_UNUSED_PTR(fpst);
10560     }
10561
10562     if (size == 3) {
10563         TCGv_i64 tcg_idx = tcg_temp_new_i64();
10564         int pass;
10565
10566         assert(is_fp && is_q && !is_long);
10567
10568         read_vec_element(s, tcg_idx, rm, index, MO_64);
10569
10570         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10571             TCGv_i64 tcg_op = tcg_temp_new_i64();
10572             TCGv_i64 tcg_res = tcg_temp_new_i64();
10573
10574             read_vec_element(s, tcg_op, rn, pass, MO_64);
10575
10576             switch (opcode) {
10577             case 0x5: /* FMLS */
10578                 /* As usual for ARM, separate negation for fused multiply-add */
10579                 gen_helper_vfp_negd(tcg_op, tcg_op);
10580                 /* fall through */
10581             case 0x1: /* FMLA */
10582                 read_vec_element(s, tcg_res, rd, pass, MO_64);
10583                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10584                 break;
10585             case 0x9: /* FMUL, FMULX */
10586                 if (u) {
10587                     gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
10588                 } else {
10589                     gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
10590                 }
10591                 break;
10592             default:
10593                 g_assert_not_reached();
10594             }
10595
10596             write_vec_element(s, tcg_res, rd, pass, MO_64);
10597             tcg_temp_free_i64(tcg_op);
10598             tcg_temp_free_i64(tcg_res);
10599         }
10600
10601         if (is_scalar) {
10602             clear_vec_high(s, rd);
10603         }
10604
10605         tcg_temp_free_i64(tcg_idx);
10606     } else if (!is_long) {
10607         /* 32 bit floating point, or 16 or 32 bit integer.
10608          * For the 16 bit scalar case we use the usual Neon helpers and
10609          * rely on the fact that 0 op 0 == 0 with no side effects.
10610          */
10611         TCGv_i32 tcg_idx = tcg_temp_new_i32();
10612         int pass, maxpasses;
10613
10614         if (is_scalar) {
10615             maxpasses = 1;
10616         } else {
10617             maxpasses = is_q ? 4 : 2;
10618         }
10619
10620         read_vec_element_i32(s, tcg_idx, rm, index, size);
10621
10622         if (size == 1 && !is_scalar) {
10623             /* The simplest way to handle the 16x16 indexed ops is to duplicate
10624              * the index into both halves of the 32 bit tcg_idx and then use
10625              * the usual Neon helpers.
10626              */
10627             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10628         }
10629
10630         for (pass = 0; pass < maxpasses; pass++) {
10631             TCGv_i32 tcg_op = tcg_temp_new_i32();
10632             TCGv_i32 tcg_res = tcg_temp_new_i32();
10633
10634             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
10635
10636             switch (opcode) {
10637             case 0x0: /* MLA */
10638             case 0x4: /* MLS */
10639             case 0x8: /* MUL */
10640             {
10641                 static NeonGenTwoOpFn * const fns[2][2] = {
10642                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
10643                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
10644                 };
10645                 NeonGenTwoOpFn *genfn;
10646                 bool is_sub = opcode == 0x4;
10647
10648                 if (size == 1) {
10649                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
10650                 } else {
10651                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
10652                 }
10653                 if (opcode == 0x8) {
10654                     break;
10655                 }
10656                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
10657                 genfn = fns[size - 1][is_sub];
10658                 genfn(tcg_res, tcg_op, tcg_res);
10659                 break;
10660             }
10661             case 0x5: /* FMLS */
10662                 /* As usual for ARM, separate negation for fused multiply-add */
10663                 gen_helper_vfp_negs(tcg_op, tcg_op);
10664                 /* fall through */
10665             case 0x1: /* FMLA */
10666                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10667                 gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10668                 break;
10669             case 0x9: /* FMUL, FMULX */
10670                 if (u) {
10671                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
10672                 } else {
10673                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
10674                 }
10675                 break;
10676             case 0xc: /* SQDMULH */
10677                 if (size == 1) {
10678                     gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
10679                                                tcg_op, tcg_idx);
10680                 } else {
10681                     gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
10682                                                tcg_op, tcg_idx);
10683                 }
10684                 break;
10685             case 0xd: /* SQRDMULH */
10686                 if (size == 1) {
10687                     gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
10688                                                 tcg_op, tcg_idx);
10689                 } else {
10690                     gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
10691                                                 tcg_op, tcg_idx);
10692                 }
10693                 break;
10694             default:
10695                 g_assert_not_reached();
10696             }
10697
10698             if (is_scalar) {
10699                 write_fp_sreg(s, rd, tcg_res);
10700             } else {
10701                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10702             }
10703
10704             tcg_temp_free_i32(tcg_op);
10705             tcg_temp_free_i32(tcg_res);
10706         }
10707
10708         tcg_temp_free_i32(tcg_idx);
10709
10710         if (!is_q) {
10711             clear_vec_high(s, rd);
10712         }
10713     } else {
10714         /* long ops: 16x16->32 or 32x32->64 */
10715         TCGv_i64 tcg_res[2];
10716         int pass;
10717         bool satop = extract32(opcode, 0, 1);
10718         TCGMemOp memop = MO_32;
10719
10720         if (satop || !u) {
10721             memop |= MO_SIGN;
10722         }
10723
10724         if (size == 2) {
10725             TCGv_i64 tcg_idx = tcg_temp_new_i64();
10726
10727             read_vec_element(s, tcg_idx, rm, index, memop);
10728
10729             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10730                 TCGv_i64 tcg_op = tcg_temp_new_i64();
10731                 TCGv_i64 tcg_passres;
10732                 int passelt;
10733
10734                 if (is_scalar) {
10735                     passelt = 0;
10736                 } else {
10737                     passelt = pass + (is_q * 2);
10738                 }
10739
10740                 read_vec_element(s, tcg_op, rn, passelt, memop);
10741
10742                 tcg_res[pass] = tcg_temp_new_i64();
10743
10744                 if (opcode == 0xa || opcode == 0xb) {
10745                     /* Non-accumulating ops */
10746                     tcg_passres = tcg_res[pass];
10747                 } else {
10748                     tcg_passres = tcg_temp_new_i64();
10749                 }
10750
10751                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
10752                 tcg_temp_free_i64(tcg_op);
10753
10754                 if (satop) {
10755                     /* saturating, doubling */
10756                     gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10757                                                       tcg_passres, tcg_passres);
10758                 }
10759
10760                 if (opcode == 0xa || opcode == 0xb) {
10761                     continue;
10762                 }
10763
10764                 /* Accumulating op: handle accumulate step */
10765                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10766
10767                 switch (opcode) {
10768                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10769                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10770                     break;
10771                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10772                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10773                     break;
10774                 case 0x7: /* SQDMLSL, SQDMLSL2 */
10775                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
10776                     /* fall through */
10777                 case 0x3: /* SQDMLAL, SQDMLAL2 */
10778                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10779                                                       tcg_res[pass],
10780                                                       tcg_passres);
10781                     break;
10782                 default:
10783                     g_assert_not_reached();
10784                 }
10785                 tcg_temp_free_i64(tcg_passres);
10786             }
10787             tcg_temp_free_i64(tcg_idx);
10788
10789             if (is_scalar) {
10790                 clear_vec_high(s, rd);
10791             }
10792         } else {
10793             TCGv_i32 tcg_idx = tcg_temp_new_i32();
10794
10795             assert(size == 1);
10796             read_vec_element_i32(s, tcg_idx, rm, index, size);
10797
10798             if (!is_scalar) {
10799                 /* The simplest way to handle the 16x16 indexed ops is to
10800                  * duplicate the index into both halves of the 32 bit tcg_idx
10801                  * and then use the usual Neon helpers.
10802                  */
10803                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10804             }
10805
10806             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10807                 TCGv_i32 tcg_op = tcg_temp_new_i32();
10808                 TCGv_i64 tcg_passres;
10809
10810                 if (is_scalar) {
10811                     read_vec_element_i32(s, tcg_op, rn, pass, size);
10812                 } else {
10813                     read_vec_element_i32(s, tcg_op, rn,
10814                                          pass + (is_q * 2), MO_32);
10815                 }
10816
10817                 tcg_res[pass] = tcg_temp_new_i64();
10818
10819                 if (opcode == 0xa || opcode == 0xb) {
10820                     /* Non-accumulating ops */
10821                     tcg_passres = tcg_res[pass];
10822                 } else {
10823                     tcg_passres = tcg_temp_new_i64();
10824                 }
10825
10826                 if (memop & MO_SIGN) {
10827                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
10828                 } else {
10829                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
10830                 }
10831                 if (satop) {
10832                     gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10833                                                       tcg_passres, tcg_passres);
10834                 }
10835                 tcg_temp_free_i32(tcg_op);
10836
10837                 if (opcode == 0xa || opcode == 0xb) {
10838                     continue;
10839                 }
10840
10841                 /* Accumulating op: handle accumulate step */
10842                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10843
10844                 switch (opcode) {
10845                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10846                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
10847                                              tcg_passres);
10848                     break;
10849                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10850                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
10851                                              tcg_passres);
10852                     break;
10853                 case 0x7: /* SQDMLSL, SQDMLSL2 */
10854                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10855                     /* fall through */
10856                 case 0x3: /* SQDMLAL, SQDMLAL2 */
10857                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10858                                                       tcg_res[pass],
10859                                                       tcg_passres);
10860                     break;
10861                 default:
10862                     g_assert_not_reached();
10863                 }
10864                 tcg_temp_free_i64(tcg_passres);
10865             }
10866             tcg_temp_free_i32(tcg_idx);
10867
10868             if (is_scalar) {
10869                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
10870             }
10871         }
10872
10873         if (is_scalar) {
10874             tcg_res[1] = tcg_const_i64(0);
10875         }
10876
10877         for (pass = 0; pass < 2; pass++) {
10878             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10879             tcg_temp_free_i64(tcg_res[pass]);
10880         }
10881     }
10882
10883     if (!TCGV_IS_UNUSED_PTR(fpst)) {
10884         tcg_temp_free_ptr(fpst);
10885     }
10886 }
10887
10888 /* C3.6.19 Crypto AES
10889  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10890  * +-----------------+------+-----------+--------+-----+------+------+
10891  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10892  * +-----------------+------+-----------+--------+-----+------+------+
10893  */
10894 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
10895 {
10896     int size = extract32(insn, 22, 2);
10897     int opcode = extract32(insn, 12, 5);
10898     int rn = extract32(insn, 5, 5);
10899     int rd = extract32(insn, 0, 5);
10900     int decrypt;
10901     TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt;
10902     CryptoThreeOpEnvFn *genfn;
10903
10904     if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
10905         || size != 0) {
10906         unallocated_encoding(s);
10907         return;
10908     }
10909
10910     switch (opcode) {
10911     case 0x4: /* AESE */
10912         decrypt = 0;
10913         genfn = gen_helper_crypto_aese;
10914         break;
10915     case 0x6: /* AESMC */
10916         decrypt = 0;
10917         genfn = gen_helper_crypto_aesmc;
10918         break;
10919     case 0x5: /* AESD */
10920         decrypt = 1;
10921         genfn = gen_helper_crypto_aese;
10922         break;
10923     case 0x7: /* AESIMC */
10924         decrypt = 1;
10925         genfn = gen_helper_crypto_aesmc;
10926         break;
10927     default:
10928         unallocated_encoding(s);
10929         return;
10930     }
10931
10932     /* Note that we convert the Vx register indexes into the
10933      * index within the vfp.regs[] array, so we can share the
10934      * helper with the AArch32 instructions.
10935      */
10936     tcg_rd_regno = tcg_const_i32(rd << 1);
10937     tcg_rn_regno = tcg_const_i32(rn << 1);
10938     tcg_decrypt = tcg_const_i32(decrypt);
10939
10940     genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt);
10941
10942     tcg_temp_free_i32(tcg_rd_regno);
10943     tcg_temp_free_i32(tcg_rn_regno);
10944     tcg_temp_free_i32(tcg_decrypt);
10945 }
10946
10947 /* C3.6.20 Crypto three-reg SHA
10948  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
10949  * +-----------------+------+---+------+---+--------+-----+------+------+
10950  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
10951  * +-----------------+------+---+------+---+--------+-----+------+------+
10952  */
10953 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
10954 {
10955     int size = extract32(insn, 22, 2);
10956     int opcode = extract32(insn, 12, 3);
10957     int rm = extract32(insn, 16, 5);
10958     int rn = extract32(insn, 5, 5);
10959     int rd = extract32(insn, 0, 5);
10960     CryptoThreeOpEnvFn *genfn;
10961     TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno;
10962     int feature = ARM_FEATURE_V8_SHA256;
10963
10964     if (size != 0) {
10965         unallocated_encoding(s);
10966         return;
10967     }
10968
10969     switch (opcode) {
10970     case 0: /* SHA1C */
10971     case 1: /* SHA1P */
10972     case 2: /* SHA1M */
10973     case 3: /* SHA1SU0 */
10974         genfn = NULL;
10975         feature = ARM_FEATURE_V8_SHA1;
10976         break;
10977     case 4: /* SHA256H */
10978         genfn = gen_helper_crypto_sha256h;
10979         break;
10980     case 5: /* SHA256H2 */
10981         genfn = gen_helper_crypto_sha256h2;
10982         break;
10983     case 6: /* SHA256SU1 */
10984         genfn = gen_helper_crypto_sha256su1;
10985         break;
10986     default:
10987         unallocated_encoding(s);
10988         return;
10989     }
10990
10991     if (!arm_dc_feature(s, feature)) {
10992         unallocated_encoding(s);
10993         return;
10994     }
10995
10996     tcg_rd_regno = tcg_const_i32(rd << 1);
10997     tcg_rn_regno = tcg_const_i32(rn << 1);
10998     tcg_rm_regno = tcg_const_i32(rm << 1);
10999
11000     if (genfn) {
11001         genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno);
11002     } else {
11003         TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
11004
11005         gen_helper_crypto_sha1_3reg(cpu_env, tcg_rd_regno,
11006                                     tcg_rn_regno, tcg_rm_regno, tcg_opcode);
11007         tcg_temp_free_i32(tcg_opcode);
11008     }
11009
11010     tcg_temp_free_i32(tcg_rd_regno);
11011     tcg_temp_free_i32(tcg_rn_regno);
11012     tcg_temp_free_i32(tcg_rm_regno);
11013 }
11014
11015 /* C3.6.21 Crypto two-reg SHA
11016  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
11017  * +-----------------+------+-----------+--------+-----+------+------+
11018  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11019  * +-----------------+------+-----------+--------+-----+------+------+
11020  */
11021 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
11022 {
11023     int size = extract32(insn, 22, 2);
11024     int opcode = extract32(insn, 12, 5);
11025     int rn = extract32(insn, 5, 5);
11026     int rd = extract32(insn, 0, 5);
11027     CryptoTwoOpEnvFn *genfn;
11028     int feature;
11029     TCGv_i32 tcg_rd_regno, tcg_rn_regno;
11030
11031     if (size != 0) {
11032         unallocated_encoding(s);
11033         return;
11034     }
11035
11036     switch (opcode) {
11037     case 0: /* SHA1H */
11038         feature = ARM_FEATURE_V8_SHA1;
11039         genfn = gen_helper_crypto_sha1h;
11040         break;
11041     case 1: /* SHA1SU1 */
11042         feature = ARM_FEATURE_V8_SHA1;
11043         genfn = gen_helper_crypto_sha1su1;
11044         break;
11045     case 2: /* SHA256SU0 */
11046         feature = ARM_FEATURE_V8_SHA256;
11047         genfn = gen_helper_crypto_sha256su0;
11048         break;
11049     default:
11050         unallocated_encoding(s);
11051         return;
11052     }
11053
11054     if (!arm_dc_feature(s, feature)) {
11055         unallocated_encoding(s);
11056         return;
11057     }
11058
11059     tcg_rd_regno = tcg_const_i32(rd << 1);
11060     tcg_rn_regno = tcg_const_i32(rn << 1);
11061
11062     genfn(cpu_env, tcg_rd_regno, tcg_rn_regno);
11063
11064     tcg_temp_free_i32(tcg_rd_regno);
11065     tcg_temp_free_i32(tcg_rn_regno);
11066 }
11067
11068 /* C3.6 Data processing - SIMD, inc Crypto
11069  *
11070  * As the decode gets a little complex we are using a table based
11071  * approach for this part of the decode.
11072  */
11073 static const AArch64DecodeTable data_proc_simd[] = {
11074     /* pattern  ,  mask     ,  fn                        */
11075     { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
11076     { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
11077     { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
11078     { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
11079     { 0x0e000400, 0x9fe08400, disas_simd_copy },
11080     { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
11081     /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
11082     { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
11083     { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
11084     { 0x0e000000, 0xbf208c00, disas_simd_tb },
11085     { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
11086     { 0x2e000000, 0xbf208400, disas_simd_ext },
11087     { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
11088     { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
11089     { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
11090     { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
11091     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
11092     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
11093     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
11094     { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
11095     { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
11096     { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
11097     { 0x00000000, 0x00000000, NULL }
11098 };
11099
11100 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
11101 {
11102     /* Note that this is called with all non-FP cases from
11103      * table C3-6 so it must UNDEF for entries not specifically
11104      * allocated to instructions in that table.
11105      */
11106     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
11107     if (fn) {
11108         fn(s, insn);
11109     } else {
11110         unallocated_encoding(s);
11111     }
11112 }
11113
11114 /* C3.6 Data processing - SIMD and floating point */
11115 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
11116 {
11117     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
11118         disas_data_proc_fp(s, insn);
11119     } else {
11120         /* SIMD, including crypto */
11121         disas_data_proc_simd(s, insn);
11122     }
11123 }
11124
11125 /* C3.1 A64 instruction index by encoding */
11126 static void disas_a64_insn(CPUARMState *env, DisasContext *s)
11127 {
11128     uint32_t insn;
11129
11130     insn = arm_ldl_code(env, s->pc, s->sctlr_b);
11131     s->insn = insn;
11132     s->pc += 4;
11133
11134     s->fp_access_checked = false;
11135
11136     switch (extract32(insn, 25, 4)) {
11137     case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
11138         unallocated_encoding(s);
11139         break;
11140     case 0x8: case 0x9: /* Data processing - immediate */
11141         disas_data_proc_imm(s, insn);
11142         break;
11143     case 0xa: case 0xb: /* Branch, exception generation and system insns */
11144         disas_b_exc_sys(s, insn);
11145         break;
11146     case 0x4:
11147     case 0x6:
11148     case 0xc:
11149     case 0xe:      /* Loads and stores */
11150         disas_ldst(s, insn);
11151         break;
11152     case 0x5:
11153     case 0xd:      /* Data processing - register */
11154         disas_data_proc_reg(s, insn);
11155         break;
11156     case 0x7:
11157     case 0xf:      /* Data processing - SIMD and floating point */
11158         disas_data_proc_simd_fp(s, insn);
11159         break;
11160     default:
11161         assert(FALSE); /* all 15 cases should be handled above */
11162         break;
11163     }
11164
11165     /* if we allocated any temporaries, free them here */
11166     free_tmp_a64(s);
11167 }
11168
11169 void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
11170 {
11171     CPUState *cs = CPU(cpu);
11172     CPUARMState *env = &cpu->env;
11173     DisasContext dc1, *dc = &dc1;
11174     target_ulong pc_start;
11175     target_ulong next_page_start;
11176     int num_insns;
11177     int max_insns;
11178
11179     pc_start = tb->pc;
11180
11181     dc->tb = tb;
11182
11183     dc->is_jmp = DISAS_NEXT;
11184     dc->pc = pc_start;
11185     dc->singlestep_enabled = cs->singlestep_enabled;
11186     dc->condjmp = 0;
11187
11188     dc->aarch64 = 1;
11189     /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
11190      * there is no secure EL1, so we route exceptions to EL3.
11191      */
11192     dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
11193                                !arm_el_is_aa64(env, 3);
11194     dc->thumb = 0;
11195     dc->sctlr_b = 0;
11196     dc->be_data = ARM_TBFLAG_BE_DATA(tb->flags) ? MO_BE : MO_LE;
11197     dc->condexec_mask = 0;
11198     dc->condexec_cond = 0;
11199     dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags);
11200     dc->tbi0 = ARM_TBFLAG_TBI0(tb->flags);
11201     dc->tbi1 = ARM_TBFLAG_TBI1(tb->flags);
11202     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
11203 #if !defined(CONFIG_USER_ONLY)
11204     dc->user = (dc->current_el == 0);
11205 #endif
11206     dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(tb->flags);
11207     dc->vec_len = 0;
11208     dc->vec_stride = 0;
11209     dc->cp_regs = cpu->cp_regs;
11210     dc->features = env->features;
11211
11212     /* Single step state. The code-generation logic here is:
11213      *  SS_ACTIVE == 0:
11214      *   generate code with no special handling for single-stepping (except
11215      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
11216      *   this happens anyway because those changes are all system register or
11217      *   PSTATE writes).
11218      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
11219      *   emit code for one insn
11220      *   emit code to clear PSTATE.SS
11221      *   emit code to generate software step exception for completed step
11222      *   end TB (as usual for having generated an exception)
11223      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
11224      *   emit code to generate a software step exception
11225      *   end the TB
11226      */
11227     dc->ss_active = ARM_TBFLAG_SS_ACTIVE(tb->flags);
11228     dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(tb->flags);
11229     dc->is_ldex = false;
11230     dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);
11231
11232     init_tmp_a64_array(dc);
11233
11234     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
11235     num_insns = 0;
11236     max_insns = tb->cflags & CF_COUNT_MASK;
11237     if (max_insns == 0) {
11238         max_insns = CF_COUNT_MASK;
11239     }
11240     if (max_insns > TCG_MAX_INSNS) {
11241         max_insns = TCG_MAX_INSNS;
11242     }
11243
11244     gen_tb_start(tb);
11245
11246     tcg_clear_temp_count();
11247
11248     do {
11249         dc->insn_start_idx = tcg_op_buf_count();
11250         tcg_gen_insn_start(dc->pc, 0, 0);
11251         num_insns++;
11252
11253         if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
11254             CPUBreakpoint *bp;
11255             QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
11256                 if (bp->pc == dc->pc) {
11257                     if (bp->flags & BP_CPU) {
11258                         gen_a64_set_pc_im(dc->pc);
11259                         gen_helper_check_breakpoints(cpu_env);
11260                         /* End the TB early; it likely won't be executed */
11261                         dc->is_jmp = DISAS_UPDATE;
11262                     } else {
11263                         gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
11264                         /* The address covered by the breakpoint must be
11265                            included in [tb->pc, tb->pc + tb->size) in order
11266                            to for it to be properly cleared -- thus we
11267                            increment the PC here so that the logic setting
11268                            tb->size below does the right thing.  */
11269                         dc->pc += 4;
11270                         goto done_generating;
11271                     }
11272                     break;
11273                 }
11274             }
11275         }
11276
11277         if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
11278             gen_io_start();
11279         }
11280
11281         if (dc->ss_active && !dc->pstate_ss) {
11282             /* Singlestep state is Active-pending.
11283              * If we're in this state at the start of a TB then either
11284              *  a) we just took an exception to an EL which is being debugged
11285              *     and this is the first insn in the exception handler
11286              *  b) debug exceptions were masked and we just unmasked them
11287              *     without changing EL (eg by clearing PSTATE.D)
11288              * In either case we're going to take a swstep exception in the
11289              * "did not step an insn" case, and so the syndrome ISV and EX
11290              * bits should be zero.
11291              */
11292             assert(num_insns == 1);
11293             gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
11294                           default_exception_el(dc));
11295             dc->is_jmp = DISAS_EXC;
11296             break;
11297         }
11298
11299         disas_a64_insn(env, dc);
11300
11301         if (tcg_check_temp_count()) {
11302             fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
11303                     dc->pc);
11304         }
11305
11306         /* Translation stops when a conditional branch is encountered.
11307          * Otherwise the subsequent code could get translated several times.
11308          * Also stop translation when a page boundary is reached.  This
11309          * ensures prefetch aborts occur at the right place.
11310          */
11311     } while (!dc->is_jmp && !tcg_op_buf_full() &&
11312              !cs->singlestep_enabled &&
11313              !singlestep &&
11314              !dc->ss_active &&
11315              dc->pc < next_page_start &&
11316              num_insns < max_insns);
11317
11318     if (tb->cflags & CF_LAST_IO) {
11319         gen_io_end();
11320     }
11321
11322     if (unlikely(cs->singlestep_enabled || dc->ss_active)
11323         && dc->is_jmp != DISAS_EXC) {
11324         /* Note that this means single stepping WFI doesn't halt the CPU.
11325          * For conditional branch insns this is harmless unreachable code as
11326          * gen_goto_tb() has already handled emitting the debug exception
11327          * (and thus a tb-jump is not possible when singlestepping).
11328          */
11329         assert(dc->is_jmp != DISAS_TB_JUMP);
11330         if (dc->is_jmp != DISAS_JUMP) {
11331             gen_a64_set_pc_im(dc->pc);
11332         }
11333         if (cs->singlestep_enabled) {
11334             gen_exception_internal(EXCP_DEBUG);
11335         } else {
11336             gen_step_complete_exception(dc);
11337         }
11338     } else {
11339         switch (dc->is_jmp) {
11340         case DISAS_NEXT:
11341             gen_goto_tb(dc, 1, dc->pc);
11342             break;
11343         default:
11344         case DISAS_UPDATE:
11345             gen_a64_set_pc_im(dc->pc);
11346             /* fall through */
11347         case DISAS_JUMP:
11348             /* indicate that the hash table must be used to find the next TB */
11349             tcg_gen_exit_tb(0);
11350             break;
11351         case DISAS_TB_JUMP:
11352         case DISAS_EXC:
11353         case DISAS_SWI:
11354             break;
11355         case DISAS_WFE:
11356             gen_a64_set_pc_im(dc->pc);
11357             gen_helper_wfe(cpu_env);
11358             break;
11359         case DISAS_YIELD:
11360             gen_a64_set_pc_im(dc->pc);
11361             gen_helper_yield(cpu_env);
11362             break;
11363         case DISAS_WFI:
11364             /* This is a special case because we don't want to just halt the CPU
11365              * if trying to debug across a WFI.
11366              */
11367             gen_a64_set_pc_im(dc->pc);
11368             gen_helper_wfi(cpu_env);
11369             /* The helper doesn't necessarily throw an exception, but we
11370              * must go back to the main loop to check for interrupts anyway.
11371              */
11372             tcg_gen_exit_tb(0);
11373             break;
11374         }
11375     }
11376
11377 done_generating:
11378     gen_tb_end(tb, num_insns);
11379
11380 #ifdef DEBUG_DISAS
11381     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) &&
11382         qemu_log_in_addr_range(pc_start)) {
11383         qemu_log_lock();
11384         qemu_log("----------------\n");
11385         qemu_log("IN: %s\n", lookup_symbol(pc_start));
11386         log_target_disas(cs, pc_start, dc->pc - pc_start,
11387                          4 | (bswap_code(dc->sctlr_b) ? 2 : 0));
11388         qemu_log("\n");
11389         qemu_log_unlock();
11390     }
11391 #endif
11392     tb->size = dc->pc - pc_start;
11393     tb->icount = num_insns;
11394 }