target-arm/translate-a64.c

   1 /*
   2  *  AArch64 translation
   3  *
   4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include "qemu/osdep.h"
  20
  21 #include "cpu.h"
  22 #include "exec/exec-all.h"
  23 #include "tcg-op.h"
  24 #include "qemu/log.h"
  25 #include "arm_ldst.h"
  26 #include "translate.h"
  27 #include "internals.h"
  28 #include "qemu/host-utils.h"
  29
  30 #include "exec/semihost.h"
  31 #include "exec/gen-icount.h"
  32
  33 #include "exec/helper-proto.h"
  34 #include "exec/helper-gen.h"
  35 #include "exec/log.h"
  36
  37 #include "trace-tcg.h"
  38
/* TCG globals for the 32 xregs[] slots of CPUARMState. They are created
 * in a64_translate_init() and named after the entries of regnames[].
 */
static TCGv_i64 cpu_X[32];
/* TCG global for CPUARMState.pc, created in a64_translate_init() */
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
/* TCG global for CPUARMState.exclusive_high */
static TCGv_i64 cpu_exclusive_high;
/* Forward declaration; cpu_reg() is defined further down in this file */
static TCGv_i64 cpu_reg(DisasContext *s, int reg);
  45
  46 static const char *regnames[] = {
  47     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
  48     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
  49     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
  50     "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
  51 };
  52
  53 enum a64_shift_type {
  54     A64_SHIFT_TYPE_LSL = 0,
  55     A64_SHIFT_TYPE_LSR = 1,
  56     A64_SHIFT_TYPE_ASR = 2,
  57     A64_SHIFT_TYPE_ROR = 3
  58 };
  59
/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
/* Handler signature: receives the translation context and the 32 bit
 * instruction word.
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

/* One decode table entry: a pattern/mask pair and the handler for it.
 * NOTE(review): presumably an entry matches when
 * (insn & mask) == pattern; the lookup code is not visible in this part
 * of the file, so confirm there.
 */
typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;
  70
/* Function prototype for gen_ functions for calling Neon helpers.
 *
 * Each typedef only fixes an argument list. The NeonGen*Env variants
 * take a TCGv_ptr as their second argument, the Crypto*Env variants take
 * it first, and the TwoSingleOP/TwoDoubleOP variants take one last.
 * NOTE(review): for the NeonGen* types the first argument is presumably
 * the destination, following the usual gen_helper convention - confirm
 * against the helper declarations.
 */
typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
  85
  86 /* initialize TCG globals.  */
  87 void a64_translate_init(void)
  88 {
  89     int i;
  90
  91     cpu_pc = tcg_global_mem_new_i64(cpu_env,
  92                                     offsetof(CPUARMState, pc),
  93                                     "pc");
  94     for (i = 0; i < 32; i++) {
  95         cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
  96                                           offsetof(CPUARMState, xregs[i]),
  97                                           regnames[i]);
  98     }
  99
 100     cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
 101         offsetof(CPUARMState, exclusive_high), "exclusive_high");
 102 }
 103
 104 static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s)
 105 {
 106     /* Return the mmu_idx to use for A64 "unprivileged load/store" insns:
 107      *  if EL1, access as if EL0; otherwise access at current EL
 108      */
 109     switch (s->mmu_idx) {
 110     case ARMMMUIdx_S12NSE1:
 111         return ARMMMUIdx_S12NSE0;
 112     case ARMMMUIdx_S1SE1:
 113         return ARMMMUIdx_S1SE0;
 114     case ARMMMUIdx_S2NS:
 115         g_assert_not_reached();
 116     default:
 117         return s->mmu_idx;
 118     }
 119 }
 120
 121 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
 122                             fprintf_function cpu_fprintf, int flags)
 123 {
 124     ARMCPU *cpu = ARM_CPU(cs);
 125     CPUARMState *env = &cpu->env;
 126     uint32_t psr = pstate_read(env);
 127     int i;
 128     int el = arm_current_el(env);
 129     const char *ns_status;
 130
 131     cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
 132             env->pc, env->xregs[31]);
 133     for (i = 0; i < 31; i++) {
 134         cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
 135         if ((i % 4) == 3) {
 136             cpu_fprintf(f, "\n");
 137         } else {
 138             cpu_fprintf(f, " ");
 139         }
 140     }
 141
 142     if (arm_feature(env, ARM_FEATURE_EL3) && el != 3) {
 143         ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
 144     } else {
 145         ns_status = "";
 146     }
 147
 148     cpu_fprintf(f, "\nPSTATE=%08x %c%c%c%c %sEL%d%c\n",
 149                 psr,
 150                 psr & PSTATE_N ? 'N' : '-',
 151                 psr & PSTATE_Z ? 'Z' : '-',
 152                 psr & PSTATE_C ? 'C' : '-',
 153                 psr & PSTATE_V ? 'V' : '-',
 154                 ns_status,
 155                 el,
 156                 psr & PSTATE_SP ? 'h' : 't');
 157
 158     if (flags & CPU_DUMP_FPU) {
 159         int numvfpregs = 32;
 160         for (i = 0; i < numvfpregs; i += 2) {
 161             uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
 162             uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
 163             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
 164                         i, vhi, vlo);
 165             vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
 166             vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
 167             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
 168                         i + 1, vhi, vlo);
 169         }
 170         cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
 171                     vfp_get_fpcr(env), vfp_get_fpsr(env));
 172     }
 173 }
 174
/* Emit a TCG op that loads the constant @val into the cpu_pc global. */
void gen_a64_set_pc_im(uint64_t val)
{
    tcg_gen_movi_i64(cpu_pc, val);
}
 179
 180 /* Load the PC from a generic TCG variable.
 181  *
 182  * If address tagging is enabled via the TCR TBI bits, then loading
 183  * an address into the PC will clear out any tag in the it:
 184  *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 185  *    then the address is zero-extended, clearing bits [63:56]
 186  *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 187  *    and TBI1 controls addressses with bit 55 == 1.
 188  *    If the appropriate TBI bit is set for the address then
 189  *    the address is sign-extended from bit 55 into bits [63:56]
 190  *
 191  * We can avoid doing this for relative-branches, because the
 192  * PC + offset can never overflow into the tag bits (assuming
 193  * that virtual addresses are less than 56 bits wide, as they
 194  * are currently), but we must handle it for branch-to-register.
 195  */
 196 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
 197 {
 198
 199     if (s->current_el <= 1) {
 200         /* Test if NEITHER or BOTH TBI values are set.  If so, no need to
 201          * examine bit 55 of address, can just generate code.
 202          * If mixed, then test via generated code
 203          */
 204         if (s->tbi0 && s->tbi1) {
 205             TCGv_i64 tmp_reg = tcg_temp_new_i64();
 206             /* Both bits set, sign extension from bit 55 into [63:56] will
 207              * cover both cases
 208              */
 209             tcg_gen_shli_i64(tmp_reg, src, 8);
 210             tcg_gen_sari_i64(cpu_pc, tmp_reg, 8);
 211             tcg_temp_free_i64(tmp_reg);
 212         } else if (!s->tbi0 && !s->tbi1) {
 213             /* Neither bit set, just load it as-is */
 214             tcg_gen_mov_i64(cpu_pc, src);
 215         } else {
 216             TCGv_i64 tcg_tmpval = tcg_temp_new_i64();
 217             TCGv_i64 tcg_bit55  = tcg_temp_new_i64();
 218             TCGv_i64 tcg_zero   = tcg_const_i64(0);
 219
 220             tcg_gen_andi_i64(tcg_bit55, src, (1ull << 55));
 221
 222             if (s->tbi0) {
 223                 /* tbi0==1, tbi1==0, so 0-fill upper byte if bit 55 = 0 */
 224                 tcg_gen_andi_i64(tcg_tmpval, src,
 225                                  0x00FFFFFFFFFFFFFFull);
 226                 tcg_gen_movcond_i64(TCG_COND_EQ, cpu_pc, tcg_bit55, tcg_zero,
 227                                     tcg_tmpval, src);
 228             } else {
 229                 /* tbi0==0, tbi1==1, so 1-fill upper byte if bit 55 = 1 */
 230                 tcg_gen_ori_i64(tcg_tmpval, src,
 231                                 0xFF00000000000000ull);
 232                 tcg_gen_movcond_i64(TCG_COND_NE, cpu_pc, tcg_bit55, tcg_zero,
 233                                     tcg_tmpval, src);
 234             }
 235             tcg_temp_free_i64(tcg_zero);
 236             tcg_temp_free_i64(tcg_bit55);
 237             tcg_temp_free_i64(tcg_tmpval);
 238         }
 239     } else {  /* EL > 1 */
 240         if (s->tbi0) {
 241             /* Force tag byte to all zero */
 242             tcg_gen_andi_i64(cpu_pc, src, 0x00FFFFFFFFFFFFFFull);
 243         } else {
 244             /* Load unmodified address */
 245             tcg_gen_mov_i64(cpu_pc, src);
 246         }
 247     }
 248 }
 249
/* A condition test expressed on a 64 bit TCG value. Filled in by
 * a64_test_cc() and released with a64_free_cc().
 */
typedef struct DisasCompare64 {
    TCGCond cond;       /* condition copied from the 32 bit comparison */
    TCGv_i64 value;     /* temporary holding the sign-extended value */
} DisasCompare64;
 254
 255 static void a64_test_cc(DisasCompare64 *c64, int cc)
 256 {
 257     DisasCompare c32;
 258
 259     arm_test_cc(&c32, cc);
 260
 261     /* Sign-extend the 32-bit value so that the GE/LT comparisons work
 262        * properly.  The NE/EQ comparisons are also fine with this choice.  */
 263     c64->cond = c32.cond;
 264     c64->value = tcg_temp_new_i64();
 265     tcg_gen_ext_i32_i64(c64->value, c32.value);
 266
 267     arm_free_cc(&c32);
 268 }
 269
/* Free the 64 bit temporary held in @c64 (allocated by a64_test_cc()). */
static void a64_free_cc(DisasCompare64 *c64)
{
    tcg_temp_free_i64(c64->value);
}
 274
 275 static void gen_exception_internal(int excp)
 276 {
 277     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 278
 279     assert(excp_is_internal(excp));
 280     gen_helper_exception_internal(cpu_env, tcg_excp);
 281     tcg_temp_free_i32(tcg_excp);
 282 }
 283
 284 static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
 285 {
 286     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 287     TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
 288     TCGv_i32 tcg_el = tcg_const_i32(target_el);
 289
 290     gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
 291                                        tcg_syn, tcg_el);
 292     tcg_temp_free_i32(tcg_el);
 293     tcg_temp_free_i32(tcg_syn);
 294     tcg_temp_free_i32(tcg_excp);
 295 }
 296
 297 static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
 298 {
 299     gen_a64_set_pc_im(s->pc - offset);
 300     gen_exception_internal(excp);
 301     s->is_jmp = DISAS_EXC;
 302 }
 303
 304 static void gen_exception_insn(DisasContext *s, int offset, int excp,
 305                                uint32_t syndrome, uint32_t target_el)
 306 {
 307     gen_a64_set_pc_im(s->pc - offset);
 308     gen_exception(excp, syndrome, target_el);
 309     s->is_jmp = DISAS_EXC;
 310 }
 311
 312 static void gen_ss_advance(DisasContext *s)
 313 {
 314     /* If the singlestep state is Active-not-pending, advance to
 315      * Active-pending.
 316      */
 317     if (s->ss_active) {
 318         s->pstate_ss = 0;
 319         gen_helper_clear_pstate_ss(cpu_env);
 320     }
 321 }
 322
 323 static void gen_step_complete_exception(DisasContext *s)
 324 {
 325     /* We just completed step of an insn. Move from Active-not-pending
 326      * to Active-pending, and then also take the swstep exception.
 327      * This corresponds to making the (IMPDEF) choice to prioritize
 328      * swstep exceptions over asynchronous exceptions taken to an exception
 329      * level where debug is disabled. This choice has the advantage that
 330      * we do not need to maintain internal state corresponding to the
 331      * ISV/EX syndrome bits between completion of the step and generation
 332      * of the exception, and our syndrome information is always correct.
 333      */
 334     gen_ss_advance(s);
 335     gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
 336                   default_exception_el(s));
 337     s->is_jmp = DISAS_EXC;
 338 }
 339
 340 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
 341 {
 342     /* No direct tb linking with singlestep (either QEMU's or the ARM
 343      * debug architecture kind) or deterministic io
 344      */
 345     if (s->singlestep_enabled || s->ss_active || (s->tb->cflags & CF_LAST_IO)) {
 346         return false;
 347     }
 348
 349 #ifndef CONFIG_USER_ONLY
 350     /* Only link tbs from inside the same guest page */
 351     if ((s->tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
 352         return false;
 353     }
 354 #endif
 355
 356     return true;
 357 }
 358
 359 static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
 360 {
 361     TranslationBlock *tb;
 362
 363     tb = s->tb;
 364     if (use_goto_tb(s, n, dest)) {
 365         tcg_gen_goto_tb(n);
 366         gen_a64_set_pc_im(dest);
 367         tcg_gen_exit_tb((intptr_t)tb + n);
 368         s->is_jmp = DISAS_TB_JUMP;
 369     } else {
 370         gen_a64_set_pc_im(dest);
 371         if (s->ss_active) {
 372             gen_step_complete_exception(s);
 373         } else if (s->singlestep_enabled) {
 374             gen_exception_internal(EXCP_DEBUG);
 375         } else {
 376             tcg_gen_exit_tb(0);
 377             s->is_jmp = DISAS_TB_JUMP;
 378         }
 379     }
 380 }
 381
 382 static void disas_set_insn_syndrome(DisasContext *s, uint32_t syn)
 383 {
 384     /* We don't need to save all of the syndrome so we mask and shift
 385      * out uneeded bits to help the sleb128 encoder do a better job.
 386      */
 387     syn &= ARM_INSN_START_WORD2_MASK;
 388     syn >>= ARM_INSN_START_WORD2_SHIFT;
 389
 390     /* We check and clear insn_start_idx to catch multiple updates.  */
 391     assert(s->insn_start_idx != 0);
 392     tcg_set_insn_param(s->insn_start_idx, 2, syn);
 393     s->insn_start_idx = 0;
 394 }
 395
/* Raise EXCP_UDEF with the "uncategorized" syndrome, targeting the
 * default exception EL, with the PC set to s->pc - 4.
 * NOTE(review): s->pc - 4 is presumably the address of the insn being
 * translated (s->pc already advanced past it) - confirm in the decoder.
 */
static void unallocated_encoding(DisasContext *s)
{
    /* Unallocated and reserved encodings are uncategorized */
    gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
                       default_exception_el(s));
}
 402
 403 #define unsupported_encoding(s, insn)                                    \
 404     do {                                                                 \
 405         qemu_log_mask(LOG_UNIMP,                                         \
 406                       "%s:%d: unsupported instruction encoding 0x%08x "  \
 407                       "at pc=%016" PRIx64 "\n",                          \
 408                       __FILE__, __LINE__, insn, s->pc - 4);              \
 409         unallocated_encoding(s);                                         \
 410     } while (0);
 411
 412 static void init_tmp_a64_array(DisasContext *s)
 413 {
 414 #ifdef CONFIG_DEBUG_TCG
 415     int i;
 416     for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
 417         TCGV_UNUSED_I64(s->tmp_a64[i]);
 418     }
 419 #endif
 420     s->tmp_a64_count = 0;
 421 }
 422
 423 static void free_tmp_a64(DisasContext *s)
 424 {
 425     int i;
 426     for (i = 0; i < s->tmp_a64_count; i++) {
 427         tcg_temp_free_i64(s->tmp_a64[i]);
 428     }
 429     init_tmp_a64_array(s);
 430 }
 431
 432 static TCGv_i64 new_tmp_a64(DisasContext *s)
 433 {
 434     assert(s->tmp_a64_count < TMP_A64_MAX);
 435     return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
 436 }
 437
 438 static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
 439 {
 440     TCGv_i64 t = new_tmp_a64(s);
 441     tcg_gen_movi_i64(t, 0);
 442     return t;
 443 }
 444
 445 /*
 446  * Register access functions
 447  *
 448  * These functions are used for directly accessing a register in where
 449  * changes to the final register value are likely to be made. If you
 450  * need to use a register for temporary calculation (e.g. index type
 451  * operations) use the read_* form.
 452  *
 453  * B1.2.1 Register mappings
 454  *
 455  * In instruction register encoding 31 can refer to ZR (zero register) or
 456  * the SP (stack pointer) depending on context. In QEMU's case we map SP
 457  * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 458  * This is the point of the _sp forms.
 459  */
 460 static TCGv_i64 cpu_reg(DisasContext *s, int reg)
 461 {
 462     if (reg == 31) {
 463         return new_tmp_a64_zero(s);
 464     } else {
 465         return cpu_X[reg];
 466     }
 467 }
 468
/* register access for when 31 == SP.
 * Always returns the cpu_X[] global itself, including for encoding 31;
 * @s is not used.
 */
static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}
 474
 475 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 476  * representing the register contents. This TCGv is an auto-freed
 477  * temporary so it need not be explicitly freed, and may be modified.
 478  */
 479 static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
 480 {
 481     TCGv_i64 v = new_tmp_a64(s);
 482     if (reg != 31) {
 483         if (sf) {
 484             tcg_gen_mov_i64(v, cpu_X[reg]);
 485         } else {
 486             tcg_gen_ext32u_i64(v, cpu_X[reg]);
 487         }
 488     } else {
 489         tcg_gen_movi_i64(v, 0);
 490     }
 491     return v;
 492 }
 493
 494 static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
 495 {
 496     TCGv_i64 v = new_tmp_a64(s);
 497     if (sf) {
 498         tcg_gen_mov_i64(v, cpu_X[reg]);
 499     } else {
 500         tcg_gen_ext32u_i64(v, cpu_X[reg]);
 501     }
 502     return v;
 503 }
 504
 505 /* We should have at some point before trying to access an FP register
 506  * done the necessary access check, so assert that
 507  * (a) we did the check and
 508  * (b) we didn't then just plough ahead anyway if it failed.
 509  * Print the instruction pattern in the abort message so we can figure
 510  * out what we need to fix if a user encounters this problem in the wild.
 511  */
 512 static inline void assert_fp_access_checked(DisasContext *s)
 513 {
 514 #ifdef CONFIG_DEBUG_TCG
 515     if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
 516         fprintf(stderr, "target-arm: FP access check missing for "
 517                 "instruction 0x%08x\n", s->insn);
 518         abort();
 519     }
 520 #endif
 521 }
 522
 523 /* Return the offset into CPUARMState of an element of specified
 524  * size, 'element' places in from the least significant end of
 525  * the FP/vector register Qn.
 526  */
 527 static inline int vec_reg_offset(DisasContext *s, int regno,
 528                                  int element, TCGMemOp size)
 529 {
 530     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 531 #ifdef HOST_WORDS_BIGENDIAN
 532     /* This is complicated slightly because vfp.regs[2n] is
 533      * still the low half and  vfp.regs[2n+1] the high half
 534      * of the 128 bit vector, even on big endian systems.
 535      * Calculate the offset assuming a fully bigendian 128 bits,
 536      * then XOR to account for the order of the two 64 bit halves.
 537      */
 538     offs += (16 - ((element + 1) * (1 << size)));
 539     offs ^= 8;
 540 #else
 541     offs += element * (1 << size);
 542 #endif
 543     assert_fp_access_checked(s);
 544     return offs;
 545 }
 546
 547 /* Return the offset into CPUARMState of a slice (from
 548  * the least significant end) of FP register Qn (ie
 549  * Dn, Sn, Hn or Bn).
 550  * (Note that this is not the same mapping as for A32; see cpu.h)
 551  */
 552 static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
 553 {
 554     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 555 #ifdef HOST_WORDS_BIGENDIAN
 556     offs += (8 - (1 << size));
 557 #endif
 558     assert_fp_access_checked(s);
 559     return offs;
 560 }
 561
/* Offset of the high half of the 128 bit vector Qn, ie the offset of
 * vfp.regs[2n + 1] within CPUARMState. In CONFIG_DEBUG_TCG builds this
 * also asserts that the FP access check has been performed.
 */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    assert_fp_access_checked(s);
    return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
}
 568
 569 /* Convenience accessors for reading and writing single and double
 570  * FP registers. Writing clears the upper parts of the associated
 571  * 128 bit vector register, as required by the architecture.
 572  * Note that unlike the GP register accessors, the values returned
 573  * by the read functions must be manually freed.
 574  */
 575 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
 576 {
 577     TCGv_i64 v = tcg_temp_new_i64();
 578
 579     tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 580     return v;
 581 }
 582
 583 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
 584 {
 585     TCGv_i32 v = tcg_temp_new_i32();
 586
 587     tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
 588     return v;
 589 }
 590
 591 static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
 592 {
 593     TCGv_i64 tcg_zero = tcg_const_i64(0);
 594
 595     tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 596     tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg));
 597     tcg_temp_free_i64(tcg_zero);
 598 }
 599
 600 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
 601 {
 602     TCGv_i64 tmp = tcg_temp_new_i64();
 603
 604     tcg_gen_extu_i32_i64(tmp, v);
 605     write_fp_dreg(s, reg, tmp);
 606     tcg_temp_free_i64(tmp);
 607 }
 608
 609 static TCGv_ptr get_fpstatus_ptr(void)
 610 {
 611     TCGv_ptr statusptr = tcg_temp_new_ptr();
 612     int offset;
 613
 614     /* In A64 all instructions (both FP and Neon) use the FPCR;
 615      * there is no equivalent of the A32 Neon "standard FPSCR value"
 616      * and all operations use vfp.fp_status.
 617      */
 618     offset = offsetof(CPUARMState, vfp.fp_status);
 619     tcg_gen_addi_ptr(statusptr, cpu_env, offset);
 620     return statusptr;
 621 }
 622
/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 *
 * The flag variables are 32 bit, so the result is split into two 32 bit
 * halves and ZF is then ORed with NF, making ZF zero only when the
 * whole 64 bit result is zero.
 * NOTE(review): this relies on tcg_gen_extr_i64_i32() writing the low
 * half to its first output and the high half (whose top bit is the sign
 * of the result) to its second - confirm against the TCG op docs.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
 631
 632 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
 633 static inline void gen_logic_CC(int sf, TCGv_i64 result)
 634 {
 635     if (sf) {
 636         gen_set_NZ64(result);
 637     } else {
 638         tcg_gen_extrl_i64_i32(cpu_ZF, result);
 639         tcg_gen_mov_i32(cpu_NF, cpu_ZF);
 640     }
 641     tcg_gen_movi_i32(cpu_CF, 0);
 642     tcg_gen_movi_i32(cpu_VF, 0);
 643 }
 644
 645 /* dest = T0 + T1; compute C, N, V and Z flags */
 646 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 647 {
 648     if (sf) {
 649         TCGv_i64 result, flag, tmp;
 650         result = tcg_temp_new_i64();
 651         flag = tcg_temp_new_i64();
 652         tmp = tcg_temp_new_i64();
 653
 654         tcg_gen_movi_i64(tmp, 0);
 655         tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
 656
 657         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 658
 659         gen_set_NZ64(result);
 660
 661         tcg_gen_xor_i64(flag, result, t0);
 662         tcg_gen_xor_i64(tmp, t0, t1);
 663         tcg_gen_andc_i64(flag, flag, tmp);
 664         tcg_temp_free_i64(tmp);
 665         tcg_gen_extrh_i64_i32(cpu_VF, flag);
 666
 667         tcg_gen_mov_i64(dest, result);
 668         tcg_temp_free_i64(result);
 669         tcg_temp_free_i64(flag);
 670     } else {
 671         /* 32 bit arithmetic */
 672         TCGv_i32 t0_32 = tcg_temp_new_i32();
 673         TCGv_i32 t1_32 = tcg_temp_new_i32();
 674         TCGv_i32 tmp = tcg_temp_new_i32();
 675
 676         tcg_gen_movi_i32(tmp, 0);
 677         tcg_gen_extrl_i64_i32(t0_32, t0);
 678         tcg_gen_extrl_i64_i32(t1_32, t1);
 679         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
 680         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 681         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 682         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 683         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 684         tcg_gen_extu_i32_i64(dest, cpu_NF);
 685
 686         tcg_temp_free_i32(tmp);
 687         tcg_temp_free_i32(t0_32);
 688         tcg_temp_free_i32(t1_32);
 689     }
 690 }
 691
 692 /* dest = T0 - T1; compute C, N, V and Z flags */
 693 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 694 {
 695     if (sf) {
 696         /* 64 bit arithmetic */
 697         TCGv_i64 result, flag, tmp;
 698
 699         result = tcg_temp_new_i64();
 700         flag = tcg_temp_new_i64();
 701         tcg_gen_sub_i64(result, t0, t1);
 702
 703         gen_set_NZ64(result);
 704
 705         tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
 706         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 707
 708         tcg_gen_xor_i64(flag, result, t0);
 709         tmp = tcg_temp_new_i64();
 710         tcg_gen_xor_i64(tmp, t0, t1);
 711         tcg_gen_and_i64(flag, flag, tmp);
 712         tcg_temp_free_i64(tmp);
 713         tcg_gen_extrh_i64_i32(cpu_VF, flag);
 714         tcg_gen_mov_i64(dest, result);
 715         tcg_temp_free_i64(flag);
 716         tcg_temp_free_i64(result);
 717     } else {
 718         /* 32 bit arithmetic */
 719         TCGv_i32 t0_32 = tcg_temp_new_i32();
 720         TCGv_i32 t1_32 = tcg_temp_new_i32();
 721         TCGv_i32 tmp;
 722
 723         tcg_gen_extrl_i64_i32(t0_32, t0);
 724         tcg_gen_extrl_i64_i32(t1_32, t1);
 725         tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
 726         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 727         tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
 728         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 729         tmp = tcg_temp_new_i32();
 730         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 731         tcg_temp_free_i32(t0_32);
 732         tcg_temp_free_i32(t1_32);
 733         tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 734         tcg_temp_free_i32(tmp);
 735         tcg_gen_extu_i32_i64(dest, cpu_NF);
 736     }
 737 }
 738
 739 /* dest = T0 + T1 + CF; do not compute flags. */
 740 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 741 {
 742     TCGv_i64 flag = tcg_temp_new_i64();
 743     tcg_gen_extu_i32_i64(flag, cpu_CF);
 744     tcg_gen_add_i64(dest, t0, t1);
 745     tcg_gen_add_i64(dest, dest, flag);
 746     tcg_temp_free_i64(flag);
 747
 748     if (!sf) {
 749         tcg_gen_ext32u_i64(dest, dest);
 750     }
 751 }
 752
 753 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
 754 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 755 {
 756     if (sf) {
 757         TCGv_i64 result, cf_64, vf_64, tmp;
 758         result = tcg_temp_new_i64();
 759         cf_64 = tcg_temp_new_i64();
 760         vf_64 = tcg_temp_new_i64();
 761         tmp = tcg_const_i64(0);
 762
 763         tcg_gen_extu_i32_i64(cf_64, cpu_CF);
 764         tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
 765         tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
 766         tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
 767         gen_set_NZ64(result);
 768
 769         tcg_gen_xor_i64(vf_64, result, t0);
 770         tcg_gen_xor_i64(tmp, t0, t1);
 771         tcg_gen_andc_i64(vf_64, vf_64, tmp);
 772         tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
 773
 774         tcg_gen_mov_i64(dest, result);
 775
 776         tcg_temp_free_i64(tmp);
 777         tcg_temp_free_i64(vf_64);
 778         tcg_temp_free_i64(cf_64);
 779         tcg_temp_free_i64(result);
 780     } else {
 781         TCGv_i32 t0_32, t1_32, tmp;
 782         t0_32 = tcg_temp_new_i32();
 783         t1_32 = tcg_temp_new_i32();
 784         tmp = tcg_const_i32(0);
 785
 786         tcg_gen_extrl_i64_i32(t0_32, t0);
 787         tcg_gen_extrl_i64_i32(t1_32, t1);
 788         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
 789         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);
 790
 791         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 792         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 793         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 794         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 795         tcg_gen_extu_i32_i64(dest, cpu_NF);
 796
 797         tcg_temp_free_i32(tmp);
 798         tcg_temp_free_i32(t1_32);
 799         tcg_temp_free_i32(t0_32);
 800     }
 801 }
 802
 803 /*
 804  * Load/Store generators
 805  */
 806
 807 /*
 808  * Store from GPR register to memory.
 809  */
 810 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
 811                              TCGv_i64 tcg_addr, int size, int memidx,
 812                              bool iss_valid,
 813                              unsigned int iss_srt,
 814                              bool iss_sf, bool iss_ar)
 815 {
 816     g_assert(size <= 3);
 817     tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);
 818
 819     if (iss_valid) {
 820         uint32_t syn;
 821
 822         syn = syn_data_abort_with_iss(0,
 823                                       size,
 824                                       false,
 825                                       iss_srt,
 826                                       iss_sf,
 827                                       iss_ar,
 828                                       0, 0, 0, 0, 0, false);
 829         disas_set_insn_syndrome(s, syn);
 830     }
 831 }
 832
/* Store from a GPR to memory using the MMU index returned by
 * get_mem_index(); all other arguments are passed through unchanged to
 * do_gpr_st_memidx().
 */
static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, int size,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}
 842
 843 /*
 844  * Load from memory to GPR register
 845  */
 846 static void do_gpr_ld_memidx(DisasContext *s,
 847                              TCGv_i64 dest, TCGv_i64 tcg_addr,
 848                              int size, bool is_signed,
 849                              bool extend, int memidx,
 850                              bool iss_valid, unsigned int iss_srt,
 851                              bool iss_sf, bool iss_ar)
 852 {
 853     TCGMemOp memop = s->be_data + size;
 854
 855     g_assert(size <= 3);
 856
 857     if (is_signed) {
 858         memop += MO_SIGN;
 859     }
 860
 861     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
 862
 863     if (extend && is_signed) {
 864         g_assert(size < 3);
 865         tcg_gen_ext32u_i64(dest, dest);
 866     }
 867
 868     if (iss_valid) {
 869         uint32_t syn;
 870
 871         syn = syn_data_abort_with_iss(0,
 872                                       size,
 873                                       is_signed,
 874                                       iss_srt,
 875                                       iss_sf,
 876                                       iss_ar,
 877                                       0, 0, 0, 0, 0, false);
 878         disas_set_insn_syndrome(s, syn);
 879     }
 880 }
 881
 882 static void do_gpr_ld(DisasContext *s,
 883                       TCGv_i64 dest, TCGv_i64 tcg_addr,
 884                       int size, bool is_signed, bool extend,
 885                       bool iss_valid, unsigned int iss_srt,
 886                       bool iss_sf, bool iss_ar)
 887 {
 888     do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
 889                      get_mem_index(s),
 890                      iss_valid, iss_srt, iss_sf, iss_ar);
 891 }
 892
 893 /*
 894  * Store from FP register to memory
 895  */
 896 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
 897 {
 898     /* This writes the bottom N bits of a 128 bit wide vector to memory */
 899     TCGv_i64 tmp = tcg_temp_new_i64();
 900     tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
 901     if (size < 4) {
 902         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
 903                             s->be_data + size);
 904     } else {
 905         bool be = s->be_data == MO_BE;
 906         TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
 907
 908         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 909         tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
 910                             s->be_data | MO_Q);
 911         tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
 912         tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
 913                             s->be_data | MO_Q);
 914         tcg_temp_free_i64(tcg_hiaddr);
 915     }
 916
 917     tcg_temp_free_i64(tmp);
 918 }
 919
 920 /*
 921  * Load from memory to FP register
 922  */
 923 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
 924 {
 925     /* This always zero-extends and writes to a full 128 bit wide vector */
 926     TCGv_i64 tmplo = tcg_temp_new_i64();
 927     TCGv_i64 tmphi;
 928
 929     if (size < 4) {
 930         TCGMemOp memop = s->be_data + size;
 931         tmphi = tcg_const_i64(0);
 932         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
 933     } else {
 934         bool be = s->be_data == MO_BE;
 935         TCGv_i64 tcg_hiaddr;
 936
 937         tmphi = tcg_temp_new_i64();
 938         tcg_hiaddr = tcg_temp_new_i64();
 939
 940         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 941         tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
 942                             s->be_data | MO_Q);
 943         tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
 944                             s->be_data | MO_Q);
 945         tcg_temp_free_i64(tcg_hiaddr);
 946     }
 947
 948     tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
 949     tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
 950
 951     tcg_temp_free_i64(tmplo);
 952     tcg_temp_free_i64(tmphi);
 953 }
 954
 955 /*
 956  * Vector load/store helpers.
 957  *
 958  * The principal difference between this and a FP load is that we don't
 959  * zero extend as we are filling a partial chunk of the vector register.
 960  * These functions don't support 128 bit loads/stores, which would be
 961  * normal load/store operations.
 962  *
 963  * The _i32 versions are useful when operating on 32 bit quantities
 964  * (eg for floating point single or using Neon helper functions).
 965  */
 966
 967 /* Get value of an element within a vector register */
 968 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
 969                              int element, TCGMemOp memop)
 970 {
 971     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
 972     switch (memop) {
 973     case MO_8:
 974         tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
 975         break;
 976     case MO_16:
 977         tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
 978         break;
 979     case MO_32:
 980         tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
 981         break;
 982     case MO_8|MO_SIGN:
 983         tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
 984         break;
 985     case MO_16|MO_SIGN:
 986         tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
 987         break;
 988     case MO_32|MO_SIGN:
 989         tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
 990         break;
 991     case MO_64:
 992     case MO_64|MO_SIGN:
 993         tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
 994         break;
 995     default:
 996         g_assert_not_reached();
 997     }
 998 }
 999
1000 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1001                                  int element, TCGMemOp memop)
1002 {
1003     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1004     switch (memop) {
1005     case MO_8:
1006         tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
1007         break;
1008     case MO_16:
1009         tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
1010         break;
1011     case MO_8|MO_SIGN:
1012         tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
1013         break;
1014     case MO_16|MO_SIGN:
1015         tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
1016         break;
1017     case MO_32:
1018     case MO_32|MO_SIGN:
1019         tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
1020         break;
1021     default:
1022         g_assert_not_reached();
1023     }
1024 }
1025
1026 /* Set value of an element within a vector register */
1027 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1028                               int element, TCGMemOp memop)
1029 {
1030     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1031     switch (memop) {
1032     case MO_8:
1033         tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
1034         break;
1035     case MO_16:
1036         tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
1037         break;
1038     case MO_32:
1039         tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
1040         break;
1041     case MO_64:
1042         tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
1043         break;
1044     default:
1045         g_assert_not_reached();
1046     }
1047 }
1048
1049 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1050                                   int destidx, int element, TCGMemOp memop)
1051 {
1052     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1053     switch (memop) {
1054     case MO_8:
1055         tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
1056         break;
1057     case MO_16:
1058         tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
1059         break;
1060     case MO_32:
1061         tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
1062         break;
1063     default:
1064         g_assert_not_reached();
1065     }
1066 }
1067
1068 /* Clear the high 64 bits of a 128 bit vector (in general non-quad
1069  * vector ops all need to do this).
1070  */
1071 static void clear_vec_high(DisasContext *s, int rd)
1072 {
1073     TCGv_i64 tcg_zero = tcg_const_i64(0);
1074
1075     write_vec_element(s, tcg_zero, rd, 1, MO_64);
1076     tcg_temp_free_i64(tcg_zero);
1077 }
1078
1079 /* Store from vector register to memory */
1080 static void do_vec_st(DisasContext *s, int srcidx, int element,
1081                       TCGv_i64 tcg_addr, int size)
1082 {
1083     TCGMemOp memop = s->be_data + size;
1084     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1085
1086     read_vec_element(s, tcg_tmp, srcidx, element, size);
1087     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
1088
1089     tcg_temp_free_i64(tcg_tmp);
1090 }
1091
1092 /* Load from memory to vector register */
1093 static void do_vec_ld(DisasContext *s, int destidx, int element,
1094                       TCGv_i64 tcg_addr, int size)
1095 {
1096     TCGMemOp memop = s->be_data + size;
1097     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1098
1099     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
1100     write_vec_element(s, tcg_tmp, destidx, element, size);
1101
1102     tcg_temp_free_i64(tcg_tmp);
1103 }
1104
1105 /* Check that FP/Neon access is enabled. If it is, return
1106  * true. If not, emit code to generate an appropriate exception,
1107  * and return false; the caller should not emit any code for
1108  * the instruction. Note that this check must happen after all
1109  * unallocated-encoding checks (otherwise the syndrome information
1110  * for the resulting exception will be incorrect).
1111  */
1112 static inline bool fp_access_check(DisasContext *s)
1113 {
1114     assert(!s->fp_access_checked);
1115     s->fp_access_checked = true;
1116
1117     if (!s->fp_excp_el) {
1118         return true;
1119     }
1120
1121     gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
1122                        s->fp_excp_el);
1123     return false;
1124 }
1125
1126 /*
1127  * This utility function is for doing register extension with an
1128  * optional shift. You will likely want to pass a temporary for the
1129  * destination register. See DecodeRegExtend() in the ARM ARM.
1130  */
1131 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1132                               int option, unsigned int shift)
1133 {
1134     int extsize = extract32(option, 0, 2);
1135     bool is_signed = extract32(option, 2, 1);
1136
1137     if (is_signed) {
1138         switch (extsize) {
1139         case 0:
1140             tcg_gen_ext8s_i64(tcg_out, tcg_in);
1141             break;
1142         case 1:
1143             tcg_gen_ext16s_i64(tcg_out, tcg_in);
1144             break;
1145         case 2:
1146             tcg_gen_ext32s_i64(tcg_out, tcg_in);
1147             break;
1148         case 3:
1149             tcg_gen_mov_i64(tcg_out, tcg_in);
1150             break;
1151         }
1152     } else {
1153         switch (extsize) {
1154         case 0:
1155             tcg_gen_ext8u_i64(tcg_out, tcg_in);
1156             break;
1157         case 1:
1158             tcg_gen_ext16u_i64(tcg_out, tcg_in);
1159             break;
1160         case 2:
1161             tcg_gen_ext32u_i64(tcg_out, tcg_in);
1162             break;
1163         case 3:
1164             tcg_gen_mov_i64(tcg_out, tcg_in);
1165             break;
1166         }
1167     }
1168
1169     if (shift) {
1170         tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1171     }
1172 }
1173
/* Placeholder for the SP alignment check; it currently emits no code
 * at all (the body is intentionally empty).
 */
static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so that we can more easily add
     * these checks in future (possibly as a "favour catching guest program
     * bugs over speed" user selectable option).
     */
}
1186
1187 /*
1188  * This provides a simple table based table lookup decoder. It is
1189  * intended to be used when the relevant bits for decode are too
1190  * awkwardly placed and switch/if based logic would be confusing and
1191  * deeply nested. Since it's a linear search through the table, tables
1192  * should be kept small.
1193  *
1194  * It returns the first handler where insn & mask == pattern, or
1195  * NULL if there is no match.
1196  * The table is terminated by an empty mask (i.e. 0)
1197  */
1198 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1199                                                uint32_t insn)
1200 {
1201     const AArch64DecodeTable *tptr = table;
1202
1203     while (tptr->mask) {
1204         if ((insn & tptr->mask) == tptr->pattern) {
1205             return tptr->disas_fn;
1206         }
1207         tptr++;
1208     }
1209     return NULL;
1210 }
1211
/*
 * The instruction disassembly implemented here follows the
 * instruction encoding classifications in chapter 3 (C3)
 * of the ARM Architecture Reference Manual (DDI0487A_a).
 */
1217
1218 /* C3.2.7 Unconditional branch (immediate)
1219  *   31  30       26 25                                  0
1220  * +----+-----------+-------------------------------------+
1221  * | op | 0 0 1 0 1 |                 imm26               |
1222  * +----+-----------+-------------------------------------+
1223  */
1224 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1225 {
1226     uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
1227
1228     if (insn & (1U << 31)) {
1229         /* C5.6.26 BL Branch with link */
1230         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1231     }
1232
1233     /* C5.6.20 B Branch / C5.6.26 BL Branch with link */
1234     gen_goto_tb(s, 0, addr);
1235 }
1236
1237 /* C3.2.1 Compare & branch (immediate)
1238  *   31  30         25  24  23                  5 4      0
1239  * +----+-------------+----+---------------------+--------+
1240  * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
1241  * +----+-------------+----+---------------------+--------+
1242  */
1243 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1244 {
1245     unsigned int sf, op, rt;
1246     uint64_t addr;
1247     TCGLabel *label_match;
1248     TCGv_i64 tcg_cmp;
1249
1250     sf = extract32(insn, 31, 1);
1251     op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1252     rt = extract32(insn, 0, 5);
1253     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1254
1255     tcg_cmp = read_cpu_reg(s, rt, sf);
1256     label_match = gen_new_label();
1257
1258     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1259                         tcg_cmp, 0, label_match);
1260
1261     gen_goto_tb(s, 0, s->pc);
1262     gen_set_label(label_match);
1263     gen_goto_tb(s, 1, addr);
1264 }
1265
1266 /* C3.2.5 Test & branch (immediate)
1267  *   31  30         25  24  23   19 18          5 4    0
1268  * +----+-------------+----+-------+-------------+------+
1269  * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
1270  * +----+-------------+----+-------+-------------+------+
1271  */
1272 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1273 {
1274     unsigned int bit_pos, op, rt;
1275     uint64_t addr;
1276     TCGLabel *label_match;
1277     TCGv_i64 tcg_cmp;
1278
1279     bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1280     op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1281     addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
1282     rt = extract32(insn, 0, 5);
1283
1284     tcg_cmp = tcg_temp_new_i64();
1285     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1286     label_match = gen_new_label();
1287     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1288                         tcg_cmp, 0, label_match);
1289     tcg_temp_free_i64(tcg_cmp);
1290     gen_goto_tb(s, 0, s->pc);
1291     gen_set_label(label_match);
1292     gen_goto_tb(s, 1, addr);
1293 }
1294
1295 /* C3.2.2 / C5.6.19 Conditional branch (immediate)
1296  *  31           25  24  23                  5   4  3    0
1297  * +---------------+----+---------------------+----+------+
1298  * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
1299  * +---------------+----+---------------------+----+------+
1300  */
1301 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1302 {
1303     unsigned int cond;
1304     uint64_t addr;
1305
1306     if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1307         unallocated_encoding(s);
1308         return;
1309     }
1310     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1311     cond = extract32(insn, 0, 4);
1312
1313     if (cond < 0x0e) {
1314         /* genuinely conditional branches */
1315         TCGLabel *label_match = gen_new_label();
1316         arm_gen_test_cc(cond, label_match);
1317         gen_goto_tb(s, 0, s->pc);
1318         gen_set_label(label_match);
1319         gen_goto_tb(s, 1, addr);
1320     } else {
1321         /* 0xe and 0xf are both "always" conditions */
1322         gen_goto_tb(s, 0, addr);
1323     }
1324 }
1325
1326 /* C5.6.68 HINT */
1327 static void handle_hint(DisasContext *s, uint32_t insn,
1328                         unsigned int op1, unsigned int op2, unsigned int crm)
1329 {
1330     unsigned int selector = crm << 3 | op2;
1331
1332     if (op1 != 3) {
1333         unallocated_encoding(s);
1334         return;
1335     }
1336
1337     switch (selector) {
1338     case 0: /* NOP */
1339         return;
1340     case 3: /* WFI */
1341         s->is_jmp = DISAS_WFI;
1342         return;
1343     case 1: /* YIELD */
1344         s->is_jmp = DISAS_YIELD;
1345         return;
1346     case 2: /* WFE */
1347         s->is_jmp = DISAS_WFE;
1348         return;
1349     case 4: /* SEV */
1350     case 5: /* SEVL */
1351         /* we treat all as NOP at least for now */
1352         return;
1353     default:
1354         /* default specified as NOP equivalent */
1355         return;
1356     }
1357 }
1358
/* Emit code for CLREX: writes the constant -1 to cpu_exclusive_addr.
 * NOTE(review): -1 is presumably the "no exclusive access outstanding"
 * marker used by the load/store exclusive code - confirm there.
 * Neither @s nor @insn is used.
 */
static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
1363
1364 /* CLREX, DSB, DMB, ISB */
1365 static void handle_sync(DisasContext *s, uint32_t insn,
1366                         unsigned int op1, unsigned int op2, unsigned int crm)
1367 {
1368     TCGBar bar;
1369
1370     if (op1 != 3) {
1371         unallocated_encoding(s);
1372         return;
1373     }
1374
1375     switch (op2) {
1376     case 2: /* CLREX */
1377         gen_clrex(s, insn);
1378         return;
1379     case 4: /* DSB */
1380     case 5: /* DMB */
1381         switch (crm & 3) {
1382         case 1: /* MBReqTypes_Reads */
1383             bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1384             break;
1385         case 2: /* MBReqTypes_Writes */
1386             bar = TCG_BAR_SC | TCG_MO_ST_ST;
1387             break;
1388         default: /* MBReqTypes_All */
1389             bar = TCG_BAR_SC | TCG_MO_ALL;
1390             break;
1391         }
1392         tcg_gen_mb(bar);
1393         return;
1394     case 6: /* ISB */
1395         /* We need to break the TB after this insn to execute
1396          * a self-modified code correctly and also to take
1397          * any pending interrupts immediately.
1398          */
1399         s->is_jmp = DISAS_UPDATE;
1400         return;
1401     default:
1402         unallocated_encoding(s);
1403         return;
1404     }
1405 }
1406
1407 /* C5.6.130 MSR (immediate) - move immediate to processor state field */
1408 static void handle_msr_i(DisasContext *s, uint32_t insn,
1409                          unsigned int op1, unsigned int op2, unsigned int crm)
1410 {
1411     int op = op1 << 3 | op2;
1412     switch (op) {
1413     case 0x05: /* SPSel */
1414         if (s->current_el == 0) {
1415             unallocated_encoding(s);
1416             return;
1417         }
1418         /* fall through */
1419     case 0x1e: /* DAIFSet */
1420     case 0x1f: /* DAIFClear */
1421     {
1422         TCGv_i32 tcg_imm = tcg_const_i32(crm);
1423         TCGv_i32 tcg_op = tcg_const_i32(op);
1424         gen_a64_set_pc_im(s->pc - 4);
1425         gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
1426         tcg_temp_free_i32(tcg_imm);
1427         tcg_temp_free_i32(tcg_op);
1428         s->is_jmp = DISAS_UPDATE;
1429         break;
1430     }
1431     default:
1432         unallocated_encoding(s);
1433         return;
1434     }
1435 }
1436
1437 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1438 {
1439     TCGv_i32 tmp = tcg_temp_new_i32();
1440     TCGv_i32 nzcv = tcg_temp_new_i32();
1441
1442     /* build bit 31, N */
1443     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1444     /* build bit 30, Z */
1445     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1446     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1447     /* build bit 29, C */
1448     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1449     /* build bit 28, V */
1450     tcg_gen_shri_i32(tmp, cpu_VF, 31);
1451     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1452     /* generate result */
1453     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1454
1455     tcg_temp_free_i32(nzcv);
1456     tcg_temp_free_i32(tmp);
1457 }
1458
1459 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1460
1461 {
1462     TCGv_i32 nzcv = tcg_temp_new_i32();
1463
1464     /* take NZCV from R[t] */
1465     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1466
1467     /* bit 31, N */
1468     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1469     /* bit 30, Z */
1470     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1471     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1472     /* bit 29, C */
1473     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1474     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1475     /* bit 28, V */
1476     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1477     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1478     tcg_temp_free_i32(nzcv);
1479 }
1480
1481 /* C5.6.129 MRS - move from system register
1482  * C5.6.131 MSR (register) - move to system register
1483  * C5.6.204 SYS
1484  * C5.6.205 SYSL
1485  * These are all essentially the same insn in 'read' and 'write'
1486  * versions, with varying op0 fields.
1487  */
1488 static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1489                        unsigned int op0, unsigned int op1, unsigned int op2,
1490                        unsigned int crn, unsigned int crm, unsigned int rt)
1491 {
1492     const ARMCPRegInfo *ri;
1493     TCGv_i64 tcg_rt;
1494
1495     ri = get_arm_cp_reginfo(s->cp_regs,
1496                             ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1497                                                crn, crm, op0, op1, op2));
1498
1499     if (!ri) {
1500         /* Unknown register; this might be a guest error or a QEMU
1501          * unimplemented feature.
1502          */
1503         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1504                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1505                       isread ? "read" : "write", op0, op1, crn, crm, op2);
1506         unallocated_encoding(s);
1507         return;
1508     }
1509
1510     /* Check access permissions */
1511     if (!cp_access_ok(s->current_el, ri, isread)) {
1512         unallocated_encoding(s);
1513         return;
1514     }
1515
1516     if (ri->accessfn) {
1517         /* Emit code to perform further access permissions checks at
1518          * runtime; this may result in an exception.
1519          */
1520         TCGv_ptr tmpptr;
1521         TCGv_i32 tcg_syn, tcg_isread;
1522         uint32_t syndrome;
1523
1524         gen_a64_set_pc_im(s->pc - 4);
1525         tmpptr = tcg_const_ptr(ri);
1526         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1527         tcg_syn = tcg_const_i32(syndrome);
1528         tcg_isread = tcg_const_i32(isread);
1529         gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
1530         tcg_temp_free_ptr(tmpptr);
1531         tcg_temp_free_i32(tcg_syn);
1532         tcg_temp_free_i32(tcg_isread);
1533     }
1534
1535     /* Handle special cases first */
1536     switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
1537     case ARM_CP_NOP:
1538         return;
1539     case ARM_CP_NZCV:
1540         tcg_rt = cpu_reg(s, rt);
1541         if (isread) {
1542             gen_get_nzcv(tcg_rt);
1543         } else {
1544             gen_set_nzcv(tcg_rt);
1545         }
1546         return;
1547     case ARM_CP_CURRENTEL:
1548         /* Reads as current EL value from pstate, which is
1549          * guaranteed to be constant by the tb flags.
1550          */
1551         tcg_rt = cpu_reg(s, rt);
1552         tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
1553         return;
1554     case ARM_CP_DC_ZVA:
1555         /* Writes clear the aligned block of memory which rt points into. */
1556         tcg_rt = cpu_reg(s, rt);
1557         gen_helper_dc_zva(cpu_env, tcg_rt);
1558         return;
1559     default:
1560         break;
1561     }
1562
1563     if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1564         gen_io_start();
1565     }
1566
1567     tcg_rt = cpu_reg(s, rt);
1568
1569     if (isread) {
1570         if (ri->type & ARM_CP_CONST) {
1571             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
1572         } else if (ri->readfn) {
1573             TCGv_ptr tmpptr;
1574             tmpptr = tcg_const_ptr(ri);
1575             gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
1576             tcg_temp_free_ptr(tmpptr);
1577         } else {
1578             tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
1579         }
1580     } else {
1581         if (ri->type & ARM_CP_CONST) {
1582             /* If not forbidden by access permissions, treat as WI */
1583             return;
1584         } else if (ri->writefn) {
1585             TCGv_ptr tmpptr;
1586             tmpptr = tcg_const_ptr(ri);
1587             gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
1588             tcg_temp_free_ptr(tmpptr);
1589         } else {
1590             tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
1591         }
1592     }
1593
1594     if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1595         /* I/O operations must end the TB here (whether read or write) */
1596         gen_io_end();
1597         s->is_jmp = DISAS_UPDATE;
1598     } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
1599         /* We default to ending the TB on a coprocessor register write,
1600          * but allow this to be suppressed by the register definition
1601          * (usually only necessary to work around guest bugs).
1602          */
1603         s->is_jmp = DISAS_UPDATE;
1604     }
1605 }
1606
1607 /* C3.2.4 System
1608  *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1609  * +---------------------+---+-----+-----+-------+-------+-----+------+
1610  * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1611  * +---------------------+---+-----+-----+-------+-------+-----+------+
1612  */
1613 static void disas_system(DisasContext *s, uint32_t insn)
1614 {
1615     unsigned int l, op0, op1, crn, crm, op2, rt;
1616     l = extract32(insn, 21, 1);
1617     op0 = extract32(insn, 19, 2);
1618     op1 = extract32(insn, 16, 3);
1619     crn = extract32(insn, 12, 4);
1620     crm = extract32(insn, 8, 4);
1621     op2 = extract32(insn, 5, 3);
1622     rt = extract32(insn, 0, 5);
1623
1624     if (op0 == 0) {
1625         if (l || rt != 31) {
1626             unallocated_encoding(s);
1627             return;
1628         }
1629         switch (crn) {
1630         case 2: /* C5.6.68 HINT */
1631             handle_hint(s, insn, op1, op2, crm);
1632             break;
1633         case 3: /* CLREX, DSB, DMB, ISB */
1634             handle_sync(s, insn, op1, op2, crm);
1635             break;
1636         case 4: /* C5.6.130 MSR (immediate) */
1637             handle_msr_i(s, insn, op1, op2, crm);
1638             break;
1639         default:
1640             unallocated_encoding(s);
1641             break;
1642         }
1643         return;
1644     }
1645     handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1646 }
1647
1648 /* C3.2.3 Exception generation
1649  *
1650  *  31             24 23 21 20                     5 4   2 1  0
1651  * +-----------------+-----+------------------------+-----+----+
1652  * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
1653  * +-----------------------+------------------------+----------+
1654  */
1655 static void disas_exc(DisasContext *s, uint32_t insn)
1656 {
1657     int opc = extract32(insn, 21, 3);
1658     int op2_ll = extract32(insn, 0, 5);
1659     int imm16 = extract32(insn, 5, 16);
1660     TCGv_i32 tmp;
1661
1662     switch (opc) {
1663     case 0:
1664         /* For SVC, HVC and SMC we advance the single-step state
1665          * machine before taking the exception. This is architecturally
1666          * mandated, to ensure that single-stepping a system call
1667          * instruction works properly.
1668          */
1669         switch (op2_ll) {
1670         case 1:                                                     /* SVC */
1671             gen_ss_advance(s);
1672             gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
1673                                default_exception_el(s));
1674             break;
1675         case 2:                                                     /* HVC */
1676             if (s->current_el == 0) {
1677                 unallocated_encoding(s);
1678                 break;
1679             }
1680             /* The pre HVC helper handles cases when HVC gets trapped
1681              * as an undefined insn by runtime configuration.
1682              */
1683             gen_a64_set_pc_im(s->pc - 4);
1684             gen_helper_pre_hvc(cpu_env);
1685             gen_ss_advance(s);
1686             gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
1687             break;
1688         case 3:                                                     /* SMC */
1689             if (s->current_el == 0) {
1690                 unallocated_encoding(s);
1691                 break;
1692             }
1693             gen_a64_set_pc_im(s->pc - 4);
1694             tmp = tcg_const_i32(syn_aa64_smc(imm16));
1695             gen_helper_pre_smc(cpu_env, tmp);
1696             tcg_temp_free_i32(tmp);
1697             gen_ss_advance(s);
1698             gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
1699             break;
1700         default:
1701             unallocated_encoding(s);
1702             break;
1703         }
1704         break;
1705     case 1:
1706         if (op2_ll != 0) {
1707             unallocated_encoding(s);
1708             break;
1709         }
1710         /* BRK */
1711         gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16),
1712                            default_exception_el(s));
1713         break;
1714     case 2:
1715         if (op2_ll != 0) {
1716             unallocated_encoding(s);
1717             break;
1718         }
1719         /* HLT. This has two purposes.
1720          * Architecturally, it is an external halting debug instruction.
1721          * Since QEMU doesn't implement external debug, we treat this as
1722          * it is required for halting debug disabled: it will UNDEF.
1723          * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
1724          */
1725         if (semihosting_enabled() && imm16 == 0xf000) {
1726 #ifndef CONFIG_USER_ONLY
1727             /* In system mode, don't allow userspace access to semihosting,
1728              * to provide some semblance of security (and for consistency
1729              * with our 32-bit semihosting).
1730              */
1731             if (s->current_el == 0) {
1732                 unsupported_encoding(s, insn);
1733                 break;
1734             }
1735 #endif
1736             gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
1737         } else {
1738             unsupported_encoding(s, insn);
1739         }
1740         break;
1741     case 5:
1742         if (op2_ll < 1 || op2_ll > 3) {
1743             unallocated_encoding(s);
1744             break;
1745         }
1746         /* DCPS1, DCPS2, DCPS3 */
1747         unsupported_encoding(s, insn);
1748         break;
1749     default:
1750         unallocated_encoding(s);
1751         break;
1752     }
1753 }
1754
1755 /* C3.2.7 Unconditional branch (register)
1756  *  31           25 24   21 20   16 15   10 9    5 4     0
1757  * +---------------+-------+-------+-------+------+-------+
1758  * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1759  * +---------------+-------+-------+-------+------+-------+
1760  */
1761 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1762 {
1763     unsigned int opc, op2, op3, rn, op4;
1764
1765     opc = extract32(insn, 21, 4);
1766     op2 = extract32(insn, 16, 5);
1767     op3 = extract32(insn, 10, 6);
1768     rn = extract32(insn, 5, 5);
1769     op4 = extract32(insn, 0, 5);
1770
1771     if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1772         unallocated_encoding(s);
1773         return;
1774     }
1775
1776     switch (opc) {
1777     case 0: /* BR */
1778     case 1: /* BLR */
1779     case 2: /* RET */
1780         gen_a64_set_pc(s, cpu_reg(s, rn));
1781         /* BLR also needs to load return address */
1782         if (opc == 1) {
1783             tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1784         }
1785         break;
1786     case 4: /* ERET */
1787         if (s->current_el == 0) {
1788             unallocated_encoding(s);
1789             return;
1790         }
1791         gen_helper_exception_return(cpu_env);
1792         s->is_jmp = DISAS_JUMP;
1793         return;
1794     case 5: /* DRPS */
1795         if (rn != 0x1f) {
1796             unallocated_encoding(s);
1797         } else {
1798             unsupported_encoding(s, insn);
1799         }
1800         return;
1801     default:
1802         unallocated_encoding(s);
1803         return;
1804     }
1805
1806     s->is_jmp = DISAS_JUMP;
1807 }
1808
1809 /* C3.2 Branches, exception generating and system instructions */
1810 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1811 {
1812     switch (extract32(insn, 25, 7)) {
1813     case 0x0a: case 0x0b:
1814     case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1815         disas_uncond_b_imm(s, insn);
1816         break;
1817     case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1818         disas_comp_b_imm(s, insn);
1819         break;
1820     case 0x1b: case 0x5b: /* Test & branch (immediate) */
1821         disas_test_b_imm(s, insn);
1822         break;
1823     case 0x2a: /* Conditional branch (immediate) */
1824         disas_cond_b_imm(s, insn);
1825         break;
1826     case 0x6a: /* Exception generation / System */
1827         if (insn & (1 << 24)) {
1828             disas_system(s, insn);
1829         } else {
1830             disas_exc(s, insn);
1831         }
1832         break;
1833     case 0x6b: /* Unconditional branch (register) */
1834         disas_uncond_b_reg(s, insn);
1835         break;
1836     default:
1837         unallocated_encoding(s);
1838         break;
1839     }
1840 }
1841
1842 /*
1843  * Load/Store exclusive instructions are implemented by remembering
1844  * the value/address loaded, and seeing if these are the same
1845  * when the store is performed. This is not actually the architecturally
1846  * mandated semantics, but it works for typical guest code sequences
1847  * and avoids having to monitor regular stores.
1848  *
1849  * The store exclusive uses the atomic cmpxchg primitives to avoid
1850  * races in multi-threaded linux-user and when MTTCG softmmu is
1851  * enabled.
1852  */
1853 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
1854                                TCGv_i64 addr, int size, bool is_pair)
1855 {
1856     TCGv_i64 tmp = tcg_temp_new_i64();
1857     TCGMemOp memop = s->be_data + size;
1858
1859     g_assert(size <= 3);
1860     tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);
1861
1862     if (is_pair) {
1863         TCGv_i64 addr2 = tcg_temp_new_i64();
1864         TCGv_i64 hitmp = tcg_temp_new_i64();
1865
1866         g_assert(size >= 2);
1867         tcg_gen_addi_i64(addr2, addr, 1 << size);
1868         tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
1869         tcg_temp_free_i64(addr2);
1870         tcg_gen_mov_i64(cpu_exclusive_high, hitmp);
1871         tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
1872         tcg_temp_free_i64(hitmp);
1873     }
1874
1875     tcg_gen_mov_i64(cpu_exclusive_val, tmp);
1876     tcg_gen_mov_i64(cpu_reg(s, rt), tmp);
1877
1878     tcg_temp_free_i64(tmp);
1879     tcg_gen_mov_i64(cpu_exclusive_addr, addr);
1880 }
1881
1882 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
1883                                 TCGv_i64 inaddr, int size, int is_pair)
1884 {
1885     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
1886      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
1887      *     [addr] = {Rt};
1888      *     if (is_pair) {
1889      *         [addr + datasize] = {Rt2};
1890      *     }
1891      *     {Rd} = 0;
1892      * } else {
1893      *     {Rd} = 1;
1894      * }
1895      * env->exclusive_addr = -1;
1896      */
1897     TCGLabel *fail_label = gen_new_label();
1898     TCGLabel *done_label = gen_new_label();
1899     TCGv_i64 addr = tcg_temp_local_new_i64();
1900     TCGv_i64 tmp;
1901
1902     /* Copy input into a local temp so it is not trashed when the
1903      * basic block ends at the branch insn.
1904      */
1905     tcg_gen_mov_i64(addr, inaddr);
1906     tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
1907
1908     tmp = tcg_temp_new_i64();
1909     if (is_pair) {
1910         if (size == 2) {
1911             TCGv_i64 val = tcg_temp_new_i64();
1912             tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
1913             tcg_gen_concat32_i64(val, cpu_exclusive_val, cpu_exclusive_high);
1914             tcg_gen_atomic_cmpxchg_i64(tmp, addr, val, tmp,
1915                                        get_mem_index(s),
1916                                        size | MO_ALIGN | s->be_data);
1917             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, val);
1918             tcg_temp_free_i64(val);
1919         } else if (s->be_data == MO_LE) {
1920             gen_helper_paired_cmpxchg64_le(tmp, cpu_env, addr, cpu_reg(s, rt),
1921                                            cpu_reg(s, rt2));
1922         } else {
1923             gen_helper_paired_cmpxchg64_be(tmp, cpu_env, addr, cpu_reg(s, rt),
1924                                            cpu_reg(s, rt2));
1925         }
1926     } else {
1927         TCGv_i64 val = cpu_reg(s, rt);
1928         tcg_gen_atomic_cmpxchg_i64(tmp, addr, cpu_exclusive_val, val,
1929                                    get_mem_index(s),
1930                                    size | MO_ALIGN | s->be_data);
1931         tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
1932     }
1933
1934     tcg_temp_free_i64(addr);
1935
1936     tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
1937     tcg_temp_free_i64(tmp);
1938     tcg_gen_br(done_label);
1939
1940     gen_set_label(fail_label);
1941     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
1942     gen_set_label(done_label);
1943     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1944 }
1945
/* Update the Sixty-Four bit (SF) registersize. This logic is derived
 * from the ARMv8 specs for LDR (Shared decode for all encodings).
 *
 * Returns true when the transfer register is 64 bits wide:
 *  - sign-extending accesses: opc<0> clear selects a 64-bit register
 *  - all other accesses: only the 64-bit size (size == 3) does
 */
static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
{
    bool opc0_set = extract32(opc, 0, 1) != 0;

    if (!is_signed) {
        return size == 3;
    }
    return !opc0_set;
}
1961
1962 /* C3.3.6 Load/store exclusive
1963  *
1964  *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
1965  * +-----+-------------+----+---+----+------+----+-------+------+------+
1966  * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
1967  * +-----+-------------+----+---+----+------+----+-------+------+------+
1968  *
1969  *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
1970  *   L: 0 -> store, 1 -> load
1971  *  o2: 0 -> exclusive, 1 -> not
1972  *  o1: 0 -> single register, 1 -> register pair
1973  *  o0: 1 -> load-acquire/store-release, 0 -> not
1974  */
1975 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
1976 {
1977     int rt = extract32(insn, 0, 5);
1978     int rn = extract32(insn, 5, 5);
1979     int rt2 = extract32(insn, 10, 5);
1980     int is_lasr = extract32(insn, 15, 1);
1981     int rs = extract32(insn, 16, 5);
1982     int is_pair = extract32(insn, 21, 1);
1983     int is_store = !extract32(insn, 22, 1);
1984     int is_excl = !extract32(insn, 23, 1);
1985     int size = extract32(insn, 30, 2);
1986     TCGv_i64 tcg_addr;
1987
1988     if ((!is_excl && !is_pair && !is_lasr) ||
1989         (!is_excl && is_pair) ||
1990         (is_pair && size < 2)) {
1991         unallocated_encoding(s);
1992         return;
1993     }
1994
1995     if (rn == 31) {
1996         gen_check_sp_alignment(s);
1997     }
1998     tcg_addr = read_cpu_reg_sp(s, rn, 1);
1999
2000     /* Note that since TCG is single threaded load-acquire/store-release
2001      * semantics require no extra if (is_lasr) { ... } handling.
2002      */
2003
2004     if (is_excl) {
2005         if (!is_store) {
2006             s->is_ldex = true;
2007             gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
2008             if (is_lasr) {
2009                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2010             }
2011         } else {
2012             if (is_lasr) {
2013                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2014             }
2015             gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
2016         }
2017     } else {
2018         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2019         bool iss_sf = disas_ldst_compute_iss_sf(size, false, 0);
2020
2021         /* Generate ISS for non-exclusive accesses including LASR.  */
2022         if (is_store) {
2023             if (is_lasr) {
2024                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2025             }
2026             do_gpr_st(s, tcg_rt, tcg_addr, size,
2027                       true, rt, iss_sf, is_lasr);
2028         } else {
2029             do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false,
2030                       true, rt, iss_sf, is_lasr);
2031             if (is_lasr) {
2032                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2033             }
2034         }
2035     }
2036 }
2037
2038 /*
2039  * C3.3.5 Load register (literal)
2040  *
2041  *  31 30 29   27  26 25 24 23                5 4     0
2042  * +-----+-------+---+-----+-------------------+-------+
2043  * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
2044  * +-----+-------+---+-----+-------------------+-------+
2045  *
2046  * V: 1 -> vector (simd/fp)
2047  * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2048  *                   10-> 32 bit signed, 11 -> prefetch
2049  * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2050  */
2051 static void disas_ld_lit(DisasContext *s, uint32_t insn)
2052 {
2053     int rt = extract32(insn, 0, 5);
2054     int64_t imm = sextract32(insn, 5, 19) << 2;
2055     bool is_vector = extract32(insn, 26, 1);
2056     int opc = extract32(insn, 30, 2);
2057     bool is_signed = false;
2058     int size = 2;
2059     TCGv_i64 tcg_rt, tcg_addr;
2060
2061     if (is_vector) {
2062         if (opc == 3) {
2063             unallocated_encoding(s);
2064             return;
2065         }
2066         size = 2 + opc;
2067         if (!fp_access_check(s)) {
2068             return;
2069         }
2070     } else {
2071         if (opc == 3) {
2072             /* PRFM (literal) : prefetch */
2073             return;
2074         }
2075         size = 2 + extract32(opc, 0, 1);
2076         is_signed = extract32(opc, 1, 1);
2077     }
2078
2079     tcg_rt = cpu_reg(s, rt);
2080
2081     tcg_addr = tcg_const_i64((s->pc - 4) + imm);
2082     if (is_vector) {
2083         do_fp_ld(s, rt, tcg_addr, size);
2084     } else {
2085         /* Only unsigned 32bit loads target 32bit registers.  */
2086         bool iss_sf = opc != 0;
2087
2088         do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
2089                   true, rt, iss_sf, false);
2090     }
2091     tcg_temp_free_i64(tcg_addr);
2092 }
2093
2094 /*
2095  * C5.6.80 LDNP (Load Pair - non-temporal hint)
2096  * C5.6.81 LDP (Load Pair - non vector)
2097  * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
2098  * C5.6.176 STNP (Store Pair - non-temporal hint)
2099  * C5.6.177 STP (Store Pair - non vector)
2100  * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
2101  * C6.3.165 LDP (Load Pair of SIMD&FP)
2102  * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
2103  * C6.3.284 STP (Store Pair of SIMD&FP)
2104  *
2105  *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
2106  * +-----+-------+---+---+-------+---+-----------------------------+
2107  * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
2108  * +-----+-------+---+---+-------+---+-------+-------+------+------+
2109  *
2110  * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
2111  *      LDPSW                    01
2112  *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2113  *   V: 0 -> GPR, 1 -> Vector
2114  * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2115  *      10 -> signed offset, 11 -> pre-index
2116  *   L: 0 -> Store 1 -> Load
2117  *
2118  * Rt, Rt2 = GPR or SIMD registers to be stored
2119  * Rn = general purpose register containing address
2120  * imm7 = signed offset (multiple of 4 or 8 depending on size)
2121  */
2122 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2123 {
2124     int rt = extract32(insn, 0, 5);
2125     int rn = extract32(insn, 5, 5);
2126     int rt2 = extract32(insn, 10, 5);
2127     uint64_t offset = sextract64(insn, 15, 7);
2128     int index = extract32(insn, 23, 2);
2129     bool is_vector = extract32(insn, 26, 1);
2130     bool is_load = extract32(insn, 22, 1);
2131     int opc = extract32(insn, 30, 2);
2132
2133     bool is_signed = false;
2134     bool postindex = false;
2135     bool wback = false;
2136
2137     TCGv_i64 tcg_addr; /* calculated address */
2138     int size;
2139
2140     if (opc == 3) {
2141         unallocated_encoding(s);
2142         return;
2143     }
2144
2145     if (is_vector) {
2146         size = 2 + opc;
2147     } else {
2148         size = 2 + extract32(opc, 1, 1);
2149         is_signed = extract32(opc, 0, 1);
2150         if (!is_load && is_signed) {
2151             unallocated_encoding(s);
2152             return;
2153         }
2154     }
2155
2156     switch (index) {
2157     case 1: /* post-index */
2158         postindex = true;
2159         wback = true;
2160         break;
2161     case 0:
2162         /* signed offset with "non-temporal" hint. Since we don't emulate
2163          * caches we don't care about hints to the cache system about
2164          * data access patterns, and handle this identically to plain
2165          * signed offset.
2166          */
2167         if (is_signed) {
2168             /* There is no non-temporal-hint version of LDPSW */
2169             unallocated_encoding(s);
2170             return;
2171         }
2172         postindex = false;
2173         break;
2174     case 2: /* signed offset, rn not updated */
2175         postindex = false;
2176         break;
2177     case 3: /* pre-index */
2178         postindex = false;
2179         wback = true;
2180         break;
2181     }
2182
2183     if (is_vector && !fp_access_check(s)) {
2184         return;
2185     }
2186
2187     offset <<= size;
2188
2189     if (rn == 31) {
2190         gen_check_sp_alignment(s);
2191     }
2192
2193     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2194
2195     if (!postindex) {
2196         tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2197     }
2198
2199     if (is_vector) {
2200         if (is_load) {
2201             do_fp_ld(s, rt, tcg_addr, size);
2202         } else {
2203             do_fp_st(s, rt, tcg_addr, size);
2204         }
2205     } else {
2206         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2207         if (is_load) {
2208             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
2209                       false, 0, false, false);
2210         } else {
2211             do_gpr_st(s, tcg_rt, tcg_addr, size,
2212                       false, 0, false, false);
2213         }
2214     }
2215     tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2216     if (is_vector) {
2217         if (is_load) {
2218             do_fp_ld(s, rt2, tcg_addr, size);
2219         } else {
2220             do_fp_st(s, rt2, tcg_addr, size);
2221         }
2222     } else {
2223         TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2224         if (is_load) {
2225             do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false,
2226                       false, 0, false, false);
2227         } else {
2228             do_gpr_st(s, tcg_rt2, tcg_addr, size,
2229                       false, 0, false, false);
2230         }
2231     }
2232
2233     if (wback) {
2234         if (postindex) {
2235             tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
2236         } else {
2237             tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
2238         }
2239         tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
2240     }
2241 }
2242
2243 /*
2244  * C3.3.8 Load/store (immediate post-indexed)
2245  * C3.3.9 Load/store (immediate pre-indexed)
2246  * C3.3.12 Load/store (unscaled immediate)
2247  *
2248  * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2249  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2250  * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2251  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2252  *
2253  * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
2254          10 -> unprivileged
2255  * V = 0 -> non-vector
2256  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2257  * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2258  */
2259 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
2260                                 int opc,
2261                                 int size,
2262                                 int rt,
2263                                 bool is_vector)
2264 {
2265     int rn = extract32(insn, 5, 5);
2266     int imm9 = sextract32(insn, 12, 9);
2267     int idx = extract32(insn, 10, 2);
2268     bool is_signed = false;
2269     bool is_store = false;
2270     bool is_extended = false;
2271     bool is_unpriv = (idx == 2);
2272     bool iss_valid = !is_vector;
2273     bool post_index;
2274     bool writeback;
2275
2276     TCGv_i64 tcg_addr;
2277
2278     if (is_vector) {
2279         size |= (opc & 2) << 1;
2280         if (size > 4 || is_unpriv) {
2281             unallocated_encoding(s);
2282             return;
2283         }
2284         is_store = ((opc & 1) == 0);
2285         if (!fp_access_check(s)) {
2286             return;
2287         }
2288     } else {
2289         if (size == 3 && opc == 2) {
2290             /* PRFM - prefetch */
2291             if (is_unpriv) {
2292                 unallocated_encoding(s);
2293                 return;
2294             }
2295             return;
2296         }
2297         if (opc == 3 && size > 1) {
2298             unallocated_encoding(s);
2299             return;
2300         }
2301         is_store = (opc == 0);
2302         is_signed = extract32(opc, 1, 1);
2303         is_extended = (size < 3) && extract32(opc, 0, 1);
2304     }
2305
2306     switch (idx) {
2307     case 0:
2308     case 2:
2309         post_index = false;
2310         writeback = false;
2311         break;
2312     case 1:
2313         post_index = true;
2314         writeback = true;
2315         break;
2316     case 3:
2317         post_index = false;
2318         writeback = true;
2319         break;
2320     }
2321
2322     if (rn == 31) {
2323         gen_check_sp_alignment(s);
2324     }
2325     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2326
2327     if (!post_index) {
2328         tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2329     }
2330
2331     if (is_vector) {
2332         if (is_store) {
2333             do_fp_st(s, rt, tcg_addr, size);
2334         } else {
2335             do_fp_ld(s, rt, tcg_addr, size);
2336         }
2337     } else {
2338         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2339         int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2340         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2341
2342         if (is_store) {
2343             do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx,
2344                              iss_valid, rt, iss_sf, false);
2345         } else {
2346             do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
2347                              is_signed, is_extended, memidx,
2348                              iss_valid, rt, iss_sf, false);
2349         }
2350     }
2351
2352     if (writeback) {
2353         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2354         if (post_index) {
2355             tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2356         }
2357         tcg_gen_mov_i64(tcg_rn, tcg_addr);
2358     }
2359 }
2360
2361 /*
2362  * C3.3.10 Load/store (register offset)
2363  *
2364  * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2365  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2366  * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2367  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2368  *
2369  * For non-vector:
2370  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2371  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2372  * For vector:
2373  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2374  *   opc<0>: 0 -> store, 1 -> load
2375  * V: 1 -> vector/simd
2376  * opt: extend encoding (see DecodeRegExtend)
2377  * S: if S=1 then scale (essentially index by sizeof(size))
2378  * Rt: register to transfer into/out of
2379  * Rn: address register or SP for base
2380  * Rm: offset register or ZR for offset
2381  */
2382 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
2383                                    int opc,
2384                                    int size,
2385                                    int rt,
2386                                    bool is_vector)
2387 {
2388     int rn = extract32(insn, 5, 5);
2389     int shift = extract32(insn, 12, 1);
2390     int rm = extract32(insn, 16, 5);
2391     int opt = extract32(insn, 13, 3);
2392     bool is_signed = false;
2393     bool is_store = false;
2394     bool is_extended = false;
2395
2396     TCGv_i64 tcg_rm;
2397     TCGv_i64 tcg_addr;
2398
2399     if (extract32(opt, 1, 1) == 0) {
2400         unallocated_encoding(s);
2401         return;
2402     }
2403
2404     if (is_vector) {
2405         size |= (opc & 2) << 1;
2406         if (size > 4) {
2407             unallocated_encoding(s);
2408             return;
2409         }
2410         is_store = !extract32(opc, 0, 1);
2411         if (!fp_access_check(s)) {
2412             return;
2413         }
2414     } else {
2415         if (size == 3 && opc == 2) {
2416             /* PRFM - prefetch */
2417             return;
2418         }
2419         if (opc == 3 && size > 1) {
2420             unallocated_encoding(s);
2421             return;
2422         }
2423         is_store = (opc == 0);
2424         is_signed = extract32(opc, 1, 1);
2425         is_extended = (size < 3) && extract32(opc, 0, 1);
2426     }
2427
2428     if (rn == 31) {
2429         gen_check_sp_alignment(s);
2430     }
2431     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2432
2433     tcg_rm = read_cpu_reg(s, rm, 1);
2434     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2435
2436     tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
2437
2438     if (is_vector) {
2439         if (is_store) {
2440             do_fp_st(s, rt, tcg_addr, size);
2441         } else {
2442             do_fp_ld(s, rt, tcg_addr, size);
2443         }
2444     } else {
2445         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2446         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2447         if (is_store) {
2448             do_gpr_st(s, tcg_rt, tcg_addr, size,
2449                       true, rt, iss_sf, false);
2450         } else {
2451             do_gpr_ld(s, tcg_rt, tcg_addr, size,
2452                       is_signed, is_extended,
2453                       true, rt, iss_sf, false);
2454         }
2455     }
2456 }
2457
2458 /*
2459  * C3.3.13 Load/store (unsigned immediate)
2460  *
2461  * 31 30 29   27  26 25 24 23 22 21        10 9     5
2462  * +----+-------+---+-----+-----+------------+-------+------+
2463  * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
2464  * +----+-------+---+-----+-----+------------+-------+------+
2465  *
2466  * For non-vector:
2467  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2468  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2469  * For vector:
2470  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2471  *   opc<0>: 0 -> store, 1 -> load
2472  * Rn: base address register (inc SP)
2473  * Rt: target register
2474  */
2475 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
2476                                         int opc,
2477                                         int size,
2478                                         int rt,
2479                                         bool is_vector)
2480 {
2481     int rn = extract32(insn, 5, 5);
2482     unsigned int imm12 = extract32(insn, 10, 12);
2483     unsigned int offset;
2484
2485     TCGv_i64 tcg_addr;
2486
2487     bool is_store;
2488     bool is_signed = false;
2489     bool is_extended = false;
2490
2491     if (is_vector) {
2492         size |= (opc & 2) << 1;
2493         if (size > 4) {
2494             unallocated_encoding(s);
2495             return;
2496         }
2497         is_store = !extract32(opc, 0, 1);
2498         if (!fp_access_check(s)) {
2499             return;
2500         }
2501     } else {
2502         if (size == 3 && opc == 2) {
2503             /* PRFM - prefetch */
2504             return;
2505         }
2506         if (opc == 3 && size > 1) {
2507             unallocated_encoding(s);
2508             return;
2509         }
2510         is_store = (opc == 0);
2511         is_signed = extract32(opc, 1, 1);
2512         is_extended = (size < 3) && extract32(opc, 0, 1);
2513     }
2514
2515     if (rn == 31) {
2516         gen_check_sp_alignment(s);
2517     }
2518     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2519     offset = imm12 << size;
2520     tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2521
2522     if (is_vector) {
2523         if (is_store) {
2524             do_fp_st(s, rt, tcg_addr, size);
2525         } else {
2526             do_fp_ld(s, rt, tcg_addr, size);
2527         }
2528     } else {
2529         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2530         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2531         if (is_store) {
2532             do_gpr_st(s, tcg_rt, tcg_addr, size,
2533                       true, rt, iss_sf, false);
2534         } else {
2535             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended,
2536                       true, rt, iss_sf, false);
2537         }
2538     }
2539 }
2540
2541 /* Load/store register (all forms) */
2542 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2543 {
2544     int rt = extract32(insn, 0, 5);
2545     int opc = extract32(insn, 22, 2);
2546     bool is_vector = extract32(insn, 26, 1);
2547     int size = extract32(insn, 30, 2);
2548
2549     switch (extract32(insn, 24, 2)) {
2550     case 0:
2551         if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2552             disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
2553         } else {
2554             /* Load/store register (unscaled immediate)
2555              * Load/store immediate pre/post-indexed
2556              * Load/store register unprivileged
2557              */
2558             disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
2559         }
2560         break;
2561     case 1:
2562         disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
2563         break;
2564     default:
2565         unallocated_encoding(s);
2566         break;
2567     }
2568 }
2569
2570 /* C3.3.1 AdvSIMD load/store multiple structures
2571  *
2572  *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
2573  * +---+---+---------------+---+-------------+--------+------+------+------+
2574  * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
2575  * +---+---+---------------+---+-------------+--------+------+------+------+
2576  *
2577  * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
2578  *
2579  *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
2580  * +---+---+---------------+---+---+---------+--------+------+------+------+
2581  * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
2582  * +---+---+---------------+---+---+---------+--------+------+------+------+
2583  *
2584  * Rt: first (or only) SIMD&FP register to be transferred
2585  * Rn: base address or SP
2586  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2587  */
2588 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
2589 {
2590     int rt = extract32(insn, 0, 5);
2591     int rn = extract32(insn, 5, 5);
2592     int size = extract32(insn, 10, 2);
2593     int opcode = extract32(insn, 12, 4);
2594     bool is_store = !extract32(insn, 22, 1);
2595     bool is_postidx = extract32(insn, 23, 1);
2596     bool is_q = extract32(insn, 30, 1);
2597     TCGv_i64 tcg_addr, tcg_rn;
2598
2599     int ebytes = 1 << size;
2600     int elements = (is_q ? 128 : 64) / (8 << size);
2601     int rpt;    /* num iterations */
2602     int selem;  /* structure elements */
2603     int r;
2604
2605     if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
2606         unallocated_encoding(s);
2607         return;
2608     }
2609
2610     /* From the shared decode logic */
2611     switch (opcode) {
2612     case 0x0:
2613         rpt = 1;
2614         selem = 4;
2615         break;
2616     case 0x2:
2617         rpt = 4;
2618         selem = 1;
2619         break;
2620     case 0x4:
2621         rpt = 1;
2622         selem = 3;
2623         break;
2624     case 0x6:
2625         rpt = 3;
2626         selem = 1;
2627         break;
2628     case 0x7:
2629         rpt = 1;
2630         selem = 1;
2631         break;
2632     case 0x8:
2633         rpt = 1;
2634         selem = 2;
2635         break;
2636     case 0xa:
2637         rpt = 2;
2638         selem = 1;
2639         break;
2640     default:
2641         unallocated_encoding(s);
2642         return;
2643     }
2644
2645     if (size == 3 && !is_q && selem != 1) {
2646         /* reserved */
2647         unallocated_encoding(s);
2648         return;
2649     }
2650
2651     if (!fp_access_check(s)) {
2652         return;
2653     }
2654
2655     if (rn == 31) {
2656         gen_check_sp_alignment(s);
2657     }
2658
2659     tcg_rn = cpu_reg_sp(s, rn);
2660     tcg_addr = tcg_temp_new_i64();
2661     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2662
2663     for (r = 0; r < rpt; r++) {
2664         int e;
2665         for (e = 0; e < elements; e++) {
2666             int tt = (rt + r) % 32;
2667             int xs;
2668             for (xs = 0; xs < selem; xs++) {
2669                 if (is_store) {
2670                     do_vec_st(s, tt, e, tcg_addr, size);
2671                 } else {
2672                     do_vec_ld(s, tt, e, tcg_addr, size);
2673
2674                     /* For non-quad operations, setting a slice of the low
2675                      * 64 bits of the register clears the high 64 bits (in
2676                      * the ARM ARM pseudocode this is implicit in the fact
2677                      * that 'rval' is a 64 bit wide variable). We optimize
2678                      * by noticing that we only need to do this the first
2679                      * time we touch a register.
2680                      */
2681                     if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
2682                         clear_vec_high(s, tt);
2683                     }
2684                 }
2685                 tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2686                 tt = (tt + 1) % 32;
2687             }
2688         }
2689     }
2690
2691     if (is_postidx) {
2692         int rm = extract32(insn, 16, 5);
2693         if (rm == 31) {
2694             tcg_gen_mov_i64(tcg_rn, tcg_addr);
2695         } else {
2696             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2697         }
2698     }
2699     tcg_temp_free_i64(tcg_addr);
2700 }
2701
2702 /* C3.3.3 AdvSIMD load/store single structure
2703  *
2704  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2705  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2706  * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
2707  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2708  *
2709  * C3.3.4 AdvSIMD load/store single structure (post-indexed)
2710  *
2711  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2712  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2713  * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
2714  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2715  *
2716  * Rt: first (or only) SIMD&FP register to be transferred
2717  * Rn: base address or SP
2718  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2719  * index = encoded in Q:S:size dependent on size
2720  *
2721  * lane_size = encoded in R, opc
2722  * transfer width = encoded in opc, S, size
2723  */
2724 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
2725 {
2726     int rt = extract32(insn, 0, 5);
2727     int rn = extract32(insn, 5, 5);
2728     int size = extract32(insn, 10, 2);
2729     int S = extract32(insn, 12, 1);
2730     int opc = extract32(insn, 13, 3);
2731     int R = extract32(insn, 21, 1);
2732     int is_load = extract32(insn, 22, 1);
2733     int is_postidx = extract32(insn, 23, 1);
2734     int is_q = extract32(insn, 30, 1);
2735
2736     int scale = extract32(opc, 1, 2);
2737     int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
2738     bool replicate = false;
2739     int index = is_q << 3 | S << 2 | size;
2740     int ebytes, xs;
2741     TCGv_i64 tcg_addr, tcg_rn;
2742
2743     switch (scale) {
2744     case 3:
2745         if (!is_load || S) {
2746             unallocated_encoding(s);
2747             return;
2748         }
2749         scale = size;
2750         replicate = true;
2751         break;
2752     case 0:
2753         break;
2754     case 1:
2755         if (extract32(size, 0, 1)) {
2756             unallocated_encoding(s);
2757             return;
2758         }
2759         index >>= 1;
2760         break;
2761     case 2:
2762         if (extract32(size, 1, 1)) {
2763             unallocated_encoding(s);
2764             return;
2765         }
2766         if (!extract32(size, 0, 1)) {
2767             index >>= 2;
2768         } else {
2769             if (S) {
2770                 unallocated_encoding(s);
2771                 return;
2772             }
2773             index >>= 3;
2774             scale = 3;
2775         }
2776         break;
2777     default:
2778         g_assert_not_reached();
2779     }
2780
2781     if (!fp_access_check(s)) {
2782         return;
2783     }
2784
2785     ebytes = 1 << scale;
2786
2787     if (rn == 31) {
2788         gen_check_sp_alignment(s);
2789     }
2790
2791     tcg_rn = cpu_reg_sp(s, rn);
2792     tcg_addr = tcg_temp_new_i64();
2793     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2794
2795     for (xs = 0; xs < selem; xs++) {
2796         if (replicate) {
2797             /* Load and replicate to all elements */
2798             uint64_t mulconst;
2799             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
2800
2801             tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
2802                                 get_mem_index(s), s->be_data + scale);
2803             switch (scale) {
2804             case 0:
2805                 mulconst = 0x0101010101010101ULL;
2806                 break;
2807             case 1:
2808                 mulconst = 0x0001000100010001ULL;
2809                 break;
2810             case 2:
2811                 mulconst = 0x0000000100000001ULL;
2812                 break;
2813             case 3:
2814                 mulconst = 0;
2815                 break;
2816             default:
2817                 g_assert_not_reached();
2818             }
2819             if (mulconst) {
2820                 tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
2821             }
2822             write_vec_element(s, tcg_tmp, rt, 0, MO_64);
2823             if (is_q) {
2824                 write_vec_element(s, tcg_tmp, rt, 1, MO_64);
2825             } else {
2826                 clear_vec_high(s, rt);
2827             }
2828             tcg_temp_free_i64(tcg_tmp);
2829         } else {
2830             /* Load/store one element per register */
2831             if (is_load) {
2832                 do_vec_ld(s, rt, index, tcg_addr, s->be_data + scale);
2833             } else {
2834                 do_vec_st(s, rt, index, tcg_addr, s->be_data + scale);
2835             }
2836         }
2837         tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2838         rt = (rt + 1) % 32;
2839     }
2840
2841     if (is_postidx) {
2842         int rm = extract32(insn, 16, 5);
2843         if (rm == 31) {
2844             tcg_gen_mov_i64(tcg_rn, tcg_addr);
2845         } else {
2846             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2847         }
2848     }
2849     tcg_temp_free_i64(tcg_addr);
2850 }
2851
2852 /* C3.3 Loads and stores */
2853 static void disas_ldst(DisasContext *s, uint32_t insn)
2854 {
2855     switch (extract32(insn, 24, 6)) {
2856     case 0x08: /* Load/store exclusive */
2857         disas_ldst_excl(s, insn);
2858         break;
2859     case 0x18: case 0x1c: /* Load register (literal) */
2860         disas_ld_lit(s, insn);
2861         break;
2862     case 0x28: case 0x29:
2863     case 0x2c: case 0x2d: /* Load/store pair (all forms) */
2864         disas_ldst_pair(s, insn);
2865         break;
2866     case 0x38: case 0x39:
2867     case 0x3c: case 0x3d: /* Load/store register (all forms) */
2868         disas_ldst_reg(s, insn);
2869         break;
2870     case 0x0c: /* AdvSIMD load/store multiple structures */
2871         disas_ldst_multiple_struct(s, insn);
2872         break;
2873     case 0x0d: /* AdvSIMD load/store single structure */
2874         disas_ldst_single_struct(s, insn);
2875         break;
2876     default:
2877         unallocated_encoding(s);
2878         break;
2879     }
2880 }
2881
2882 /* C3.4.6 PC-rel. addressing
2883  *   31  30   29 28       24 23                5 4    0
2884  * +----+-------+-----------+-------------------+------+
2885  * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
2886  * +----+-------+-----------+-------------------+------+
2887  */
2888 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
2889 {
2890     unsigned int page, rd;
2891     uint64_t base;
2892     uint64_t offset;
2893
2894     page = extract32(insn, 31, 1);
2895     /* SignExtend(immhi:immlo) -> offset */
2896     offset = sextract64(insn, 5, 19);
2897     offset = offset << 2 | extract32(insn, 29, 2);
2898     rd = extract32(insn, 0, 5);
2899     base = s->pc - 4;
2900
2901     if (page) {
2902         /* ADRP (page based) */
2903         base &= ~0xfff;
2904         offset <<= 12;
2905     }
2906
2907     tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
2908 }
2909
2910 /*
2911  * C3.4.1 Add/subtract (immediate)
2912  *
2913  *  31 30 29 28       24 23 22 21         10 9   5 4   0
2914  * +--+--+--+-----------+-----+-------------+-----+-----+
2915  * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
2916  * +--+--+--+-----------+-----+-------------+-----+-----+
2917  *
2918  *    sf: 0 -> 32bit, 1 -> 64bit
2919  *    op: 0 -> add  , 1 -> sub
2920  *     S: 1 -> set flags
2921  * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
2922  */
2923 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
2924 {
2925     int rd = extract32(insn, 0, 5);
2926     int rn = extract32(insn, 5, 5);
2927     uint64_t imm = extract32(insn, 10, 12);
2928     int shift = extract32(insn, 22, 2);
2929     bool setflags = extract32(insn, 29, 1);
2930     bool sub_op = extract32(insn, 30, 1);
2931     bool is_64bit = extract32(insn, 31, 1);
2932
2933     TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2934     TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
2935     TCGv_i64 tcg_result;
2936
2937     switch (shift) {
2938     case 0x0:
2939         break;
2940     case 0x1:
2941         imm <<= 12;
2942         break;
2943     default:
2944         unallocated_encoding(s);
2945         return;
2946     }
2947
2948     tcg_result = tcg_temp_new_i64();
2949     if (!setflags) {
2950         if (sub_op) {
2951             tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
2952         } else {
2953             tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
2954         }
2955     } else {
2956         TCGv_i64 tcg_imm = tcg_const_i64(imm);
2957         if (sub_op) {
2958             gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2959         } else {
2960             gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2961         }
2962         tcg_temp_free_i64(tcg_imm);
2963     }
2964
2965     if (is_64bit) {
2966         tcg_gen_mov_i64(tcg_rd, tcg_result);
2967     } else {
2968         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
2969     }
2970
2971     tcg_temp_free_i64(tcg_result);
2972 }
2973
/* Replicate an e-bit element across a 64 bit value.
 *
 * mask: the element, held in the bottom e bits with all higher bits zero
 * e:    element width in bits; must be non-zero
 *
 * The filled width is doubled on each pass until it reaches 64 bits, and
 * the fully populated value is returned.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int filled;

    assert(e != 0);
    for (filled = e; filled < 64; filled *= 2) {
        mask |= mask << filled;
    }
    return mask;
}
2987
/* Return a value with the bottom length bits set (where 0 < length <= 64).
 * Built by shifting an all-ones value right, so length == 64 never needs
 * a 64 bit left shift.
 */
static inline uint64_t bitmask64(unsigned int length)
{
    uint64_t all_ones = ~0ULL;

    assert(length > 0 && length <= 64);
    return all_ones >> (64 - length);
}
2994
/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
 * only require the wmask.
 *
 * result: receives the decoded 64 bit pattern; written only on success
 * immn, imms, immr: the N (1 bit), imms (6 bit) and immr (6 bit) fields
 *
 * Returns false if the field values are reserved (ie should cause a guest
 * UNDEF exception), and true if they are valid.
 */
static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                                   unsigned int imms, unsigned int immr)
{
    unsigned int esize, field_mask, run, rot;
    uint64_t elem;
    int log2_esize;

    assert(immn < 2 && imms < 64 && immr < 64);

    /* The decoded value is a 64 bit vector of identical elements, each
     * 2, 4, 8, 16, 32 or 64 bits wide. An element holds a run of between
     * 1 and esize-1 set bits, rotated within the element by between 0 and
     * esize-1 bits.
     *
     * immn (1 bit) and imms (6 bits) give the element size and the run
     * length:
     * 64 bit elements: immn = 1, imms = <length of run - 1>
     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
     * The only combination left over is immn = 0, imms = 11111x (binary),
     * which is reserved, as is an all-ones <length of run - 1>.
     *
     * The rotation is always immr % esize (immr being 6 bits wide).
     */

    /* The highest set bit of immn:NOT(imms) is log2 of the element size */
    log2_esize = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (log2_esize < 1) {
        /* immn == 0 with imms == 11111x (binary) */
        return false;
    }

    esize = 1 << log2_esize;
    field_mask = esize - 1;
    run = imms & field_mask;
    rot = immr & field_mask;
    if (run == field_mask) {
        /* <length of run - 1> mustn't be all-ones. */
        return false;
    }

    /* One element: run+1 set bits, rotated right by rot within esize bits */
    elem = bitmask64(run + 1);
    if (rot != 0) {
        elem = ((elem >> rot) | (elem << (esize - rot))) & bitmask64(esize);
    }

    /* Tile the element across the whole 64 bit value */
    *result = bitfield_replicate(elem, esize);
    return true;
}
3060
3061 /* C3.4.4 Logical (immediate)
3062  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3063  * +----+-----+-------------+---+------+------+------+------+
3064  * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
3065  * +----+-----+-------------+---+------+------+------+------+
3066  */
3067 static void disas_logic_imm(DisasContext *s, uint32_t insn)
3068 {
3069     unsigned int sf, opc, is_n, immr, imms, rn, rd;
3070     TCGv_i64 tcg_rd, tcg_rn;
3071     uint64_t wmask;
3072     bool is_and = false;
3073
3074     sf = extract32(insn, 31, 1);
3075     opc = extract32(insn, 29, 2);
3076     is_n = extract32(insn, 22, 1);
3077     immr = extract32(insn, 16, 6);
3078     imms = extract32(insn, 10, 6);
3079     rn = extract32(insn, 5, 5);
3080     rd = extract32(insn, 0, 5);
3081
3082     if (!sf && is_n) {
3083         unallocated_encoding(s);
3084         return;
3085     }
3086
3087     if (opc == 0x3) { /* ANDS */
3088         tcg_rd = cpu_reg(s, rd);
3089     } else {
3090         tcg_rd = cpu_reg_sp(s, rd);
3091     }
3092     tcg_rn = cpu_reg(s, rn);
3093
3094     if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
3095         /* some immediate field values are reserved */
3096         unallocated_encoding(s);
3097         return;
3098     }
3099
3100     if (!sf) {
3101         wmask &= 0xffffffff;
3102     }
3103
3104     switch (opc) {
3105     case 0x3: /* ANDS */
3106     case 0x0: /* AND */
3107         tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
3108         is_and = true;
3109         break;
3110     case 0x1: /* ORR */
3111         tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
3112         break;
3113     case 0x2: /* EOR */
3114         tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
3115         break;
3116     default:
3117         assert(FALSE); /* must handle all above */
3118         break;
3119     }
3120
3121     if (!sf && !is_and) {
3122         /* zero extend final result; we know we can skip this for AND
3123          * since the immediate had the high 32 bits clear.
3124          */
3125         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3126     }
3127
3128     if (opc == 3) { /* ANDS */
3129         gen_logic_CC(sf, tcg_rd);
3130     }
3131 }
3132
3133 /*
3134  * C3.4.5 Move wide (immediate)
3135  *
3136  *  31 30 29 28         23 22 21 20             5 4    0
3137  * +--+-----+-------------+-----+----------------+------+
3138  * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
3139  * +--+-----+-------------+-----+----------------+------+
3140  *
3141  * sf: 0 -> 32 bit, 1 -> 64 bit
3142  * opc: 00 -> N, 10 -> Z, 11 -> K
3143  * hw: shift/16 (0,16, and sf only 32, 48)
3144  */
3145 static void disas_movw_imm(DisasContext *s, uint32_t insn)
3146 {
3147     int rd = extract32(insn, 0, 5);
3148     uint64_t imm = extract32(insn, 5, 16);
3149     int sf = extract32(insn, 31, 1);
3150     int opc = extract32(insn, 29, 2);
3151     int pos = extract32(insn, 21, 2) << 4;
3152     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3153     TCGv_i64 tcg_imm;
3154
3155     if (!sf && (pos >= 32)) {
3156         unallocated_encoding(s);
3157         return;
3158     }
3159
3160     switch (opc) {
3161     case 0: /* MOVN */
3162     case 2: /* MOVZ */
3163         imm <<= pos;
3164         if (opc == 0) {
3165             imm = ~imm;
3166         }
3167         if (!sf) {
3168             imm &= 0xffffffffu;
3169         }
3170         tcg_gen_movi_i64(tcg_rd, imm);
3171         break;
3172     case 3: /* MOVK */
3173         tcg_imm = tcg_const_i64(imm);
3174         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
3175         tcg_temp_free_i64(tcg_imm);
3176         if (!sf) {
3177             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3178         }
3179         break;
3180     default:
3181         unallocated_encoding(s);
3182         break;
3183     }
3184 }
3185
/* C3.4.2 Bitfield
 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
 * +----+-----+-------------+---+------+------+------+------+
 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
 * +----+-----+-------------+---+------+------+------+------+
 *
 * opc: 0 -> SBFM, 2 -> UBFM; opc == 1 (BFM) is the only form that
 * keeps the existing bits of Rd, and opc == 3 is unallocated.
 * N must equal sf, and immr/imms must be below the operand width.
 *
 * Common aliases (sign/zero extends and immediate shifts) get dedicated
 * fast paths; everything else goes through the generic
 * shift + deposit sequence at the bottom.
 */
static void disas_bitfield(DisasContext *s, uint32_t insn)
{
    unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
    TCGv_i64 tcg_rd, tcg_tmp;

    sf = extract32(insn, 31, 1);
    opc = extract32(insn, 29, 2);
    n = extract32(insn, 22, 1);
    ri = extract32(insn, 16, 6);
    si = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);
    bitsize = sf ? 64 : 32;

    if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
        unallocated_encoding(s);
        return;
    }

    tcg_rd = cpu_reg(s, rd);

    /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
       to be smaller than bitsize, we'll never reference data outside the
       low 32-bits anyway.  */
    tcg_tmp = read_cpu_reg(s, rn, 1);

    /* Recognize the common aliases.  */
    if (opc == 0) { /* SBFM */
        if (ri == 0) {
            if (si == 7) { /* SXTB */
                tcg_gen_ext8s_i64(tcg_rd, tcg_tmp);
                goto done;
            } else if (si == 15) { /* SXTH */
                tcg_gen_ext16s_i64(tcg_rd, tcg_tmp);
                goto done;
            } else if (si == 31) { /* SXTW */
                tcg_gen_ext32s_i64(tcg_rd, tcg_tmp);
                goto done;
            }
        }
        if (si == 63 || (si == 31 && ri <= si)) { /* ASR */
            /* With si == 31 the field ends at bit 31, so sign extend from
             * there first and let the 64 bit shift do the rest.
             */
            if (si == 31) {
                tcg_gen_ext32s_i64(tcg_tmp, tcg_tmp);
            }
            tcg_gen_sari_i64(tcg_rd, tcg_tmp, ri);
            goto done;
        }
    } else if (opc == 2) { /* UBFM */
        /* The two paths below return directly instead of going through
         * "done": si < bitsize bounds the mask, and the si == 31 shift is
         * preceded by a 32 bit zero extend, so a 32 bit result already
         * has its high half clear.
         */
        if (ri == 0) { /* UXTB, UXTH, plus non-canonical AND */
            tcg_gen_andi_i64(tcg_rd, tcg_tmp, bitmask64(si + 1));
            return;
        }
        if (si == 63 || (si == 31 && ri <= si)) { /* LSR */
            if (si == 31) {
                tcg_gen_ext32u_i64(tcg_tmp, tcg_tmp);
            }
            tcg_gen_shri_i64(tcg_rd, tcg_tmp, ri);
            return;
        }
        if (si + 1 == ri && si != bitsize - 1) { /* LSL */
            int shift = bitsize - 1 - si;
            tcg_gen_shli_i64(tcg_rd, tcg_tmp, shift);
            goto done;
        }
    }

    /* Generic path.  SBFM and UBFM start from an all-zero destination;
     * opc == 1 deposits into the current contents of Rd.
     */
    if (opc != 1) { /* SBFM or UBFM */
        tcg_gen_movi_i64(tcg_rd, 0);
    }

    /* do the bit move operation */
    if (si >= ri) {
        /* Wd<s-r:0> = Wn<s:r> */
        tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
        pos = 0;
        len = (si - ri) + 1;
    } else {
        /* Wd<32+s-r,32-r> = Wn<s:0> */
        pos = bitsize - ri;
        len = si + 1;
    }

    tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);

    if (opc == 0) { /* SBFM - sign extend the destination field */
        /* Move the top bit of the deposited field up to bit 63, then
         * shift back arithmetically to smear it over the upper bits.
         */
        tcg_gen_shli_i64(tcg_rd, tcg_rd, 64 - (pos + len));
        tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len));
    }

 done:
    if (!sf) { /* zero extend final result */
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}
3286
3287 /* C3.4.3 Extract
3288  *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3289  * +----+------+-------------+---+----+------+--------+------+------+
3290  * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3291  * +----+------+-------------+---+----+------+--------+------+------+
3292  */
3293 static void disas_extract(DisasContext *s, uint32_t insn)
3294 {
3295     unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3296
3297     sf = extract32(insn, 31, 1);
3298     n = extract32(insn, 22, 1);
3299     rm = extract32(insn, 16, 5);
3300     imm = extract32(insn, 10, 6);
3301     rn = extract32(insn, 5, 5);
3302     rd = extract32(insn, 0, 5);
3303     op21 = extract32(insn, 29, 2);
3304     op0 = extract32(insn, 21, 1);
3305     bitsize = sf ? 64 : 32;
3306
3307     if (sf != n || op21 || op0 || imm >= bitsize) {
3308         unallocated_encoding(s);
3309     } else {
3310         TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3311
3312         tcg_rd = cpu_reg(s, rd);
3313
3314         if (unlikely(imm == 0)) {
3315             /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3316              * so an extract from bit 0 is a special case.
3317              */
3318             if (sf) {
3319                 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3320             } else {
3321                 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3322             }
3323         } else if (rm == rn) { /* ROR */
3324             tcg_rm = cpu_reg(s, rm);
3325             if (sf) {
3326                 tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
3327             } else {
3328                 TCGv_i32 tmp = tcg_temp_new_i32();
3329                 tcg_gen_extrl_i64_i32(tmp, tcg_rm);
3330                 tcg_gen_rotri_i32(tmp, tmp, imm);
3331                 tcg_gen_extu_i32_i64(tcg_rd, tmp);
3332                 tcg_temp_free_i32(tmp);
3333             }
3334         } else {
3335             tcg_rm = read_cpu_reg(s, rm, sf);
3336             tcg_rn = read_cpu_reg(s, rn, sf);
3337             tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
3338             tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
3339             tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
3340             if (!sf) {
3341                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3342             }
3343         }
3344     }
3345 }
3346
3347 /* C3.4 Data processing - immediate */
3348 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3349 {
3350     switch (extract32(insn, 23, 6)) {
3351     case 0x20: case 0x21: /* PC-rel. addressing */
3352         disas_pc_rel_adr(s, insn);
3353         break;
3354     case 0x22: case 0x23: /* Add/subtract (immediate) */
3355         disas_add_sub_imm(s, insn);
3356         break;
3357     case 0x24: /* Logical (immediate) */
3358         disas_logic_imm(s, insn);
3359         break;
3360     case 0x25: /* Move wide (immediate) */
3361         disas_movw_imm(s, insn);
3362         break;
3363     case 0x26: /* Bitfield */
3364         disas_bitfield(s, insn);
3365         break;
3366     case 0x27: /* Extract */
3367         disas_extract(s, insn);
3368         break;
3369     default:
3370         unallocated_encoding(s);
3371         break;
3372     }
3373 }
3374
3375 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
3376  * Note that it is the caller's responsibility to ensure that the
3377  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
3378  * mandated semantics for out of range shifts.
3379  */
3380 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
3381                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3382 {
3383     switch (shift_type) {
3384     case A64_SHIFT_TYPE_LSL:
3385         tcg_gen_shl_i64(dst, src, shift_amount);
3386         break;
3387     case A64_SHIFT_TYPE_LSR:
3388         tcg_gen_shr_i64(dst, src, shift_amount);
3389         break;
3390     case A64_SHIFT_TYPE_ASR:
3391         if (!sf) {
3392             tcg_gen_ext32s_i64(dst, src);
3393         }
3394         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
3395         break;
3396     case A64_SHIFT_TYPE_ROR:
3397         if (sf) {
3398             tcg_gen_rotr_i64(dst, src, shift_amount);
3399         } else {
3400             TCGv_i32 t0, t1;
3401             t0 = tcg_temp_new_i32();
3402             t1 = tcg_temp_new_i32();
3403             tcg_gen_extrl_i64_i32(t0, src);
3404             tcg_gen_extrl_i64_i32(t1, shift_amount);
3405             tcg_gen_rotr_i32(t0, t0, t1);
3406             tcg_gen_extu_i32_i64(dst, t0);
3407             tcg_temp_free_i32(t0);
3408             tcg_temp_free_i32(t1);
3409         }
3410         break;
3411     default:
3412         assert(FALSE); /* all shift types should be handled */
3413         break;
3414     }
3415
3416     if (!sf) { /* zero extend final result */
3417         tcg_gen_ext32u_i64(dst, dst);
3418     }
3419 }
3420
3421 /* Shift a TCGv src by immediate, put result in dst.
3422  * The shift amount must be in range (this should always be true as the
3423  * relevant instructions will UNDEF on bad shift immediates).
3424  */
3425 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
3426                           enum a64_shift_type shift_type, unsigned int shift_i)
3427 {
3428     assert(shift_i < (sf ? 64 : 32));
3429
3430     if (shift_i == 0) {
3431         tcg_gen_mov_i64(dst, src);
3432     } else {
3433         TCGv_i64 shift_const;
3434
3435         shift_const = tcg_const_i64(shift_i);
3436         shift_reg(dst, src, sf, shift_type, shift_const);
3437         tcg_temp_free_i64(shift_const);
3438     }
3439 }
3440
3441 /* C3.5.10 Logical (shifted register)
3442  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
3443  * +----+-----+-----------+-------+---+------+--------+------+------+
3444  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
3445  * +----+-----+-----------+-------+---+------+--------+------+------+
3446  */
3447 static void disas_logic_reg(DisasContext *s, uint32_t insn)
3448 {
3449     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3450     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3451
3452     sf = extract32(insn, 31, 1);
3453     opc = extract32(insn, 29, 2);
3454     shift_type = extract32(insn, 22, 2);
3455     invert = extract32(insn, 21, 1);
3456     rm = extract32(insn, 16, 5);
3457     shift_amount = extract32(insn, 10, 6);
3458     rn = extract32(insn, 5, 5);
3459     rd = extract32(insn, 0, 5);
3460
3461     if (!sf && (shift_amount & (1 << 5))) {
3462         unallocated_encoding(s);
3463         return;
3464     }
3465
3466     tcg_rd = cpu_reg(s, rd);
3467
3468     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3469         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3470          * register-register MOV and MVN, so it is worth special casing.
3471          */
3472         tcg_rm = cpu_reg(s, rm);
3473         if (invert) {
3474             tcg_gen_not_i64(tcg_rd, tcg_rm);
3475             if (!sf) {
3476                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3477             }
3478         } else {
3479             if (sf) {
3480                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
3481             } else {
3482                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
3483             }
3484         }
3485         return;
3486     }
3487
3488     tcg_rm = read_cpu_reg(s, rm, sf);
3489
3490     if (shift_amount) {
3491         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3492     }
3493
3494     tcg_rn = cpu_reg(s, rn);
3495
3496     switch (opc | (invert << 2)) {
3497     case 0: /* AND */
3498     case 3: /* ANDS */
3499         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3500         break;
3501     case 1: /* ORR */
3502         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3503         break;
3504     case 2: /* EOR */
3505         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3506         break;
3507     case 4: /* BIC */
3508     case 7: /* BICS */
3509         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3510         break;
3511     case 5: /* ORN */
3512         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3513         break;
3514     case 6: /* EON */
3515         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3516         break;
3517     default:
3518         assert(FALSE);
3519         break;
3520     }
3521
3522     if (!sf) {
3523         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3524     }
3525
3526     if (opc == 3) {
3527         gen_logic_CC(sf, tcg_rd);
3528     }
3529 }
3530
3531 /*
3532  * C3.5.1 Add/subtract (extended register)
3533  *
3534  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
3535  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3536  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
3537  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3538  *
3539  *  sf: 0 -> 32bit, 1 -> 64bit
3540  *  op: 0 -> add  , 1 -> sub
3541  *   S: 1 -> set flags
3542  * opt: 00
3543  * option: extension type (see DecodeRegExtend)
3544  * imm3: optional shift to Rm
3545  *
3546  * Rd = Rn + LSL(extend(Rm), amount)
3547  */
3548 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3549 {
3550     int rd = extract32(insn, 0, 5);
3551     int rn = extract32(insn, 5, 5);
3552     int imm3 = extract32(insn, 10, 3);
3553     int option = extract32(insn, 13, 3);
3554     int rm = extract32(insn, 16, 5);
3555     bool setflags = extract32(insn, 29, 1);
3556     bool sub_op = extract32(insn, 30, 1);
3557     bool sf = extract32(insn, 31, 1);
3558
3559     TCGv_i64 tcg_rm, tcg_rn; /* temps */
3560     TCGv_i64 tcg_rd;
3561     TCGv_i64 tcg_result;
3562
3563     if (imm3 > 4) {
3564         unallocated_encoding(s);
3565         return;
3566     }
3567
3568     /* non-flag setting ops may use SP */
3569     if (!setflags) {
3570         tcg_rd = cpu_reg_sp(s, rd);
3571     } else {
3572         tcg_rd = cpu_reg(s, rd);
3573     }
3574     tcg_rn = read_cpu_reg_sp(s, rn, sf);
3575
3576     tcg_rm = read_cpu_reg(s, rm, sf);
3577     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3578
3579     tcg_result = tcg_temp_new_i64();
3580
3581     if (!setflags) {
3582         if (sub_op) {
3583             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3584         } else {
3585             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3586         }
3587     } else {
3588         if (sub_op) {
3589             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3590         } else {
3591             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3592         }
3593     }
3594
3595     if (sf) {
3596         tcg_gen_mov_i64(tcg_rd, tcg_result);
3597     } else {
3598         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3599     }
3600
3601     tcg_temp_free_i64(tcg_result);
3602 }
3603
3604 /*
3605  * C3.5.2 Add/subtract (shifted register)
3606  *
3607  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
3608  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3609  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
3610  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3611  *
3612  *    sf: 0 -> 32bit, 1 -> 64bit
3613  *    op: 0 -> add  , 1 -> sub
3614  *     S: 1 -> set flags
3615  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3616  *  imm6: Shift amount to apply to Rm before the add/sub
3617  */
3618 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3619 {
3620     int rd = extract32(insn, 0, 5);
3621     int rn = extract32(insn, 5, 5);
3622     int imm6 = extract32(insn, 10, 6);
3623     int rm = extract32(insn, 16, 5);
3624     int shift_type = extract32(insn, 22, 2);
3625     bool setflags = extract32(insn, 29, 1);
3626     bool sub_op = extract32(insn, 30, 1);
3627     bool sf = extract32(insn, 31, 1);
3628
3629     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3630     TCGv_i64 tcg_rn, tcg_rm;
3631     TCGv_i64 tcg_result;
3632
3633     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3634         unallocated_encoding(s);
3635         return;
3636     }
3637
3638     tcg_rn = read_cpu_reg(s, rn, sf);
3639     tcg_rm = read_cpu_reg(s, rm, sf);
3640
3641     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
3642
3643     tcg_result = tcg_temp_new_i64();
3644
3645     if (!setflags) {
3646         if (sub_op) {
3647             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3648         } else {
3649             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3650         }
3651     } else {
3652         if (sub_op) {
3653             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3654         } else {
3655             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3656         }
3657     }
3658
3659     if (sf) {
3660         tcg_gen_mov_i64(tcg_rd, tcg_result);
3661     } else {
3662         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3663     }
3664
3665     tcg_temp_free_i64(tcg_result);
3666 }
3667
3668 /* C3.5.9 Data-processing (3 source)
3669
3670    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
3671   +--+------+-----------+------+------+----+------+------+------+
3672   |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
3673   +--+------+-----------+------+------+----+------+------+------+
3674
3675  */
3676 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
3677 {
3678     int rd = extract32(insn, 0, 5);
3679     int rn = extract32(insn, 5, 5);
3680     int ra = extract32(insn, 10, 5);
3681     int rm = extract32(insn, 16, 5);
3682     int op_id = (extract32(insn, 29, 3) << 4) |
3683         (extract32(insn, 21, 3) << 1) |
3684         extract32(insn, 15, 1);
3685     bool sf = extract32(insn, 31, 1);
3686     bool is_sub = extract32(op_id, 0, 1);
3687     bool is_high = extract32(op_id, 2, 1);
3688     bool is_signed = false;
3689     TCGv_i64 tcg_op1;
3690     TCGv_i64 tcg_op2;
3691     TCGv_i64 tcg_tmp;
3692
3693     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
3694     switch (op_id) {
3695     case 0x42: /* SMADDL */
3696     case 0x43: /* SMSUBL */
3697     case 0x44: /* SMULH */
3698         is_signed = true;
3699         break;
3700     case 0x0: /* MADD (32bit) */
3701     case 0x1: /* MSUB (32bit) */
3702     case 0x40: /* MADD (64bit) */
3703     case 0x41: /* MSUB (64bit) */
3704     case 0x4a: /* UMADDL */
3705     case 0x4b: /* UMSUBL */
3706     case 0x4c: /* UMULH */
3707         break;
3708     default:
3709         unallocated_encoding(s);
3710         return;
3711     }
3712
3713     if (is_high) {
3714         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
3715         TCGv_i64 tcg_rd = cpu_reg(s, rd);
3716         TCGv_i64 tcg_rn = cpu_reg(s, rn);
3717         TCGv_i64 tcg_rm = cpu_reg(s, rm);
3718
3719         if (is_signed) {
3720             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3721         } else {
3722             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3723         }
3724
3725         tcg_temp_free_i64(low_bits);
3726         return;
3727     }
3728
3729     tcg_op1 = tcg_temp_new_i64();
3730     tcg_op2 = tcg_temp_new_i64();
3731     tcg_tmp = tcg_temp_new_i64();
3732
3733     if (op_id < 0x42) {
3734         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
3735         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
3736     } else {
3737         if (is_signed) {
3738             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
3739             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
3740         } else {
3741             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
3742             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
3743         }
3744     }
3745
3746     if (ra == 31 && !is_sub) {
3747         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
3748         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
3749     } else {
3750         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
3751         if (is_sub) {
3752             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3753         } else {
3754             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3755         }
3756     }
3757
3758     if (!sf) {
3759         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
3760     }
3761
3762     tcg_temp_free_i64(tcg_op1);
3763     tcg_temp_free_i64(tcg_op2);
3764     tcg_temp_free_i64(tcg_tmp);
3765 }
3766
3767 /* C3.5.3 - Add/subtract (with carry)
3768  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
3769  * +--+--+--+------------------------+------+---------+------+-----+
3770  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
3771  * +--+--+--+------------------------+------+---------+------+-----+
3772  *                                            [000000]
3773  */
3774
3775 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
3776 {
3777     unsigned int sf, op, setflags, rm, rn, rd;
3778     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
3779
3780     if (extract32(insn, 10, 6) != 0) {
3781         unallocated_encoding(s);
3782         return;
3783     }
3784
3785     sf = extract32(insn, 31, 1);
3786     op = extract32(insn, 30, 1);
3787     setflags = extract32(insn, 29, 1);
3788     rm = extract32(insn, 16, 5);
3789     rn = extract32(insn, 5, 5);
3790     rd = extract32(insn, 0, 5);
3791
3792     tcg_rd = cpu_reg(s, rd);
3793     tcg_rn = cpu_reg(s, rn);
3794
3795     if (op) {
3796         tcg_y = new_tmp_a64(s);
3797         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
3798     } else {
3799         tcg_y = cpu_reg(s, rm);
3800     }
3801
3802     if (setflags) {
3803         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
3804     } else {
3805         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
3806     }
3807 }
3808
3809 /* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
3810  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
3811  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3812  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
3813  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3814  *        [1]                             y                [0]       [0]
3815  */
3816 static void disas_cc(DisasContext *s, uint32_t insn)
3817 {
3818     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
3819     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
3820     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
3821     DisasCompare c;
3822
3823     if (!extract32(insn, 29, 1)) {
3824         unallocated_encoding(s);
3825         return;
3826     }
3827     if (insn & (1 << 10 | 1 << 4)) {
3828         unallocated_encoding(s);
3829         return;
3830     }
3831     sf = extract32(insn, 31, 1);
3832     op = extract32(insn, 30, 1);
3833     is_imm = extract32(insn, 11, 1);
3834     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
3835     cond = extract32(insn, 12, 4);
3836     rn = extract32(insn, 5, 5);
3837     nzcv = extract32(insn, 0, 4);
3838
3839     /* Set T0 = !COND.  */
3840     tcg_t0 = tcg_temp_new_i32();
3841     arm_test_cc(&c, cond);
3842     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
3843     arm_free_cc(&c);
3844
3845     /* Load the arguments for the new comparison.  */
3846     if (is_imm) {
3847         tcg_y = new_tmp_a64(s);
3848         tcg_gen_movi_i64(tcg_y, y);
3849     } else {
3850         tcg_y = cpu_reg(s, y);
3851     }
3852     tcg_rn = cpu_reg(s, rn);
3853
3854     /* Set the flags for the new comparison.  */
3855     tcg_tmp = tcg_temp_new_i64();
3856     if (op) {
3857         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3858     } else {
3859         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3860     }
3861     tcg_temp_free_i64(tcg_tmp);
3862
3863     /* If COND was false, force the flags to #nzcv.  Compute two masks
3864      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
3865      * For tcg hosts that support ANDC, we can make do with just T1.
3866      * In either case, allow the tcg optimizer to delete any unused mask.
3867      */
3868     tcg_t1 = tcg_temp_new_i32();
3869     tcg_t2 = tcg_temp_new_i32();
3870     tcg_gen_neg_i32(tcg_t1, tcg_t0);
3871     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
3872
3873     if (nzcv & 8) { /* N */
3874         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
3875     } else {
3876         if (TCG_TARGET_HAS_andc_i32) {
3877             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
3878         } else {
3879             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
3880         }
3881     }
3882     if (nzcv & 4) { /* Z */
3883         if (TCG_TARGET_HAS_andc_i32) {
3884             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
3885         } else {
3886             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
3887         }
3888     } else {
3889         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
3890     }
3891     if (nzcv & 2) { /* C */
3892         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
3893     } else {
3894         if (TCG_TARGET_HAS_andc_i32) {
3895             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
3896         } else {
3897             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
3898         }
3899     }
3900     if (nzcv & 1) { /* V */
3901         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
3902     } else {
3903         if (TCG_TARGET_HAS_andc_i32) {
3904             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
3905         } else {
3906             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
3907         }
3908     }
3909     tcg_temp_free_i32(tcg_t0);
3910     tcg_temp_free_i32(tcg_t1);
3911     tcg_temp_free_i32(tcg_t2);
3912 }
3913
3914 /* C3.5.6 Conditional select
3915  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
3916  * +----+----+---+-----------------+------+------+-----+------+------+
3917  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
3918  * +----+----+---+-----------------+------+------+-----+------+------+
3919  */
3920 static void disas_cond_select(DisasContext *s, uint32_t insn)
3921 {
3922     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
3923     TCGv_i64 tcg_rd, zero;
3924     DisasCompare64 c;
3925
3926     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
3927         /* S == 1 or op2<1> == 1 */
3928         unallocated_encoding(s);
3929         return;
3930     }
3931     sf = extract32(insn, 31, 1);
3932     else_inv = extract32(insn, 30, 1);
3933     rm = extract32(insn, 16, 5);
3934     cond = extract32(insn, 12, 4);
3935     else_inc = extract32(insn, 10, 1);
3936     rn = extract32(insn, 5, 5);
3937     rd = extract32(insn, 0, 5);
3938
3939     tcg_rd = cpu_reg(s, rd);
3940
3941     a64_test_cc(&c, cond);
3942     zero = tcg_const_i64(0);
3943
3944     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
3945         /* CSET & CSETM.  */
3946         tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
3947         if (else_inv) {
3948             tcg_gen_neg_i64(tcg_rd, tcg_rd);
3949         }
3950     } else {
3951         TCGv_i64 t_true = cpu_reg(s, rn);
3952         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
3953         if (else_inv && else_inc) {
3954             tcg_gen_neg_i64(t_false, t_false);
3955         } else if (else_inv) {
3956             tcg_gen_not_i64(t_false, t_false);
3957         } else if (else_inc) {
3958             tcg_gen_addi_i64(t_false, t_false, 1);
3959         }
3960         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
3961     }
3962
3963     tcg_temp_free_i64(zero);
3964     a64_free_cc(&c);
3965
3966     if (!sf) {
3967         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3968     }
3969 }
3970
3971 static void handle_clz(DisasContext *s, unsigned int sf,
3972                        unsigned int rn, unsigned int rd)
3973 {
3974     TCGv_i64 tcg_rd, tcg_rn;
3975     tcg_rd = cpu_reg(s, rd);
3976     tcg_rn = cpu_reg(s, rn);
3977
3978     if (sf) {
3979         gen_helper_clz64(tcg_rd, tcg_rn);
3980     } else {
3981         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3982         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3983         gen_helper_clz(tcg_tmp32, tcg_tmp32);
3984         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3985         tcg_temp_free_i32(tcg_tmp32);
3986     }
3987 }
3988
3989 static void handle_cls(DisasContext *s, unsigned int sf,
3990                        unsigned int rn, unsigned int rd)
3991 {
3992     TCGv_i64 tcg_rd, tcg_rn;
3993     tcg_rd = cpu_reg(s, rd);
3994     tcg_rn = cpu_reg(s, rn);
3995
3996     if (sf) {
3997         gen_helper_cls64(tcg_rd, tcg_rn);
3998     } else {
3999         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4000         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4001         gen_helper_cls32(tcg_tmp32, tcg_tmp32);
4002         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4003         tcg_temp_free_i32(tcg_tmp32);
4004     }
4005 }
4006
4007 static void handle_rbit(DisasContext *s, unsigned int sf,
4008                         unsigned int rn, unsigned int rd)
4009 {
4010     TCGv_i64 tcg_rd, tcg_rn;
4011     tcg_rd = cpu_reg(s, rd);
4012     tcg_rn = cpu_reg(s, rn);
4013
4014     if (sf) {
4015         gen_helper_rbit64(tcg_rd, tcg_rn);
4016     } else {
4017         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4018         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4019         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
4020         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4021         tcg_temp_free_i32(tcg_tmp32);
4022     }
4023 }
4024
4025 /* C5.6.149 REV with sf==1, opcode==3 ("REV64") */
4026 static void handle_rev64(DisasContext *s, unsigned int sf,
4027                          unsigned int rn, unsigned int rd)
4028 {
4029     if (!sf) {
4030         unallocated_encoding(s);
4031         return;
4032     }
4033     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
4034 }
4035
4036 /* C5.6.149 REV with sf==0, opcode==2
4037  * C5.6.151 REV32 (sf==1, opcode==2)
4038  */
4039 static void handle_rev32(DisasContext *s, unsigned int sf,
4040                          unsigned int rn, unsigned int rd)
4041 {
4042     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4043
4044     if (sf) {
4045         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4046         TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4047
4048         /* bswap32_i64 requires zero high word */
4049         tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
4050         tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
4051         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
4052         tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
4053         tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
4054
4055         tcg_temp_free_i64(tcg_tmp);
4056     } else {
4057         tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
4058         tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
4059     }
4060 }
4061
4062 /* C5.6.150 REV16 (opcode==1) */
4063 static void handle_rev16(DisasContext *s, unsigned int sf,
4064                          unsigned int rn, unsigned int rd)
4065 {
4066     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4067     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4068     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4069
4070     tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff);
4071     tcg_gen_bswap16_i64(tcg_rd, tcg_tmp);
4072
4073     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16);
4074     tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
4075     tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
4076     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16);
4077
4078     if (sf) {
4079         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
4080         tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
4081         tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
4082         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16);
4083
4084         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48);
4085         tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
4086         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16);
4087     }
4088
4089     tcg_temp_free_i64(tcg_tmp);
4090 }
4091
4092 /* C3.5.7 Data-processing (1 source)
4093  *   31  30  29  28             21 20     16 15    10 9    5 4    0
4094  * +----+---+---+-----------------+---------+--------+------+------+
4095  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
4096  * +----+---+---+-----------------+---------+--------+------+------+
4097  */
4098 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
4099 {
4100     unsigned int sf, opcode, rn, rd;
4101
4102     if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
4103         unallocated_encoding(s);
4104         return;
4105     }
4106
4107     sf = extract32(insn, 31, 1);
4108     opcode = extract32(insn, 10, 6);
4109     rn = extract32(insn, 5, 5);
4110     rd = extract32(insn, 0, 5);
4111
4112     switch (opcode) {
4113     case 0: /* RBIT */
4114         handle_rbit(s, sf, rn, rd);
4115         break;
4116     case 1: /* REV16 */
4117         handle_rev16(s, sf, rn, rd);
4118         break;
4119     case 2: /* REV32 */
4120         handle_rev32(s, sf, rn, rd);
4121         break;
4122     case 3: /* REV64 */
4123         handle_rev64(s, sf, rn, rd);
4124         break;
4125     case 4: /* CLZ */
4126         handle_clz(s, sf, rn, rd);
4127         break;
4128     case 5: /* CLS */
4129         handle_cls(s, sf, rn, rd);
4130         break;
4131     }
4132 }
4133
4134 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
4135                        unsigned int rm, unsigned int rn, unsigned int rd)
4136 {
4137     TCGv_i64 tcg_n, tcg_m, tcg_rd;
4138     tcg_rd = cpu_reg(s, rd);
4139
4140     if (!sf && is_signed) {
4141         tcg_n = new_tmp_a64(s);
4142         tcg_m = new_tmp_a64(s);
4143         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
4144         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
4145     } else {
4146         tcg_n = read_cpu_reg(s, rn, sf);
4147         tcg_m = read_cpu_reg(s, rm, sf);
4148     }
4149
4150     if (is_signed) {
4151         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
4152     } else {
4153         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
4154     }
4155
4156     if (!sf) { /* zero extend final result */
4157         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4158     }
4159 }
4160
4161 /* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */
4162 static void handle_shift_reg(DisasContext *s,
4163                              enum a64_shift_type shift_type, unsigned int sf,
4164                              unsigned int rm, unsigned int rn, unsigned int rd)
4165 {
4166     TCGv_i64 tcg_shift = tcg_temp_new_i64();
4167     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4168     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4169
4170     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
4171     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
4172     tcg_temp_free_i64(tcg_shift);
4173 }
4174
4175 /* CRC32[BHWX], CRC32C[BHWX] */
4176 static void handle_crc32(DisasContext *s,
4177                          unsigned int sf, unsigned int sz, bool crc32c,
4178                          unsigned int rm, unsigned int rn, unsigned int rd)
4179 {
4180     TCGv_i64 tcg_acc, tcg_val;
4181     TCGv_i32 tcg_bytes;
4182
4183     if (!arm_dc_feature(s, ARM_FEATURE_CRC)
4184         || (sf == 1 && sz != 3)
4185         || (sf == 0 && sz == 3)) {
4186         unallocated_encoding(s);
4187         return;
4188     }
4189
4190     if (sz == 3) {
4191         tcg_val = cpu_reg(s, rm);
4192     } else {
4193         uint64_t mask;
4194         switch (sz) {
4195         case 0:
4196             mask = 0xFF;
4197             break;
4198         case 1:
4199             mask = 0xFFFF;
4200             break;
4201         case 2:
4202             mask = 0xFFFFFFFF;
4203             break;
4204         default:
4205             g_assert_not_reached();
4206         }
4207         tcg_val = new_tmp_a64(s);
4208         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
4209     }
4210
4211     tcg_acc = cpu_reg(s, rn);
4212     tcg_bytes = tcg_const_i32(1 << sz);
4213
4214     if (crc32c) {
4215         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4216     } else {
4217         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4218     }
4219
4220     tcg_temp_free_i32(tcg_bytes);
4221 }
4222
4223 /* C3.5.8 Data-processing (2 source)
4224  *   31   30  29 28             21 20  16 15    10 9    5 4    0
4225  * +----+---+---+-----------------+------+--------+------+------+
4226  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
4227  * +----+---+---+-----------------+------+--------+------+------+
4228  */
4229 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
4230 {
4231     unsigned int sf, rm, opcode, rn, rd;
4232     sf = extract32(insn, 31, 1);
4233     rm = extract32(insn, 16, 5);
4234     opcode = extract32(insn, 10, 6);
4235     rn = extract32(insn, 5, 5);
4236     rd = extract32(insn, 0, 5);
4237
4238     if (extract32(insn, 29, 1)) {
4239         unallocated_encoding(s);
4240         return;
4241     }
4242
4243     switch (opcode) {
4244     case 2: /* UDIV */
4245         handle_div(s, false, sf, rm, rn, rd);
4246         break;
4247     case 3: /* SDIV */
4248         handle_div(s, true, sf, rm, rn, rd);
4249         break;
4250     case 8: /* LSLV */
4251         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
4252         break;
4253     case 9: /* LSRV */
4254         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
4255         break;
4256     case 10: /* ASRV */
4257         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
4258         break;
4259     case 11: /* RORV */
4260         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
4261         break;
4262     case 16:
4263     case 17:
4264     case 18:
4265     case 19:
4266     case 20:
4267     case 21:
4268     case 22:
4269     case 23: /* CRC32 */
4270     {
4271         int sz = extract32(opcode, 0, 2);
4272         bool crc32c = extract32(opcode, 2, 1);
4273         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
4274         break;
4275     }
4276     default:
4277         unallocated_encoding(s);
4278         break;
4279     }
4280 }
4281
4282 /* C3.5 Data processing - register */
4283 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
4284 {
4285     switch (extract32(insn, 24, 5)) {
4286     case 0x0a: /* Logical (shifted register) */
4287         disas_logic_reg(s, insn);
4288         break;
4289     case 0x0b: /* Add/subtract */
4290         if (insn & (1 << 21)) { /* (extended register) */
4291             disas_add_sub_ext_reg(s, insn);
4292         } else {
4293             disas_add_sub_reg(s, insn);
4294         }
4295         break;
4296     case 0x1b: /* Data-processing (3 source) */
4297         disas_data_proc_3src(s, insn);
4298         break;
4299     case 0x1a:
4300         switch (extract32(insn, 21, 3)) {
4301         case 0x0: /* Add/subtract (with carry) */
4302             disas_adc_sbc(s, insn);
4303             break;
4304         case 0x2: /* Conditional compare */
4305             disas_cc(s, insn); /* both imm and reg forms */
4306             break;
4307         case 0x4: /* Conditional select */
4308             disas_cond_select(s, insn);
4309             break;
4310         case 0x6: /* Data-processing */
4311             if (insn & (1 << 30)) { /* (1 source) */
4312                 disas_data_proc_1src(s, insn);
4313             } else {            /* (2 source) */
4314                 disas_data_proc_2src(s, insn);
4315             }
4316             break;
4317         default:
4318             unallocated_encoding(s);
4319             break;
4320         }
4321         break;
4322     default:
4323         unallocated_encoding(s);
4324         break;
4325     }
4326 }
4327
4328 static void handle_fp_compare(DisasContext *s, bool is_double,
4329                               unsigned int rn, unsigned int rm,
4330                               bool cmp_with_zero, bool signal_all_nans)
4331 {
4332     TCGv_i64 tcg_flags = tcg_temp_new_i64();
4333     TCGv_ptr fpst = get_fpstatus_ptr();
4334
4335     if (is_double) {
4336         TCGv_i64 tcg_vn, tcg_vm;
4337
4338         tcg_vn = read_fp_dreg(s, rn);
4339         if (cmp_with_zero) {
4340             tcg_vm = tcg_const_i64(0);
4341         } else {
4342             tcg_vm = read_fp_dreg(s, rm);
4343         }
4344         if (signal_all_nans) {
4345             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4346         } else {
4347             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4348         }
4349         tcg_temp_free_i64(tcg_vn);
4350         tcg_temp_free_i64(tcg_vm);
4351     } else {
4352         TCGv_i32 tcg_vn, tcg_vm;
4353
4354         tcg_vn = read_fp_sreg(s, rn);
4355         if (cmp_with_zero) {
4356             tcg_vm = tcg_const_i32(0);
4357         } else {
4358             tcg_vm = read_fp_sreg(s, rm);
4359         }
4360         if (signal_all_nans) {
4361             gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4362         } else {
4363             gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4364         }
4365         tcg_temp_free_i32(tcg_vn);
4366         tcg_temp_free_i32(tcg_vm);
4367     }
4368
4369     tcg_temp_free_ptr(fpst);
4370
4371     gen_set_nzcv(tcg_flags);
4372
4373     tcg_temp_free_i64(tcg_flags);
4374 }
4375
4376 /* C3.6.22 Floating point compare
4377  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
4378  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4379  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
4380  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4381  */
4382 static void disas_fp_compare(DisasContext *s, uint32_t insn)
4383 {
4384     unsigned int mos, type, rm, op, rn, opc, op2r;
4385
4386     mos = extract32(insn, 29, 3);
4387     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4388     rm = extract32(insn, 16, 5);
4389     op = extract32(insn, 14, 2);
4390     rn = extract32(insn, 5, 5);
4391     opc = extract32(insn, 3, 2);
4392     op2r = extract32(insn, 0, 3);
4393
4394     if (mos || op || op2r || type > 1) {
4395         unallocated_encoding(s);
4396         return;
4397     }
4398
4399     if (!fp_access_check(s)) {
4400         return;
4401     }
4402
4403     handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
4404 }
4405
4406 /* C3.6.23 Floating point conditional compare
4407  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
4408  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4409  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
4410  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4411  */
4412 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
4413 {
4414     unsigned int mos, type, rm, cond, rn, op, nzcv;
4415     TCGv_i64 tcg_flags;
4416     TCGLabel *label_continue = NULL;
4417
4418     mos = extract32(insn, 29, 3);
4419     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4420     rm = extract32(insn, 16, 5);
4421     cond = extract32(insn, 12, 4);
4422     rn = extract32(insn, 5, 5);
4423     op = extract32(insn, 4, 1);
4424     nzcv = extract32(insn, 0, 4);
4425
4426     if (mos || type > 1) {
4427         unallocated_encoding(s);
4428         return;
4429     }
4430
4431     if (!fp_access_check(s)) {
4432         return;
4433     }
4434
4435     if (cond < 0x0e) { /* not always */
4436         TCGLabel *label_match = gen_new_label();
4437         label_continue = gen_new_label();
4438         arm_gen_test_cc(cond, label_match);
4439         /* nomatch: */
4440         tcg_flags = tcg_const_i64(nzcv << 28);
4441         gen_set_nzcv(tcg_flags);
4442         tcg_temp_free_i64(tcg_flags);
4443         tcg_gen_br(label_continue);
4444         gen_set_label(label_match);
4445     }
4446
4447     handle_fp_compare(s, type, rn, rm, false, op);
4448
4449     if (cond < 0x0e) {
4450         gen_set_label(label_continue);
4451     }
4452 }
4453
4454 /* C3.6.24 Floating point conditional select
4455  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
4456  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4457  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
4458  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4459  */
4460 static void disas_fp_csel(DisasContext *s, uint32_t insn)
4461 {
4462     unsigned int mos, type, rm, cond, rn, rd;
4463     TCGv_i64 t_true, t_false, t_zero;
4464     DisasCompare64 c;
4465
4466     mos = extract32(insn, 29, 3);
4467     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4468     rm = extract32(insn, 16, 5);
4469     cond = extract32(insn, 12, 4);
4470     rn = extract32(insn, 5, 5);
4471     rd = extract32(insn, 0, 5);
4472
4473     if (mos || type > 1) {
4474         unallocated_encoding(s);
4475         return;
4476     }
4477
4478     if (!fp_access_check(s)) {
4479         return;
4480     }
4481
4482     /* Zero extend sreg inputs to 64 bits now.  */
4483     t_true = tcg_temp_new_i64();
4484     t_false = tcg_temp_new_i64();
4485     read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
4486     read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);
4487
4488     a64_test_cc(&c, cond);
4489     t_zero = tcg_const_i64(0);
4490     tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
4491     tcg_temp_free_i64(t_zero);
4492     tcg_temp_free_i64(t_false);
4493     a64_free_cc(&c);
4494
4495     /* Note that sregs write back zeros to the high bits,
4496        and we've already done the zero-extension.  */
4497     write_fp_dreg(s, rd, t_true);
4498     tcg_temp_free_i64(t_true);
4499 }
4500
4501 /* C3.6.25 Floating-point data-processing (1 source) - single precision */
4502 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
4503 {
4504     TCGv_ptr fpst;
4505     TCGv_i32 tcg_op;
4506     TCGv_i32 tcg_res;
4507
4508     fpst = get_fpstatus_ptr();
4509     tcg_op = read_fp_sreg(s, rn);
4510     tcg_res = tcg_temp_new_i32();
4511
4512     switch (opcode) {
4513     case 0x0: /* FMOV */
4514         tcg_gen_mov_i32(tcg_res, tcg_op);
4515         break;
4516     case 0x1: /* FABS */
4517         gen_helper_vfp_abss(tcg_res, tcg_op);
4518         break;
4519     case 0x2: /* FNEG */
4520         gen_helper_vfp_negs(tcg_res, tcg_op);
4521         break;
4522     case 0x3: /* FSQRT */
4523         gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
4524         break;
4525     case 0x8: /* FRINTN */
4526     case 0x9: /* FRINTP */
4527     case 0xa: /* FRINTM */
4528     case 0xb: /* FRINTZ */
4529     case 0xc: /* FRINTA */
4530     {
4531         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4532
4533         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4534         gen_helper_rints(tcg_res, tcg_op, fpst);
4535
4536         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4537         tcg_temp_free_i32(tcg_rmode);
4538         break;
4539     }
4540     case 0xe: /* FRINTX */
4541         gen_helper_rints_exact(tcg_res, tcg_op, fpst);
4542         break;
4543     case 0xf: /* FRINTI */
4544         gen_helper_rints(tcg_res, tcg_op, fpst);
4545         break;
4546     default:
4547         abort();
4548     }
4549
4550     write_fp_sreg(s, rd, tcg_res);
4551
4552     tcg_temp_free_ptr(fpst);
4553     tcg_temp_free_i32(tcg_op);
4554     tcg_temp_free_i32(tcg_res);
4555 }
4556
4557 /* C3.6.25 Floating-point data-processing (1 source) - double precision */
4558 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
4559 {
4560     TCGv_ptr fpst;
4561     TCGv_i64 tcg_op;
4562     TCGv_i64 tcg_res;
4563
4564     fpst = get_fpstatus_ptr();
4565     tcg_op = read_fp_dreg(s, rn);
4566     tcg_res = tcg_temp_new_i64();
4567
4568     switch (opcode) {
4569     case 0x0: /* FMOV */
4570         tcg_gen_mov_i64(tcg_res, tcg_op);
4571         break;
4572     case 0x1: /* FABS */
4573         gen_helper_vfp_absd(tcg_res, tcg_op);
4574         break;
4575     case 0x2: /* FNEG */
4576         gen_helper_vfp_negd(tcg_res, tcg_op);
4577         break;
4578     case 0x3: /* FSQRT */
4579         gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
4580         break;
4581     case 0x8: /* FRINTN */
4582     case 0x9: /* FRINTP */
4583     case 0xa: /* FRINTM */
4584     case 0xb: /* FRINTZ */
4585     case 0xc: /* FRINTA */
4586     {
4587         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4588
4589         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4590         gen_helper_rintd(tcg_res, tcg_op, fpst);
4591
4592         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4593         tcg_temp_free_i32(tcg_rmode);
4594         break;
4595     }
4596     case 0xe: /* FRINTX */
4597         gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
4598         break;
4599     case 0xf: /* FRINTI */
4600         gen_helper_rintd(tcg_res, tcg_op, fpst);
4601         break;
4602     default:
4603         abort();
4604     }
4605
4606     write_fp_dreg(s, rd, tcg_res);
4607
4608     tcg_temp_free_ptr(fpst);
4609     tcg_temp_free_i64(tcg_op);
4610     tcg_temp_free_i64(tcg_res);
4611 }
4612
4613 static void handle_fp_fcvt(DisasContext *s, int opcode,
4614                            int rd, int rn, int dtype, int ntype)
4615 {
4616     switch (ntype) {
4617     case 0x0:
4618     {
4619         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4620         if (dtype == 1) {
4621             /* Single to double */
4622             TCGv_i64 tcg_rd = tcg_temp_new_i64();
4623             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
4624             write_fp_dreg(s, rd, tcg_rd);
4625             tcg_temp_free_i64(tcg_rd);
4626         } else {
4627             /* Single to half */
4628             TCGv_i32 tcg_rd = tcg_temp_new_i32();
4629             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
4630             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4631             write_fp_sreg(s, rd, tcg_rd);
4632             tcg_temp_free_i32(tcg_rd);
4633         }
4634         tcg_temp_free_i32(tcg_rn);
4635         break;
4636     }
4637     case 0x1:
4638     {
4639         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
4640         TCGv_i32 tcg_rd = tcg_temp_new_i32();
4641         if (dtype == 0) {
4642             /* Double to single */
4643             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
4644         } else {
4645             /* Double to half */
4646             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
4647             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4648         }
4649         write_fp_sreg(s, rd, tcg_rd);
4650         tcg_temp_free_i32(tcg_rd);
4651         tcg_temp_free_i64(tcg_rn);
4652         break;
4653     }
4654     case 0x3:
4655     {
4656         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4657         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
4658         if (dtype == 0) {
4659             /* Half to single */
4660             TCGv_i32 tcg_rd = tcg_temp_new_i32();
4661             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
4662             write_fp_sreg(s, rd, tcg_rd);
4663             tcg_temp_free_i32(tcg_rd);
4664         } else {
4665             /* Half to double */
4666             TCGv_i64 tcg_rd = tcg_temp_new_i64();
4667             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
4668             write_fp_dreg(s, rd, tcg_rd);
4669             tcg_temp_free_i64(tcg_rd);
4670         }
4671         tcg_temp_free_i32(tcg_rn);
4672         break;
4673     }
4674     default:
4675         abort();
4676     }
4677 }
4678
4679 /* C3.6.25 Floating point data-processing (1 source)
4680  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
4681  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4682  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
4683  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4684  */
4685 static void disas_fp_1src(DisasContext *s, uint32_t insn)
4686 {
4687     int type = extract32(insn, 22, 2);
4688     int opcode = extract32(insn, 15, 6);
4689     int rn = extract32(insn, 5, 5);
4690     int rd = extract32(insn, 0, 5);
4691
4692     switch (opcode) {
4693     case 0x4: case 0x5: case 0x7:
4694     {
4695         /* FCVT between half, single and double precision */
4696         int dtype = extract32(opcode, 0, 2);
4697         if (type == 2 || dtype == type) {
4698             unallocated_encoding(s);
4699             return;
4700         }
4701         if (!fp_access_check(s)) {
4702             return;
4703         }
4704
4705         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
4706         break;
4707     }
4708     case 0x0 ... 0x3:
4709     case 0x8 ... 0xc:
4710     case 0xe ... 0xf:
4711         /* 32-to-32 and 64-to-64 ops */
4712         switch (type) {
4713         case 0:
4714             if (!fp_access_check(s)) {
4715                 return;
4716             }
4717
4718             handle_fp_1src_single(s, opcode, rd, rn);
4719             break;
4720         case 1:
4721             if (!fp_access_check(s)) {
4722                 return;
4723             }
4724
4725             handle_fp_1src_double(s, opcode, rd, rn);
4726             break;
4727         default:
4728             unallocated_encoding(s);
4729         }
4730         break;
4731     default:
4732         unallocated_encoding(s);
4733         break;
4734     }
4735 }
4736
4737 /* C3.6.26 Floating-point data-processing (2 source) - single precision */
4738 static void handle_fp_2src_single(DisasContext *s, int opcode,
4739                                   int rd, int rn, int rm)
4740 {
4741     TCGv_i32 tcg_op1;
4742     TCGv_i32 tcg_op2;
4743     TCGv_i32 tcg_res;
4744     TCGv_ptr fpst;
4745
4746     tcg_res = tcg_temp_new_i32();
4747     fpst = get_fpstatus_ptr();
4748     tcg_op1 = read_fp_sreg(s, rn);
4749     tcg_op2 = read_fp_sreg(s, rm);
4750
4751     switch (opcode) {
4752     case 0x0: /* FMUL */
4753         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4754         break;
4755     case 0x1: /* FDIV */
4756         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
4757         break;
4758     case 0x2: /* FADD */
4759         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
4760         break;
4761     case 0x3: /* FSUB */
4762         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
4763         break;
4764     case 0x4: /* FMAX */
4765         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
4766         break;
4767     case 0x5: /* FMIN */
4768         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
4769         break;
4770     case 0x6: /* FMAXNM */
4771         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
4772         break;
4773     case 0x7: /* FMINNM */
4774         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
4775         break;
4776     case 0x8: /* FNMUL */
4777         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4778         gen_helper_vfp_negs(tcg_res, tcg_res);
4779         break;
4780     }
4781
4782     write_fp_sreg(s, rd, tcg_res);
4783
4784     tcg_temp_free_ptr(fpst);
4785     tcg_temp_free_i32(tcg_op1);
4786     tcg_temp_free_i32(tcg_op2);
4787     tcg_temp_free_i32(tcg_res);
4788 }
4789
4790 /* C3.6.26 Floating-point data-processing (2 source) - double precision */
4791 static void handle_fp_2src_double(DisasContext *s, int opcode,
4792                                   int rd, int rn, int rm)
4793 {
4794     TCGv_i64 tcg_op1;
4795     TCGv_i64 tcg_op2;
4796     TCGv_i64 tcg_res;
4797     TCGv_ptr fpst;
4798
4799     tcg_res = tcg_temp_new_i64();
4800     fpst = get_fpstatus_ptr();
4801     tcg_op1 = read_fp_dreg(s, rn);
4802     tcg_op2 = read_fp_dreg(s, rm);
4803
4804     switch (opcode) {
4805     case 0x0: /* FMUL */
4806         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4807         break;
4808     case 0x1: /* FDIV */
4809         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
4810         break;
4811     case 0x2: /* FADD */
4812         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
4813         break;
4814     case 0x3: /* FSUB */
4815         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
4816         break;
4817     case 0x4: /* FMAX */
4818         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
4819         break;
4820     case 0x5: /* FMIN */
4821         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
4822         break;
4823     case 0x6: /* FMAXNM */
4824         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4825         break;
4826     case 0x7: /* FMINNM */
4827         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4828         break;
4829     case 0x8: /* FNMUL */
4830         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4831         gen_helper_vfp_negd(tcg_res, tcg_res);
4832         break;
4833     }
4834
4835     write_fp_dreg(s, rd, tcg_res);
4836
4837     tcg_temp_free_ptr(fpst);
4838     tcg_temp_free_i64(tcg_op1);
4839     tcg_temp_free_i64(tcg_op2);
4840     tcg_temp_free_i64(tcg_res);
4841 }
4842
4843 /* C3.6.26 Floating point data-processing (2 source)
4844  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
4845  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4846  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
4847  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4848  */
4849 static void disas_fp_2src(DisasContext *s, uint32_t insn)
4850 {
4851     int type = extract32(insn, 22, 2);
4852     int rd = extract32(insn, 0, 5);
4853     int rn = extract32(insn, 5, 5);
4854     int rm = extract32(insn, 16, 5);
4855     int opcode = extract32(insn, 12, 4);
4856
4857     if (opcode > 8) {
4858         unallocated_encoding(s);
4859         return;
4860     }
4861
4862     switch (type) {
4863     case 0:
4864         if (!fp_access_check(s)) {
4865             return;
4866         }
4867         handle_fp_2src_single(s, opcode, rd, rn, rm);
4868         break;
4869     case 1:
4870         if (!fp_access_check(s)) {
4871             return;
4872         }
4873         handle_fp_2src_double(s, opcode, rd, rn, rm);
4874         break;
4875     default:
4876         unallocated_encoding(s);
4877     }
4878 }
4879
4880 /* C3.6.27 Floating-point data-processing (3 source) - single precision */
4881 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
4882                                   int rd, int rn, int rm, int ra)
4883 {
4884     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
4885     TCGv_i32 tcg_res = tcg_temp_new_i32();
4886     TCGv_ptr fpst = get_fpstatus_ptr();
4887
4888     tcg_op1 = read_fp_sreg(s, rn);
4889     tcg_op2 = read_fp_sreg(s, rm);
4890     tcg_op3 = read_fp_sreg(s, ra);
4891
4892     /* These are fused multiply-add, and must be done as one
4893      * floating point operation with no rounding between the
4894      * multiplication and addition steps.
4895      * NB that doing the negations here as separate steps is
4896      * correct : an input NaN should come out with its sign bit
4897      * flipped if it is a negated-input.
4898      */
4899     if (o1 == true) {
4900         gen_helper_vfp_negs(tcg_op3, tcg_op3);
4901     }
4902
4903     if (o0 != o1) {
4904         gen_helper_vfp_negs(tcg_op1, tcg_op1);
4905     }
4906
4907     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4908
4909     write_fp_sreg(s, rd, tcg_res);
4910
4911     tcg_temp_free_ptr(fpst);
4912     tcg_temp_free_i32(tcg_op1);
4913     tcg_temp_free_i32(tcg_op2);
4914     tcg_temp_free_i32(tcg_op3);
4915     tcg_temp_free_i32(tcg_res);
4916 }
4917
4918 /* C3.6.27 Floating-point data-processing (3 source) - double precision */
4919 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
4920                                   int rd, int rn, int rm, int ra)
4921 {
4922     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
4923     TCGv_i64 tcg_res = tcg_temp_new_i64();
4924     TCGv_ptr fpst = get_fpstatus_ptr();
4925
4926     tcg_op1 = read_fp_dreg(s, rn);
4927     tcg_op2 = read_fp_dreg(s, rm);
4928     tcg_op3 = read_fp_dreg(s, ra);
4929
4930     /* These are fused multiply-add, and must be done as one
4931      * floating point operation with no rounding between the
4932      * multiplication and addition steps.
4933      * NB that doing the negations here as separate steps is
4934      * correct : an input NaN should come out with its sign bit
4935      * flipped if it is a negated-input.
4936      */
4937     if (o1 == true) {
4938         gen_helper_vfp_negd(tcg_op3, tcg_op3);
4939     }
4940
4941     if (o0 != o1) {
4942         gen_helper_vfp_negd(tcg_op1, tcg_op1);
4943     }
4944
4945     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4946
4947     write_fp_dreg(s, rd, tcg_res);
4948
4949     tcg_temp_free_ptr(fpst);
4950     tcg_temp_free_i64(tcg_op1);
4951     tcg_temp_free_i64(tcg_op2);
4952     tcg_temp_free_i64(tcg_op3);
4953     tcg_temp_free_i64(tcg_res);
4954 }
4955
4956 /* C3.6.27 Floating point data-processing (3 source)
4957  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
4958  * +---+---+---+-----------+------+----+------+----+------+------+------+
4959  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4960  * +---+---+---+-----------+------+----+------+----+------+------+------+
4961  */
4962 static void disas_fp_3src(DisasContext *s, uint32_t insn)
4963 {
4964     int type = extract32(insn, 22, 2);
4965     int rd = extract32(insn, 0, 5);
4966     int rn = extract32(insn, 5, 5);
4967     int ra = extract32(insn, 10, 5);
4968     int rm = extract32(insn, 16, 5);
4969     bool o0 = extract32(insn, 15, 1);
4970     bool o1 = extract32(insn, 21, 1);
4971
4972     switch (type) {
4973     case 0:
4974         if (!fp_access_check(s)) {
4975             return;
4976         }
4977         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
4978         break;
4979     case 1:
4980         if (!fp_access_check(s)) {
4981             return;
4982         }
4983         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
4984         break;
4985     default:
4986         unallocated_encoding(s);
4987     }
4988 }
4989
4990 /* C3.6.28 Floating point immediate
4991  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
4992  * +---+---+---+-----------+------+---+------------+-------+------+------+
4993  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
4994  * +---+---+---+-----------+------+---+------------+-------+------+------+
4995  */
4996 static void disas_fp_imm(DisasContext *s, uint32_t insn)
4997 {
4998     int rd = extract32(insn, 0, 5);
4999     int imm8 = extract32(insn, 13, 8);
5000     int is_double = extract32(insn, 22, 2);
5001     uint64_t imm;
5002     TCGv_i64 tcg_res;
5003
5004     if (is_double > 1) {
5005         unallocated_encoding(s);
5006         return;
5007     }
5008
5009     if (!fp_access_check(s)) {
5010         return;
5011     }
5012
5013     /* The imm8 encodes the sign bit, enough bits to represent
5014      * an exponent in the range 01....1xx to 10....0xx,
5015      * and the most significant 4 bits of the mantissa; see
5016      * VFPExpandImm() in the v8 ARM ARM.
5017      */
5018     if (is_double) {
5019         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5020             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
5021             extract32(imm8, 0, 6);
5022         imm <<= 48;
5023     } else {
5024         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5025             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
5026             (extract32(imm8, 0, 6) << 3);
5027         imm <<= 16;
5028     }
5029
5030     tcg_res = tcg_const_i64(imm);
5031     write_fp_dreg(s, rd, tcg_res);
5032     tcg_temp_free_i64(tcg_res);
5033 }
5034
5035 /* Handle floating point <=> fixed point conversions. Note that we can
5036  * also deal with fp <=> integer conversions as a special case (scale == 64)
5037  * OPTME: consider handling that special case specially or at least skipping
5038  * the call to scalbn in the helpers for zero shifts.
5039  */
5040 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
5041                            bool itof, int rmode, int scale, int sf, int type)
5042 {
5043     bool is_signed = !(opcode & 1);
5044     bool is_double = type;
5045     TCGv_ptr tcg_fpstatus;
5046     TCGv_i32 tcg_shift;
5047
5048     tcg_fpstatus = get_fpstatus_ptr();
5049
5050     tcg_shift = tcg_const_i32(64 - scale);
5051
5052     if (itof) {
5053         TCGv_i64 tcg_int = cpu_reg(s, rn);
5054         if (!sf) {
5055             TCGv_i64 tcg_extend = new_tmp_a64(s);
5056
5057             if (is_signed) {
5058                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
5059             } else {
5060                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
5061             }
5062
5063             tcg_int = tcg_extend;
5064         }
5065
5066         if (is_double) {
5067             TCGv_i64 tcg_double = tcg_temp_new_i64();
5068             if (is_signed) {
5069                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
5070                                      tcg_shift, tcg_fpstatus);
5071             } else {
5072                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
5073                                      tcg_shift, tcg_fpstatus);
5074             }
5075             write_fp_dreg(s, rd, tcg_double);
5076             tcg_temp_free_i64(tcg_double);
5077         } else {
5078             TCGv_i32 tcg_single = tcg_temp_new_i32();
5079             if (is_signed) {
5080                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
5081                                      tcg_shift, tcg_fpstatus);
5082             } else {
5083                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
5084                                      tcg_shift, tcg_fpstatus);
5085             }
5086             write_fp_sreg(s, rd, tcg_single);
5087             tcg_temp_free_i32(tcg_single);
5088         }
5089     } else {
5090         TCGv_i64 tcg_int = cpu_reg(s, rd);
5091         TCGv_i32 tcg_rmode;
5092
5093         if (extract32(opcode, 2, 1)) {
5094             /* There are too many rounding modes to all fit into rmode,
5095              * so FCVTA[US] is a special case.
5096              */
5097             rmode = FPROUNDING_TIEAWAY;
5098         }
5099
5100         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
5101
5102         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
5103
5104         if (is_double) {
5105             TCGv_i64 tcg_double = read_fp_dreg(s, rn);
5106             if (is_signed) {
5107                 if (!sf) {
5108                     gen_helper_vfp_tosld(tcg_int, tcg_double,
5109                                          tcg_shift, tcg_fpstatus);
5110                 } else {
5111                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
5112                                          tcg_shift, tcg_fpstatus);
5113                 }
5114             } else {
5115                 if (!sf) {
5116                     gen_helper_vfp_tould(tcg_int, tcg_double,
5117                                          tcg_shift, tcg_fpstatus);
5118                 } else {
5119                     gen_helper_vfp_touqd(tcg_int, tcg_double,
5120                                          tcg_shift, tcg_fpstatus);
5121                 }
5122             }
5123             tcg_temp_free_i64(tcg_double);
5124         } else {
5125             TCGv_i32 tcg_single = read_fp_sreg(s, rn);
5126             if (sf) {
5127                 if (is_signed) {
5128                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
5129                                          tcg_shift, tcg_fpstatus);
5130                 } else {
5131                     gen_helper_vfp_touqs(tcg_int, tcg_single,
5132                                          tcg_shift, tcg_fpstatus);
5133                 }
5134             } else {
5135                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
5136                 if (is_signed) {
5137                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
5138                                          tcg_shift, tcg_fpstatus);
5139                 } else {
5140                     gen_helper_vfp_touls(tcg_dest, tcg_single,
5141                                          tcg_shift, tcg_fpstatus);
5142                 }
5143                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
5144                 tcg_temp_free_i32(tcg_dest);
5145             }
5146             tcg_temp_free_i32(tcg_single);
5147         }
5148
5149         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
5150         tcg_temp_free_i32(tcg_rmode);
5151
5152         if (!sf) {
5153             tcg_gen_ext32u_i64(tcg_int, tcg_int);
5154         }
5155     }
5156
5157     tcg_temp_free_ptr(tcg_fpstatus);
5158     tcg_temp_free_i32(tcg_shift);
5159 }
5160
5161 /* C3.6.29 Floating point <-> fixed point conversions
5162  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
5163  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5164  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
5165  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5166  */
5167 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
5168 {
5169     int rd = extract32(insn, 0, 5);
5170     int rn = extract32(insn, 5, 5);
5171     int scale = extract32(insn, 10, 6);
5172     int opcode = extract32(insn, 16, 3);
5173     int rmode = extract32(insn, 19, 2);
5174     int type = extract32(insn, 22, 2);
5175     bool sbit = extract32(insn, 29, 1);
5176     bool sf = extract32(insn, 31, 1);
5177     bool itof;
5178
5179     if (sbit || (type > 1)
5180         || (!sf && scale < 32)) {
5181         unallocated_encoding(s);
5182         return;
5183     }
5184
5185     switch ((rmode << 3) | opcode) {
5186     case 0x2: /* SCVTF */
5187     case 0x3: /* UCVTF */
5188         itof = true;
5189         break;
5190     case 0x18: /* FCVTZS */
5191     case 0x19: /* FCVTZU */
5192         itof = false;
5193         break;
5194     default:
5195         unallocated_encoding(s);
5196         return;
5197     }
5198
5199     if (!fp_access_check(s)) {
5200         return;
5201     }
5202
5203     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
5204 }
5205
5206 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
5207 {
5208     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
5209      * without conversion.
5210      */
5211
5212     if (itof) {
5213         TCGv_i64 tcg_rn = cpu_reg(s, rn);
5214
5215         switch (type) {
5216         case 0:
5217         {
5218             /* 32 bit */
5219             TCGv_i64 tmp = tcg_temp_new_i64();
5220             tcg_gen_ext32u_i64(tmp, tcg_rn);
5221             tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
5222             tcg_gen_movi_i64(tmp, 0);
5223             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5224             tcg_temp_free_i64(tmp);
5225             break;
5226         }
5227         case 1:
5228         {
5229             /* 64 bit */
5230             TCGv_i64 tmp = tcg_const_i64(0);
5231             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
5232             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5233             tcg_temp_free_i64(tmp);
5234             break;
5235         }
5236         case 2:
5237             /* 64 bit to top half. */
5238             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
5239             break;
5240         }
5241     } else {
5242         TCGv_i64 tcg_rd = cpu_reg(s, rd);
5243
5244         switch (type) {
5245         case 0:
5246             /* 32 bit */
5247             tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
5248             break;
5249         case 1:
5250             /* 64 bit */
5251             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
5252             break;
5253         case 2:
5254             /* 64 bits from top half */
5255             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
5256             break;
5257         }
5258     }
5259 }
5260
5261 /* C3.6.30 Floating point <-> integer conversions
5262  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
5263  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5264  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
5265  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5266  */
5267 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
5268 {
5269     int rd = extract32(insn, 0, 5);
5270     int rn = extract32(insn, 5, 5);
5271     int opcode = extract32(insn, 16, 3);
5272     int rmode = extract32(insn, 19, 2);
5273     int type = extract32(insn, 22, 2);
5274     bool sbit = extract32(insn, 29, 1);
5275     bool sf = extract32(insn, 31, 1);
5276
5277     if (sbit) {
5278         unallocated_encoding(s);
5279         return;
5280     }
5281
5282     if (opcode > 5) {
5283         /* FMOV */
5284         bool itof = opcode & 1;
5285
5286         if (rmode >= 2) {
5287             unallocated_encoding(s);
5288             return;
5289         }
5290
5291         switch (sf << 3 | type << 1 | rmode) {
5292         case 0x0: /* 32 bit */
5293         case 0xa: /* 64 bit */
5294         case 0xd: /* 64 bit to top half of quad */
5295             break;
5296         default:
5297             /* all other sf/type/rmode combinations are invalid */
5298             unallocated_encoding(s);
5299             break;
5300         }
5301
5302         if (!fp_access_check(s)) {
5303             return;
5304         }
5305         handle_fmov(s, rd, rn, type, itof);
5306     } else {
5307         /* actual FP conversions */
5308         bool itof = extract32(opcode, 1, 1);
5309
5310         if (type > 1 || (rmode != 0 && opcode > 1)) {
5311             unallocated_encoding(s);
5312             return;
5313         }
5314
5315         if (!fp_access_check(s)) {
5316             return;
5317         }
5318         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
5319     }
5320 }
5321
5322 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
5323  *   31  30  29 28     25 24                          0
5324  * +---+---+---+---------+-----------------------------+
5325  * |   | 0 |   | 1 1 1 1 |                             |
5326  * +---+---+---+---------+-----------------------------+
5327  */
5328 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
5329 {
5330     if (extract32(insn, 24, 1)) {
5331         /* Floating point data-processing (3 source) */
5332         disas_fp_3src(s, insn);
5333     } else if (extract32(insn, 21, 1) == 0) {
5334         /* Floating point to fixed point conversions */
5335         disas_fp_fixed_conv(s, insn);
5336     } else {
5337         switch (extract32(insn, 10, 2)) {
5338         case 1:
5339             /* Floating point conditional compare */
5340             disas_fp_ccomp(s, insn);
5341             break;
5342         case 2:
5343             /* Floating point data-processing (2 source) */
5344             disas_fp_2src(s, insn);
5345             break;
5346         case 3:
5347             /* Floating point conditional select */
5348             disas_fp_csel(s, insn);
5349             break;
5350         case 0:
5351             switch (ctz32(extract32(insn, 12, 4))) {
5352             case 0: /* [15:12] == xxx1 */
5353                 /* Floating point immediate */
5354                 disas_fp_imm(s, insn);
5355                 break;
5356             case 1: /* [15:12] == xx10 */
5357                 /* Floating point compare */
5358                 disas_fp_compare(s, insn);
5359                 break;
5360             case 2: /* [15:12] == x100 */
5361                 /* Floating point data-processing (1 source) */
5362                 disas_fp_1src(s, insn);
5363                 break;
5364             case 3: /* [15:12] == 1000 */
5365                 unallocated_encoding(s);
5366                 break;
5367             default: /* [15:12] == 0000 */
5368                 /* Floating point <-> integer conversions */
5369                 disas_fp_int_conv(s, insn);
5370                 break;
5371             }
5372             break;
5373         }
5374     }
5375 }
5376
5377 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
5378                      int pos)
5379 {
5380     /* Extract 64 bits from the middle of two concatenated 64 bit
5381      * vector register slices left:right. The extracted bits start
5382      * at 'pos' bits into the right (least significant) side.
5383      * We return the result in tcg_right, and guarantee not to
5384      * trash tcg_left.
5385      */
5386     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5387     assert(pos > 0 && pos < 64);
5388
5389     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
5390     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
5391     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
5392
5393     tcg_temp_free_i64(tcg_tmp);
5394 }
5395
5396 /* C3.6.1 EXT
5397  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
5398  * +---+---+-------------+-----+---+------+---+------+---+------+------+
5399  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
5400  * +---+---+-------------+-----+---+------+---+------+---+------+------+
5401  */
5402 static void disas_simd_ext(DisasContext *s, uint32_t insn)
5403 {
5404     int is_q = extract32(insn, 30, 1);
5405     int op2 = extract32(insn, 22, 2);
5406     int imm4 = extract32(insn, 11, 4);
5407     int rm = extract32(insn, 16, 5);
5408     int rn = extract32(insn, 5, 5);
5409     int rd = extract32(insn, 0, 5);
5410     int pos = imm4 << 3;
5411     TCGv_i64 tcg_resl, tcg_resh;
5412
5413     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
5414         unallocated_encoding(s);
5415         return;
5416     }
5417
5418     if (!fp_access_check(s)) {
5419         return;
5420     }
5421
5422     tcg_resh = tcg_temp_new_i64();
5423     tcg_resl = tcg_temp_new_i64();
5424
5425     /* Vd gets bits starting at pos bits into Vm:Vn. This is
5426      * either extracting 128 bits from a 128:128 concatenation, or
5427      * extracting 64 bits from a 64:64 concatenation.
5428      */
5429     if (!is_q) {
5430         read_vec_element(s, tcg_resl, rn, 0, MO_64);
5431         if (pos != 0) {
5432             read_vec_element(s, tcg_resh, rm, 0, MO_64);
5433             do_ext64(s, tcg_resh, tcg_resl, pos);
5434         }
5435         tcg_gen_movi_i64(tcg_resh, 0);
5436     } else {
5437         TCGv_i64 tcg_hh;
5438         typedef struct {
5439             int reg;
5440             int elt;
5441         } EltPosns;
5442         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
5443         EltPosns *elt = eltposns;
5444
5445         if (pos >= 64) {
5446             elt++;
5447             pos -= 64;
5448         }
5449
5450         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
5451         elt++;
5452         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
5453         elt++;
5454         if (pos != 0) {
5455             do_ext64(s, tcg_resh, tcg_resl, pos);
5456             tcg_hh = tcg_temp_new_i64();
5457             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
5458             do_ext64(s, tcg_hh, tcg_resh, pos);
5459             tcg_temp_free_i64(tcg_hh);
5460         }
5461     }
5462
5463     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5464     tcg_temp_free_i64(tcg_resl);
5465     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5466     tcg_temp_free_i64(tcg_resh);
5467 }
5468
5469 /* C3.6.2 TBL/TBX
5470  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
5471  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5472  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
5473  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5474  */
5475 static void disas_simd_tb(DisasContext *s, uint32_t insn)
5476 {
5477     int op2 = extract32(insn, 22, 2);
5478     int is_q = extract32(insn, 30, 1);
5479     int rm = extract32(insn, 16, 5);
5480     int rn = extract32(insn, 5, 5);
5481     int rd = extract32(insn, 0, 5);
5482     int is_tblx = extract32(insn, 12, 1);
5483     int len = extract32(insn, 13, 2);
5484     TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
5485     TCGv_i32 tcg_regno, tcg_numregs;
5486
5487     if (op2 != 0) {
5488         unallocated_encoding(s);
5489         return;
5490     }
5491
5492     if (!fp_access_check(s)) {
5493         return;
5494     }
5495
5496     /* This does a table lookup: for every byte element in the input
5497      * we index into a table formed from up to four vector registers,
5498      * and then the output is the result of the lookups. Our helper
5499      * function does the lookup operation for a single 64 bit part of
5500      * the input.
5501      */
5502     tcg_resl = tcg_temp_new_i64();
5503     tcg_resh = tcg_temp_new_i64();
5504
5505     if (is_tblx) {
5506         read_vec_element(s, tcg_resl, rd, 0, MO_64);
5507     } else {
5508         tcg_gen_movi_i64(tcg_resl, 0);
5509     }
5510     if (is_tblx && is_q) {
5511         read_vec_element(s, tcg_resh, rd, 1, MO_64);
5512     } else {
5513         tcg_gen_movi_i64(tcg_resh, 0);
5514     }
5515
5516     tcg_idx = tcg_temp_new_i64();
5517     tcg_regno = tcg_const_i32(rn);
5518     tcg_numregs = tcg_const_i32(len + 1);
5519     read_vec_element(s, tcg_idx, rm, 0, MO_64);
5520     gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
5521                         tcg_regno, tcg_numregs);
5522     if (is_q) {
5523         read_vec_element(s, tcg_idx, rm, 1, MO_64);
5524         gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
5525                             tcg_regno, tcg_numregs);
5526     }
5527     tcg_temp_free_i64(tcg_idx);
5528     tcg_temp_free_i32(tcg_regno);
5529     tcg_temp_free_i32(tcg_numregs);
5530
5531     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5532     tcg_temp_free_i64(tcg_resl);
5533     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5534     tcg_temp_free_i64(tcg_resh);
5535 }
5536
5537 /* C3.6.3 ZIP/UZP/TRN
5538  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
 * +---+---+-------------+------+---+------+---+-----+-----+------+------+
 * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
 * +---+---+-------------+------+---+------+---+-----+-----+------+------+
5542  */
5543 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
5544 {
5545     int rd = extract32(insn, 0, 5);
5546     int rn = extract32(insn, 5, 5);
5547     int rm = extract32(insn, 16, 5);
5548     int size = extract32(insn, 22, 2);
5549     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
5550      * bit 2 indicates 1 vs 2 variant of the insn.
5551      */
5552     int opcode = extract32(insn, 12, 2);
5553     bool part = extract32(insn, 14, 1);
5554     bool is_q = extract32(insn, 30, 1);
5555     int esize = 8 << size;
5556     int i, ofs;
5557     int datasize = is_q ? 128 : 64;
5558     int elements = datasize / esize;
5559     TCGv_i64 tcg_res, tcg_resl, tcg_resh;
5560
5561     if (opcode == 0 || (size == 3 && !is_q)) {
5562         unallocated_encoding(s);
5563         return;
5564     }
5565
5566     if (!fp_access_check(s)) {
5567         return;
5568     }
5569
5570     tcg_resl = tcg_const_i64(0);
5571     tcg_resh = tcg_const_i64(0);
5572     tcg_res = tcg_temp_new_i64();
5573
5574     for (i = 0; i < elements; i++) {
5575         switch (opcode) {
5576         case 1: /* UZP1/2 */
5577         {
5578             int midpoint = elements / 2;
5579             if (i < midpoint) {
5580                 read_vec_element(s, tcg_res, rn, 2 * i + part, size);
5581             } else {
5582                 read_vec_element(s, tcg_res, rm,
5583                                  2 * (i - midpoint) + part, size);
5584             }
5585             break;
5586         }
5587         case 2: /* TRN1/2 */
5588             if (i & 1) {
5589                 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
5590             } else {
5591                 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
5592             }
5593             break;
5594         case 3: /* ZIP1/2 */
5595         {
5596             int base = part * elements / 2;
5597             if (i & 1) {
5598                 read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
5599             } else {
5600                 read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
5601             }
5602             break;
5603         }
5604         default:
5605             g_assert_not_reached();
5606         }
5607
5608         ofs = i * esize;
5609         if (ofs < 64) {
5610             tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
5611             tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
5612         } else {
5613             tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
5614             tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
5615         }
5616     }
5617
5618     tcg_temp_free_i64(tcg_res);
5619
5620     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5621     tcg_temp_free_i64(tcg_resl);
5622     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5623     tcg_temp_free_i64(tcg_resh);
5624 }
5625
5626 static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
5627                         int opc, bool is_min, TCGv_ptr fpst)
5628 {
5629     /* Helper function for disas_simd_across_lanes: do a single precision
5630      * min/max operation on the specified two inputs,
5631      * and return the result in tcg_elt1.
5632      */
5633     if (opc == 0xc) {
5634         if (is_min) {
5635             gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5636         } else {
5637             gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5638         }
5639     } else {
5640         assert(opc == 0xf);
5641         if (is_min) {
5642             gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5643         } else {
5644             gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5645         }
5646     }
5647 }
5648
5649 /* C3.6.4 AdvSIMD across lanes
5650  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
5651  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5652  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
5653  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5654  */
5655 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
5656 {
5657     int rd = extract32(insn, 0, 5);
5658     int rn = extract32(insn, 5, 5);
5659     int size = extract32(insn, 22, 2);
5660     int opcode = extract32(insn, 12, 5);
5661     bool is_q = extract32(insn, 30, 1);
5662     bool is_u = extract32(insn, 29, 1);
5663     bool is_fp = false;
5664     bool is_min = false;
5665     int esize;
5666     int elements;
5667     int i;
5668     TCGv_i64 tcg_res, tcg_elt;
5669
5670     switch (opcode) {
5671     case 0x1b: /* ADDV */
5672         if (is_u) {
5673             unallocated_encoding(s);
5674             return;
5675         }
5676         /* fall through */
5677     case 0x3: /* SADDLV, UADDLV */
5678     case 0xa: /* SMAXV, UMAXV */
5679     case 0x1a: /* SMINV, UMINV */
5680         if (size == 3 || (size == 2 && !is_q)) {
5681             unallocated_encoding(s);
5682             return;
5683         }
5684         break;
5685     case 0xc: /* FMAXNMV, FMINNMV */
5686     case 0xf: /* FMAXV, FMINV */
5687         if (!is_u || !is_q || extract32(size, 0, 1)) {
5688             unallocated_encoding(s);
5689             return;
5690         }
5691         /* Bit 1 of size field encodes min vs max, and actual size is always
5692          * 32 bits: adjust the size variable so following code can rely on it
5693          */
5694         is_min = extract32(size, 1, 1);
5695         is_fp = true;
5696         size = 2;
5697         break;
5698     default:
5699         unallocated_encoding(s);
5700         return;
5701     }
5702
5703     if (!fp_access_check(s)) {
5704         return;
5705     }
5706
5707     esize = 8 << size;
5708     elements = (is_q ? 128 : 64) / esize;
5709
5710     tcg_res = tcg_temp_new_i64();
5711     tcg_elt = tcg_temp_new_i64();
5712
5713     /* These instructions operate across all lanes of a vector
5714      * to produce a single result. We can guarantee that a 64
5715      * bit intermediate is sufficient:
5716      *  + for [US]ADDLV the maximum element size is 32 bits, and
5717      *    the result type is 64 bits
5718      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
5719      *    same as the element size, which is 32 bits at most
5720      * For the integer operations we can choose to work at 64
5721      * or 32 bits and truncate at the end; for simplicity
5722      * we use 64 bits always. The floating point
5723      * ops do require 32 bit intermediates, though.
5724      */
5725     if (!is_fp) {
5726         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
5727
5728         for (i = 1; i < elements; i++) {
5729             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
5730
5731             switch (opcode) {
5732             case 0x03: /* SADDLV / UADDLV */
5733             case 0x1b: /* ADDV */
5734                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
5735                 break;
5736             case 0x0a: /* SMAXV / UMAXV */
5737                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
5738                                     tcg_res,
5739                                     tcg_res, tcg_elt, tcg_res, tcg_elt);
5740                 break;
5741             case 0x1a: /* SMINV / UMINV */
5742                 tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
5743                                     tcg_res,
5744                                     tcg_res, tcg_elt, tcg_res, tcg_elt);
5745                 break;
5746                 break;
5747             default:
5748                 g_assert_not_reached();
5749             }
5750
5751         }
5752     } else {
5753         /* Floating point ops which work on 32 bit (single) intermediates.
5754          * Note that correct NaN propagation requires that we do these
5755          * operations in exactly the order specified by the pseudocode.
5756          */
5757         TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
5758         TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
5759         TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
5760         TCGv_ptr fpst = get_fpstatus_ptr();
5761
5762         assert(esize == 32);
5763         assert(elements == 4);
5764
5765         read_vec_element(s, tcg_elt, rn, 0, MO_32);
5766         tcg_gen_extrl_i64_i32(tcg_elt1, tcg_elt);
5767         read_vec_element(s, tcg_elt, rn, 1, MO_32);
5768         tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5769
5770         do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5771
5772         read_vec_element(s, tcg_elt, rn, 2, MO_32);
5773         tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5774         read_vec_element(s, tcg_elt, rn, 3, MO_32);
5775         tcg_gen_extrl_i64_i32(tcg_elt3, tcg_elt);
5776
5777         do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
5778
5779         do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5780
5781         tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
5782         tcg_temp_free_i32(tcg_elt1);
5783         tcg_temp_free_i32(tcg_elt2);
5784         tcg_temp_free_i32(tcg_elt3);
5785         tcg_temp_free_ptr(fpst);
5786     }
5787
5788     tcg_temp_free_i64(tcg_elt);
5789
5790     /* Now truncate the result to the width required for the final output */
5791     if (opcode == 0x03) {
5792         /* SADDLV, UADDLV: result is 2*esize */
5793         size++;
5794     }
5795
5796     switch (size) {
5797     case 0:
5798         tcg_gen_ext8u_i64(tcg_res, tcg_res);
5799         break;
5800     case 1:
5801         tcg_gen_ext16u_i64(tcg_res, tcg_res);
5802         break;
5803     case 2:
5804         tcg_gen_ext32u_i64(tcg_res, tcg_res);
5805         break;
5806     case 3:
5807         break;
5808     default:
5809         g_assert_not_reached();
5810     }
5811
5812     write_fp_dreg(s, rd, tcg_res);
5813     tcg_temp_free_i64(tcg_res);
5814 }
5815
5816 /* C6.3.31 DUP (Element, Vector)
5817  *
5818  *  31  30   29              21 20    16 15        10  9    5 4    0
5819  * +---+---+-------------------+--------+-------------+------+------+
5820  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5821  * +---+---+-------------------+--------+-------------+------+------+
5822  *
5823  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5824  */
5825 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
5826                              int imm5)
5827 {
5828     int size = ctz32(imm5);
5829     int esize = 8 << size;
5830     int elements = (is_q ? 128 : 64) / esize;
5831     int index, i;
5832     TCGv_i64 tmp;
5833
5834     if (size > 3 || (size == 3 && !is_q)) {
5835         unallocated_encoding(s);
5836         return;
5837     }
5838
5839     if (!fp_access_check(s)) {
5840         return;
5841     }
5842
5843     index = imm5 >> (size + 1);
5844
5845     tmp = tcg_temp_new_i64();
5846     read_vec_element(s, tmp, rn, index, size);
5847
5848     for (i = 0; i < elements; i++) {
5849         write_vec_element(s, tmp, rd, i, size);
5850     }
5851
5852     if (!is_q) {
5853         clear_vec_high(s, rd);
5854     }
5855
5856     tcg_temp_free_i64(tmp);
5857 }
5858
5859 /* C6.3.31 DUP (element, scalar)
5860  *  31                   21 20    16 15        10  9    5 4    0
5861  * +-----------------------+--------+-------------+------+------+
5862  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5863  * +-----------------------+--------+-------------+------+------+
5864  */
5865 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
5866                               int imm5)
5867 {
5868     int size = ctz32(imm5);
5869     int index;
5870     TCGv_i64 tmp;
5871
5872     if (size > 3) {
5873         unallocated_encoding(s);
5874         return;
5875     }
5876
5877     if (!fp_access_check(s)) {
5878         return;
5879     }
5880
5881     index = imm5 >> (size + 1);
5882
5883     /* This instruction just extracts the specified element and
5884      * zero-extends it into the bottom of the destination register.
5885      */
5886     tmp = tcg_temp_new_i64();
5887     read_vec_element(s, tmp, rn, index, size);
5888     write_fp_dreg(s, rd, tmp);
5889     tcg_temp_free_i64(tmp);
5890 }
5891
5892 /* C6.3.32 DUP (General)
5893  *
5894  *  31  30   29              21 20    16 15        10  9    5 4    0
5895  * +---+---+-------------------+--------+-------------+------+------+
5896  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
5897  * +---+---+-------------------+--------+-------------+------+------+
5898  *
5899  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5900  */
5901 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
5902                              int imm5)
5903 {
5904     int size = ctz32(imm5);
5905     int esize = 8 << size;
5906     int elements = (is_q ? 128 : 64)/esize;
5907     int i = 0;
5908
5909     if (size > 3 || ((size == 3) && !is_q)) {
5910         unallocated_encoding(s);
5911         return;
5912     }
5913
5914     if (!fp_access_check(s)) {
5915         return;
5916     }
5917
5918     for (i = 0; i < elements; i++) {
5919         write_vec_element(s, cpu_reg(s, rn), rd, i, size);
5920     }
5921     if (!is_q) {
5922         clear_vec_high(s, rd);
5923     }
5924 }
5925
5926 /* C6.3.150 INS (Element)
5927  *
5928  *  31                   21 20    16 15  14    11  10 9    5 4    0
5929  * +-----------------------+--------+------------+---+------+------+
5930  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
5931  * +-----------------------+--------+------------+---+------+------+
5932  *
5933  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5934  * index: encoded in imm5<4:size+1>
5935  */
5936 static void handle_simd_inse(DisasContext *s, int rd, int rn,
5937                              int imm4, int imm5)
5938 {
5939     int size = ctz32(imm5);
5940     int src_index, dst_index;
5941     TCGv_i64 tmp;
5942
5943     if (size > 3) {
5944         unallocated_encoding(s);
5945         return;
5946     }
5947
5948     if (!fp_access_check(s)) {
5949         return;
5950     }
5951
5952     dst_index = extract32(imm5, 1+size, 5);
5953     src_index = extract32(imm4, size, 4);
5954
5955     tmp = tcg_temp_new_i64();
5956
5957     read_vec_element(s, tmp, rn, src_index, size);
5958     write_vec_element(s, tmp, rd, dst_index, size);
5959
5960     tcg_temp_free_i64(tmp);
5961 }
5962
5963
5964 /* C6.3.151 INS (General)
5965  *
5966  *  31                   21 20    16 15        10  9    5 4    0
5967  * +-----------------------+--------+-------------+------+------+
5968  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
5969  * +-----------------------+--------+-------------+------+------+
5970  *
5971  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5972  * index: encoded in imm5<4:size+1>
5973  */
5974 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
5975 {
5976     int size = ctz32(imm5);
5977     int idx;
5978
5979     if (size > 3) {
5980         unallocated_encoding(s);
5981         return;
5982     }
5983
5984     if (!fp_access_check(s)) {
5985         return;
5986     }
5987
5988     idx = extract32(imm5, 1 + size, 4 - size);
5989     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
5990 }
5991
5992 /*
5993  * C6.3.321 UMOV (General)
5994  * C6.3.237 SMOV (General)
5995  *
5996  *  31  30   29              21 20    16 15    12   10 9    5 4    0
5997  * +---+---+-------------------+--------+-------------+------+------+
5998  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
5999  * +---+---+-------------------+--------+-------------+------+------+
6000  *
6001  * U: unsigned when set
6002  * size: encoded in imm5 (see ARM ARM LowestSetBit())
6003  */
6004 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
6005                                   int rn, int rd, int imm5)
6006 {
6007     int size = ctz32(imm5);
6008     int element;
6009     TCGv_i64 tcg_rd;
6010
6011     /* Check for UnallocatedEncodings */
6012     if (is_signed) {
6013         if (size > 2 || (size == 2 && !is_q)) {
6014             unallocated_encoding(s);
6015             return;
6016         }
6017     } else {
6018         if (size > 3
6019             || (size < 3 && is_q)
6020             || (size == 3 && !is_q)) {
6021             unallocated_encoding(s);
6022             return;
6023         }
6024     }
6025
6026     if (!fp_access_check(s)) {
6027         return;
6028     }
6029
6030     element = extract32(imm5, 1+size, 4);
6031
6032     tcg_rd = cpu_reg(s, rd);
6033     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
6034     if (is_signed && !is_q) {
6035         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
6036     }
6037 }
6038
6039 /* C3.6.5 AdvSIMD copy
6040  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
6041  * +---+---+----+-----------------+------+---+------+---+------+------+
6042  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6043  * +---+---+----+-----------------+------+---+------+---+------+------+
6044  */
6045 static void disas_simd_copy(DisasContext *s, uint32_t insn)
6046 {
6047     int rd = extract32(insn, 0, 5);
6048     int rn = extract32(insn, 5, 5);
6049     int imm4 = extract32(insn, 11, 4);
6050     int op = extract32(insn, 29, 1);
6051     int is_q = extract32(insn, 30, 1);
6052     int imm5 = extract32(insn, 16, 5);
6053
6054     if (op) {
6055         if (is_q) {
6056             /* INS (element) */
6057             handle_simd_inse(s, rd, rn, imm4, imm5);
6058         } else {
6059             unallocated_encoding(s);
6060         }
6061     } else {
6062         switch (imm4) {
6063         case 0:
6064             /* DUP (element - vector) */
6065             handle_simd_dupe(s, is_q, rd, rn, imm5);
6066             break;
6067         case 1:
6068             /* DUP (general) */
6069             handle_simd_dupg(s, is_q, rd, rn, imm5);
6070             break;
6071         case 3:
6072             if (is_q) {
6073                 /* INS (general) */
6074                 handle_simd_insg(s, rd, rn, imm5);
6075             } else {
6076                 unallocated_encoding(s);
6077             }
6078             break;
6079         case 5:
6080         case 7:
6081             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
6082             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
6083             break;
6084         default:
6085             unallocated_encoding(s);
6086             break;
6087         }
6088     }
6089 }
6090
6091 /* C3.6.6 AdvSIMD modified immediate
6092  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
6093  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6094  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
6095  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6096  *
6097  * There are a number of operations that can be carried out here:
6098  *   MOVI - move (shifted) imm into register
6099  *   MVNI - move inverted (shifted) imm into register
6100  *   ORR  - bitwise OR of (shifted) imm with register
6101  *   BIC  - bitwise clear of (shifted) imm with register
6102  */
6103 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
6104 {
6105     int rd = extract32(insn, 0, 5);
6106     int cmode = extract32(insn, 12, 4);
6107     int cmode_3_1 = extract32(cmode, 1, 3);
6108     int cmode_0 = extract32(cmode, 0, 1);
6109     int o2 = extract32(insn, 11, 1);
6110     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
6111     bool is_neg = extract32(insn, 29, 1);
6112     bool is_q = extract32(insn, 30, 1);
6113     uint64_t imm = 0;
6114     TCGv_i64 tcg_rd, tcg_imm;
6115     int i;
6116
6117     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
6118         unallocated_encoding(s);
6119         return;
6120     }
6121
6122     if (!fp_access_check(s)) {
6123         return;
6124     }
6125
6126     /* See AdvSIMDExpandImm() in ARM ARM */
6127     switch (cmode_3_1) {
6128     case 0: /* Replicate(Zeros(24):imm8, 2) */
6129     case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
6130     case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
6131     case 3: /* Replicate(imm8:Zeros(24), 2) */
6132     {
6133         int shift = cmode_3_1 * 8;
6134         imm = bitfield_replicate(abcdefgh << shift, 32);
6135         break;
6136     }
6137     case 4: /* Replicate(Zeros(8):imm8, 4) */
6138     case 5: /* Replicate(imm8:Zeros(8), 4) */
6139     {
6140         int shift = (cmode_3_1 & 0x1) * 8;
6141         imm = bitfield_replicate(abcdefgh << shift, 16);
6142         break;
6143     }
6144     case 6:
6145         if (cmode_0) {
6146             /* Replicate(Zeros(8):imm8:Ones(16), 2) */
6147             imm = (abcdefgh << 16) | 0xffff;
6148         } else {
6149             /* Replicate(Zeros(16):imm8:Ones(8), 2) */
6150             imm = (abcdefgh << 8) | 0xff;
6151         }
6152         imm = bitfield_replicate(imm, 32);
6153         break;
6154     case 7:
6155         if (!cmode_0 && !is_neg) {
6156             imm = bitfield_replicate(abcdefgh, 8);
6157         } else if (!cmode_0 && is_neg) {
6158             int i;
6159             imm = 0;
6160             for (i = 0; i < 8; i++) {
6161                 if ((abcdefgh) & (1 << i)) {
6162                     imm |= 0xffULL << (i * 8);
6163                 }
6164             }
6165         } else if (cmode_0) {
6166             if (is_neg) {
6167                 imm = (abcdefgh & 0x3f) << 48;
6168                 if (abcdefgh & 0x80) {
6169                     imm |= 0x8000000000000000ULL;
6170                 }
6171                 if (abcdefgh & 0x40) {
6172                     imm |= 0x3fc0000000000000ULL;
6173                 } else {
6174                     imm |= 0x4000000000000000ULL;
6175                 }
6176             } else {
6177                 imm = (abcdefgh & 0x3f) << 19;
6178                 if (abcdefgh & 0x80) {
6179                     imm |= 0x80000000;
6180                 }
6181                 if (abcdefgh & 0x40) {
6182                     imm |= 0x3e000000;
6183                 } else {
6184                     imm |= 0x40000000;
6185                 }
6186                 imm |= (imm << 32);
6187             }
6188         }
6189         break;
6190     }
6191
6192     if (cmode_3_1 != 7 && is_neg) {
6193         imm = ~imm;
6194     }
6195
6196     tcg_imm = tcg_const_i64(imm);
6197     tcg_rd = new_tmp_a64(s);
6198
6199     for (i = 0; i < 2; i++) {
6200         int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64);
6201
6202         if (i == 1 && !is_q) {
6203             /* non-quad ops clear high half of vector */
6204             tcg_gen_movi_i64(tcg_rd, 0);
6205         } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
6206             tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
6207             if (is_neg) {
6208                 /* AND (BIC) */
6209                 tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
6210             } else {
6211                 /* ORR */
6212                 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
6213             }
6214         } else {
6215             /* MOVI */
6216             tcg_gen_mov_i64(tcg_rd, tcg_imm);
6217         }
6218         tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
6219     }
6220
6221     tcg_temp_free_i64(tcg_imm);
6222 }
6223
6224 /* C3.6.7 AdvSIMD scalar copy
6225  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
6226  * +-----+----+-----------------+------+---+------+---+------+------+
6227  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6228  * +-----+----+-----------------+------+---+------+---+------+------+
6229  */
6230 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
6231 {
6232     int rd = extract32(insn, 0, 5);
6233     int rn = extract32(insn, 5, 5);
6234     int imm4 = extract32(insn, 11, 4);
6235     int imm5 = extract32(insn, 16, 5);
6236     int op = extract32(insn, 29, 1);
6237
6238     if (op != 0 || imm4 != 0) {
6239         unallocated_encoding(s);
6240         return;
6241     }
6242
6243     /* DUP (element, scalar) */
6244     handle_simd_dupes(s, rd, rn, imm5);
6245 }
6246
6247 /* C3.6.8 AdvSIMD scalar pairwise
6248  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
6249  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6250  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6251  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6252  */
6253 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
6254 {
6255     int u = extract32(insn, 29, 1);
6256     int size = extract32(insn, 22, 2);
6257     int opcode = extract32(insn, 12, 5);
6258     int rn = extract32(insn, 5, 5);
6259     int rd = extract32(insn, 0, 5);
6260     TCGv_ptr fpst;
6261
6262     /* For some ops (the FP ones), size[1] is part of the encoding.
6263      * For ADDP strictly it is not but size[1] is always 1 for valid
6264      * encodings.
6265      */
6266     opcode |= (extract32(size, 1, 1) << 5);
6267
6268     switch (opcode) {
6269     case 0x3b: /* ADDP */
6270         if (u || size != 3) {
6271             unallocated_encoding(s);
6272             return;
6273         }
6274         if (!fp_access_check(s)) {
6275             return;
6276         }
6277
6278         TCGV_UNUSED_PTR(fpst);
6279         break;
6280     case 0xc: /* FMAXNMP */
6281     case 0xd: /* FADDP */
6282     case 0xf: /* FMAXP */
6283     case 0x2c: /* FMINNMP */
6284     case 0x2f: /* FMINP */
6285         /* FP op, size[0] is 32 or 64 bit */
6286         if (!u) {
6287             unallocated_encoding(s);
6288             return;
6289         }
6290         if (!fp_access_check(s)) {
6291             return;
6292         }
6293
6294         size = extract32(size, 0, 1) ? 3 : 2;
6295         fpst = get_fpstatus_ptr();
6296         break;
6297     default:
6298         unallocated_encoding(s);
6299         return;
6300     }
6301
6302     if (size == 3) {
6303         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6304         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6305         TCGv_i64 tcg_res = tcg_temp_new_i64();
6306
6307         read_vec_element(s, tcg_op1, rn, 0, MO_64);
6308         read_vec_element(s, tcg_op2, rn, 1, MO_64);
6309
6310         switch (opcode) {
6311         case 0x3b: /* ADDP */
6312             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
6313             break;
6314         case 0xc: /* FMAXNMP */
6315             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6316             break;
6317         case 0xd: /* FADDP */
6318             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6319             break;
6320         case 0xf: /* FMAXP */
6321             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6322             break;
6323         case 0x2c: /* FMINNMP */
6324             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6325             break;
6326         case 0x2f: /* FMINP */
6327             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6328             break;
6329         default:
6330             g_assert_not_reached();
6331         }
6332
6333         write_fp_dreg(s, rd, tcg_res);
6334
6335         tcg_temp_free_i64(tcg_op1);
6336         tcg_temp_free_i64(tcg_op2);
6337         tcg_temp_free_i64(tcg_res);
6338     } else {
6339         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6340         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6341         TCGv_i32 tcg_res = tcg_temp_new_i32();
6342
6343         read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
6344         read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);
6345
6346         switch (opcode) {
6347         case 0xc: /* FMAXNMP */
6348             gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6349             break;
6350         case 0xd: /* FADDP */
6351             gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6352             break;
6353         case 0xf: /* FMAXP */
6354             gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6355             break;
6356         case 0x2c: /* FMINNMP */
6357             gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6358             break;
6359         case 0x2f: /* FMINP */
6360             gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6361             break;
6362         default:
6363             g_assert_not_reached();
6364         }
6365
6366         write_fp_sreg(s, rd, tcg_res);
6367
6368         tcg_temp_free_i32(tcg_op1);
6369         tcg_temp_free_i32(tcg_op2);
6370         tcg_temp_free_i32(tcg_res);
6371     }
6372
6373     if (!TCGV_IS_UNUSED_PTR(fpst)) {
6374         tcg_temp_free_ptr(fpst);
6375     }
6376 }
6377
6378 /*
6379  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
6380  *
6381  * This code is handles the common shifting code and is used by both
6382  * the vector and scalar code.
6383  */
6384 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6385                                     TCGv_i64 tcg_rnd, bool accumulate,
6386                                     bool is_u, int size, int shift)
6387 {
6388     bool extended_result = false;
6389     bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
6390     int ext_lshift = 0;
6391     TCGv_i64 tcg_src_hi;
6392
6393     if (round && size == 3) {
6394         extended_result = true;
6395         ext_lshift = 64 - shift;
6396         tcg_src_hi = tcg_temp_new_i64();
6397     } else if (shift == 64) {
6398         if (!accumulate && is_u) {
6399             /* result is zero */
6400             tcg_gen_movi_i64(tcg_res, 0);
6401             return;
6402         }
6403     }
6404
6405     /* Deal with the rounding step */
6406     if (round) {
6407         if (extended_result) {
6408             TCGv_i64 tcg_zero = tcg_const_i64(0);
6409             if (!is_u) {
6410                 /* take care of sign extending tcg_res */
6411                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
6412                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6413                                  tcg_src, tcg_src_hi,
6414                                  tcg_rnd, tcg_zero);
6415             } else {
6416                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6417                                  tcg_src, tcg_zero,
6418                                  tcg_rnd, tcg_zero);
6419             }
6420             tcg_temp_free_i64(tcg_zero);
6421         } else {
6422             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
6423         }
6424     }
6425
6426     /* Now do the shift right */
6427     if (round && extended_result) {
6428         /* extended case, >64 bit precision required */
6429         if (ext_lshift == 0) {
6430             /* special case, only high bits matter */
6431             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
6432         } else {
6433             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6434             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
6435             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
6436         }
6437     } else {
6438         if (is_u) {
6439             if (shift == 64) {
6440                 /* essentially shifting in 64 zeros */
6441                 tcg_gen_movi_i64(tcg_src, 0);
6442             } else {
6443                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6444             }
6445         } else {
6446             if (shift == 64) {
6447                 /* effectively extending the sign-bit */
6448                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
6449             } else {
6450                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
6451             }
6452         }
6453     }
6454
6455     if (accumulate) {
6456         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
6457     } else {
6458         tcg_gen_mov_i64(tcg_res, tcg_src);
6459     }
6460
6461     if (extended_result) {
6462         tcg_temp_free_i64(tcg_src_hi);
6463     }
6464 }
6465
6466 /* Common SHL/SLI - Shift left with an optional insert */
6467 static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6468                                  bool insert, int shift)
6469 {
6470     if (insert) { /* SLI */
6471         tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
6472     } else { /* SHL */
6473         tcg_gen_shli_i64(tcg_res, tcg_src, shift);
6474     }
6475 }
6476
6477 /* SRI: shift right with insert */
6478 static void handle_shri_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6479                                  int size, int shift)
6480 {
6481     int esize = 8 << size;
6482
6483     /* shift count same as element size is valid but does nothing;
6484      * special case to avoid potential shift by 64.
6485      */
6486     if (shift != esize) {
6487         tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6488         tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, 0, esize - shift);
6489     }
6490 }
6491
6492 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
6493 static void handle_scalar_simd_shri(DisasContext *s,
6494                                     bool is_u, int immh, int immb,
6495                                     int opcode, int rn, int rd)
6496 {
6497     const int size = 3;
6498     int immhb = immh << 3 | immb;
6499     int shift = 2 * (8 << size) - immhb;
6500     bool accumulate = false;
6501     bool round = false;
6502     bool insert = false;
6503     TCGv_i64 tcg_rn;
6504     TCGv_i64 tcg_rd;
6505     TCGv_i64 tcg_round;
6506
6507     if (!extract32(immh, 3, 1)) {
6508         unallocated_encoding(s);
6509         return;
6510     }
6511
6512     if (!fp_access_check(s)) {
6513         return;
6514     }
6515
6516     switch (opcode) {
6517     case 0x02: /* SSRA / USRA (accumulate) */
6518         accumulate = true;
6519         break;
6520     case 0x04: /* SRSHR / URSHR (rounding) */
6521         round = true;
6522         break;
6523     case 0x06: /* SRSRA / URSRA (accum + rounding) */
6524         accumulate = round = true;
6525         break;
6526     case 0x08: /* SRI */
6527         insert = true;
6528         break;
6529     }
6530
6531     if (round) {
6532         uint64_t round_const = 1ULL << (shift - 1);
6533         tcg_round = tcg_const_i64(round_const);
6534     } else {
6535         TCGV_UNUSED_I64(tcg_round);
6536     }
6537
6538     tcg_rn = read_fp_dreg(s, rn);
6539     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6540
6541     if (insert) {
6542         handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
6543     } else {
6544         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6545                                 accumulate, is_u, size, shift);
6546     }
6547
6548     write_fp_dreg(s, rd, tcg_rd);
6549
6550     tcg_temp_free_i64(tcg_rn);
6551     tcg_temp_free_i64(tcg_rd);
6552     if (round) {
6553         tcg_temp_free_i64(tcg_round);
6554     }
6555 }
6556
6557 /* SHL/SLI - Scalar shift left */
6558 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
6559                                     int immh, int immb, int opcode,
6560                                     int rn, int rd)
6561 {
6562     int size = 32 - clz32(immh) - 1;
6563     int immhb = immh << 3 | immb;
6564     int shift = immhb - (8 << size);
6565     TCGv_i64 tcg_rn = new_tmp_a64(s);
6566     TCGv_i64 tcg_rd = new_tmp_a64(s);
6567
6568     if (!extract32(immh, 3, 1)) {
6569         unallocated_encoding(s);
6570         return;
6571     }
6572
6573     if (!fp_access_check(s)) {
6574         return;
6575     }
6576
6577     tcg_rn = read_fp_dreg(s, rn);
6578     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6579
6580     handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
6581
6582     write_fp_dreg(s, rd, tcg_rd);
6583
6584     tcg_temp_free_i64(tcg_rn);
6585     tcg_temp_free_i64(tcg_rd);
6586 }
6587
6588 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
6589  * (signed/unsigned) narrowing */
6590 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
6591                                    bool is_u_shift, bool is_u_narrow,
6592                                    int immh, int immb, int opcode,
6593                                    int rn, int rd)
6594 {
6595     int immhb = immh << 3 | immb;
6596     int size = 32 - clz32(immh) - 1;
6597     int esize = 8 << size;
6598     int shift = (2 * esize) - immhb;
6599     int elements = is_scalar ? 1 : (64 / esize);
6600     bool round = extract32(opcode, 0, 1);
6601     TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
6602     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
6603     TCGv_i32 tcg_rd_narrowed;
6604     TCGv_i64 tcg_final;
6605
6606     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
6607         { gen_helper_neon_narrow_sat_s8,
6608           gen_helper_neon_unarrow_sat8 },
6609         { gen_helper_neon_narrow_sat_s16,
6610           gen_helper_neon_unarrow_sat16 },
6611         { gen_helper_neon_narrow_sat_s32,
6612           gen_helper_neon_unarrow_sat32 },
6613         { NULL, NULL },
6614     };
6615     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
6616         gen_helper_neon_narrow_sat_u8,
6617         gen_helper_neon_narrow_sat_u16,
6618         gen_helper_neon_narrow_sat_u32,
6619         NULL
6620     };
6621     NeonGenNarrowEnvFn *narrowfn;
6622
6623     int i;
6624
6625     assert(size < 4);
6626
6627     if (extract32(immh, 3, 1)) {
6628         unallocated_encoding(s);
6629         return;
6630     }
6631
6632     if (!fp_access_check(s)) {
6633         return;
6634     }
6635
6636     if (is_u_shift) {
6637         narrowfn = unsigned_narrow_fns[size];
6638     } else {
6639         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
6640     }
6641
6642     tcg_rn = tcg_temp_new_i64();
6643     tcg_rd = tcg_temp_new_i64();
6644     tcg_rd_narrowed = tcg_temp_new_i32();
6645     tcg_final = tcg_const_i64(0);
6646
6647     if (round) {
6648         uint64_t round_const = 1ULL << (shift - 1);
6649         tcg_round = tcg_const_i64(round_const);
6650     } else {
6651         TCGV_UNUSED_I64(tcg_round);
6652     }
6653
6654     for (i = 0; i < elements; i++) {
6655         read_vec_element(s, tcg_rn, rn, i, ldop);
6656         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6657                                 false, is_u_shift, size+1, shift);
6658         narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
6659         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
6660         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
6661     }
6662
6663     if (!is_q) {
6664         clear_vec_high(s, rd);
6665         write_vec_element(s, tcg_final, rd, 0, MO_64);
6666     } else {
6667         write_vec_element(s, tcg_final, rd, 1, MO_64);
6668     }
6669
6670     if (round) {
6671         tcg_temp_free_i64(tcg_round);
6672     }
6673     tcg_temp_free_i64(tcg_rn);
6674     tcg_temp_free_i64(tcg_rd);
6675     tcg_temp_free_i32(tcg_rd_narrowed);
6676     tcg_temp_free_i64(tcg_final);
6677     return;
6678 }
6679
6680 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
6681 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
6682                              bool src_unsigned, bool dst_unsigned,
6683                              int immh, int immb, int rn, int rd)
6684 {
6685     int immhb = immh << 3 | immb;
6686     int size = 32 - clz32(immh) - 1;
6687     int shift = immhb - (8 << size);
6688     int pass;
6689
6690     assert(immh != 0);
6691     assert(!(scalar && is_q));
6692
6693     if (!scalar) {
6694         if (!is_q && extract32(immh, 3, 1)) {
6695             unallocated_encoding(s);
6696             return;
6697         }
6698
6699         /* Since we use the variable-shift helpers we must
6700          * replicate the shift count into each element of
6701          * the tcg_shift value.
6702          */
6703         switch (size) {
6704         case 0:
6705             shift |= shift << 8;
6706             /* fall through */
6707         case 1:
6708             shift |= shift << 16;
6709             break;
6710         case 2:
6711         case 3:
6712             break;
6713         default:
6714             g_assert_not_reached();
6715         }
6716     }
6717
6718     if (!fp_access_check(s)) {
6719         return;
6720     }
6721
6722     if (size == 3) {
6723         TCGv_i64 tcg_shift = tcg_const_i64(shift);
6724         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
6725             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
6726             { NULL, gen_helper_neon_qshl_u64 },
6727         };
6728         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
6729         int maxpass = is_q ? 2 : 1;
6730
6731         for (pass = 0; pass < maxpass; pass++) {
6732             TCGv_i64 tcg_op = tcg_temp_new_i64();
6733
6734             read_vec_element(s, tcg_op, rn, pass, MO_64);
6735             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6736             write_vec_element(s, tcg_op, rd, pass, MO_64);
6737
6738             tcg_temp_free_i64(tcg_op);
6739         }
6740         tcg_temp_free_i64(tcg_shift);
6741
6742         if (!is_q) {
6743             clear_vec_high(s, rd);
6744         }
6745     } else {
6746         TCGv_i32 tcg_shift = tcg_const_i32(shift);
6747         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
6748             {
6749                 { gen_helper_neon_qshl_s8,
6750                   gen_helper_neon_qshl_s16,
6751                   gen_helper_neon_qshl_s32 },
6752                 { gen_helper_neon_qshlu_s8,
6753                   gen_helper_neon_qshlu_s16,
6754                   gen_helper_neon_qshlu_s32 }
6755             }, {
6756                 { NULL, NULL, NULL },
6757                 { gen_helper_neon_qshl_u8,
6758                   gen_helper_neon_qshl_u16,
6759                   gen_helper_neon_qshl_u32 }
6760             }
6761         };
6762         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
6763         TCGMemOp memop = scalar ? size : MO_32;
6764         int maxpass = scalar ? 1 : is_q ? 4 : 2;
6765
6766         for (pass = 0; pass < maxpass; pass++) {
6767             TCGv_i32 tcg_op = tcg_temp_new_i32();
6768
6769             read_vec_element_i32(s, tcg_op, rn, pass, memop);
6770             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6771             if (scalar) {
6772                 switch (size) {
6773                 case 0:
6774                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
6775                     break;
6776                 case 1:
6777                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
6778                     break;
6779                 case 2:
6780                     break;
6781                 default:
6782                     g_assert_not_reached();
6783                 }
6784                 write_fp_sreg(s, rd, tcg_op);
6785             } else {
6786                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6787             }
6788
6789             tcg_temp_free_i32(tcg_op);
6790         }
6791         tcg_temp_free_i32(tcg_shift);
6792
6793         if (!is_q && !scalar) {
6794             clear_vec_high(s, rd);
6795         }
6796     }
6797 }
6798
6799 /* Common vector code for handling integer to FP conversion */
6800 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
6801                                    int elements, int is_signed,
6802                                    int fracbits, int size)
6803 {
6804     bool is_double = size == 3 ? true : false;
6805     TCGv_ptr tcg_fpst = get_fpstatus_ptr();
6806     TCGv_i32 tcg_shift = tcg_const_i32(fracbits);
6807     TCGv_i64 tcg_int = tcg_temp_new_i64();
6808     TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
6809     int pass;
6810
6811     for (pass = 0; pass < elements; pass++) {
6812         read_vec_element(s, tcg_int, rn, pass, mop);
6813
6814         if (is_double) {
6815             TCGv_i64 tcg_double = tcg_temp_new_i64();
6816             if (is_signed) {
6817                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6818                                      tcg_shift, tcg_fpst);
6819             } else {
6820                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6821                                      tcg_shift, tcg_fpst);
6822             }
6823             if (elements == 1) {
6824                 write_fp_dreg(s, rd, tcg_double);
6825             } else {
6826                 write_vec_element(s, tcg_double, rd, pass, MO_64);
6827             }
6828             tcg_temp_free_i64(tcg_double);
6829         } else {
6830             TCGv_i32 tcg_single = tcg_temp_new_i32();
6831             if (is_signed) {
6832                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6833                                      tcg_shift, tcg_fpst);
6834             } else {
6835                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6836                                      tcg_shift, tcg_fpst);
6837             }
6838             if (elements == 1) {
6839                 write_fp_sreg(s, rd, tcg_single);
6840             } else {
6841                 write_vec_element_i32(s, tcg_single, rd, pass, MO_32);
6842             }
6843             tcg_temp_free_i32(tcg_single);
6844         }
6845     }
6846
6847     if (!is_double && elements == 2) {
6848         clear_vec_high(s, rd);
6849     }
6850
6851     tcg_temp_free_i64(tcg_int);
6852     tcg_temp_free_ptr(tcg_fpst);
6853     tcg_temp_free_i32(tcg_shift);
6854 }
6855
6856 /* UCVTF/SCVTF - Integer to FP conversion */
6857 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
6858                                          bool is_q, bool is_u,
6859                                          int immh, int immb, int opcode,
6860                                          int rn, int rd)
6861 {
6862     bool is_double = extract32(immh, 3, 1);
6863     int size = is_double ? MO_64 : MO_32;
6864     int elements;
6865     int immhb = immh << 3 | immb;
6866     int fracbits = (is_double ? 128 : 64) - immhb;
6867
6868     if (!extract32(immh, 2, 2)) {
6869         unallocated_encoding(s);
6870         return;
6871     }
6872
6873     if (is_scalar) {
6874         elements = 1;
6875     } else {
6876         elements = is_double ? 2 : is_q ? 4 : 2;
6877         if (is_double && !is_q) {
6878             unallocated_encoding(s);
6879             return;
6880         }
6881     }
6882
6883     if (!fp_access_check(s)) {
6884         return;
6885     }
6886
6887     /* immh == 0 would be a failure of the decode logic */
6888     g_assert(immh);
6889
6890     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
6891 }
6892
6893 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */
6894 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
6895                                          bool is_q, bool is_u,
6896                                          int immh, int immb, int rn, int rd)
6897 {
6898     bool is_double = extract32(immh, 3, 1);
6899     int immhb = immh << 3 | immb;
6900     int fracbits = (is_double ? 128 : 64) - immhb;
6901     int pass;
6902     TCGv_ptr tcg_fpstatus;
6903     TCGv_i32 tcg_rmode, tcg_shift;
6904
6905     if (!extract32(immh, 2, 2)) {
6906         unallocated_encoding(s);
6907         return;
6908     }
6909
6910     if (!is_scalar && !is_q && is_double) {
6911         unallocated_encoding(s);
6912         return;
6913     }
6914
6915     if (!fp_access_check(s)) {
6916         return;
6917     }
6918
6919     assert(!(is_scalar && is_q));
6920
6921     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
6922     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6923     tcg_fpstatus = get_fpstatus_ptr();
6924     tcg_shift = tcg_const_i32(fracbits);
6925
6926     if (is_double) {
6927         int maxpass = is_scalar ? 1 : 2;
6928
6929         for (pass = 0; pass < maxpass; pass++) {
6930             TCGv_i64 tcg_op = tcg_temp_new_i64();
6931
6932             read_vec_element(s, tcg_op, rn, pass, MO_64);
6933             if (is_u) {
6934                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6935             } else {
6936                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6937             }
6938             write_vec_element(s, tcg_op, rd, pass, MO_64);
6939             tcg_temp_free_i64(tcg_op);
6940         }
6941         if (!is_q) {
6942             clear_vec_high(s, rd);
6943         }
6944     } else {
6945         int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
6946         for (pass = 0; pass < maxpass; pass++) {
6947             TCGv_i32 tcg_op = tcg_temp_new_i32();
6948
6949             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
6950             if (is_u) {
6951                 gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6952             } else {
6953                 gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6954             }
6955             if (is_scalar) {
6956                 write_fp_sreg(s, rd, tcg_op);
6957             } else {
6958                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6959             }
6960             tcg_temp_free_i32(tcg_op);
6961         }
6962         if (!is_q && !is_scalar) {
6963             clear_vec_high(s, rd);
6964         }
6965     }
6966
6967     tcg_temp_free_ptr(tcg_fpstatus);
6968     tcg_temp_free_i32(tcg_shift);
6969     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6970     tcg_temp_free_i32(tcg_rmode);
6971 }
6972
6973 /* C3.6.9 AdvSIMD scalar shift by immediate
6974  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
6975  * +-----+---+-------------+------+------+--------+---+------+------+
6976  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
6977  * +-----+---+-------------+------+------+--------+---+------+------+
6978  *
6979  * This is the scalar version so it works on a fixed sized registers
6980  */
6981 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
6982 {
6983     int rd = extract32(insn, 0, 5);
6984     int rn = extract32(insn, 5, 5);
6985     int opcode = extract32(insn, 11, 5);
6986     int immb = extract32(insn, 16, 3);
6987     int immh = extract32(insn, 19, 4);
6988     bool is_u = extract32(insn, 29, 1);
6989
6990     if (immh == 0) {
6991         unallocated_encoding(s);
6992         return;
6993     }
6994
6995     switch (opcode) {
6996     case 0x08: /* SRI */
6997         if (!is_u) {
6998             unallocated_encoding(s);
6999             return;
7000         }
7001         /* fall through */
7002     case 0x00: /* SSHR / USHR */
7003     case 0x02: /* SSRA / USRA */
7004     case 0x04: /* SRSHR / URSHR */
7005     case 0x06: /* SRSRA / URSRA */
7006         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
7007         break;
7008     case 0x0a: /* SHL / SLI */
7009         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
7010         break;
7011     case 0x1c: /* SCVTF, UCVTF */
7012         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
7013                                      opcode, rn, rd);
7014         break;
7015     case 0x10: /* SQSHRUN, SQSHRUN2 */
7016     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
7017         if (!is_u) {
7018             unallocated_encoding(s);
7019             return;
7020         }
7021         handle_vec_simd_sqshrn(s, true, false, false, true,
7022                                immh, immb, opcode, rn, rd);
7023         break;
7024     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
7025     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
7026         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
7027                                immh, immb, opcode, rn, rd);
7028         break;
7029     case 0xc: /* SQSHLU */
7030         if (!is_u) {
7031             unallocated_encoding(s);
7032             return;
7033         }
7034         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
7035         break;
7036     case 0xe: /* SQSHL, UQSHL */
7037         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
7038         break;
7039     case 0x1f: /* FCVTZS, FCVTZU */
7040         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
7041         break;
7042     default:
7043         unallocated_encoding(s);
7044         break;
7045     }
7046 }
7047
7048 /* C3.6.10 AdvSIMD scalar three different
7049  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
7050  * +-----+---+-----------+------+---+------+--------+-----+------+------+
7051  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
7052  * +-----+---+-----------+------+---+------+--------+-----+------+------+
7053  */
7054 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
7055 {
7056     bool is_u = extract32(insn, 29, 1);
7057     int size = extract32(insn, 22, 2);
7058     int opcode = extract32(insn, 12, 4);
7059     int rm = extract32(insn, 16, 5);
7060     int rn = extract32(insn, 5, 5);
7061     int rd = extract32(insn, 0, 5);
7062
7063     if (is_u) {
7064         unallocated_encoding(s);
7065         return;
7066     }
7067
7068     switch (opcode) {
7069     case 0x9: /* SQDMLAL, SQDMLAL2 */
7070     case 0xb: /* SQDMLSL, SQDMLSL2 */
7071     case 0xd: /* SQDMULL, SQDMULL2 */
7072         if (size == 0 || size == 3) {
7073             unallocated_encoding(s);
7074             return;
7075         }
7076         break;
7077     default:
7078         unallocated_encoding(s);
7079         return;
7080     }
7081
7082     if (!fp_access_check(s)) {
7083         return;
7084     }
7085
7086     if (size == 2) {
7087         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7088         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7089         TCGv_i64 tcg_res = tcg_temp_new_i64();
7090
7091         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
7092         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
7093
7094         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
7095         gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
7096
7097         switch (opcode) {
7098         case 0xd: /* SQDMULL, SQDMULL2 */
7099             break;
7100         case 0xb: /* SQDMLSL, SQDMLSL2 */
7101             tcg_gen_neg_i64(tcg_res, tcg_res);
7102             /* fall through */
7103         case 0x9: /* SQDMLAL, SQDMLAL2 */
7104             read_vec_element(s, tcg_op1, rd, 0, MO_64);
7105             gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
7106                                               tcg_res, tcg_op1);
7107             break;
7108         default:
7109             g_assert_not_reached();
7110         }
7111
7112         write_fp_dreg(s, rd, tcg_res);
7113
7114         tcg_temp_free_i64(tcg_op1);
7115         tcg_temp_free_i64(tcg_op2);
7116         tcg_temp_free_i64(tcg_res);
7117     } else {
7118         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7119         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7120         TCGv_i64 tcg_res = tcg_temp_new_i64();
7121
7122         read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
7123         read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
7124
7125         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
7126         gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
7127
7128         switch (opcode) {
7129         case 0xd: /* SQDMULL, SQDMULL2 */
7130             break;
7131         case 0xb: /* SQDMLSL, SQDMLSL2 */
7132             gen_helper_neon_negl_u32(tcg_res, tcg_res);
7133             /* fall through */
7134         case 0x9: /* SQDMLAL, SQDMLAL2 */
7135         {
7136             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
7137             read_vec_element(s, tcg_op3, rd, 0, MO_32);
7138             gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
7139                                               tcg_res, tcg_op3);
7140             tcg_temp_free_i64(tcg_op3);
7141             break;
7142         }
7143         default:
7144             g_assert_not_reached();
7145         }
7146
7147         tcg_gen_ext32u_i64(tcg_res, tcg_res);
7148         write_fp_dreg(s, rd, tcg_res);
7149
7150         tcg_temp_free_i32(tcg_op1);
7151         tcg_temp_free_i32(tcg_op2);
7152         tcg_temp_free_i64(tcg_res);
7153     }
7154 }
7155
7156 static void handle_3same_64(DisasContext *s, int opcode, bool u,
7157                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
7158 {
7159     /* Handle 64x64->64 opcodes which are shared between the scalar
7160      * and vector 3-same groups. We cover every opcode where size == 3
7161      * is valid in either the three-reg-same (integer, not pairwise)
7162      * or scalar-three-reg-same groups. (Some opcodes are not yet
7163      * implemented.)
7164      */
7165     TCGCond cond;
7166
7167     switch (opcode) {
7168     case 0x1: /* SQADD */
7169         if (u) {
7170             gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7171         } else {
7172             gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7173         }
7174         break;
7175     case 0x5: /* SQSUB */
7176         if (u) {
7177             gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7178         } else {
7179             gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7180         }
7181         break;
7182     case 0x6: /* CMGT, CMHI */
7183         /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
7184          * We implement this using setcond (test) and then negating.
7185          */
7186         cond = u ? TCG_COND_GTU : TCG_COND_GT;
7187     do_cmop:
7188         tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
7189         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7190         break;
7191     case 0x7: /* CMGE, CMHS */
7192         cond = u ? TCG_COND_GEU : TCG_COND_GE;
7193         goto do_cmop;
7194     case 0x11: /* CMTST, CMEQ */
7195         if (u) {
7196             cond = TCG_COND_EQ;
7197             goto do_cmop;
7198         }
7199         /* CMTST : test is "if (X & Y != 0)". */
7200         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
7201         tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
7202         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7203         break;
7204     case 0x8: /* SSHL, USHL */
7205         if (u) {
7206             gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
7207         } else {
7208             gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
7209         }
7210         break;
7211     case 0x9: /* SQSHL, UQSHL */
7212         if (u) {
7213             gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7214         } else {
7215             gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7216         }
7217         break;
7218     case 0xa: /* SRSHL, URSHL */
7219         if (u) {
7220             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
7221         } else {
7222             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
7223         }
7224         break;
7225     case 0xb: /* SQRSHL, UQRSHL */
7226         if (u) {
7227             gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7228         } else {
7229             gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7230         }
7231         break;
7232     case 0x10: /* ADD, SUB */
7233         if (u) {
7234             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
7235         } else {
7236             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
7237         }
7238         break;
7239     default:
7240         g_assert_not_reached();
7241     }
7242 }
7243
7244 /* Handle the 3-same-operands float operations; shared by the scalar
7245  * and vector encodings. The caller must filter out any encodings
7246  * not allocated for the encoding it is dealing with.
7247  */
7248 static void handle_3same_float(DisasContext *s, int size, int elements,
7249                                int fpopcode, int rd, int rn, int rm)
7250 {
7251     int pass;
7252     TCGv_ptr fpst = get_fpstatus_ptr();
7253
7254     for (pass = 0; pass < elements; pass++) {
7255         if (size) {
7256             /* Double */
7257             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7258             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7259             TCGv_i64 tcg_res = tcg_temp_new_i64();
7260
7261             read_vec_element(s, tcg_op1, rn, pass, MO_64);
7262             read_vec_element(s, tcg_op2, rm, pass, MO_64);
7263
7264             switch (fpopcode) {
7265             case 0x39: /* FMLS */
7266                 /* As usual for ARM, separate negation for fused multiply-add */
7267                 gen_helper_vfp_negd(tcg_op1, tcg_op1);
7268                 /* fall through */
7269             case 0x19: /* FMLA */
7270                 read_vec_element(s, tcg_res, rd, pass, MO_64);
7271                 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
7272                                        tcg_res, fpst);
7273                 break;
7274             case 0x18: /* FMAXNM */
7275                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7276                 break;
7277             case 0x1a: /* FADD */
7278                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7279                 break;
7280             case 0x1b: /* FMULX */
7281                 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
7282                 break;
7283             case 0x1c: /* FCMEQ */
7284                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7285                 break;
7286             case 0x1e: /* FMAX */
7287                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7288                 break;
7289             case 0x1f: /* FRECPS */
7290                 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7291                 break;
7292             case 0x38: /* FMINNM */
7293                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7294                 break;
7295             case 0x3a: /* FSUB */
7296                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7297                 break;
7298             case 0x3e: /* FMIN */
7299                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7300                 break;
7301             case 0x3f: /* FRSQRTS */
7302                 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7303                 break;
7304             case 0x5b: /* FMUL */
7305                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
7306                 break;
7307             case 0x5c: /* FCMGE */
7308                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7309                 break;
7310             case 0x5d: /* FACGE */
7311                 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7312                 break;
7313             case 0x5f: /* FDIV */
7314                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
7315                 break;
7316             case 0x7a: /* FABD */
7317                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7318                 gen_helper_vfp_absd(tcg_res, tcg_res);
7319                 break;
7320             case 0x7c: /* FCMGT */
7321                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7322                 break;
7323             case 0x7d: /* FACGT */
7324                 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7325                 break;
7326             default:
7327                 g_assert_not_reached();
7328             }
7329
7330             write_vec_element(s, tcg_res, rd, pass, MO_64);
7331
7332             tcg_temp_free_i64(tcg_res);
7333             tcg_temp_free_i64(tcg_op1);
7334             tcg_temp_free_i64(tcg_op2);
7335         } else {
7336             /* Single */
7337             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7338             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7339             TCGv_i32 tcg_res = tcg_temp_new_i32();
7340
7341             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
7342             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
7343
7344             switch (fpopcode) {
7345             case 0x39: /* FMLS */
7346                 /* As usual for ARM, separate negation for fused multiply-add */
7347                 gen_helper_vfp_negs(tcg_op1, tcg_op1);
7348                 /* fall through */
7349             case 0x19: /* FMLA */
7350                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7351                 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
7352                                        tcg_res, fpst);
7353                 break;
7354             case 0x1a: /* FADD */
7355                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7356                 break;
7357             case 0x1b: /* FMULX */
7358                 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
7359                 break;
7360             case 0x1c: /* FCMEQ */
7361                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7362                 break;
7363             case 0x1e: /* FMAX */
7364                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7365                 break;
7366             case 0x1f: /* FRECPS */
7367                 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7368                 break;
7369             case 0x18: /* FMAXNM */
7370                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7371                 break;
7372             case 0x38: /* FMINNM */
7373                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7374                 break;
7375             case 0x3a: /* FSUB */
7376                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7377                 break;
7378             case 0x3e: /* FMIN */
7379                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7380                 break;
7381             case 0x3f: /* FRSQRTS */
7382                 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7383                 break;
7384             case 0x5b: /* FMUL */
7385                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
7386                 break;
7387             case 0x5c: /* FCMGE */
7388                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7389                 break;
7390             case 0x5d: /* FACGE */
7391                 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7392                 break;
7393             case 0x5f: /* FDIV */
7394                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
7395                 break;
7396             case 0x7a: /* FABD */
7397                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7398                 gen_helper_vfp_abss(tcg_res, tcg_res);
7399                 break;
7400             case 0x7c: /* FCMGT */
7401                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7402                 break;
7403             case 0x7d: /* FACGT */
7404                 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7405                 break;
7406             default:
7407                 g_assert_not_reached();
7408             }
7409
7410             if (elements == 1) {
7411                 /* scalar single so clear high part */
7412                 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7413
7414                 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
7415                 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
7416                 tcg_temp_free_i64(tcg_tmp);
7417             } else {
7418                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7419             }
7420
7421             tcg_temp_free_i32(tcg_res);
7422             tcg_temp_free_i32(tcg_op1);
7423             tcg_temp_free_i32(tcg_op2);
7424         }
7425     }
7426
7427     tcg_temp_free_ptr(fpst);
7428
7429     if ((elements << size) < 4) {
7430         /* scalar, or non-quad vector op */
7431         clear_vec_high(s, rd);
7432     }
7433 }
7434
7435 /* C3.6.11 AdvSIMD scalar three same
7436  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
7437  * +-----+---+-----------+------+---+------+--------+---+------+------+
7438  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
7439  * +-----+---+-----------+------+---+------+--------+---+------+------+
7440  */
7441 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
7442 {
7443     int rd = extract32(insn, 0, 5);
7444     int rn = extract32(insn, 5, 5);
7445     int opcode = extract32(insn, 11, 5);
7446     int rm = extract32(insn, 16, 5);
7447     int size = extract32(insn, 22, 2);
7448     bool u = extract32(insn, 29, 1);
7449     TCGv_i64 tcg_rd;
7450
7451     if (opcode >= 0x18) {
7452         /* Floating point: U, size[1] and opcode indicate operation */
7453         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
7454         switch (fpopcode) {
7455         case 0x1b: /* FMULX */
7456         case 0x1f: /* FRECPS */
7457         case 0x3f: /* FRSQRTS */
7458         case 0x5d: /* FACGE */
7459         case 0x7d: /* FACGT */
7460         case 0x1c: /* FCMEQ */
7461         case 0x5c: /* FCMGE */
7462         case 0x7c: /* FCMGT */
7463         case 0x7a: /* FABD */
7464             break;
7465         default:
7466             unallocated_encoding(s);
7467             return;
7468         }
7469
7470         if (!fp_access_check(s)) {
7471             return;
7472         }
7473
7474         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
7475         return;
7476     }
7477
7478     switch (opcode) {
7479     case 0x1: /* SQADD, UQADD */
7480     case 0x5: /* SQSUB, UQSUB */
7481     case 0x9: /* SQSHL, UQSHL */
7482     case 0xb: /* SQRSHL, UQRSHL */
7483         break;
7484     case 0x8: /* SSHL, USHL */
7485     case 0xa: /* SRSHL, URSHL */
7486     case 0x6: /* CMGT, CMHI */
7487     case 0x7: /* CMGE, CMHS */
7488     case 0x11: /* CMTST, CMEQ */
7489     case 0x10: /* ADD, SUB (vector) */
7490         if (size != 3) {
7491             unallocated_encoding(s);
7492             return;
7493         }
7494         break;
7495     case 0x16: /* SQDMULH, SQRDMULH (vector) */
7496         if (size != 1 && size != 2) {
7497             unallocated_encoding(s);
7498             return;
7499         }
7500         break;
7501     default:
7502         unallocated_encoding(s);
7503         return;
7504     }
7505
7506     if (!fp_access_check(s)) {
7507         return;
7508     }
7509
7510     tcg_rd = tcg_temp_new_i64();
7511
7512     if (size == 3) {
7513         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
7514         TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
7515
7516         handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
7517         tcg_temp_free_i64(tcg_rn);
7518         tcg_temp_free_i64(tcg_rm);
7519     } else {
7520         /* Do a single operation on the lowest element in the vector.
7521          * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
7522          * no side effects for all these operations.
7523          * OPTME: special-purpose helpers would avoid doing some
7524          * unnecessary work in the helper for the 8 and 16 bit cases.
7525          */
7526         NeonGenTwoOpEnvFn *genenvfn;
7527         TCGv_i32 tcg_rn = tcg_temp_new_i32();
7528         TCGv_i32 tcg_rm = tcg_temp_new_i32();
7529         TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
7530
7531         read_vec_element_i32(s, tcg_rn, rn, 0, size);
7532         read_vec_element_i32(s, tcg_rm, rm, 0, size);
7533
7534         switch (opcode) {
7535         case 0x1: /* SQADD, UQADD */
7536         {
7537             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7538                 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
7539                 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
7540                 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
7541             };
7542             genenvfn = fns[size][u];
7543             break;
7544         }
7545         case 0x5: /* SQSUB, UQSUB */
7546         {
7547             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7548                 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
7549                 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
7550                 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
7551             };
7552             genenvfn = fns[size][u];
7553             break;
7554         }
7555         case 0x9: /* SQSHL, UQSHL */
7556         {
7557             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7558                 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
7559                 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
7560                 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
7561             };
7562             genenvfn = fns[size][u];
7563             break;
7564         }
7565         case 0xb: /* SQRSHL, UQRSHL */
7566         {
7567             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7568                 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
7569                 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
7570                 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
7571             };
7572             genenvfn = fns[size][u];
7573             break;
7574         }
7575         case 0x16: /* SQDMULH, SQRDMULH */
7576         {
7577             static NeonGenTwoOpEnvFn * const fns[2][2] = {
7578                 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
7579                 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
7580             };
7581             assert(size == 1 || size == 2);
7582             genenvfn = fns[size - 1][u];
7583             break;
7584         }
7585         default:
7586             g_assert_not_reached();
7587         }
7588
7589         genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
7590         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
7591         tcg_temp_free_i32(tcg_rd32);
7592         tcg_temp_free_i32(tcg_rn);
7593         tcg_temp_free_i32(tcg_rm);
7594     }
7595
7596     write_fp_dreg(s, rd, tcg_rd);
7597
7598     tcg_temp_free_i64(tcg_rd);
7599 }
7600
7601 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
7602                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
7603                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
7604 {
7605     /* Handle 64->64 opcodes which are shared between the scalar and
7606      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
7607      * is valid in either group and also the double-precision fp ops.
7608      * The caller only need provide tcg_rmode and tcg_fpstatus if the op
7609      * requires them.
7610      */
7611     TCGCond cond;
7612
7613     switch (opcode) {
7614     case 0x4: /* CLS, CLZ */
7615         if (u) {
7616             gen_helper_clz64(tcg_rd, tcg_rn);
7617         } else {
7618             gen_helper_cls64(tcg_rd, tcg_rn);
7619         }
7620         break;
7621     case 0x5: /* NOT */
7622         /* This opcode is shared with CNT and RBIT but we have earlier
7623          * enforced that size == 3 if and only if this is the NOT insn.
7624          */
7625         tcg_gen_not_i64(tcg_rd, tcg_rn);
7626         break;
7627     case 0x7: /* SQABS, SQNEG */
7628         if (u) {
7629             gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
7630         } else {
7631             gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
7632         }
7633         break;
7634     case 0xa: /* CMLT */
7635         /* 64 bit integer comparison against zero, result is
7636          * test ? (2^64 - 1) : 0. We implement via setcond(!test) and
7637          * subtracting 1.
7638          */
7639         cond = TCG_COND_LT;
7640     do_cmop:
7641         tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
7642         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7643         break;
7644     case 0x8: /* CMGT, CMGE */
7645         cond = u ? TCG_COND_GE : TCG_COND_GT;
7646         goto do_cmop;
7647     case 0x9: /* CMEQ, CMLE */
7648         cond = u ? TCG_COND_LE : TCG_COND_EQ;
7649         goto do_cmop;
7650     case 0xb: /* ABS, NEG */
7651         if (u) {
7652             tcg_gen_neg_i64(tcg_rd, tcg_rn);
7653         } else {
7654             TCGv_i64 tcg_zero = tcg_const_i64(0);
7655             tcg_gen_neg_i64(tcg_rd, tcg_rn);
7656             tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
7657                                 tcg_rn, tcg_rd);
7658             tcg_temp_free_i64(tcg_zero);
7659         }
7660         break;
7661     case 0x2f: /* FABS */
7662         gen_helper_vfp_absd(tcg_rd, tcg_rn);
7663         break;
7664     case 0x6f: /* FNEG */
7665         gen_helper_vfp_negd(tcg_rd, tcg_rn);
7666         break;
7667     case 0x7f: /* FSQRT */
7668         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
7669         break;
7670     case 0x1a: /* FCVTNS */
7671     case 0x1b: /* FCVTMS */
7672     case 0x1c: /* FCVTAS */
7673     case 0x3a: /* FCVTPS */
7674     case 0x3b: /* FCVTZS */
7675     {
7676         TCGv_i32 tcg_shift = tcg_const_i32(0);
7677         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7678         tcg_temp_free_i32(tcg_shift);
7679         break;
7680     }
7681     case 0x5a: /* FCVTNU */
7682     case 0x5b: /* FCVTMU */
7683     case 0x5c: /* FCVTAU */
7684     case 0x7a: /* FCVTPU */
7685     case 0x7b: /* FCVTZU */
7686     {
7687         TCGv_i32 tcg_shift = tcg_const_i32(0);
7688         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7689         tcg_temp_free_i32(tcg_shift);
7690         break;
7691     }
7692     case 0x18: /* FRINTN */
7693     case 0x19: /* FRINTM */
7694     case 0x38: /* FRINTP */
7695     case 0x39: /* FRINTZ */
7696     case 0x58: /* FRINTA */
7697     case 0x79: /* FRINTI */
7698         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
7699         break;
7700     case 0x59: /* FRINTX */
7701         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
7702         break;
7703     default:
7704         g_assert_not_reached();
7705     }
7706 }
7707
7708 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
7709                                    bool is_scalar, bool is_u, bool is_q,
7710                                    int size, int rn, int rd)
7711 {
7712     bool is_double = (size == 3);
7713     TCGv_ptr fpst;
7714
7715     if (!fp_access_check(s)) {
7716         return;
7717     }
7718
7719     fpst = get_fpstatus_ptr();
7720
7721     if (is_double) {
7722         TCGv_i64 tcg_op = tcg_temp_new_i64();
7723         TCGv_i64 tcg_zero = tcg_const_i64(0);
7724         TCGv_i64 tcg_res = tcg_temp_new_i64();
7725         NeonGenTwoDoubleOPFn *genfn;
7726         bool swap = false;
7727         int pass;
7728
7729         switch (opcode) {
7730         case 0x2e: /* FCMLT (zero) */
7731             swap = true;
7732             /* fallthrough */
7733         case 0x2c: /* FCMGT (zero) */
7734             genfn = gen_helper_neon_cgt_f64;
7735             break;
7736         case 0x2d: /* FCMEQ (zero) */
7737             genfn = gen_helper_neon_ceq_f64;
7738             break;
7739         case 0x6d: /* FCMLE (zero) */
7740             swap = true;
7741             /* fall through */
7742         case 0x6c: /* FCMGE (zero) */
7743             genfn = gen_helper_neon_cge_f64;
7744             break;
7745         default:
7746             g_assert_not_reached();
7747         }
7748
7749         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7750             read_vec_element(s, tcg_op, rn, pass, MO_64);
7751             if (swap) {
7752                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7753             } else {
7754                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7755             }
7756             write_vec_element(s, tcg_res, rd, pass, MO_64);
7757         }
7758         if (is_scalar) {
7759             clear_vec_high(s, rd);
7760         }
7761
7762         tcg_temp_free_i64(tcg_res);
7763         tcg_temp_free_i64(tcg_zero);
7764         tcg_temp_free_i64(tcg_op);
7765     } else {
7766         TCGv_i32 tcg_op = tcg_temp_new_i32();
7767         TCGv_i32 tcg_zero = tcg_const_i32(0);
7768         TCGv_i32 tcg_res = tcg_temp_new_i32();
7769         NeonGenTwoSingleOPFn *genfn;
7770         bool swap = false;
7771         int pass, maxpasses;
7772
7773         switch (opcode) {
7774         case 0x2e: /* FCMLT (zero) */
7775             swap = true;
7776             /* fall through */
7777         case 0x2c: /* FCMGT (zero) */
7778             genfn = gen_helper_neon_cgt_f32;
7779             break;
7780         case 0x2d: /* FCMEQ (zero) */
7781             genfn = gen_helper_neon_ceq_f32;
7782             break;
7783         case 0x6d: /* FCMLE (zero) */
7784             swap = true;
7785             /* fall through */
7786         case 0x6c: /* FCMGE (zero) */
7787             genfn = gen_helper_neon_cge_f32;
7788             break;
7789         default:
7790             g_assert_not_reached();
7791         }
7792
7793         if (is_scalar) {
7794             maxpasses = 1;
7795         } else {
7796             maxpasses = is_q ? 4 : 2;
7797         }
7798
7799         for (pass = 0; pass < maxpasses; pass++) {
7800             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7801             if (swap) {
7802                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7803             } else {
7804                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7805             }
7806             if (is_scalar) {
7807                 write_fp_sreg(s, rd, tcg_res);
7808             } else {
7809                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7810             }
7811         }
7812         tcg_temp_free_i32(tcg_res);
7813         tcg_temp_free_i32(tcg_zero);
7814         tcg_temp_free_i32(tcg_op);
7815         if (!is_q && !is_scalar) {
7816             clear_vec_high(s, rd);
7817         }
7818     }
7819
7820     tcg_temp_free_ptr(fpst);
7821 }
7822
7823 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
7824                                     bool is_scalar, bool is_u, bool is_q,
7825                                     int size, int rn, int rd)
7826 {
7827     bool is_double = (size == 3);
7828     TCGv_ptr fpst = get_fpstatus_ptr();
7829
7830     if (is_double) {
7831         TCGv_i64 tcg_op = tcg_temp_new_i64();
7832         TCGv_i64 tcg_res = tcg_temp_new_i64();
7833         int pass;
7834
7835         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7836             read_vec_element(s, tcg_op, rn, pass, MO_64);
7837             switch (opcode) {
7838             case 0x3d: /* FRECPE */
7839                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
7840                 break;
7841             case 0x3f: /* FRECPX */
7842                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
7843                 break;
7844             case 0x7d: /* FRSQRTE */
7845                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
7846                 break;
7847             default:
7848                 g_assert_not_reached();
7849             }
7850             write_vec_element(s, tcg_res, rd, pass, MO_64);
7851         }
7852         if (is_scalar) {
7853             clear_vec_high(s, rd);
7854         }
7855
7856         tcg_temp_free_i64(tcg_res);
7857         tcg_temp_free_i64(tcg_op);
7858     } else {
7859         TCGv_i32 tcg_op = tcg_temp_new_i32();
7860         TCGv_i32 tcg_res = tcg_temp_new_i32();
7861         int pass, maxpasses;
7862
7863         if (is_scalar) {
7864             maxpasses = 1;
7865         } else {
7866             maxpasses = is_q ? 4 : 2;
7867         }
7868
7869         for (pass = 0; pass < maxpasses; pass++) {
7870             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7871
7872             switch (opcode) {
7873             case 0x3c: /* URECPE */
7874                 gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
7875                 break;
7876             case 0x3d: /* FRECPE */
7877                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
7878                 break;
7879             case 0x3f: /* FRECPX */
7880                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
7881                 break;
7882             case 0x7d: /* FRSQRTE */
7883                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
7884                 break;
7885             default:
7886                 g_assert_not_reached();
7887             }
7888
7889             if (is_scalar) {
7890                 write_fp_sreg(s, rd, tcg_res);
7891             } else {
7892                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7893             }
7894         }
7895         tcg_temp_free_i32(tcg_res);
7896         tcg_temp_free_i32(tcg_op);
7897         if (!is_q && !is_scalar) {
7898             clear_vec_high(s, rd);
7899         }
7900     }
7901     tcg_temp_free_ptr(fpst);
7902 }
7903
7904 static void handle_2misc_narrow(DisasContext *s, bool scalar,
7905                                 int opcode, bool u, bool is_q,
7906                                 int size, int rn, int rd)
7907 {
7908     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
7909      * in the source becomes a size element in the destination).
7910      */
7911     int pass;
7912     TCGv_i32 tcg_res[2];
7913     int destelt = is_q ? 2 : 0;
7914     int passes = scalar ? 1 : 2;
7915
7916     if (scalar) {
7917         tcg_res[1] = tcg_const_i32(0);
7918     }
7919
7920     for (pass = 0; pass < passes; pass++) {
7921         TCGv_i64 tcg_op = tcg_temp_new_i64();
7922         NeonGenNarrowFn *genfn = NULL;
7923         NeonGenNarrowEnvFn *genenvfn = NULL;
7924
7925         if (scalar) {
7926             read_vec_element(s, tcg_op, rn, pass, size + 1);
7927         } else {
7928             read_vec_element(s, tcg_op, rn, pass, MO_64);
7929         }
7930         tcg_res[pass] = tcg_temp_new_i32();
7931
7932         switch (opcode) {
7933         case 0x12: /* XTN, SQXTUN */
7934         {
7935             static NeonGenNarrowFn * const xtnfns[3] = {
7936                 gen_helper_neon_narrow_u8,
7937                 gen_helper_neon_narrow_u16,
7938                 tcg_gen_extrl_i64_i32,
7939             };
7940             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
7941                 gen_helper_neon_unarrow_sat8,
7942                 gen_helper_neon_unarrow_sat16,
7943                 gen_helper_neon_unarrow_sat32,
7944             };
7945             if (u) {
7946                 genenvfn = sqxtunfns[size];
7947             } else {
7948                 genfn = xtnfns[size];
7949             }
7950             break;
7951         }
7952         case 0x14: /* SQXTN, UQXTN */
7953         {
7954             static NeonGenNarrowEnvFn * const fns[3][2] = {
7955                 { gen_helper_neon_narrow_sat_s8,
7956                   gen_helper_neon_narrow_sat_u8 },
7957                 { gen_helper_neon_narrow_sat_s16,
7958                   gen_helper_neon_narrow_sat_u16 },
7959                 { gen_helper_neon_narrow_sat_s32,
7960                   gen_helper_neon_narrow_sat_u32 },
7961             };
7962             genenvfn = fns[size][u];
7963             break;
7964         }
7965         case 0x16: /* FCVTN, FCVTN2 */
7966             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
7967             if (size == 2) {
7968                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
7969             } else {
7970                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
7971                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
7972                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
7973                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
7974                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
7975                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
7976                 tcg_temp_free_i32(tcg_lo);
7977                 tcg_temp_free_i32(tcg_hi);
7978             }
7979             break;
7980         case 0x56:  /* FCVTXN, FCVTXN2 */
7981             /* 64 bit to 32 bit float conversion
7982              * with von Neumann rounding (round to odd)
7983              */
7984             assert(size == 2);
7985             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
7986             break;
7987         default:
7988             g_assert_not_reached();
7989         }
7990
7991         if (genfn) {
7992             genfn(tcg_res[pass], tcg_op);
7993         } else if (genenvfn) {
7994             genenvfn(tcg_res[pass], cpu_env, tcg_op);
7995         }
7996
7997         tcg_temp_free_i64(tcg_op);
7998     }
7999
8000     for (pass = 0; pass < 2; pass++) {
8001         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
8002         tcg_temp_free_i32(tcg_res[pass]);
8003     }
8004     if (!is_q) {
8005         clear_vec_high(s, rd);
8006     }
8007 }
8008
8009 /* Remaining saturating accumulating ops */
8010 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
8011                                 bool is_q, int size, int rn, int rd)
8012 {
8013     bool is_double = (size == 3);
8014
8015     if (is_double) {
8016         TCGv_i64 tcg_rn = tcg_temp_new_i64();
8017         TCGv_i64 tcg_rd = tcg_temp_new_i64();
8018         int pass;
8019
8020         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
8021             read_vec_element(s, tcg_rn, rn, pass, MO_64);
8022             read_vec_element(s, tcg_rd, rd, pass, MO_64);
8023
8024             if (is_u) { /* USQADD */
8025                 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8026             } else { /* SUQADD */
8027                 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8028             }
8029             write_vec_element(s, tcg_rd, rd, pass, MO_64);
8030         }
8031         if (is_scalar) {
8032             clear_vec_high(s, rd);
8033         }
8034
8035         tcg_temp_free_i64(tcg_rd);
8036         tcg_temp_free_i64(tcg_rn);
8037     } else {
8038         TCGv_i32 tcg_rn = tcg_temp_new_i32();
8039         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8040         int pass, maxpasses;
8041
8042         if (is_scalar) {
8043             maxpasses = 1;
8044         } else {
8045             maxpasses = is_q ? 4 : 2;
8046         }
8047
8048         for (pass = 0; pass < maxpasses; pass++) {
8049             if (is_scalar) {
8050                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
8051                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
8052             } else {
8053                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
8054                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
8055             }
8056
8057             if (is_u) { /* USQADD */
8058                 switch (size) {
8059                 case 0:
8060                     gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8061                     break;
8062                 case 1:
8063                     gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8064                     break;
8065                 case 2:
8066                     gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8067                     break;
8068                 default:
8069                     g_assert_not_reached();
8070                 }
8071             } else { /* SUQADD */
8072                 switch (size) {
8073                 case 0:
8074                     gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8075                     break;
8076                 case 1:
8077                     gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8078                     break;
8079                 case 2:
8080                     gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8081                     break;
8082                 default:
8083                     g_assert_not_reached();
8084                 }
8085             }
8086
8087             if (is_scalar) {
8088                 TCGv_i64 tcg_zero = tcg_const_i64(0);
8089                 write_vec_element(s, tcg_zero, rd, 0, MO_64);
8090                 tcg_temp_free_i64(tcg_zero);
8091             }
8092             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
8093         }
8094
8095         if (!is_q) {
8096             clear_vec_high(s, rd);
8097         }
8098
8099         tcg_temp_free_i32(tcg_rd);
8100         tcg_temp_free_i32(tcg_rn);
8101     }
8102 }
8103
8104 /* C3.6.12 AdvSIMD scalar two reg misc
8105  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
8106  * +-----+---+-----------+------+-----------+--------+-----+------+------+
8107  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
8108  * +-----+---+-----------+------+-----------+--------+-----+------+------+
8109  */
8110 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
8111 {
8112     int rd = extract32(insn, 0, 5);
8113     int rn = extract32(insn, 5, 5);
8114     int opcode = extract32(insn, 12, 5);
8115     int size = extract32(insn, 22, 2);
8116     bool u = extract32(insn, 29, 1);
8117     bool is_fcvt = false;
8118     int rmode;
8119     TCGv_i32 tcg_rmode;
8120     TCGv_ptr tcg_fpstatus;
8121
8122     switch (opcode) {
8123     case 0x3: /* USQADD / SUQADD*/
8124         if (!fp_access_check(s)) {
8125             return;
8126         }
8127         handle_2misc_satacc(s, true, u, false, size, rn, rd);
8128         return;
8129     case 0x7: /* SQABS / SQNEG */
8130         break;
8131     case 0xa: /* CMLT */
8132         if (u) {
8133             unallocated_encoding(s);
8134             return;
8135         }
8136         /* fall through */
8137     case 0x8: /* CMGT, CMGE */
8138     case 0x9: /* CMEQ, CMLE */
8139     case 0xb: /* ABS, NEG */
8140         if (size != 3) {
8141             unallocated_encoding(s);
8142             return;
8143         }
8144         break;
8145     case 0x12: /* SQXTUN */
8146         if (!u) {
8147             unallocated_encoding(s);
8148             return;
8149         }
8150         /* fall through */
8151     case 0x14: /* SQXTN, UQXTN */
8152         if (size == 3) {
8153             unallocated_encoding(s);
8154             return;
8155         }
8156         if (!fp_access_check(s)) {
8157             return;
8158         }
8159         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
8160         return;
8161     case 0xc ... 0xf:
8162     case 0x16 ... 0x1d:
8163     case 0x1f:
8164         /* Floating point: U, size[1] and opcode indicate operation;
8165          * size[0] indicates single or double precision.
8166          */
8167         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
8168         size = extract32(size, 0, 1) ? 3 : 2;
8169         switch (opcode) {
8170         case 0x2c: /* FCMGT (zero) */
8171         case 0x2d: /* FCMEQ (zero) */
8172         case 0x2e: /* FCMLT (zero) */
8173         case 0x6c: /* FCMGE (zero) */
8174         case 0x6d: /* FCMLE (zero) */
8175             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
8176             return;
8177         case 0x1d: /* SCVTF */
8178         case 0x5d: /* UCVTF */
8179         {
8180             bool is_signed = (opcode == 0x1d);
8181             if (!fp_access_check(s)) {
8182                 return;
8183             }
8184             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
8185             return;
8186         }
8187         case 0x3d: /* FRECPE */
8188         case 0x3f: /* FRECPX */
8189         case 0x7d: /* FRSQRTE */
8190             if (!fp_access_check(s)) {
8191                 return;
8192             }
8193             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
8194             return;
8195         case 0x1a: /* FCVTNS */
8196         case 0x1b: /* FCVTMS */
8197         case 0x3a: /* FCVTPS */
8198         case 0x3b: /* FCVTZS */
8199         case 0x5a: /* FCVTNU */
8200         case 0x5b: /* FCVTMU */
8201         case 0x7a: /* FCVTPU */
8202         case 0x7b: /* FCVTZU */
8203             is_fcvt = true;
8204             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
8205             break;
8206         case 0x1c: /* FCVTAS */
8207         case 0x5c: /* FCVTAU */
8208             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
8209             is_fcvt = true;
8210             rmode = FPROUNDING_TIEAWAY;
8211             break;
8212         case 0x56: /* FCVTXN, FCVTXN2 */
8213             if (size == 2) {
8214                 unallocated_encoding(s);
8215                 return;
8216             }
8217             if (!fp_access_check(s)) {
8218                 return;
8219             }
8220             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
8221             return;
8222         default:
8223             unallocated_encoding(s);
8224             return;
8225         }
8226         break;
8227     default:
8228         unallocated_encoding(s);
8229         return;
8230     }
8231
8232     if (!fp_access_check(s)) {
8233         return;
8234     }
8235
8236     if (is_fcvt) {
8237         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
8238         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8239         tcg_fpstatus = get_fpstatus_ptr();
8240     } else {
8241         TCGV_UNUSED_I32(tcg_rmode);
8242         TCGV_UNUSED_PTR(tcg_fpstatus);
8243     }
8244
8245     if (size == 3) {
8246         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8247         TCGv_i64 tcg_rd = tcg_temp_new_i64();
8248
8249         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
8250         write_fp_dreg(s, rd, tcg_rd);
8251         tcg_temp_free_i64(tcg_rd);
8252         tcg_temp_free_i64(tcg_rn);
8253     } else {
8254         TCGv_i32 tcg_rn = tcg_temp_new_i32();
8255         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8256
8257         read_vec_element_i32(s, tcg_rn, rn, 0, size);
8258
8259         switch (opcode) {
8260         case 0x7: /* SQABS, SQNEG */
8261         {
8262             NeonGenOneOpEnvFn *genfn;
8263             static NeonGenOneOpEnvFn * const fns[3][2] = {
8264                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
8265                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
8266                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
8267             };
8268             genfn = fns[size][u];
8269             genfn(tcg_rd, cpu_env, tcg_rn);
8270             break;
8271         }
8272         case 0x1a: /* FCVTNS */
8273         case 0x1b: /* FCVTMS */
8274         case 0x1c: /* FCVTAS */
8275         case 0x3a: /* FCVTPS */
8276         case 0x3b: /* FCVTZS */
8277         {
8278             TCGv_i32 tcg_shift = tcg_const_i32(0);
8279             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8280             tcg_temp_free_i32(tcg_shift);
8281             break;
8282         }
8283         case 0x5a: /* FCVTNU */
8284         case 0x5b: /* FCVTMU */
8285         case 0x5c: /* FCVTAU */
8286         case 0x7a: /* FCVTPU */
8287         case 0x7b: /* FCVTZU */
8288         {
8289             TCGv_i32 tcg_shift = tcg_const_i32(0);
8290             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8291             tcg_temp_free_i32(tcg_shift);
8292             break;
8293         }
8294         default:
8295             g_assert_not_reached();
8296         }
8297
8298         write_fp_sreg(s, rd, tcg_rd);
8299         tcg_temp_free_i32(tcg_rd);
8300         tcg_temp_free_i32(tcg_rn);
8301     }
8302
8303     if (is_fcvt) {
8304         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8305         tcg_temp_free_i32(tcg_rmode);
8306         tcg_temp_free_ptr(tcg_fpstatus);
8307     }
8308 }
8309
8310 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
8311 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
8312                                  int immh, int immb, int opcode, int rn, int rd)
8313 {
8314     int size = 32 - clz32(immh) - 1;
8315     int immhb = immh << 3 | immb;
8316     int shift = 2 * (8 << size) - immhb;
8317     bool accumulate = false;
8318     bool round = false;
8319     bool insert = false;
8320     int dsize = is_q ? 128 : 64;
8321     int esize = 8 << size;
8322     int elements = dsize/esize;
8323     TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
8324     TCGv_i64 tcg_rn = new_tmp_a64(s);
8325     TCGv_i64 tcg_rd = new_tmp_a64(s);
8326     TCGv_i64 tcg_round;
8327     int i;
8328
8329     if (extract32(immh, 3, 1) && !is_q) {
8330         unallocated_encoding(s);
8331         return;
8332     }
8333
8334     if (size > 3 && !is_q) {
8335         unallocated_encoding(s);
8336         return;
8337     }
8338
8339     if (!fp_access_check(s)) {
8340         return;
8341     }
8342
8343     switch (opcode) {
8344     case 0x02: /* SSRA / USRA (accumulate) */
8345         accumulate = true;
8346         break;
8347     case 0x04: /* SRSHR / URSHR (rounding) */
8348         round = true;
8349         break;
8350     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8351         accumulate = round = true;
8352         break;
8353     case 0x08: /* SRI */
8354         insert = true;
8355         break;
8356     }
8357
8358     if (round) {
8359         uint64_t round_const = 1ULL << (shift - 1);
8360         tcg_round = tcg_const_i64(round_const);
8361     } else {
8362         TCGV_UNUSED_I64(tcg_round);
8363     }
8364
8365     for (i = 0; i < elements; i++) {
8366         read_vec_element(s, tcg_rn, rn, i, memop);
8367         if (accumulate || insert) {
8368             read_vec_element(s, tcg_rd, rd, i, memop);
8369         }
8370
8371         if (insert) {
8372             handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
8373         } else {
8374             handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8375                                     accumulate, is_u, size, shift);
8376         }
8377
8378         write_vec_element(s, tcg_rd, rd, i, size);
8379     }
8380
8381     if (!is_q) {
8382         clear_vec_high(s, rd);
8383     }
8384
8385     if (round) {
8386         tcg_temp_free_i64(tcg_round);
8387     }
8388 }
8389
8390 /* SHL/SLI - Vector shift left */
8391 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
8392                                 int immh, int immb, int opcode, int rn, int rd)
8393 {
8394     int size = 32 - clz32(immh) - 1;
8395     int immhb = immh << 3 | immb;
8396     int shift = immhb - (8 << size);
8397     int dsize = is_q ? 128 : 64;
8398     int esize = 8 << size;
8399     int elements = dsize/esize;
8400     TCGv_i64 tcg_rn = new_tmp_a64(s);
8401     TCGv_i64 tcg_rd = new_tmp_a64(s);
8402     int i;
8403
8404     if (extract32(immh, 3, 1) && !is_q) {
8405         unallocated_encoding(s);
8406         return;
8407     }
8408
8409     if (size > 3 && !is_q) {
8410         unallocated_encoding(s);
8411         return;
8412     }
8413
8414     if (!fp_access_check(s)) {
8415         return;
8416     }
8417
8418     for (i = 0; i < elements; i++) {
8419         read_vec_element(s, tcg_rn, rn, i, size);
8420         if (insert) {
8421             read_vec_element(s, tcg_rd, rd, i, size);
8422         }
8423
8424         handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
8425
8426         write_vec_element(s, tcg_rd, rd, i, size);
8427     }
8428
8429     if (!is_q) {
8430         clear_vec_high(s, rd);
8431     }
8432 }
8433
8434 /* USHLL/SHLL - Vector shift left with widening */
8435 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
8436                                  int immh, int immb, int opcode, int rn, int rd)
8437 {
8438     int size = 32 - clz32(immh) - 1;
8439     int immhb = immh << 3 | immb;
8440     int shift = immhb - (8 << size);
8441     int dsize = 64;
8442     int esize = 8 << size;
8443     int elements = dsize/esize;
8444     TCGv_i64 tcg_rn = new_tmp_a64(s);
8445     TCGv_i64 tcg_rd = new_tmp_a64(s);
8446     int i;
8447
8448     if (size >= 3) {
8449         unallocated_encoding(s);
8450         return;
8451     }
8452
8453     if (!fp_access_check(s)) {
8454         return;
8455     }
8456
8457     /* For the LL variants the store is larger than the load,
8458      * so if rd == rn we would overwrite parts of our input.
8459      * So load everything right now and use shifts in the main loop.
8460      */
8461     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
8462
8463     for (i = 0; i < elements; i++) {
8464         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
8465         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
8466         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
8467         write_vec_element(s, tcg_rd, rd, i, size + 1);
8468     }
8469 }
8470
8471 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
8472 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
8473                                  int immh, int immb, int opcode, int rn, int rd)
8474 {
8475     int immhb = immh << 3 | immb;
8476     int size = 32 - clz32(immh) - 1;
8477     int dsize = 64;
8478     int esize = 8 << size;
8479     int elements = dsize/esize;
8480     int shift = (2 * esize) - immhb;
8481     bool round = extract32(opcode, 0, 1);
8482     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
8483     TCGv_i64 tcg_round;
8484     int i;
8485
8486     if (extract32(immh, 3, 1)) {
8487         unallocated_encoding(s);
8488         return;
8489     }
8490
8491     if (!fp_access_check(s)) {
8492         return;
8493     }
8494
8495     tcg_rn = tcg_temp_new_i64();
8496     tcg_rd = tcg_temp_new_i64();
8497     tcg_final = tcg_temp_new_i64();
8498     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
8499
8500     if (round) {
8501         uint64_t round_const = 1ULL << (shift - 1);
8502         tcg_round = tcg_const_i64(round_const);
8503     } else {
8504         TCGV_UNUSED_I64(tcg_round);
8505     }
8506
8507     for (i = 0; i < elements; i++) {
8508         read_vec_element(s, tcg_rn, rn, i, size+1);
8509         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8510                                 false, true, size+1, shift);
8511
8512         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8513     }
8514
8515     if (!is_q) {
8516         clear_vec_high(s, rd);
8517         write_vec_element(s, tcg_final, rd, 0, MO_64);
8518     } else {
8519         write_vec_element(s, tcg_final, rd, 1, MO_64);
8520     }
8521
8522     if (round) {
8523         tcg_temp_free_i64(tcg_round);
8524     }
8525     tcg_temp_free_i64(tcg_rn);
8526     tcg_temp_free_i64(tcg_rd);
8527     tcg_temp_free_i64(tcg_final);
8528     return;
8529 }
8530
8531
8532 /* C3.6.14 AdvSIMD shift by immediate
8533  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
8534  * +---+---+---+-------------+------+------+--------+---+------+------+
8535  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8536  * +---+---+---+-------------+------+------+--------+---+------+------+
8537  */
8538 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
8539 {
8540     int rd = extract32(insn, 0, 5);
8541     int rn = extract32(insn, 5, 5);
8542     int opcode = extract32(insn, 11, 5);
8543     int immb = extract32(insn, 16, 3);
8544     int immh = extract32(insn, 19, 4);
8545     bool is_u = extract32(insn, 29, 1);
8546     bool is_q = extract32(insn, 30, 1);
8547
8548     switch (opcode) {
8549     case 0x08: /* SRI */
8550         if (!is_u) {
8551             unallocated_encoding(s);
8552             return;
8553         }
8554         /* fall through */
8555     case 0x00: /* SSHR / USHR */
8556     case 0x02: /* SSRA / USRA (accumulate) */
8557     case 0x04: /* SRSHR / URSHR (rounding) */
8558     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8559         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
8560         break;
8561     case 0x0a: /* SHL / SLI */
8562         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8563         break;
8564     case 0x10: /* SHRN */
8565     case 0x11: /* RSHRN / SQRSHRUN */
8566         if (is_u) {
8567             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
8568                                    opcode, rn, rd);
8569         } else {
8570             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
8571         }
8572         break;
8573     case 0x12: /* SQSHRN / UQSHRN */
8574     case 0x13: /* SQRSHRN / UQRSHRN */
8575         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
8576                                opcode, rn, rd);
8577         break;
8578     case 0x14: /* SSHLL / USHLL */
8579         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8580         break;
8581     case 0x1c: /* SCVTF / UCVTF */
8582         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
8583                                      opcode, rn, rd);
8584         break;
8585     case 0xc: /* SQSHLU */
8586         if (!is_u) {
8587             unallocated_encoding(s);
8588             return;
8589         }
8590         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
8591         break;
8592     case 0xe: /* SQSHL, UQSHL */
8593         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
8594         break;
8595     case 0x1f: /* FCVTZS/ FCVTZU */
8596         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
8597         return;
8598     default:
8599         unallocated_encoding(s);
8600         return;
8601     }
8602 }
8603
8604 /* Generate code to do a "long" addition or subtraction, ie one done in
8605  * TCGv_i64 on vector lanes twice the width specified by size.
8606  */
8607 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
8608                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
8609 {
8610     static NeonGenTwo64OpFn * const fns[3][2] = {
8611         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
8612         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
8613         { tcg_gen_add_i64, tcg_gen_sub_i64 },
8614     };
8615     NeonGenTwo64OpFn *genfn;
8616     assert(size < 3);
8617
8618     genfn = fns[size][is_sub];
8619     genfn(tcg_res, tcg_op1, tcg_op2);
8620 }
8621
8622 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
8623                                 int opcode, int rd, int rn, int rm)
8624 {
8625     /* 3-reg-different widening insns: 64 x 64 -> 128 */
8626     TCGv_i64 tcg_res[2];
8627     int pass, accop;
8628
8629     tcg_res[0] = tcg_temp_new_i64();
8630     tcg_res[1] = tcg_temp_new_i64();
8631
8632     /* Does this op do an adding accumulate, a subtracting accumulate,
8633      * or no accumulate at all?
8634      */
8635     switch (opcode) {
8636     case 5:
8637     case 8:
8638     case 9:
8639         accop = 1;
8640         break;
8641     case 10:
8642     case 11:
8643         accop = -1;
8644         break;
8645     default:
8646         accop = 0;
8647         break;
8648     }
8649
8650     if (accop != 0) {
8651         read_vec_element(s, tcg_res[0], rd, 0, MO_64);
8652         read_vec_element(s, tcg_res[1], rd, 1, MO_64);
8653     }
8654
8655     /* size == 2 means two 32x32->64 operations; this is worth special
8656      * casing because we can generally handle it inline.
8657      */
8658     if (size == 2) {
8659         for (pass = 0; pass < 2; pass++) {
8660             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8661             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8662             TCGv_i64 tcg_passres;
8663             TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
8664
8665             int elt = pass + is_q * 2;
8666
8667             read_vec_element(s, tcg_op1, rn, elt, memop);
8668             read_vec_element(s, tcg_op2, rm, elt, memop);
8669
8670             if (accop == 0) {
8671                 tcg_passres = tcg_res[pass];
8672             } else {
8673                 tcg_passres = tcg_temp_new_i64();
8674             }
8675
8676             switch (opcode) {
8677             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8678                 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
8679                 break;
8680             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8681                 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
8682                 break;
8683             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8684             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8685             {
8686                 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
8687                 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
8688
8689                 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
8690                 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
8691                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
8692                                     tcg_passres,
8693                                     tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
8694                 tcg_temp_free_i64(tcg_tmp1);
8695                 tcg_temp_free_i64(tcg_tmp2);
8696                 break;
8697             }
8698             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8699             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8700             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8701                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8702                 break;
8703             case 9: /* SQDMLAL, SQDMLAL2 */
8704             case 11: /* SQDMLSL, SQDMLSL2 */
8705             case 13: /* SQDMULL, SQDMULL2 */
8706                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8707                 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
8708                                                   tcg_passres, tcg_passres);
8709                 break;
8710             default:
8711                 g_assert_not_reached();
8712             }
8713
8714             if (opcode == 9 || opcode == 11) {
8715                 /* saturating accumulate ops */
8716                 if (accop < 0) {
8717                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
8718                 }
8719                 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
8720                                                   tcg_res[pass], tcg_passres);
8721             } else if (accop > 0) {
8722                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8723             } else if (accop < 0) {
8724                 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8725             }
8726
8727             if (accop != 0) {
8728                 tcg_temp_free_i64(tcg_passres);
8729             }
8730
8731             tcg_temp_free_i64(tcg_op1);
8732             tcg_temp_free_i64(tcg_op2);
8733         }
8734     } else {
8735         /* size 0 or 1, generally helper functions */
8736         for (pass = 0; pass < 2; pass++) {
8737             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8738             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8739             TCGv_i64 tcg_passres;
8740             int elt = pass + is_q * 2;
8741
8742             read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
8743             read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
8744
8745             if (accop == 0) {
8746                 tcg_passres = tcg_res[pass];
8747             } else {
8748                 tcg_passres = tcg_temp_new_i64();
8749             }
8750
8751             switch (opcode) {
8752             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8753             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8754             {
8755                 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
8756                 static NeonGenWidenFn * const widenfns[2][2] = {
8757                     { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8758                     { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8759                 };
8760                 NeonGenWidenFn *widenfn = widenfns[size][is_u];
8761
8762                 widenfn(tcg_op2_64, tcg_op2);
8763                 widenfn(tcg_passres, tcg_op1);
8764                 gen_neon_addl(size, (opcode == 2), tcg_passres,
8765                               tcg_passres, tcg_op2_64);
8766                 tcg_temp_free_i64(tcg_op2_64);
8767                 break;
8768             }
8769             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8770             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8771                 if (size == 0) {
8772                     if (is_u) {
8773                         gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
8774                     } else {
8775                         gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
8776                     }
8777                 } else {
8778                     if (is_u) {
8779                         gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
8780                     } else {
8781                         gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
8782                     }
8783                 }
8784                 break;
8785             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8786             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8787             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8788                 if (size == 0) {
8789                     if (is_u) {
8790                         gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
8791                     } else {
8792                         gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
8793                     }
8794                 } else {
8795                     if (is_u) {
8796                         gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
8797                     } else {
8798                         gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8799                     }
8800                 }
8801                 break;
8802             case 9: /* SQDMLAL, SQDMLAL2 */
8803             case 11: /* SQDMLSL, SQDMLSL2 */
8804             case 13: /* SQDMULL, SQDMULL2 */
8805                 assert(size == 1);
8806                 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8807                 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
8808                                                   tcg_passres, tcg_passres);
8809                 break;
8810             case 14: /* PMULL */
8811                 assert(size == 0);
8812                 gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
8813                 break;
8814             default:
8815                 g_assert_not_reached();
8816             }
8817             tcg_temp_free_i32(tcg_op1);
8818             tcg_temp_free_i32(tcg_op2);
8819
8820             if (accop != 0) {
8821                 if (opcode == 9 || opcode == 11) {
8822                     /* saturating accumulate ops */
8823                     if (accop < 0) {
8824                         gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
8825                     }
8826                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
8827                                                       tcg_res[pass],
8828                                                       tcg_passres);
8829                 } else {
8830                     gen_neon_addl(size, (accop < 0), tcg_res[pass],
8831                                   tcg_res[pass], tcg_passres);
8832                 }
8833                 tcg_temp_free_i64(tcg_passres);
8834             }
8835         }
8836     }
8837
8838     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8839     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8840     tcg_temp_free_i64(tcg_res[0]);
8841     tcg_temp_free_i64(tcg_res[1]);
8842 }
8843
8844 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
8845                             int opcode, int rd, int rn, int rm)
8846 {
8847     TCGv_i64 tcg_res[2];
8848     int part = is_q ? 2 : 0;
8849     int pass;
8850
8851     for (pass = 0; pass < 2; pass++) {
8852         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8853         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8854         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
8855         static NeonGenWidenFn * const widenfns[3][2] = {
8856             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8857             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8858             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
8859         };
8860         NeonGenWidenFn *widenfn = widenfns[size][is_u];
8861
8862         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8863         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
8864         widenfn(tcg_op2_wide, tcg_op2);
8865         tcg_temp_free_i32(tcg_op2);
8866         tcg_res[pass] = tcg_temp_new_i64();
8867         gen_neon_addl(size, (opcode == 3),
8868                       tcg_res[pass], tcg_op1, tcg_op2_wide);
8869         tcg_temp_free_i64(tcg_op1);
8870         tcg_temp_free_i64(tcg_op2_wide);
8871     }
8872
8873     for (pass = 0; pass < 2; pass++) {
8874         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8875         tcg_temp_free_i64(tcg_res[pass]);
8876     }
8877 }
8878
8879 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
8880 {
8881     tcg_gen_addi_i64(in, in, 1U << 31);
8882     tcg_gen_extrh_i64_i32(res, in);
8883 }
8884
8885 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
8886                                  int opcode, int rd, int rn, int rm)
8887 {
8888     TCGv_i32 tcg_res[2];
8889     int part = is_q ? 2 : 0;
8890     int pass;
8891
8892     for (pass = 0; pass < 2; pass++) {
8893         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8894         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8895         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
8896         static NeonGenNarrowFn * const narrowfns[3][2] = {
8897             { gen_helper_neon_narrow_high_u8,
8898               gen_helper_neon_narrow_round_high_u8 },
8899             { gen_helper_neon_narrow_high_u16,
8900               gen_helper_neon_narrow_round_high_u16 },
8901             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
8902         };
8903         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
8904
8905         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8906         read_vec_element(s, tcg_op2, rm, pass, MO_64);
8907
8908         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
8909
8910         tcg_temp_free_i64(tcg_op1);
8911         tcg_temp_free_i64(tcg_op2);
8912
8913         tcg_res[pass] = tcg_temp_new_i32();
8914         gennarrow(tcg_res[pass], tcg_wideres);
8915         tcg_temp_free_i64(tcg_wideres);
8916     }
8917
8918     for (pass = 0; pass < 2; pass++) {
8919         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
8920         tcg_temp_free_i32(tcg_res[pass]);
8921     }
8922     if (!is_q) {
8923         clear_vec_high(s, rd);
8924     }
8925 }
8926
8927 static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
8928 {
8929     /* PMULL of 64 x 64 -> 128 is an odd special case because it
8930      * is the only three-reg-diff instruction which produces a
8931      * 128-bit wide result from a single operation. However since
8932      * it's possible to calculate the two halves more or less
8933      * separately we just use two helper calls.
8934      */
8935     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8936     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8937     TCGv_i64 tcg_res = tcg_temp_new_i64();
8938
8939     read_vec_element(s, tcg_op1, rn, is_q, MO_64);
8940     read_vec_element(s, tcg_op2, rm, is_q, MO_64);
8941     gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
8942     write_vec_element(s, tcg_res, rd, 0, MO_64);
8943     gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
8944     write_vec_element(s, tcg_res, rd, 1, MO_64);
8945
8946     tcg_temp_free_i64(tcg_op1);
8947     tcg_temp_free_i64(tcg_op2);
8948     tcg_temp_free_i64(tcg_res);
8949 }
8950
8951 /* C3.6.15 AdvSIMD three different
8952  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8953  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8954  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8955  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8956  */
8957 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
8958 {
8959     /* Instructions in this group fall into three basic classes
8960      * (in each case with the operation working on each element in
8961      * the input vectors):
8962      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
8963      *     128 bit input)
8964      * (2) wide 64 x 128 -> 128
8965      * (3) narrowing 128 x 128 -> 64
8966      * Here we do initial decode, catch unallocated cases and
8967      * dispatch to separate functions for each class.
8968      */
8969     int is_q = extract32(insn, 30, 1);
8970     int is_u = extract32(insn, 29, 1);
8971     int size = extract32(insn, 22, 2);
8972     int opcode = extract32(insn, 12, 4);
8973     int rm = extract32(insn, 16, 5);
8974     int rn = extract32(insn, 5, 5);
8975     int rd = extract32(insn, 0, 5);
8976
8977     switch (opcode) {
8978     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
8979     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
8980         /* 64 x 128 -> 128 */
8981         if (size == 3) {
8982             unallocated_encoding(s);
8983             return;
8984         }
8985         if (!fp_access_check(s)) {
8986             return;
8987         }
8988         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
8989         break;
8990     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
8991     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
8992         /* 128 x 128 -> 64 */
8993         if (size == 3) {
8994             unallocated_encoding(s);
8995             return;
8996         }
8997         if (!fp_access_check(s)) {
8998             return;
8999         }
9000         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
9001         break;
9002     case 14: /* PMULL, PMULL2 */
9003         if (is_u || size == 1 || size == 2) {
9004             unallocated_encoding(s);
9005             return;
9006         }
9007         if (size == 3) {
9008             if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
9009                 unallocated_encoding(s);
9010                 return;
9011             }
9012             if (!fp_access_check(s)) {
9013                 return;
9014             }
9015             handle_pmull_64(s, is_q, rd, rn, rm);
9016             return;
9017         }
9018         goto is_widening;
9019     case 9: /* SQDMLAL, SQDMLAL2 */
9020     case 11: /* SQDMLSL, SQDMLSL2 */
9021     case 13: /* SQDMULL, SQDMULL2 */
9022         if (is_u || size == 0) {
9023             unallocated_encoding(s);
9024             return;
9025         }
9026         /* fall through */
9027     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
9028     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
9029     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
9030     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
9031     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
9032     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
9033     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
9034         /* 64 x 64 -> 128 */
9035         if (size == 3) {
9036             unallocated_encoding(s);
9037             return;
9038         }
9039     is_widening:
9040         if (!fp_access_check(s)) {
9041             return;
9042         }
9043
9044         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
9045         break;
9046     default:
9047         /* opcode 15 not allocated */
9048         unallocated_encoding(s);
9049         break;
9050     }
9051 }
9052
9053 /* Logic op (opcode == 3) subgroup of C3.6.16. */
9054 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
9055 {
9056     int rd = extract32(insn, 0, 5);
9057     int rn = extract32(insn, 5, 5);
9058     int rm = extract32(insn, 16, 5);
9059     int size = extract32(insn, 22, 2);
9060     bool is_u = extract32(insn, 29, 1);
9061     bool is_q = extract32(insn, 30, 1);
9062     TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
9063     int pass;
9064
9065     if (!fp_access_check(s)) {
9066         return;
9067     }
9068
9069     tcg_op1 = tcg_temp_new_i64();
9070     tcg_op2 = tcg_temp_new_i64();
9071     tcg_res[0] = tcg_temp_new_i64();
9072     tcg_res[1] = tcg_temp_new_i64();
9073
9074     for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
9075         read_vec_element(s, tcg_op1, rn, pass, MO_64);
9076         read_vec_element(s, tcg_op2, rm, pass, MO_64);
9077
9078         if (!is_u) {
9079             switch (size) {
9080             case 0: /* AND */
9081                 tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
9082                 break;
9083             case 1: /* BIC */
9084                 tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
9085                 break;
9086             case 2: /* ORR */
9087                 tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
9088                 break;
9089             case 3: /* ORN */
9090                 tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
9091                 break;
9092             }
9093         } else {
9094             if (size != 0) {
9095                 /* B* ops need res loaded to operate on */
9096                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9097             }
9098
9099             switch (size) {
9100             case 0: /* EOR */
9101                 tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
9102                 break;
9103             case 1: /* BSL bitwise select */
9104                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2);
9105                 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]);
9106                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1);
9107                 break;
9108             case 2: /* BIT, bitwise insert if true */
9109                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
9110                 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2);
9111                 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9112                 break;
9113             case 3: /* BIF, bitwise insert if false */
9114                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
9115                 tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2);
9116                 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9117                 break;
9118             }
9119         }
9120     }
9121
9122     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
9123     if (!is_q) {
9124         tcg_gen_movi_i64(tcg_res[1], 0);
9125     }
9126     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
9127
9128     tcg_temp_free_i64(tcg_op1);
9129     tcg_temp_free_i64(tcg_op2);
9130     tcg_temp_free_i64(tcg_res[0]);
9131     tcg_temp_free_i64(tcg_res[1]);
9132 }
9133
9134 /* Helper functions for 32 bit comparisons */
9135 static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9136 {
9137     tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
9138 }
9139
9140 static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9141 {
9142     tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
9143 }
9144
9145 static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9146 {
9147     tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
9148 }
9149
9150 static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9151 {
9152     tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
9153 }
9154
9155 /* Pairwise op subgroup of C3.6.16.
9156  *
9157  * This is called directly or via the handle_3same_float for float pairwise
9158  * operations where the opcode and size are calculated differently.
9159  */
9160 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
9161                                    int size, int rn, int rm, int rd)
9162 {
9163     TCGv_ptr fpst;
9164     int pass;
9165
9166     /* Floating point operations need fpst */
9167     if (opcode >= 0x58) {
9168         fpst = get_fpstatus_ptr();
9169     } else {
9170         TCGV_UNUSED_PTR(fpst);
9171     }
9172
9173     if (!fp_access_check(s)) {
9174         return;
9175     }
9176
9177     /* These operations work on the concatenated rm:rn, with each pair of
9178      * adjacent elements being operated on to produce an element in the result.
9179      */
9180     if (size == 3) {
9181         TCGv_i64 tcg_res[2];
9182
9183         for (pass = 0; pass < 2; pass++) {
9184             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9185             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9186             int passreg = (pass == 0) ? rn : rm;
9187
9188             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
9189             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
9190             tcg_res[pass] = tcg_temp_new_i64();
9191
9192             switch (opcode) {
9193             case 0x17: /* ADDP */
9194                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9195                 break;
9196             case 0x58: /* FMAXNMP */
9197                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9198                 break;
9199             case 0x5a: /* FADDP */
9200                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9201                 break;
9202             case 0x5e: /* FMAXP */
9203                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9204                 break;
9205             case 0x78: /* FMINNMP */
9206                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9207                 break;
9208             case 0x7e: /* FMINP */
9209                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9210                 break;
9211             default:
9212                 g_assert_not_reached();
9213             }
9214
9215             tcg_temp_free_i64(tcg_op1);
9216             tcg_temp_free_i64(tcg_op2);
9217         }
9218
9219         for (pass = 0; pass < 2; pass++) {
9220             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9221             tcg_temp_free_i64(tcg_res[pass]);
9222         }
9223     } else {
9224         int maxpass = is_q ? 4 : 2;
9225         TCGv_i32 tcg_res[4];
9226
9227         for (pass = 0; pass < maxpass; pass++) {
9228             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9229             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9230             NeonGenTwoOpFn *genfn = NULL;
9231             int passreg = pass < (maxpass / 2) ? rn : rm;
9232             int passelt = (is_q && (pass & 1)) ? 2 : 0;
9233
9234             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
9235             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
9236             tcg_res[pass] = tcg_temp_new_i32();
9237
9238             switch (opcode) {
9239             case 0x17: /* ADDP */
9240             {
9241                 static NeonGenTwoOpFn * const fns[3] = {
9242                     gen_helper_neon_padd_u8,
9243                     gen_helper_neon_padd_u16,
9244                     tcg_gen_add_i32,
9245                 };
9246                 genfn = fns[size];
9247                 break;
9248             }
9249             case 0x14: /* SMAXP, UMAXP */
9250             {
9251                 static NeonGenTwoOpFn * const fns[3][2] = {
9252                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
9253                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
9254                     { gen_max_s32, gen_max_u32 },
9255                 };
9256                 genfn = fns[size][u];
9257                 break;
9258             }
9259             case 0x15: /* SMINP, UMINP */
9260             {
9261                 static NeonGenTwoOpFn * const fns[3][2] = {
9262                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
9263                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
9264                     { gen_min_s32, gen_min_u32 },
9265                 };
9266                 genfn = fns[size][u];
9267                 break;
9268             }
9269             /* The FP operations are all on single floats (32 bit) */
9270             case 0x58: /* FMAXNMP */
9271                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9272                 break;
9273             case 0x5a: /* FADDP */
9274                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9275                 break;
9276             case 0x5e: /* FMAXP */
9277                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9278                 break;
9279             case 0x78: /* FMINNMP */
9280                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9281                 break;
9282             case 0x7e: /* FMINP */
9283                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9284                 break;
9285             default:
9286                 g_assert_not_reached();
9287             }
9288
9289             /* FP ops called directly, otherwise call now */
9290             if (genfn) {
9291                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
9292             }
9293
9294             tcg_temp_free_i32(tcg_op1);
9295             tcg_temp_free_i32(tcg_op2);
9296         }
9297
9298         for (pass = 0; pass < maxpass; pass++) {
9299             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9300             tcg_temp_free_i32(tcg_res[pass]);
9301         }
9302         if (!is_q) {
9303             clear_vec_high(s, rd);
9304         }
9305     }
9306
9307     if (!TCGV_IS_UNUSED_PTR(fpst)) {
9308         tcg_temp_free_ptr(fpst);
9309     }
9310 }
9311
9312 /* Floating point op subgroup of C3.6.16. */
9313 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
9314 {
9315     /* For floating point ops, the U, size[1] and opcode bits
9316      * together indicate the operation. size[0] indicates single
9317      * or double.
9318      */
9319     int fpopcode = extract32(insn, 11, 5)
9320         | (extract32(insn, 23, 1) << 5)
9321         | (extract32(insn, 29, 1) << 6);
9322     int is_q = extract32(insn, 30, 1);
9323     int size = extract32(insn, 22, 1);
9324     int rm = extract32(insn, 16, 5);
9325     int rn = extract32(insn, 5, 5);
9326     int rd = extract32(insn, 0, 5);
9327
9328     int datasize = is_q ? 128 : 64;
9329     int esize = 32 << size;
9330     int elements = datasize / esize;
9331
9332     if (size == 1 && !is_q) {
9333         unallocated_encoding(s);
9334         return;
9335     }
9336
9337     switch (fpopcode) {
9338     case 0x58: /* FMAXNMP */
9339     case 0x5a: /* FADDP */
9340     case 0x5e: /* FMAXP */
9341     case 0x78: /* FMINNMP */
9342     case 0x7e: /* FMINP */
9343         if (size && !is_q) {
9344             unallocated_encoding(s);
9345             return;
9346         }
9347         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
9348                                rn, rm, rd);
9349         return;
9350     case 0x1b: /* FMULX */
9351     case 0x1f: /* FRECPS */
9352     case 0x3f: /* FRSQRTS */
9353     case 0x5d: /* FACGE */
9354     case 0x7d: /* FACGT */
9355     case 0x19: /* FMLA */
9356     case 0x39: /* FMLS */
9357     case 0x18: /* FMAXNM */
9358     case 0x1a: /* FADD */
9359     case 0x1c: /* FCMEQ */
9360     case 0x1e: /* FMAX */
9361     case 0x38: /* FMINNM */
9362     case 0x3a: /* FSUB */
9363     case 0x3e: /* FMIN */
9364     case 0x5b: /* FMUL */
9365     case 0x5c: /* FCMGE */
9366     case 0x5f: /* FDIV */
9367     case 0x7a: /* FABD */
9368     case 0x7c: /* FCMGT */
9369         if (!fp_access_check(s)) {
9370             return;
9371         }
9372
9373         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
9374         return;
9375     default:
9376         unallocated_encoding(s);
9377         return;
9378     }
9379 }
9380
9381 /* Integer op subgroup of C3.6.16. */
9382 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
9383 {
9384     int is_q = extract32(insn, 30, 1);
9385     int u = extract32(insn, 29, 1);
9386     int size = extract32(insn, 22, 2);
9387     int opcode = extract32(insn, 11, 5);
9388     int rm = extract32(insn, 16, 5);
9389     int rn = extract32(insn, 5, 5);
9390     int rd = extract32(insn, 0, 5);
9391     int pass;
9392
9393     switch (opcode) {
9394     case 0x13: /* MUL, PMUL */
9395         if (u && size != 0) {
9396             unallocated_encoding(s);
9397             return;
9398         }
9399         /* fall through */
9400     case 0x0: /* SHADD, UHADD */
9401     case 0x2: /* SRHADD, URHADD */
9402     case 0x4: /* SHSUB, UHSUB */
9403     case 0xc: /* SMAX, UMAX */
9404     case 0xd: /* SMIN, UMIN */
9405     case 0xe: /* SABD, UABD */
9406     case 0xf: /* SABA, UABA */
9407     case 0x12: /* MLA, MLS */
9408         if (size == 3) {
9409             unallocated_encoding(s);
9410             return;
9411         }
9412         break;
9413     case 0x16: /* SQDMULH, SQRDMULH */
9414         if (size == 0 || size == 3) {
9415             unallocated_encoding(s);
9416             return;
9417         }
9418         break;
9419     default:
9420         if (size == 3 && !is_q) {
9421             unallocated_encoding(s);
9422             return;
9423         }
9424         break;
9425     }
9426
9427     if (!fp_access_check(s)) {
9428         return;
9429     }
9430
9431     if (size == 3) {
9432         assert(is_q);
9433         for (pass = 0; pass < 2; pass++) {
9434             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9435             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9436             TCGv_i64 tcg_res = tcg_temp_new_i64();
9437
9438             read_vec_element(s, tcg_op1, rn, pass, MO_64);
9439             read_vec_element(s, tcg_op2, rm, pass, MO_64);
9440
9441             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
9442
9443             write_vec_element(s, tcg_res, rd, pass, MO_64);
9444
9445             tcg_temp_free_i64(tcg_res);
9446             tcg_temp_free_i64(tcg_op1);
9447             tcg_temp_free_i64(tcg_op2);
9448         }
9449     } else {
9450         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
9451             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9452             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9453             TCGv_i32 tcg_res = tcg_temp_new_i32();
9454             NeonGenTwoOpFn *genfn = NULL;
9455             NeonGenTwoOpEnvFn *genenvfn = NULL;
9456
9457             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9458             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9459
9460             switch (opcode) {
9461             case 0x0: /* SHADD, UHADD */
9462             {
9463                 static NeonGenTwoOpFn * const fns[3][2] = {
9464                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
9465                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
9466                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
9467                 };
9468                 genfn = fns[size][u];
9469                 break;
9470             }
9471             case 0x1: /* SQADD, UQADD */
9472             {
9473                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9474                     { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9475                     { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9476                     { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9477                 };
9478                 genenvfn = fns[size][u];
9479                 break;
9480             }
9481             case 0x2: /* SRHADD, URHADD */
9482             {
9483                 static NeonGenTwoOpFn * const fns[3][2] = {
9484                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
9485                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
9486                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
9487                 };
9488                 genfn = fns[size][u];
9489                 break;
9490             }
9491             case 0x4: /* SHSUB, UHSUB */
9492             {
9493                 static NeonGenTwoOpFn * const fns[3][2] = {
9494                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
9495                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
9496                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
9497                 };
9498                 genfn = fns[size][u];
9499                 break;
9500             }
9501             case 0x5: /* SQSUB, UQSUB */
9502             {
9503                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9504                     { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9505                     { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9506                     { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9507                 };
9508                 genenvfn = fns[size][u];
9509                 break;
9510             }
9511             case 0x6: /* CMGT, CMHI */
9512             {
9513                 static NeonGenTwoOpFn * const fns[3][2] = {
9514                     { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
9515                     { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
9516                     { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
9517                 };
9518                 genfn = fns[size][u];
9519                 break;
9520             }
9521             case 0x7: /* CMGE, CMHS */
9522             {
9523                 static NeonGenTwoOpFn * const fns[3][2] = {
9524                     { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
9525                     { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
9526                     { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
9527                 };
9528                 genfn = fns[size][u];
9529                 break;
9530             }
9531             case 0x8: /* SSHL, USHL */
9532             {
9533                 static NeonGenTwoOpFn * const fns[3][2] = {
9534                     { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
9535                     { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
9536                     { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
9537                 };
9538                 genfn = fns[size][u];
9539                 break;
9540             }
9541             case 0x9: /* SQSHL, UQSHL */
9542             {
9543                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9544                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9545                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9546                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9547                 };
9548                 genenvfn = fns[size][u];
9549                 break;
9550             }
9551             case 0xa: /* SRSHL, URSHL */
9552             {
9553                 static NeonGenTwoOpFn * const fns[3][2] = {
9554                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
9555                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
9556                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
9557                 };
9558                 genfn = fns[size][u];
9559                 break;
9560             }
9561             case 0xb: /* SQRSHL, UQRSHL */
9562             {
9563                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9564                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9565                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9566                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9567                 };
9568                 genenvfn = fns[size][u];
9569                 break;
9570             }
9571             case 0xc: /* SMAX, UMAX */
9572             {
9573                 static NeonGenTwoOpFn * const fns[3][2] = {
9574                     { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
9575                     { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
9576                     { gen_max_s32, gen_max_u32 },
9577                 };
9578                 genfn = fns[size][u];
9579                 break;
9580             }
9581
9582             case 0xd: /* SMIN, UMIN */
9583             {
9584                 static NeonGenTwoOpFn * const fns[3][2] = {
9585                     { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
9586                     { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
9587                     { gen_min_s32, gen_min_u32 },
9588                 };
9589                 genfn = fns[size][u];
9590                 break;
9591             }
9592             case 0xe: /* SABD, UABD */
9593             case 0xf: /* SABA, UABA */
9594             {
9595                 static NeonGenTwoOpFn * const fns[3][2] = {
9596                     { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
9597                     { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
9598                     { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
9599                 };
9600                 genfn = fns[size][u];
9601                 break;
9602             }
9603             case 0x10: /* ADD, SUB */
9604             {
9605                 static NeonGenTwoOpFn * const fns[3][2] = {
9606                     { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9607                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9608                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
9609                 };
9610                 genfn = fns[size][u];
9611                 break;
9612             }
9613             case 0x11: /* CMTST, CMEQ */
9614             {
9615                 static NeonGenTwoOpFn * const fns[3][2] = {
9616                     { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
9617                     { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
9618                     { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
9619                 };
9620                 genfn = fns[size][u];
9621                 break;
9622             }
9623             case 0x13: /* MUL, PMUL */
9624                 if (u) {
9625                     /* PMUL */
9626                     assert(size == 0);
9627                     genfn = gen_helper_neon_mul_p8;
9628                     break;
9629                 }
9630                 /* fall through : MUL */
9631             case 0x12: /* MLA, MLS */
9632             {
9633                 static NeonGenTwoOpFn * const fns[3] = {
9634                     gen_helper_neon_mul_u8,
9635                     gen_helper_neon_mul_u16,
9636                     tcg_gen_mul_i32,
9637                 };
9638                 genfn = fns[size];
9639                 break;
9640             }
9641             case 0x16: /* SQDMULH, SQRDMULH */
9642             {
9643                 static NeonGenTwoOpEnvFn * const fns[2][2] = {
9644                     { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9645                     { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9646                 };
9647                 assert(size == 1 || size == 2);
9648                 genenvfn = fns[size - 1][u];
9649                 break;
9650             }
9651             default:
9652                 g_assert_not_reached();
9653             }
9654
9655             if (genenvfn) {
9656                 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
9657             } else {
9658                 genfn(tcg_res, tcg_op1, tcg_op2);
9659             }
9660
9661             if (opcode == 0xf || opcode == 0x12) {
9662                 /* SABA, UABA, MLA, MLS: accumulating ops */
9663                 static NeonGenTwoOpFn * const fns[3][2] = {
9664                     { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9665                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9666                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
9667                 };
9668                 bool is_sub = (opcode == 0x12 && u); /* MLS */
9669
9670                 genfn = fns[size][is_sub];
9671                 read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
9672                 genfn(tcg_res, tcg_op1, tcg_res);
9673             }
9674
9675             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9676
9677             tcg_temp_free_i32(tcg_res);
9678             tcg_temp_free_i32(tcg_op1);
9679             tcg_temp_free_i32(tcg_op2);
9680         }
9681     }
9682
9683     if (!is_q) {
9684         clear_vec_high(s, rd);
9685     }
9686 }
9687
9688 /* C3.6.16 AdvSIMD three same
9689  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
9690  * +---+---+---+-----------+------+---+------+--------+---+------+------+
9691  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9692  * +---+---+---+-----------+------+---+------+--------+---+------+------+
9693  */
9694 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
9695 {
9696     int opcode = extract32(insn, 11, 5);
9697
9698     switch (opcode) {
9699     case 0x3: /* logic ops */
9700         disas_simd_3same_logic(s, insn);
9701         break;
9702     case 0x17: /* ADDP */
9703     case 0x14: /* SMAXP, UMAXP */
9704     case 0x15: /* SMINP, UMINP */
9705     {
9706         /* Pairwise operations */
9707         int is_q = extract32(insn, 30, 1);
9708         int u = extract32(insn, 29, 1);
9709         int size = extract32(insn, 22, 2);
9710         int rm = extract32(insn, 16, 5);
9711         int rn = extract32(insn, 5, 5);
9712         int rd = extract32(insn, 0, 5);
9713         if (opcode == 0x17) {
9714             if (u || (size == 3 && !is_q)) {
9715                 unallocated_encoding(s);
9716                 return;
9717             }
9718         } else {
9719             if (size == 3) {
9720                 unallocated_encoding(s);
9721                 return;
9722             }
9723         }
9724         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
9725         break;
9726     }
9727     case 0x18 ... 0x31:
9728         /* floating point ops, sz[1] and U are part of opcode */
9729         disas_simd_3same_float(s, insn);
9730         break;
9731     default:
9732         disas_simd_3same_int(s, insn);
9733         break;
9734     }
9735 }
9736
9737 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
9738                                   int size, int rn, int rd)
9739 {
9740     /* Handle 2-reg-misc ops which are widening (so each size element
9741      * in the source becomes a 2*size element in the destination.
9742      * The only instruction like this is FCVTL.
9743      */
9744     int pass;
9745
9746     if (size == 3) {
9747         /* 32 -> 64 bit fp conversion */
9748         TCGv_i64 tcg_res[2];
9749         int srcelt = is_q ? 2 : 0;
9750
9751         for (pass = 0; pass < 2; pass++) {
9752             TCGv_i32 tcg_op = tcg_temp_new_i32();
9753             tcg_res[pass] = tcg_temp_new_i64();
9754
9755             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
9756             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
9757             tcg_temp_free_i32(tcg_op);
9758         }
9759         for (pass = 0; pass < 2; pass++) {
9760             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9761             tcg_temp_free_i64(tcg_res[pass]);
9762         }
9763     } else {
9764         /* 16 -> 32 bit fp conversion */
9765         int srcelt = is_q ? 4 : 0;
9766         TCGv_i32 tcg_res[4];
9767
9768         for (pass = 0; pass < 4; pass++) {
9769             tcg_res[pass] = tcg_temp_new_i32();
9770
9771             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
9772             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
9773                                            cpu_env);
9774         }
9775         for (pass = 0; pass < 4; pass++) {
9776             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9777             tcg_temp_free_i32(tcg_res[pass]);
9778         }
9779     }
9780 }
9781
9782 static void handle_rev(DisasContext *s, int opcode, bool u,
9783                        bool is_q, int size, int rn, int rd)
9784 {
9785     int op = (opcode << 1) | u;
9786     int opsz = op + size;
9787     int grp_size = 3 - opsz;
9788     int dsize = is_q ? 128 : 64;
9789     int i;
9790
9791     if (opsz >= 3) {
9792         unallocated_encoding(s);
9793         return;
9794     }
9795
9796     if (!fp_access_check(s)) {
9797         return;
9798     }
9799
9800     if (size == 0) {
9801         /* Special case bytes, use bswap op on each group of elements */
9802         int groups = dsize / (8 << grp_size);
9803
9804         for (i = 0; i < groups; i++) {
9805             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9806
9807             read_vec_element(s, tcg_tmp, rn, i, grp_size);
9808             switch (grp_size) {
9809             case MO_16:
9810                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
9811                 break;
9812             case MO_32:
9813                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
9814                 break;
9815             case MO_64:
9816                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
9817                 break;
9818             default:
9819                 g_assert_not_reached();
9820             }
9821             write_vec_element(s, tcg_tmp, rd, i, grp_size);
9822             tcg_temp_free_i64(tcg_tmp);
9823         }
9824         if (!is_q) {
9825             clear_vec_high(s, rd);
9826         }
9827     } else {
9828         int revmask = (1 << grp_size) - 1;
9829         int esize = 8 << size;
9830         int elements = dsize / esize;
9831         TCGv_i64 tcg_rn = tcg_temp_new_i64();
9832         TCGv_i64 tcg_rd = tcg_const_i64(0);
9833         TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
9834
9835         for (i = 0; i < elements; i++) {
9836             int e_rev = (i & 0xf) ^ revmask;
9837             int off = e_rev * esize;
9838             read_vec_element(s, tcg_rn, rn, i, size);
9839             if (off >= 64) {
9840                 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
9841                                     tcg_rn, off - 64, esize);
9842             } else {
9843                 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
9844             }
9845         }
9846         write_vec_element(s, tcg_rd, rd, 0, MO_64);
9847         write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
9848
9849         tcg_temp_free_i64(tcg_rd_hi);
9850         tcg_temp_free_i64(tcg_rd);
9851         tcg_temp_free_i64(tcg_rn);
9852     }
9853 }
9854
9855 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
9856                                   bool is_q, int size, int rn, int rd)
9857 {
9858     /* Implement the pairwise operations from 2-misc:
9859      * SADDLP, UADDLP, SADALP, UADALP.
9860      * These all add pairs of elements in the input to produce a
9861      * double-width result element in the output (possibly accumulating).
9862      */
9863     bool accum = (opcode == 0x6);
9864     int maxpass = is_q ? 2 : 1;
9865     int pass;
9866     TCGv_i64 tcg_res[2];
9867
9868     if (size == 2) {
9869         /* 32 + 32 -> 64 op */
9870         TCGMemOp memop = size + (u ? 0 : MO_SIGN);
9871
9872         for (pass = 0; pass < maxpass; pass++) {
9873             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9874             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9875
9876             tcg_res[pass] = tcg_temp_new_i64();
9877
9878             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
9879             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
9880             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9881             if (accum) {
9882                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
9883                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9884             }
9885
9886             tcg_temp_free_i64(tcg_op1);
9887             tcg_temp_free_i64(tcg_op2);
9888         }
9889     } else {
9890         for (pass = 0; pass < maxpass; pass++) {
9891             TCGv_i64 tcg_op = tcg_temp_new_i64();
9892             NeonGenOneOpFn *genfn;
9893             static NeonGenOneOpFn * const fns[2][2] = {
9894                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
9895                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
9896             };
9897
9898             genfn = fns[size][u];
9899
9900             tcg_res[pass] = tcg_temp_new_i64();
9901
9902             read_vec_element(s, tcg_op, rn, pass, MO_64);
9903             genfn(tcg_res[pass], tcg_op);
9904
9905             if (accum) {
9906                 read_vec_element(s, tcg_op, rd, pass, MO_64);
9907                 if (size == 0) {
9908                     gen_helper_neon_addl_u16(tcg_res[pass],
9909                                              tcg_res[pass], tcg_op);
9910                 } else {
9911                     gen_helper_neon_addl_u32(tcg_res[pass],
9912                                              tcg_res[pass], tcg_op);
9913                 }
9914             }
9915             tcg_temp_free_i64(tcg_op);
9916         }
9917     }
9918     if (!is_q) {
9919         tcg_res[1] = tcg_const_i64(0);
9920     }
9921     for (pass = 0; pass < 2; pass++) {
9922         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9923         tcg_temp_free_i64(tcg_res[pass]);
9924     }
9925 }
9926
9927 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
9928 {
9929     /* Implement SHLL and SHLL2 */
9930     int pass;
9931     int part = is_q ? 2 : 0;
9932     TCGv_i64 tcg_res[2];
9933
9934     for (pass = 0; pass < 2; pass++) {
9935         static NeonGenWidenFn * const widenfns[3] = {
9936             gen_helper_neon_widen_u8,
9937             gen_helper_neon_widen_u16,
9938             tcg_gen_extu_i32_i64,
9939         };
9940         NeonGenWidenFn *widenfn = widenfns[size];
9941         TCGv_i32 tcg_op = tcg_temp_new_i32();
9942
9943         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
9944         tcg_res[pass] = tcg_temp_new_i64();
9945         widenfn(tcg_res[pass], tcg_op);
9946         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
9947
9948         tcg_temp_free_i32(tcg_op);
9949     }
9950
9951     for (pass = 0; pass < 2; pass++) {
9952         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9953         tcg_temp_free_i64(tcg_res[pass]);
9954     }
9955 }
9956
9957 /* C3.6.17 AdvSIMD two reg misc
9958  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9959  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9960  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9961  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9962  */
9963 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
9964 {
9965     int size = extract32(insn, 22, 2);
9966     int opcode = extract32(insn, 12, 5);
9967     bool u = extract32(insn, 29, 1);
9968     bool is_q = extract32(insn, 30, 1);
9969     int rn = extract32(insn, 5, 5);
9970     int rd = extract32(insn, 0, 5);
9971     bool need_fpstatus = false;
9972     bool need_rmode = false;
9973     int rmode = -1;
9974     TCGv_i32 tcg_rmode;
9975     TCGv_ptr tcg_fpstatus;
9976
9977     switch (opcode) {
9978     case 0x0: /* REV64, REV32 */
9979     case 0x1: /* REV16 */
9980         handle_rev(s, opcode, u, is_q, size, rn, rd);
9981         return;
9982     case 0x5: /* CNT, NOT, RBIT */
9983         if (u && size == 0) {
9984             /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
9985             size = 3;
9986             break;
9987         } else if (u && size == 1) {
9988             /* RBIT */
9989             break;
9990         } else if (!u && size == 0) {
9991             /* CNT */
9992             break;
9993         }
9994         unallocated_encoding(s);
9995         return;
9996     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
9997     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
9998         if (size == 3) {
9999             unallocated_encoding(s);
10000             return;
10001         }
10002         if (!fp_access_check(s)) {
10003             return;
10004         }
10005
10006         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
10007         return;
10008     case 0x4: /* CLS, CLZ */
10009         if (size == 3) {
10010             unallocated_encoding(s);
10011             return;
10012         }
10013         break;
10014     case 0x2: /* SADDLP, UADDLP */
10015     case 0x6: /* SADALP, UADALP */
10016         if (size == 3) {
10017             unallocated_encoding(s);
10018             return;
10019         }
10020         if (!fp_access_check(s)) {
10021             return;
10022         }
10023         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
10024         return;
10025     case 0x13: /* SHLL, SHLL2 */
10026         if (u == 0 || size == 3) {
10027             unallocated_encoding(s);
10028             return;
10029         }
10030         if (!fp_access_check(s)) {
10031             return;
10032         }
10033         handle_shll(s, is_q, size, rn, rd);
10034         return;
10035     case 0xa: /* CMLT */
10036         if (u == 1) {
10037             unallocated_encoding(s);
10038             return;
10039         }
10040         /* fall through */
10041     case 0x8: /* CMGT, CMGE */
10042     case 0x9: /* CMEQ, CMLE */
10043     case 0xb: /* ABS, NEG */
10044         if (size == 3 && !is_q) {
10045             unallocated_encoding(s);
10046             return;
10047         }
10048         break;
10049     case 0x3: /* SUQADD, USQADD */
10050         if (size == 3 && !is_q) {
10051             unallocated_encoding(s);
10052             return;
10053         }
10054         if (!fp_access_check(s)) {
10055             return;
10056         }
10057         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
10058         return;
10059     case 0x7: /* SQABS, SQNEG */
10060         if (size == 3 && !is_q) {
10061             unallocated_encoding(s);
10062             return;
10063         }
10064         break;
10065     case 0xc ... 0xf:
10066     case 0x16 ... 0x1d:
10067     case 0x1f:
10068     {
10069         /* Floating point: U, size[1] and opcode indicate operation;
10070          * size[0] indicates single or double precision.
10071          */
10072         int is_double = extract32(size, 0, 1);
10073         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
10074         size = is_double ? 3 : 2;
10075         switch (opcode) {
10076         case 0x2f: /* FABS */
10077         case 0x6f: /* FNEG */
10078             if (size == 3 && !is_q) {
10079                 unallocated_encoding(s);
10080                 return;
10081             }
10082             break;
10083         case 0x1d: /* SCVTF */
10084         case 0x5d: /* UCVTF */
10085         {
10086             bool is_signed = (opcode == 0x1d) ? true : false;
10087             int elements = is_double ? 2 : is_q ? 4 : 2;
10088             if (is_double && !is_q) {
10089                 unallocated_encoding(s);
10090                 return;
10091             }
10092             if (!fp_access_check(s)) {
10093                 return;
10094             }
10095             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
10096             return;
10097         }
10098         case 0x2c: /* FCMGT (zero) */
10099         case 0x2d: /* FCMEQ (zero) */
10100         case 0x2e: /* FCMLT (zero) */
10101         case 0x6c: /* FCMGE (zero) */
10102         case 0x6d: /* FCMLE (zero) */
10103             if (size == 3 && !is_q) {
10104                 unallocated_encoding(s);
10105                 return;
10106             }
10107             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
10108             return;
10109         case 0x7f: /* FSQRT */
10110             if (size == 3 && !is_q) {
10111                 unallocated_encoding(s);
10112                 return;
10113             }
10114             break;
10115         case 0x1a: /* FCVTNS */
10116         case 0x1b: /* FCVTMS */
10117         case 0x3a: /* FCVTPS */
10118         case 0x3b: /* FCVTZS */
10119         case 0x5a: /* FCVTNU */
10120         case 0x5b: /* FCVTMU */
10121         case 0x7a: /* FCVTPU */
10122         case 0x7b: /* FCVTZU */
10123             need_fpstatus = true;
10124             need_rmode = true;
10125             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10126             if (size == 3 && !is_q) {
10127                 unallocated_encoding(s);
10128                 return;
10129             }
10130             break;
10131         case 0x5c: /* FCVTAU */
10132         case 0x1c: /* FCVTAS */
10133             need_fpstatus = true;
10134             need_rmode = true;
10135             rmode = FPROUNDING_TIEAWAY;
10136             if (size == 3 && !is_q) {
10137                 unallocated_encoding(s);
10138                 return;
10139             }
10140             break;
10141         case 0x3c: /* URECPE */
10142             if (size == 3) {
10143                 unallocated_encoding(s);
10144                 return;
10145             }
10146             /* fall through */
10147         case 0x3d: /* FRECPE */
10148         case 0x7d: /* FRSQRTE */
10149             if (size == 3 && !is_q) {
10150                 unallocated_encoding(s);
10151                 return;
10152             }
10153             if (!fp_access_check(s)) {
10154                 return;
10155             }
10156             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
10157             return;
10158         case 0x56: /* FCVTXN, FCVTXN2 */
10159             if (size == 2) {
10160                 unallocated_encoding(s);
10161                 return;
10162             }
10163             /* fall through */
10164         case 0x16: /* FCVTN, FCVTN2 */
10165             /* handle_2misc_narrow does a 2*size -> size operation, but these
10166              * instructions encode the source size rather than dest size.
10167              */
10168             if (!fp_access_check(s)) {
10169                 return;
10170             }
10171             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
10172             return;
10173         case 0x17: /* FCVTL, FCVTL2 */
10174             if (!fp_access_check(s)) {
10175                 return;
10176             }
10177             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
10178             return;
10179         case 0x18: /* FRINTN */
10180         case 0x19: /* FRINTM */
10181         case 0x38: /* FRINTP */
10182         case 0x39: /* FRINTZ */
10183             need_rmode = true;
10184             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
10185             /* fall through */
10186         case 0x59: /* FRINTX */
10187         case 0x79: /* FRINTI */
10188             need_fpstatus = true;
10189             if (size == 3 && !is_q) {
10190                 unallocated_encoding(s);
10191                 return;
10192             }
10193             break;
10194         case 0x58: /* FRINTA */
10195             need_rmode = true;
10196             rmode = FPROUNDING_TIEAWAY;
10197             need_fpstatus = true;
10198             if (size == 3 && !is_q) {
10199                 unallocated_encoding(s);
10200                 return;
10201             }
10202             break;
10203         case 0x7c: /* URSQRTE */
10204             if (size == 3) {
10205                 unallocated_encoding(s);
10206                 return;
10207             }
10208             need_fpstatus = true;
10209             break;
10210         default:
10211             unallocated_encoding(s);
10212             return;
10213         }
10214         break;
10215     }
10216     default:
10217         unallocated_encoding(s);
10218         return;
10219     }
10220
10221     if (!fp_access_check(s)) {
10222         return;
10223     }
10224
10225     if (need_fpstatus) {
10226         tcg_fpstatus = get_fpstatus_ptr();
10227     } else {
10228         TCGV_UNUSED_PTR(tcg_fpstatus);
10229     }
10230     if (need_rmode) {
10231         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
10232         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10233     } else {
10234         TCGV_UNUSED_I32(tcg_rmode);
10235     }
10236
10237     if (size == 3) {
10238         /* All 64-bit element operations can be shared with scalar 2misc */
10239         int pass;
10240
10241         for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
10242             TCGv_i64 tcg_op = tcg_temp_new_i64();
10243             TCGv_i64 tcg_res = tcg_temp_new_i64();
10244
10245             read_vec_element(s, tcg_op, rn, pass, MO_64);
10246
10247             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
10248                             tcg_rmode, tcg_fpstatus);
10249
10250             write_vec_element(s, tcg_res, rd, pass, MO_64);
10251
10252             tcg_temp_free_i64(tcg_res);
10253             tcg_temp_free_i64(tcg_op);
10254         }
10255     } else {
10256         int pass;
10257
10258         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
10259             TCGv_i32 tcg_op = tcg_temp_new_i32();
10260             TCGv_i32 tcg_res = tcg_temp_new_i32();
10261             TCGCond cond;
10262
10263             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
10264
10265             if (size == 2) {
10266                 /* Special cases for 32 bit elements */
10267                 switch (opcode) {
10268                 case 0xa: /* CMLT */
10269                     /* 32 bit integer comparison against zero, result is
10270                      * test ? (2^32 - 1) : 0. We implement via setcond(test)
10271                      * and inverting.
10272                      */
10273                     cond = TCG_COND_LT;
10274                 do_cmop:
10275                     tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
10276                     tcg_gen_neg_i32(tcg_res, tcg_res);
10277                     break;
10278                 case 0x8: /* CMGT, CMGE */
10279                     cond = u ? TCG_COND_GE : TCG_COND_GT;
10280                     goto do_cmop;
10281                 case 0x9: /* CMEQ, CMLE */
10282                     cond = u ? TCG_COND_LE : TCG_COND_EQ;
10283                     goto do_cmop;
10284                 case 0x4: /* CLS */
10285                     if (u) {
10286                         gen_helper_clz32(tcg_res, tcg_op);
10287                     } else {
10288                         gen_helper_cls32(tcg_res, tcg_op);
10289                     }
10290                     break;
10291                 case 0x7: /* SQABS, SQNEG */
10292                     if (u) {
10293                         gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
10294                     } else {
10295                         gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
10296                     }
10297                     break;
10298                 case 0xb: /* ABS, NEG */
10299                     if (u) {
10300                         tcg_gen_neg_i32(tcg_res, tcg_op);
10301                     } else {
10302                         TCGv_i32 tcg_zero = tcg_const_i32(0);
10303                         tcg_gen_neg_i32(tcg_res, tcg_op);
10304                         tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
10305                                             tcg_zero, tcg_op, tcg_res);
10306                         tcg_temp_free_i32(tcg_zero);
10307                     }
10308                     break;
10309                 case 0x2f: /* FABS */
10310                     gen_helper_vfp_abss(tcg_res, tcg_op);
10311                     break;
10312                 case 0x6f: /* FNEG */
10313                     gen_helper_vfp_negs(tcg_res, tcg_op);
10314                     break;
10315                 case 0x7f: /* FSQRT */
10316                     gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
10317                     break;
10318                 case 0x1a: /* FCVTNS */
10319                 case 0x1b: /* FCVTMS */
10320                 case 0x1c: /* FCVTAS */
10321                 case 0x3a: /* FCVTPS */
10322                 case 0x3b: /* FCVTZS */
10323                 {
10324                     TCGv_i32 tcg_shift = tcg_const_i32(0);
10325                     gen_helper_vfp_tosls(tcg_res, tcg_op,
10326                                          tcg_shift, tcg_fpstatus);
10327                     tcg_temp_free_i32(tcg_shift);
10328                     break;
10329                 }
10330                 case 0x5a: /* FCVTNU */
10331                 case 0x5b: /* FCVTMU */
10332                 case 0x5c: /* FCVTAU */
10333                 case 0x7a: /* FCVTPU */
10334                 case 0x7b: /* FCVTZU */
10335                 {
10336                     TCGv_i32 tcg_shift = tcg_const_i32(0);
10337                     gen_helper_vfp_touls(tcg_res, tcg_op,
10338                                          tcg_shift, tcg_fpstatus);
10339                     tcg_temp_free_i32(tcg_shift);
10340                     break;
10341                 }
10342                 case 0x18: /* FRINTN */
10343                 case 0x19: /* FRINTM */
10344                 case 0x38: /* FRINTP */
10345                 case 0x39: /* FRINTZ */
10346                 case 0x58: /* FRINTA */
10347                 case 0x79: /* FRINTI */
10348                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
10349                     break;
10350                 case 0x59: /* FRINTX */
10351                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
10352                     break;
10353                 case 0x7c: /* URSQRTE */
10354                     gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
10355                     break;
10356                 default:
10357                     g_assert_not_reached();
10358                 }
10359             } else {
10360                 /* Use helpers for 8 and 16 bit elements */
10361                 switch (opcode) {
10362                 case 0x5: /* CNT, RBIT */
10363                     /* For these two insns size is part of the opcode specifier
10364                      * (handled earlier); they always operate on byte elements.
10365                      */
10366                     if (u) {
10367                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
10368                     } else {
10369                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
10370                     }
10371                     break;
10372                 case 0x7: /* SQABS, SQNEG */
10373                 {
10374                     NeonGenOneOpEnvFn *genfn;
10375                     static NeonGenOneOpEnvFn * const fns[2][2] = {
10376                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10377                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10378                     };
10379                     genfn = fns[size][u];
10380                     genfn(tcg_res, cpu_env, tcg_op);
10381                     break;
10382                 }
10383                 case 0x8: /* CMGT, CMGE */
10384                 case 0x9: /* CMEQ, CMLE */
10385                 case 0xa: /* CMLT */
10386                 {
10387                     static NeonGenTwoOpFn * const fns[3][2] = {
10388                         { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
10389                         { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
10390                         { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
10391                     };
10392                     NeonGenTwoOpFn *genfn;
10393                     int comp;
10394                     bool reverse;
10395                     TCGv_i32 tcg_zero = tcg_const_i32(0);
10396
10397                     /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
10398                     comp = (opcode - 0x8) * 2 + u;
10399                     /* ...but LE, LT are implemented as reverse GE, GT */
10400                     reverse = (comp > 2);
10401                     if (reverse) {
10402                         comp = 4 - comp;
10403                     }
10404                     genfn = fns[comp][size];
10405                     if (reverse) {
10406                         genfn(tcg_res, tcg_zero, tcg_op);
10407                     } else {
10408                         genfn(tcg_res, tcg_op, tcg_zero);
10409                     }
10410                     tcg_temp_free_i32(tcg_zero);
10411                     break;
10412                 }
10413                 case 0xb: /* ABS, NEG */
10414                     if (u) {
10415                         TCGv_i32 tcg_zero = tcg_const_i32(0);
10416                         if (size) {
10417                             gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
10418                         } else {
10419                             gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
10420                         }
10421                         tcg_temp_free_i32(tcg_zero);
10422                     } else {
10423                         if (size) {
10424                             gen_helper_neon_abs_s16(tcg_res, tcg_op);
10425                         } else {
10426                             gen_helper_neon_abs_s8(tcg_res, tcg_op);
10427                         }
10428                     }
10429                     break;
10430                 case 0x4: /* CLS, CLZ */
10431                     if (u) {
10432                         if (size == 0) {
10433                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
10434                         } else {
10435                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
10436                         }
10437                     } else {
10438                         if (size == 0) {
10439                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
10440                         } else {
10441                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
10442                         }
10443                     }
10444                     break;
10445                 default:
10446                     g_assert_not_reached();
10447                 }
10448             }
10449
10450             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10451
10452             tcg_temp_free_i32(tcg_res);
10453             tcg_temp_free_i32(tcg_op);
10454         }
10455     }
10456     if (!is_q) {
10457         clear_vec_high(s, rd);
10458     }
10459
10460     if (need_rmode) {
10461         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10462         tcg_temp_free_i32(tcg_rmode);
10463     }
10464     if (need_fpstatus) {
10465         tcg_temp_free_ptr(tcg_fpstatus);
10466     }
10467 }
10468
10469 /* C3.6.13 AdvSIMD scalar x indexed element
10470  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10471  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10472  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10473  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10474  * C3.6.18 AdvSIMD vector x indexed element
10475  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10476  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10477  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10478  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10479  */
10480 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
10481 {
10482     /* This encoding has two kinds of instruction:
10483      *  normal, where we perform elt x idxelt => elt for each
10484      *     element in the vector
10485      *  long, where we perform elt x idxelt and generate a result of
10486      *     double the width of the input element
10487      * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
10488      */
10489     bool is_scalar = extract32(insn, 28, 1);
10490     bool is_q = extract32(insn, 30, 1);
10491     bool u = extract32(insn, 29, 1);
10492     int size = extract32(insn, 22, 2);
10493     int l = extract32(insn, 21, 1);
10494     int m = extract32(insn, 20, 1);
10495     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
10496     int rm = extract32(insn, 16, 4);
10497     int opcode = extract32(insn, 12, 4);
10498     int h = extract32(insn, 11, 1);
10499     int rn = extract32(insn, 5, 5);
10500     int rd = extract32(insn, 0, 5);
10501     bool is_long = false;
10502     bool is_fp = false;
10503     int index;
10504     TCGv_ptr fpst;
10505
10506     switch (opcode) {
10507     case 0x0: /* MLA */
10508     case 0x4: /* MLS */
10509         if (!u || is_scalar) {
10510             unallocated_encoding(s);
10511             return;
10512         }
10513         break;
10514     case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10515     case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10516     case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
10517         if (is_scalar) {
10518             unallocated_encoding(s);
10519             return;
10520         }
10521         is_long = true;
10522         break;
10523     case 0x3: /* SQDMLAL, SQDMLAL2 */
10524     case 0x7: /* SQDMLSL, SQDMLSL2 */
10525     case 0xb: /* SQDMULL, SQDMULL2 */
10526         is_long = true;
10527         /* fall through */
10528     case 0xc: /* SQDMULH */
10529     case 0xd: /* SQRDMULH */
10530         if (u) {
10531             unallocated_encoding(s);
10532             return;
10533         }
10534         break;
10535     case 0x8: /* MUL */
10536         if (u || is_scalar) {
10537             unallocated_encoding(s);
10538             return;
10539         }
10540         break;
10541     case 0x1: /* FMLA */
10542     case 0x5: /* FMLS */
10543         if (u) {
10544             unallocated_encoding(s);
10545             return;
10546         }
10547         /* fall through */
10548     case 0x9: /* FMUL, FMULX */
10549         if (!extract32(size, 1, 1)) {
10550             unallocated_encoding(s);
10551             return;
10552         }
10553         is_fp = true;
10554         break;
10555     default:
10556         unallocated_encoding(s);
10557         return;
10558     }
10559
10560     if (is_fp) {
10561         /* low bit of size indicates single/double */
10562         size = extract32(size, 0, 1) ? 3 : 2;
10563         if (size == 2) {
10564             index = h << 1 | l;
10565         } else {
10566             if (l || !is_q) {
10567                 unallocated_encoding(s);
10568                 return;
10569             }
10570             index = h;
10571         }
10572         rm |= (m << 4);
10573     } else {
10574         switch (size) {
10575         case 1:
10576             index = h << 2 | l << 1 | m;
10577             break;
10578         case 2:
10579             index = h << 1 | l;
10580             rm |= (m << 4);
10581             break;
10582         default:
10583             unallocated_encoding(s);
10584             return;
10585         }
10586     }
10587
10588     if (!fp_access_check(s)) {
10589         return;
10590     }
10591
10592     if (is_fp) {
10593         fpst = get_fpstatus_ptr();
10594     } else {
10595         TCGV_UNUSED_PTR(fpst);
10596     }
10597
10598     if (size == 3) {
10599         TCGv_i64 tcg_idx = tcg_temp_new_i64();
10600         int pass;
10601
10602         assert(is_fp && is_q && !is_long);
10603
10604         read_vec_element(s, tcg_idx, rm, index, MO_64);
10605
10606         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10607             TCGv_i64 tcg_op = tcg_temp_new_i64();
10608             TCGv_i64 tcg_res = tcg_temp_new_i64();
10609
10610             read_vec_element(s, tcg_op, rn, pass, MO_64);
10611
10612             switch (opcode) {
10613             case 0x5: /* FMLS */
10614                 /* As usual for ARM, separate negation for fused multiply-add */
10615                 gen_helper_vfp_negd(tcg_op, tcg_op);
10616                 /* fall through */
10617             case 0x1: /* FMLA */
10618                 read_vec_element(s, tcg_res, rd, pass, MO_64);
10619                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10620                 break;
10621             case 0x9: /* FMUL, FMULX */
10622                 if (u) {
10623                     gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
10624                 } else {
10625                     gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
10626                 }
10627                 break;
10628             default:
10629                 g_assert_not_reached();
10630             }
10631
10632             write_vec_element(s, tcg_res, rd, pass, MO_64);
10633             tcg_temp_free_i64(tcg_op);
10634             tcg_temp_free_i64(tcg_res);
10635         }
10636
10637         if (is_scalar) {
10638             clear_vec_high(s, rd);
10639         }
10640
10641         tcg_temp_free_i64(tcg_idx);
10642     } else if (!is_long) {
10643         /* 32 bit floating point, or 16 or 32 bit integer.
10644          * For the 16 bit scalar case we use the usual Neon helpers and
10645          * rely on the fact that 0 op 0 == 0 with no side effects.
10646          */
10647         TCGv_i32 tcg_idx = tcg_temp_new_i32();
10648         int pass, maxpasses;
10649
10650         if (is_scalar) {
10651             maxpasses = 1;
10652         } else {
10653             maxpasses = is_q ? 4 : 2;
10654         }
10655
10656         read_vec_element_i32(s, tcg_idx, rm, index, size);
10657
10658         if (size == 1 && !is_scalar) {
10659             /* The simplest way to handle the 16x16 indexed ops is to duplicate
10660              * the index into both halves of the 32 bit tcg_idx and then use
10661              * the usual Neon helpers.
10662              */
10663             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10664         }
10665
10666         for (pass = 0; pass < maxpasses; pass++) {
10667             TCGv_i32 tcg_op = tcg_temp_new_i32();
10668             TCGv_i32 tcg_res = tcg_temp_new_i32();
10669
10670             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
10671
10672             switch (opcode) {
10673             case 0x0: /* MLA */
10674             case 0x4: /* MLS */
10675             case 0x8: /* MUL */
10676             {
10677                 static NeonGenTwoOpFn * const fns[2][2] = {
10678                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
10679                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
10680                 };
10681                 NeonGenTwoOpFn *genfn;
10682                 bool is_sub = opcode == 0x4;
10683
10684                 if (size == 1) {
10685                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
10686                 } else {
10687                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
10688                 }
10689                 if (opcode == 0x8) {
10690                     break;
10691                 }
10692                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
10693                 genfn = fns[size - 1][is_sub];
10694                 genfn(tcg_res, tcg_op, tcg_res);
10695                 break;
10696             }
10697             case 0x5: /* FMLS */
10698                 /* As usual for ARM, separate negation for fused multiply-add */
10699                 gen_helper_vfp_negs(tcg_op, tcg_op);
10700                 /* fall through */
10701             case 0x1: /* FMLA */
10702                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10703                 gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10704                 break;
10705             case 0x9: /* FMUL, FMULX */
10706                 if (u) {
10707                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
10708                 } else {
10709                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
10710                 }
10711                 break;
10712             case 0xc: /* SQDMULH */
10713                 if (size == 1) {
10714                     gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
10715                                                tcg_op, tcg_idx);
10716                 } else {
10717                     gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
10718                                                tcg_op, tcg_idx);
10719                 }
10720                 break;
10721             case 0xd: /* SQRDMULH */
10722                 if (size == 1) {
10723                     gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
10724                                                 tcg_op, tcg_idx);
10725                 } else {
10726                     gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
10727                                                 tcg_op, tcg_idx);
10728                 }
10729                 break;
10730             default:
10731                 g_assert_not_reached();
10732             }
10733
10734             if (is_scalar) {
10735                 write_fp_sreg(s, rd, tcg_res);
10736             } else {
10737                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10738             }
10739
10740             tcg_temp_free_i32(tcg_op);
10741             tcg_temp_free_i32(tcg_res);
10742         }
10743
10744         tcg_temp_free_i32(tcg_idx);
10745
10746         if (!is_q) {
10747             clear_vec_high(s, rd);
10748         }
10749     } else {
10750         /* long ops: 16x16->32 or 32x32->64 */
10751         TCGv_i64 tcg_res[2];
10752         int pass;
10753         bool satop = extract32(opcode, 0, 1);
10754         TCGMemOp memop = MO_32;
10755
10756         if (satop || !u) {
10757             memop |= MO_SIGN;
10758         }
10759
10760         if (size == 2) {
10761             TCGv_i64 tcg_idx = tcg_temp_new_i64();
10762
10763             read_vec_element(s, tcg_idx, rm, index, memop);
10764
10765             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10766                 TCGv_i64 tcg_op = tcg_temp_new_i64();
10767                 TCGv_i64 tcg_passres;
10768                 int passelt;
10769
10770                 if (is_scalar) {
10771                     passelt = 0;
10772                 } else {
10773                     passelt = pass + (is_q * 2);
10774                 }
10775
10776                 read_vec_element(s, tcg_op, rn, passelt, memop);
10777
10778                 tcg_res[pass] = tcg_temp_new_i64();
10779
10780                 if (opcode == 0xa || opcode == 0xb) {
10781                     /* Non-accumulating ops */
10782                     tcg_passres = tcg_res[pass];
10783                 } else {
10784                     tcg_passres = tcg_temp_new_i64();
10785                 }
10786
10787                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
10788                 tcg_temp_free_i64(tcg_op);
10789
10790                 if (satop) {
10791                     /* saturating, doubling */
10792                     gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10793                                                       tcg_passres, tcg_passres);
10794                 }
10795
10796                 if (opcode == 0xa || opcode == 0xb) {
10797                     continue;
10798                 }
10799
10800                 /* Accumulating op: handle accumulate step */
10801                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10802
10803                 switch (opcode) {
10804                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10805                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10806                     break;
10807                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10808                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10809                     break;
10810                 case 0x7: /* SQDMLSL, SQDMLSL2 */
10811                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
10812                     /* fall through */
10813                 case 0x3: /* SQDMLAL, SQDMLAL2 */
10814                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10815                                                       tcg_res[pass],
10816                                                       tcg_passres);
10817                     break;
10818                 default:
10819                     g_assert_not_reached();
10820                 }
10821                 tcg_temp_free_i64(tcg_passres);
10822             }
10823             tcg_temp_free_i64(tcg_idx);
10824
10825             if (is_scalar) {
10826                 clear_vec_high(s, rd);
10827             }
10828         } else {
10829             TCGv_i32 tcg_idx = tcg_temp_new_i32();
10830
10831             assert(size == 1);
10832             read_vec_element_i32(s, tcg_idx, rm, index, size);
10833
10834             if (!is_scalar) {
10835                 /* The simplest way to handle the 16x16 indexed ops is to
10836                  * duplicate the index into both halves of the 32 bit tcg_idx
10837                  * and then use the usual Neon helpers.
10838                  */
10839                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10840             }
10841
10842             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10843                 TCGv_i32 tcg_op = tcg_temp_new_i32();
10844                 TCGv_i64 tcg_passres;
10845
10846                 if (is_scalar) {
10847                     read_vec_element_i32(s, tcg_op, rn, pass, size);
10848                 } else {
10849                     read_vec_element_i32(s, tcg_op, rn,
10850                                          pass + (is_q * 2), MO_32);
10851                 }
10852
10853                 tcg_res[pass] = tcg_temp_new_i64();
10854
10855                 if (opcode == 0xa || opcode == 0xb) {
10856                     /* Non-accumulating ops */
10857                     tcg_passres = tcg_res[pass];
10858                 } else {
10859                     tcg_passres = tcg_temp_new_i64();
10860                 }
10861
10862                 if (memop & MO_SIGN) {
10863                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
10864                 } else {
10865                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
10866                 }
10867                 if (satop) {
10868                     gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10869                                                       tcg_passres, tcg_passres);
10870                 }
10871                 tcg_temp_free_i32(tcg_op);
10872
10873                 if (opcode == 0xa || opcode == 0xb) {
10874                     continue;
10875                 }
10876
10877                 /* Accumulating op: handle accumulate step */
10878                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10879
10880                 switch (opcode) {
10881                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10882                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
10883                                              tcg_passres);
10884                     break;
10885                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10886                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
10887                                              tcg_passres);
10888                     break;
10889                 case 0x7: /* SQDMLSL, SQDMLSL2 */
10890                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10891                     /* fall through */
10892                 case 0x3: /* SQDMLAL, SQDMLAL2 */
10893                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10894                                                       tcg_res[pass],
10895                                                       tcg_passres);
10896                     break;
10897                 default:
10898                     g_assert_not_reached();
10899                 }
10900                 tcg_temp_free_i64(tcg_passres);
10901             }
10902             tcg_temp_free_i32(tcg_idx);
10903
10904             if (is_scalar) {
10905                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
10906             }
10907         }
10908
10909         if (is_scalar) {
10910             tcg_res[1] = tcg_const_i64(0);
10911         }
10912
10913         for (pass = 0; pass < 2; pass++) {
10914             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10915             tcg_temp_free_i64(tcg_res[pass]);
10916         }
10917     }
10918
10919     if (!TCGV_IS_UNUSED_PTR(fpst)) {
10920         tcg_temp_free_ptr(fpst);
10921     }
10922 }
10923
10924 /* C3.6.19 Crypto AES
10925  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10926  * +-----------------+------+-----------+--------+-----+------+------+
10927  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10928  * +-----------------+------+-----------+--------+-----+------+------+
10929  */
10930 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
10931 {
10932     int size = extract32(insn, 22, 2);
10933     int opcode = extract32(insn, 12, 5);
10934     int rn = extract32(insn, 5, 5);
10935     int rd = extract32(insn, 0, 5);
10936     int decrypt;
10937     TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt;
10938     CryptoThreeOpEnvFn *genfn;
10939
10940     if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
10941         || size != 0) {
10942         unallocated_encoding(s);
10943         return;
10944     }
10945
10946     switch (opcode) {
10947     case 0x4: /* AESE */
10948         decrypt = 0;
10949         genfn = gen_helper_crypto_aese;
10950         break;
10951     case 0x6: /* AESMC */
10952         decrypt = 0;
10953         genfn = gen_helper_crypto_aesmc;
10954         break;
10955     case 0x5: /* AESD */
10956         decrypt = 1;
10957         genfn = gen_helper_crypto_aese;
10958         break;
10959     case 0x7: /* AESIMC */
10960         decrypt = 1;
10961         genfn = gen_helper_crypto_aesmc;
10962         break;
10963     default:
10964         unallocated_encoding(s);
10965         return;
10966     }
10967
10968     /* Note that we convert the Vx register indexes into the
10969      * index within the vfp.regs[] array, so we can share the
10970      * helper with the AArch32 instructions.
10971      */
10972     tcg_rd_regno = tcg_const_i32(rd << 1);
10973     tcg_rn_regno = tcg_const_i32(rn << 1);
10974     tcg_decrypt = tcg_const_i32(decrypt);
10975
10976     genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt);
10977
10978     tcg_temp_free_i32(tcg_rd_regno);
10979     tcg_temp_free_i32(tcg_rn_regno);
10980     tcg_temp_free_i32(tcg_decrypt);
10981 }
10982
10983 /* C3.6.20 Crypto three-reg SHA
10984  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
10985  * +-----------------+------+---+------+---+--------+-----+------+------+
10986  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
10987  * +-----------------+------+---+------+---+--------+-----+------+------+
10988  */
10989 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
10990 {
10991     int size = extract32(insn, 22, 2);
10992     int opcode = extract32(insn, 12, 3);
10993     int rm = extract32(insn, 16, 5);
10994     int rn = extract32(insn, 5, 5);
10995     int rd = extract32(insn, 0, 5);
10996     CryptoThreeOpEnvFn *genfn;
10997     TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno;
10998     int feature = ARM_FEATURE_V8_SHA256;
10999
11000     if (size != 0) {
11001         unallocated_encoding(s);
11002         return;
11003     }
11004
11005     switch (opcode) {
11006     case 0: /* SHA1C */
11007     case 1: /* SHA1P */
11008     case 2: /* SHA1M */
11009     case 3: /* SHA1SU0 */
11010         genfn = NULL;
11011         feature = ARM_FEATURE_V8_SHA1;
11012         break;
11013     case 4: /* SHA256H */
11014         genfn = gen_helper_crypto_sha256h;
11015         break;
11016     case 5: /* SHA256H2 */
11017         genfn = gen_helper_crypto_sha256h2;
11018         break;
11019     case 6: /* SHA256SU1 */
11020         genfn = gen_helper_crypto_sha256su1;
11021         break;
11022     default:
11023         unallocated_encoding(s);
11024         return;
11025     }
11026
11027     if (!arm_dc_feature(s, feature)) {
11028         unallocated_encoding(s);
11029         return;
11030     }
11031
11032     tcg_rd_regno = tcg_const_i32(rd << 1);
11033     tcg_rn_regno = tcg_const_i32(rn << 1);
11034     tcg_rm_regno = tcg_const_i32(rm << 1);
11035
11036     if (genfn) {
11037         genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno);
11038     } else {
11039         TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
11040
11041         gen_helper_crypto_sha1_3reg(cpu_env, tcg_rd_regno,
11042                                     tcg_rn_regno, tcg_rm_regno, tcg_opcode);
11043         tcg_temp_free_i32(tcg_opcode);
11044     }
11045
11046     tcg_temp_free_i32(tcg_rd_regno);
11047     tcg_temp_free_i32(tcg_rn_regno);
11048     tcg_temp_free_i32(tcg_rm_regno);
11049 }
11050
11051 /* C3.6.21 Crypto two-reg SHA
11052  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
11053  * +-----------------+------+-----------+--------+-----+------+------+
11054  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11055  * +-----------------+------+-----------+--------+-----+------+------+
11056  */
11057 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
11058 {
11059     int size = extract32(insn, 22, 2);
11060     int opcode = extract32(insn, 12, 5);
11061     int rn = extract32(insn, 5, 5);
11062     int rd = extract32(insn, 0, 5);
11063     CryptoTwoOpEnvFn *genfn;
11064     int feature;
11065     TCGv_i32 tcg_rd_regno, tcg_rn_regno;
11066
11067     if (size != 0) {
11068         unallocated_encoding(s);
11069         return;
11070     }
11071
11072     switch (opcode) {
11073     case 0: /* SHA1H */
11074         feature = ARM_FEATURE_V8_SHA1;
11075         genfn = gen_helper_crypto_sha1h;
11076         break;
11077     case 1: /* SHA1SU1 */
11078         feature = ARM_FEATURE_V8_SHA1;
11079         genfn = gen_helper_crypto_sha1su1;
11080         break;
11081     case 2: /* SHA256SU0 */
11082         feature = ARM_FEATURE_V8_SHA256;
11083         genfn = gen_helper_crypto_sha256su0;
11084         break;
11085     default:
11086         unallocated_encoding(s);
11087         return;
11088     }
11089
11090     if (!arm_dc_feature(s, feature)) {
11091         unallocated_encoding(s);
11092         return;
11093     }
11094
11095     tcg_rd_regno = tcg_const_i32(rd << 1);
11096     tcg_rn_regno = tcg_const_i32(rn << 1);
11097
11098     genfn(cpu_env, tcg_rd_regno, tcg_rn_regno);
11099
11100     tcg_temp_free_i32(tcg_rd_regno);
11101     tcg_temp_free_i32(tcg_rn_regno);
11102 }
11103
11104 /* C3.6 Data processing - SIMD, inc Crypto
11105  *
11106  * As the decode gets a little complex we are using a table based
11107  * approach for this part of the decode.
11108  */
11109 static const AArch64DecodeTable data_proc_simd[] = {
11110     /* pattern  ,  mask     ,  fn                        */
11111     { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
11112     { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
11113     { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
11114     { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
11115     { 0x0e000400, 0x9fe08400, disas_simd_copy },
11116     { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
11117     /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
11118     { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
11119     { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
11120     { 0x0e000000, 0xbf208c00, disas_simd_tb },
11121     { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
11122     { 0x2e000000, 0xbf208400, disas_simd_ext },
11123     { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
11124     { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
11125     { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
11126     { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
11127     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
11128     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
11129     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
11130     { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
11131     { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
11132     { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
11133     { 0x00000000, 0x00000000, NULL }
11134 };
11135
11136 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
11137 {
11138     /* Note that this is called with all non-FP cases from
11139      * table C3-6 so it must UNDEF for entries not specifically
11140      * allocated to instructions in that table.
11141      */
11142     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
11143     if (fn) {
11144         fn(s, insn);
11145     } else {
11146         unallocated_encoding(s);
11147     }
11148 }
11149
11150 /* C3.6 Data processing - SIMD and floating point */
11151 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
11152 {
11153     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
11154         disas_data_proc_fp(s, insn);
11155     } else {
11156         /* SIMD, including crypto */
11157         disas_data_proc_simd(s, insn);
11158     }
11159 }
11160
11161 /* C3.1 A64 instruction index by encoding */
11162 static void disas_a64_insn(CPUARMState *env, DisasContext *s)
11163 {
11164     uint32_t insn;
11165
11166     insn = arm_ldl_code(env, s->pc, s->sctlr_b);
11167     s->insn = insn;
11168     s->pc += 4;
11169
11170     s->fp_access_checked = false;
11171
11172     switch (extract32(insn, 25, 4)) {
11173     case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
11174         unallocated_encoding(s);
11175         break;
11176     case 0x8: case 0x9: /* Data processing - immediate */
11177         disas_data_proc_imm(s, insn);
11178         break;
11179     case 0xa: case 0xb: /* Branch, exception generation and system insns */
11180         disas_b_exc_sys(s, insn);
11181         break;
11182     case 0x4:
11183     case 0x6:
11184     case 0xc:
11185     case 0xe:      /* Loads and stores */
11186         disas_ldst(s, insn);
11187         break;
11188     case 0x5:
11189     case 0xd:      /* Data processing - register */
11190         disas_data_proc_reg(s, insn);
11191         break;
11192     case 0x7:
11193     case 0xf:      /* Data processing - SIMD and floating point */
11194         disas_data_proc_simd_fp(s, insn);
11195         break;
11196     default:
11197         assert(FALSE); /* all 15 cases should be handled above */
11198         break;
11199     }
11200
11201     /* if we allocated any temporaries, free them here */
11202     free_tmp_a64(s);
11203 }
11204
11205 void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
11206 {
11207     CPUState *cs = CPU(cpu);
11208     CPUARMState *env = &cpu->env;
11209     DisasContext dc1, *dc = &dc1;
11210     target_ulong pc_start;
11211     target_ulong next_page_start;
11212     int num_insns;
11213     int max_insns;
11214
11215     pc_start = tb->pc;
11216
11217     dc->tb = tb;
11218
11219     dc->is_jmp = DISAS_NEXT;
11220     dc->pc = pc_start;
11221     dc->singlestep_enabled = cs->singlestep_enabled;
11222     dc->condjmp = 0;
11223
11224     dc->aarch64 = 1;
11225     /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
11226      * there is no secure EL1, so we route exceptions to EL3.
11227      */
11228     dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
11229                                !arm_el_is_aa64(env, 3);
11230     dc->thumb = 0;
11231     dc->sctlr_b = 0;
11232     dc->be_data = ARM_TBFLAG_BE_DATA(tb->flags) ? MO_BE : MO_LE;
11233     dc->condexec_mask = 0;
11234     dc->condexec_cond = 0;
11235     dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags);
11236     dc->tbi0 = ARM_TBFLAG_TBI0(tb->flags);
11237     dc->tbi1 = ARM_TBFLAG_TBI1(tb->flags);
11238     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
11239 #if !defined(CONFIG_USER_ONLY)
11240     dc->user = (dc->current_el == 0);
11241 #endif
11242     dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(tb->flags);
11243     dc->vec_len = 0;
11244     dc->vec_stride = 0;
11245     dc->cp_regs = cpu->cp_regs;
11246     dc->features = env->features;
11247
11248     /* Single step state. The code-generation logic here is:
11249      *  SS_ACTIVE == 0:
11250      *   generate code with no special handling for single-stepping (except
11251      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
11252      *   this happens anyway because those changes are all system register or
11253      *   PSTATE writes).
11254      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
11255      *   emit code for one insn
11256      *   emit code to clear PSTATE.SS
11257      *   emit code to generate software step exception for completed step
11258      *   end TB (as usual for having generated an exception)
11259      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
11260      *   emit code to generate a software step exception
11261      *   end the TB
11262      */
11263     dc->ss_active = ARM_TBFLAG_SS_ACTIVE(tb->flags);
11264     dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(tb->flags);
11265     dc->is_ldex = false;
11266     dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);
11267
11268     init_tmp_a64_array(dc);
11269
11270     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
11271     num_insns = 0;
11272     max_insns = tb->cflags & CF_COUNT_MASK;
11273     if (max_insns == 0) {
11274         max_insns = CF_COUNT_MASK;
11275     }
11276     if (max_insns > TCG_MAX_INSNS) {
11277         max_insns = TCG_MAX_INSNS;
11278     }
11279
11280     gen_tb_start(tb);
11281
11282     tcg_clear_temp_count();
11283
11284     do {
11285         dc->insn_start_idx = tcg_op_buf_count();
11286         tcg_gen_insn_start(dc->pc, 0, 0);
11287         num_insns++;
11288
11289         if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
11290             CPUBreakpoint *bp;
11291             QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
11292                 if (bp->pc == dc->pc) {
11293                     if (bp->flags & BP_CPU) {
11294                         gen_a64_set_pc_im(dc->pc);
11295                         gen_helper_check_breakpoints(cpu_env);
11296                         /* End the TB early; it likely won't be executed */
11297                         dc->is_jmp = DISAS_UPDATE;
11298                     } else {
11299                         gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
11300                         /* The address covered by the breakpoint must be
11301                            included in [tb->pc, tb->pc + tb->size) in order
11302                            to for it to be properly cleared -- thus we
11303                            increment the PC here so that the logic setting
11304                            tb->size below does the right thing.  */
11305                         dc->pc += 4;
11306                         goto done_generating;
11307                     }
11308                     break;
11309                 }
11310             }
11311         }
11312
11313         if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
11314             gen_io_start();
11315         }
11316
11317         if (dc->ss_active && !dc->pstate_ss) {
11318             /* Singlestep state is Active-pending.
11319              * If we're in this state at the start of a TB then either
11320              *  a) we just took an exception to an EL which is being debugged
11321              *     and this is the first insn in the exception handler
11322              *  b) debug exceptions were masked and we just unmasked them
11323              *     without changing EL (eg by clearing PSTATE.D)
11324              * In either case we're going to take a swstep exception in the
11325              * "did not step an insn" case, and so the syndrome ISV and EX
11326              * bits should be zero.
11327              */
11328             assert(num_insns == 1);
11329             gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
11330                           default_exception_el(dc));
11331             dc->is_jmp = DISAS_EXC;
11332             break;
11333         }
11334
11335         disas_a64_insn(env, dc);
11336
11337         if (tcg_check_temp_count()) {
11338             fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
11339                     dc->pc);
11340         }
11341
11342         /* Translation stops when a conditional branch is encountered.
11343          * Otherwise the subsequent code could get translated several times.
11344          * Also stop translation when a page boundary is reached.  This
11345          * ensures prefetch aborts occur at the right place.
11346          */
11347     } while (!dc->is_jmp && !tcg_op_buf_full() &&
11348              !cs->singlestep_enabled &&
11349              !singlestep &&
11350              !dc->ss_active &&
11351              dc->pc < next_page_start &&
11352              num_insns < max_insns);
11353
11354     if (tb->cflags & CF_LAST_IO) {
11355         gen_io_end();
11356     }
11357
11358     if (unlikely(cs->singlestep_enabled || dc->ss_active)
11359         && dc->is_jmp != DISAS_EXC) {
11360         /* Note that this means single stepping WFI doesn't halt the CPU.
11361          * For conditional branch insns this is harmless unreachable code as
11362          * gen_goto_tb() has already handled emitting the debug exception
11363          * (and thus a tb-jump is not possible when singlestepping).
11364          */
11365         assert(dc->is_jmp != DISAS_TB_JUMP);
11366         if (dc->is_jmp != DISAS_JUMP) {
11367             gen_a64_set_pc_im(dc->pc);
11368         }
11369         if (cs->singlestep_enabled) {
11370             gen_exception_internal(EXCP_DEBUG);
11371         } else {
11372             gen_step_complete_exception(dc);
11373         }
11374     } else {
11375         switch (dc->is_jmp) {
11376         case DISAS_NEXT:
11377             gen_goto_tb(dc, 1, dc->pc);
11378             break;
11379         default:
11380         case DISAS_UPDATE:
11381             gen_a64_set_pc_im(dc->pc);
11382             /* fall through */
11383         case DISAS_JUMP:
11384             /* indicate that the hash table must be used to find the next TB */
11385             tcg_gen_exit_tb(0);
11386             break;
11387         case DISAS_TB_JUMP:
11388         case DISAS_EXC:
11389         case DISAS_SWI:
11390             break;
11391         case DISAS_WFE:
11392             gen_a64_set_pc_im(dc->pc);
11393             gen_helper_wfe(cpu_env);
11394             break;
11395         case DISAS_YIELD:
11396             gen_a64_set_pc_im(dc->pc);
11397             gen_helper_yield(cpu_env);
11398             break;
11399         case DISAS_WFI:
11400             /* This is a special case because we don't want to just halt the CPU
11401              * if trying to debug across a WFI.
11402              */
11403             gen_a64_set_pc_im(dc->pc);
11404             gen_helper_wfi(cpu_env);
11405             /* The helper doesn't necessarily throw an exception, but we
11406              * must go back to the main loop to check for interrupts anyway.
11407              */
11408             tcg_gen_exit_tb(0);
11409             break;
11410         }
11411     }
11412
11413 done_generating:
11414     gen_tb_end(tb, num_insns);
11415
11416 #ifdef DEBUG_DISAS
11417     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) &&
11418         qemu_log_in_addr_range(pc_start)) {
11419         qemu_log_lock();
11420         qemu_log("----------------\n");
11421         qemu_log("IN: %s\n", lookup_symbol(pc_start));
11422         log_target_disas(cs, pc_start, dc->pc - pc_start,
11423                          4 | (bswap_code(dc->sctlr_b) ? 2 : 0));
11424         qemu_log("\n");
11425         qemu_log_unlock();
11426     }
11427 #endif
11428     tb->size = dc->pc - pc_start;
11429     tb->icount = num_insns;
11430 }