target/arm/translate-a64.c

   1 /*
   2  *  AArch64 translation
   3  *
   4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include "qemu/osdep.h"
  20
  21 #include "cpu.h"
  22 #include "exec/exec-all.h"
  23 #include "tcg-op.h"
  24 #include "tcg-op-gvec.h"
  25 #include "qemu/log.h"
  26 #include "arm_ldst.h"
  27 #include "translate.h"
  28 #include "internals.h"
  29 #include "qemu/host-utils.h"
  30
  31 #include "exec/semihost.h"
  32 #include "exec/gen-icount.h"
  33
  34 #include "exec/helper-proto.h"
  35 #include "exec/helper-gen.h"
  36 #include "exec/log.h"
  37
  38 #include "trace-tcg.h"
  39 #include "translate-a64.h"
  40 #include "qemu/atomic128.h"
  41
  42 static TCGv_i64 cpu_X[32];
  43 static TCGv_i64 cpu_pc;
  44
  45 /* Load/store exclusive handling */
  46 static TCGv_i64 cpu_exclusive_high;
  47
  48 static const char *regnames[] = {
  49     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
  50     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
  51     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
  52     "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
  53 };
  54
  55 enum a64_shift_type {
  56     A64_SHIFT_TYPE_LSL = 0,
  57     A64_SHIFT_TYPE_LSR = 1,
  58     A64_SHIFT_TYPE_ASR = 2,
  59     A64_SHIFT_TYPE_ROR = 3
  60 };
  61
  62 /* Table based decoder typedefs - used when the relevant bits for decode
  63  * are too awkwardly scattered across the instruction (eg SIMD).
  64  */
  65 typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);
  66
  67 typedef struct AArch64DecodeTable {
  68     uint32_t pattern;
  69     uint32_t mask;
  70     AArch64DecodeFn *disas_fn;
  71 } AArch64DecodeTable;
  72
  73 /* Function prototype for gen_ functions for calling Neon helpers */
  74 typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
  75 typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
  76 typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
  77 typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
  78 typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
  79 typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
  80 typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
  81 typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
  82 typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
  83 typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
  84 typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
  85 typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
  86 typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
  87 typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
  88 typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, TCGMemOp);
  89
  90 /* initialize TCG globals.  */
  91 void a64_translate_init(void)
  92 {
  93     int i;
  94
  95     cpu_pc = tcg_global_mem_new_i64(cpu_env,
  96                                     offsetof(CPUARMState, pc),
  97                                     "pc");
  98     for (i = 0; i < 32; i++) {
  99         cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
 100                                           offsetof(CPUARMState, xregs[i]),
 101                                           regnames[i]);
 102     }
 103
 104     cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
 105         offsetof(CPUARMState, exclusive_high), "exclusive_high");
 106 }
 107
 108 static inline int get_a64_user_mem_index(DisasContext *s)
 109 {
 110     /* Return the core mmu_idx to use for A64 "unprivileged load/store" insns:
 111      *  if EL1, access as if EL0; otherwise access at current EL
 112      */
 113     ARMMMUIdx useridx;
 114
 115     switch (s->mmu_idx) {
 116     case ARMMMUIdx_S12NSE1:
 117         useridx = ARMMMUIdx_S12NSE0;
 118         break;
 119     case ARMMMUIdx_S1SE1:
 120         useridx = ARMMMUIdx_S1SE0;
 121         break;
 122     case ARMMMUIdx_S2NS:
 123         g_assert_not_reached();
 124     default:
 125         useridx = s->mmu_idx;
 126         break;
 127     }
 128     return arm_to_core_mmu_idx(useridx);
 129 }
 130
 131 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
 132                             fprintf_function cpu_fprintf, int flags)
 133 {
 134     ARMCPU *cpu = ARM_CPU(cs);
 135     CPUARMState *env = &cpu->env;
 136     uint32_t psr = pstate_read(env);
 137     int i;
 138     int el = arm_current_el(env);
 139     const char *ns_status;
 140
 141     cpu_fprintf(f, " PC=%016" PRIx64 " ", env->pc);
 142     for (i = 0; i < 32; i++) {
 143         if (i == 31) {
 144             cpu_fprintf(f, " SP=%016" PRIx64 "\n", env->xregs[i]);
 145         } else {
 146             cpu_fprintf(f, "X%02d=%016" PRIx64 "%s", i, env->xregs[i],
 147                         (i + 2) % 3 ? " " : "\n");
 148         }
 149     }
 150
 151     if (arm_feature(env, ARM_FEATURE_EL3) && el != 3) {
 152         ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
 153     } else {
 154         ns_status = "";
 155     }
 156     cpu_fprintf(f, "PSTATE=%08x %c%c%c%c %sEL%d%c",
 157                 psr,
 158                 psr & PSTATE_N ? 'N' : '-',
 159                 psr & PSTATE_Z ? 'Z' : '-',
 160                 psr & PSTATE_C ? 'C' : '-',
 161                 psr & PSTATE_V ? 'V' : '-',
 162                 ns_status,
 163                 el,
 164                 psr & PSTATE_SP ? 'h' : 't');
 165
 166     if (!(flags & CPU_DUMP_FPU)) {
 167         cpu_fprintf(f, "\n");
 168         return;
 169     }
 170     if (fp_exception_el(env, el) != 0) {
 171         cpu_fprintf(f, "    FPU disabled\n");
 172         return;
 173     }
 174     cpu_fprintf(f, "     FPCR=%08x FPSR=%08x\n",
 175                 vfp_get_fpcr(env), vfp_get_fpsr(env));
 176
 177     if (cpu_isar_feature(aa64_sve, cpu) && sve_exception_el(env, el) == 0) {
 178         int j, zcr_len = sve_zcr_len_for_el(env, el);
 179
 180         for (i = 0; i <= FFR_PRED_NUM; i++) {
 181             bool eol;
 182             if (i == FFR_PRED_NUM) {
 183                 cpu_fprintf(f, "FFR=");
 184                 /* It's last, so end the line.  */
 185                 eol = true;
 186             } else {
 187                 cpu_fprintf(f, "P%02d=", i);
 188                 switch (zcr_len) {
 189                 case 0:
 190                     eol = i % 8 == 7;
 191                     break;
 192                 case 1:
 193                     eol = i % 6 == 5;
 194                     break;
 195                 case 2:
 196                 case 3:
 197                     eol = i % 3 == 2;
 198                     break;
 199                 default:
 200                     /* More than one quadword per predicate.  */
 201                     eol = true;
 202                     break;
 203                 }
 204             }
 205             for (j = zcr_len / 4; j >= 0; j--) {
 206                 int digits;
 207                 if (j * 4 + 4 <= zcr_len + 1) {
 208                     digits = 16;
 209                 } else {
 210                     digits = (zcr_len % 4 + 1) * 4;
 211                 }
 212                 cpu_fprintf(f, "%0*" PRIx64 "%s", digits,
 213                             env->vfp.pregs[i].p[j],
 214                             j ? ":" : eol ? "\n" : " ");
 215             }
 216         }
 217
 218         for (i = 0; i < 32; i++) {
 219             if (zcr_len == 0) {
 220                 cpu_fprintf(f, "Z%02d=%016" PRIx64 ":%016" PRIx64 "%s",
 221                             i, env->vfp.zregs[i].d[1],
 222                             env->vfp.zregs[i].d[0], i & 1 ? "\n" : " ");
 223             } else if (zcr_len == 1) {
 224                 cpu_fprintf(f, "Z%02d=%016" PRIx64 ":%016" PRIx64
 225                             ":%016" PRIx64 ":%016" PRIx64 "\n",
 226                             i, env->vfp.zregs[i].d[3], env->vfp.zregs[i].d[2],
 227                             env->vfp.zregs[i].d[1], env->vfp.zregs[i].d[0]);
 228             } else {
 229                 for (j = zcr_len; j >= 0; j--) {
 230                     bool odd = (zcr_len - j) % 2 != 0;
 231                     if (j == zcr_len) {
 232                         cpu_fprintf(f, "Z%02d[%x-%x]=", i, j, j - 1);
 233                     } else if (!odd) {
 234                         if (j > 0) {
 235                             cpu_fprintf(f, "   [%x-%x]=", j, j - 1);
 236                         } else {
 237                             cpu_fprintf(f, "     [%x]=", j);
 238                         }
 239                     }
 240                     cpu_fprintf(f, "%016" PRIx64 ":%016" PRIx64 "%s",
 241                                 env->vfp.zregs[i].d[j * 2 + 1],
 242                                 env->vfp.zregs[i].d[j * 2],
 243                                 odd || j == 0 ? "\n" : ":");
 244                 }
 245             }
 246         }
 247     } else {
 248         for (i = 0; i < 32; i++) {
 249             uint64_t *q = aa64_vfp_qreg(env, i);
 250             cpu_fprintf(f, "Q%02d=%016" PRIx64 ":%016" PRIx64 "%s",
 251                         i, q[1], q[0], (i & 1 ? "\n" : " "));
 252         }
 253     }
 254 }
 255
 256 void gen_a64_set_pc_im(uint64_t val)
 257 {
 258     tcg_gen_movi_i64(cpu_pc, val);
 259 }
 260
 261 /* Load the PC from a generic TCG variable.
 262  *
 263  * If address tagging is enabled via the TCR TBI bits, then loading
 264  * an address into the PC will clear out any tag in the it:
 265  *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 266  *    then the address is zero-extended, clearing bits [63:56]
 267  *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 268  *    and TBI1 controls addressses with bit 55 == 1.
 269  *    If the appropriate TBI bit is set for the address then
 270  *    the address is sign-extended from bit 55 into bits [63:56]
 271  *
 272  * We can avoid doing this for relative-branches, because the
 273  * PC + offset can never overflow into the tag bits (assuming
 274  * that virtual addresses are less than 56 bits wide, as they
 275  * are currently), but we must handle it for branch-to-register.
 276  */
 277 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
 278 {
 279
 280     if (s->current_el <= 1) {
 281         /* Test if NEITHER or BOTH TBI values are set.  If so, no need to
 282          * examine bit 55 of address, can just generate code.
 283          * If mixed, then test via generated code
 284          */
 285         if (s->tbi0 && s->tbi1) {
 286             TCGv_i64 tmp_reg = tcg_temp_new_i64();
 287             /* Both bits set, sign extension from bit 55 into [63:56] will
 288              * cover both cases
 289              */
 290             tcg_gen_shli_i64(tmp_reg, src, 8);
 291             tcg_gen_sari_i64(cpu_pc, tmp_reg, 8);
 292             tcg_temp_free_i64(tmp_reg);
 293         } else if (!s->tbi0 && !s->tbi1) {
 294             /* Neither bit set, just load it as-is */
 295             tcg_gen_mov_i64(cpu_pc, src);
 296         } else {
 297             TCGv_i64 tcg_tmpval = tcg_temp_new_i64();
 298             TCGv_i64 tcg_bit55  = tcg_temp_new_i64();
 299             TCGv_i64 tcg_zero   = tcg_const_i64(0);
 300
 301             tcg_gen_andi_i64(tcg_bit55, src, (1ull << 55));
 302
 303             if (s->tbi0) {
 304                 /* tbi0==1, tbi1==0, so 0-fill upper byte if bit 55 = 0 */
 305                 tcg_gen_andi_i64(tcg_tmpval, src,
 306                                  0x00FFFFFFFFFFFFFFull);
 307                 tcg_gen_movcond_i64(TCG_COND_EQ, cpu_pc, tcg_bit55, tcg_zero,
 308                                     tcg_tmpval, src);
 309             } else {
 310                 /* tbi0==0, tbi1==1, so 1-fill upper byte if bit 55 = 1 */
 311                 tcg_gen_ori_i64(tcg_tmpval, src,
 312                                 0xFF00000000000000ull);
 313                 tcg_gen_movcond_i64(TCG_COND_NE, cpu_pc, tcg_bit55, tcg_zero,
 314                                     tcg_tmpval, src);
 315             }
 316             tcg_temp_free_i64(tcg_zero);
 317             tcg_temp_free_i64(tcg_bit55);
 318             tcg_temp_free_i64(tcg_tmpval);
 319         }
 320     } else {  /* EL > 1 */
 321         if (s->tbi0) {
 322             /* Force tag byte to all zero */
 323             tcg_gen_andi_i64(cpu_pc, src, 0x00FFFFFFFFFFFFFFull);
 324         } else {
 325             /* Load unmodified address */
 326             tcg_gen_mov_i64(cpu_pc, src);
 327         }
 328     }
 329 }
 330
 331 typedef struct DisasCompare64 {
 332     TCGCond cond;
 333     TCGv_i64 value;
 334 } DisasCompare64;
 335
 336 static void a64_test_cc(DisasCompare64 *c64, int cc)
 337 {
 338     DisasCompare c32;
 339
 340     arm_test_cc(&c32, cc);
 341
 342     /* Sign-extend the 32-bit value so that the GE/LT comparisons work
 343        * properly.  The NE/EQ comparisons are also fine with this choice.  */
 344     c64->cond = c32.cond;
 345     c64->value = tcg_temp_new_i64();
 346     tcg_gen_ext_i32_i64(c64->value, c32.value);
 347
 348     arm_free_cc(&c32);
 349 }
 350
 351 static void a64_free_cc(DisasCompare64 *c64)
 352 {
 353     tcg_temp_free_i64(c64->value);
 354 }
 355
 356 static void gen_exception_internal(int excp)
 357 {
 358     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 359
 360     assert(excp_is_internal(excp));
 361     gen_helper_exception_internal(cpu_env, tcg_excp);
 362     tcg_temp_free_i32(tcg_excp);
 363 }
 364
 365 static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
 366 {
 367     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 368     TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
 369     TCGv_i32 tcg_el = tcg_const_i32(target_el);
 370
 371     gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
 372                                        tcg_syn, tcg_el);
 373     tcg_temp_free_i32(tcg_el);
 374     tcg_temp_free_i32(tcg_syn);
 375     tcg_temp_free_i32(tcg_excp);
 376 }
 377
 378 static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
 379 {
 380     gen_a64_set_pc_im(s->pc - offset);
 381     gen_exception_internal(excp);
 382     s->base.is_jmp = DISAS_NORETURN;
 383 }
 384
 385 static void gen_exception_insn(DisasContext *s, int offset, int excp,
 386                                uint32_t syndrome, uint32_t target_el)
 387 {
 388     gen_a64_set_pc_im(s->pc - offset);
 389     gen_exception(excp, syndrome, target_el);
 390     s->base.is_jmp = DISAS_NORETURN;
 391 }
 392
 393 static void gen_exception_bkpt_insn(DisasContext *s, int offset,
 394                                     uint32_t syndrome)
 395 {
 396     TCGv_i32 tcg_syn;
 397
 398     gen_a64_set_pc_im(s->pc - offset);
 399     tcg_syn = tcg_const_i32(syndrome);
 400     gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
 401     tcg_temp_free_i32(tcg_syn);
 402     s->base.is_jmp = DISAS_NORETURN;
 403 }
 404
 405 static void gen_ss_advance(DisasContext *s)
 406 {
 407     /* If the singlestep state is Active-not-pending, advance to
 408      * Active-pending.
 409      */
 410     if (s->ss_active) {
 411         s->pstate_ss = 0;
 412         gen_helper_clear_pstate_ss(cpu_env);
 413     }
 414 }
 415
 416 static void gen_step_complete_exception(DisasContext *s)
 417 {
 418     /* We just completed step of an insn. Move from Active-not-pending
 419      * to Active-pending, and then also take the swstep exception.
 420      * This corresponds to making the (IMPDEF) choice to prioritize
 421      * swstep exceptions over asynchronous exceptions taken to an exception
 422      * level where debug is disabled. This choice has the advantage that
 423      * we do not need to maintain internal state corresponding to the
 424      * ISV/EX syndrome bits between completion of the step and generation
 425      * of the exception, and our syndrome information is always correct.
 426      */
 427     gen_ss_advance(s);
 428     gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
 429                   default_exception_el(s));
 430     s->base.is_jmp = DISAS_NORETURN;
 431 }
 432
 433 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
 434 {
 435     /* No direct tb linking with singlestep (either QEMU's or the ARM
 436      * debug architecture kind) or deterministic io
 437      */
 438     if (s->base.singlestep_enabled || s->ss_active ||
 439         (tb_cflags(s->base.tb) & CF_LAST_IO)) {
 440         return false;
 441     }
 442
 443 #ifndef CONFIG_USER_ONLY
 444     /* Only link tbs from inside the same guest page */
 445     if ((s->base.tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
 446         return false;
 447     }
 448 #endif
 449
 450     return true;
 451 }
 452
 453 static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
 454 {
 455     TranslationBlock *tb;
 456
 457     tb = s->base.tb;
 458     if (use_goto_tb(s, n, dest)) {
 459         tcg_gen_goto_tb(n);
 460         gen_a64_set_pc_im(dest);
 461         tcg_gen_exit_tb(tb, n);
 462         s->base.is_jmp = DISAS_NORETURN;
 463     } else {
 464         gen_a64_set_pc_im(dest);
 465         if (s->ss_active) {
 466             gen_step_complete_exception(s);
 467         } else if (s->base.singlestep_enabled) {
 468             gen_exception_internal(EXCP_DEBUG);
 469         } else {
 470             tcg_gen_lookup_and_goto_ptr();
 471             s->base.is_jmp = DISAS_NORETURN;
 472         }
 473     }
 474 }
 475
 476 void unallocated_encoding(DisasContext *s)
 477 {
 478     /* Unallocated and reserved encodings are uncategorized */
 479     gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
 480                        default_exception_el(s));
 481 }
 482
 483 static void init_tmp_a64_array(DisasContext *s)
 484 {
 485 #ifdef CONFIG_DEBUG_TCG
 486     memset(s->tmp_a64, 0, sizeof(s->tmp_a64));
 487 #endif
 488     s->tmp_a64_count = 0;
 489 }
 490
 491 static void free_tmp_a64(DisasContext *s)
 492 {
 493     int i;
 494     for (i = 0; i < s->tmp_a64_count; i++) {
 495         tcg_temp_free_i64(s->tmp_a64[i]);
 496     }
 497     init_tmp_a64_array(s);
 498 }
 499
 500 TCGv_i64 new_tmp_a64(DisasContext *s)
 501 {
 502     assert(s->tmp_a64_count < TMP_A64_MAX);
 503     return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
 504 }
 505
 506 TCGv_i64 new_tmp_a64_zero(DisasContext *s)
 507 {
 508     TCGv_i64 t = new_tmp_a64(s);
 509     tcg_gen_movi_i64(t, 0);
 510     return t;
 511 }
 512
/*
 * Register access functions
 *
 * These functions are used for directly accessing a register in cases
 * where changes to the final register value are likely to be made. If
 * you need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
 528 TCGv_i64 cpu_reg(DisasContext *s, int reg)
 529 {
 530     if (reg == 31) {
 531         return new_tmp_a64_zero(s);
 532     } else {
 533         return cpu_X[reg];
 534     }
 535 }
 536
 537 /* register access for when 31 == SP */
 538 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
 539 {
 540     return cpu_X[reg];
 541 }
 542
 543 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 544  * representing the register contents. This TCGv is an auto-freed
 545  * temporary so it need not be explicitly freed, and may be modified.
 546  */
 547 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
 548 {
 549     TCGv_i64 v = new_tmp_a64(s);
 550     if (reg != 31) {
 551         if (sf) {
 552             tcg_gen_mov_i64(v, cpu_X[reg]);
 553         } else {
 554             tcg_gen_ext32u_i64(v, cpu_X[reg]);
 555         }
 556     } else {
 557         tcg_gen_movi_i64(v, 0);
 558     }
 559     return v;
 560 }
 561
 562 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
 563 {
 564     TCGv_i64 v = new_tmp_a64(s);
 565     if (sf) {
 566         tcg_gen_mov_i64(v, cpu_X[reg]);
 567     } else {
 568         tcg_gen_ext32u_i64(v, cpu_X[reg]);
 569     }
 570     return v;
 571 }
 572
 573 /* Return the offset into CPUARMState of a slice (from
 574  * the least significant end) of FP register Qn (ie
 575  * Dn, Sn, Hn or Bn).
 576  * (Note that this is not the same mapping as for A32; see cpu.h)
 577  */
 578 static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
 579 {
 580     return vec_reg_offset(s, regno, 0, size);
 581 }
 582
 583 /* Offset of the high half of the 128 bit vector Qn */
 584 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
 585 {
 586     return vec_reg_offset(s, regno, 1, MO_64);
 587 }
 588
 589 /* Convenience accessors for reading and writing single and double
 590  * FP registers. Writing clears the upper parts of the associated
 591  * 128 bit vector register, as required by the architecture.
 592  * Note that unlike the GP register accessors, the values returned
 593  * by the read functions must be manually freed.
 594  */
 595 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
 596 {
 597     TCGv_i64 v = tcg_temp_new_i64();
 598
 599     tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 600     return v;
 601 }
 602
 603 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
 604 {
 605     TCGv_i32 v = tcg_temp_new_i32();
 606
 607     tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
 608     return v;
 609 }
 610
 611 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
 612 {
 613     TCGv_i32 v = tcg_temp_new_i32();
 614
 615     tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
 616     return v;
 617 }
 618
 619 /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 620  * If SVE is not enabled, then there are only 128 bits in the vector.
 621  */
 622 static void clear_vec_high(DisasContext *s, bool is_q, int rd)
 623 {
 624     unsigned ofs = fp_reg_offset(s, rd, MO_64);
 625     unsigned vsz = vec_full_reg_size(s);
 626
 627     if (!is_q) {
 628         TCGv_i64 tcg_zero = tcg_const_i64(0);
 629         tcg_gen_st_i64(tcg_zero, cpu_env, ofs + 8);
 630         tcg_temp_free_i64(tcg_zero);
 631     }
 632     if (vsz > 16) {
 633         tcg_gen_gvec_dup8i(ofs + 16, vsz - 16, vsz - 16, 0);
 634     }
 635 }
 636
 637 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
 638 {
 639     unsigned ofs = fp_reg_offset(s, reg, MO_64);
 640
 641     tcg_gen_st_i64(v, cpu_env, ofs);
 642     clear_vec_high(s, false, reg);
 643 }
 644
 645 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
 646 {
 647     TCGv_i64 tmp = tcg_temp_new_i64();
 648
 649     tcg_gen_extu_i32_i64(tmp, v);
 650     write_fp_dreg(s, reg, tmp);
 651     tcg_temp_free_i64(tmp);
 652 }
 653
 654 TCGv_ptr get_fpstatus_ptr(bool is_f16)
 655 {
 656     TCGv_ptr statusptr = tcg_temp_new_ptr();
 657     int offset;
 658
 659     /* In A64 all instructions (both FP and Neon) use the FPCR; there
 660      * is no equivalent of the A32 Neon "standard FPSCR value".
 661      * However half-precision operations operate under a different
 662      * FZ16 flag and use vfp.fp_status_f16 instead of vfp.fp_status.
 663      */
 664     if (is_f16) {
 665         offset = offsetof(CPUARMState, vfp.fp_status_f16);
 666     } else {
 667         offset = offsetof(CPUARMState, vfp.fp_status);
 668     }
 669     tcg_gen_addi_ptr(statusptr, cpu_env, offset);
 670     return statusptr;
 671 }
 672
 673 /* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
 674 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
 675                          GVecGen2Fn *gvec_fn, int vece)
 676 {
 677     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 678             is_q ? 16 : 8, vec_full_reg_size(s));
 679 }
 680
 681 /* Expand a 2-operand + immediate AdvSIMD vector operation using
 682  * an expander function.
 683  */
 684 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
 685                           int64_t imm, GVecGen2iFn *gvec_fn, int vece)
 686 {
 687     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 688             imm, is_q ? 16 : 8, vec_full_reg_size(s));
 689 }
 690
 691 /* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
 692 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
 693                          GVecGen3Fn *gvec_fn, int vece)
 694 {
 695     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 696             vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
 697 }
 698
 699 /* Expand a 2-operand + immediate AdvSIMD vector operation using
 700  * an op descriptor.
 701  */
 702 static void gen_gvec_op2i(DisasContext *s, bool is_q, int rd,
 703                           int rn, int64_t imm, const GVecGen2i *gvec_op)
 704 {
 705     tcg_gen_gvec_2i(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 706                     is_q ? 16 : 8, vec_full_reg_size(s), imm, gvec_op);
 707 }
 708
 709 /* Expand a 3-operand AdvSIMD vector operation using an op descriptor.  */
 710 static void gen_gvec_op3(DisasContext *s, bool is_q, int rd,
 711                          int rn, int rm, const GVecGen3 *gvec_op)
 712 {
 713     tcg_gen_gvec_3(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 714                    vec_full_reg_offset(s, rm), is_q ? 16 : 8,
 715                    vec_full_reg_size(s), gvec_op);
 716 }
 717
 718 /* Expand a 3-operand operation using an out-of-line helper.  */
 719 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
 720                              int rn, int rm, int data, gen_helper_gvec_3 *fn)
 721 {
 722     tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
 723                        vec_full_reg_offset(s, rn),
 724                        vec_full_reg_offset(s, rm),
 725                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
 726 }
 727
 728 /* Expand a 3-operand + env pointer operation using
 729  * an out-of-line helper.
 730  */
 731 static void gen_gvec_op3_env(DisasContext *s, bool is_q, int rd,
 732                              int rn, int rm, gen_helper_gvec_3_ptr *fn)
 733 {
 734     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
 735                        vec_full_reg_offset(s, rn),
 736                        vec_full_reg_offset(s, rm), cpu_env,
 737                        is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
 738 }
 739
 740 /* Expand a 3-operand + fpstatus pointer + simd data value operation using
 741  * an out-of-line helper.
 742  */
 743 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
 744                               int rm, bool is_fp16, int data,
 745                               gen_helper_gvec_3_ptr *fn)
 746 {
 747     TCGv_ptr fpst = get_fpstatus_ptr(is_fp16);
 748     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
 749                        vec_full_reg_offset(s, rn),
 750                        vec_full_reg_offset(s, rm), fpst,
 751                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
 752     tcg_temp_free_ptr(fpst);
 753 }
 754
 755 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 756  * than the 32 bit equivalent.
 757  */
 758 static inline void gen_set_NZ64(TCGv_i64 result)
 759 {
 760     tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
 761     tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
 762 }
 763
 764 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
 765 static inline void gen_logic_CC(int sf, TCGv_i64 result)
 766 {
 767     if (sf) {
 768         gen_set_NZ64(result);
 769     } else {
 770         tcg_gen_extrl_i64_i32(cpu_ZF, result);
 771         tcg_gen_mov_i32(cpu_NF, cpu_ZF);
 772     }
 773     tcg_gen_movi_i32(cpu_CF, 0);
 774     tcg_gen_movi_i32(cpu_VF, 0);
 775 }
 776
 777 /* dest = T0 + T1; compute C, N, V and Z flags */
 778 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 779 {
 780     if (sf) {
 781         TCGv_i64 result, flag, tmp;
 782         result = tcg_temp_new_i64();
 783         flag = tcg_temp_new_i64();
 784         tmp = tcg_temp_new_i64();
 785
 786         tcg_gen_movi_i64(tmp, 0);
 787         tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
 788
 789         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 790
 791         gen_set_NZ64(result);
 792
 793         tcg_gen_xor_i64(flag, result, t0);
 794         tcg_gen_xor_i64(tmp, t0, t1);
 795         tcg_gen_andc_i64(flag, flag, tmp);
 796         tcg_temp_free_i64(tmp);
 797         tcg_gen_extrh_i64_i32(cpu_VF, flag);
 798
 799         tcg_gen_mov_i64(dest, result);
 800         tcg_temp_free_i64(result);
 801         tcg_temp_free_i64(flag);
 802     } else {
 803         /* 32 bit arithmetic */
 804         TCGv_i32 t0_32 = tcg_temp_new_i32();
 805         TCGv_i32 t1_32 = tcg_temp_new_i32();
 806         TCGv_i32 tmp = tcg_temp_new_i32();
 807
 808         tcg_gen_movi_i32(tmp, 0);
 809         tcg_gen_extrl_i64_i32(t0_32, t0);
 810         tcg_gen_extrl_i64_i32(t1_32, t1);
 811         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
 812         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 813         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 814         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 815         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 816         tcg_gen_extu_i32_i64(dest, cpu_NF);
 817
 818         tcg_temp_free_i32(tmp);
 819         tcg_temp_free_i32(t0_32);
 820         tcg_temp_free_i32(t1_32);
 821     }
 822 }
 823
 824 /* dest = T0 - T1; compute C, N, V and Z flags */
 825 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 826 {
 827     if (sf) {
 828         /* 64 bit arithmetic */
 829         TCGv_i64 result, flag, tmp;
 830
 831         result = tcg_temp_new_i64();
 832         flag = tcg_temp_new_i64();
 833         tcg_gen_sub_i64(result, t0, t1);
 834
 835         gen_set_NZ64(result);
 836
 837         tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
 838         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 839
 840         tcg_gen_xor_i64(flag, result, t0);
 841         tmp = tcg_temp_new_i64();
 842         tcg_gen_xor_i64(tmp, t0, t1);
 843         tcg_gen_and_i64(flag, flag, tmp);
 844         tcg_temp_free_i64(tmp);
 845         tcg_gen_extrh_i64_i32(cpu_VF, flag);
 846         tcg_gen_mov_i64(dest, result);
 847         tcg_temp_free_i64(flag);
 848         tcg_temp_free_i64(result);
 849     } else {
 850         /* 32 bit arithmetic */
 851         TCGv_i32 t0_32 = tcg_temp_new_i32();
 852         TCGv_i32 t1_32 = tcg_temp_new_i32();
 853         TCGv_i32 tmp;
 854
 855         tcg_gen_extrl_i64_i32(t0_32, t0);
 856         tcg_gen_extrl_i64_i32(t1_32, t1);
 857         tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
 858         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 859         tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
 860         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 861         tmp = tcg_temp_new_i32();
 862         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 863         tcg_temp_free_i32(t0_32);
 864         tcg_temp_free_i32(t1_32);
 865         tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 866         tcg_temp_free_i32(tmp);
 867         tcg_gen_extu_i32_i64(dest, cpu_NF);
 868     }
 869 }
 870
 871 /* dest = T0 + T1 + CF; do not compute flags. */
 872 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 873 {
 874     TCGv_i64 flag = tcg_temp_new_i64();
 875     tcg_gen_extu_i32_i64(flag, cpu_CF);
 876     tcg_gen_add_i64(dest, t0, t1);
 877     tcg_gen_add_i64(dest, dest, flag);
 878     tcg_temp_free_i64(flag);
 879
 880     if (!sf) {
 881         tcg_gen_ext32u_i64(dest, dest);
 882     }
 883 }
 884
 885 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
 886 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 887 {
 888     if (sf) {
 889         TCGv_i64 result, cf_64, vf_64, tmp;
 890         result = tcg_temp_new_i64();
 891         cf_64 = tcg_temp_new_i64();
 892         vf_64 = tcg_temp_new_i64();
 893         tmp = tcg_const_i64(0);
 894
 895         tcg_gen_extu_i32_i64(cf_64, cpu_CF);
 896         tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
 897         tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
 898         tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
 899         gen_set_NZ64(result);
 900
 901         tcg_gen_xor_i64(vf_64, result, t0);
 902         tcg_gen_xor_i64(tmp, t0, t1);
 903         tcg_gen_andc_i64(vf_64, vf_64, tmp);
 904         tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
 905
 906         tcg_gen_mov_i64(dest, result);
 907
 908         tcg_temp_free_i64(tmp);
 909         tcg_temp_free_i64(vf_64);
 910         tcg_temp_free_i64(cf_64);
 911         tcg_temp_free_i64(result);
 912     } else {
 913         TCGv_i32 t0_32, t1_32, tmp;
 914         t0_32 = tcg_temp_new_i32();
 915         t1_32 = tcg_temp_new_i32();
 916         tmp = tcg_const_i32(0);
 917
 918         tcg_gen_extrl_i64_i32(t0_32, t0);
 919         tcg_gen_extrl_i64_i32(t1_32, t1);
 920         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
 921         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);
 922
 923         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 924         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 925         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 926         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 927         tcg_gen_extu_i32_i64(dest, cpu_NF);
 928
 929         tcg_temp_free_i32(tmp);
 930         tcg_temp_free_i32(t1_32);
 931         tcg_temp_free_i32(t0_32);
 932     }
 933 }
 934
 935 /*
 936  * Load/Store generators
 937  */
 938
 939 /*
 940  * Store from GPR register to memory.
 941  */
 942 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
 943                              TCGv_i64 tcg_addr, int size, int memidx,
 944                              bool iss_valid,
 945                              unsigned int iss_srt,
 946                              bool iss_sf, bool iss_ar)
 947 {
 948     g_assert(size <= 3);
 949     tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);
 950
 951     if (iss_valid) {
 952         uint32_t syn;
 953
 954         syn = syn_data_abort_with_iss(0,
 955                                       size,
 956                                       false,
 957                                       iss_srt,
 958                                       iss_sf,
 959                                       iss_ar,
 960                                       0, 0, 0, 0, 0, false);
 961         disas_set_insn_syndrome(s, syn);
 962     }
 963 }
 964
 965 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
 966                       TCGv_i64 tcg_addr, int size,
 967                       bool iss_valid,
 968                       unsigned int iss_srt,
 969                       bool iss_sf, bool iss_ar)
 970 {
 971     do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
 972                      iss_valid, iss_srt, iss_sf, iss_ar);
 973 }
 974
 975 /*
 976  * Load from memory to GPR register
 977  */
 978 static void do_gpr_ld_memidx(DisasContext *s,
 979                              TCGv_i64 dest, TCGv_i64 tcg_addr,
 980                              int size, bool is_signed,
 981                              bool extend, int memidx,
 982                              bool iss_valid, unsigned int iss_srt,
 983                              bool iss_sf, bool iss_ar)
 984 {
 985     TCGMemOp memop = s->be_data + size;
 986
 987     g_assert(size <= 3);
 988
 989     if (is_signed) {
 990         memop += MO_SIGN;
 991     }
 992
 993     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
 994
 995     if (extend && is_signed) {
 996         g_assert(size < 3);
 997         tcg_gen_ext32u_i64(dest, dest);
 998     }
 999
1000     if (iss_valid) {
1001         uint32_t syn;
1002
1003         syn = syn_data_abort_with_iss(0,
1004                                       size,
1005                                       is_signed,
1006                                       iss_srt,
1007                                       iss_sf,
1008                                       iss_ar,
1009                                       0, 0, 0, 0, 0, false);
1010         disas_set_insn_syndrome(s, syn);
1011     }
1012 }
1013
1014 static void do_gpr_ld(DisasContext *s,
1015                       TCGv_i64 dest, TCGv_i64 tcg_addr,
1016                       int size, bool is_signed, bool extend,
1017                       bool iss_valid, unsigned int iss_srt,
1018                       bool iss_sf, bool iss_ar)
1019 {
1020     do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
1021                      get_mem_index(s),
1022                      iss_valid, iss_srt, iss_sf, iss_ar);
1023 }
1024
1025 /*
1026  * Store from FP register to memory
1027  */
1028 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
1029 {
1030     /* This writes the bottom N bits of a 128 bit wide vector to memory */
1031     TCGv_i64 tmp = tcg_temp_new_i64();
1032     tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
1033     if (size < 4) {
1034         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
1035                             s->be_data + size);
1036     } else {
1037         bool be = s->be_data == MO_BE;
1038         TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
1039
1040         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
1041         tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
1042                             s->be_data | MO_Q);
1043         tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
1044         tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
1045                             s->be_data | MO_Q);
1046         tcg_temp_free_i64(tcg_hiaddr);
1047     }
1048
1049     tcg_temp_free_i64(tmp);
1050 }
1051
1052 /*
1053  * Load from memory to FP register
1054  */
1055 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
1056 {
1057     /* This always zero-extends and writes to a full 128 bit wide vector */
1058     TCGv_i64 tmplo = tcg_temp_new_i64();
1059     TCGv_i64 tmphi;
1060
1061     if (size < 4) {
1062         TCGMemOp memop = s->be_data + size;
1063         tmphi = tcg_const_i64(0);
1064         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
1065     } else {
1066         bool be = s->be_data == MO_BE;
1067         TCGv_i64 tcg_hiaddr;
1068
1069         tmphi = tcg_temp_new_i64();
1070         tcg_hiaddr = tcg_temp_new_i64();
1071
1072         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
1073         tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
1074                             s->be_data | MO_Q);
1075         tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
1076                             s->be_data | MO_Q);
1077         tcg_temp_free_i64(tcg_hiaddr);
1078     }
1079
1080     tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
1081     tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
1082
1083     tcg_temp_free_i64(tmplo);
1084     tcg_temp_free_i64(tmphi);
1085
1086     clear_vec_high(s, true, destidx);
1087 }
1088
1089 /*
1090  * Vector load/store helpers.
1091  *
1092  * The principal difference between this and a FP load is that we don't
1093  * zero extend as we are filling a partial chunk of the vector register.
1094  * These functions don't support 128 bit loads/stores, which would be
1095  * normal load/store operations.
1096  *
1097  * The _i32 versions are useful when operating on 32 bit quantities
1098  * (eg for floating point single or using Neon helper functions).
1099  */
1100
1101 /* Get value of an element within a vector register */
1102 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
1103                              int element, TCGMemOp memop)
1104 {
1105     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1106     switch (memop) {
1107     case MO_8:
1108         tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
1109         break;
1110     case MO_16:
1111         tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
1112         break;
1113     case MO_32:
1114         tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
1115         break;
1116     case MO_8|MO_SIGN:
1117         tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
1118         break;
1119     case MO_16|MO_SIGN:
1120         tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
1121         break;
1122     case MO_32|MO_SIGN:
1123         tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
1124         break;
1125     case MO_64:
1126     case MO_64|MO_SIGN:
1127         tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
1128         break;
1129     default:
1130         g_assert_not_reached();
1131     }
1132 }
1133
1134 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1135                                  int element, TCGMemOp memop)
1136 {
1137     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1138     switch (memop) {
1139     case MO_8:
1140         tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
1141         break;
1142     case MO_16:
1143         tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
1144         break;
1145     case MO_8|MO_SIGN:
1146         tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
1147         break;
1148     case MO_16|MO_SIGN:
1149         tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
1150         break;
1151     case MO_32:
1152     case MO_32|MO_SIGN:
1153         tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
1154         break;
1155     default:
1156         g_assert_not_reached();
1157     }
1158 }
1159
1160 /* Set value of an element within a vector register */
1161 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1162                               int element, TCGMemOp memop)
1163 {
1164     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1165     switch (memop) {
1166     case MO_8:
1167         tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
1168         break;
1169     case MO_16:
1170         tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
1171         break;
1172     case MO_32:
1173         tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
1174         break;
1175     case MO_64:
1176         tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
1177         break;
1178     default:
1179         g_assert_not_reached();
1180     }
1181 }
1182
1183 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1184                                   int destidx, int element, TCGMemOp memop)
1185 {
1186     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1187     switch (memop) {
1188     case MO_8:
1189         tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
1190         break;
1191     case MO_16:
1192         tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
1193         break;
1194     case MO_32:
1195         tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
1196         break;
1197     default:
1198         g_assert_not_reached();
1199     }
1200 }
1201
1202 /* Store from vector register to memory */
1203 static void do_vec_st(DisasContext *s, int srcidx, int element,
1204                       TCGv_i64 tcg_addr, int size, TCGMemOp endian)
1205 {
1206     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1207
1208     read_vec_element(s, tcg_tmp, srcidx, element, size);
1209     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);
1210
1211     tcg_temp_free_i64(tcg_tmp);
1212 }
1213
1214 /* Load from memory to vector register */
1215 static void do_vec_ld(DisasContext *s, int destidx, int element,
1216                       TCGv_i64 tcg_addr, int size, TCGMemOp endian)
1217 {
1218     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1219
1220     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);
1221     write_vec_element(s, tcg_tmp, destidx, element, size);
1222
1223     tcg_temp_free_i64(tcg_tmp);
1224 }
1225
1226 /* Check that FP/Neon access is enabled. If it is, return
1227  * true. If not, emit code to generate an appropriate exception,
1228  * and return false; the caller should not emit any code for
1229  * the instruction. Note that this check must happen after all
1230  * unallocated-encoding checks (otherwise the syndrome information
1231  * for the resulting exception will be incorrect).
1232  */
1233 static inline bool fp_access_check(DisasContext *s)
1234 {
1235     assert(!s->fp_access_checked);
1236     s->fp_access_checked = true;
1237
1238     if (!s->fp_excp_el) {
1239         return true;
1240     }
1241
1242     gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
1243                        s->fp_excp_el);
1244     return false;
1245 }
1246
1247 /* Check that SVE access is enabled.  If it is, return true.
1248  * If not, emit code to generate an appropriate exception and return false.
1249  */
1250 bool sve_access_check(DisasContext *s)
1251 {
1252     if (s->sve_excp_el) {
1253         gen_exception_insn(s, 4, EXCP_UDEF, syn_sve_access_trap(),
1254                            s->sve_excp_el);
1255         return false;
1256     }
1257     return fp_access_check(s);
1258 }
1259
1260 /*
1261  * This utility function is for doing register extension with an
1262  * optional shift. You will likely want to pass a temporary for the
1263  * destination register. See DecodeRegExtend() in the ARM ARM.
1264  */
1265 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1266                               int option, unsigned int shift)
1267 {
1268     int extsize = extract32(option, 0, 2);
1269     bool is_signed = extract32(option, 2, 1);
1270
1271     if (is_signed) {
1272         switch (extsize) {
1273         case 0:
1274             tcg_gen_ext8s_i64(tcg_out, tcg_in);
1275             break;
1276         case 1:
1277             tcg_gen_ext16s_i64(tcg_out, tcg_in);
1278             break;
1279         case 2:
1280             tcg_gen_ext32s_i64(tcg_out, tcg_in);
1281             break;
1282         case 3:
1283             tcg_gen_mov_i64(tcg_out, tcg_in);
1284             break;
1285         }
1286     } else {
1287         switch (extsize) {
1288         case 0:
1289             tcg_gen_ext8u_i64(tcg_out, tcg_in);
1290             break;
1291         case 1:
1292             tcg_gen_ext16u_i64(tcg_out, tcg_in);
1293             break;
1294         case 2:
1295             tcg_gen_ext32u_i64(tcg_out, tcg_in);
1296             break;
1297         case 3:
1298             tcg_gen_mov_i64(tcg_out, tcg_in);
1299             break;
1300         }
1301     }
1302
1303     if (shift) {
1304         tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1305     }
1306 }
1307
/*
 * Hook for the SP alignment check on SP-relative loads and stores.
 * Currently emits no code at all; see the comment in the body for why.
 */
static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}
1320
1321 /*
1322  * This provides a simple table based table lookup decoder. It is
1323  * intended to be used when the relevant bits for decode are too
1324  * awkwardly placed and switch/if based logic would be confusing and
1325  * deeply nested. Since it's a linear search through the table, tables
1326  * should be kept small.
1327  *
1328  * It returns the first handler where insn & mask == pattern, or
1329  * NULL if there is no match.
1330  * The table is terminated by an empty mask (i.e. 0)
1331  */
1332 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1333                                                uint32_t insn)
1334 {
1335     const AArch64DecodeTable *tptr = table;
1336
1337     while (tptr->mask) {
1338         if ((insn & tptr->mask) == tptr->pattern) {
1339             return tptr->disas_fn;
1340         }
1341         tptr++;
1342     }
1343     return NULL;
1344 }
1345
1346 /*
1347  * The instruction disassembly implemented here matches
1348  * the instruction encoding classifications in chapter C4
1349  * of the ARM Architecture Reference Manual (DDI0487B_a);
1350  * classification names and decode diagrams here should generally
1351  * match up with those in the manual.
1352  */
1353
1354 /* Unconditional branch (immediate)
1355  *   31  30       26 25                                  0
1356  * +----+-----------+-------------------------------------+
1357  * | op | 0 0 1 0 1 |                 imm26               |
1358  * +----+-----------+-------------------------------------+
1359  */
1360 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1361 {
1362     uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
1363
1364     if (insn & (1U << 31)) {
1365         /* BL Branch with link */
1366         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1367     }
1368
1369     /* B Branch / BL Branch with link */
1370     gen_goto_tb(s, 0, addr);
1371 }
1372
1373 /* Compare and branch (immediate)
1374  *   31  30         25  24  23                  5 4      0
1375  * +----+-------------+----+---------------------+--------+
1376  * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
1377  * +----+-------------+----+---------------------+--------+
1378  */
1379 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1380 {
1381     unsigned int sf, op, rt;
1382     uint64_t addr;
1383     TCGLabel *label_match;
1384     TCGv_i64 tcg_cmp;
1385
1386     sf = extract32(insn, 31, 1);
1387     op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1388     rt = extract32(insn, 0, 5);
1389     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1390
1391     tcg_cmp = read_cpu_reg(s, rt, sf);
1392     label_match = gen_new_label();
1393
1394     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1395                         tcg_cmp, 0, label_match);
1396
1397     gen_goto_tb(s, 0, s->pc);
1398     gen_set_label(label_match);
1399     gen_goto_tb(s, 1, addr);
1400 }
1401
1402 /* Test and branch (immediate)
1403  *   31  30         25  24  23   19 18          5 4    0
1404  * +----+-------------+----+-------+-------------+------+
1405  * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
1406  * +----+-------------+----+-------+-------------+------+
1407  */
1408 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1409 {
1410     unsigned int bit_pos, op, rt;
1411     uint64_t addr;
1412     TCGLabel *label_match;
1413     TCGv_i64 tcg_cmp;
1414
1415     bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1416     op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1417     addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
1418     rt = extract32(insn, 0, 5);
1419
1420     tcg_cmp = tcg_temp_new_i64();
1421     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1422     label_match = gen_new_label();
1423     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1424                         tcg_cmp, 0, label_match);
1425     tcg_temp_free_i64(tcg_cmp);
1426     gen_goto_tb(s, 0, s->pc);
1427     gen_set_label(label_match);
1428     gen_goto_tb(s, 1, addr);
1429 }
1430
1431 /* Conditional branch (immediate)
1432  *  31           25  24  23                  5   4  3    0
1433  * +---------------+----+---------------------+----+------+
1434  * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
1435  * +---------------+----+---------------------+----+------+
1436  */
1437 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1438 {
1439     unsigned int cond;
1440     uint64_t addr;
1441
1442     if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1443         unallocated_encoding(s);
1444         return;
1445     }
1446     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1447     cond = extract32(insn, 0, 4);
1448
1449     if (cond < 0x0e) {
1450         /* genuinely conditional branches */
1451         TCGLabel *label_match = gen_new_label();
1452         arm_gen_test_cc(cond, label_match);
1453         gen_goto_tb(s, 0, s->pc);
1454         gen_set_label(label_match);
1455         gen_goto_tb(s, 1, addr);
1456     } else {
1457         /* 0xe and 0xf are both "always" conditions */
1458         gen_goto_tb(s, 0, addr);
1459     }
1460 }
1461
1462 /* HINT instruction group, including various allocated HINTs */
1463 static void handle_hint(DisasContext *s, uint32_t insn,
1464                         unsigned int op1, unsigned int op2, unsigned int crm)
1465 {
1466     unsigned int selector = crm << 3 | op2;
1467
1468     if (op1 != 3) {
1469         unallocated_encoding(s);
1470         return;
1471     }
1472
1473     switch (selector) {
1474     case 0: /* NOP */
1475         return;
1476     case 3: /* WFI */
1477         s->base.is_jmp = DISAS_WFI;
1478         return;
1479         /* When running in MTTCG we don't generate jumps to the yield and
1480          * WFE helpers as it won't affect the scheduling of other vCPUs.
1481          * If we wanted to more completely model WFE/SEV so we don't busy
1482          * spin unnecessarily we would need to do something more involved.
1483          */
1484     case 1: /* YIELD */
1485         if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1486             s->base.is_jmp = DISAS_YIELD;
1487         }
1488         return;
1489     case 2: /* WFE */
1490         if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1491             s->base.is_jmp = DISAS_WFE;
1492         }
1493         return;
1494     case 4: /* SEV */
1495     case 5: /* SEVL */
1496         /* we treat all as NOP at least for now */
1497         return;
1498     default:
1499         /* default specified as NOP equivalent */
1500         return;
1501     }
1502 }
1503
/*
 * CLREX: emit code that sets cpu_exclusive_addr to -1.
 * NOTE(review): -1 presumably marks "no exclusive access outstanding";
 * confirm against the load/store exclusive code.  's' and 'insn' are
 * not used here.
 */
static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
1508
1509 /* CLREX, DSB, DMB, ISB */
1510 static void handle_sync(DisasContext *s, uint32_t insn,
1511                         unsigned int op1, unsigned int op2, unsigned int crm)
1512 {
1513     TCGBar bar;
1514
1515     if (op1 != 3) {
1516         unallocated_encoding(s);
1517         return;
1518     }
1519
1520     switch (op2) {
1521     case 2: /* CLREX */
1522         gen_clrex(s, insn);
1523         return;
1524     case 4: /* DSB */
1525     case 5: /* DMB */
1526         switch (crm & 3) {
1527         case 1: /* MBReqTypes_Reads */
1528             bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1529             break;
1530         case 2: /* MBReqTypes_Writes */
1531             bar = TCG_BAR_SC | TCG_MO_ST_ST;
1532             break;
1533         default: /* MBReqTypes_All */
1534             bar = TCG_BAR_SC | TCG_MO_ALL;
1535             break;
1536         }
1537         tcg_gen_mb(bar);
1538         return;
1539     case 6: /* ISB */
1540         /* We need to break the TB after this insn to execute
1541          * a self-modified code correctly and also to take
1542          * any pending interrupts immediately.
1543          */
1544         gen_goto_tb(s, 0, s->pc);
1545         return;
1546     default:
1547         unallocated_encoding(s);
1548         return;
1549     }
1550 }
1551
1552 /* MSR (immediate) - move immediate to processor state field */
1553 static void handle_msr_i(DisasContext *s, uint32_t insn,
1554                          unsigned int op1, unsigned int op2, unsigned int crm)
1555 {
1556     int op = op1 << 3 | op2;
1557     switch (op) {
1558     case 0x05: /* SPSel */
1559         if (s->current_el == 0) {
1560             unallocated_encoding(s);
1561             return;
1562         }
1563         /* fall through */
1564     case 0x1e: /* DAIFSet */
1565     case 0x1f: /* DAIFClear */
1566     {
1567         TCGv_i32 tcg_imm = tcg_const_i32(crm);
1568         TCGv_i32 tcg_op = tcg_const_i32(op);
1569         gen_a64_set_pc_im(s->pc - 4);
1570         gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
1571         tcg_temp_free_i32(tcg_imm);
1572         tcg_temp_free_i32(tcg_op);
1573         /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs.  */
1574         gen_a64_set_pc_im(s->pc);
1575         s->base.is_jmp = (op == 0x1f ? DISAS_EXIT : DISAS_JUMP);
1576         break;
1577     }
1578     default:
1579         unallocated_encoding(s);
1580         return;
1581     }
1582 }
1583
1584 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1585 {
1586     TCGv_i32 tmp = tcg_temp_new_i32();
1587     TCGv_i32 nzcv = tcg_temp_new_i32();
1588
1589     /* build bit 31, N */
1590     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1591     /* build bit 30, Z */
1592     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1593     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1594     /* build bit 29, C */
1595     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1596     /* build bit 28, V */
1597     tcg_gen_shri_i32(tmp, cpu_VF, 31);
1598     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1599     /* generate result */
1600     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1601
1602     tcg_temp_free_i32(nzcv);
1603     tcg_temp_free_i32(tmp);
1604 }
1605
1606 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1607
1608 {
1609     TCGv_i32 nzcv = tcg_temp_new_i32();
1610
1611     /* take NZCV from R[t] */
1612     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1613
1614     /* bit 31, N */
1615     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1616     /* bit 30, Z */
1617     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1618     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1619     /* bit 29, C */
1620     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1621     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1622     /* bit 28, V */
1623     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1624     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1625     tcg_temp_free_i32(nzcv);
1626 }
1627
1628 /* MRS - move from system register
1629  * MSR (register) - move to system register
1630  * SYS
1631  * SYSL
1632  * These are all essentially the same insn in 'read' and 'write'
1633  * versions, with varying op0 fields.
1634  */
1635 static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1636                        unsigned int op0, unsigned int op1, unsigned int op2,
1637                        unsigned int crn, unsigned int crm, unsigned int rt)
1638 {
1639     const ARMCPRegInfo *ri;
1640     TCGv_i64 tcg_rt;
1641
1642     ri = get_arm_cp_reginfo(s->cp_regs,
1643                             ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1644                                                crn, crm, op0, op1, op2));
1645
1646     if (!ri) {
1647         /* Unknown register; this might be a guest error or a QEMU
1648          * unimplemented feature.
1649          */
1650         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1651                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1652                       isread ? "read" : "write", op0, op1, crn, crm, op2);
1653         unallocated_encoding(s);
1654         return;
1655     }
1656
1657     /* Check access permissions */
1658     if (!cp_access_ok(s->current_el, ri, isread)) {
1659         unallocated_encoding(s);
1660         return;
1661     }
1662
1663     if (ri->accessfn) {
1664         /* Emit code to perform further access permissions checks at
1665          * runtime; this may result in an exception.
1666          */
1667         TCGv_ptr tmpptr;
1668         TCGv_i32 tcg_syn, tcg_isread;
1669         uint32_t syndrome;
1670
1671         gen_a64_set_pc_im(s->pc - 4);
1672         tmpptr = tcg_const_ptr(ri);
1673         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1674         tcg_syn = tcg_const_i32(syndrome);
1675         tcg_isread = tcg_const_i32(isread);
1676         gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
1677         tcg_temp_free_ptr(tmpptr);
1678         tcg_temp_free_i32(tcg_syn);
1679         tcg_temp_free_i32(tcg_isread);
1680     }
1681
1682     /* Handle special cases first */
1683     switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
1684     case ARM_CP_NOP:
1685         return;
1686     case ARM_CP_NZCV:
1687         tcg_rt = cpu_reg(s, rt);
1688         if (isread) {
1689             gen_get_nzcv(tcg_rt);
1690         } else {
1691             gen_set_nzcv(tcg_rt);
1692         }
1693         return;
1694     case ARM_CP_CURRENTEL:
1695         /* Reads as current EL value from pstate, which is
1696          * guaranteed to be constant by the tb flags.
1697          */
1698         tcg_rt = cpu_reg(s, rt);
1699         tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
1700         return;
1701     case ARM_CP_DC_ZVA:
1702         /* Writes clear the aligned block of memory which rt points into. */
1703         tcg_rt = cpu_reg(s, rt);
1704         gen_helper_dc_zva(cpu_env, tcg_rt);
1705         return;
1706     default:
1707         break;
1708     }
1709     if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
1710         return;
1711     } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
1712         return;
1713     }
1714
1715     if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1716         gen_io_start();
1717     }
1718
1719     tcg_rt = cpu_reg(s, rt);
1720
1721     if (isread) {
1722         if (ri->type & ARM_CP_CONST) {
1723             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
1724         } else if (ri->readfn) {
1725             TCGv_ptr tmpptr;
1726             tmpptr = tcg_const_ptr(ri);
1727             gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
1728             tcg_temp_free_ptr(tmpptr);
1729         } else {
1730             tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
1731         }
1732     } else {
1733         if (ri->type & ARM_CP_CONST) {
1734             /* If not forbidden by access permissions, treat as WI */
1735             return;
1736         } else if (ri->writefn) {
1737             TCGv_ptr tmpptr;
1738             tmpptr = tcg_const_ptr(ri);
1739             gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
1740             tcg_temp_free_ptr(tmpptr);
1741         } else {
1742             tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
1743         }
1744     }
1745
1746     if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1747         /* I/O operations must end the TB here (whether read or write) */
1748         gen_io_end();
1749         s->base.is_jmp = DISAS_UPDATE;
1750     } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
1751         /* We default to ending the TB on a coprocessor register write,
1752          * but allow this to be suppressed by the register definition
1753          * (usually only necessary to work around guest bugs).
1754          */
1755         s->base.is_jmp = DISAS_UPDATE;
1756     }
1757 }
1758
1759 /* System
1760  *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1761  * +---------------------+---+-----+-----+-------+-------+-----+------+
1762  * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1763  * +---------------------+---+-----+-----+-------+-------+-----+------+
1764  */
1765 static void disas_system(DisasContext *s, uint32_t insn)
1766 {
1767     unsigned int l, op0, op1, crn, crm, op2, rt;
1768     l = extract32(insn, 21, 1);
1769     op0 = extract32(insn, 19, 2);
1770     op1 = extract32(insn, 16, 3);
1771     crn = extract32(insn, 12, 4);
1772     crm = extract32(insn, 8, 4);
1773     op2 = extract32(insn, 5, 3);
1774     rt = extract32(insn, 0, 5);
1775
1776     if (op0 == 0) {
1777         if (l || rt != 31) {
1778             unallocated_encoding(s);
1779             return;
1780         }
1781         switch (crn) {
1782         case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
1783             handle_hint(s, insn, op1, op2, crm);
1784             break;
1785         case 3: /* CLREX, DSB, DMB, ISB */
1786             handle_sync(s, insn, op1, op2, crm);
1787             break;
1788         case 4: /* MSR (immediate) */
1789             handle_msr_i(s, insn, op1, op2, crm);
1790             break;
1791         default:
1792             unallocated_encoding(s);
1793             break;
1794         }
1795         return;
1796     }
1797     handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1798 }
1799
1800 /* Exception generation
1801  *
1802  *  31             24 23 21 20                     5 4   2 1  0
1803  * +-----------------+-----+------------------------+-----+----+
1804  * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
1805  * +-----------------------+------------------------+----------+
1806  */
1807 static void disas_exc(DisasContext *s, uint32_t insn)
1808 {
1809     int opc = extract32(insn, 21, 3);
1810     int op2_ll = extract32(insn, 0, 5);
1811     int imm16 = extract32(insn, 5, 16);
1812     TCGv_i32 tmp;
1813
1814     switch (opc) {
1815     case 0:
1816         /* For SVC, HVC and SMC we advance the single-step state
1817          * machine before taking the exception. This is architecturally
1818          * mandated, to ensure that single-stepping a system call
1819          * instruction works properly.
1820          */
1821         switch (op2_ll) {
1822         case 1:                                                     /* SVC */
1823             gen_ss_advance(s);
1824             gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
1825                                default_exception_el(s));
1826             break;
1827         case 2:                                                     /* HVC */
1828             if (s->current_el == 0) {
1829                 unallocated_encoding(s);
1830                 break;
1831             }
1832             /* The pre HVC helper handles cases when HVC gets trapped
1833              * as an undefined insn by runtime configuration.
1834              */
1835             gen_a64_set_pc_im(s->pc - 4);
1836             gen_helper_pre_hvc(cpu_env);
1837             gen_ss_advance(s);
1838             gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
1839             break;
1840         case 3:                                                     /* SMC */
1841             if (s->current_el == 0) {
1842                 unallocated_encoding(s);
1843                 break;
1844             }
1845             gen_a64_set_pc_im(s->pc - 4);
1846             tmp = tcg_const_i32(syn_aa64_smc(imm16));
1847             gen_helper_pre_smc(cpu_env, tmp);
1848             tcg_temp_free_i32(tmp);
1849             gen_ss_advance(s);
1850             gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
1851             break;
1852         default:
1853             unallocated_encoding(s);
1854             break;
1855         }
1856         break;
1857     case 1:
1858         if (op2_ll != 0) {
1859             unallocated_encoding(s);
1860             break;
1861         }
1862         /* BRK */
1863         gen_exception_bkpt_insn(s, 4, syn_aa64_bkpt(imm16));
1864         break;
1865     case 2:
1866         if (op2_ll != 0) {
1867             unallocated_encoding(s);
1868             break;
1869         }
1870         /* HLT. This has two purposes.
1871          * Architecturally, it is an external halting debug instruction.
1872          * Since QEMU doesn't implement external debug, we treat this as
1873          * it is required for halting debug disabled: it will UNDEF.
1874          * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
1875          */
1876         if (semihosting_enabled() && imm16 == 0xf000) {
1877 #ifndef CONFIG_USER_ONLY
1878             /* In system mode, don't allow userspace access to semihosting,
1879              * to provide some semblance of security (and for consistency
1880              * with our 32-bit semihosting).
1881              */
1882             if (s->current_el == 0) {
1883                 unsupported_encoding(s, insn);
1884                 break;
1885             }
1886 #endif
1887             gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
1888         } else {
1889             unsupported_encoding(s, insn);
1890         }
1891         break;
1892     case 5:
1893         if (op2_ll < 1 || op2_ll > 3) {
1894             unallocated_encoding(s);
1895             break;
1896         }
1897         /* DCPS1, DCPS2, DCPS3 */
1898         unsupported_encoding(s, insn);
1899         break;
1900     default:
1901         unallocated_encoding(s);
1902         break;
1903     }
1904 }
1905
1906 /* Unconditional branch (register)
1907  *  31           25 24   21 20   16 15   10 9    5 4     0
1908  * +---------------+-------+-------+-------+------+-------+
1909  * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1910  * +---------------+-------+-------+-------+------+-------+
1911  */
1912 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1913 {
1914     unsigned int opc, op2, op3, rn, op4;
1915
1916     opc = extract32(insn, 21, 4);
1917     op2 = extract32(insn, 16, 5);
1918     op3 = extract32(insn, 10, 6);
1919     rn = extract32(insn, 5, 5);
1920     op4 = extract32(insn, 0, 5);
1921
1922     if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1923         unallocated_encoding(s);
1924         return;
1925     }
1926
1927     switch (opc) {
1928     case 0: /* BR */
1929     case 1: /* BLR */
1930     case 2: /* RET */
1931         gen_a64_set_pc(s, cpu_reg(s, rn));
1932         /* BLR also needs to load return address */
1933         if (opc == 1) {
1934             tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1935         }
1936         break;
1937     case 4: /* ERET */
1938         if (s->current_el == 0) {
1939             unallocated_encoding(s);
1940             return;
1941         }
1942         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
1943             gen_io_start();
1944         }
1945         gen_helper_exception_return(cpu_env);
1946         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
1947             gen_io_end();
1948         }
1949         /* Must exit loop to check un-masked IRQs */
1950         s->base.is_jmp = DISAS_EXIT;
1951         return;
1952     case 5: /* DRPS */
1953         if (rn != 0x1f) {
1954             unallocated_encoding(s);
1955         } else {
1956             unsupported_encoding(s, insn);
1957         }
1958         return;
1959     default:
1960         unallocated_encoding(s);
1961         return;
1962     }
1963
1964     s->base.is_jmp = DISAS_JUMP;
1965 }
1966
1967 /* Branches, exception generating and system instructions */
1968 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1969 {
1970     switch (extract32(insn, 25, 7)) {
1971     case 0x0a: case 0x0b:
1972     case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1973         disas_uncond_b_imm(s, insn);
1974         break;
1975     case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1976         disas_comp_b_imm(s, insn);
1977         break;
1978     case 0x1b: case 0x5b: /* Test & branch (immediate) */
1979         disas_test_b_imm(s, insn);
1980         break;
1981     case 0x2a: /* Conditional branch (immediate) */
1982         disas_cond_b_imm(s, insn);
1983         break;
1984     case 0x6a: /* Exception generation / System */
1985         if (insn & (1 << 24)) {
1986             disas_system(s, insn);
1987         } else {
1988             disas_exc(s, insn);
1989         }
1990         break;
1991     case 0x6b: /* Unconditional branch (register) */
1992         disas_uncond_b_reg(s, insn);
1993         break;
1994     default:
1995         unallocated_encoding(s);
1996         break;
1997     }
1998 }
1999
2000 /*
2001  * Load/Store exclusive instructions are implemented by remembering
2002  * the value/address loaded, and seeing if these are the same
2003  * when the store is performed. This is not actually the architecturally
2004  * mandated semantics, but it works for typical guest code sequences
2005  * and avoids having to monitor regular stores.
2006  *
2007  * The store exclusive uses the atomic cmpxchg primitives to avoid
2008  * races in multi-threaded linux-user and when MTTCG softmmu is
2009  * enabled.
2010  */
2011 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
2012                                TCGv_i64 addr, int size, bool is_pair)
2013 {
2014     int idx = get_mem_index(s);
2015     TCGMemOp memop = s->be_data;
2016
2017     g_assert(size <= 3);
2018     if (is_pair) {
2019         g_assert(size >= 2);
2020         if (size == 2) {
2021             /* The pair must be single-copy atomic for the doubleword.  */
2022             memop |= MO_64 | MO_ALIGN;
2023             tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2024             if (s->be_data == MO_LE) {
2025                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2026                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2027             } else {
2028                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2029                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2030             }
2031         } else {
2032             /* The pair must be single-copy atomic for *each* doubleword, not
2033                the entire quadword, however it must be quadword aligned.  */
2034             memop |= MO_64;
2035             tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
2036                                 memop | MO_ALIGN_16);
2037
2038             TCGv_i64 addr2 = tcg_temp_new_i64();
2039             tcg_gen_addi_i64(addr2, addr, 8);
2040             tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
2041             tcg_temp_free_i64(addr2);
2042
2043             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2044             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2045         }
2046     } else {
2047         memop |= size | MO_ALIGN;
2048         tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2049         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2050     }
2051     tcg_gen_mov_i64(cpu_exclusive_addr, addr);
2052 }
2053
2054 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2055                                 TCGv_i64 addr, int size, int is_pair)
2056 {
2057     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2058      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
2059      *     [addr] = {Rt};
2060      *     if (is_pair) {
2061      *         [addr + datasize] = {Rt2};
2062      *     }
2063      *     {Rd} = 0;
2064      * } else {
2065      *     {Rd} = 1;
2066      * }
2067      * env->exclusive_addr = -1;
2068      */
2069     TCGLabel *fail_label = gen_new_label();
2070     TCGLabel *done_label = gen_new_label();
2071     TCGv_i64 tmp;
2072
2073     tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
2074
2075     tmp = tcg_temp_new_i64();
2076     if (is_pair) {
2077         if (size == 2) {
2078             if (s->be_data == MO_LE) {
2079                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2080             } else {
2081                 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2082             }
2083             tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2084                                        cpu_exclusive_val, tmp,
2085                                        get_mem_index(s),
2086                                        MO_64 | MO_ALIGN | s->be_data);
2087             tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2088         } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2089             if (!HAVE_CMPXCHG128) {
2090                 gen_helper_exit_atomic(cpu_env);
2091                 s->base.is_jmp = DISAS_NORETURN;
2092             } else if (s->be_data == MO_LE) {
2093                 gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
2094                                                         cpu_exclusive_addr,
2095                                                         cpu_reg(s, rt),
2096                                                         cpu_reg(s, rt2));
2097             } else {
2098                 gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
2099                                                         cpu_exclusive_addr,
2100                                                         cpu_reg(s, rt),
2101                                                         cpu_reg(s, rt2));
2102             }
2103         } else if (s->be_data == MO_LE) {
2104             gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
2105                                            cpu_reg(s, rt), cpu_reg(s, rt2));
2106         } else {
2107             gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
2108                                            cpu_reg(s, rt), cpu_reg(s, rt2));
2109         }
2110     } else {
2111         tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2112                                    cpu_reg(s, rt), get_mem_index(s),
2113                                    size | MO_ALIGN | s->be_data);
2114         tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2115     }
2116     tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2117     tcg_temp_free_i64(tmp);
2118     tcg_gen_br(done_label);
2119
2120     gen_set_label(fail_label);
2121     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2122     gen_set_label(done_label);
2123     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2124 }
2125
2126 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2127                                  int rn, int size)
2128 {
2129     TCGv_i64 tcg_rs = cpu_reg(s, rs);
2130     TCGv_i64 tcg_rt = cpu_reg(s, rt);
2131     int memidx = get_mem_index(s);
2132     TCGv_i64 addr = cpu_reg_sp(s, rn);
2133
2134     if (rn == 31) {
2135         gen_check_sp_alignment(s);
2136     }
2137     tcg_gen_atomic_cmpxchg_i64(tcg_rs, addr, tcg_rs, tcg_rt, memidx,
2138                                size | MO_ALIGN | s->be_data);
2139 }
2140
2141 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2142                                       int rn, int size)
2143 {
2144     TCGv_i64 s1 = cpu_reg(s, rs);
2145     TCGv_i64 s2 = cpu_reg(s, rs + 1);
2146     TCGv_i64 t1 = cpu_reg(s, rt);
2147     TCGv_i64 t2 = cpu_reg(s, rt + 1);
2148     TCGv_i64 addr = cpu_reg_sp(s, rn);
2149     int memidx = get_mem_index(s);
2150
2151     if (rn == 31) {
2152         gen_check_sp_alignment(s);
2153     }
2154
2155     if (size == 2) {
2156         TCGv_i64 cmp = tcg_temp_new_i64();
2157         TCGv_i64 val = tcg_temp_new_i64();
2158
2159         if (s->be_data == MO_LE) {
2160             tcg_gen_concat32_i64(val, t1, t2);
2161             tcg_gen_concat32_i64(cmp, s1, s2);
2162         } else {
2163             tcg_gen_concat32_i64(val, t2, t1);
2164             tcg_gen_concat32_i64(cmp, s2, s1);
2165         }
2166
2167         tcg_gen_atomic_cmpxchg_i64(cmp, addr, cmp, val, memidx,
2168                                    MO_64 | MO_ALIGN | s->be_data);
2169         tcg_temp_free_i64(val);
2170
2171         if (s->be_data == MO_LE) {
2172             tcg_gen_extr32_i64(s1, s2, cmp);
2173         } else {
2174             tcg_gen_extr32_i64(s2, s1, cmp);
2175         }
2176         tcg_temp_free_i64(cmp);
2177     } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2178         if (HAVE_CMPXCHG128) {
2179             TCGv_i32 tcg_rs = tcg_const_i32(rs);
2180             if (s->be_data == MO_LE) {
2181                 gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
2182             } else {
2183                 gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
2184             }
2185             tcg_temp_free_i32(tcg_rs);
2186         } else {
2187             gen_helper_exit_atomic(cpu_env);
2188             s->base.is_jmp = DISAS_NORETURN;
2189         }
2190     } else {
2191         TCGv_i64 d1 = tcg_temp_new_i64();
2192         TCGv_i64 d2 = tcg_temp_new_i64();
2193         TCGv_i64 a2 = tcg_temp_new_i64();
2194         TCGv_i64 c1 = tcg_temp_new_i64();
2195         TCGv_i64 c2 = tcg_temp_new_i64();
2196         TCGv_i64 zero = tcg_const_i64(0);
2197
2198         /* Load the two words, in memory order.  */
2199         tcg_gen_qemu_ld_i64(d1, addr, memidx,
2200                             MO_64 | MO_ALIGN_16 | s->be_data);
2201         tcg_gen_addi_i64(a2, addr, 8);
2202         tcg_gen_qemu_ld_i64(d2, addr, memidx, MO_64 | s->be_data);
2203
2204         /* Compare the two words, also in memory order.  */
2205         tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
2206         tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
2207         tcg_gen_and_i64(c2, c2, c1);
2208
2209         /* If compare equal, write back new data, else write back old data.  */
2210         tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
2211         tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
2212         tcg_gen_qemu_st_i64(c1, addr, memidx, MO_64 | s->be_data);
2213         tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
2214         tcg_temp_free_i64(a2);
2215         tcg_temp_free_i64(c1);
2216         tcg_temp_free_i64(c2);
2217         tcg_temp_free_i64(zero);
2218
2219         /* Write back the data from memory to Rs.  */
2220         tcg_gen_mov_i64(s1, d1);
2221         tcg_gen_mov_i64(s2, d2);
2222         tcg_temp_free_i64(d1);
2223         tcg_temp_free_i64(d2);
2224     }
2225 }
2226
/* Compute the Sixty-Four bit (SF) register-size flag for a load/store
 * syndrome.  This logic is derived from the ARMv8 specs for LDR
 * (Shared decode for all encodings).
 *
 * Returns true when the transfer register is 64 bits wide: for signed
 * loads that is when opc<0> is clear, otherwise when size is 3.
 */
static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
{
    if (is_signed) {
        /* opc<0> set selects the 32-bit destination form. */
        return extract32(opc, 0, 1) == 0;
    }
    return size == 3;
}
2242
2243 /* Load/store exclusive
2244  *
2245  *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
2246  * +-----+-------------+----+---+----+------+----+-------+------+------+
2247  * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
2248  * +-----+-------------+----+---+----+------+----+-------+------+------+
2249  *
2250  *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2251  *   L: 0 -> store, 1 -> load
2252  *  o2: 0 -> exclusive, 1 -> not
2253  *  o1: 0 -> single register, 1 -> register pair
2254  *  o0: 1 -> load-acquire/store-release, 0 -> not
2255  */
2256 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
2257 {
2258     int rt = extract32(insn, 0, 5);
2259     int rn = extract32(insn, 5, 5);
2260     int rt2 = extract32(insn, 10, 5);
2261     int rs = extract32(insn, 16, 5);
2262     int is_lasr = extract32(insn, 15, 1);
2263     int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
2264     int size = extract32(insn, 30, 2);
2265     TCGv_i64 tcg_addr;
2266
2267     switch (o2_L_o1_o0) {
2268     case 0x0: /* STXR */
2269     case 0x1: /* STLXR */
2270         if (rn == 31) {
2271             gen_check_sp_alignment(s);
2272         }
2273         if (is_lasr) {
2274             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2275         }
2276         tcg_addr = read_cpu_reg_sp(s, rn, 1);
2277         gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, false);
2278         return;
2279
2280     case 0x4: /* LDXR */
2281     case 0x5: /* LDAXR */
2282         if (rn == 31) {
2283             gen_check_sp_alignment(s);
2284         }
2285         tcg_addr = read_cpu_reg_sp(s, rn, 1);
2286         s->is_ldex = true;
2287         gen_load_exclusive(s, rt, rt2, tcg_addr, size, false);
2288         if (is_lasr) {
2289             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2290         }
2291         return;
2292
2293     case 0x8: /* STLLR */
2294         if (!dc_isar_feature(aa64_lor, s)) {
2295             break;
2296         }
2297         /* StoreLORelease is the same as Store-Release for QEMU.  */
2298         /* fall through */
2299     case 0x9: /* STLR */
2300         /* Generate ISS for non-exclusive accesses including LASR.  */
2301         if (rn == 31) {
2302             gen_check_sp_alignment(s);
2303         }
2304         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2305         tcg_addr = read_cpu_reg_sp(s, rn, 1);
2306         do_gpr_st(s, cpu_reg(s, rt), tcg_addr, size, true, rt,
2307                   disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2308         return;
2309
2310     case 0xc: /* LDLAR */
2311         if (!dc_isar_feature(aa64_lor, s)) {
2312             break;
2313         }
2314         /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
2315         /* fall through */
2316     case 0xd: /* LDAR */
2317         /* Generate ISS for non-exclusive accesses including LASR.  */
2318         if (rn == 31) {
2319             gen_check_sp_alignment(s);
2320         }
2321         tcg_addr = read_cpu_reg_sp(s, rn, 1);
2322         do_gpr_ld(s, cpu_reg(s, rt), tcg_addr, size, false, false, true, rt,
2323                   disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2324         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2325         return;
2326
2327     case 0x2: case 0x3: /* CASP / STXP */
2328         if (size & 2) { /* STXP / STLXP */
2329             if (rn == 31) {
2330                 gen_check_sp_alignment(s);
2331             }
2332             if (is_lasr) {
2333                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2334             }
2335             tcg_addr = read_cpu_reg_sp(s, rn, 1);
2336             gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, true);
2337             return;
2338         }
2339         if (rt2 == 31
2340             && ((rt | rs) & 1) == 0
2341             && dc_isar_feature(aa64_atomics, s)) {
2342             /* CASP / CASPL */
2343             gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2344             return;
2345         }
2346         break;
2347
2348     case 0x6: case 0x7: /* CASPA / LDXP */
2349         if (size & 2) { /* LDXP / LDAXP */
2350             if (rn == 31) {
2351                 gen_check_sp_alignment(s);
2352             }
2353             tcg_addr = read_cpu_reg_sp(s, rn, 1);
2354             s->is_ldex = true;
2355             gen_load_exclusive(s, rt, rt2, tcg_addr, size, true);
2356             if (is_lasr) {
2357                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2358             }
2359             return;
2360         }
2361         if (rt2 == 31
2362             && ((rt | rs) & 1) == 0
2363             && dc_isar_feature(aa64_atomics, s)) {
2364             /* CASPA / CASPAL */
2365             gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2366             return;
2367         }
2368         break;
2369
2370     case 0xa: /* CAS */
2371     case 0xb: /* CASL */
2372     case 0xe: /* CASA */
2373     case 0xf: /* CASAL */
2374         if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) {
2375             gen_compare_and_swap(s, rs, rt, rn, size);
2376             return;
2377         }
2378         break;
2379     }
2380     unallocated_encoding(s);
2381 }
2382
2383 /*
2384  * Load register (literal)
2385  *
2386  *  31 30 29   27  26 25 24 23                5 4     0
2387  * +-----+-------+---+-----+-------------------+-------+
2388  * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
2389  * +-----+-------+---+-----+-------------------+-------+
2390  *
2391  * V: 1 -> vector (simd/fp)
2392  * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2393  *                   10-> 32 bit signed, 11 -> prefetch
2394  * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2395  */
2396 static void disas_ld_lit(DisasContext *s, uint32_t insn)
2397 {
2398     int rt = extract32(insn, 0, 5);
2399     int64_t imm = sextract32(insn, 5, 19) << 2;
2400     bool is_vector = extract32(insn, 26, 1);
2401     int opc = extract32(insn, 30, 2);
2402     bool is_signed = false;
2403     int size = 2;
2404     TCGv_i64 tcg_rt, tcg_addr;
2405
2406     if (is_vector) {
2407         if (opc == 3) {
2408             unallocated_encoding(s);
2409             return;
2410         }
2411         size = 2 + opc;
2412         if (!fp_access_check(s)) {
2413             return;
2414         }
2415     } else {
2416         if (opc == 3) {
2417             /* PRFM (literal) : prefetch */
2418             return;
2419         }
2420         size = 2 + extract32(opc, 0, 1);
2421         is_signed = extract32(opc, 1, 1);
2422     }
2423
2424     tcg_rt = cpu_reg(s, rt);
2425
2426     tcg_addr = tcg_const_i64((s->pc - 4) + imm);
2427     if (is_vector) {
2428         do_fp_ld(s, rt, tcg_addr, size);
2429     } else {
2430         /* Only unsigned 32bit loads target 32bit registers.  */
2431         bool iss_sf = opc != 0;
2432
2433         do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
2434                   true, rt, iss_sf, false);
2435     }
2436     tcg_temp_free_i64(tcg_addr);
2437 }
2438
2439 /*
2440  * LDNP (Load Pair - non-temporal hint)
2441  * LDP (Load Pair - non vector)
2442  * LDPSW (Load Pair Signed Word - non vector)
2443  * STNP (Store Pair - non-temporal hint)
2444  * STP (Store Pair - non vector)
2445  * LDNP (Load Pair of SIMD&FP - non-temporal hint)
2446  * LDP (Load Pair of SIMD&FP)
2447  * STNP (Store Pair of SIMD&FP - non-temporal hint)
2448  * STP (Store Pair of SIMD&FP)
2449  *
2450  *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
2451  * +-----+-------+---+---+-------+---+-----------------------------+
2452  * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
2453  * +-----+-------+---+---+-------+---+-------+-------+------+------+
2454  *
2455  * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
2456  *      LDPSW                    01
2457  *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2458  *   V: 0 -> GPR, 1 -> Vector
2459  * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2460  *      10 -> signed offset, 11 -> pre-index
2461  *   L: 0 -> Store 1 -> Load
2462  *
2463  * Rt, Rt2 = GPR or SIMD registers to be stored
2464  * Rn = general purpose register containing address
2465  * imm7 = signed offset (multiple of 4 or 8 depending on size)
2466  */
2467 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2468 {
2469     int rt = extract32(insn, 0, 5);
2470     int rn = extract32(insn, 5, 5);
2471     int rt2 = extract32(insn, 10, 5);
2472     uint64_t offset = sextract64(insn, 15, 7);
2473     int index = extract32(insn, 23, 2);
2474     bool is_vector = extract32(insn, 26, 1);
2475     bool is_load = extract32(insn, 22, 1);
2476     int opc = extract32(insn, 30, 2);
2477
2478     bool is_signed = false;
2479     bool postindex = false;
2480     bool wback = false;
2481
2482     TCGv_i64 tcg_addr; /* calculated address */
2483     int size;
2484
2485     if (opc == 3) {
2486         unallocated_encoding(s);
2487         return;
2488     }
2489
2490     if (is_vector) {
2491         size = 2 + opc;
2492     } else {
2493         size = 2 + extract32(opc, 1, 1);
2494         is_signed = extract32(opc, 0, 1);
2495         if (!is_load && is_signed) {
2496             unallocated_encoding(s);
2497             return;
2498         }
2499     }
2500
2501     switch (index) {
2502     case 1: /* post-index */
2503         postindex = true;
2504         wback = true;
2505         break;
2506     case 0:
2507         /* signed offset with "non-temporal" hint. Since we don't emulate
2508          * caches we don't care about hints to the cache system about
2509          * data access patterns, and handle this identically to plain
2510          * signed offset.
2511          */
2512         if (is_signed) {
2513             /* There is no non-temporal-hint version of LDPSW */
2514             unallocated_encoding(s);
2515             return;
2516         }
2517         postindex = false;
2518         break;
2519     case 2: /* signed offset, rn not updated */
2520         postindex = false;
2521         break;
2522     case 3: /* pre-index */
2523         postindex = false;
2524         wback = true;
2525         break;
2526     }
2527
2528     if (is_vector && !fp_access_check(s)) {
2529         return;
2530     }
2531
2532     offset <<= size;
2533
2534     if (rn == 31) {
2535         gen_check_sp_alignment(s);
2536     }
2537
2538     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2539
2540     if (!postindex) {
2541         tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2542     }
2543
2544     if (is_vector) {
2545         if (is_load) {
2546             do_fp_ld(s, rt, tcg_addr, size);
2547         } else {
2548             do_fp_st(s, rt, tcg_addr, size);
2549         }
2550         tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2551         if (is_load) {
2552             do_fp_ld(s, rt2, tcg_addr, size);
2553         } else {
2554             do_fp_st(s, rt2, tcg_addr, size);
2555         }
2556     } else {
2557         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2558         TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2559
2560         if (is_load) {
2561             TCGv_i64 tmp = tcg_temp_new_i64();
2562
2563             /* Do not modify tcg_rt before recognizing any exception
2564              * from the second load.
2565              */
2566             do_gpr_ld(s, tmp, tcg_addr, size, is_signed, false,
2567                       false, 0, false, false);
2568             tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2569             do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false,
2570                       false, 0, false, false);
2571
2572             tcg_gen_mov_i64(tcg_rt, tmp);
2573             tcg_temp_free_i64(tmp);
2574         } else {
2575             do_gpr_st(s, tcg_rt, tcg_addr, size,
2576                       false, 0, false, false);
2577             tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2578             do_gpr_st(s, tcg_rt2, tcg_addr, size,
2579                       false, 0, false, false);
2580         }
2581     }
2582
2583     if (wback) {
2584         if (postindex) {
2585             tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
2586         } else {
2587             tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
2588         }
2589         tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
2590     }
2591 }
2592
2593 /*
2594  * Load/store (immediate post-indexed)
2595  * Load/store (immediate pre-indexed)
2596  * Load/store (unscaled immediate)
2597  *
2598  * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2599  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2600  * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2601  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2602  *
2603  * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
2604          10 -> unprivileged
2605  * V = 0 -> non-vector
2606  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2607  * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2608  */
2609 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
2610                                 int opc,
2611                                 int size,
2612                                 int rt,
2613                                 bool is_vector)
2614 {
2615     int rn = extract32(insn, 5, 5);
2616     int imm9 = sextract32(insn, 12, 9);
2617     int idx = extract32(insn, 10, 2);
2618     bool is_signed = false;
2619     bool is_store = false;
2620     bool is_extended = false;
2621     bool is_unpriv = (idx == 2);
2622     bool iss_valid = !is_vector;
2623     bool post_index;
2624     bool writeback;
2625
2626     TCGv_i64 tcg_addr;
2627
2628     if (is_vector) {
2629         size |= (opc & 2) << 1;
2630         if (size > 4 || is_unpriv) {
2631             unallocated_encoding(s);
2632             return;
2633         }
2634         is_store = ((opc & 1) == 0);
2635         if (!fp_access_check(s)) {
2636             return;
2637         }
2638     } else {
2639         if (size == 3 && opc == 2) {
2640             /* PRFM - prefetch */
2641             if (is_unpriv) {
2642                 unallocated_encoding(s);
2643                 return;
2644             }
2645             return;
2646         }
2647         if (opc == 3 && size > 1) {
2648             unallocated_encoding(s);
2649             return;
2650         }
2651         is_store = (opc == 0);
2652         is_signed = extract32(opc, 1, 1);
2653         is_extended = (size < 3) && extract32(opc, 0, 1);
2654     }
2655
2656     switch (idx) {
2657     case 0:
2658     case 2:
2659         post_index = false;
2660         writeback = false;
2661         break;
2662     case 1:
2663         post_index = true;
2664         writeback = true;
2665         break;
2666     case 3:
2667         post_index = false;
2668         writeback = true;
2669         break;
2670     default:
2671         g_assert_not_reached();
2672     }
2673
2674     if (rn == 31) {
2675         gen_check_sp_alignment(s);
2676     }
2677     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2678
2679     if (!post_index) {
2680         tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2681     }
2682
2683     if (is_vector) {
2684         if (is_store) {
2685             do_fp_st(s, rt, tcg_addr, size);
2686         } else {
2687             do_fp_ld(s, rt, tcg_addr, size);
2688         }
2689     } else {
2690         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2691         int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2692         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2693
2694         if (is_store) {
2695             do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx,
2696                              iss_valid, rt, iss_sf, false);
2697         } else {
2698             do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
2699                              is_signed, is_extended, memidx,
2700                              iss_valid, rt, iss_sf, false);
2701         }
2702     }
2703
2704     if (writeback) {
2705         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2706         if (post_index) {
2707             tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2708         }
2709         tcg_gen_mov_i64(tcg_rn, tcg_addr);
2710     }
2711 }
2712
2713 /*
2714  * Load/store (register offset)
2715  *
2716  * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2717  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2718  * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2719  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2720  *
2721  * For non-vector:
2722  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2723  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2724  * For vector:
2725  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2726  *   opc<0>: 0 -> store, 1 -> load
2727  * V: 1 -> vector/simd
2728  * opt: extend encoding (see DecodeRegExtend)
2729  * S: if S=1 then scale (essentially index by sizeof(size))
2730  * Rt: register to transfer into/out of
2731  * Rn: address register or SP for base
2732  * Rm: offset register or ZR for offset
2733  */
2734 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
2735                                    int opc,
2736                                    int size,
2737                                    int rt,
2738                                    bool is_vector)
2739 {
2740     int rn = extract32(insn, 5, 5);
2741     int shift = extract32(insn, 12, 1);
2742     int rm = extract32(insn, 16, 5);
2743     int opt = extract32(insn, 13, 3);
2744     bool is_signed = false;
2745     bool is_store = false;
2746     bool is_extended = false;
2747
2748     TCGv_i64 tcg_rm;
2749     TCGv_i64 tcg_addr;
2750
2751     if (extract32(opt, 1, 1) == 0) {
2752         unallocated_encoding(s);
2753         return;
2754     }
2755
2756     if (is_vector) {
2757         size |= (opc & 2) << 1;
2758         if (size > 4) {
2759             unallocated_encoding(s);
2760             return;
2761         }
2762         is_store = !extract32(opc, 0, 1);
2763         if (!fp_access_check(s)) {
2764             return;
2765         }
2766     } else {
2767         if (size == 3 && opc == 2) {
2768             /* PRFM - prefetch */
2769             return;
2770         }
2771         if (opc == 3 && size > 1) {
2772             unallocated_encoding(s);
2773             return;
2774         }
2775         is_store = (opc == 0);
2776         is_signed = extract32(opc, 1, 1);
2777         is_extended = (size < 3) && extract32(opc, 0, 1);
2778     }
2779
2780     if (rn == 31) {
2781         gen_check_sp_alignment(s);
2782     }
2783     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2784
2785     tcg_rm = read_cpu_reg(s, rm, 1);
2786     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2787
2788     tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
2789
2790     if (is_vector) {
2791         if (is_store) {
2792             do_fp_st(s, rt, tcg_addr, size);
2793         } else {
2794             do_fp_ld(s, rt, tcg_addr, size);
2795         }
2796     } else {
2797         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2798         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2799         if (is_store) {
2800             do_gpr_st(s, tcg_rt, tcg_addr, size,
2801                       true, rt, iss_sf, false);
2802         } else {
2803             do_gpr_ld(s, tcg_rt, tcg_addr, size,
2804                       is_signed, is_extended,
2805                       true, rt, iss_sf, false);
2806         }
2807     }
2808 }
2809
2810 /*
2811  * Load/store (unsigned immediate)
2812  *
2813  * 31 30 29   27  26 25 24 23 22 21        10 9     5
2814  * +----+-------+---+-----+-----+------------+-------+------+
2815  * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
2816  * +----+-------+---+-----+-----+------------+-------+------+
2817  *
2818  * For non-vector:
2819  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2820  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2821  * For vector:
2822  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2823  *   opc<0>: 0 -> store, 1 -> load
2824  * Rn: base address register (inc SP)
2825  * Rt: target register
2826  */
2827 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
2828                                         int opc,
2829                                         int size,
2830                                         int rt,
2831                                         bool is_vector)
2832 {
2833     int rn = extract32(insn, 5, 5);
2834     unsigned int imm12 = extract32(insn, 10, 12);
2835     unsigned int offset;
2836
2837     TCGv_i64 tcg_addr;
2838
2839     bool is_store;
2840     bool is_signed = false;
2841     bool is_extended = false;
2842
2843     if (is_vector) {
2844         size |= (opc & 2) << 1;
2845         if (size > 4) {
2846             unallocated_encoding(s);
2847             return;
2848         }
2849         is_store = !extract32(opc, 0, 1);
2850         if (!fp_access_check(s)) {
2851             return;
2852         }
2853     } else {
2854         if (size == 3 && opc == 2) {
2855             /* PRFM - prefetch */
2856             return;
2857         }
2858         if (opc == 3 && size > 1) {
2859             unallocated_encoding(s);
2860             return;
2861         }
2862         is_store = (opc == 0);
2863         is_signed = extract32(opc, 1, 1);
2864         is_extended = (size < 3) && extract32(opc, 0, 1);
2865     }
2866
2867     if (rn == 31) {
2868         gen_check_sp_alignment(s);
2869     }
2870     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2871     offset = imm12 << size;
2872     tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2873
2874     if (is_vector) {
2875         if (is_store) {
2876             do_fp_st(s, rt, tcg_addr, size);
2877         } else {
2878             do_fp_ld(s, rt, tcg_addr, size);
2879         }
2880     } else {
2881         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2882         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2883         if (is_store) {
2884             do_gpr_st(s, tcg_rt, tcg_addr, size,
2885                       true, rt, iss_sf, false);
2886         } else {
2887             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended,
2888                       true, rt, iss_sf, false);
2889         }
2890     }
2891 }
2892
2893 /* Atomic memory operations
2894  *
2895  *  31  30      27  26    24    22  21   16   15    12    10    5     0
2896  * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
2897  * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn |  Rt |
2898  * +------+-------+---+-----+-----+--------+----+-----+-----+----+-----+
2899  *
2900  * Rt: the result register
2901  * Rn: base address or SP
2902  * Rs: the source register for the operation
2903  * V: vector flag (always 0 as of v8.3)
2904  * A: acquire flag
2905  * R: release flag
2906  */
2907 static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
2908                               int size, int rt, bool is_vector)
2909 {
2910     int rs = extract32(insn, 16, 5);
2911     int rn = extract32(insn, 5, 5);
2912     int o3_opc = extract32(insn, 12, 4);
2913     TCGv_i64 tcg_rn, tcg_rs;
2914     AtomicThreeOpFn *fn;
2915
2916     if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
2917         unallocated_encoding(s);
2918         return;
2919     }
2920     switch (o3_opc) {
2921     case 000: /* LDADD */
2922         fn = tcg_gen_atomic_fetch_add_i64;
2923         break;
2924     case 001: /* LDCLR */
2925         fn = tcg_gen_atomic_fetch_and_i64;
2926         break;
2927     case 002: /* LDEOR */
2928         fn = tcg_gen_atomic_fetch_xor_i64;
2929         break;
2930     case 003: /* LDSET */
2931         fn = tcg_gen_atomic_fetch_or_i64;
2932         break;
2933     case 004: /* LDSMAX */
2934         fn = tcg_gen_atomic_fetch_smax_i64;
2935         break;
2936     case 005: /* LDSMIN */
2937         fn = tcg_gen_atomic_fetch_smin_i64;
2938         break;
2939     case 006: /* LDUMAX */
2940         fn = tcg_gen_atomic_fetch_umax_i64;
2941         break;
2942     case 007: /* LDUMIN */
2943         fn = tcg_gen_atomic_fetch_umin_i64;
2944         break;
2945     case 010: /* SWP */
2946         fn = tcg_gen_atomic_xchg_i64;
2947         break;
2948     default:
2949         unallocated_encoding(s);
2950         return;
2951     }
2952
2953     if (rn == 31) {
2954         gen_check_sp_alignment(s);
2955     }
2956     tcg_rn = cpu_reg_sp(s, rn);
2957     tcg_rs = read_cpu_reg(s, rs, true);
2958
2959     if (o3_opc == 1) { /* LDCLR */
2960         tcg_gen_not_i64(tcg_rs, tcg_rs);
2961     }
2962
2963     /* The tcg atomic primitives are all full barriers.  Therefore we
2964      * can ignore the Acquire and Release bits of this instruction.
2965      */
2966     fn(cpu_reg(s, rt), tcg_rn, tcg_rs, get_mem_index(s),
2967        s->be_data | size | MO_ALIGN);
2968 }
2969
2970 /* Load/store register (all forms) */
2971 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2972 {
2973     int rt = extract32(insn, 0, 5);
2974     int opc = extract32(insn, 22, 2);
2975     bool is_vector = extract32(insn, 26, 1);
2976     int size = extract32(insn, 30, 2);
2977
2978     switch (extract32(insn, 24, 2)) {
2979     case 0:
2980         if (extract32(insn, 21, 1) == 0) {
2981             /* Load/store register (unscaled immediate)
2982              * Load/store immediate pre/post-indexed
2983              * Load/store register unprivileged
2984              */
2985             disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
2986             return;
2987         }
2988         switch (extract32(insn, 10, 2)) {
2989         case 0:
2990             disas_ldst_atomic(s, insn, size, rt, is_vector);
2991             return;
2992         case 2:
2993             disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
2994             return;
2995         }
2996         break;
2997     case 1:
2998         disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
2999         return;
3000     }
3001     unallocated_encoding(s);
3002 }
3003
3004 /* AdvSIMD load/store multiple structures
3005  *
3006  *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
3007  * +---+---+---------------+---+-------------+--------+------+------+------+
3008  * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
3009  * +---+---+---------------+---+-------------+--------+------+------+------+
3010  *
3011  * AdvSIMD load/store multiple structures (post-indexed)
3012  *
3013  *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
3014  * +---+---+---------------+---+---+---------+--------+------+------+------+
3015  * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
3016  * +---+---+---------------+---+---+---------+--------+------+------+------+
3017  *
3018  * Rt: first (or only) SIMD&FP register to be transferred
3019  * Rn: base address or SP
3020  * Rm (post-index only): post-index register (when !31) or size dependent #imm
3021  */
3022 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
3023 {
3024     int rt = extract32(insn, 0, 5);
3025     int rn = extract32(insn, 5, 5);
3026     int size = extract32(insn, 10, 2);
3027     int opcode = extract32(insn, 12, 4);
3028     bool is_store = !extract32(insn, 22, 1);
3029     bool is_postidx = extract32(insn, 23, 1);
3030     bool is_q = extract32(insn, 30, 1);
3031     TCGv_i64 tcg_addr, tcg_rn, tcg_ebytes;
3032     TCGMemOp endian = s->be_data;
3033
3034     int ebytes;   /* bytes per element */
3035     int elements; /* elements per vector */
3036     int rpt;    /* num iterations */
3037     int selem;  /* structure elements */
3038     int r;
3039
3040     if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
3041         unallocated_encoding(s);
3042         return;
3043     }
3044
3045     /* From the shared decode logic */
3046     switch (opcode) {
3047     case 0x0:
3048         rpt = 1;
3049         selem = 4;
3050         break;
3051     case 0x2:
3052         rpt = 4;
3053         selem = 1;
3054         break;
3055     case 0x4:
3056         rpt = 1;
3057         selem = 3;
3058         break;
3059     case 0x6:
3060         rpt = 3;
3061         selem = 1;
3062         break;
3063     case 0x7:
3064         rpt = 1;
3065         selem = 1;
3066         break;
3067     case 0x8:
3068         rpt = 1;
3069         selem = 2;
3070         break;
3071     case 0xa:
3072         rpt = 2;
3073         selem = 1;
3074         break;
3075     default:
3076         unallocated_encoding(s);
3077         return;
3078     }
3079
3080     if (size == 3 && !is_q && selem != 1) {
3081         /* reserved */
3082         unallocated_encoding(s);
3083         return;
3084     }
3085
3086     if (!fp_access_check(s)) {
3087         return;
3088     }
3089
3090     if (rn == 31) {
3091         gen_check_sp_alignment(s);
3092     }
3093
3094     /* For our purposes, bytes are always little-endian.  */
3095     if (size == 0) {
3096         endian = MO_LE;
3097     }
3098
3099     /* Consecutive little-endian elements from a single register
3100      * can be promoted to a larger little-endian operation.
3101      */
3102     if (selem == 1 && endian == MO_LE) {
3103         size = 3;
3104     }
3105     ebytes = 1 << size;
3106     elements = (is_q ? 16 : 8) / ebytes;
3107
3108     tcg_rn = cpu_reg_sp(s, rn);
3109     tcg_addr = tcg_temp_new_i64();
3110     tcg_gen_mov_i64(tcg_addr, tcg_rn);
3111     tcg_ebytes = tcg_const_i64(ebytes);
3112
3113     for (r = 0; r < rpt; r++) {
3114         int e;
3115         for (e = 0; e < elements; e++) {
3116             int xs;
3117             for (xs = 0; xs < selem; xs++) {
3118                 int tt = (rt + r + xs) % 32;
3119                 if (is_store) {
3120                     do_vec_st(s, tt, e, tcg_addr, size, endian);
3121                 } else {
3122                     do_vec_ld(s, tt, e, tcg_addr, size, endian);
3123                 }
3124                 tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_ebytes);
3125             }
3126         }
3127     }
3128
3129     if (!is_store) {
3130         /* For non-quad operations, setting a slice of the low
3131          * 64 bits of the register clears the high 64 bits (in
3132          * the ARM ARM pseudocode this is implicit in the fact
3133          * that 'rval' is a 64 bit wide variable).
3134          * For quad operations, we might still need to zero the
3135          * high bits of SVE.
3136          */
3137         for (r = 0; r < rpt * selem; r++) {
3138             int tt = (rt + r) % 32;
3139             clear_vec_high(s, is_q, tt);
3140         }
3141     }
3142
3143     if (is_postidx) {
3144         int rm = extract32(insn, 16, 5);
3145         if (rm == 31) {
3146             tcg_gen_mov_i64(tcg_rn, tcg_addr);
3147         } else {
3148             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3149         }
3150     }
3151     tcg_temp_free_i64(tcg_ebytes);
3152     tcg_temp_free_i64(tcg_addr);
3153 }
3154
3155 /* AdvSIMD load/store single structure
3156  *
3157  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3158  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3159  * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
3160  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3161  *
3162  * AdvSIMD load/store single structure (post-indexed)
3163  *
3164  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3165  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3166  * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
3167  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3168  *
3169  * Rt: first (or only) SIMD&FP register to be transferred
3170  * Rn: base address or SP
3171  * Rm (post-index only): post-index register (when !31) or size dependent #imm
3172  * index = encoded in Q:S:size dependent on size
3173  *
3174  * lane_size = encoded in R, opc
3175  * transfer width = encoded in opc, S, size
3176  */
3177 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
3178 {
3179     int rt = extract32(insn, 0, 5);
3180     int rn = extract32(insn, 5, 5);
3181     int size = extract32(insn, 10, 2);
3182     int S = extract32(insn, 12, 1);
3183     int opc = extract32(insn, 13, 3);
3184     int R = extract32(insn, 21, 1);
3185     int is_load = extract32(insn, 22, 1);
3186     int is_postidx = extract32(insn, 23, 1);
3187     int is_q = extract32(insn, 30, 1);
3188
3189     int scale = extract32(opc, 1, 2);
3190     int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
3191     bool replicate = false;
3192     int index = is_q << 3 | S << 2 | size;
3193     int ebytes, xs;
3194     TCGv_i64 tcg_addr, tcg_rn, tcg_ebytes;
3195
3196     switch (scale) {
3197     case 3:
3198         if (!is_load || S) {
3199             unallocated_encoding(s);
3200             return;
3201         }
3202         scale = size;
3203         replicate = true;
3204         break;
3205     case 0:
3206         break;
3207     case 1:
3208         if (extract32(size, 0, 1)) {
3209             unallocated_encoding(s);
3210             return;
3211         }
3212         index >>= 1;
3213         break;
3214     case 2:
3215         if (extract32(size, 1, 1)) {
3216             unallocated_encoding(s);
3217             return;
3218         }
3219         if (!extract32(size, 0, 1)) {
3220             index >>= 2;
3221         } else {
3222             if (S) {
3223                 unallocated_encoding(s);
3224                 return;
3225             }
3226             index >>= 3;
3227             scale = 3;
3228         }
3229         break;
3230     default:
3231         g_assert_not_reached();
3232     }
3233
3234     if (!fp_access_check(s)) {
3235         return;
3236     }
3237
3238     ebytes = 1 << scale;
3239
3240     if (rn == 31) {
3241         gen_check_sp_alignment(s);
3242     }
3243
3244     tcg_rn = cpu_reg_sp(s, rn);
3245     tcg_addr = tcg_temp_new_i64();
3246     tcg_gen_mov_i64(tcg_addr, tcg_rn);
3247     tcg_ebytes = tcg_const_i64(ebytes);
3248
3249     for (xs = 0; xs < selem; xs++) {
3250         if (replicate) {
3251             /* Load and replicate to all elements */
3252             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3253
3254             tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
3255                                 get_mem_index(s), s->be_data + scale);
3256             tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt),
3257                                  (is_q + 1) * 8, vec_full_reg_size(s),
3258                                  tcg_tmp);
3259             tcg_temp_free_i64(tcg_tmp);
3260         } else {
3261             /* Load/store one element per register */
3262             if (is_load) {
3263                 do_vec_ld(s, rt, index, tcg_addr, scale, s->be_data);
3264             } else {
3265                 do_vec_st(s, rt, index, tcg_addr, scale, s->be_data);
3266             }
3267         }
3268         tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_ebytes);
3269         rt = (rt + 1) % 32;
3270     }
3271
3272     if (is_postidx) {
3273         int rm = extract32(insn, 16, 5);
3274         if (rm == 31) {
3275             tcg_gen_mov_i64(tcg_rn, tcg_addr);
3276         } else {
3277             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3278         }
3279     }
3280     tcg_temp_free_i64(tcg_ebytes);
3281     tcg_temp_free_i64(tcg_addr);
3282 }
3283
3284 /* Loads and stores */
3285 static void disas_ldst(DisasContext *s, uint32_t insn)
3286 {
3287     switch (extract32(insn, 24, 6)) {
3288     case 0x08: /* Load/store exclusive */
3289         disas_ldst_excl(s, insn);
3290         break;
3291     case 0x18: case 0x1c: /* Load register (literal) */
3292         disas_ld_lit(s, insn);
3293         break;
3294     case 0x28: case 0x29:
3295     case 0x2c: case 0x2d: /* Load/store pair (all forms) */
3296         disas_ldst_pair(s, insn);
3297         break;
3298     case 0x38: case 0x39:
3299     case 0x3c: case 0x3d: /* Load/store register (all forms) */
3300         disas_ldst_reg(s, insn);
3301         break;
3302     case 0x0c: /* AdvSIMD load/store multiple structures */
3303         disas_ldst_multiple_struct(s, insn);
3304         break;
3305     case 0x0d: /* AdvSIMD load/store single structure */
3306         disas_ldst_single_struct(s, insn);
3307         break;
3308     default:
3309         unallocated_encoding(s);
3310         break;
3311     }
3312 }
3313
3314 /* PC-rel. addressing
3315  *   31  30   29 28       24 23                5 4    0
3316  * +----+-------+-----------+-------------------+------+
3317  * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
3318  * +----+-------+-----------+-------------------+------+
3319  */
3320 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
3321 {
3322     unsigned int page, rd;
3323     uint64_t base;
3324     uint64_t offset;
3325
3326     page = extract32(insn, 31, 1);
3327     /* SignExtend(immhi:immlo) -> offset */
3328     offset = sextract64(insn, 5, 19);
3329     offset = offset << 2 | extract32(insn, 29, 2);
3330     rd = extract32(insn, 0, 5);
3331     base = s->pc - 4;
3332
3333     if (page) {
3334         /* ADRP (page based) */
3335         base &= ~0xfff;
3336         offset <<= 12;
3337     }
3338
3339     tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
3340 }
3341
3342 /*
3343  * Add/subtract (immediate)
3344  *
3345  *  31 30 29 28       24 23 22 21         10 9   5 4   0
3346  * +--+--+--+-----------+-----+-------------+-----+-----+
3347  * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
3348  * +--+--+--+-----------+-----+-------------+-----+-----+
3349  *
3350  *    sf: 0 -> 32bit, 1 -> 64bit
3351  *    op: 0 -> add  , 1 -> sub
3352  *     S: 1 -> set flags
3353  * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
3354  */
3355 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
3356 {
3357     int rd = extract32(insn, 0, 5);
3358     int rn = extract32(insn, 5, 5);
3359     uint64_t imm = extract32(insn, 10, 12);
3360     int shift = extract32(insn, 22, 2);
3361     bool setflags = extract32(insn, 29, 1);
3362     bool sub_op = extract32(insn, 30, 1);
3363     bool is_64bit = extract32(insn, 31, 1);
3364
3365     TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
3366     TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
3367     TCGv_i64 tcg_result;
3368
3369     switch (shift) {
3370     case 0x0:
3371         break;
3372     case 0x1:
3373         imm <<= 12;
3374         break;
3375     default:
3376         unallocated_encoding(s);
3377         return;
3378     }
3379
3380     tcg_result = tcg_temp_new_i64();
3381     if (!setflags) {
3382         if (sub_op) {
3383             tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
3384         } else {
3385             tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
3386         }
3387     } else {
3388         TCGv_i64 tcg_imm = tcg_const_i64(imm);
3389         if (sub_op) {
3390             gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
3391         } else {
3392             gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
3393         }
3394         tcg_temp_free_i64(tcg_imm);
3395     }
3396
3397     if (is_64bit) {
3398         tcg_gen_mov_i64(tcg_rd, tcg_result);
3399     } else {
3400         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3401     }
3402
3403     tcg_temp_free_i64(tcg_result);
3404 }
3405
/* Replicate an e-bit wide element across a 64 bit value.
 *
 * mask holds the element in its bottom e bits (higher bits are
 * expected to be zero); e must be non-zero.  The populated width is
 * doubled on each step until it reaches 64 bits, and the resulting
 * pattern is returned.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int width;

    assert(e != 0);
    for (width = e; width < 64; width <<= 1) {
        mask |= mask << width;
    }
    return mask;
}
3419
/* Return a value with the bottom 'length' bits set
 * (where 0 < length <= 64).
 */
static inline uint64_t bitmask64(unsigned int length)
{
    uint64_t all_ones = ~(uint64_t)0;

    assert(length > 0 && length <= 64);
    /* Shift out the bits above 'length'; length == 64 shifts by zero */
    return all_ones >> (64 - length);
}
3426
/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
 * only require the wmask. Returns false if the imms/immr/immn are a reserved
 * value (ie should cause a guest UNDEF exception), and true if they are
 * valid, in which case the decoded bit pattern is written to result.
 * On a false return *result is left untouched.
 */
bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                            unsigned int imms, unsigned int immr)
{
    uint64_t element;
    unsigned esize, emask, run, rot;
    int log2_esize;

    assert(immn < 2 && imms < 64 && immr < 64);

    /* The bit patterns we create here are 64 bit patterns which
     * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
     * 64 bits each. Each element contains the same value: a run
     * of between 1 and e-1 non-zero bits, rotated within the
     * element by between 0 and e-1 bits.
     *
     * The element size and run length are encoded into immn (1 bit)
     * and imms (6 bits) as follows:
     * 64 bit elements: immn = 1, imms = <length of run - 1>
     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
     * Notice that immn = 0, imms = 11111x is the only combination
     * not covered by one of the above options; this is reserved.
     * Further, <length of run - 1> all-ones is a reserved pattern.
     *
     * In all cases the rotation is by immr % e (and immr is 6 bits).
     */

    /* The element size is given by the position of the highest set
     * bit in immn : NOT(imms).
     */
    log2_esize = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (log2_esize < 1) {
        /* This is the immn == 0, imms == 11111x (binary) case */
        return false;
    }
    esize = 1 << log2_esize;
    emask = esize - 1;

    run = imms & emask;
    rot = immr & emask;
    if (run == emask) {
        /* <length of run - 1> mustn't be all-ones. */
        return false;
    }

    /* Build one element: run+1 set bits, rotated right by rot
     * within the esize-bit wide element...
     */
    element = bitmask64(run + 1);
    if (rot != 0) {
        element = (element >> rot) | (element << (esize - rot));
        element &= bitmask64(esize);
    }

    /* ...then replicate the element over the whole 64 bit value */
    *result = bitfield_replicate(element, esize);
    return true;
}
3492
3493 /* Logical (immediate)
3494  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3495  * +----+-----+-------------+---+------+------+------+------+
3496  * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
3497  * +----+-----+-------------+---+------+------+------+------+
3498  */
3499 static void disas_logic_imm(DisasContext *s, uint32_t insn)
3500 {
3501     unsigned int sf, opc, is_n, immr, imms, rn, rd;
3502     TCGv_i64 tcg_rd, tcg_rn;
3503     uint64_t wmask;
3504     bool is_and = false;
3505
3506     sf = extract32(insn, 31, 1);
3507     opc = extract32(insn, 29, 2);
3508     is_n = extract32(insn, 22, 1);
3509     immr = extract32(insn, 16, 6);
3510     imms = extract32(insn, 10, 6);
3511     rn = extract32(insn, 5, 5);
3512     rd = extract32(insn, 0, 5);
3513
3514     if (!sf && is_n) {
3515         unallocated_encoding(s);
3516         return;
3517     }
3518
3519     if (opc == 0x3) { /* ANDS */
3520         tcg_rd = cpu_reg(s, rd);
3521     } else {
3522         tcg_rd = cpu_reg_sp(s, rd);
3523     }
3524     tcg_rn = cpu_reg(s, rn);
3525
3526     if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
3527         /* some immediate field values are reserved */
3528         unallocated_encoding(s);
3529         return;
3530     }
3531
3532     if (!sf) {
3533         wmask &= 0xffffffff;
3534     }
3535
3536     switch (opc) {
3537     case 0x3: /* ANDS */
3538     case 0x0: /* AND */
3539         tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
3540         is_and = true;
3541         break;
3542     case 0x1: /* ORR */
3543         tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
3544         break;
3545     case 0x2: /* EOR */
3546         tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
3547         break;
3548     default:
3549         assert(FALSE); /* must handle all above */
3550         break;
3551     }
3552
3553     if (!sf && !is_and) {
3554         /* zero extend final result; we know we can skip this for AND
3555          * since the immediate had the high 32 bits clear.
3556          */
3557         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3558     }
3559
3560     if (opc == 3) { /* ANDS */
3561         gen_logic_CC(sf, tcg_rd);
3562     }
3563 }
3564
3565 /*
3566  * Move wide (immediate)
3567  *
3568  *  31 30 29 28         23 22 21 20             5 4    0
3569  * +--+-----+-------------+-----+----------------+------+
3570  * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
3571  * +--+-----+-------------+-----+----------------+------+
3572  *
3573  * sf: 0 -> 32 bit, 1 -> 64 bit
3574  * opc: 00 -> N, 10 -> Z, 11 -> K
3575  * hw: shift/16 (0,16, and sf only 32, 48)
3576  */
3577 static void disas_movw_imm(DisasContext *s, uint32_t insn)
3578 {
3579     int rd = extract32(insn, 0, 5);
3580     uint64_t imm = extract32(insn, 5, 16);
3581     int sf = extract32(insn, 31, 1);
3582     int opc = extract32(insn, 29, 2);
3583     int pos = extract32(insn, 21, 2) << 4;
3584     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3585     TCGv_i64 tcg_imm;
3586
3587     if (!sf && (pos >= 32)) {
3588         unallocated_encoding(s);
3589         return;
3590     }
3591
3592     switch (opc) {
3593     case 0: /* MOVN */
3594     case 2: /* MOVZ */
3595         imm <<= pos;
3596         if (opc == 0) {
3597             imm = ~imm;
3598         }
3599         if (!sf) {
3600             imm &= 0xffffffffu;
3601         }
3602         tcg_gen_movi_i64(tcg_rd, imm);
3603         break;
3604     case 3: /* MOVK */
3605         tcg_imm = tcg_const_i64(imm);
3606         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
3607         tcg_temp_free_i64(tcg_imm);
3608         if (!sf) {
3609             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3610         }
3611         break;
3612     default:
3613         unallocated_encoding(s);
3614         break;
3615     }
3616 }
3617
/* Bitfield
 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
 * +----+-----+-------------+---+------+------+------+------+
 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
 * +----+-----+-------------+---+------+------+------+------+
 *
 * opc: 00 -> SBFM, 01 -> BFM, 10 -> UBFM, 11 -> unallocated
 * immr ("ri" below) and imms ("si" below) must both be smaller than the
 * operand size, and N must equal sf.
 */
static void disas_bitfield(DisasContext *s, uint32_t insn)
{
    unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
    TCGv_i64 tcg_rd, tcg_tmp;

    sf = extract32(insn, 31, 1);
    opc = extract32(insn, 29, 2);
    n = extract32(insn, 22, 1);
    ri = extract32(insn, 16, 6);
    si = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);
    bitsize = sf ? 64 : 32;

    /* Reject reserved encodings before generating any code */
    if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
        unallocated_encoding(s);
        return;
    }

    tcg_rd = cpu_reg(s, rd);

    /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
       to be smaller than bitsize, we'll never reference data outside the
       low 32-bits anyway.  */
    tcg_tmp = read_cpu_reg(s, rn, 1);

    /* Recognize simple(r) extractions.  */
    if (si >= ri) {
        /* Wd<s-r:0> = Wn<s:r> */
        len = (si - ri) + 1;
        if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */
            tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
            /* The sign extension may have set bits above bit 31, so the
               32-bit form still needs the zero-extension at "done".  */
            goto done;
        } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */
            tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
            /* Everything above the extracted field is already zero.  */
            return;
        }
        /* opc == 1, BFXIL fall through to deposit */
        tcg_gen_extract_i64(tcg_tmp, tcg_tmp, ri, len);
        pos = 0;
    } else {
        /* Handle the ri > si case with a deposit
         * Wd<32+s-r,32-r> = Wn<s:0>
         */
        len = si + 1;
        pos = (bitsize - ri) & (bitsize - 1);
    }

    if (opc == 0 && len < ri) {
        /* SBFM: sign extend the destination field from len to fill
           the balance of the word.  Let the deposit below insert all
           of those sign bits.  */
        tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
        len = ri;
    }

    if (opc == 1) { /* BFM, BFXIL */
        /* Insert into the existing contents of Rd; bits outside the
           field are preserved.  */
        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
    } else {
        /* SBFM or UBFM: We start with zero, and we haven't modified
           any bits outside bitsize, therefore the zero-extension
           below is unneeded.  */
        tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
        return;
    }

 done:
    if (!sf) { /* zero extend final result */
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}
3695
3696 /* Extract
3697  *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3698  * +----+------+-------------+---+----+------+--------+------+------+
3699  * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3700  * +----+------+-------------+---+----+------+--------+------+------+
3701  */
3702 static void disas_extract(DisasContext *s, uint32_t insn)
3703 {
3704     unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3705
3706     sf = extract32(insn, 31, 1);
3707     n = extract32(insn, 22, 1);
3708     rm = extract32(insn, 16, 5);
3709     imm = extract32(insn, 10, 6);
3710     rn = extract32(insn, 5, 5);
3711     rd = extract32(insn, 0, 5);
3712     op21 = extract32(insn, 29, 2);
3713     op0 = extract32(insn, 21, 1);
3714     bitsize = sf ? 64 : 32;
3715
3716     if (sf != n || op21 || op0 || imm >= bitsize) {
3717         unallocated_encoding(s);
3718     } else {
3719         TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3720
3721         tcg_rd = cpu_reg(s, rd);
3722
3723         if (unlikely(imm == 0)) {
3724             /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3725              * so an extract from bit 0 is a special case.
3726              */
3727             if (sf) {
3728                 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3729             } else {
3730                 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3731             }
3732         } else if (rm == rn) { /* ROR */
3733             tcg_rm = cpu_reg(s, rm);
3734             if (sf) {
3735                 tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
3736             } else {
3737                 TCGv_i32 tmp = tcg_temp_new_i32();
3738                 tcg_gen_extrl_i64_i32(tmp, tcg_rm);
3739                 tcg_gen_rotri_i32(tmp, tmp, imm);
3740                 tcg_gen_extu_i32_i64(tcg_rd, tmp);
3741                 tcg_temp_free_i32(tmp);
3742             }
3743         } else {
3744             tcg_rm = read_cpu_reg(s, rm, sf);
3745             tcg_rn = read_cpu_reg(s, rn, sf);
3746             tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
3747             tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
3748             tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
3749             if (!sf) {
3750                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3751             }
3752         }
3753     }
3754 }
3755
3756 /* Data processing - immediate */
3757 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3758 {
3759     switch (extract32(insn, 23, 6)) {
3760     case 0x20: case 0x21: /* PC-rel. addressing */
3761         disas_pc_rel_adr(s, insn);
3762         break;
3763     case 0x22: case 0x23: /* Add/subtract (immediate) */
3764         disas_add_sub_imm(s, insn);
3765         break;
3766     case 0x24: /* Logical (immediate) */
3767         disas_logic_imm(s, insn);
3768         break;
3769     case 0x25: /* Move wide (immediate) */
3770         disas_movw_imm(s, insn);
3771         break;
3772     case 0x26: /* Bitfield */
3773         disas_bitfield(s, insn);
3774         break;
3775     case 0x27: /* Extract */
3776         disas_extract(s, insn);
3777         break;
3778     default:
3779         unallocated_encoding(s);
3780         break;
3781     }
3782 }
3783
3784 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
3785  * Note that it is the caller's responsibility to ensure that the
3786  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
3787  * mandated semantics for out of range shifts.
3788  */
3789 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
3790                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3791 {
3792     switch (shift_type) {
3793     case A64_SHIFT_TYPE_LSL:
3794         tcg_gen_shl_i64(dst, src, shift_amount);
3795         break;
3796     case A64_SHIFT_TYPE_LSR:
3797         tcg_gen_shr_i64(dst, src, shift_amount);
3798         break;
3799     case A64_SHIFT_TYPE_ASR:
3800         if (!sf) {
3801             tcg_gen_ext32s_i64(dst, src);
3802         }
3803         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
3804         break;
3805     case A64_SHIFT_TYPE_ROR:
3806         if (sf) {
3807             tcg_gen_rotr_i64(dst, src, shift_amount);
3808         } else {
3809             TCGv_i32 t0, t1;
3810             t0 = tcg_temp_new_i32();
3811             t1 = tcg_temp_new_i32();
3812             tcg_gen_extrl_i64_i32(t0, src);
3813             tcg_gen_extrl_i64_i32(t1, shift_amount);
3814             tcg_gen_rotr_i32(t0, t0, t1);
3815             tcg_gen_extu_i32_i64(dst, t0);
3816             tcg_temp_free_i32(t0);
3817             tcg_temp_free_i32(t1);
3818         }
3819         break;
3820     default:
3821         assert(FALSE); /* all shift types should be handled */
3822         break;
3823     }
3824
3825     if (!sf) { /* zero extend final result */
3826         tcg_gen_ext32u_i64(dst, dst);
3827     }
3828 }
3829
3830 /* Shift a TCGv src by immediate, put result in dst.
3831  * The shift amount must be in range (this should always be true as the
3832  * relevant instructions will UNDEF on bad shift immediates).
3833  */
3834 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
3835                           enum a64_shift_type shift_type, unsigned int shift_i)
3836 {
3837     assert(shift_i < (sf ? 64 : 32));
3838
3839     if (shift_i == 0) {
3840         tcg_gen_mov_i64(dst, src);
3841     } else {
3842         TCGv_i64 shift_const;
3843
3844         shift_const = tcg_const_i64(shift_i);
3845         shift_reg(dst, src, sf, shift_type, shift_const);
3846         tcg_temp_free_i64(shift_const);
3847     }
3848 }
3849
3850 /* Logical (shifted register)
3851  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
3852  * +----+-----+-----------+-------+---+------+--------+------+------+
3853  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
3854  * +----+-----+-----------+-------+---+------+--------+------+------+
3855  */
3856 static void disas_logic_reg(DisasContext *s, uint32_t insn)
3857 {
3858     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3859     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3860
3861     sf = extract32(insn, 31, 1);
3862     opc = extract32(insn, 29, 2);
3863     shift_type = extract32(insn, 22, 2);
3864     invert = extract32(insn, 21, 1);
3865     rm = extract32(insn, 16, 5);
3866     shift_amount = extract32(insn, 10, 6);
3867     rn = extract32(insn, 5, 5);
3868     rd = extract32(insn, 0, 5);
3869
3870     if (!sf && (shift_amount & (1 << 5))) {
3871         unallocated_encoding(s);
3872         return;
3873     }
3874
3875     tcg_rd = cpu_reg(s, rd);
3876
3877     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3878         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3879          * register-register MOV and MVN, so it is worth special casing.
3880          */
3881         tcg_rm = cpu_reg(s, rm);
3882         if (invert) {
3883             tcg_gen_not_i64(tcg_rd, tcg_rm);
3884             if (!sf) {
3885                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3886             }
3887         } else {
3888             if (sf) {
3889                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
3890             } else {
3891                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
3892             }
3893         }
3894         return;
3895     }
3896
3897     tcg_rm = read_cpu_reg(s, rm, sf);
3898
3899     if (shift_amount) {
3900         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3901     }
3902
3903     tcg_rn = cpu_reg(s, rn);
3904
3905     switch (opc | (invert << 2)) {
3906     case 0: /* AND */
3907     case 3: /* ANDS */
3908         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3909         break;
3910     case 1: /* ORR */
3911         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3912         break;
3913     case 2: /* EOR */
3914         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3915         break;
3916     case 4: /* BIC */
3917     case 7: /* BICS */
3918         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3919         break;
3920     case 5: /* ORN */
3921         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3922         break;
3923     case 6: /* EON */
3924         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3925         break;
3926     default:
3927         assert(FALSE);
3928         break;
3929     }
3930
3931     if (!sf) {
3932         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3933     }
3934
3935     if (opc == 3) {
3936         gen_logic_CC(sf, tcg_rd);
3937     }
3938 }
3939
3940 /*
3941  * Add/subtract (extended register)
3942  *
3943  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
3944  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3945  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
3946  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3947  *
3948  *  sf: 0 -> 32bit, 1 -> 64bit
3949  *  op: 0 -> add  , 1 -> sub
3950  *   S: 1 -> set flags
3951  * opt: 00
3952  * option: extension type (see DecodeRegExtend)
3953  * imm3: optional shift to Rm
3954  *
3955  * Rd = Rn + LSL(extend(Rm), amount)
3956  */
3957 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3958 {
3959     int rd = extract32(insn, 0, 5);
3960     int rn = extract32(insn, 5, 5);
3961     int imm3 = extract32(insn, 10, 3);
3962     int option = extract32(insn, 13, 3);
3963     int rm = extract32(insn, 16, 5);
3964     bool setflags = extract32(insn, 29, 1);
3965     bool sub_op = extract32(insn, 30, 1);
3966     bool sf = extract32(insn, 31, 1);
3967
3968     TCGv_i64 tcg_rm, tcg_rn; /* temps */
3969     TCGv_i64 tcg_rd;
3970     TCGv_i64 tcg_result;
3971
3972     if (imm3 > 4) {
3973         unallocated_encoding(s);
3974         return;
3975     }
3976
3977     /* non-flag setting ops may use SP */
3978     if (!setflags) {
3979         tcg_rd = cpu_reg_sp(s, rd);
3980     } else {
3981         tcg_rd = cpu_reg(s, rd);
3982     }
3983     tcg_rn = read_cpu_reg_sp(s, rn, sf);
3984
3985     tcg_rm = read_cpu_reg(s, rm, sf);
3986     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3987
3988     tcg_result = tcg_temp_new_i64();
3989
3990     if (!setflags) {
3991         if (sub_op) {
3992             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3993         } else {
3994             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3995         }
3996     } else {
3997         if (sub_op) {
3998             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3999         } else {
4000             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4001         }
4002     }
4003
4004     if (sf) {
4005         tcg_gen_mov_i64(tcg_rd, tcg_result);
4006     } else {
4007         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4008     }
4009
4010     tcg_temp_free_i64(tcg_result);
4011 }
4012
4013 /*
4014  * Add/subtract (shifted register)
4015  *
4016  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
4017  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4018  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
4019  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4020  *
4021  *    sf: 0 -> 32bit, 1 -> 64bit
4022  *    op: 0 -> add  , 1 -> sub
4023  *     S: 1 -> set flags
4024  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
4025  *  imm6: Shift amount to apply to Rm before the add/sub
4026  */
4027 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
4028 {
4029     int rd = extract32(insn, 0, 5);
4030     int rn = extract32(insn, 5, 5);
4031     int imm6 = extract32(insn, 10, 6);
4032     int rm = extract32(insn, 16, 5);
4033     int shift_type = extract32(insn, 22, 2);
4034     bool setflags = extract32(insn, 29, 1);
4035     bool sub_op = extract32(insn, 30, 1);
4036     bool sf = extract32(insn, 31, 1);
4037
4038     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4039     TCGv_i64 tcg_rn, tcg_rm;
4040     TCGv_i64 tcg_result;
4041
4042     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
4043         unallocated_encoding(s);
4044         return;
4045     }
4046
4047     tcg_rn = read_cpu_reg(s, rn, sf);
4048     tcg_rm = read_cpu_reg(s, rm, sf);
4049
4050     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
4051
4052     tcg_result = tcg_temp_new_i64();
4053
4054     if (!setflags) {
4055         if (sub_op) {
4056             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4057         } else {
4058             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4059         }
4060     } else {
4061         if (sub_op) {
4062             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4063         } else {
4064             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4065         }
4066     }
4067
4068     if (sf) {
4069         tcg_gen_mov_i64(tcg_rd, tcg_result);
4070     } else {
4071         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4072     }
4073
4074     tcg_temp_free_i64(tcg_result);
4075 }
4076
4077 /* Data-processing (3 source)
4078  *
4079  *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
4080  *  +--+------+-----------+------+------+----+------+------+------+
4081  *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4082  *  +--+------+-----------+------+------+----+------+------+------+
4083  */
4084 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
4085 {
4086     int rd = extract32(insn, 0, 5);
4087     int rn = extract32(insn, 5, 5);
4088     int ra = extract32(insn, 10, 5);
4089     int rm = extract32(insn, 16, 5);
4090     int op_id = (extract32(insn, 29, 3) << 4) |
4091         (extract32(insn, 21, 3) << 1) |
4092         extract32(insn, 15, 1);
4093     bool sf = extract32(insn, 31, 1);
4094     bool is_sub = extract32(op_id, 0, 1);
4095     bool is_high = extract32(op_id, 2, 1);
4096     bool is_signed = false;
4097     TCGv_i64 tcg_op1;
4098     TCGv_i64 tcg_op2;
4099     TCGv_i64 tcg_tmp;
4100
4101     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
4102     switch (op_id) {
4103     case 0x42: /* SMADDL */
4104     case 0x43: /* SMSUBL */
4105     case 0x44: /* SMULH */
4106         is_signed = true;
4107         break;
4108     case 0x0: /* MADD (32bit) */
4109     case 0x1: /* MSUB (32bit) */
4110     case 0x40: /* MADD (64bit) */
4111     case 0x41: /* MSUB (64bit) */
4112     case 0x4a: /* UMADDL */
4113     case 0x4b: /* UMSUBL */
4114     case 0x4c: /* UMULH */
4115         break;
4116     default:
4117         unallocated_encoding(s);
4118         return;
4119     }
4120
4121     if (is_high) {
4122         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
4123         TCGv_i64 tcg_rd = cpu_reg(s, rd);
4124         TCGv_i64 tcg_rn = cpu_reg(s, rn);
4125         TCGv_i64 tcg_rm = cpu_reg(s, rm);
4126
4127         if (is_signed) {
4128             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4129         } else {
4130             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4131         }
4132
4133         tcg_temp_free_i64(low_bits);
4134         return;
4135     }
4136
4137     tcg_op1 = tcg_temp_new_i64();
4138     tcg_op2 = tcg_temp_new_i64();
4139     tcg_tmp = tcg_temp_new_i64();
4140
4141     if (op_id < 0x42) {
4142         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
4143         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
4144     } else {
4145         if (is_signed) {
4146             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
4147             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
4148         } else {
4149             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
4150             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
4151         }
4152     }
4153
4154     if (ra == 31 && !is_sub) {
4155         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
4156         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
4157     } else {
4158         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
4159         if (is_sub) {
4160             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4161         } else {
4162             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4163         }
4164     }
4165
4166     if (!sf) {
4167         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
4168     }
4169
4170     tcg_temp_free_i64(tcg_op1);
4171     tcg_temp_free_i64(tcg_op2);
4172     tcg_temp_free_i64(tcg_tmp);
4173 }
4174
4175 /* Add/subtract (with carry)
4176  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
4177  * +--+--+--+------------------------+------+---------+------+-----+
4178  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
4179  * +--+--+--+------------------------+------+---------+------+-----+
4180  *                                            [000000]
4181  */
4182
4183 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
4184 {
4185     unsigned int sf, op, setflags, rm, rn, rd;
4186     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
4187
4188     if (extract32(insn, 10, 6) != 0) {
4189         unallocated_encoding(s);
4190         return;
4191     }
4192
4193     sf = extract32(insn, 31, 1);
4194     op = extract32(insn, 30, 1);
4195     setflags = extract32(insn, 29, 1);
4196     rm = extract32(insn, 16, 5);
4197     rn = extract32(insn, 5, 5);
4198     rd = extract32(insn, 0, 5);
4199
4200     tcg_rd = cpu_reg(s, rd);
4201     tcg_rn = cpu_reg(s, rn);
4202
4203     if (op) {
4204         tcg_y = new_tmp_a64(s);
4205         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
4206     } else {
4207         tcg_y = cpu_reg(s, rm);
4208     }
4209
4210     if (setflags) {
4211         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
4212     } else {
4213         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
4214     }
4215 }
4216
4217 /* Conditional compare (immediate / register)
4218  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
4219  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4220  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
4221  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4222  *        [1]                             y                [0]       [0]
4223  */
4224 static void disas_cc(DisasContext *s, uint32_t insn)
4225 {
4226     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
4227     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
4228     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
4229     DisasCompare c;
4230
4231     if (!extract32(insn, 29, 1)) {
4232         unallocated_encoding(s);
4233         return;
4234     }
4235     if (insn & (1 << 10 | 1 << 4)) {
4236         unallocated_encoding(s);
4237         return;
4238     }
4239     sf = extract32(insn, 31, 1);
4240     op = extract32(insn, 30, 1);
4241     is_imm = extract32(insn, 11, 1);
4242     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
4243     cond = extract32(insn, 12, 4);
4244     rn = extract32(insn, 5, 5);
4245     nzcv = extract32(insn, 0, 4);
4246
4247     /* Set T0 = !COND.  */
4248     tcg_t0 = tcg_temp_new_i32();
4249     arm_test_cc(&c, cond);
4250     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
4251     arm_free_cc(&c);
4252
4253     /* Load the arguments for the new comparison.  */
4254     if (is_imm) {
4255         tcg_y = new_tmp_a64(s);
4256         tcg_gen_movi_i64(tcg_y, y);
4257     } else {
4258         tcg_y = cpu_reg(s, y);
4259     }
4260     tcg_rn = cpu_reg(s, rn);
4261
4262     /* Set the flags for the new comparison.  */
4263     tcg_tmp = tcg_temp_new_i64();
4264     if (op) {
4265         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
4266     } else {
4267         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
4268     }
4269     tcg_temp_free_i64(tcg_tmp);
4270
4271     /* If COND was false, force the flags to #nzcv.  Compute two masks
4272      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
4273      * For tcg hosts that support ANDC, we can make do with just T1.
4274      * In either case, allow the tcg optimizer to delete any unused mask.
4275      */
4276     tcg_t1 = tcg_temp_new_i32();
4277     tcg_t2 = tcg_temp_new_i32();
4278     tcg_gen_neg_i32(tcg_t1, tcg_t0);
4279     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
4280
4281     if (nzcv & 8) { /* N */
4282         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
4283     } else {
4284         if (TCG_TARGET_HAS_andc_i32) {
4285             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
4286         } else {
4287             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
4288         }
4289     }
4290     if (nzcv & 4) { /* Z */
4291         if (TCG_TARGET_HAS_andc_i32) {
4292             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
4293         } else {
4294             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
4295         }
4296     } else {
4297         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
4298     }
4299     if (nzcv & 2) { /* C */
4300         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
4301     } else {
4302         if (TCG_TARGET_HAS_andc_i32) {
4303             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
4304         } else {
4305             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
4306         }
4307     }
4308     if (nzcv & 1) { /* V */
4309         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
4310     } else {
4311         if (TCG_TARGET_HAS_andc_i32) {
4312             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
4313         } else {
4314             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
4315         }
4316     }
4317     tcg_temp_free_i32(tcg_t0);
4318     tcg_temp_free_i32(tcg_t1);
4319     tcg_temp_free_i32(tcg_t2);
4320 }
4321
4322 /* Conditional select
4323  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
4324  * +----+----+---+-----------------+------+------+-----+------+------+
4325  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
4326  * +----+----+---+-----------------+------+------+-----+------+------+
4327  */
4328 static void disas_cond_select(DisasContext *s, uint32_t insn)
4329 {
4330     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
4331     TCGv_i64 tcg_rd, zero;
4332     DisasCompare64 c;
4333
4334     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
4335         /* S == 1 or op2<1> == 1 */
4336         unallocated_encoding(s);
4337         return;
4338     }
4339     sf = extract32(insn, 31, 1);
4340     else_inv = extract32(insn, 30, 1);
4341     rm = extract32(insn, 16, 5);
4342     cond = extract32(insn, 12, 4);
4343     else_inc = extract32(insn, 10, 1);
4344     rn = extract32(insn, 5, 5);
4345     rd = extract32(insn, 0, 5);
4346
4347     tcg_rd = cpu_reg(s, rd);
4348
4349     a64_test_cc(&c, cond);
4350     zero = tcg_const_i64(0);
4351
4352     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
4353         /* CSET & CSETM.  */
4354         tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
4355         if (else_inv) {
4356             tcg_gen_neg_i64(tcg_rd, tcg_rd);
4357         }
4358     } else {
4359         TCGv_i64 t_true = cpu_reg(s, rn);
4360         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
4361         if (else_inv && else_inc) {
4362             tcg_gen_neg_i64(t_false, t_false);
4363         } else if (else_inv) {
4364             tcg_gen_not_i64(t_false, t_false);
4365         } else if (else_inc) {
4366             tcg_gen_addi_i64(t_false, t_false, 1);
4367         }
4368         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
4369     }
4370
4371     tcg_temp_free_i64(zero);
4372     a64_free_cc(&c);
4373
4374     if (!sf) {
4375         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4376     }
4377 }
4378
4379 static void handle_clz(DisasContext *s, unsigned int sf,
4380                        unsigned int rn, unsigned int rd)
4381 {
4382     TCGv_i64 tcg_rd, tcg_rn;
4383     tcg_rd = cpu_reg(s, rd);
4384     tcg_rn = cpu_reg(s, rn);
4385
4386     if (sf) {
4387         tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
4388     } else {
4389         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4390         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4391         tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
4392         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4393         tcg_temp_free_i32(tcg_tmp32);
4394     }
4395 }
4396
4397 static void handle_cls(DisasContext *s, unsigned int sf,
4398                        unsigned int rn, unsigned int rd)
4399 {
4400     TCGv_i64 tcg_rd, tcg_rn;
4401     tcg_rd = cpu_reg(s, rd);
4402     tcg_rn = cpu_reg(s, rn);
4403
4404     if (sf) {
4405         tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
4406     } else {
4407         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4408         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4409         tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
4410         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4411         tcg_temp_free_i32(tcg_tmp32);
4412     }
4413 }
4414
4415 static void handle_rbit(DisasContext *s, unsigned int sf,
4416                         unsigned int rn, unsigned int rd)
4417 {
4418     TCGv_i64 tcg_rd, tcg_rn;
4419     tcg_rd = cpu_reg(s, rd);
4420     tcg_rn = cpu_reg(s, rn);
4421
4422     if (sf) {
4423         gen_helper_rbit64(tcg_rd, tcg_rn);
4424     } else {
4425         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4426         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4427         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
4428         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4429         tcg_temp_free_i32(tcg_tmp32);
4430     }
4431 }
4432
4433 /* REV with sf==1, opcode==3 ("REV64") */
4434 static void handle_rev64(DisasContext *s, unsigned int sf,
4435                          unsigned int rn, unsigned int rd)
4436 {
4437     if (!sf) {
4438         unallocated_encoding(s);
4439         return;
4440     }
4441     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
4442 }
4443
4444 /* REV with sf==0, opcode==2
4445  * REV32 (sf==1, opcode==2)
4446  */
4447 static void handle_rev32(DisasContext *s, unsigned int sf,
4448                          unsigned int rn, unsigned int rd)
4449 {
4450     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4451
4452     if (sf) {
4453         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4454         TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4455
4456         /* bswap32_i64 requires zero high word */
4457         tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
4458         tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
4459         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
4460         tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
4461         tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
4462
4463         tcg_temp_free_i64(tcg_tmp);
4464     } else {
4465         tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
4466         tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
4467     }
4468 }
4469
4470 /* REV16 (opcode==1) */
4471 static void handle_rev16(DisasContext *s, unsigned int sf,
4472                          unsigned int rn, unsigned int rd)
4473 {
4474     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4475     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4476     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4477     TCGv_i64 mask = tcg_const_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
4478
4479     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
4480     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
4481     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
4482     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
4483     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
4484
4485     tcg_temp_free_i64(mask);
4486     tcg_temp_free_i64(tcg_tmp);
4487 }
4488
4489 /* Data-processing (1 source)
4490  *   31  30  29  28             21 20     16 15    10 9    5 4    0
4491  * +----+---+---+-----------------+---------+--------+------+------+
4492  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
4493  * +----+---+---+-----------------+---------+--------+------+------+
4494  */
4495 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
4496 {
4497     unsigned int sf, opcode, rn, rd;
4498
4499     if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
4500         unallocated_encoding(s);
4501         return;
4502     }
4503
4504     sf = extract32(insn, 31, 1);
4505     opcode = extract32(insn, 10, 6);
4506     rn = extract32(insn, 5, 5);
4507     rd = extract32(insn, 0, 5);
4508
4509     switch (opcode) {
4510     case 0: /* RBIT */
4511         handle_rbit(s, sf, rn, rd);
4512         break;
4513     case 1: /* REV16 */
4514         handle_rev16(s, sf, rn, rd);
4515         break;
4516     case 2: /* REV32 */
4517         handle_rev32(s, sf, rn, rd);
4518         break;
4519     case 3: /* REV64 */
4520         handle_rev64(s, sf, rn, rd);
4521         break;
4522     case 4: /* CLZ */
4523         handle_clz(s, sf, rn, rd);
4524         break;
4525     case 5: /* CLS */
4526         handle_cls(s, sf, rn, rd);
4527         break;
4528     }
4529 }
4530
4531 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
4532                        unsigned int rm, unsigned int rn, unsigned int rd)
4533 {
4534     TCGv_i64 tcg_n, tcg_m, tcg_rd;
4535     tcg_rd = cpu_reg(s, rd);
4536
4537     if (!sf && is_signed) {
4538         tcg_n = new_tmp_a64(s);
4539         tcg_m = new_tmp_a64(s);
4540         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
4541         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
4542     } else {
4543         tcg_n = read_cpu_reg(s, rn, sf);
4544         tcg_m = read_cpu_reg(s, rm, sf);
4545     }
4546
4547     if (is_signed) {
4548         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
4549     } else {
4550         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
4551     }
4552
4553     if (!sf) { /* zero extend final result */
4554         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4555     }
4556 }
4557
4558 /* LSLV, LSRV, ASRV, RORV */
4559 static void handle_shift_reg(DisasContext *s,
4560                              enum a64_shift_type shift_type, unsigned int sf,
4561                              unsigned int rm, unsigned int rn, unsigned int rd)
4562 {
4563     TCGv_i64 tcg_shift = tcg_temp_new_i64();
4564     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4565     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4566
4567     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
4568     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
4569     tcg_temp_free_i64(tcg_shift);
4570 }
4571
4572 /* CRC32[BHWX], CRC32C[BHWX] */
4573 static void handle_crc32(DisasContext *s,
4574                          unsigned int sf, unsigned int sz, bool crc32c,
4575                          unsigned int rm, unsigned int rn, unsigned int rd)
4576 {
4577     TCGv_i64 tcg_acc, tcg_val;
4578     TCGv_i32 tcg_bytes;
4579
4580     if (!dc_isar_feature(aa64_crc32, s)
4581         || (sf == 1 && sz != 3)
4582         || (sf == 0 && sz == 3)) {
4583         unallocated_encoding(s);
4584         return;
4585     }
4586
4587     if (sz == 3) {
4588         tcg_val = cpu_reg(s, rm);
4589     } else {
4590         uint64_t mask;
4591         switch (sz) {
4592         case 0:
4593             mask = 0xFF;
4594             break;
4595         case 1:
4596             mask = 0xFFFF;
4597             break;
4598         case 2:
4599             mask = 0xFFFFFFFF;
4600             break;
4601         default:
4602             g_assert_not_reached();
4603         }
4604         tcg_val = new_tmp_a64(s);
4605         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
4606     }
4607
4608     tcg_acc = cpu_reg(s, rn);
4609     tcg_bytes = tcg_const_i32(1 << sz);
4610
4611     if (crc32c) {
4612         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4613     } else {
4614         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4615     }
4616
4617     tcg_temp_free_i32(tcg_bytes);
4618 }
4619
4620 /* Data-processing (2 source)
4621  *   31   30  29 28             21 20  16 15    10 9    5 4    0
4622  * +----+---+---+-----------------+------+--------+------+------+
4623  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
4624  * +----+---+---+-----------------+------+--------+------+------+
4625  */
4626 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
4627 {
4628     unsigned int sf, rm, opcode, rn, rd;
4629     sf = extract32(insn, 31, 1);
4630     rm = extract32(insn, 16, 5);
4631     opcode = extract32(insn, 10, 6);
4632     rn = extract32(insn, 5, 5);
4633     rd = extract32(insn, 0, 5);
4634
4635     if (extract32(insn, 29, 1)) {
4636         unallocated_encoding(s);
4637         return;
4638     }
4639
4640     switch (opcode) {
4641     case 2: /* UDIV */
4642         handle_div(s, false, sf, rm, rn, rd);
4643         break;
4644     case 3: /* SDIV */
4645         handle_div(s, true, sf, rm, rn, rd);
4646         break;
4647     case 8: /* LSLV */
4648         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
4649         break;
4650     case 9: /* LSRV */
4651         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
4652         break;
4653     case 10: /* ASRV */
4654         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
4655         break;
4656     case 11: /* RORV */
4657         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
4658         break;
4659     case 16:
4660     case 17:
4661     case 18:
4662     case 19:
4663     case 20:
4664     case 21:
4665     case 22:
4666     case 23: /* CRC32 */
4667     {
4668         int sz = extract32(opcode, 0, 2);
4669         bool crc32c = extract32(opcode, 2, 1);
4670         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
4671         break;
4672     }
4673     default:
4674         unallocated_encoding(s);
4675         break;
4676     }
4677 }
4678
4679 /* Data processing - register */
4680 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
4681 {
4682     switch (extract32(insn, 24, 5)) {
4683     case 0x0a: /* Logical (shifted register) */
4684         disas_logic_reg(s, insn);
4685         break;
4686     case 0x0b: /* Add/subtract */
4687         if (insn & (1 << 21)) { /* (extended register) */
4688             disas_add_sub_ext_reg(s, insn);
4689         } else {
4690             disas_add_sub_reg(s, insn);
4691         }
4692         break;
4693     case 0x1b: /* Data-processing (3 source) */
4694         disas_data_proc_3src(s, insn);
4695         break;
4696     case 0x1a:
4697         switch (extract32(insn, 21, 3)) {
4698         case 0x0: /* Add/subtract (with carry) */
4699             disas_adc_sbc(s, insn);
4700             break;
4701         case 0x2: /* Conditional compare */
4702             disas_cc(s, insn); /* both imm and reg forms */
4703             break;
4704         case 0x4: /* Conditional select */
4705             disas_cond_select(s, insn);
4706             break;
4707         case 0x6: /* Data-processing */
4708             if (insn & (1 << 30)) { /* (1 source) */
4709                 disas_data_proc_1src(s, insn);
4710             } else {            /* (2 source) */
4711                 disas_data_proc_2src(s, insn);
4712             }
4713             break;
4714         default:
4715             unallocated_encoding(s);
4716             break;
4717         }
4718         break;
4719     default:
4720         unallocated_encoding(s);
4721         break;
4722     }
4723 }
4724
4725 static void handle_fp_compare(DisasContext *s, int size,
4726                               unsigned int rn, unsigned int rm,
4727                               bool cmp_with_zero, bool signal_all_nans)
4728 {
4729     TCGv_i64 tcg_flags = tcg_temp_new_i64();
4730     TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
4731
4732     if (size == MO_64) {
4733         TCGv_i64 tcg_vn, tcg_vm;
4734
4735         tcg_vn = read_fp_dreg(s, rn);
4736         if (cmp_with_zero) {
4737             tcg_vm = tcg_const_i64(0);
4738         } else {
4739             tcg_vm = read_fp_dreg(s, rm);
4740         }
4741         if (signal_all_nans) {
4742             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4743         } else {
4744             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4745         }
4746         tcg_temp_free_i64(tcg_vn);
4747         tcg_temp_free_i64(tcg_vm);
4748     } else {
4749         TCGv_i32 tcg_vn = tcg_temp_new_i32();
4750         TCGv_i32 tcg_vm = tcg_temp_new_i32();
4751
4752         read_vec_element_i32(s, tcg_vn, rn, 0, size);
4753         if (cmp_with_zero) {
4754             tcg_gen_movi_i32(tcg_vm, 0);
4755         } else {
4756             read_vec_element_i32(s, tcg_vm, rm, 0, size);
4757         }
4758
4759         switch (size) {
4760         case MO_32:
4761             if (signal_all_nans) {
4762                 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4763             } else {
4764                 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4765             }
4766             break;
4767         case MO_16:
4768             if (signal_all_nans) {
4769                 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4770             } else {
4771                 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4772             }
4773             break;
4774         default:
4775             g_assert_not_reached();
4776         }
4777
4778         tcg_temp_free_i32(tcg_vn);
4779         tcg_temp_free_i32(tcg_vm);
4780     }
4781
4782     tcg_temp_free_ptr(fpst);
4783
4784     gen_set_nzcv(tcg_flags);
4785
4786     tcg_temp_free_i64(tcg_flags);
4787 }
4788
4789 /* Floating point compare
4790  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
4791  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4792  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
4793  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4794  */
4795 static void disas_fp_compare(DisasContext *s, uint32_t insn)
4796 {
4797     unsigned int mos, type, rm, op, rn, opc, op2r;
4798     int size;
4799
4800     mos = extract32(insn, 29, 3);
4801     type = extract32(insn, 22, 2);
4802     rm = extract32(insn, 16, 5);
4803     op = extract32(insn, 14, 2);
4804     rn = extract32(insn, 5, 5);
4805     opc = extract32(insn, 3, 2);
4806     op2r = extract32(insn, 0, 3);
4807
4808     if (mos || op || op2r) {
4809         unallocated_encoding(s);
4810         return;
4811     }
4812
4813     switch (type) {
4814     case 0:
4815         size = MO_32;
4816         break;
4817     case 1:
4818         size = MO_64;
4819         break;
4820     case 3:
4821         size = MO_16;
4822         if (dc_isar_feature(aa64_fp16, s)) {
4823             break;
4824         }
4825         /* fallthru */
4826     default:
4827         unallocated_encoding(s);
4828         return;
4829     }
4830
4831     if (!fp_access_check(s)) {
4832         return;
4833     }
4834
4835     handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
4836 }
4837
4838 /* Floating point conditional compare
4839  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
4840  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4841  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
4842  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4843  */
4844 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
4845 {
4846     unsigned int mos, type, rm, cond, rn, op, nzcv;
4847     TCGv_i64 tcg_flags;
4848     TCGLabel *label_continue = NULL;
4849     int size;
4850
4851     mos = extract32(insn, 29, 3);
4852     type = extract32(insn, 22, 2);
4853     rm = extract32(insn, 16, 5);
4854     cond = extract32(insn, 12, 4);
4855     rn = extract32(insn, 5, 5);
4856     op = extract32(insn, 4, 1);
4857     nzcv = extract32(insn, 0, 4);
4858
4859     if (mos) {
4860         unallocated_encoding(s);
4861         return;
4862     }
4863
4864     switch (type) {
4865     case 0:
4866         size = MO_32;
4867         break;
4868     case 1:
4869         size = MO_64;
4870         break;
4871     case 3:
4872         size = MO_16;
4873         if (dc_isar_feature(aa64_fp16, s)) {
4874             break;
4875         }
4876         /* fallthru */
4877     default:
4878         unallocated_encoding(s);
4879         return;
4880     }
4881
4882     if (!fp_access_check(s)) {
4883         return;
4884     }
4885
4886     if (cond < 0x0e) { /* not always */
4887         TCGLabel *label_match = gen_new_label();
4888         label_continue = gen_new_label();
4889         arm_gen_test_cc(cond, label_match);
4890         /* nomatch: */
4891         tcg_flags = tcg_const_i64(nzcv << 28);
4892         gen_set_nzcv(tcg_flags);
4893         tcg_temp_free_i64(tcg_flags);
4894         tcg_gen_br(label_continue);
4895         gen_set_label(label_match);
4896     }
4897
4898     handle_fp_compare(s, size, rn, rm, false, op);
4899
4900     if (cond < 0x0e) {
4901         gen_set_label(label_continue);
4902     }
4903 }
4904
4905 /* Floating point conditional select
4906  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
4907  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4908  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
4909  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4910  */
4911 static void disas_fp_csel(DisasContext *s, uint32_t insn)
4912 {
4913     unsigned int mos, type, rm, cond, rn, rd;
4914     TCGv_i64 t_true, t_false, t_zero;
4915     DisasCompare64 c;
4916     TCGMemOp sz;
4917
4918     mos = extract32(insn, 29, 3);
4919     type = extract32(insn, 22, 2);
4920     rm = extract32(insn, 16, 5);
4921     cond = extract32(insn, 12, 4);
4922     rn = extract32(insn, 5, 5);
4923     rd = extract32(insn, 0, 5);
4924
4925     if (mos) {
4926         unallocated_encoding(s);
4927         return;
4928     }
4929
4930     switch (type) {
4931     case 0:
4932         sz = MO_32;
4933         break;
4934     case 1:
4935         sz = MO_64;
4936         break;
4937     case 3:
4938         sz = MO_16;
4939         if (dc_isar_feature(aa64_fp16, s)) {
4940             break;
4941         }
4942         /* fallthru */
4943     default:
4944         unallocated_encoding(s);
4945         return;
4946     }
4947
4948     if (!fp_access_check(s)) {
4949         return;
4950     }
4951
4952     /* Zero extend sreg & hreg inputs to 64 bits now.  */
4953     t_true = tcg_temp_new_i64();
4954     t_false = tcg_temp_new_i64();
4955     read_vec_element(s, t_true, rn, 0, sz);
4956     read_vec_element(s, t_false, rm, 0, sz);
4957
4958     a64_test_cc(&c, cond);
4959     t_zero = tcg_const_i64(0);
4960     tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
4961     tcg_temp_free_i64(t_zero);
4962     tcg_temp_free_i64(t_false);
4963     a64_free_cc(&c);
4964
4965     /* Note that sregs & hregs write back zeros to the high bits,
4966        and we've already done the zero-extension.  */
4967     write_fp_dreg(s, rd, t_true);
4968     tcg_temp_free_i64(t_true);
4969 }
4970
4971 /* Floating-point data-processing (1 source) - half precision */
4972 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
4973 {
4974     TCGv_ptr fpst = NULL;
4975     TCGv_i32 tcg_op = read_fp_hreg(s, rn);
4976     TCGv_i32 tcg_res = tcg_temp_new_i32();
4977
4978     switch (opcode) {
4979     case 0x0: /* FMOV */
4980         tcg_gen_mov_i32(tcg_res, tcg_op);
4981         break;
4982     case 0x1: /* FABS */
4983         tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
4984         break;
4985     case 0x2: /* FNEG */
4986         tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
4987         break;
4988     case 0x3: /* FSQRT */
4989         fpst = get_fpstatus_ptr(true);
4990         gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
4991         break;
4992     case 0x8: /* FRINTN */
4993     case 0x9: /* FRINTP */
4994     case 0xa: /* FRINTM */
4995     case 0xb: /* FRINTZ */
4996     case 0xc: /* FRINTA */
4997     {
4998         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4999         fpst = get_fpstatus_ptr(true);
5000
5001         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5002         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
5003
5004         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5005         tcg_temp_free_i32(tcg_rmode);
5006         break;
5007     }
5008     case 0xe: /* FRINTX */
5009         fpst = get_fpstatus_ptr(true);
5010         gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
5011         break;
5012     case 0xf: /* FRINTI */
5013         fpst = get_fpstatus_ptr(true);
5014         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
5015         break;
5016     default:
5017         abort();
5018     }
5019
5020     write_fp_sreg(s, rd, tcg_res);
5021
5022     if (fpst) {
5023         tcg_temp_free_ptr(fpst);
5024     }
5025     tcg_temp_free_i32(tcg_op);
5026     tcg_temp_free_i32(tcg_res);
5027 }
5028
5029 /* Floating-point data-processing (1 source) - single precision */
5030 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
5031 {
5032     TCGv_ptr fpst;
5033     TCGv_i32 tcg_op;
5034     TCGv_i32 tcg_res;
5035
5036     fpst = get_fpstatus_ptr(false);
5037     tcg_op = read_fp_sreg(s, rn);
5038     tcg_res = tcg_temp_new_i32();
5039
5040     switch (opcode) {
5041     case 0x0: /* FMOV */
5042         tcg_gen_mov_i32(tcg_res, tcg_op);
5043         break;
5044     case 0x1: /* FABS */
5045         gen_helper_vfp_abss(tcg_res, tcg_op);
5046         break;
5047     case 0x2: /* FNEG */
5048         gen_helper_vfp_negs(tcg_res, tcg_op);
5049         break;
5050     case 0x3: /* FSQRT */
5051         gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
5052         break;
5053     case 0x8: /* FRINTN */
5054     case 0x9: /* FRINTP */
5055     case 0xa: /* FRINTM */
5056     case 0xb: /* FRINTZ */
5057     case 0xc: /* FRINTA */
5058     {
5059         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
5060
5061         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5062         gen_helper_rints(tcg_res, tcg_op, fpst);
5063
5064         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5065         tcg_temp_free_i32(tcg_rmode);
5066         break;
5067     }
5068     case 0xe: /* FRINTX */
5069         gen_helper_rints_exact(tcg_res, tcg_op, fpst);
5070         break;
5071     case 0xf: /* FRINTI */
5072         gen_helper_rints(tcg_res, tcg_op, fpst);
5073         break;
5074     default:
5075         abort();
5076     }
5077
5078     write_fp_sreg(s, rd, tcg_res);
5079
5080     tcg_temp_free_ptr(fpst);
5081     tcg_temp_free_i32(tcg_op);
5082     tcg_temp_free_i32(tcg_res);
5083 }
5084
5085 /* Floating-point data-processing (1 source) - double precision */
5086 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
5087 {
5088     TCGv_ptr fpst;
5089     TCGv_i64 tcg_op;
5090     TCGv_i64 tcg_res;
5091
5092     switch (opcode) {
5093     case 0x0: /* FMOV */
5094         gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
5095         return;
5096     }
5097
5098     fpst = get_fpstatus_ptr(false);
5099     tcg_op = read_fp_dreg(s, rn);
5100     tcg_res = tcg_temp_new_i64();
5101
5102     switch (opcode) {
5103     case 0x1: /* FABS */
5104         gen_helper_vfp_absd(tcg_res, tcg_op);
5105         break;
5106     case 0x2: /* FNEG */
5107         gen_helper_vfp_negd(tcg_res, tcg_op);
5108         break;
5109     case 0x3: /* FSQRT */
5110         gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
5111         break;
5112     case 0x8: /* FRINTN */
5113     case 0x9: /* FRINTP */
5114     case 0xa: /* FRINTM */
5115     case 0xb: /* FRINTZ */
5116     case 0xc: /* FRINTA */
5117     {
5118         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
5119
5120         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5121         gen_helper_rintd(tcg_res, tcg_op, fpst);
5122
5123         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5124         tcg_temp_free_i32(tcg_rmode);
5125         break;
5126     }
5127     case 0xe: /* FRINTX */
5128         gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
5129         break;
5130     case 0xf: /* FRINTI */
5131         gen_helper_rintd(tcg_res, tcg_op, fpst);
5132         break;
5133     default:
5134         abort();
5135     }
5136
5137     write_fp_dreg(s, rd, tcg_res);
5138
5139     tcg_temp_free_ptr(fpst);
5140     tcg_temp_free_i64(tcg_op);
5141     tcg_temp_free_i64(tcg_res);
5142 }
5143
5144 static void handle_fp_fcvt(DisasContext *s, int opcode,
5145                            int rd, int rn, int dtype, int ntype)
5146 {
5147     switch (ntype) {
5148     case 0x0:
5149     {
5150         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
5151         if (dtype == 1) {
5152             /* Single to double */
5153             TCGv_i64 tcg_rd = tcg_temp_new_i64();
5154             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
5155             write_fp_dreg(s, rd, tcg_rd);
5156             tcg_temp_free_i64(tcg_rd);
5157         } else {
5158             /* Single to half */
5159             TCGv_i32 tcg_rd = tcg_temp_new_i32();
5160             TCGv_i32 ahp = get_ahp_flag();
5161             TCGv_ptr fpst = get_fpstatus_ptr(false);
5162
5163             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
5164             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
5165             write_fp_sreg(s, rd, tcg_rd);
5166             tcg_temp_free_i32(tcg_rd);
5167             tcg_temp_free_i32(ahp);
5168             tcg_temp_free_ptr(fpst);
5169         }
5170         tcg_temp_free_i32(tcg_rn);
5171         break;
5172     }
5173     case 0x1:
5174     {
5175         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
5176         TCGv_i32 tcg_rd = tcg_temp_new_i32();
5177         if (dtype == 0) {
5178             /* Double to single */
5179             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
5180         } else {
5181             TCGv_ptr fpst = get_fpstatus_ptr(false);
5182             TCGv_i32 ahp = get_ahp_flag();
5183             /* Double to half */
5184             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
5185             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
5186             tcg_temp_free_ptr(fpst);
5187             tcg_temp_free_i32(ahp);
5188         }
5189         write_fp_sreg(s, rd, tcg_rd);
5190         tcg_temp_free_i32(tcg_rd);
5191         tcg_temp_free_i64(tcg_rn);
5192         break;
5193     }
5194     case 0x3:
5195     {
5196         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
5197         TCGv_ptr tcg_fpst = get_fpstatus_ptr(false);
5198         TCGv_i32 tcg_ahp = get_ahp_flag();
5199         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
5200         if (dtype == 0) {
5201             /* Half to single */
5202             TCGv_i32 tcg_rd = tcg_temp_new_i32();
5203             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
5204             write_fp_sreg(s, rd, tcg_rd);
5205             tcg_temp_free_ptr(tcg_fpst);
5206             tcg_temp_free_i32(tcg_ahp);
5207             tcg_temp_free_i32(tcg_rd);
5208         } else {
5209             /* Half to double */
5210             TCGv_i64 tcg_rd = tcg_temp_new_i64();
5211             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
5212             write_fp_dreg(s, rd, tcg_rd);
5213             tcg_temp_free_i64(tcg_rd);
5214         }
5215         tcg_temp_free_i32(tcg_rn);
5216         break;
5217     }
5218     default:
5219         abort();
5220     }
5221 }
5222
5223 /* Floating point data-processing (1 source)
5224  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
5225  * +---+---+---+-----------+------+---+--------+-----------+------+------+
5226  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
5227  * +---+---+---+-----------+------+---+--------+-----------+------+------+
5228  */
5229 static void disas_fp_1src(DisasContext *s, uint32_t insn)
5230 {
5231     int type = extract32(insn, 22, 2);
5232     int opcode = extract32(insn, 15, 6);
5233     int rn = extract32(insn, 5, 5);
5234     int rd = extract32(insn, 0, 5);
5235
5236     switch (opcode) {
5237     case 0x4: case 0x5: case 0x7:
5238     {
5239         /* FCVT between half, single and double precision */
5240         int dtype = extract32(opcode, 0, 2);
5241         if (type == 2 || dtype == type) {
5242             unallocated_encoding(s);
5243             return;
5244         }
5245         if (!fp_access_check(s)) {
5246             return;
5247         }
5248
5249         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
5250         break;
5251     }
5252     case 0x0 ... 0x3:
5253     case 0x8 ... 0xc:
5254     case 0xe ... 0xf:
5255         /* 32-to-32 and 64-to-64 ops */
5256         switch (type) {
5257         case 0:
5258             if (!fp_access_check(s)) {
5259                 return;
5260             }
5261
5262             handle_fp_1src_single(s, opcode, rd, rn);
5263             break;
5264         case 1:
5265             if (!fp_access_check(s)) {
5266                 return;
5267             }
5268
5269             handle_fp_1src_double(s, opcode, rd, rn);
5270             break;
5271         case 3:
5272             if (!dc_isar_feature(aa64_fp16, s)) {
5273                 unallocated_encoding(s);
5274                 return;
5275             }
5276
5277             if (!fp_access_check(s)) {
5278                 return;
5279             }
5280
5281             handle_fp_1src_half(s, opcode, rd, rn);
5282             break;
5283         default:
5284             unallocated_encoding(s);
5285         }
5286         break;
5287     default:
5288         unallocated_encoding(s);
5289         break;
5290     }
5291 }
5292
5293 /* Floating-point data-processing (2 source) - single precision */
5294 static void handle_fp_2src_single(DisasContext *s, int opcode,
5295                                   int rd, int rn, int rm)
5296 {
5297     TCGv_i32 tcg_op1;
5298     TCGv_i32 tcg_op2;
5299     TCGv_i32 tcg_res;
5300     TCGv_ptr fpst;
5301
5302     tcg_res = tcg_temp_new_i32();
5303     fpst = get_fpstatus_ptr(false);
5304     tcg_op1 = read_fp_sreg(s, rn);
5305     tcg_op2 = read_fp_sreg(s, rm);
5306
5307     switch (opcode) {
5308     case 0x0: /* FMUL */
5309         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
5310         break;
5311     case 0x1: /* FDIV */
5312         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
5313         break;
5314     case 0x2: /* FADD */
5315         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
5316         break;
5317     case 0x3: /* FSUB */
5318         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
5319         break;
5320     case 0x4: /* FMAX */
5321         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
5322         break;
5323     case 0x5: /* FMIN */
5324         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
5325         break;
5326     case 0x6: /* FMAXNM */
5327         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
5328         break;
5329     case 0x7: /* FMINNM */
5330         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
5331         break;
5332     case 0x8: /* FNMUL */
5333         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
5334         gen_helper_vfp_negs(tcg_res, tcg_res);
5335         break;
5336     }
5337
5338     write_fp_sreg(s, rd, tcg_res);
5339
5340     tcg_temp_free_ptr(fpst);
5341     tcg_temp_free_i32(tcg_op1);
5342     tcg_temp_free_i32(tcg_op2);
5343     tcg_temp_free_i32(tcg_res);
5344 }
5345
5346 /* Floating-point data-processing (2 source) - double precision */
5347 static void handle_fp_2src_double(DisasContext *s, int opcode,
5348                                   int rd, int rn, int rm)
5349 {
5350     TCGv_i64 tcg_op1;
5351     TCGv_i64 tcg_op2;
5352     TCGv_i64 tcg_res;
5353     TCGv_ptr fpst;
5354
5355     tcg_res = tcg_temp_new_i64();
5356     fpst = get_fpstatus_ptr(false);
5357     tcg_op1 = read_fp_dreg(s, rn);
5358     tcg_op2 = read_fp_dreg(s, rm);
5359
5360     switch (opcode) {
5361     case 0x0: /* FMUL */
5362         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
5363         break;
5364     case 0x1: /* FDIV */
5365         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
5366         break;
5367     case 0x2: /* FADD */
5368         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
5369         break;
5370     case 0x3: /* FSUB */
5371         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
5372         break;
5373     case 0x4: /* FMAX */
5374         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
5375         break;
5376     case 0x5: /* FMIN */
5377         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
5378         break;
5379     case 0x6: /* FMAXNM */
5380         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
5381         break;
5382     case 0x7: /* FMINNM */
5383         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
5384         break;
5385     case 0x8: /* FNMUL */
5386         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
5387         gen_helper_vfp_negd(tcg_res, tcg_res);
5388         break;
5389     }
5390
5391     write_fp_dreg(s, rd, tcg_res);
5392
5393     tcg_temp_free_ptr(fpst);
5394     tcg_temp_free_i64(tcg_op1);
5395     tcg_temp_free_i64(tcg_op2);
5396     tcg_temp_free_i64(tcg_res);
5397 }
5398
5399 /* Floating-point data-processing (2 source) - half precision */
5400 static void handle_fp_2src_half(DisasContext *s, int opcode,
5401                                 int rd, int rn, int rm)
5402 {
5403     TCGv_i32 tcg_op1;
5404     TCGv_i32 tcg_op2;
5405     TCGv_i32 tcg_res;
5406     TCGv_ptr fpst;
5407
5408     tcg_res = tcg_temp_new_i32();
5409     fpst = get_fpstatus_ptr(true);
5410     tcg_op1 = read_fp_hreg(s, rn);
5411     tcg_op2 = read_fp_hreg(s, rm);
5412
5413     switch (opcode) {
5414     case 0x0: /* FMUL */
5415         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
5416         break;
5417     case 0x1: /* FDIV */
5418         gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
5419         break;
5420     case 0x2: /* FADD */
5421         gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
5422         break;
5423     case 0x3: /* FSUB */
5424         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
5425         break;
5426     case 0x4: /* FMAX */
5427         gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
5428         break;
5429     case 0x5: /* FMIN */
5430         gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
5431         break;
5432     case 0x6: /* FMAXNM */
5433         gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
5434         break;
5435     case 0x7: /* FMINNM */
5436         gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
5437         break;
5438     case 0x8: /* FNMUL */
5439         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
5440         tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
5441         break;
5442     default:
5443         g_assert_not_reached();
5444     }
5445
5446     write_fp_sreg(s, rd, tcg_res);
5447
5448     tcg_temp_free_ptr(fpst);
5449     tcg_temp_free_i32(tcg_op1);
5450     tcg_temp_free_i32(tcg_op2);
5451     tcg_temp_free_i32(tcg_res);
5452 }
5453
5454 /* Floating point data-processing (2 source)
5455  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
5456  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
5457  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
5458  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
5459  */
5460 static void disas_fp_2src(DisasContext *s, uint32_t insn)
5461 {
5462     int type = extract32(insn, 22, 2);
5463     int rd = extract32(insn, 0, 5);
5464     int rn = extract32(insn, 5, 5);
5465     int rm = extract32(insn, 16, 5);
5466     int opcode = extract32(insn, 12, 4);
5467
5468     if (opcode > 8) {
5469         unallocated_encoding(s);
5470         return;
5471     }
5472
5473     switch (type) {
5474     case 0:
5475         if (!fp_access_check(s)) {
5476             return;
5477         }
5478         handle_fp_2src_single(s, opcode, rd, rn, rm);
5479         break;
5480     case 1:
5481         if (!fp_access_check(s)) {
5482             return;
5483         }
5484         handle_fp_2src_double(s, opcode, rd, rn, rm);
5485         break;
5486     case 3:
5487         if (!dc_isar_feature(aa64_fp16, s)) {
5488             unallocated_encoding(s);
5489             return;
5490         }
5491         if (!fp_access_check(s)) {
5492             return;
5493         }
5494         handle_fp_2src_half(s, opcode, rd, rn, rm);
5495         break;
5496     default:
5497         unallocated_encoding(s);
5498     }
5499 }
5500
5501 /* Floating-point data-processing (3 source) - single precision */
5502 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
5503                                   int rd, int rn, int rm, int ra)
5504 {
5505     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
5506     TCGv_i32 tcg_res = tcg_temp_new_i32();
5507     TCGv_ptr fpst = get_fpstatus_ptr(false);
5508
5509     tcg_op1 = read_fp_sreg(s, rn);
5510     tcg_op2 = read_fp_sreg(s, rm);
5511     tcg_op3 = read_fp_sreg(s, ra);
5512
5513     /* These are fused multiply-add, and must be done as one
5514      * floating point operation with no rounding between the
5515      * multiplication and addition steps.
5516      * NB that doing the negations here as separate steps is
5517      * correct : an input NaN should come out with its sign bit
5518      * flipped if it is a negated-input.
5519      */
5520     if (o1 == true) {
5521         gen_helper_vfp_negs(tcg_op3, tcg_op3);
5522     }
5523
5524     if (o0 != o1) {
5525         gen_helper_vfp_negs(tcg_op1, tcg_op1);
5526     }
5527
5528     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
5529
5530     write_fp_sreg(s, rd, tcg_res);
5531
5532     tcg_temp_free_ptr(fpst);
5533     tcg_temp_free_i32(tcg_op1);
5534     tcg_temp_free_i32(tcg_op2);
5535     tcg_temp_free_i32(tcg_op3);
5536     tcg_temp_free_i32(tcg_res);
5537 }
5538
5539 /* Floating-point data-processing (3 source) - double precision */
5540 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
5541                                   int rd, int rn, int rm, int ra)
5542 {
5543     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
5544     TCGv_i64 tcg_res = tcg_temp_new_i64();
5545     TCGv_ptr fpst = get_fpstatus_ptr(false);
5546
5547     tcg_op1 = read_fp_dreg(s, rn);
5548     tcg_op2 = read_fp_dreg(s, rm);
5549     tcg_op3 = read_fp_dreg(s, ra);
5550
5551     /* These are fused multiply-add, and must be done as one
5552      * floating point operation with no rounding between the
5553      * multiplication and addition steps.
5554      * NB that doing the negations here as separate steps is
5555      * correct : an input NaN should come out with its sign bit
5556      * flipped if it is a negated-input.
5557      */
5558     if (o1 == true) {
5559         gen_helper_vfp_negd(tcg_op3, tcg_op3);
5560     }
5561
5562     if (o0 != o1) {
5563         gen_helper_vfp_negd(tcg_op1, tcg_op1);
5564     }
5565
5566     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
5567
5568     write_fp_dreg(s, rd, tcg_res);
5569
5570     tcg_temp_free_ptr(fpst);
5571     tcg_temp_free_i64(tcg_op1);
5572     tcg_temp_free_i64(tcg_op2);
5573     tcg_temp_free_i64(tcg_op3);
5574     tcg_temp_free_i64(tcg_res);
5575 }
5576
5577 /* Floating-point data-processing (3 source) - half precision */
5578 static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
5579                                 int rd, int rn, int rm, int ra)
5580 {
5581     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
5582     TCGv_i32 tcg_res = tcg_temp_new_i32();
5583     TCGv_ptr fpst = get_fpstatus_ptr(true);
5584
5585     tcg_op1 = read_fp_hreg(s, rn);
5586     tcg_op2 = read_fp_hreg(s, rm);
5587     tcg_op3 = read_fp_hreg(s, ra);
5588
5589     /* These are fused multiply-add, and must be done as one
5590      * floating point operation with no rounding between the
5591      * multiplication and addition steps.
5592      * NB that doing the negations here as separate steps is
5593      * correct : an input NaN should come out with its sign bit
5594      * flipped if it is a negated-input.
5595      */
5596     if (o1 == true) {
5597         tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
5598     }
5599
5600     if (o0 != o1) {
5601         tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
5602     }
5603
5604     gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
5605
5606     write_fp_sreg(s, rd, tcg_res);
5607
5608     tcg_temp_free_ptr(fpst);
5609     tcg_temp_free_i32(tcg_op1);
5610     tcg_temp_free_i32(tcg_op2);
5611     tcg_temp_free_i32(tcg_op3);
5612     tcg_temp_free_i32(tcg_res);
5613 }
5614
5615 /* Floating point data-processing (3 source)
5616  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
5617  * +---+---+---+-----------+------+----+------+----+------+------+------+
5618  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
5619  * +---+---+---+-----------+------+----+------+----+------+------+------+
5620  */
5621 static void disas_fp_3src(DisasContext *s, uint32_t insn)
5622 {
5623     int type = extract32(insn, 22, 2);
5624     int rd = extract32(insn, 0, 5);
5625     int rn = extract32(insn, 5, 5);
5626     int ra = extract32(insn, 10, 5);
5627     int rm = extract32(insn, 16, 5);
5628     bool o0 = extract32(insn, 15, 1);
5629     bool o1 = extract32(insn, 21, 1);
5630
5631     switch (type) {
5632     case 0:
5633         if (!fp_access_check(s)) {
5634             return;
5635         }
5636         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
5637         break;
5638     case 1:
5639         if (!fp_access_check(s)) {
5640             return;
5641         }
5642         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
5643         break;
5644     case 3:
5645         if (!dc_isar_feature(aa64_fp16, s)) {
5646             unallocated_encoding(s);
5647             return;
5648         }
5649         if (!fp_access_check(s)) {
5650             return;
5651         }
5652         handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
5653         break;
5654     default:
5655         unallocated_encoding(s);
5656     }
5657 }
5658
5659 /* The imm8 encodes the sign bit, enough bits to represent an exponent in
5660  * the range 01....1xx to 10....0xx, and the most significant 4 bits of
5661  * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
5662  */
5663 uint64_t vfp_expand_imm(int size, uint8_t imm8)
5664 {
5665     uint64_t imm;
5666
5667     switch (size) {
5668     case MO_64:
5669         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5670             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
5671             extract32(imm8, 0, 6);
5672         imm <<= 48;
5673         break;
5674     case MO_32:
5675         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5676             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
5677             (extract32(imm8, 0, 6) << 3);
5678         imm <<= 16;
5679         break;
5680     case MO_16:
5681         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5682             (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
5683             (extract32(imm8, 0, 6) << 6);
5684         break;
5685     default:
5686         g_assert_not_reached();
5687     }
5688     return imm;
5689 }
5690
5691 /* Floating point immediate
5692  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
5693  * +---+---+---+-----------+------+---+------------+-------+------+------+
5694  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
5695  * +---+---+---+-----------+------+---+------------+-------+------+------+
5696  */
5697 static void disas_fp_imm(DisasContext *s, uint32_t insn)
5698 {
5699     int rd = extract32(insn, 0, 5);
5700     int imm8 = extract32(insn, 13, 8);
5701     int type = extract32(insn, 22, 2);
5702     uint64_t imm;
5703     TCGv_i64 tcg_res;
5704     TCGMemOp sz;
5705
5706     switch (type) {
5707     case 0:
5708         sz = MO_32;
5709         break;
5710     case 1:
5711         sz = MO_64;
5712         break;
5713     case 3:
5714         sz = MO_16;
5715         if (dc_isar_feature(aa64_fp16, s)) {
5716             break;
5717         }
5718         /* fallthru */
5719     default:
5720         unallocated_encoding(s);
5721         return;
5722     }
5723
5724     if (!fp_access_check(s)) {
5725         return;
5726     }
5727
5728     imm = vfp_expand_imm(sz, imm8);
5729
5730     tcg_res = tcg_const_i64(imm);
5731     write_fp_dreg(s, rd, tcg_res);
5732     tcg_temp_free_i64(tcg_res);
5733 }
5734
5735 /* Handle floating point <=> fixed point conversions. Note that we can
5736  * also deal with fp <=> integer conversions as a special case (scale == 64)
5737  * OPTME: consider handling that special case specially or at least skipping
5738  * the call to scalbn in the helpers for zero shifts.
5739  */
5740 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
5741                            bool itof, int rmode, int scale, int sf, int type)
5742 {
5743     bool is_signed = !(opcode & 1);
5744     TCGv_ptr tcg_fpstatus;
5745     TCGv_i32 tcg_shift, tcg_single;
5746     TCGv_i64 tcg_double;
5747
5748     tcg_fpstatus = get_fpstatus_ptr(type == 3);
5749
5750     tcg_shift = tcg_const_i32(64 - scale);
5751
5752     if (itof) {
5753         TCGv_i64 tcg_int = cpu_reg(s, rn);
5754         if (!sf) {
5755             TCGv_i64 tcg_extend = new_tmp_a64(s);
5756
5757             if (is_signed) {
5758                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
5759             } else {
5760                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
5761             }
5762
5763             tcg_int = tcg_extend;
5764         }
5765
5766         switch (type) {
5767         case 1: /* float64 */
5768             tcg_double = tcg_temp_new_i64();
5769             if (is_signed) {
5770                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
5771                                      tcg_shift, tcg_fpstatus);
5772             } else {
5773                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
5774                                      tcg_shift, tcg_fpstatus);
5775             }
5776             write_fp_dreg(s, rd, tcg_double);
5777             tcg_temp_free_i64(tcg_double);
5778             break;
5779
5780         case 0: /* float32 */
5781             tcg_single = tcg_temp_new_i32();
5782             if (is_signed) {
5783                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
5784                                      tcg_shift, tcg_fpstatus);
5785             } else {
5786                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
5787                                      tcg_shift, tcg_fpstatus);
5788             }
5789             write_fp_sreg(s, rd, tcg_single);
5790             tcg_temp_free_i32(tcg_single);
5791             break;
5792
5793         case 3: /* float16 */
5794             tcg_single = tcg_temp_new_i32();
5795             if (is_signed) {
5796                 gen_helper_vfp_sqtoh(tcg_single, tcg_int,
5797                                      tcg_shift, tcg_fpstatus);
5798             } else {
5799                 gen_helper_vfp_uqtoh(tcg_single, tcg_int,
5800                                      tcg_shift, tcg_fpstatus);
5801             }
5802             write_fp_sreg(s, rd, tcg_single);
5803             tcg_temp_free_i32(tcg_single);
5804             break;
5805
5806         default:
5807             g_assert_not_reached();
5808         }
5809     } else {
5810         TCGv_i64 tcg_int = cpu_reg(s, rd);
5811         TCGv_i32 tcg_rmode;
5812
5813         if (extract32(opcode, 2, 1)) {
5814             /* There are too many rounding modes to all fit into rmode,
5815              * so FCVTA[US] is a special case.
5816              */
5817             rmode = FPROUNDING_TIEAWAY;
5818         }
5819
5820         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
5821
5822         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
5823
5824         switch (type) {
5825         case 1: /* float64 */
5826             tcg_double = read_fp_dreg(s, rn);
5827             if (is_signed) {
5828                 if (!sf) {
5829                     gen_helper_vfp_tosld(tcg_int, tcg_double,
5830                                          tcg_shift, tcg_fpstatus);
5831                 } else {
5832                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
5833                                          tcg_shift, tcg_fpstatus);
5834                 }
5835             } else {
5836                 if (!sf) {
5837                     gen_helper_vfp_tould(tcg_int, tcg_double,
5838                                          tcg_shift, tcg_fpstatus);
5839                 } else {
5840                     gen_helper_vfp_touqd(tcg_int, tcg_double,
5841                                          tcg_shift, tcg_fpstatus);
5842                 }
5843             }
5844             if (!sf) {
5845                 tcg_gen_ext32u_i64(tcg_int, tcg_int);
5846             }
5847             tcg_temp_free_i64(tcg_double);
5848             break;
5849
5850         case 0: /* float32 */
5851             tcg_single = read_fp_sreg(s, rn);
5852             if (sf) {
5853                 if (is_signed) {
5854                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
5855                                          tcg_shift, tcg_fpstatus);
5856                 } else {
5857                     gen_helper_vfp_touqs(tcg_int, tcg_single,
5858                                          tcg_shift, tcg_fpstatus);
5859                 }
5860             } else {
5861                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
5862                 if (is_signed) {
5863                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
5864                                          tcg_shift, tcg_fpstatus);
5865                 } else {
5866                     gen_helper_vfp_touls(tcg_dest, tcg_single,
5867                                          tcg_shift, tcg_fpstatus);
5868                 }
5869                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
5870                 tcg_temp_free_i32(tcg_dest);
5871             }
5872             tcg_temp_free_i32(tcg_single);
5873             break;
5874
5875         case 3: /* float16 */
5876             tcg_single = read_fp_sreg(s, rn);
5877             if (sf) {
5878                 if (is_signed) {
5879                     gen_helper_vfp_tosqh(tcg_int, tcg_single,
5880                                          tcg_shift, tcg_fpstatus);
5881                 } else {
5882                     gen_helper_vfp_touqh(tcg_int, tcg_single,
5883                                          tcg_shift, tcg_fpstatus);
5884                 }
5885             } else {
5886                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
5887                 if (is_signed) {
5888                     gen_helper_vfp_toslh(tcg_dest, tcg_single,
5889                                          tcg_shift, tcg_fpstatus);
5890                 } else {
5891                     gen_helper_vfp_toulh(tcg_dest, tcg_single,
5892                                          tcg_shift, tcg_fpstatus);
5893                 }
5894                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
5895                 tcg_temp_free_i32(tcg_dest);
5896             }
5897             tcg_temp_free_i32(tcg_single);
5898             break;
5899
5900         default:
5901             g_assert_not_reached();
5902         }
5903
5904         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
5905         tcg_temp_free_i32(tcg_rmode);
5906     }
5907
5908     tcg_temp_free_ptr(tcg_fpstatus);
5909     tcg_temp_free_i32(tcg_shift);
5910 }
5911
5912 /* Floating point <-> fixed point conversions
5913  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
5914  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5915  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
5916  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5917  */
5918 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
5919 {
5920     int rd = extract32(insn, 0, 5);
5921     int rn = extract32(insn, 5, 5);
5922     int scale = extract32(insn, 10, 6);
5923     int opcode = extract32(insn, 16, 3);
5924     int rmode = extract32(insn, 19, 2);
5925     int type = extract32(insn, 22, 2);
5926     bool sbit = extract32(insn, 29, 1);
5927     bool sf = extract32(insn, 31, 1);
5928     bool itof;
5929
5930     if (sbit || (!sf && scale < 32)) {
5931         unallocated_encoding(s);
5932         return;
5933     }
5934
5935     switch (type) {
5936     case 0: /* float32 */
5937     case 1: /* float64 */
5938         break;
5939     case 3: /* float16 */
5940         if (dc_isar_feature(aa64_fp16, s)) {
5941             break;
5942         }
5943         /* fallthru */
5944     default:
5945         unallocated_encoding(s);
5946         return;
5947     }
5948
5949     switch ((rmode << 3) | opcode) {
5950     case 0x2: /* SCVTF */
5951     case 0x3: /* UCVTF */
5952         itof = true;
5953         break;
5954     case 0x18: /* FCVTZS */
5955     case 0x19: /* FCVTZU */
5956         itof = false;
5957         break;
5958     default:
5959         unallocated_encoding(s);
5960         return;
5961     }
5962
5963     if (!fp_access_check(s)) {
5964         return;
5965     }
5966
5967     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
5968 }
5969
5970 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
5971 {
5972     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
5973      * without conversion.
5974      */
5975
5976     if (itof) {
5977         TCGv_i64 tcg_rn = cpu_reg(s, rn);
5978         TCGv_i64 tmp;
5979
5980         switch (type) {
5981         case 0:
5982             /* 32 bit */
5983             tmp = tcg_temp_new_i64();
5984             tcg_gen_ext32u_i64(tmp, tcg_rn);
5985             write_fp_dreg(s, rd, tmp);
5986             tcg_temp_free_i64(tmp);
5987             break;
5988         case 1:
5989             /* 64 bit */
5990             write_fp_dreg(s, rd, tcg_rn);
5991             break;
5992         case 2:
5993             /* 64 bit to top half. */
5994             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
5995             clear_vec_high(s, true, rd);
5996             break;
5997         case 3:
5998             /* 16 bit */
5999             tmp = tcg_temp_new_i64();
6000             tcg_gen_ext16u_i64(tmp, tcg_rn);
6001             write_fp_dreg(s, rd, tmp);
6002             tcg_temp_free_i64(tmp);
6003             break;
6004         default:
6005             g_assert_not_reached();
6006         }
6007     } else {
6008         TCGv_i64 tcg_rd = cpu_reg(s, rd);
6009
6010         switch (type) {
6011         case 0:
6012             /* 32 bit */
6013             tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
6014             break;
6015         case 1:
6016             /* 64 bit */
6017             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
6018             break;
6019         case 2:
6020             /* 64 bits from top half */
6021             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
6022             break;
6023         case 3:
6024             /* 16 bit */
6025             tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16));
6026             break;
6027         default:
6028             g_assert_not_reached();
6029         }
6030     }
6031 }
6032
6033 /* Floating point <-> integer conversions
6034  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
6035  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
6036  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
6037  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
6038  */
6039 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
6040 {
6041     int rd = extract32(insn, 0, 5);
6042     int rn = extract32(insn, 5, 5);
6043     int opcode = extract32(insn, 16, 3);
6044     int rmode = extract32(insn, 19, 2);
6045     int type = extract32(insn, 22, 2);
6046     bool sbit = extract32(insn, 29, 1);
6047     bool sf = extract32(insn, 31, 1);
6048
6049     if (sbit) {
6050         unallocated_encoding(s);
6051         return;
6052     }
6053
6054     if (opcode > 5) {
6055         /* FMOV */
6056         bool itof = opcode & 1;
6057
6058         if (rmode >= 2) {
6059             unallocated_encoding(s);
6060             return;
6061         }
6062
6063         switch (sf << 3 | type << 1 | rmode) {
6064         case 0x0: /* 32 bit */
6065         case 0xa: /* 64 bit */
6066         case 0xd: /* 64 bit to top half of quad */
6067             break;
6068         case 0x6: /* 16-bit float, 32-bit int */
6069         case 0xe: /* 16-bit float, 64-bit int */
6070             if (dc_isar_feature(aa64_fp16, s)) {
6071                 break;
6072             }
6073             /* fallthru */
6074         default:
6075             /* all other sf/type/rmode combinations are invalid */
6076             unallocated_encoding(s);
6077             return;
6078         }
6079
6080         if (!fp_access_check(s)) {
6081             return;
6082         }
6083         handle_fmov(s, rd, rn, type, itof);
6084     } else {
6085         /* actual FP conversions */
6086         bool itof = extract32(opcode, 1, 1);
6087
6088         if (rmode != 0 && opcode > 1) {
6089             unallocated_encoding(s);
6090             return;
6091         }
6092         switch (type) {
6093         case 0: /* float32 */
6094         case 1: /* float64 */
6095             break;
6096         case 3: /* float16 */
6097             if (dc_isar_feature(aa64_fp16, s)) {
6098                 break;
6099             }
6100             /* fallthru */
6101         default:
6102             unallocated_encoding(s);
6103             return;
6104         }
6105
6106         if (!fp_access_check(s)) {
6107             return;
6108         }
6109         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
6110     }
6111 }
6112
6113 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
6114  *   31  30  29 28     25 24                          0
6115  * +---+---+---+---------+-----------------------------+
6116  * |   | 0 |   | 1 1 1 1 |                             |
6117  * +---+---+---+---------+-----------------------------+
6118  */
6119 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
6120 {
6121     if (extract32(insn, 24, 1)) {
6122         /* Floating point data-processing (3 source) */
6123         disas_fp_3src(s, insn);
6124     } else if (extract32(insn, 21, 1) == 0) {
6125         /* Floating point to fixed point conversions */
6126         disas_fp_fixed_conv(s, insn);
6127     } else {
6128         switch (extract32(insn, 10, 2)) {
6129         case 1:
6130             /* Floating point conditional compare */
6131             disas_fp_ccomp(s, insn);
6132             break;
6133         case 2:
6134             /* Floating point data-processing (2 source) */
6135             disas_fp_2src(s, insn);
6136             break;
6137         case 3:
6138             /* Floating point conditional select */
6139             disas_fp_csel(s, insn);
6140             break;
6141         case 0:
6142             switch (ctz32(extract32(insn, 12, 4))) {
6143             case 0: /* [15:12] == xxx1 */
6144                 /* Floating point immediate */
6145                 disas_fp_imm(s, insn);
6146                 break;
6147             case 1: /* [15:12] == xx10 */
6148                 /* Floating point compare */
6149                 disas_fp_compare(s, insn);
6150                 break;
6151             case 2: /* [15:12] == x100 */
6152                 /* Floating point data-processing (1 source) */
6153                 disas_fp_1src(s, insn);
6154                 break;
6155             case 3: /* [15:12] == 1000 */
6156                 unallocated_encoding(s);
6157                 break;
6158             default: /* [15:12] == 0000 */
6159                 /* Floating point <-> integer conversions */
6160                 disas_fp_int_conv(s, insn);
6161                 break;
6162             }
6163             break;
6164         }
6165     }
6166 }
6167
6168 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
6169                      int pos)
6170 {
6171     /* Extract 64 bits from the middle of two concatenated 64 bit
6172      * vector register slices left:right. The extracted bits start
6173      * at 'pos' bits into the right (least significant) side.
6174      * We return the result in tcg_right, and guarantee not to
6175      * trash tcg_left.
6176      */
6177     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
6178     assert(pos > 0 && pos < 64);
6179
6180     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
6181     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
6182     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
6183
6184     tcg_temp_free_i64(tcg_tmp);
6185 }
6186
6187 /* EXT
6188  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
6189  * +---+---+-------------+-----+---+------+---+------+---+------+------+
6190  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
6191  * +---+---+-------------+-----+---+------+---+------+---+------+------+
6192  */
6193 static void disas_simd_ext(DisasContext *s, uint32_t insn)
6194 {
6195     int is_q = extract32(insn, 30, 1);
6196     int op2 = extract32(insn, 22, 2);
6197     int imm4 = extract32(insn, 11, 4);
6198     int rm = extract32(insn, 16, 5);
6199     int rn = extract32(insn, 5, 5);
6200     int rd = extract32(insn, 0, 5);
6201     int pos = imm4 << 3;
6202     TCGv_i64 tcg_resl, tcg_resh;
6203
6204     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
6205         unallocated_encoding(s);
6206         return;
6207     }
6208
6209     if (!fp_access_check(s)) {
6210         return;
6211     }
6212
6213     tcg_resh = tcg_temp_new_i64();
6214     tcg_resl = tcg_temp_new_i64();
6215
6216     /* Vd gets bits starting at pos bits into Vm:Vn. This is
6217      * either extracting 128 bits from a 128:128 concatenation, or
6218      * extracting 64 bits from a 64:64 concatenation.
6219      */
6220     if (!is_q) {
6221         read_vec_element(s, tcg_resl, rn, 0, MO_64);
6222         if (pos != 0) {
6223             read_vec_element(s, tcg_resh, rm, 0, MO_64);
6224             do_ext64(s, tcg_resh, tcg_resl, pos);
6225         }
6226         tcg_gen_movi_i64(tcg_resh, 0);
6227     } else {
6228         TCGv_i64 tcg_hh;
6229         typedef struct {
6230             int reg;
6231             int elt;
6232         } EltPosns;
6233         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
6234         EltPosns *elt = eltposns;
6235
6236         if (pos >= 64) {
6237             elt++;
6238             pos -= 64;
6239         }
6240
6241         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
6242         elt++;
6243         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
6244         elt++;
6245         if (pos != 0) {
6246             do_ext64(s, tcg_resh, tcg_resl, pos);
6247             tcg_hh = tcg_temp_new_i64();
6248             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
6249             do_ext64(s, tcg_hh, tcg_resh, pos);
6250             tcg_temp_free_i64(tcg_hh);
6251         }
6252     }
6253
6254     write_vec_element(s, tcg_resl, rd, 0, MO_64);
6255     tcg_temp_free_i64(tcg_resl);
6256     write_vec_element(s, tcg_resh, rd, 1, MO_64);
6257     tcg_temp_free_i64(tcg_resh);
6258 }
6259
6260 /* TBL/TBX
6261  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
6262  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
6263  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
6264  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
6265  */
6266 static void disas_simd_tb(DisasContext *s, uint32_t insn)
6267 {
6268     int op2 = extract32(insn, 22, 2);
6269     int is_q = extract32(insn, 30, 1);
6270     int rm = extract32(insn, 16, 5);
6271     int rn = extract32(insn, 5, 5);
6272     int rd = extract32(insn, 0, 5);
6273     int is_tblx = extract32(insn, 12, 1);
6274     int len = extract32(insn, 13, 2);
6275     TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
6276     TCGv_i32 tcg_regno, tcg_numregs;
6277
6278     if (op2 != 0) {
6279         unallocated_encoding(s);
6280         return;
6281     }
6282
6283     if (!fp_access_check(s)) {
6284         return;
6285     }
6286
6287     /* This does a table lookup: for every byte element in the input
6288      * we index into a table formed from up to four vector registers,
6289      * and then the output is the result of the lookups. Our helper
6290      * function does the lookup operation for a single 64 bit part of
6291      * the input.
6292      */
6293     tcg_resl = tcg_temp_new_i64();
6294     tcg_resh = tcg_temp_new_i64();
6295
6296     if (is_tblx) {
6297         read_vec_element(s, tcg_resl, rd, 0, MO_64);
6298     } else {
6299         tcg_gen_movi_i64(tcg_resl, 0);
6300     }
6301     if (is_tblx && is_q) {
6302         read_vec_element(s, tcg_resh, rd, 1, MO_64);
6303     } else {
6304         tcg_gen_movi_i64(tcg_resh, 0);
6305     }
6306
6307     tcg_idx = tcg_temp_new_i64();
6308     tcg_regno = tcg_const_i32(rn);
6309     tcg_numregs = tcg_const_i32(len + 1);
6310     read_vec_element(s, tcg_idx, rm, 0, MO_64);
6311     gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
6312                         tcg_regno, tcg_numregs);
6313     if (is_q) {
6314         read_vec_element(s, tcg_idx, rm, 1, MO_64);
6315         gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
6316                             tcg_regno, tcg_numregs);
6317     }
6318     tcg_temp_free_i64(tcg_idx);
6319     tcg_temp_free_i32(tcg_regno);
6320     tcg_temp_free_i32(tcg_numregs);
6321
6322     write_vec_element(s, tcg_resl, rd, 0, MO_64);
6323     tcg_temp_free_i64(tcg_resl);
6324     write_vec_element(s, tcg_resh, rd, 1, MO_64);
6325     tcg_temp_free_i64(tcg_resh);
6326 }
6327
6328 /* ZIP/UZP/TRN
6329  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
6330  * +---+---+-------------+------+---+------+---+------------------+------+
6331  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
6332  * +---+---+-------------+------+---+------+---+------------------+------+
6333  */
6334 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
6335 {
6336     int rd = extract32(insn, 0, 5);
6337     int rn = extract32(insn, 5, 5);
6338     int rm = extract32(insn, 16, 5);
6339     int size = extract32(insn, 22, 2);
6340     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
6341      * bit 2 indicates 1 vs 2 variant of the insn.
6342      */
6343     int opcode = extract32(insn, 12, 2);
6344     bool part = extract32(insn, 14, 1);
6345     bool is_q = extract32(insn, 30, 1);
6346     int esize = 8 << size;
6347     int i, ofs;
6348     int datasize = is_q ? 128 : 64;
6349     int elements = datasize / esize;
6350     TCGv_i64 tcg_res, tcg_resl, tcg_resh;
6351
6352     if (opcode == 0 || (size == 3 && !is_q)) {
6353         unallocated_encoding(s);
6354         return;
6355     }
6356
6357     if (!fp_access_check(s)) {
6358         return;
6359     }
6360
6361     tcg_resl = tcg_const_i64(0);
6362     tcg_resh = tcg_const_i64(0);
6363     tcg_res = tcg_temp_new_i64();
6364
6365     for (i = 0; i < elements; i++) {
6366         switch (opcode) {
6367         case 1: /* UZP1/2 */
6368         {
6369             int midpoint = elements / 2;
6370             if (i < midpoint) {
6371                 read_vec_element(s, tcg_res, rn, 2 * i + part, size);
6372             } else {
6373                 read_vec_element(s, tcg_res, rm,
6374                                  2 * (i - midpoint) + part, size);
6375             }
6376             break;
6377         }
6378         case 2: /* TRN1/2 */
6379             if (i & 1) {
6380                 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
6381             } else {
6382                 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
6383             }
6384             break;
6385         case 3: /* ZIP1/2 */
6386         {
6387             int base = part * elements / 2;
6388             if (i & 1) {
6389                 read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
6390             } else {
6391                 read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
6392             }
6393             break;
6394         }
6395         default:
6396             g_assert_not_reached();
6397         }
6398
6399         ofs = i * esize;
6400         if (ofs < 64) {
6401             tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
6402             tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
6403         } else {
6404             tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
6405             tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
6406         }
6407     }
6408
6409     tcg_temp_free_i64(tcg_res);
6410
6411     write_vec_element(s, tcg_resl, rd, 0, MO_64);
6412     tcg_temp_free_i64(tcg_resl);
6413     write_vec_element(s, tcg_resh, rd, 1, MO_64);
6414     tcg_temp_free_i64(tcg_resh);
6415 }
6416
6417 /*
6418  * do_reduction_op helper
6419  *
6420  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
6421  * important for correct NaN propagation that we do these
6422  * operations in exactly the order specified by the pseudocode.
6423  *
6424  * This is a recursive function, TCG temps should be freed by the
6425  * calling function once it is done with the values.
6426  */
6427 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
6428                                 int esize, int size, int vmap, TCGv_ptr fpst)
6429 {
6430     if (esize == size) {
6431         int element;
6432         TCGMemOp msize = esize == 16 ? MO_16 : MO_32;
6433         TCGv_i32 tcg_elem;
6434
6435         /* We should have one register left here */
6436         assert(ctpop8(vmap) == 1);
6437         element = ctz32(vmap);
6438         assert(element < 8);
6439
6440         tcg_elem = tcg_temp_new_i32();
6441         read_vec_element_i32(s, tcg_elem, rn, element, msize);
6442         return tcg_elem;
6443     } else {
6444         int bits = size / 2;
6445         int shift = ctpop8(vmap) / 2;
6446         int vmap_lo = (vmap >> shift) & vmap;
6447         int vmap_hi = (vmap & ~vmap_lo);
6448         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
6449
6450         tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
6451         tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
6452         tcg_res = tcg_temp_new_i32();
6453
6454         switch (fpopcode) {
6455         case 0x0c: /* fmaxnmv half-precision */
6456             gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
6457             break;
6458         case 0x0f: /* fmaxv half-precision */
6459             gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
6460             break;
6461         case 0x1c: /* fminnmv half-precision */
6462             gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
6463             break;
6464         case 0x1f: /* fminv half-precision */
6465             gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
6466             break;
6467         case 0x2c: /* fmaxnmv */
6468             gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
6469             break;
6470         case 0x2f: /* fmaxv */
6471             gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
6472             break;
6473         case 0x3c: /* fminnmv */
6474             gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
6475             break;
6476         case 0x3f: /* fminv */
6477             gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
6478             break;
6479         default:
6480             g_assert_not_reached();
6481         }
6482
6483         tcg_temp_free_i32(tcg_hi);
6484         tcg_temp_free_i32(tcg_lo);
6485         return tcg_res;
6486     }
6487 }
6488
6489 /* AdvSIMD across lanes
6490  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
6491  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
6492  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6493  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
6494  */
/*
 * Decode and translate the across-lanes group: ADDV, SADDLV/UADDLV,
 * SMAXV/UMAXV, SMINV/UMINV, FMAXNMV/FMINNMV and FMAXV/FMINV.
 *
 * All elements of Rn are folded into a single value, which is truncated
 * to the result width and written to Rd with write_fp_dreg().
 */
static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    bool is_q = extract32(insn, 30, 1);
    bool is_u = extract32(insn, 29, 1);
    bool is_fp = false;
    bool is_min = false;
    int esize;
    int elements;
    int i;
    TCGv_i64 tcg_res, tcg_elt;

    /* Reject unallocated encodings before the FP access check. */
    switch (opcode) {
    case 0x1b: /* ADDV */
        /* ADDV has no U == 1 form. */
        if (is_u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x3: /* SADDLV, UADDLV */
    case 0xa: /* SMAXV, UMAXV */
    case 0x1a: /* SMINV, UMINV */
        /* No 64-bit elements, and no 32-bit elements in a 64-bit vector. */
        if (size == 3 || (size == 2 && !is_q)) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0xc: /* FMAXNMV, FMINNMV */
    case 0xf: /* FMAXV, FMINV */
        /* Bit 1 of size field encodes min vs max and the actual size
         * depends on the encoding of the U bit. If not set (and FP16
         * enabled) then we do half-precision float instead of single
         * precision.
         */
        is_min = extract32(size, 1, 1);
        is_fp = true;
        if (!is_u && dc_isar_feature(aa64_fp16, s)) {
            size = 1;
        } else if (!is_u || !is_q || extract32(size, 0, 1)) {
            /* Single precision needs U set, Q set and size[0] clear. */
            unallocated_encoding(s);
            return;
        } else {
            size = 2;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    esize = 8 << size;
    elements = (is_q ? 128 : 64) / esize;

    tcg_res = tcg_temp_new_i64();
    tcg_elt = tcg_temp_new_i64();

    /* These instructions operate across all lanes of a vector
     * to produce a single result. We can guarantee that a 64
     * bit intermediate is sufficient:
     *  + for [US]ADDLV the maximum element size is 32 bits, and
     *    the result type is 64 bits
     *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
     *    same as the element size, which is 32 bits at most
     * For the integer operations we can choose to work at 64
     * or 32 bits and truncate at the end; for simplicity
     * we use 64 bits always. The floating point
     * ops do require 32 bit intermediates, though.
     */
    if (!is_fp) {
        /* Element 0 seeds the accumulator; sign-extend unless U is set. */
        read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));

        for (i = 1; i < elements; i++) {
            read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));

            switch (opcode) {
            case 0x03: /* SADDLV / UADDLV */
            case 0x1b: /* ADDV */
                tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
                break;
            case 0x0a: /* SMAXV / UMAXV */
                if (is_u) {
                    tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
                } else {
                    tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
                }
                break;
            case 0x1a: /* SMINV / UMINV */
                if (is_u) {
                    tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
                } else {
                    tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
                }
                break;
            default:
                g_assert_not_reached();
            }

        }
    } else {
        /* Floating point vector reduction ops which work across 32
         * bit (single) or 16 bit (half-precision) intermediates.
         * Note that correct NaN propagation requires that we do these
         * operations in exactly the order specified by the pseudocode.
         */
        TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
        /*
         * Selector consumed by do_reduction_op(): the opcode with the
         * min flag in bit 4 and the U flag in bit 5.
         */
        int fpopcode = opcode | is_min << 4 | is_u << 5;
        /* One bit per element: initially every element is in the map. */
        int vmap = (1 << elements) - 1;
        TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
                                             (is_q ? 128 : 64), vmap, fpst);
        tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
        tcg_temp_free_i32(tcg_res32);
        tcg_temp_free_ptr(fpst);
    }

    tcg_temp_free_i64(tcg_elt);

    /* Now truncate the result to the width required for the final output */
    if (opcode == 0x03) {
        /* SADDLV, UADDLV: result is 2*esize */
        size++;
    }

    /* Zero-extend from the result width; nothing to do for 64 bits. */
    switch (size) {
    case 0:
        tcg_gen_ext8u_i64(tcg_res, tcg_res);
        break;
    case 1:
        tcg_gen_ext16u_i64(tcg_res, tcg_res);
        break;
    case 2:
        tcg_gen_ext32u_i64(tcg_res, tcg_res);
        break;
    case 3:
        break;
    default:
        g_assert_not_reached();
    }

    write_fp_dreg(s, rd, tcg_res);
    tcg_temp_free_i64(tcg_res);
}
6643
6644 /* DUP (Element, Vector)
6645  *
6646  *  31  30   29              21 20    16 15        10  9    5 4    0
6647  * +---+---+-------------------+--------+-------------+------+------+
6648  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
6649  * +---+---+-------------------+--------+-------------+------+------+
6650  *
6651  * size: encoded in imm5 (see ARM ARM LowestSetBit())
6652  */
6653 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
6654                              int imm5)
6655 {
6656     int size = ctz32(imm5);
6657     int index = imm5 >> (size + 1);
6658
6659     if (size > 3 || (size == 3 && !is_q)) {
6660         unallocated_encoding(s);
6661         return;
6662     }
6663
6664     if (!fp_access_check(s)) {
6665         return;
6666     }
6667
6668     tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
6669                          vec_reg_offset(s, rn, index, size),
6670                          is_q ? 16 : 8, vec_full_reg_size(s));
6671 }
6672
6673 /* DUP (element, scalar)
6674  *  31                   21 20    16 15        10  9    5 4    0
6675  * +-----------------------+--------+-------------+------+------+
6676  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
6677  * +-----------------------+--------+-------------+------+------+
6678  */
6679 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
6680                               int imm5)
6681 {
6682     int size = ctz32(imm5);
6683     int index;
6684     TCGv_i64 tmp;
6685
6686     if (size > 3) {
6687         unallocated_encoding(s);
6688         return;
6689     }
6690
6691     if (!fp_access_check(s)) {
6692         return;
6693     }
6694
6695     index = imm5 >> (size + 1);
6696
6697     /* This instruction just extracts the specified element and
6698      * zero-extends it into the bottom of the destination register.
6699      */
6700     tmp = tcg_temp_new_i64();
6701     read_vec_element(s, tmp, rn, index, size);
6702     write_fp_dreg(s, rd, tmp);
6703     tcg_temp_free_i64(tmp);
6704 }
6705
6706 /* DUP (General)
6707  *
6708  *  31  30   29              21 20    16 15        10  9    5 4    0
6709  * +---+---+-------------------+--------+-------------+------+------+
6710  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
6711  * +---+---+-------------------+--------+-------------+------+------+
6712  *
6713  * size: encoded in imm5 (see ARM ARM LowestSetBit())
6714  */
6715 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
6716                              int imm5)
6717 {
6718     int size = ctz32(imm5);
6719     uint32_t dofs, oprsz, maxsz;
6720
6721     if (size > 3 || ((size == 3) && !is_q)) {
6722         unallocated_encoding(s);
6723         return;
6724     }
6725
6726     if (!fp_access_check(s)) {
6727         return;
6728     }
6729
6730     dofs = vec_full_reg_offset(s, rd);
6731     oprsz = is_q ? 16 : 8;
6732     maxsz = vec_full_reg_size(s);
6733
6734     tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
6735 }
6736
6737 /* INS (Element)
6738  *
6739  *  31                   21 20    16 15  14    11  10 9    5 4    0
6740  * +-----------------------+--------+------------+---+------+------+
6741  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
6742  * +-----------------------+--------+------------+---+------+------+
6743  *
6744  * size: encoded in imm5 (see ARM ARM LowestSetBit())
6745  * index: encoded in imm5<4:size+1>
6746  */
6747 static void handle_simd_inse(DisasContext *s, int rd, int rn,
6748                              int imm4, int imm5)
6749 {
6750     int size = ctz32(imm5);
6751     int src_index, dst_index;
6752     TCGv_i64 tmp;
6753
6754     if (size > 3) {
6755         unallocated_encoding(s);
6756         return;
6757     }
6758
6759     if (!fp_access_check(s)) {
6760         return;
6761     }
6762
6763     dst_index = extract32(imm5, 1+size, 5);
6764     src_index = extract32(imm4, size, 4);
6765
6766     tmp = tcg_temp_new_i64();
6767
6768     read_vec_element(s, tmp, rn, src_index, size);
6769     write_vec_element(s, tmp, rd, dst_index, size);
6770
6771     tcg_temp_free_i64(tmp);
6772 }
6773
6774
6775 /* INS (General)
6776  *
6777  *  31                   21 20    16 15        10  9    5 4    0
6778  * +-----------------------+--------+-------------+------+------+
6779  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
6780  * +-----------------------+--------+-------------+------+------+
6781  *
6782  * size: encoded in imm5 (see ARM ARM LowestSetBit())
6783  * index: encoded in imm5<4:size+1>
6784  */
6785 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
6786 {
6787     int size = ctz32(imm5);
6788     int idx;
6789
6790     if (size > 3) {
6791         unallocated_encoding(s);
6792         return;
6793     }
6794
6795     if (!fp_access_check(s)) {
6796         return;
6797     }
6798
6799     idx = extract32(imm5, 1 + size, 4 - size);
6800     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
6801 }
6802
6803 /*
6804  * UMOV (General)
6805  * SMOV (General)
6806  *
6807  *  31  30   29              21 20    16 15    12   10 9    5 4    0
6808  * +---+---+-------------------+--------+-------------+------+------+
6809  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
6810  * +---+---+-------------------+--------+-------------+------+------+
6811  *
6812  * U: unsigned when set
6813  * size: encoded in imm5 (see ARM ARM LowestSetBit())
6814  */
6815 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
6816                                   int rn, int rd, int imm5)
6817 {
6818     int size = ctz32(imm5);
6819     int element;
6820     TCGv_i64 tcg_rd;
6821
6822     /* Check for UnallocatedEncodings */
6823     if (is_signed) {
6824         if (size > 2 || (size == 2 && !is_q)) {
6825             unallocated_encoding(s);
6826             return;
6827         }
6828     } else {
6829         if (size > 3
6830             || (size < 3 && is_q)
6831             || (size == 3 && !is_q)) {
6832             unallocated_encoding(s);
6833             return;
6834         }
6835     }
6836
6837     if (!fp_access_check(s)) {
6838         return;
6839     }
6840
6841     element = extract32(imm5, 1+size, 4);
6842
6843     tcg_rd = cpu_reg(s, rd);
6844     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
6845     if (is_signed && !is_q) {
6846         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
6847     }
6848 }
6849
6850 /* AdvSIMD copy
6851  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
6852  * +---+---+----+-----------------+------+---+------+---+------+------+
6853  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6854  * +---+---+----+-----------------+------+---+------+---+------+------+
6855  */
6856 static void disas_simd_copy(DisasContext *s, uint32_t insn)
6857 {
6858     int rd = extract32(insn, 0, 5);
6859     int rn = extract32(insn, 5, 5);
6860     int imm4 = extract32(insn, 11, 4);
6861     int op = extract32(insn, 29, 1);
6862     int is_q = extract32(insn, 30, 1);
6863     int imm5 = extract32(insn, 16, 5);
6864
6865     if (op) {
6866         if (is_q) {
6867             /* INS (element) */
6868             handle_simd_inse(s, rd, rn, imm4, imm5);
6869         } else {
6870             unallocated_encoding(s);
6871         }
6872     } else {
6873         switch (imm4) {
6874         case 0:
6875             /* DUP (element - vector) */
6876             handle_simd_dupe(s, is_q, rd, rn, imm5);
6877             break;
6878         case 1:
6879             /* DUP (general) */
6880             handle_simd_dupg(s, is_q, rd, rn, imm5);
6881             break;
6882         case 3:
6883             if (is_q) {
6884                 /* INS (general) */
6885                 handle_simd_insg(s, rd, rn, imm5);
6886             } else {
6887                 unallocated_encoding(s);
6888             }
6889             break;
6890         case 5:
6891         case 7:
6892             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
6893             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
6894             break;
6895         default:
6896             unallocated_encoding(s);
6897             break;
6898         }
6899     }
6900 }
6901
6902 /* AdvSIMD modified immediate
6903  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
6904  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6905  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
6906  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6907  *
6908  * There are a number of operations that can be carried out here:
6909  *   MOVI - move (shifted) imm into register
6910  *   MVNI - move inverted (shifted) imm into register
6911  *   ORR  - bitwise OR of (shifted) imm with register
6912  *   BIC  - bitwise clear of (shifted) imm with register
6913  * With ARMv8.2 we also have:
6914  *   FMOV half-precision
6915  */
/*
 * Decode and translate the modified-immediate group.
 *
 * The 8-bit immediate abcdefgh is expanded to a 64-bit pattern as
 * selected by cmode, op (is_neg) and o2, and is then either broadcast
 * into Rd (MOVI/MVNI/FMOV) or combined with the current value of Rd
 * (ORR/BIC).
 */
static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int cmode = extract32(insn, 12, 4);
    int cmode_3_1 = extract32(cmode, 1, 3);
    int cmode_0 = extract32(cmode, 0, 1);
    int o2 = extract32(insn, 11, 1);
    /* imm8: insn[9:5] gives the low five bits, insn[18:16] the top three. */
    uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
    bool is_neg = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);
    uint64_t imm = 0;

    /*
     * o2 set, or cmode == 0xf with op set and Q clear, is unallocated
     * unless it is the FP16 form (o2 set, cmode == 0xf, FP16 present).
     */
    if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
        /* Check for FMOV (vector, immediate) - half-precision */
        if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
            unallocated_encoding(s);
            return;
        }
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* See AdvSIMDExpandImm() in ARM ARM */
    switch (cmode_3_1) {
    case 0: /* Replicate(Zeros(24):imm8, 2) */
    case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
    case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
    case 3: /* Replicate(imm8:Zeros(24), 2) */
    {
        /* imm8 placed in byte cmode_3_1 of each 32-bit lane. */
        int shift = cmode_3_1 * 8;
        imm = bitfield_replicate(abcdefgh << shift, 32);
        break;
    }
    case 4: /* Replicate(Zeros(8):imm8, 4) */
    case 5: /* Replicate(imm8:Zeros(8), 4) */
    {
        /* imm8 placed in the low or high byte of each 16-bit lane. */
        int shift = (cmode_3_1 & 0x1) * 8;
        imm = bitfield_replicate(abcdefgh << shift, 16);
        break;
    }
    case 6:
        /* "Shifting ones" forms: the bits below imm8 are filled with ones. */
        if (cmode_0) {
            /* Replicate(Zeros(8):imm8:Ones(16), 2) */
            imm = (abcdefgh << 16) | 0xffff;
        } else {
            /* Replicate(Zeros(16):imm8:Ones(8), 2) */
            imm = (abcdefgh << 8) | 0xff;
        }
        imm = bitfield_replicate(imm, 32);
        break;
    case 7:
        if (!cmode_0 && !is_neg) {
            /* imm8 copied into every byte. */
            imm = bitfield_replicate(abcdefgh, 8);
        } else if (!cmode_0 && is_neg) {
            /* Bit i of imm8 expands to all-ones or all-zeros in byte i. */
            int i;
            imm = 0;
            for (i = 0; i < 8; i++) {
                if ((abcdefgh) & (1 << i)) {
                    imm |= 0xffULL << (i * 8);
                }
            }
        } else if (cmode_0) {
            if (is_neg) {
                /*
                 * One 64-bit pattern: imm8[5:0] at bit 48, imm8[7] at
                 * bit 63, and imm8[6] selecting the bits in between.
                 * NOTE(review): presumably the double-precision FMOV
                 * immediate expansion - confirm against the ARM ARM.
                 */
                imm = (abcdefgh & 0x3f) << 48;
                if (abcdefgh & 0x80) {
                    imm |= 0x8000000000000000ULL;
                }
                if (abcdefgh & 0x40) {
                    imm |= 0x3fc0000000000000ULL;
                } else {
                    imm |= 0x4000000000000000ULL;
                }
            } else {
                if (o2) {
                    /* FMOV (vector, immediate) - half-precision */
                    imm = vfp_expand_imm(MO_16, abcdefgh);
                    /* now duplicate across the lanes */
                    imm = bitfield_replicate(imm, 16);
                } else {
                    /*
                     * One 32-bit pattern built the same way (imm8[5:0]
                     * at bit 19, imm8[7] at bit 31), then copied into
                     * both 32-bit halves.
                     * NOTE(review): presumably the single-precision
                     * FMOV immediate expansion - confirm.
                     */
                    imm = (abcdefgh & 0x3f) << 19;
                    if (abcdefgh & 0x80) {
                        imm |= 0x80000000;
                    }
                    if (abcdefgh & 0x40) {
                        imm |= 0x3e000000;
                    } else {
                        imm |= 0x40000000;
                    }
                    imm |= (imm << 32);
                }
            }
        }
        break;
    default:
        /* cmode_3_1 is a 3-bit field, so every value is handled above. */
        fprintf(stderr, "%s: cmode_3_1: %x\n", __func__, cmode_3_1);
        g_assert_not_reached();
    }

    /* For every cmode group except 7, op inverts the expanded pattern. */
    if (cmode_3_1 != 7 && is_neg) {
        imm = ~imm;
    }

    if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
        /* MOVI or MVNI, with MVNI negation handled above.  */
        tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), is_q ? 16 : 8,
                            vec_full_reg_size(s), imm);
    } else {
        /* ORR or BIC, with BIC negation to AND handled above.  */
        if (is_neg) {
            gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
        } else {
            gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
        }
    }
}
7033
7034 /* AdvSIMD scalar copy
7035  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
7036  * +-----+----+-----------------+------+---+------+---+------+------+
7037  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7038  * +-----+----+-----------------+------+---+------+---+------+------+
7039  */
7040 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
7041 {
7042     int rd = extract32(insn, 0, 5);
7043     int rn = extract32(insn, 5, 5);
7044     int imm4 = extract32(insn, 11, 4);
7045     int imm5 = extract32(insn, 16, 5);
7046     int op = extract32(insn, 29, 1);
7047
7048     if (op != 0 || imm4 != 0) {
7049         unallocated_encoding(s);
7050         return;
7051     }
7052
7053     /* DUP (element, scalar) */
7054     handle_simd_dupes(s, rd, rn, imm5);
7055 }
7056
7057 /* AdvSIMD scalar pairwise
7058  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7059  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7060  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7061  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7062  */
/*
 * Decode and translate the scalar pairwise group: ADDP, FMAXNMP, FADDP,
 * FMAXP, FMINNMP and FMINP.
 *
 * Elements 0 and 1 of Rn are combined with the selected operation and
 * the scalar result is written to Rd.
 */
static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
{
    int u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    TCGv_ptr fpst;

    /* For some ops (the FP ones), size[1] is part of the encoding.
     * For ADDP strictly it is not but size[1] is always 1 for valid
     * encodings.
     */
    opcode |= (extract32(size, 1, 1) << 5);

    switch (opcode) {
    case 0x3b: /* ADDP */
        /* Integer ADDP exists only for 64-bit elements with U clear. */
        if (u || size != 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }

        /* No FP status is needed for the integer add. */
        fpst = NULL;
        break;
    case 0xc: /* FMAXNMP */
    case 0xd: /* FADDP */
    case 0xf: /* FMAXP */
    case 0x2c: /* FMINNMP */
    case 0x2f: /* FMINP */
        /*
         * FP op. With U clear this is the half-precision form, which
         * needs the FP16 feature; with U set, size[0] selects 64-bit
         * or 32-bit elements.
         */
        if (!u) {
            if (!dc_isar_feature(aa64_fp16, s)) {
                unallocated_encoding(s);
                return;
            } else {
                size = MO_16;
            }
        } else {
            size = extract32(size, 0, 1) ? MO_64 : MO_32;
        }

        if (!fp_access_check(s)) {
            return;
        }

        fpst = get_fpstatus_ptr(size == MO_16);
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (size == MO_64) {
        /* 64-bit elements: ADDP and the double-precision FP ops. */
        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
        TCGv_i64 tcg_res = tcg_temp_new_i64();

        read_vec_element(s, tcg_op1, rn, 0, MO_64);
        read_vec_element(s, tcg_op2, rn, 1, MO_64);

        switch (opcode) {
        case 0x3b: /* ADDP */
            tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
            break;
        case 0xc: /* FMAXNMP */
            gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xd: /* FADDP */
            gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xf: /* FMAXP */
            gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2c: /* FMINNMP */
            gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2f: /* FMINP */
            gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        default:
            g_assert_not_reached();
        }

        write_fp_dreg(s, rd, tcg_res);

        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2);
        tcg_temp_free_i64(tcg_res);
    } else {
        /* 16-bit or 32-bit FP elements, handled in 32-bit temps. */
        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
        TCGv_i32 tcg_res = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_op1, rn, 0, size);
        read_vec_element_i32(s, tcg_op2, rn, 1, size);

        if (size == MO_16) {
            /* Half-precision helpers. */
            switch (opcode) {
            case 0xc: /* FMAXNMP */
                gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xd: /* FADDP */
                gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xf: /* FMAXP */
                gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x2c: /* FMINNMP */
                gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x2f: /* FMINP */
                gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }
        } else {
            /* Single-precision helpers. */
            switch (opcode) {
            case 0xc: /* FMAXNMP */
                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xd: /* FADDP */
                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xf: /* FMAXP */
                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x2c: /* FMINNMP */
                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x2f: /* FMINP */
                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }
        }

        write_fp_sreg(s, rd, tcg_res);

        tcg_temp_free_i32(tcg_op1);
        tcg_temp_free_i32(tcg_op2);
        tcg_temp_free_i32(tcg_res);
    }

    /* fpst is NULL on the ADDP path, where none was allocated. */
    if (fpst) {
        tcg_temp_free_ptr(fpst);
    }
}
7215
7216 /*
7217  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
7218  *
7219  * This code is handles the common shifting code and is used by both
7220  * the vector and scalar code.
7221  */
7222 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
7223                                     TCGv_i64 tcg_rnd, bool accumulate,
7224                                     bool is_u, int size, int shift)
7225 {
7226     bool extended_result = false;
7227     bool round = tcg_rnd != NULL;
7228     int ext_lshift = 0;
7229     TCGv_i64 tcg_src_hi;
7230
7231     if (round && size == 3) {
7232         extended_result = true;
7233         ext_lshift = 64 - shift;
7234         tcg_src_hi = tcg_temp_new_i64();
7235     } else if (shift == 64) {
7236         if (!accumulate && is_u) {
7237             /* result is zero */
7238             tcg_gen_movi_i64(tcg_res, 0);
7239             return;
7240         }
7241     }
7242
7243     /* Deal with the rounding step */
7244     if (round) {
7245         if (extended_result) {
7246             TCGv_i64 tcg_zero = tcg_const_i64(0);
7247             if (!is_u) {
7248                 /* take care of sign extending tcg_res */
7249                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
7250                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
7251                                  tcg_src, tcg_src_hi,
7252                                  tcg_rnd, tcg_zero);
7253             } else {
7254                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
7255                                  tcg_src, tcg_zero,
7256                                  tcg_rnd, tcg_zero);
7257             }
7258             tcg_temp_free_i64(tcg_zero);
7259         } else {
7260             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
7261         }
7262     }
7263
7264     /* Now do the shift right */
7265     if (round && extended_result) {
7266         /* extended case, >64 bit precision required */
7267         if (ext_lshift == 0) {
7268             /* special case, only high bits matter */
7269             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
7270         } else {
7271             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
7272             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
7273             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
7274         }
7275     } else {
7276         if (is_u) {
7277             if (shift == 64) {
7278                 /* essentially shifting in 64 zeros */
7279                 tcg_gen_movi_i64(tcg_src, 0);
7280             } else {
7281                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
7282             }
7283         } else {
7284             if (shift == 64) {
7285                 /* effectively extending the sign-bit */
7286                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
7287             } else {
7288                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
7289             }
7290         }
7291     }
7292
7293     if (accumulate) {
7294         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
7295     } else {
7296         tcg_gen_mov_i64(tcg_res, tcg_src);
7297     }
7298
7299     if (extended_result) {
7300         tcg_temp_free_i64(tcg_src_hi);
7301     }
7302 }
7303
7304 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
7305 static void handle_scalar_simd_shri(DisasContext *s,
7306                                     bool is_u, int immh, int immb,
7307                                     int opcode, int rn, int rd)
7308 {
7309     const int size = 3;
7310     int immhb = immh << 3 | immb;
7311     int shift = 2 * (8 << size) - immhb;
7312     bool accumulate = false;
7313     bool round = false;
7314     bool insert = false;
7315     TCGv_i64 tcg_rn;
7316     TCGv_i64 tcg_rd;
7317     TCGv_i64 tcg_round;
7318
7319     if (!extract32(immh, 3, 1)) {
7320         unallocated_encoding(s);
7321         return;
7322     }
7323
7324     if (!fp_access_check(s)) {
7325         return;
7326     }
7327
7328     switch (opcode) {
7329     case 0x02: /* SSRA / USRA (accumulate) */
7330         accumulate = true;
7331         break;
7332     case 0x04: /* SRSHR / URSHR (rounding) */
7333         round = true;
7334         break;
7335     case 0x06: /* SRSRA / URSRA (accum + rounding) */
7336         accumulate = round = true;
7337         break;
7338     case 0x08: /* SRI */
7339         insert = true;
7340         break;
7341     }
7342
7343     if (round) {
7344         uint64_t round_const = 1ULL << (shift - 1);
7345         tcg_round = tcg_const_i64(round_const);
7346     } else {
7347         tcg_round = NULL;
7348     }
7349
7350     tcg_rn = read_fp_dreg(s, rn);
7351     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
7352
7353     if (insert) {
7354         /* shift count same as element size is valid but does nothing;
7355          * special case to avoid potential shift by 64.
7356          */
7357         int esize = 8 << size;
7358         if (shift != esize) {
7359             tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
7360             tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
7361         }
7362     } else {
7363         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
7364                                 accumulate, is_u, size, shift);
7365     }
7366
7367     write_fp_dreg(s, rd, tcg_rd);
7368
7369     tcg_temp_free_i64(tcg_rn);
7370     tcg_temp_free_i64(tcg_rd);
7371     if (round) {
7372         tcg_temp_free_i64(tcg_round);
7373     }
7374 }
7375
7376 /* SHL/SLI - Scalar shift left */
7377 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
7378                                     int immh, int immb, int opcode,
7379                                     int rn, int rd)
7380 {
7381     int size = 32 - clz32(immh) - 1;
7382     int immhb = immh << 3 | immb;
7383     int shift = immhb - (8 << size);
7384     TCGv_i64 tcg_rn = new_tmp_a64(s);
7385     TCGv_i64 tcg_rd = new_tmp_a64(s);
7386
7387     if (!extract32(immh, 3, 1)) {
7388         unallocated_encoding(s);
7389         return;
7390     }
7391
7392     if (!fp_access_check(s)) {
7393         return;
7394     }
7395
7396     tcg_rn = read_fp_dreg(s, rn);
7397     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
7398
7399     if (insert) {
7400         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
7401     } else {
7402         tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
7403     }
7404
7405     write_fp_dreg(s, rd, tcg_rd);
7406
7407     tcg_temp_free_i64(tcg_rn);
7408     tcg_temp_free_i64(tcg_rd);
7409 }
7410
7411 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
7412  * (signed/unsigned) narrowing */
7413 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
7414                                    bool is_u_shift, bool is_u_narrow,
7415                                    int immh, int immb, int opcode,
7416                                    int rn, int rd)
7417 {
7418     int immhb = immh << 3 | immb;
7419     int size = 32 - clz32(immh) - 1;
7420     int esize = 8 << size;
7421     int shift = (2 * esize) - immhb;
7422     int elements = is_scalar ? 1 : (64 / esize);
7423     bool round = extract32(opcode, 0, 1);
7424     TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
7425     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
7426     TCGv_i32 tcg_rd_narrowed;
7427     TCGv_i64 tcg_final;
7428
7429     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
7430         { gen_helper_neon_narrow_sat_s8,
7431           gen_helper_neon_unarrow_sat8 },
7432         { gen_helper_neon_narrow_sat_s16,
7433           gen_helper_neon_unarrow_sat16 },
7434         { gen_helper_neon_narrow_sat_s32,
7435           gen_helper_neon_unarrow_sat32 },
7436         { NULL, NULL },
7437     };
7438     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
7439         gen_helper_neon_narrow_sat_u8,
7440         gen_helper_neon_narrow_sat_u16,
7441         gen_helper_neon_narrow_sat_u32,
7442         NULL
7443     };
7444     NeonGenNarrowEnvFn *narrowfn;
7445
7446     int i;
7447
7448     assert(size < 4);
7449
7450     if (extract32(immh, 3, 1)) {
7451         unallocated_encoding(s);
7452         return;
7453     }
7454
7455     if (!fp_access_check(s)) {
7456         return;
7457     }
7458
7459     if (is_u_shift) {
7460         narrowfn = unsigned_narrow_fns[size];
7461     } else {
7462         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
7463     }
7464
7465     tcg_rn = tcg_temp_new_i64();
7466     tcg_rd = tcg_temp_new_i64();
7467     tcg_rd_narrowed = tcg_temp_new_i32();
7468     tcg_final = tcg_const_i64(0);
7469
7470     if (round) {
7471         uint64_t round_const = 1ULL << (shift - 1);
7472         tcg_round = tcg_const_i64(round_const);
7473     } else {
7474         tcg_round = NULL;
7475     }
7476
7477     for (i = 0; i < elements; i++) {
7478         read_vec_element(s, tcg_rn, rn, i, ldop);
7479         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
7480                                 false, is_u_shift, size+1, shift);
7481         narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
7482         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
7483         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
7484     }
7485
7486     if (!is_q) {
7487         write_vec_element(s, tcg_final, rd, 0, MO_64);
7488     } else {
7489         write_vec_element(s, tcg_final, rd, 1, MO_64);
7490     }
7491
7492     if (round) {
7493         tcg_temp_free_i64(tcg_round);
7494     }
7495     tcg_temp_free_i64(tcg_rn);
7496     tcg_temp_free_i64(tcg_rd);
7497     tcg_temp_free_i32(tcg_rd_narrowed);
7498     tcg_temp_free_i64(tcg_final);
7499
7500     clear_vec_high(s, is_q, rd);
7501 }
7502
7503 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
7504 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
7505                              bool src_unsigned, bool dst_unsigned,
7506                              int immh, int immb, int rn, int rd)
7507 {
7508     int immhb = immh << 3 | immb;
7509     int size = 32 - clz32(immh) - 1;
7510     int shift = immhb - (8 << size);
7511     int pass;
7512
7513     assert(immh != 0);
7514     assert(!(scalar && is_q));
7515
7516     if (!scalar) {
7517         if (!is_q && extract32(immh, 3, 1)) {
7518             unallocated_encoding(s);
7519             return;
7520         }
7521
7522         /* Since we use the variable-shift helpers we must
7523          * replicate the shift count into each element of
7524          * the tcg_shift value.
7525          */
7526         switch (size) {
7527         case 0:
7528             shift |= shift << 8;
7529             /* fall through */
7530         case 1:
7531             shift |= shift << 16;
7532             break;
7533         case 2:
7534         case 3:
7535             break;
7536         default:
7537             g_assert_not_reached();
7538         }
7539     }
7540
7541     if (!fp_access_check(s)) {
7542         return;
7543     }
7544
7545     if (size == 3) {
7546         TCGv_i64 tcg_shift = tcg_const_i64(shift);
7547         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
7548             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
7549             { NULL, gen_helper_neon_qshl_u64 },
7550         };
7551         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
7552         int maxpass = is_q ? 2 : 1;
7553
7554         for (pass = 0; pass < maxpass; pass++) {
7555             TCGv_i64 tcg_op = tcg_temp_new_i64();
7556
7557             read_vec_element(s, tcg_op, rn, pass, MO_64);
7558             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
7559             write_vec_element(s, tcg_op, rd, pass, MO_64);
7560
7561             tcg_temp_free_i64(tcg_op);
7562         }
7563         tcg_temp_free_i64(tcg_shift);
7564         clear_vec_high(s, is_q, rd);
7565     } else {
7566         TCGv_i32 tcg_shift = tcg_const_i32(shift);
7567         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
7568             {
7569                 { gen_helper_neon_qshl_s8,
7570                   gen_helper_neon_qshl_s16,
7571                   gen_helper_neon_qshl_s32 },
7572                 { gen_helper_neon_qshlu_s8,
7573                   gen_helper_neon_qshlu_s16,
7574                   gen_helper_neon_qshlu_s32 }
7575             }, {
7576                 { NULL, NULL, NULL },
7577                 { gen_helper_neon_qshl_u8,
7578                   gen_helper_neon_qshl_u16,
7579                   gen_helper_neon_qshl_u32 }
7580             }
7581         };
7582         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
7583         TCGMemOp memop = scalar ? size : MO_32;
7584         int maxpass = scalar ? 1 : is_q ? 4 : 2;
7585
7586         for (pass = 0; pass < maxpass; pass++) {
7587             TCGv_i32 tcg_op = tcg_temp_new_i32();
7588
7589             read_vec_element_i32(s, tcg_op, rn, pass, memop);
7590             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
7591             if (scalar) {
7592                 switch (size) {
7593                 case 0:
7594                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
7595                     break;
7596                 case 1:
7597                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
7598                     break;
7599                 case 2:
7600                     break;
7601                 default:
7602                     g_assert_not_reached();
7603                 }
7604                 write_fp_sreg(s, rd, tcg_op);
7605             } else {
7606                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
7607             }
7608
7609             tcg_temp_free_i32(tcg_op);
7610         }
7611         tcg_temp_free_i32(tcg_shift);
7612
7613         if (!scalar) {
7614             clear_vec_high(s, is_q, rd);
7615         }
7616     }
7617 }
7618
7619 /* Common vector code for handling integer to FP conversion */
7620 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
7621                                    int elements, int is_signed,
7622                                    int fracbits, int size)
7623 {
7624     TCGv_ptr tcg_fpst = get_fpstatus_ptr(size == MO_16);
7625     TCGv_i32 tcg_shift = NULL;
7626
7627     TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
7628     int pass;
7629
7630     if (fracbits || size == MO_64) {
7631         tcg_shift = tcg_const_i32(fracbits);
7632     }
7633
7634     if (size == MO_64) {
7635         TCGv_i64 tcg_int64 = tcg_temp_new_i64();
7636         TCGv_i64 tcg_double = tcg_temp_new_i64();
7637
7638         for (pass = 0; pass < elements; pass++) {
7639             read_vec_element(s, tcg_int64, rn, pass, mop);
7640
7641             if (is_signed) {
7642                 gen_helper_vfp_sqtod(tcg_double, tcg_int64,
7643                                      tcg_shift, tcg_fpst);
7644             } else {
7645                 gen_helper_vfp_uqtod(tcg_double, tcg_int64,
7646                                      tcg_shift, tcg_fpst);
7647             }
7648             if (elements == 1) {
7649                 write_fp_dreg(s, rd, tcg_double);
7650             } else {
7651                 write_vec_element(s, tcg_double, rd, pass, MO_64);
7652             }
7653         }
7654
7655         tcg_temp_free_i64(tcg_int64);
7656         tcg_temp_free_i64(tcg_double);
7657
7658     } else {
7659         TCGv_i32 tcg_int32 = tcg_temp_new_i32();
7660         TCGv_i32 tcg_float = tcg_temp_new_i32();
7661
7662         for (pass = 0; pass < elements; pass++) {
7663             read_vec_element_i32(s, tcg_int32, rn, pass, mop);
7664
7665             switch (size) {
7666             case MO_32:
7667                 if (fracbits) {
7668                     if (is_signed) {
7669                         gen_helper_vfp_sltos(tcg_float, tcg_int32,
7670                                              tcg_shift, tcg_fpst);
7671                     } else {
7672                         gen_helper_vfp_ultos(tcg_float, tcg_int32,
7673                                              tcg_shift, tcg_fpst);
7674                     }
7675                 } else {
7676                     if (is_signed) {
7677                         gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
7678                     } else {
7679                         gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
7680                     }
7681                 }
7682                 break;
7683             case MO_16:
7684                 if (fracbits) {
7685                     if (is_signed) {
7686                         gen_helper_vfp_sltoh(tcg_float, tcg_int32,
7687                                              tcg_shift, tcg_fpst);
7688                     } else {
7689                         gen_helper_vfp_ultoh(tcg_float, tcg_int32,
7690                                              tcg_shift, tcg_fpst);
7691                     }
7692                 } else {
7693                     if (is_signed) {
7694                         gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
7695                     } else {
7696                         gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
7697                     }
7698                 }
7699                 break;
7700             default:
7701                 g_assert_not_reached();
7702             }
7703
7704             if (elements == 1) {
7705                 write_fp_sreg(s, rd, tcg_float);
7706             } else {
7707                 write_vec_element_i32(s, tcg_float, rd, pass, size);
7708             }
7709         }
7710
7711         tcg_temp_free_i32(tcg_int32);
7712         tcg_temp_free_i32(tcg_float);
7713     }
7714
7715     tcg_temp_free_ptr(tcg_fpst);
7716     if (tcg_shift) {
7717         tcg_temp_free_i32(tcg_shift);
7718     }
7719
7720     clear_vec_high(s, elements << size == 16, rd);
7721 }
7722
7723 /* UCVTF/SCVTF - Integer to FP conversion */
7724 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
7725                                          bool is_q, bool is_u,
7726                                          int immh, int immb, int opcode,
7727                                          int rn, int rd)
7728 {
7729     int size, elements, fracbits;
7730     int immhb = immh << 3 | immb;
7731
7732     if (immh & 8) {
7733         size = MO_64;
7734         if (!is_scalar && !is_q) {
7735             unallocated_encoding(s);
7736             return;
7737         }
7738     } else if (immh & 4) {
7739         size = MO_32;
7740     } else if (immh & 2) {
7741         size = MO_16;
7742         if (!dc_isar_feature(aa64_fp16, s)) {
7743             unallocated_encoding(s);
7744             return;
7745         }
7746     } else {
7747         /* immh == 0 would be a failure of the decode logic */
7748         g_assert(immh == 1);
7749         unallocated_encoding(s);
7750         return;
7751     }
7752
7753     if (is_scalar) {
7754         elements = 1;
7755     } else {
7756         elements = (8 << is_q) >> size;
7757     }
7758     fracbits = (16 << size) - immhb;
7759
7760     if (!fp_access_check(s)) {
7761         return;
7762     }
7763
7764     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
7765 }
7766
7767 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */
7768 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
7769                                          bool is_q, bool is_u,
7770                                          int immh, int immb, int rn, int rd)
7771 {
7772     int immhb = immh << 3 | immb;
7773     int pass, size, fracbits;
7774     TCGv_ptr tcg_fpstatus;
7775     TCGv_i32 tcg_rmode, tcg_shift;
7776
7777     if (immh & 0x8) {
7778         size = MO_64;
7779         if (!is_scalar && !is_q) {
7780             unallocated_encoding(s);
7781             return;
7782         }
7783     } else if (immh & 0x4) {
7784         size = MO_32;
7785     } else if (immh & 0x2) {
7786         size = MO_16;
7787         if (!dc_isar_feature(aa64_fp16, s)) {
7788             unallocated_encoding(s);
7789             return;
7790         }
7791     } else {
7792         /* Should have split out AdvSIMD modified immediate earlier.  */
7793         assert(immh == 1);
7794         unallocated_encoding(s);
7795         return;
7796     }
7797
7798     if (!fp_access_check(s)) {
7799         return;
7800     }
7801
7802     assert(!(is_scalar && is_q));
7803
7804     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
7805     tcg_fpstatus = get_fpstatus_ptr(size == MO_16);
7806     gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
7807     fracbits = (16 << size) - immhb;
7808     tcg_shift = tcg_const_i32(fracbits);
7809
7810     if (size == MO_64) {
7811         int maxpass = is_scalar ? 1 : 2;
7812
7813         for (pass = 0; pass < maxpass; pass++) {
7814             TCGv_i64 tcg_op = tcg_temp_new_i64();
7815
7816             read_vec_element(s, tcg_op, rn, pass, MO_64);
7817             if (is_u) {
7818                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
7819             } else {
7820                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
7821             }
7822             write_vec_element(s, tcg_op, rd, pass, MO_64);
7823             tcg_temp_free_i64(tcg_op);
7824         }
7825         clear_vec_high(s, is_q, rd);
7826     } else {
7827         void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
7828         int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
7829
7830         switch (size) {
7831         case MO_16:
7832             if (is_u) {
7833                 fn = gen_helper_vfp_touhh;
7834             } else {
7835                 fn = gen_helper_vfp_toshh;
7836             }
7837             break;
7838         case MO_32:
7839             if (is_u) {
7840                 fn = gen_helper_vfp_touls;
7841             } else {
7842                 fn = gen_helper_vfp_tosls;
7843             }
7844             break;
7845         default:
7846             g_assert_not_reached();
7847         }
7848
7849         for (pass = 0; pass < maxpass; pass++) {
7850             TCGv_i32 tcg_op = tcg_temp_new_i32();
7851
7852             read_vec_element_i32(s, tcg_op, rn, pass, size);
7853             fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
7854             if (is_scalar) {
7855                 write_fp_sreg(s, rd, tcg_op);
7856             } else {
7857                 write_vec_element_i32(s, tcg_op, rd, pass, size);
7858             }
7859             tcg_temp_free_i32(tcg_op);
7860         }
7861         if (!is_scalar) {
7862             clear_vec_high(s, is_q, rd);
7863         }
7864     }
7865
7866     tcg_temp_free_ptr(tcg_fpstatus);
7867     tcg_temp_free_i32(tcg_shift);
7868     gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
7869     tcg_temp_free_i32(tcg_rmode);
7870 }
7871
7872 /* AdvSIMD scalar shift by immediate
7873  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
7874  * +-----+---+-------------+------+------+--------+---+------+------+
7875  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
7876  * +-----+---+-------------+------+------+--------+---+------+------+
7877  *
7878  * This is the scalar version so it works on a fixed sized registers
7879  */
7880 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
7881 {
7882     int rd = extract32(insn, 0, 5);
7883     int rn = extract32(insn, 5, 5);
7884     int opcode = extract32(insn, 11, 5);
7885     int immb = extract32(insn, 16, 3);
7886     int immh = extract32(insn, 19, 4);
7887     bool is_u = extract32(insn, 29, 1);
7888
7889     if (immh == 0) {
7890         unallocated_encoding(s);
7891         return;
7892     }
7893
7894     switch (opcode) {
7895     case 0x08: /* SRI */
7896         if (!is_u) {
7897             unallocated_encoding(s);
7898             return;
7899         }
7900         /* fall through */
7901     case 0x00: /* SSHR / USHR */
7902     case 0x02: /* SSRA / USRA */
7903     case 0x04: /* SRSHR / URSHR */
7904     case 0x06: /* SRSRA / URSRA */
7905         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
7906         break;
7907     case 0x0a: /* SHL / SLI */
7908         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
7909         break;
7910     case 0x1c: /* SCVTF, UCVTF */
7911         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
7912                                      opcode, rn, rd);
7913         break;
7914     case 0x10: /* SQSHRUN, SQSHRUN2 */
7915     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
7916         if (!is_u) {
7917             unallocated_encoding(s);
7918             return;
7919         }
7920         handle_vec_simd_sqshrn(s, true, false, false, true,
7921                                immh, immb, opcode, rn, rd);
7922         break;
7923     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
7924     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
7925         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
7926                                immh, immb, opcode, rn, rd);
7927         break;
7928     case 0xc: /* SQSHLU */
7929         if (!is_u) {
7930             unallocated_encoding(s);
7931             return;
7932         }
7933         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
7934         break;
7935     case 0xe: /* SQSHL, UQSHL */
7936         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
7937         break;
7938     case 0x1f: /* FCVTZS, FCVTZU */
7939         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
7940         break;
7941     default:
7942         unallocated_encoding(s);
7943         break;
7944     }
7945 }
7946
7947 /* AdvSIMD scalar three different
7948  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
7949  * +-----+---+-----------+------+---+------+--------+-----+------+------+
7950  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
7951  * +-----+---+-----------+------+---+------+--------+-----+------+------+
7952  */
7953 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
7954 {
7955     bool is_u = extract32(insn, 29, 1);
7956     int size = extract32(insn, 22, 2);
7957     int opcode = extract32(insn, 12, 4);
7958     int rm = extract32(insn, 16, 5);
7959     int rn = extract32(insn, 5, 5);
7960     int rd = extract32(insn, 0, 5);
7961
7962     if (is_u) {
7963         unallocated_encoding(s);
7964         return;
7965     }
7966
7967     switch (opcode) {
7968     case 0x9: /* SQDMLAL, SQDMLAL2 */
7969     case 0xb: /* SQDMLSL, SQDMLSL2 */
7970     case 0xd: /* SQDMULL, SQDMULL2 */
7971         if (size == 0 || size == 3) {
7972             unallocated_encoding(s);
7973             return;
7974         }
7975         break;
7976     default:
7977         unallocated_encoding(s);
7978         return;
7979     }
7980
7981     if (!fp_access_check(s)) {
7982         return;
7983     }
7984
7985     if (size == 2) {
7986         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7987         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7988         TCGv_i64 tcg_res = tcg_temp_new_i64();
7989
7990         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
7991         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
7992
7993         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
7994         gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
7995
7996         switch (opcode) {
7997         case 0xd: /* SQDMULL, SQDMULL2 */
7998             break;
7999         case 0xb: /* SQDMLSL, SQDMLSL2 */
8000             tcg_gen_neg_i64(tcg_res, tcg_res);
8001             /* fall through */
8002         case 0x9: /* SQDMLAL, SQDMLAL2 */
8003             read_vec_element(s, tcg_op1, rd, 0, MO_64);
8004             gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
8005                                               tcg_res, tcg_op1);
8006             break;
8007         default:
8008             g_assert_not_reached();
8009         }
8010
8011         write_fp_dreg(s, rd, tcg_res);
8012
8013         tcg_temp_free_i64(tcg_op1);
8014         tcg_temp_free_i64(tcg_op2);
8015         tcg_temp_free_i64(tcg_res);
8016     } else {
8017         TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
8018         TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
8019         TCGv_i64 tcg_res = tcg_temp_new_i64();
8020
8021         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
8022         gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
8023
8024         switch (opcode) {
8025         case 0xd: /* SQDMULL, SQDMULL2 */
8026             break;
8027         case 0xb: /* SQDMLSL, SQDMLSL2 */
8028             gen_helper_neon_negl_u32(tcg_res, tcg_res);
8029             /* fall through */
8030         case 0x9: /* SQDMLAL, SQDMLAL2 */
8031         {
8032             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
8033             read_vec_element(s, tcg_op3, rd, 0, MO_32);
8034             gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
8035                                               tcg_res, tcg_op3);
8036             tcg_temp_free_i64(tcg_op3);
8037             break;
8038         }
8039         default:
8040             g_assert_not_reached();
8041         }
8042
8043         tcg_gen_ext32u_i64(tcg_res, tcg_res);
8044         write_fp_dreg(s, rd, tcg_res);
8045
8046         tcg_temp_free_i32(tcg_op1);
8047         tcg_temp_free_i32(tcg_op2);
8048         tcg_temp_free_i64(tcg_res);
8049     }
8050 }
8051
8052 static void handle_3same_64(DisasContext *s, int opcode, bool u,
8053                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
8054 {
8055     /* Handle 64x64->64 opcodes which are shared between the scalar
8056      * and vector 3-same groups. We cover every opcode where size == 3
8057      * is valid in either the three-reg-same (integer, not pairwise)
8058      * or scalar-three-reg-same groups.
8059      */
8060     TCGCond cond;
8061
8062     switch (opcode) {
8063     case 0x1: /* SQADD */
8064         if (u) {
8065             gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8066         } else {
8067             gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8068         }
8069         break;
8070     case 0x5: /* SQSUB */
8071         if (u) {
8072             gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8073         } else {
8074             gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8075         }
8076         break;
8077     case 0x6: /* CMGT, CMHI */
8078         /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
8079          * We implement this using setcond (test) and then negating.
8080          */
8081         cond = u ? TCG_COND_GTU : TCG_COND_GT;
8082     do_cmop:
8083         tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
8084         tcg_gen_neg_i64(tcg_rd, tcg_rd);
8085         break;
8086     case 0x7: /* CMGE, CMHS */
8087         cond = u ? TCG_COND_GEU : TCG_COND_GE;
8088         goto do_cmop;
8089     case 0x11: /* CMTST, CMEQ */
8090         if (u) {
8091             cond = TCG_COND_EQ;
8092             goto do_cmop;
8093         }
8094         gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
8095         break;
8096     case 0x8: /* SSHL, USHL */
8097         if (u) {
8098             gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
8099         } else {
8100             gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
8101         }
8102         break;
8103     case 0x9: /* SQSHL, UQSHL */
8104         if (u) {
8105             gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8106         } else {
8107             gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8108         }
8109         break;
8110     case 0xa: /* SRSHL, URSHL */
8111         if (u) {
8112             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
8113         } else {
8114             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
8115         }
8116         break;
8117     case 0xb: /* SQRSHL, UQRSHL */
8118         if (u) {
8119             gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8120         } else {
8121             gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8122         }
8123         break;
8124     case 0x10: /* ADD, SUB */
8125         if (u) {
8126             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
8127         } else {
8128             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
8129         }
8130         break;
8131     default:
8132         g_assert_not_reached();
8133     }
8134 }
8135
8136 /* Handle the 3-same-operands float operations; shared by the scalar
8137  * and vector encodings. The caller must filter out any encodings
8138  * not allocated for the encoding it is dealing with.
8139  */
8140 static void handle_3same_float(DisasContext *s, int size, int elements,
8141                                int fpopcode, int rd, int rn, int rm)
8142 {
8143     int pass;
8144     TCGv_ptr fpst = get_fpstatus_ptr(false);
8145
8146     for (pass = 0; pass < elements; pass++) {
8147         if (size) {
8148             /* Double */
8149             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8150             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8151             TCGv_i64 tcg_res = tcg_temp_new_i64();
8152
8153             read_vec_element(s, tcg_op1, rn, pass, MO_64);
8154             read_vec_element(s, tcg_op2, rm, pass, MO_64);
8155
8156             switch (fpopcode) {
8157             case 0x39: /* FMLS */
8158                 /* As usual for ARM, separate negation for fused multiply-add */
8159                 gen_helper_vfp_negd(tcg_op1, tcg_op1);
8160                 /* fall through */
8161             case 0x19: /* FMLA */
8162                 read_vec_element(s, tcg_res, rd, pass, MO_64);
8163                 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
8164                                        tcg_res, fpst);
8165                 break;
8166             case 0x18: /* FMAXNM */
8167                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8168                 break;
8169             case 0x1a: /* FADD */
8170                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
8171                 break;
8172             case 0x1b: /* FMULX */
8173                 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
8174                 break;
8175             case 0x1c: /* FCMEQ */
8176                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8177                 break;
8178             case 0x1e: /* FMAX */
8179                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
8180                 break;
8181             case 0x1f: /* FRECPS */
8182                 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8183                 break;
8184             case 0x38: /* FMINNM */
8185                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8186                 break;
8187             case 0x3a: /* FSUB */
8188                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
8189                 break;
8190             case 0x3e: /* FMIN */
8191                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
8192                 break;
8193             case 0x3f: /* FRSQRTS */
8194                 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8195                 break;
8196             case 0x5b: /* FMUL */
8197                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
8198                 break;
8199             case 0x5c: /* FCMGE */
8200                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8201                 break;
8202             case 0x5d: /* FACGE */
8203                 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8204                 break;
8205             case 0x5f: /* FDIV */
8206                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
8207                 break;
8208             case 0x7a: /* FABD */
8209                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
8210                 gen_helper_vfp_absd(tcg_res, tcg_res);
8211                 break;
8212             case 0x7c: /* FCMGT */
8213                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8214                 break;
8215             case 0x7d: /* FACGT */
8216                 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8217                 break;
8218             default:
8219                 g_assert_not_reached();
8220             }
8221
8222             write_vec_element(s, tcg_res, rd, pass, MO_64);
8223
8224             tcg_temp_free_i64(tcg_res);
8225             tcg_temp_free_i64(tcg_op1);
8226             tcg_temp_free_i64(tcg_op2);
8227         } else {
8228             /* Single */
8229             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8230             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8231             TCGv_i32 tcg_res = tcg_temp_new_i32();
8232
8233             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
8234             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
8235
8236             switch (fpopcode) {
8237             case 0x39: /* FMLS */
8238                 /* As usual for ARM, separate negation for fused multiply-add */
8239                 gen_helper_vfp_negs(tcg_op1, tcg_op1);
8240                 /* fall through */
8241             case 0x19: /* FMLA */
8242                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
8243                 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
8244                                        tcg_res, fpst);
8245                 break;
8246             case 0x1a: /* FADD */
8247                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
8248                 break;
8249             case 0x1b: /* FMULX */
8250                 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
8251                 break;
8252             case 0x1c: /* FCMEQ */
8253                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8254                 break;
8255             case 0x1e: /* FMAX */
8256                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
8257                 break;
8258             case 0x1f: /* FRECPS */
8259                 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8260                 break;
8261             case 0x18: /* FMAXNM */
8262                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
8263                 break;
8264             case 0x38: /* FMINNM */
8265                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
8266                 break;
8267             case 0x3a: /* FSUB */
8268                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
8269                 break;
8270             case 0x3e: /* FMIN */
8271                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
8272                 break;
8273             case 0x3f: /* FRSQRTS */
8274                 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8275                 break;
8276             case 0x5b: /* FMUL */
8277                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
8278                 break;
8279             case 0x5c: /* FCMGE */
8280                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8281                 break;
8282             case 0x5d: /* FACGE */
8283                 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8284                 break;
8285             case 0x5f: /* FDIV */
8286                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
8287                 break;
8288             case 0x7a: /* FABD */
8289                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
8290                 gen_helper_vfp_abss(tcg_res, tcg_res);
8291                 break;
8292             case 0x7c: /* FCMGT */
8293                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8294                 break;
8295             case 0x7d: /* FACGT */
8296                 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8297                 break;
8298             default:
8299                 g_assert_not_reached();
8300             }
8301
8302             if (elements == 1) {
8303                 /* scalar single so clear high part */
8304                 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8305
8306                 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
8307                 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
8308                 tcg_temp_free_i64(tcg_tmp);
8309             } else {
8310                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
8311             }
8312
8313             tcg_temp_free_i32(tcg_res);
8314             tcg_temp_free_i32(tcg_op1);
8315             tcg_temp_free_i32(tcg_op2);
8316         }
8317     }
8318
8319     tcg_temp_free_ptr(fpst);
8320
8321     clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
8322 }
8323
8324 /* AdvSIMD scalar three same
8325  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
8326  * +-----+---+-----------+------+---+------+--------+---+------+------+
8327  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
8328  * +-----+---+-----------+------+---+------+--------+---+------+------+
8329  */
8330 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
8331 {
8332     int rd = extract32(insn, 0, 5);
8333     int rn = extract32(insn, 5, 5);
8334     int opcode = extract32(insn, 11, 5);
8335     int rm = extract32(insn, 16, 5);
8336     int size = extract32(insn, 22, 2);
8337     bool u = extract32(insn, 29, 1);
8338     TCGv_i64 tcg_rd;
8339
8340     if (opcode >= 0x18) {
8341         /* Floating point: U, size[1] and opcode indicate operation */
8342         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
8343         switch (fpopcode) {
8344         case 0x1b: /* FMULX */
8345         case 0x1f: /* FRECPS */
8346         case 0x3f: /* FRSQRTS */
8347         case 0x5d: /* FACGE */
8348         case 0x7d: /* FACGT */
8349         case 0x1c: /* FCMEQ */
8350         case 0x5c: /* FCMGE */
8351         case 0x7c: /* FCMGT */
8352         case 0x7a: /* FABD */
8353             break;
8354         default:
8355             unallocated_encoding(s);
8356             return;
8357         }
8358
8359         if (!fp_access_check(s)) {
8360             return;
8361         }
8362
8363         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
8364         return;
8365     }
8366
8367     switch (opcode) {
8368     case 0x1: /* SQADD, UQADD */
8369     case 0x5: /* SQSUB, UQSUB */
8370     case 0x9: /* SQSHL, UQSHL */
8371     case 0xb: /* SQRSHL, UQRSHL */
8372         break;
8373     case 0x8: /* SSHL, USHL */
8374     case 0xa: /* SRSHL, URSHL */
8375     case 0x6: /* CMGT, CMHI */
8376     case 0x7: /* CMGE, CMHS */
8377     case 0x11: /* CMTST, CMEQ */
8378     case 0x10: /* ADD, SUB (vector) */
8379         if (size != 3) {
8380             unallocated_encoding(s);
8381             return;
8382         }
8383         break;
8384     case 0x16: /* SQDMULH, SQRDMULH (vector) */
8385         if (size != 1 && size != 2) {
8386             unallocated_encoding(s);
8387             return;
8388         }
8389         break;
8390     default:
8391         unallocated_encoding(s);
8392         return;
8393     }
8394
8395     if (!fp_access_check(s)) {
8396         return;
8397     }
8398
8399     tcg_rd = tcg_temp_new_i64();
8400
8401     if (size == 3) {
8402         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8403         TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
8404
8405         handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
8406         tcg_temp_free_i64(tcg_rn);
8407         tcg_temp_free_i64(tcg_rm);
8408     } else {
8409         /* Do a single operation on the lowest element in the vector.
8410          * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
8411          * no side effects for all these operations.
8412          * OPTME: special-purpose helpers would avoid doing some
8413          * unnecessary work in the helper for the 8 and 16 bit cases.
8414          */
8415         NeonGenTwoOpEnvFn *genenvfn;
8416         TCGv_i32 tcg_rn = tcg_temp_new_i32();
8417         TCGv_i32 tcg_rm = tcg_temp_new_i32();
8418         TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
8419
8420         read_vec_element_i32(s, tcg_rn, rn, 0, size);
8421         read_vec_element_i32(s, tcg_rm, rm, 0, size);
8422
8423         switch (opcode) {
8424         case 0x1: /* SQADD, UQADD */
8425         {
8426             static NeonGenTwoOpEnvFn * const fns[3][2] = {
8427                 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
8428                 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
8429                 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
8430             };
8431             genenvfn = fns[size][u];
8432             break;
8433         }
8434         case 0x5: /* SQSUB, UQSUB */
8435         {
8436             static NeonGenTwoOpEnvFn * const fns[3][2] = {
8437                 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
8438                 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
8439                 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
8440             };
8441             genenvfn = fns[size][u];
8442             break;
8443         }
8444         case 0x9: /* SQSHL, UQSHL */
8445         {
8446             static NeonGenTwoOpEnvFn * const fns[3][2] = {
8447                 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
8448                 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
8449                 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
8450             };
8451             genenvfn = fns[size][u];
8452             break;
8453         }
8454         case 0xb: /* SQRSHL, UQRSHL */
8455         {
8456             static NeonGenTwoOpEnvFn * const fns[3][2] = {
8457                 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
8458                 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
8459                 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
8460             };
8461             genenvfn = fns[size][u];
8462             break;
8463         }
8464         case 0x16: /* SQDMULH, SQRDMULH */
8465         {
8466             static NeonGenTwoOpEnvFn * const fns[2][2] = {
8467                 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
8468                 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
8469             };
8470             assert(size == 1 || size == 2);
8471             genenvfn = fns[size - 1][u];
8472             break;
8473         }
8474         default:
8475             g_assert_not_reached();
8476         }
8477
8478         genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
8479         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
8480         tcg_temp_free_i32(tcg_rd32);
8481         tcg_temp_free_i32(tcg_rn);
8482         tcg_temp_free_i32(tcg_rm);
8483     }
8484
8485     write_fp_dreg(s, rd, tcg_rd);
8486
8487     tcg_temp_free_i64(tcg_rd);
8488 }
8489
8490 /* AdvSIMD scalar three same FP16
8491  *  31 30  29 28       24 23  22 21 20  16 15 14 13    11 10  9  5 4  0
8492  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
8493  * | 0 1 | U | 1 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 | Rn | Rd |
8494  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
8495  * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
8496  * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
8497  */
8498 static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
8499                                                   uint32_t insn)
8500 {
8501     int rd = extract32(insn, 0, 5);
8502     int rn = extract32(insn, 5, 5);
8503     int opcode = extract32(insn, 11, 3);
8504     int rm = extract32(insn, 16, 5);
8505     bool u = extract32(insn, 29, 1);
8506     bool a = extract32(insn, 23, 1);
8507     int fpopcode = opcode | (a << 3) |  (u << 4);
8508     TCGv_ptr fpst;
8509     TCGv_i32 tcg_op1;
8510     TCGv_i32 tcg_op2;
8511     TCGv_i32 tcg_res;
8512
8513     switch (fpopcode) {
8514     case 0x03: /* FMULX */
8515     case 0x04: /* FCMEQ (reg) */
8516     case 0x07: /* FRECPS */
8517     case 0x0f: /* FRSQRTS */
8518     case 0x14: /* FCMGE (reg) */
8519     case 0x15: /* FACGE */
8520     case 0x1a: /* FABD */
8521     case 0x1c: /* FCMGT (reg) */
8522     case 0x1d: /* FACGT */
8523         break;
8524     default:
8525         unallocated_encoding(s);
8526         return;
8527     }
8528
8529     if (!dc_isar_feature(aa64_fp16, s)) {
8530         unallocated_encoding(s);
8531     }
8532
8533     if (!fp_access_check(s)) {
8534         return;
8535     }
8536
8537     fpst = get_fpstatus_ptr(true);
8538
8539     tcg_op1 = read_fp_hreg(s, rn);
8540     tcg_op2 = read_fp_hreg(s, rm);
8541     tcg_res = tcg_temp_new_i32();
8542
8543     switch (fpopcode) {
8544     case 0x03: /* FMULX */
8545         gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
8546         break;
8547     case 0x04: /* FCMEQ (reg) */
8548         gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
8549         break;
8550     case 0x07: /* FRECPS */
8551         gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
8552         break;
8553     case 0x0f: /* FRSQRTS */
8554         gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
8555         break;
8556     case 0x14: /* FCMGE (reg) */
8557         gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
8558         break;
8559     case 0x15: /* FACGE */
8560         gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
8561         break;
8562     case 0x1a: /* FABD */
8563         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
8564         tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
8565         break;
8566     case 0x1c: /* FCMGT (reg) */
8567         gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
8568         break;
8569     case 0x1d: /* FACGT */
8570         gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
8571         break;
8572     default:
8573         g_assert_not_reached();
8574     }
8575
8576     write_fp_sreg(s, rd, tcg_res);
8577
8578
8579     tcg_temp_free_i32(tcg_res);
8580     tcg_temp_free_i32(tcg_op1);
8581     tcg_temp_free_i32(tcg_op2);
8582     tcg_temp_free_ptr(fpst);
8583 }
8584
8585 /* AdvSIMD scalar three same extra
8586  *  31 30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
8587  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
8588  * | 0 1 | U | 1 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
8589  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
8590  */
8591 static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
8592                                                    uint32_t insn)
8593 {
8594     int rd = extract32(insn, 0, 5);
8595     int rn = extract32(insn, 5, 5);
8596     int opcode = extract32(insn, 11, 4);
8597     int rm = extract32(insn, 16, 5);
8598     int size = extract32(insn, 22, 2);
8599     bool u = extract32(insn, 29, 1);
8600     TCGv_i32 ele1, ele2, ele3;
8601     TCGv_i64 res;
8602     bool feature;
8603
8604     switch (u * 16 + opcode) {
8605     case 0x10: /* SQRDMLAH (vector) */
8606     case 0x11: /* SQRDMLSH (vector) */
8607         if (size != 1 && size != 2) {
8608             unallocated_encoding(s);
8609             return;
8610         }
8611         feature = dc_isar_feature(aa64_rdm, s);
8612         break;
8613     default:
8614         unallocated_encoding(s);
8615         return;
8616     }
8617     if (!feature) {
8618         unallocated_encoding(s);
8619         return;
8620     }
8621     if (!fp_access_check(s)) {
8622         return;
8623     }
8624
8625     /* Do a single operation on the lowest element in the vector.
8626      * We use the standard Neon helpers and rely on 0 OP 0 == 0
8627      * with no side effects for all these operations.
8628      * OPTME: special-purpose helpers would avoid doing some
8629      * unnecessary work in the helper for the 16 bit cases.
8630      */
8631     ele1 = tcg_temp_new_i32();
8632     ele2 = tcg_temp_new_i32();
8633     ele3 = tcg_temp_new_i32();
8634
8635     read_vec_element_i32(s, ele1, rn, 0, size);
8636     read_vec_element_i32(s, ele2, rm, 0, size);
8637     read_vec_element_i32(s, ele3, rd, 0, size);
8638
8639     switch (opcode) {
8640     case 0x0: /* SQRDMLAH */
8641         if (size == 1) {
8642             gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
8643         } else {
8644             gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
8645         }
8646         break;
8647     case 0x1: /* SQRDMLSH */
8648         if (size == 1) {
8649             gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
8650         } else {
8651             gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
8652         }
8653         break;
8654     default:
8655         g_assert_not_reached();
8656     }
8657     tcg_temp_free_i32(ele1);
8658     tcg_temp_free_i32(ele2);
8659
8660     res = tcg_temp_new_i64();
8661     tcg_gen_extu_i32_i64(res, ele3);
8662     tcg_temp_free_i32(ele3);
8663
8664     write_fp_dreg(s, rd, res);
8665     tcg_temp_free_i64(res);
8666 }
8667
8668 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
8669                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
8670                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
8671 {
8672     /* Handle 64->64 opcodes which are shared between the scalar and
8673      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
8674      * is valid in either group and also the double-precision fp ops.
8675      * The caller only need provide tcg_rmode and tcg_fpstatus if the op
8676      * requires them.
8677      */
8678     TCGCond cond;
8679
8680     switch (opcode) {
8681     case 0x4: /* CLS, CLZ */
8682         if (u) {
8683             tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
8684         } else {
8685             tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
8686         }
8687         break;
8688     case 0x5: /* NOT */
8689         /* This opcode is shared with CNT and RBIT but we have earlier
8690          * enforced that size == 3 if and only if this is the NOT insn.
8691          */
8692         tcg_gen_not_i64(tcg_rd, tcg_rn);
8693         break;
8694     case 0x7: /* SQABS, SQNEG */
8695         if (u) {
8696             gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
8697         } else {
8698             gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
8699         }
8700         break;
8701     case 0xa: /* CMLT */
8702         /* 64 bit integer comparison against zero, result is
8703          * test ? (2^64 - 1) : 0. We implement via setcond(!test) and
8704          * subtracting 1.
8705          */
8706         cond = TCG_COND_LT;
8707     do_cmop:
8708         tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
8709         tcg_gen_neg_i64(tcg_rd, tcg_rd);
8710         break;
8711     case 0x8: /* CMGT, CMGE */
8712         cond = u ? TCG_COND_GE : TCG_COND_GT;
8713         goto do_cmop;
8714     case 0x9: /* CMEQ, CMLE */
8715         cond = u ? TCG_COND_LE : TCG_COND_EQ;
8716         goto do_cmop;
8717     case 0xb: /* ABS, NEG */
8718         if (u) {
8719             tcg_gen_neg_i64(tcg_rd, tcg_rn);
8720         } else {
8721             TCGv_i64 tcg_zero = tcg_const_i64(0);
8722             tcg_gen_neg_i64(tcg_rd, tcg_rn);
8723             tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
8724                                 tcg_rn, tcg_rd);
8725             tcg_temp_free_i64(tcg_zero);
8726         }
8727         break;
8728     case 0x2f: /* FABS */
8729         gen_helper_vfp_absd(tcg_rd, tcg_rn);
8730         break;
8731     case 0x6f: /* FNEG */
8732         gen_helper_vfp_negd(tcg_rd, tcg_rn);
8733         break;
8734     case 0x7f: /* FSQRT */
8735         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
8736         break;
8737     case 0x1a: /* FCVTNS */
8738     case 0x1b: /* FCVTMS */
8739     case 0x1c: /* FCVTAS */
8740     case 0x3a: /* FCVTPS */
8741     case 0x3b: /* FCVTZS */
8742     {
8743         TCGv_i32 tcg_shift = tcg_const_i32(0);
8744         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8745         tcg_temp_free_i32(tcg_shift);
8746         break;
8747     }
8748     case 0x5a: /* FCVTNU */
8749     case 0x5b: /* FCVTMU */
8750     case 0x5c: /* FCVTAU */
8751     case 0x7a: /* FCVTPU */
8752     case 0x7b: /* FCVTZU */
8753     {
8754         TCGv_i32 tcg_shift = tcg_const_i32(0);
8755         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8756         tcg_temp_free_i32(tcg_shift);
8757         break;
8758     }
8759     case 0x18: /* FRINTN */
8760     case 0x19: /* FRINTM */
8761     case 0x38: /* FRINTP */
8762     case 0x39: /* FRINTZ */
8763     case 0x58: /* FRINTA */
8764     case 0x79: /* FRINTI */
8765         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
8766         break;
8767     case 0x59: /* FRINTX */
8768         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
8769         break;
8770     default:
8771         g_assert_not_reached();
8772     }
8773 }
8774
8775 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
8776                                    bool is_scalar, bool is_u, bool is_q,
8777                                    int size, int rn, int rd)
8778 {
8779     bool is_double = (size == MO_64);
8780     TCGv_ptr fpst;
8781
8782     if (!fp_access_check(s)) {
8783         return;
8784     }
8785
8786     fpst = get_fpstatus_ptr(size == MO_16);
8787
8788     if (is_double) {
8789         TCGv_i64 tcg_op = tcg_temp_new_i64();
8790         TCGv_i64 tcg_zero = tcg_const_i64(0);
8791         TCGv_i64 tcg_res = tcg_temp_new_i64();
8792         NeonGenTwoDoubleOPFn *genfn;
8793         bool swap = false;
8794         int pass;
8795
8796         switch (opcode) {
8797         case 0x2e: /* FCMLT (zero) */
8798             swap = true;
8799             /* fallthrough */
8800         case 0x2c: /* FCMGT (zero) */
8801             genfn = gen_helper_neon_cgt_f64;
8802             break;
8803         case 0x2d: /* FCMEQ (zero) */
8804             genfn = gen_helper_neon_ceq_f64;
8805             break;
8806         case 0x6d: /* FCMLE (zero) */
8807             swap = true;
8808             /* fall through */
8809         case 0x6c: /* FCMGE (zero) */
8810             genfn = gen_helper_neon_cge_f64;
8811             break;
8812         default:
8813             g_assert_not_reached();
8814         }
8815
8816         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
8817             read_vec_element(s, tcg_op, rn, pass, MO_64);
8818             if (swap) {
8819                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
8820             } else {
8821                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
8822             }
8823             write_vec_element(s, tcg_res, rd, pass, MO_64);
8824         }
8825         tcg_temp_free_i64(tcg_res);
8826         tcg_temp_free_i64(tcg_zero);
8827         tcg_temp_free_i64(tcg_op);
8828
8829         clear_vec_high(s, !is_scalar, rd);
8830     } else {
8831         TCGv_i32 tcg_op = tcg_temp_new_i32();
8832         TCGv_i32 tcg_zero = tcg_const_i32(0);
8833         TCGv_i32 tcg_res = tcg_temp_new_i32();
8834         NeonGenTwoSingleOPFn *genfn;
8835         bool swap = false;
8836         int pass, maxpasses;
8837
8838         if (size == MO_16) {
8839             switch (opcode) {
8840             case 0x2e: /* FCMLT (zero) */
8841                 swap = true;
8842                 /* fall through */
8843             case 0x2c: /* FCMGT (zero) */
8844                 genfn = gen_helper_advsimd_cgt_f16;
8845                 break;
8846             case 0x2d: /* FCMEQ (zero) */
8847                 genfn = gen_helper_advsimd_ceq_f16;
8848                 break;
8849             case 0x6d: /* FCMLE (zero) */
8850                 swap = true;
8851                 /* fall through */
8852             case 0x6c: /* FCMGE (zero) */
8853                 genfn = gen_helper_advsimd_cge_f16;
8854                 break;
8855             default:
8856                 g_assert_not_reached();
8857             }
8858         } else {
8859             switch (opcode) {
8860             case 0x2e: /* FCMLT (zero) */
8861                 swap = true;
8862                 /* fall through */
8863             case 0x2c: /* FCMGT (zero) */
8864                 genfn = gen_helper_neon_cgt_f32;
8865                 break;
8866             case 0x2d: /* FCMEQ (zero) */
8867                 genfn = gen_helper_neon_ceq_f32;
8868                 break;
8869             case 0x6d: /* FCMLE (zero) */
8870                 swap = true;
8871                 /* fall through */
8872             case 0x6c: /* FCMGE (zero) */
8873                 genfn = gen_helper_neon_cge_f32;
8874                 break;
8875             default:
8876                 g_assert_not_reached();
8877             }
8878         }
8879
8880         if (is_scalar) {
8881             maxpasses = 1;
8882         } else {
8883             int vector_size = 8 << is_q;
8884             maxpasses = vector_size >> size;
8885         }
8886
8887         for (pass = 0; pass < maxpasses; pass++) {
8888             read_vec_element_i32(s, tcg_op, rn, pass, size);
8889             if (swap) {
8890                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
8891             } else {
8892                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
8893             }
8894             if (is_scalar) {
8895                 write_fp_sreg(s, rd, tcg_res);
8896             } else {
8897                 write_vec_element_i32(s, tcg_res, rd, pass, size);
8898             }
8899         }
8900         tcg_temp_free_i32(tcg_res);
8901         tcg_temp_free_i32(tcg_zero);
8902         tcg_temp_free_i32(tcg_op);
8903         if (!is_scalar) {
8904             clear_vec_high(s, is_q, rd);
8905         }
8906     }
8907
8908     tcg_temp_free_ptr(fpst);
8909 }
8910
8911 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
8912                                     bool is_scalar, bool is_u, bool is_q,
8913                                     int size, int rn, int rd)
8914 {
8915     bool is_double = (size == 3);
8916     TCGv_ptr fpst = get_fpstatus_ptr(false);
8917
8918     if (is_double) {
8919         TCGv_i64 tcg_op = tcg_temp_new_i64();
8920         TCGv_i64 tcg_res = tcg_temp_new_i64();
8921         int pass;
8922
8923         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
8924             read_vec_element(s, tcg_op, rn, pass, MO_64);
8925             switch (opcode) {
8926             case 0x3d: /* FRECPE */
8927                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
8928                 break;
8929             case 0x3f: /* FRECPX */
8930                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
8931                 break;
8932             case 0x7d: /* FRSQRTE */
8933                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
8934                 break;
8935             default:
8936                 g_assert_not_reached();
8937             }
8938             write_vec_element(s, tcg_res, rd, pass, MO_64);
8939         }
8940         tcg_temp_free_i64(tcg_res);
8941         tcg_temp_free_i64(tcg_op);
8942         clear_vec_high(s, !is_scalar, rd);
8943     } else {
8944         TCGv_i32 tcg_op = tcg_temp_new_i32();
8945         TCGv_i32 tcg_res = tcg_temp_new_i32();
8946         int pass, maxpasses;
8947
8948         if (is_scalar) {
8949             maxpasses = 1;
8950         } else {
8951             maxpasses = is_q ? 4 : 2;
8952         }
8953
8954         for (pass = 0; pass < maxpasses; pass++) {
8955             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
8956
8957             switch (opcode) {
8958             case 0x3c: /* URECPE */
8959                 gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
8960                 break;
8961             case 0x3d: /* FRECPE */
8962                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
8963                 break;
8964             case 0x3f: /* FRECPX */
8965                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
8966                 break;
8967             case 0x7d: /* FRSQRTE */
8968                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
8969                 break;
8970             default:
8971                 g_assert_not_reached();
8972             }
8973
8974             if (is_scalar) {
8975                 write_fp_sreg(s, rd, tcg_res);
8976             } else {
8977                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
8978             }
8979         }
8980         tcg_temp_free_i32(tcg_res);
8981         tcg_temp_free_i32(tcg_op);
8982         if (!is_scalar) {
8983             clear_vec_high(s, is_q, rd);
8984         }
8985     }
8986     tcg_temp_free_ptr(fpst);
8987 }
8988
8989 static void handle_2misc_narrow(DisasContext *s, bool scalar,
8990                                 int opcode, bool u, bool is_q,
8991                                 int size, int rn, int rd)
8992 {
8993     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
8994      * in the source becomes a size element in the destination).
8995      */
8996     int pass;
8997     TCGv_i32 tcg_res[2];
8998     int destelt = is_q ? 2 : 0;
8999     int passes = scalar ? 1 : 2;
9000
9001     if (scalar) {
9002         tcg_res[1] = tcg_const_i32(0);
9003     }
9004
9005     for (pass = 0; pass < passes; pass++) {
9006         TCGv_i64 tcg_op = tcg_temp_new_i64();
9007         NeonGenNarrowFn *genfn = NULL;
9008         NeonGenNarrowEnvFn *genenvfn = NULL;
9009
9010         if (scalar) {
9011             read_vec_element(s, tcg_op, rn, pass, size + 1);
9012         } else {
9013             read_vec_element(s, tcg_op, rn, pass, MO_64);
9014         }
9015         tcg_res[pass] = tcg_temp_new_i32();
9016
9017         switch (opcode) {
9018         case 0x12: /* XTN, SQXTUN */
9019         {
9020             static NeonGenNarrowFn * const xtnfns[3] = {
9021                 gen_helper_neon_narrow_u8,
9022                 gen_helper_neon_narrow_u16,
9023                 tcg_gen_extrl_i64_i32,
9024             };
9025             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
9026                 gen_helper_neon_unarrow_sat8,
9027                 gen_helper_neon_unarrow_sat16,
9028                 gen_helper_neon_unarrow_sat32,
9029             };
9030             if (u) {
9031                 genenvfn = sqxtunfns[size];
9032             } else {
9033                 genfn = xtnfns[size];
9034             }
9035             break;
9036         }
9037         case 0x14: /* SQXTN, UQXTN */
9038         {
9039             static NeonGenNarrowEnvFn * const fns[3][2] = {
9040                 { gen_helper_neon_narrow_sat_s8,
9041                   gen_helper_neon_narrow_sat_u8 },
9042                 { gen_helper_neon_narrow_sat_s16,
9043                   gen_helper_neon_narrow_sat_u16 },
9044                 { gen_helper_neon_narrow_sat_s32,
9045                   gen_helper_neon_narrow_sat_u32 },
9046             };
9047             genenvfn = fns[size][u];
9048             break;
9049         }
9050         case 0x16: /* FCVTN, FCVTN2 */
9051             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
9052             if (size == 2) {
9053                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
9054             } else {
9055                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
9056                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
9057                 TCGv_ptr fpst = get_fpstatus_ptr(false);
9058                 TCGv_i32 ahp = get_ahp_flag();
9059
9060                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
9061                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9062                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9063                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
9064                 tcg_temp_free_i32(tcg_lo);
9065                 tcg_temp_free_i32(tcg_hi);
9066                 tcg_temp_free_ptr(fpst);
9067                 tcg_temp_free_i32(ahp);
9068             }
9069             break;
9070         case 0x56:  /* FCVTXN, FCVTXN2 */
9071             /* 64 bit to 32 bit float conversion
9072              * with von Neumann rounding (round to odd)
9073              */
9074             assert(size == 2);
9075             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
9076             break;
9077         default:
9078             g_assert_not_reached();
9079         }
9080
9081         if (genfn) {
9082             genfn(tcg_res[pass], tcg_op);
9083         } else if (genenvfn) {
9084             genenvfn(tcg_res[pass], cpu_env, tcg_op);
9085         }
9086
9087         tcg_temp_free_i64(tcg_op);
9088     }
9089
9090     for (pass = 0; pass < 2; pass++) {
9091         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
9092         tcg_temp_free_i32(tcg_res[pass]);
9093     }
9094     clear_vec_high(s, is_q, rd);
9095 }
9096
9097 /* Remaining saturating accumulating ops */
9098 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
9099                                 bool is_q, int size, int rn, int rd)
9100 {
9101     bool is_double = (size == 3);
9102
9103     if (is_double) {
9104         TCGv_i64 tcg_rn = tcg_temp_new_i64();
9105         TCGv_i64 tcg_rd = tcg_temp_new_i64();
9106         int pass;
9107
9108         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9109             read_vec_element(s, tcg_rn, rn, pass, MO_64);
9110             read_vec_element(s, tcg_rd, rd, pass, MO_64);
9111
9112             if (is_u) { /* USQADD */
9113                 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9114             } else { /* SUQADD */
9115                 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9116             }
9117             write_vec_element(s, tcg_rd, rd, pass, MO_64);
9118         }
9119         tcg_temp_free_i64(tcg_rd);
9120         tcg_temp_free_i64(tcg_rn);
9121         clear_vec_high(s, !is_scalar, rd);
9122     } else {
9123         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9124         TCGv_i32 tcg_rd = tcg_temp_new_i32();
9125         int pass, maxpasses;
9126
9127         if (is_scalar) {
9128             maxpasses = 1;
9129         } else {
9130             maxpasses = is_q ? 4 : 2;
9131         }
9132
9133         for (pass = 0; pass < maxpasses; pass++) {
9134             if (is_scalar) {
9135                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
9136                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
9137             } else {
9138                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
9139                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9140             }
9141
9142             if (is_u) { /* USQADD */
9143                 switch (size) {
9144                 case 0:
9145                     gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9146                     break;
9147                 case 1:
9148                     gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9149                     break;
9150                 case 2:
9151                     gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9152                     break;
9153                 default:
9154                     g_assert_not_reached();
9155                 }
9156             } else { /* SUQADD */
9157                 switch (size) {
9158                 case 0:
9159                     gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9160                     break;
9161                 case 1:
9162                     gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9163                     break;
9164                 case 2:
9165                     gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9166                     break;
9167                 default:
9168                     g_assert_not_reached();
9169                 }
9170             }
9171
9172             if (is_scalar) {
9173                 TCGv_i64 tcg_zero = tcg_const_i64(0);
9174                 write_vec_element(s, tcg_zero, rd, 0, MO_64);
9175                 tcg_temp_free_i64(tcg_zero);
9176             }
9177             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9178         }
9179         tcg_temp_free_i32(tcg_rd);
9180         tcg_temp_free_i32(tcg_rn);
9181         clear_vec_high(s, is_q, rd);
9182     }
9183 }
9184
/* AdvSIMD scalar two reg misc
 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 *
 * Decode proceeds in two stages.  The first switch rejects unallocated
 * encodings and hands the operations that have dedicated handlers
 * (saturating accumulate, narrowing, FP compare with zero, integer to FP
 * conversion, reciprocal estimates) straight to them.  Whatever breaks out
 * of that switch is emitted by the common code below it: 64-bit elements
 * go through handle_2misc_64(); everything else is SQABS/SQNEG or a
 * floating-point to integer conversion on a 32-bit element.
 */
static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 12, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    bool is_fcvt = false;
    /* rmode is only assigned, and only read, when is_fcvt is true. */
    int rmode;
    TCGv_i32 tcg_rmode;
    TCGv_ptr tcg_fpstatus;

    switch (opcode) {
    case 0x3: /* USQADD / SUQADD*/
        if (!fp_access_check(s)) {
            return;
        }
        handle_2misc_satacc(s, true, u, false, size, rn, rd);
        return;
    case 0x7: /* SQABS / SQNEG */
        /* No size restriction; emitted by the common code below. */
        break;
    case 0xa: /* CMLT */
        if (u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x8: /* CMGT, CMGE */
    case 0x9: /* CMEQ, CMLE */
    case 0xb: /* ABS, NEG */
        /* These scalar forms exist only for 64-bit elements. */
        if (size != 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x12: /* SQXTUN */
        if (!u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x14: /* SQXTN, UQXTN */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
        return;
    case 0xc ... 0xf:
    case 0x16 ... 0x1d:
    case 0x1f:
        /* Floating point: U, size[1] and opcode indicate operation;
         * size[0] indicates single or double precision.
         */
        /* Widen opcode to 7 bits: bit 5 = size[1], bit 6 = U. */
        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
        /* From here on size is a MO_* style value: 2 = single, 3 = double. */
        size = extract32(size, 0, 1) ? 3 : 2;
        switch (opcode) {
        case 0x2c: /* FCMGT (zero) */
        case 0x2d: /* FCMEQ (zero) */
        case 0x2e: /* FCMLT (zero) */
        case 0x6c: /* FCMGE (zero) */
        case 0x6d: /* FCMLE (zero) */
            /* NOTE(review): unlike the sibling cases there is no
             * fp_access_check() here; presumably the handler performs
             * the check itself - confirm.
             */
            handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
            return;
        case 0x1d: /* SCVTF */
        case 0x5d: /* UCVTF */
        {
            bool is_signed = (opcode == 0x1d);
            if (!fp_access_check(s)) {
                return;
            }
            handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
            return;
        }
        case 0x3d: /* FRECPE */
        case 0x3f: /* FRECPX */
        case 0x7d: /* FRSQRTE */
            if (!fp_access_check(s)) {
                return;
            }
            handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
            return;
        case 0x1a: /* FCVTNS */
        case 0x1b: /* FCVTMS */
        case 0x3a: /* FCVTPS */
        case 0x3b: /* FCVTZS */
        case 0x5a: /* FCVTNU */
        case 0x5b: /* FCVTMU */
        case 0x7a: /* FCVTPU */
        case 0x7b: /* FCVTZU */
            is_fcvt = true;
            /* rmode bit 0 is opcode bit 5, rmode bit 1 is opcode bit 0. */
            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
            break;
        case 0x1c: /* FCVTAS */
        case 0x5c: /* FCVTAU */
            /* TIEAWAY doesn't fit in the usual rounding mode encoding */
            is_fcvt = true;
            rmode = FPROUNDING_TIEAWAY;
            break;
        case 0x56: /* FCVTXN, FCVTXN2 */
            /* Only the encoding that decoded to size == 3 is allocated. */
            if (size == 2) {
                unallocated_encoding(s);
                return;
            }
            if (!fp_access_check(s)) {
                return;
            }
            /* handle_2misc_narrow() asserts size == 2 for this opcode,
             * hence size - 1.
             */
            handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
            return;
        default:
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    /* Common path for everything that broke out of the switch above. */
    if (!fp_access_check(s)) {
        return;
    }

    if (is_fcvt) {
        /* NOTE(review): tcg_rmode is both the output and the input of
         * gen_helper_set_rmode(); presumably the helper hands back the
         * previous rounding mode, so that the matching call at the end of
         * this function restores it - confirm against the helper.
         */
        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
        tcg_fpstatus = get_fpstatus_ptr(false);
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
    } else {
        tcg_rmode = NULL;
        tcg_fpstatus = NULL;
    }

    if (size == 3) {
        /* 64-bit element: all remaining opcodes share one helper routine. */
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i64 tcg_rd = tcg_temp_new_i64();

        handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
        write_fp_dreg(s, rd, tcg_rd);
        tcg_temp_free_i64(tcg_rd);
        tcg_temp_free_i64(tcg_rn);
    } else {
        TCGv_i32 tcg_rn = tcg_temp_new_i32();
        TCGv_i32 tcg_rd = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_rn, rn, 0, size);

        switch (opcode) {
        case 0x7: /* SQABS, SQNEG */
        {
            NeonGenOneOpEnvFn *genfn;
            /* Indexed by [size][u]: u == 0 is SQABS, u == 1 is SQNEG. */
            static NeonGenOneOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
                { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
                { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
            };
            genfn = fns[size][u];
            genfn(tcg_rd, cpu_env, tcg_rn);
            break;
        }
        case 0x1a: /* FCVTNS */
        case 0x1b: /* FCVTMS */
        case 0x1c: /* FCVTAS */
        case 0x3a: /* FCVTPS */
        case 0x3b: /* FCVTZS */
        {
            /* Signed conversions: the helper is given a shift of zero. */
            TCGv_i32 tcg_shift = tcg_const_i32(0);
            gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
            tcg_temp_free_i32(tcg_shift);
            break;
        }
        case 0x5a: /* FCVTNU */
        case 0x5b: /* FCVTMU */
        case 0x5c: /* FCVTAU */
        case 0x7a: /* FCVTPU */
        case 0x7b: /* FCVTZU */
        {
            /* Unsigned conversions: the helper is given a shift of zero. */
            TCGv_i32 tcg_shift = tcg_const_i32(0);
            gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
            tcg_temp_free_i32(tcg_shift);
            break;
        }
        default:
            g_assert_not_reached();
        }

        write_fp_sreg(s, rd, tcg_rd);
        tcg_temp_free_i32(tcg_rd);
        tcg_temp_free_i32(tcg_rn);
    }

    if (is_fcvt) {
        /* Second half of the set_rmode pair started above. */
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
        tcg_temp_free_i32(tcg_rmode);
        tcg_temp_free_ptr(tcg_fpstatus);
    }
}
9390
9391 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
9392 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
9393                                  int immh, int immb, int opcode, int rn, int rd)
9394 {
9395     int size = 32 - clz32(immh) - 1;
9396     int immhb = immh << 3 | immb;
9397     int shift = 2 * (8 << size) - immhb;
9398     bool accumulate = false;
9399     int dsize = is_q ? 128 : 64;
9400     int esize = 8 << size;
9401     int elements = dsize/esize;
9402     TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
9403     TCGv_i64 tcg_rn = new_tmp_a64(s);
9404     TCGv_i64 tcg_rd = new_tmp_a64(s);
9405     TCGv_i64 tcg_round;
9406     uint64_t round_const;
9407     int i;
9408
9409     if (extract32(immh, 3, 1) && !is_q) {
9410         unallocated_encoding(s);
9411         return;
9412     }
9413     tcg_debug_assert(size <= 3);
9414
9415     if (!fp_access_check(s)) {
9416         return;
9417     }
9418
9419     switch (opcode) {
9420     case 0x02: /* SSRA / USRA (accumulate) */
9421         if (is_u) {
9422             /* Shift count same as element size produces zero to add.  */
9423             if (shift == 8 << size) {
9424                 goto done;
9425             }
9426             gen_gvec_op2i(s, is_q, rd, rn, shift, &usra_op[size]);
9427         } else {
9428             /* Shift count same as element size produces all sign to add.  */
9429             if (shift == 8 << size) {
9430                 shift -= 1;
9431             }
9432             gen_gvec_op2i(s, is_q, rd, rn, shift, &ssra_op[size]);
9433         }
9434         return;
9435     case 0x08: /* SRI */
9436         /* Shift count same as element size is valid but does nothing.  */
9437         if (shift == 8 << size) {
9438             goto done;
9439         }
9440         gen_gvec_op2i(s, is_q, rd, rn, shift, &sri_op[size]);
9441         return;
9442
9443     case 0x00: /* SSHR / USHR */
9444         if (is_u) {
9445             if (shift == 8 << size) {
9446                 /* Shift count the same size as element size produces zero.  */
9447                 tcg_gen_gvec_dup8i(vec_full_reg_offset(s, rd),
9448                                    is_q ? 16 : 8, vec_full_reg_size(s), 0);
9449             } else {
9450                 gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shri, size);
9451             }
9452         } else {
9453             /* Shift count the same size as element size produces all sign.  */
9454             if (shift == 8 << size) {
9455                 shift -= 1;
9456             }
9457             gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_sari, size);
9458         }
9459         return;
9460
9461     case 0x04: /* SRSHR / URSHR (rounding) */
9462         break;
9463     case 0x06: /* SRSRA / URSRA (accum + rounding) */
9464         accumulate = true;
9465         break;
9466     default:
9467         g_assert_not_reached();
9468     }
9469
9470     round_const = 1ULL << (shift - 1);
9471     tcg_round = tcg_const_i64(round_const);
9472
9473     for (i = 0; i < elements; i++) {
9474         read_vec_element(s, tcg_rn, rn, i, memop);
9475         if (accumulate) {
9476             read_vec_element(s, tcg_rd, rd, i, memop);
9477         }
9478
9479         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
9480                                 accumulate, is_u, size, shift);
9481
9482         write_vec_element(s, tcg_rd, rd, i, size);
9483     }
9484     tcg_temp_free_i64(tcg_round);
9485
9486  done:
9487     clear_vec_high(s, is_q, rd);
9488 }
9489
9490 /* SHL/SLI - Vector shift left */
9491 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
9492                                  int immh, int immb, int opcode, int rn, int rd)
9493 {
9494     int size = 32 - clz32(immh) - 1;
9495     int immhb = immh << 3 | immb;
9496     int shift = immhb - (8 << size);
9497
9498     /* Range of size is limited by decode: immh is a non-zero 4 bit field */
9499     assert(size >= 0 && size <= 3);
9500
9501     if (extract32(immh, 3, 1) && !is_q) {
9502         unallocated_encoding(s);
9503         return;
9504     }
9505
9506     if (!fp_access_check(s)) {
9507         return;
9508     }
9509
9510     if (insert) {
9511         gen_gvec_op2i(s, is_q, rd, rn, shift, &sli_op[size]);
9512     } else {
9513         gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
9514     }
9515 }
9516
9517 /* USHLL/SHLL - Vector shift left with widening */
9518 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
9519                                  int immh, int immb, int opcode, int rn, int rd)
9520 {
9521     int size = 32 - clz32(immh) - 1;
9522     int immhb = immh << 3 | immb;
9523     int shift = immhb - (8 << size);
9524     int dsize = 64;
9525     int esize = 8 << size;
9526     int elements = dsize/esize;
9527     TCGv_i64 tcg_rn = new_tmp_a64(s);
9528     TCGv_i64 tcg_rd = new_tmp_a64(s);
9529     int i;
9530
9531     if (size >= 3) {
9532         unallocated_encoding(s);
9533         return;
9534     }
9535
9536     if (!fp_access_check(s)) {
9537         return;
9538     }
9539
9540     /* For the LL variants the store is larger than the load,
9541      * so if rd == rn we would overwrite parts of our input.
9542      * So load everything right now and use shifts in the main loop.
9543      */
9544     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
9545
9546     for (i = 0; i < elements; i++) {
9547         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
9548         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
9549         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
9550         write_vec_element(s, tcg_rd, rd, i, size + 1);
9551     }
9552 }
9553
9554 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
9555 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
9556                                  int immh, int immb, int opcode, int rn, int rd)
9557 {
9558     int immhb = immh << 3 | immb;
9559     int size = 32 - clz32(immh) - 1;
9560     int dsize = 64;
9561     int esize = 8 << size;
9562     int elements = dsize/esize;
9563     int shift = (2 * esize) - immhb;
9564     bool round = extract32(opcode, 0, 1);
9565     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
9566     TCGv_i64 tcg_round;
9567     int i;
9568
9569     if (extract32(immh, 3, 1)) {
9570         unallocated_encoding(s);
9571         return;
9572     }
9573
9574     if (!fp_access_check(s)) {
9575         return;
9576     }
9577
9578     tcg_rn = tcg_temp_new_i64();
9579     tcg_rd = tcg_temp_new_i64();
9580     tcg_final = tcg_temp_new_i64();
9581     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
9582
9583     if (round) {
9584         uint64_t round_const = 1ULL << (shift - 1);
9585         tcg_round = tcg_const_i64(round_const);
9586     } else {
9587         tcg_round = NULL;
9588     }
9589
9590     for (i = 0; i < elements; i++) {
9591         read_vec_element(s, tcg_rn, rn, i, size+1);
9592         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
9593                                 false, true, size+1, shift);
9594
9595         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
9596     }
9597
9598     if (!is_q) {
9599         write_vec_element(s, tcg_final, rd, 0, MO_64);
9600     } else {
9601         write_vec_element(s, tcg_final, rd, 1, MO_64);
9602     }
9603     if (round) {
9604         tcg_temp_free_i64(tcg_round);
9605     }
9606     tcg_temp_free_i64(tcg_rn);
9607     tcg_temp_free_i64(tcg_rd);
9608     tcg_temp_free_i64(tcg_final);
9609
9610     clear_vec_high(s, is_q, rd);
9611 }
9612
9613
9614 /* AdvSIMD shift by immediate
9615  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
9616  * +---+---+---+-------------+------+------+--------+---+------+------+
9617  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
9618  * +---+---+---+-------------+------+------+--------+---+------+------+
9619  */
9620 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
9621 {
9622     int rd = extract32(insn, 0, 5);
9623     int rn = extract32(insn, 5, 5);
9624     int opcode = extract32(insn, 11, 5);
9625     int immb = extract32(insn, 16, 3);
9626     int immh = extract32(insn, 19, 4);
9627     bool is_u = extract32(insn, 29, 1);
9628     bool is_q = extract32(insn, 30, 1);
9629
9630     switch (opcode) {
9631     case 0x08: /* SRI */
9632         if (!is_u) {
9633             unallocated_encoding(s);
9634             return;
9635         }
9636         /* fall through */
9637     case 0x00: /* SSHR / USHR */
9638     case 0x02: /* SSRA / USRA (accumulate) */
9639     case 0x04: /* SRSHR / URSHR (rounding) */
9640     case 0x06: /* SRSRA / URSRA (accum + rounding) */
9641         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
9642         break;
9643     case 0x0a: /* SHL / SLI */
9644         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
9645         break;
9646     case 0x10: /* SHRN */
9647     case 0x11: /* RSHRN / SQRSHRUN */
9648         if (is_u) {
9649             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
9650                                    opcode, rn, rd);
9651         } else {
9652             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
9653         }
9654         break;
9655     case 0x12: /* SQSHRN / UQSHRN */
9656     case 0x13: /* SQRSHRN / UQRSHRN */
9657         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
9658                                opcode, rn, rd);
9659         break;
9660     case 0x14: /* SSHLL / USHLL */
9661         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
9662         break;
9663     case 0x1c: /* SCVTF / UCVTF */
9664         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
9665                                      opcode, rn, rd);
9666         break;
9667     case 0xc: /* SQSHLU */
9668         if (!is_u) {
9669             unallocated_encoding(s);
9670             return;
9671         }
9672         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
9673         break;
9674     case 0xe: /* SQSHL, UQSHL */
9675         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
9676         break;
9677     case 0x1f: /* FCVTZS/ FCVTZU */
9678         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
9679         return;
9680     default:
9681         unallocated_encoding(s);
9682         return;
9683     }
9684 }
9685
9686 /* Generate code to do a "long" addition or subtraction, ie one done in
9687  * TCGv_i64 on vector lanes twice the width specified by size.
9688  */
9689 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
9690                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
9691 {
9692     static NeonGenTwo64OpFn * const fns[3][2] = {
9693         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
9694         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
9695         { tcg_gen_add_i64, tcg_gen_sub_i64 },
9696     };
9697     NeonGenTwo64OpFn *genfn;
9698     assert(size < 3);
9699
9700     genfn = fns[size][is_sub];
9701     genfn(tcg_res, tcg_op1, tcg_op2);
9702 }
9703
9704 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
9705                                 int opcode, int rd, int rn, int rm)
9706 {
9707     /* 3-reg-different widening insns: 64 x 64 -> 128 */
9708     TCGv_i64 tcg_res[2];
9709     int pass, accop;
9710
9711     tcg_res[0] = tcg_temp_new_i64();
9712     tcg_res[1] = tcg_temp_new_i64();
9713
9714     /* Does this op do an adding accumulate, a subtracting accumulate,
9715      * or no accumulate at all?
9716      */
9717     switch (opcode) {
9718     case 5:
9719     case 8:
9720     case 9:
9721         accop = 1;
9722         break;
9723     case 10:
9724     case 11:
9725         accop = -1;
9726         break;
9727     default:
9728         accop = 0;
9729         break;
9730     }
9731
9732     if (accop != 0) {
9733         read_vec_element(s, tcg_res[0], rd, 0, MO_64);
9734         read_vec_element(s, tcg_res[1], rd, 1, MO_64);
9735     }
9736
9737     /* size == 2 means two 32x32->64 operations; this is worth special
9738      * casing because we can generally handle it inline.
9739      */
9740     if (size == 2) {
9741         for (pass = 0; pass < 2; pass++) {
9742             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9743             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9744             TCGv_i64 tcg_passres;
9745             TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
9746
9747             int elt = pass + is_q * 2;
9748
9749             read_vec_element(s, tcg_op1, rn, elt, memop);
9750             read_vec_element(s, tcg_op2, rm, elt, memop);
9751
9752             if (accop == 0) {
9753                 tcg_passres = tcg_res[pass];
9754             } else {
9755                 tcg_passres = tcg_temp_new_i64();
9756             }
9757
9758             switch (opcode) {
9759             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
9760                 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
9761                 break;
9762             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
9763                 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
9764                 break;
9765             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
9766             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
9767             {
9768                 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
9769                 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
9770
9771                 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
9772                 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
9773                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
9774                                     tcg_passres,
9775                                     tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
9776                 tcg_temp_free_i64(tcg_tmp1);
9777                 tcg_temp_free_i64(tcg_tmp2);
9778                 break;
9779             }
9780             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
9781             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
9782             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
9783                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
9784                 break;
9785             case 9: /* SQDMLAL, SQDMLAL2 */
9786             case 11: /* SQDMLSL, SQDMLSL2 */
9787             case 13: /* SQDMULL, SQDMULL2 */
9788                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
9789                 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
9790                                                   tcg_passres, tcg_passres);
9791                 break;
9792             default:
9793                 g_assert_not_reached();
9794             }
9795
9796             if (opcode == 9 || opcode == 11) {
9797                 /* saturating accumulate ops */
9798                 if (accop < 0) {
9799                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
9800                 }
9801                 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
9802                                                   tcg_res[pass], tcg_passres);
9803             } else if (accop > 0) {
9804                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
9805             } else if (accop < 0) {
9806                 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
9807             }
9808
9809             if (accop != 0) {
9810                 tcg_temp_free_i64(tcg_passres);
9811             }
9812
9813             tcg_temp_free_i64(tcg_op1);
9814             tcg_temp_free_i64(tcg_op2);
9815         }
9816     } else {
9817         /* size 0 or 1, generally helper functions */
9818         for (pass = 0; pass < 2; pass++) {
9819             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9820             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9821             TCGv_i64 tcg_passres;
9822             int elt = pass + is_q * 2;
9823
9824             read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
9825             read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
9826
9827             if (accop == 0) {
9828                 tcg_passres = tcg_res[pass];
9829             } else {
9830                 tcg_passres = tcg_temp_new_i64();
9831             }
9832
9833             switch (opcode) {
9834             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
9835             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
9836             {
9837                 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
9838                 static NeonGenWidenFn * const widenfns[2][2] = {
9839                     { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
9840                     { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
9841                 };
9842                 NeonGenWidenFn *widenfn = widenfns[size][is_u];
9843
9844                 widenfn(tcg_op2_64, tcg_op2);
9845                 widenfn(tcg_passres, tcg_op1);
9846                 gen_neon_addl(size, (opcode == 2), tcg_passres,
9847                               tcg_passres, tcg_op2_64);
9848                 tcg_temp_free_i64(tcg_op2_64);
9849                 break;
9850             }
9851             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
9852             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
9853                 if (size == 0) {
9854                     if (is_u) {
9855                         gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
9856                     } else {
9857                         gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
9858                     }
9859                 } else {
9860                     if (is_u) {
9861                         gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
9862                     } else {
9863                         gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
9864                     }
9865                 }
9866                 break;
9867             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
9868             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
9869             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
9870                 if (size == 0) {
9871                     if (is_u) {
9872                         gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
9873                     } else {
9874                         gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
9875                     }
9876                 } else {
9877                     if (is_u) {
9878                         gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
9879                     } else {
9880                         gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
9881                     }
9882                 }
9883                 break;
9884             case 9: /* SQDMLAL, SQDMLAL2 */
9885             case 11: /* SQDMLSL, SQDMLSL2 */
9886             case 13: /* SQDMULL, SQDMULL2 */
9887                 assert(size == 1);
9888                 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
9889                 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
9890                                                   tcg_passres, tcg_passres);
9891                 break;
9892             case 14: /* PMULL */
9893                 assert(size == 0);
9894                 gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
9895                 break;
9896             default:
9897                 g_assert_not_reached();
9898             }
9899             tcg_temp_free_i32(tcg_op1);
9900             tcg_temp_free_i32(tcg_op2);
9901
9902             if (accop != 0) {
9903                 if (opcode == 9 || opcode == 11) {
9904                     /* saturating accumulate ops */
9905                     if (accop < 0) {
9906                         gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
9907                     }
9908                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
9909                                                       tcg_res[pass],
9910                                                       tcg_passres);
9911                 } else {
9912                     gen_neon_addl(size, (accop < 0), tcg_res[pass],
9913                                   tcg_res[pass], tcg_passres);
9914                 }
9915                 tcg_temp_free_i64(tcg_passres);
9916             }
9917         }
9918     }
9919
9920     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
9921     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
9922     tcg_temp_free_i64(tcg_res[0]);
9923     tcg_temp_free_i64(tcg_res[1]);
9924 }
9925
9926 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
9927                             int opcode, int rd, int rn, int rm)
9928 {
9929     TCGv_i64 tcg_res[2];
9930     int part = is_q ? 2 : 0;
9931     int pass;
9932
9933     for (pass = 0; pass < 2; pass++) {
9934         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9935         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9936         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
9937         static NeonGenWidenFn * const widenfns[3][2] = {
9938             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
9939             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
9940             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
9941         };
9942         NeonGenWidenFn *widenfn = widenfns[size][is_u];
9943
9944         read_vec_element(s, tcg_op1, rn, pass, MO_64);
9945         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
9946         widenfn(tcg_op2_wide, tcg_op2);
9947         tcg_temp_free_i32(tcg_op2);
9948         tcg_res[pass] = tcg_temp_new_i64();
9949         gen_neon_addl(size, (opcode == 3),
9950                       tcg_res[pass], tcg_op1, tcg_op2_wide);
9951         tcg_temp_free_i64(tcg_op1);
9952         tcg_temp_free_i64(tcg_op2_wide);
9953     }
9954
9955     for (pass = 0; pass < 2; pass++) {
9956         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9957         tcg_temp_free_i64(tcg_res[pass]);
9958     }
9959 }
9960
9961 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
9962 {
9963     tcg_gen_addi_i64(in, in, 1U << 31);
9964     tcg_gen_extrh_i64_i32(res, in);
9965 }
9966
9967 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
9968                                  int opcode, int rd, int rn, int rm)
9969 {
9970     TCGv_i32 tcg_res[2];
9971     int part = is_q ? 2 : 0;
9972     int pass;
9973
9974     for (pass = 0; pass < 2; pass++) {
9975         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9976         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9977         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
9978         static NeonGenNarrowFn * const narrowfns[3][2] = {
9979             { gen_helper_neon_narrow_high_u8,
9980               gen_helper_neon_narrow_round_high_u8 },
9981             { gen_helper_neon_narrow_high_u16,
9982               gen_helper_neon_narrow_round_high_u16 },
9983             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
9984         };
9985         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
9986
9987         read_vec_element(s, tcg_op1, rn, pass, MO_64);
9988         read_vec_element(s, tcg_op2, rm, pass, MO_64);
9989
9990         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
9991
9992         tcg_temp_free_i64(tcg_op1);
9993         tcg_temp_free_i64(tcg_op2);
9994
9995         tcg_res[pass] = tcg_temp_new_i32();
9996         gennarrow(tcg_res[pass], tcg_wideres);
9997         tcg_temp_free_i64(tcg_wideres);
9998     }
9999
10000     for (pass = 0; pass < 2; pass++) {
10001         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
10002         tcg_temp_free_i32(tcg_res[pass]);
10003     }
10004     clear_vec_high(s, is_q, rd);
10005 }
10006
10007 static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
10008 {
10009     /* PMULL of 64 x 64 -> 128 is an odd special case because it
10010      * is the only three-reg-diff instruction which produces a
10011      * 128-bit wide result from a single operation. However since
10012      * it's possible to calculate the two halves more or less
10013      * separately we just use two helper calls.
10014      */
10015     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10016     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10017     TCGv_i64 tcg_res = tcg_temp_new_i64();
10018
10019     read_vec_element(s, tcg_op1, rn, is_q, MO_64);
10020     read_vec_element(s, tcg_op2, rm, is_q, MO_64);
10021     gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
10022     write_vec_element(s, tcg_res, rd, 0, MO_64);
10023     gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
10024     write_vec_element(s, tcg_res, rd, 1, MO_64);
10025
10026     tcg_temp_free_i64(tcg_op1);
10027     tcg_temp_free_i64(tcg_op2);
10028     tcg_temp_free_i64(tcg_res);
10029 }
10030
10031 /* AdvSIMD three different
10032  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
10033  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10034  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
10035  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10036  */
10037 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
10038 {
10039     /* Instructions in this group fall into three basic classes
10040      * (in each case with the operation working on each element in
10041      * the input vectors):
10042      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
10043      *     128 bit input)
10044      * (2) wide 64 x 128 -> 128
10045      * (3) narrowing 128 x 128 -> 64
10046      * Here we do initial decode, catch unallocated cases and
10047      * dispatch to separate functions for each class.
10048      */
10049     int is_q = extract32(insn, 30, 1);
10050     int is_u = extract32(insn, 29, 1);
10051     int size = extract32(insn, 22, 2);
10052     int opcode = extract32(insn, 12, 4);
10053     int rm = extract32(insn, 16, 5);
10054     int rn = extract32(insn, 5, 5);
10055     int rd = extract32(insn, 0, 5);
10056
10057     switch (opcode) {
10058     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
10059     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
10060         /* 64 x 128 -> 128 */
10061         if (size == 3) {
10062             unallocated_encoding(s);
10063             return;
10064         }
10065         if (!fp_access_check(s)) {
10066             return;
10067         }
10068         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
10069         break;
10070     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
10071     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
10072         /* 128 x 128 -> 64 */
10073         if (size == 3) {
10074             unallocated_encoding(s);
10075             return;
10076         }
10077         if (!fp_access_check(s)) {
10078             return;
10079         }
10080         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
10081         break;
10082     case 14: /* PMULL, PMULL2 */
10083         if (is_u || size == 1 || size == 2) {
10084             unallocated_encoding(s);
10085             return;
10086         }
10087         if (size == 3) {
10088             if (!dc_isar_feature(aa64_pmull, s)) {
10089                 unallocated_encoding(s);
10090                 return;
10091             }
10092             if (!fp_access_check(s)) {
10093                 return;
10094             }
10095             handle_pmull_64(s, is_q, rd, rn, rm);
10096             return;
10097         }
10098         goto is_widening;
10099     case 9: /* SQDMLAL, SQDMLAL2 */
10100     case 11: /* SQDMLSL, SQDMLSL2 */
10101     case 13: /* SQDMULL, SQDMULL2 */
10102         if (is_u || size == 0) {
10103             unallocated_encoding(s);
10104             return;
10105         }
10106         /* fall through */
10107     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10108     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10109     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10110     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10111     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10112     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10113     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
10114         /* 64 x 64 -> 128 */
10115         if (size == 3) {
10116             unallocated_encoding(s);
10117             return;
10118         }
10119     is_widening:
10120         if (!fp_access_check(s)) {
10121             return;
10122         }
10123
10124         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
10125         break;
10126     default:
10127         /* opcode 15 not allocated */
10128         unallocated_encoding(s);
10129         break;
10130     }
10131 }
10132
10133 /* Logic op (opcode == 3) subgroup of C3.6.16. */
10134 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
10135 {
10136     int rd = extract32(insn, 0, 5);
10137     int rn = extract32(insn, 5, 5);
10138     int rm = extract32(insn, 16, 5);
10139     int size = extract32(insn, 22, 2);
10140     bool is_u = extract32(insn, 29, 1);
10141     bool is_q = extract32(insn, 30, 1);
10142
10143     if (!fp_access_check(s)) {
10144         return;
10145     }
10146
10147     switch (size + 4 * is_u) {
10148     case 0: /* AND */
10149         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
10150         return;
10151     case 1: /* BIC */
10152         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
10153         return;
10154     case 2: /* ORR */
10155         if (rn == rm) { /* MOV */
10156             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_mov, 0);
10157         } else {
10158             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
10159         }
10160         return;
10161     case 3: /* ORN */
10162         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
10163         return;
10164     case 4: /* EOR */
10165         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
10166         return;
10167
10168     case 5: /* BSL bitwise select */
10169         gen_gvec_op3(s, is_q, rd, rn, rm, &bsl_op);
10170         return;
10171     case 6: /* BIT, bitwise insert if true */
10172         gen_gvec_op3(s, is_q, rd, rn, rm, &bit_op);
10173         return;
10174     case 7: /* BIF, bitwise insert if false */
10175         gen_gvec_op3(s, is_q, rd, rn, rm, &bif_op);
10176         return;
10177
10178     default:
10179         g_assert_not_reached();
10180     }
10181 }
10182
10183 /* Pairwise op subgroup of C3.6.16.
10184  *
10185  * This is called directly or via the handle_3same_float for float pairwise
10186  * operations where the opcode and size are calculated differently.
10187  */
10188 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
10189                                    int size, int rn, int rm, int rd)
10190 {
10191     TCGv_ptr fpst;
10192     int pass;
10193
10194     /* Floating point operations need fpst */
10195     if (opcode >= 0x58) {
10196         fpst = get_fpstatus_ptr(false);
10197     } else {
10198         fpst = NULL;
10199     }
10200
10201     if (!fp_access_check(s)) {
10202         return;
10203     }
10204
10205     /* These operations work on the concatenated rm:rn, with each pair of
10206      * adjacent elements being operated on to produce an element in the result.
10207      */
10208     if (size == 3) {
10209         TCGv_i64 tcg_res[2];
10210
10211         for (pass = 0; pass < 2; pass++) {
10212             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10213             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10214             int passreg = (pass == 0) ? rn : rm;
10215
10216             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
10217             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
10218             tcg_res[pass] = tcg_temp_new_i64();
10219
10220             switch (opcode) {
10221             case 0x17: /* ADDP */
10222                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
10223                 break;
10224             case 0x58: /* FMAXNMP */
10225                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10226                 break;
10227             case 0x5a: /* FADDP */
10228                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10229                 break;
10230             case 0x5e: /* FMAXP */
10231                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10232                 break;
10233             case 0x78: /* FMINNMP */
10234                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10235                 break;
10236             case 0x7e: /* FMINP */
10237                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10238                 break;
10239             default:
10240                 g_assert_not_reached();
10241             }
10242
10243             tcg_temp_free_i64(tcg_op1);
10244             tcg_temp_free_i64(tcg_op2);
10245         }
10246
10247         for (pass = 0; pass < 2; pass++) {
10248             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10249             tcg_temp_free_i64(tcg_res[pass]);
10250         }
10251     } else {
10252         int maxpass = is_q ? 4 : 2;
10253         TCGv_i32 tcg_res[4];
10254
10255         for (pass = 0; pass < maxpass; pass++) {
10256             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10257             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10258             NeonGenTwoOpFn *genfn = NULL;
10259             int passreg = pass < (maxpass / 2) ? rn : rm;
10260             int passelt = (is_q && (pass & 1)) ? 2 : 0;
10261
10262             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
10263             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
10264             tcg_res[pass] = tcg_temp_new_i32();
10265
10266             switch (opcode) {
10267             case 0x17: /* ADDP */
10268             {
10269                 static NeonGenTwoOpFn * const fns[3] = {
10270                     gen_helper_neon_padd_u8,
10271                     gen_helper_neon_padd_u16,
10272                     tcg_gen_add_i32,
10273                 };
10274                 genfn = fns[size];
10275                 break;
10276             }
10277             case 0x14: /* SMAXP, UMAXP */
10278             {
10279                 static NeonGenTwoOpFn * const fns[3][2] = {
10280                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
10281                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
10282                     { tcg_gen_smax_i32, tcg_gen_umax_i32 },
10283                 };
10284                 genfn = fns[size][u];
10285                 break;
10286             }
10287             case 0x15: /* SMINP, UMINP */
10288             {
10289                 static NeonGenTwoOpFn * const fns[3][2] = {
10290                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
10291                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
10292                     { tcg_gen_smin_i32, tcg_gen_umin_i32 },
10293                 };
10294                 genfn = fns[size][u];
10295                 break;
10296             }
10297             /* The FP operations are all on single floats (32 bit) */
10298             case 0x58: /* FMAXNMP */
10299                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10300                 break;
10301             case 0x5a: /* FADDP */
10302                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10303                 break;
10304             case 0x5e: /* FMAXP */
10305                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10306                 break;
10307             case 0x78: /* FMINNMP */
10308                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10309                 break;
10310             case 0x7e: /* FMINP */
10311                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10312                 break;
10313             default:
10314                 g_assert_not_reached();
10315             }
10316
10317             /* FP ops called directly, otherwise call now */
10318             if (genfn) {
10319                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
10320             }
10321
10322             tcg_temp_free_i32(tcg_op1);
10323             tcg_temp_free_i32(tcg_op2);
10324         }
10325
10326         for (pass = 0; pass < maxpass; pass++) {
10327             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
10328             tcg_temp_free_i32(tcg_res[pass]);
10329         }
10330         clear_vec_high(s, is_q, rd);
10331     }
10332
10333     if (fpst) {
10334         tcg_temp_free_ptr(fpst);
10335     }
10336 }
10337
10338 /* Floating point op subgroup of C3.6.16. */
10339 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
10340 {
10341     /* For floating point ops, the U, size[1] and opcode bits
10342      * together indicate the operation. size[0] indicates single
10343      * or double.
10344      */
10345     int fpopcode = extract32(insn, 11, 5)
10346         | (extract32(insn, 23, 1) << 5)
10347         | (extract32(insn, 29, 1) << 6);
10348     int is_q = extract32(insn, 30, 1);
10349     int size = extract32(insn, 22, 1);
10350     int rm = extract32(insn, 16, 5);
10351     int rn = extract32(insn, 5, 5);
10352     int rd = extract32(insn, 0, 5);
10353
10354     int datasize = is_q ? 128 : 64;
10355     int esize = 32 << size;
10356     int elements = datasize / esize;
10357
10358     if (size == 1 && !is_q) {
10359         unallocated_encoding(s);
10360         return;
10361     }
10362
10363     switch (fpopcode) {
10364     case 0x58: /* FMAXNMP */
10365     case 0x5a: /* FADDP */
10366     case 0x5e: /* FMAXP */
10367     case 0x78: /* FMINNMP */
10368     case 0x7e: /* FMINP */
10369         if (size && !is_q) {
10370             unallocated_encoding(s);
10371             return;
10372         }
10373         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
10374                                rn, rm, rd);
10375         return;
10376     case 0x1b: /* FMULX */
10377     case 0x1f: /* FRECPS */
10378     case 0x3f: /* FRSQRTS */
10379     case 0x5d: /* FACGE */
10380     case 0x7d: /* FACGT */
10381     case 0x19: /* FMLA */
10382     case 0x39: /* FMLS */
10383     case 0x18: /* FMAXNM */
10384     case 0x1a: /* FADD */
10385     case 0x1c: /* FCMEQ */
10386     case 0x1e: /* FMAX */
10387     case 0x38: /* FMINNM */
10388     case 0x3a: /* FSUB */
10389     case 0x3e: /* FMIN */
10390     case 0x5b: /* FMUL */
10391     case 0x5c: /* FCMGE */
10392     case 0x5f: /* FDIV */
10393     case 0x7a: /* FABD */
10394     case 0x7c: /* FCMGT */
10395         if (!fp_access_check(s)) {
10396             return;
10397         }
10398
10399         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
10400         return;
10401     default:
10402         unallocated_encoding(s);
10403         return;
10404     }
10405 }
10406
10407 /* Integer op subgroup of C3.6.16. */
10408 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
10409 {
10410     int is_q = extract32(insn, 30, 1);
10411     int u = extract32(insn, 29, 1);
10412     int size = extract32(insn, 22, 2);
10413     int opcode = extract32(insn, 11, 5);
10414     int rm = extract32(insn, 16, 5);
10415     int rn = extract32(insn, 5, 5);
10416     int rd = extract32(insn, 0, 5);
10417     int pass;
10418     TCGCond cond;
10419
10420     switch (opcode) {
10421     case 0x13: /* MUL, PMUL */
10422         if (u && size != 0) {
10423             unallocated_encoding(s);
10424             return;
10425         }
10426         /* fall through */
10427     case 0x0: /* SHADD, UHADD */
10428     case 0x2: /* SRHADD, URHADD */
10429     case 0x4: /* SHSUB, UHSUB */
10430     case 0xc: /* SMAX, UMAX */
10431     case 0xd: /* SMIN, UMIN */
10432     case 0xe: /* SABD, UABD */
10433     case 0xf: /* SABA, UABA */
10434     case 0x12: /* MLA, MLS */
10435         if (size == 3) {
10436             unallocated_encoding(s);
10437             return;
10438         }
10439         break;
10440     case 0x16: /* SQDMULH, SQRDMULH */
10441         if (size == 0 || size == 3) {
10442             unallocated_encoding(s);
10443             return;
10444         }
10445         break;
10446     default:
10447         if (size == 3 && !is_q) {
10448             unallocated_encoding(s);
10449             return;
10450         }
10451         break;
10452     }
10453
10454     if (!fp_access_check(s)) {
10455         return;
10456     }
10457
10458     switch (opcode) {
10459     case 0x10: /* ADD, SUB */
10460         if (u) {
10461             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
10462         } else {
10463             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
10464         }
10465         return;
10466     case 0x13: /* MUL, PMUL */
10467         if (!u) { /* MUL */
10468             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
10469             return;
10470         }
10471         break;
10472     case 0x12: /* MLA, MLS */
10473         if (u) {
10474             gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]);
10475         } else {
10476             gen_gvec_op3(s, is_q, rd, rn, rm, &mla_op[size]);
10477         }
10478         return;
10479     case 0x11:
10480         if (!u) { /* CMTST */
10481             gen_gvec_op3(s, is_q, rd, rn, rm, &cmtst_op[size]);
10482             return;
10483         }
10484         /* else CMEQ */
10485         cond = TCG_COND_EQ;
10486         goto do_gvec_cmp;
10487     case 0x06: /* CMGT, CMHI */
10488         cond = u ? TCG_COND_GTU : TCG_COND_GT;
10489         goto do_gvec_cmp;
10490     case 0x07: /* CMGE, CMHS */
10491         cond = u ? TCG_COND_GEU : TCG_COND_GE;
10492     do_gvec_cmp:
10493         tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
10494                          vec_full_reg_offset(s, rn),
10495                          vec_full_reg_offset(s, rm),
10496                          is_q ? 16 : 8, vec_full_reg_size(s));
10497         return;
10498     }
10499
10500     if (size == 3) {
10501         assert(is_q);
10502         for (pass = 0; pass < 2; pass++) {
10503             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10504             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10505             TCGv_i64 tcg_res = tcg_temp_new_i64();
10506
10507             read_vec_element(s, tcg_op1, rn, pass, MO_64);
10508             read_vec_element(s, tcg_op2, rm, pass, MO_64);
10509
10510             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
10511
10512             write_vec_element(s, tcg_res, rd, pass, MO_64);
10513
10514             tcg_temp_free_i64(tcg_res);
10515             tcg_temp_free_i64(tcg_op1);
10516             tcg_temp_free_i64(tcg_op2);
10517         }
10518     } else {
10519         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
10520             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10521             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10522             TCGv_i32 tcg_res = tcg_temp_new_i32();
10523             NeonGenTwoOpFn *genfn = NULL;
10524             NeonGenTwoOpEnvFn *genenvfn = NULL;
10525
10526             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
10527             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
10528
10529             switch (opcode) {
10530             case 0x0: /* SHADD, UHADD */
10531             {
10532                 static NeonGenTwoOpFn * const fns[3][2] = {
10533                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
10534                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
10535                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
10536                 };
10537                 genfn = fns[size][u];
10538                 break;
10539             }
10540             case 0x1: /* SQADD, UQADD */
10541             {
10542                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
10543                     { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
10544                     { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
10545                     { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
10546                 };
10547                 genenvfn = fns[size][u];
10548                 break;
10549             }
10550             case 0x2: /* SRHADD, URHADD */
10551             {
10552                 static NeonGenTwoOpFn * const fns[3][2] = {
10553                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
10554                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
10555                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
10556                 };
10557                 genfn = fns[size][u];
10558                 break;
10559             }
10560             case 0x4: /* SHSUB, UHSUB */
10561             {
10562                 static NeonGenTwoOpFn * const fns[3][2] = {
10563                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
10564                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
10565                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
10566                 };
10567                 genfn = fns[size][u];
10568                 break;
10569             }
10570             case 0x5: /* SQSUB, UQSUB */
10571             {
10572                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
10573                     { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
10574                     { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
10575                     { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
10576                 };
10577                 genenvfn = fns[size][u];
10578                 break;
10579             }
10580             case 0x8: /* SSHL, USHL */
10581             {
10582                 static NeonGenTwoOpFn * const fns[3][2] = {
10583                     { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
10584                     { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
10585                     { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
10586                 };
10587                 genfn = fns[size][u];
10588                 break;
10589             }
10590             case 0x9: /* SQSHL, UQSHL */
10591             {
10592                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
10593                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
10594                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
10595                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
10596                 };
10597                 genenvfn = fns[size][u];
10598                 break;
10599             }
10600             case 0xa: /* SRSHL, URSHL */
10601             {
10602                 static NeonGenTwoOpFn * const fns[3][2] = {
10603                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
10604                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
10605                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
10606                 };
10607                 genfn = fns[size][u];
10608                 break;
10609             }
10610             case 0xb: /* SQRSHL, UQRSHL */
10611             {
10612                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
10613                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
10614                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
10615                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
10616                 };
10617                 genenvfn = fns[size][u];
10618                 break;
10619             }
10620             case 0xc: /* SMAX, UMAX */
10621             {
10622                 static NeonGenTwoOpFn * const fns[3][2] = {
10623                     { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
10624                     { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
10625                     { tcg_gen_smax_i32, tcg_gen_umax_i32 },
10626                 };
10627                 genfn = fns[size][u];
10628                 break;
10629             }
10630
10631             case 0xd: /* SMIN, UMIN */
10632             {
10633                 static NeonGenTwoOpFn * const fns[3][2] = {
10634                     { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
10635                     { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
10636                     { tcg_gen_smin_i32, tcg_gen_umin_i32 },
10637                 };
10638                 genfn = fns[size][u];
10639                 break;
10640             }
10641             case 0xe: /* SABD, UABD */
10642             case 0xf: /* SABA, UABA */
10643             {
10644                 static NeonGenTwoOpFn * const fns[3][2] = {
10645                     { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
10646                     { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
10647                     { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
10648                 };
10649                 genfn = fns[size][u];
10650                 break;
10651             }
10652             case 0x13: /* MUL, PMUL */
10653                 assert(u); /* PMUL */
10654                 assert(size == 0);
10655                 genfn = gen_helper_neon_mul_p8;
10656                 break;
10657             case 0x16: /* SQDMULH, SQRDMULH */
10658             {
10659                 static NeonGenTwoOpEnvFn * const fns[2][2] = {
10660                     { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
10661                     { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
10662                 };
10663                 assert(size == 1 || size == 2);
10664                 genenvfn = fns[size - 1][u];
10665                 break;
10666             }
10667             default:
10668                 g_assert_not_reached();
10669             }
10670
10671             if (genenvfn) {
10672                 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
10673             } else {
10674                 genfn(tcg_res, tcg_op1, tcg_op2);
10675             }
10676
10677             if (opcode == 0xf) {
10678                 /* SABA, UABA: accumulating ops */
10679                 static NeonGenTwoOpFn * const fns[3] = {
10680                     gen_helper_neon_add_u8,
10681                     gen_helper_neon_add_u16,
10682                     tcg_gen_add_i32,
10683                 };
10684
10685                 read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
10686                 fns[size](tcg_res, tcg_op1, tcg_res);
10687             }
10688
10689             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10690
10691             tcg_temp_free_i32(tcg_res);
10692             tcg_temp_free_i32(tcg_op1);
10693             tcg_temp_free_i32(tcg_op2);
10694         }
10695     }
10696     clear_vec_high(s, is_q, rd);
10697 }
10698
10699 /* AdvSIMD three same
10700  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
10701  * +---+---+---+-----------+------+---+------+--------+---+------+------+
10702  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
10703  * +---+---+---+-----------+------+---+------+--------+---+------+------+
10704  */
10705 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
10706 {
10707     int opcode = extract32(insn, 11, 5);
10708
10709     switch (opcode) {
10710     case 0x3: /* logic ops */
10711         disas_simd_3same_logic(s, insn);
10712         break;
10713     case 0x17: /* ADDP */
10714     case 0x14: /* SMAXP, UMAXP */
10715     case 0x15: /* SMINP, UMINP */
10716     {
10717         /* Pairwise operations */
10718         int is_q = extract32(insn, 30, 1);
10719         int u = extract32(insn, 29, 1);
10720         int size = extract32(insn, 22, 2);
10721         int rm = extract32(insn, 16, 5);
10722         int rn = extract32(insn, 5, 5);
10723         int rd = extract32(insn, 0, 5);
10724         if (opcode == 0x17) {
10725             if (u || (size == 3 && !is_q)) {
10726                 unallocated_encoding(s);
10727                 return;
10728             }
10729         } else {
10730             if (size == 3) {
10731                 unallocated_encoding(s);
10732                 return;
10733             }
10734         }
10735         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
10736         break;
10737     }
10738     case 0x18 ... 0x31:
10739         /* floating point ops, sz[1] and U are part of opcode */
10740         disas_simd_3same_float(s, insn);
10741         break;
10742     default:
10743         disas_simd_3same_int(s, insn);
10744         break;
10745     }
10746 }
10747
10748 /*
10749  * Advanced SIMD three same (ARMv8.2 FP16 variants)
10750  *
10751  *  31  30  29  28       24 23  22 21 20  16 15 14 13    11 10  9    5 4    0
10752  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
10753  * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 |  Rn  |  Rd  |
10754  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
10755  *
10756  * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
10757  * (register), FACGE, FABD, FCMGT (register) and FACGT.
10758  *
10759  */
10760 static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
10761 {
10762     int opcode, fpopcode;
10763     int is_q, u, a, rm, rn, rd;
10764     int datasize, elements;
10765     int pass;
10766     TCGv_ptr fpst;
10767     bool pairwise = false;
10768
10769     if (!dc_isar_feature(aa64_fp16, s)) {
10770         unallocated_encoding(s);
10771         return;
10772     }
10773
10774     if (!fp_access_check(s)) {
10775         return;
10776     }
10777
10778     /* For these floating point ops, the U, a and opcode bits
10779      * together indicate the operation.
10780      */
10781     opcode = extract32(insn, 11, 3);
10782     u = extract32(insn, 29, 1);
10783     a = extract32(insn, 23, 1);
10784     is_q = extract32(insn, 30, 1);
10785     rm = extract32(insn, 16, 5);
10786     rn = extract32(insn, 5, 5);
10787     rd = extract32(insn, 0, 5);
10788
10789     fpopcode = opcode | (a << 3) |  (u << 4);
10790     datasize = is_q ? 128 : 64;
10791     elements = datasize / 16;
10792
10793     switch (fpopcode) {
10794     case 0x10: /* FMAXNMP */
10795     case 0x12: /* FADDP */
10796     case 0x16: /* FMAXP */
10797     case 0x18: /* FMINNMP */
10798     case 0x1e: /* FMINP */
10799         pairwise = true;
10800         break;
10801     }
10802
10803     fpst = get_fpstatus_ptr(true);
10804
10805     if (pairwise) {
10806         int maxpass = is_q ? 8 : 4;
10807         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10808         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10809         TCGv_i32 tcg_res[8];
10810
10811         for (pass = 0; pass < maxpass; pass++) {
10812             int passreg = pass < (maxpass / 2) ? rn : rm;
10813             int passelt = (pass << 1) & (maxpass - 1);
10814
10815             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
10816             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
10817             tcg_res[pass] = tcg_temp_new_i32();
10818
10819             switch (fpopcode) {
10820             case 0x10: /* FMAXNMP */
10821                 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
10822                                            fpst);
10823                 break;
10824             case 0x12: /* FADDP */
10825                 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10826                 break;
10827             case 0x16: /* FMAXP */
10828                 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10829                 break;
10830             case 0x18: /* FMINNMP */
10831                 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
10832                                            fpst);
10833                 break;
10834             case 0x1e: /* FMINP */
10835                 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10836                 break;
10837             default:
10838                 g_assert_not_reached();
10839             }
10840         }
10841
10842         for (pass = 0; pass < maxpass; pass++) {
10843             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
10844             tcg_temp_free_i32(tcg_res[pass]);
10845         }
10846
10847         tcg_temp_free_i32(tcg_op1);
10848         tcg_temp_free_i32(tcg_op2);
10849
10850     } else {
10851         for (pass = 0; pass < elements; pass++) {
10852             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10853             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10854             TCGv_i32 tcg_res = tcg_temp_new_i32();
10855
10856             read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
10857             read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
10858
10859             switch (fpopcode) {
10860             case 0x0: /* FMAXNM */
10861                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
10862                 break;
10863             case 0x1: /* FMLA */
10864                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
10865                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
10866                                            fpst);
10867                 break;
10868             case 0x2: /* FADD */
10869                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
10870                 break;
10871             case 0x3: /* FMULX */
10872                 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
10873                 break;
10874             case 0x4: /* FCMEQ */
10875                 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
10876                 break;
10877             case 0x6: /* FMAX */
10878                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
10879                 break;
10880             case 0x7: /* FRECPS */
10881                 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
10882                 break;
10883             case 0x8: /* FMINNM */
10884                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
10885                 break;
10886             case 0x9: /* FMLS */
10887                 /* As usual for ARM, separate negation for fused multiply-add */
10888                 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
10889                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
10890                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
10891                                            fpst);
10892                 break;
10893             case 0xa: /* FSUB */
10894                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
10895                 break;
10896             case 0xe: /* FMIN */
10897                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
10898                 break;
10899             case 0xf: /* FRSQRTS */
10900                 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
10901                 break;
10902             case 0x13: /* FMUL */
10903                 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
10904                 break;
10905             case 0x14: /* FCMGE */
10906                 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
10907                 break;
10908             case 0x15: /* FACGE */
10909                 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
10910                 break;
10911             case 0x17: /* FDIV */
10912                 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
10913                 break;
10914             case 0x1a: /* FABD */
10915                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
10916                 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
10917                 break;
10918             case 0x1c: /* FCMGT */
10919                 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
10920                 break;
10921             case 0x1d: /* FACGT */
10922                 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
10923                 break;
10924             default:
10925                 fprintf(stderr, "%s: insn %#04x, fpop %#2x @ %#" PRIx64 "\n",
10926                         __func__, insn, fpopcode, s->pc);
10927                 g_assert_not_reached();
10928             }
10929
10930             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
10931             tcg_temp_free_i32(tcg_res);
10932             tcg_temp_free_i32(tcg_op1);
10933             tcg_temp_free_i32(tcg_op2);
10934         }
10935     }
10936
10937     tcg_temp_free_ptr(fpst);
10938
10939     clear_vec_high(s, is_q, rd);
10940 }
10941
10942 /* AdvSIMD three same extra
10943  *  31   30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
10944  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
10945  * | 0 | Q | U | 0 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
10946  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
10947  */
10948 static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
10949 {
10950     int rd = extract32(insn, 0, 5);
10951     int rn = extract32(insn, 5, 5);
10952     int opcode = extract32(insn, 11, 4);
10953     int rm = extract32(insn, 16, 5);
10954     int size = extract32(insn, 22, 2);
10955     bool u = extract32(insn, 29, 1);
10956     bool is_q = extract32(insn, 30, 1);
10957     bool feature;
10958     int rot;
10959
10960     switch (u * 16 + opcode) {
10961     case 0x10: /* SQRDMLAH (vector) */
10962     case 0x11: /* SQRDMLSH (vector) */
10963         if (size != 1 && size != 2) {
10964             unallocated_encoding(s);
10965             return;
10966         }
10967         feature = dc_isar_feature(aa64_rdm, s);
10968         break;
10969     case 0x02: /* SDOT (vector) */
10970     case 0x12: /* UDOT (vector) */
10971         if (size != MO_32) {
10972             unallocated_encoding(s);
10973             return;
10974         }
10975         feature = dc_isar_feature(aa64_dp, s);
10976         break;
10977     case 0x18: /* FCMLA, #0 */
10978     case 0x19: /* FCMLA, #90 */
10979     case 0x1a: /* FCMLA, #180 */
10980     case 0x1b: /* FCMLA, #270 */
10981     case 0x1c: /* FCADD, #90 */
10982     case 0x1e: /* FCADD, #270 */
10983         if (size == 0
10984             || (size == 1 && !dc_isar_feature(aa64_fp16, s))
10985             || (size == 3 && !is_q)) {
10986             unallocated_encoding(s);
10987             return;
10988         }
10989         feature = dc_isar_feature(aa64_fcma, s);
10990         break;
10991     default:
10992         unallocated_encoding(s);
10993         return;
10994     }
10995     if (!feature) {
10996         unallocated_encoding(s);
10997         return;
10998     }
10999     if (!fp_access_check(s)) {
11000         return;
11001     }
11002
11003     switch (opcode) {
11004     case 0x0: /* SQRDMLAH (vector) */
11005         switch (size) {
11006         case 1:
11007             gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s16);
11008             break;
11009         case 2:
11010             gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s32);
11011             break;
11012         default:
11013             g_assert_not_reached();
11014         }
11015         return;
11016
11017     case 0x1: /* SQRDMLSH (vector) */
11018         switch (size) {
11019         case 1:
11020             gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s16);
11021             break;
11022         case 2:
11023             gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s32);
11024             break;
11025         default:
11026             g_assert_not_reached();
11027         }
11028         return;
11029
11030     case 0x2: /* SDOT / UDOT */
11031         gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0,
11032                          u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
11033         return;
11034
11035     case 0x8: /* FCMLA, #0 */
11036     case 0x9: /* FCMLA, #90 */
11037     case 0xa: /* FCMLA, #180 */
11038     case 0xb: /* FCMLA, #270 */
11039         rot = extract32(opcode, 0, 2);
11040         switch (size) {
11041         case 1:
11042             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, true, rot,
11043                               gen_helper_gvec_fcmlah);
11044             break;
11045         case 2:
11046             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, false, rot,
11047                               gen_helper_gvec_fcmlas);
11048             break;
11049         case 3:
11050             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, false, rot,
11051                               gen_helper_gvec_fcmlad);
11052             break;
11053         default:
11054             g_assert_not_reached();
11055         }
11056         return;
11057
11058     case 0xc: /* FCADD, #90 */
11059     case 0xe: /* FCADD, #270 */
11060         rot = extract32(opcode, 1, 1);
11061         switch (size) {
11062         case 1:
11063             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11064                               gen_helper_gvec_fcaddh);
11065             break;
11066         case 2:
11067             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11068                               gen_helper_gvec_fcadds);
11069             break;
11070         case 3:
11071             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11072                               gen_helper_gvec_fcaddd);
11073             break;
11074         default:
11075             g_assert_not_reached();
11076         }
11077         return;
11078
11079     default:
11080         g_assert_not_reached();
11081     }
11082 }
11083
11084 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
11085                                   int size, int rn, int rd)
11086 {
11087     /* Handle 2-reg-misc ops which are widening (so each size element
11088      * in the source becomes a 2*size element in the destination.
11089      * The only instruction like this is FCVTL.
11090      */
11091     int pass;
11092
11093     if (size == 3) {
11094         /* 32 -> 64 bit fp conversion */
11095         TCGv_i64 tcg_res[2];
11096         int srcelt = is_q ? 2 : 0;
11097
11098         for (pass = 0; pass < 2; pass++) {
11099             TCGv_i32 tcg_op = tcg_temp_new_i32();
11100             tcg_res[pass] = tcg_temp_new_i64();
11101
11102             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
11103             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
11104             tcg_temp_free_i32(tcg_op);
11105         }
11106         for (pass = 0; pass < 2; pass++) {
11107             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11108             tcg_temp_free_i64(tcg_res[pass]);
11109         }
11110     } else {
11111         /* 16 -> 32 bit fp conversion */
11112         int srcelt = is_q ? 4 : 0;
11113         TCGv_i32 tcg_res[4];
11114         TCGv_ptr fpst = get_fpstatus_ptr(false);
11115         TCGv_i32 ahp = get_ahp_flag();
11116
11117         for (pass = 0; pass < 4; pass++) {
11118             tcg_res[pass] = tcg_temp_new_i32();
11119
11120             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
11121             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
11122                                            fpst, ahp);
11123         }
11124         for (pass = 0; pass < 4; pass++) {
11125             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11126             tcg_temp_free_i32(tcg_res[pass]);
11127         }
11128
11129         tcg_temp_free_ptr(fpst);
11130         tcg_temp_free_i32(ahp);
11131     }
11132 }
11133
11134 static void handle_rev(DisasContext *s, int opcode, bool u,
11135                        bool is_q, int size, int rn, int rd)
11136 {
11137     int op = (opcode << 1) | u;
11138     int opsz = op + size;
11139     int grp_size = 3 - opsz;
11140     int dsize = is_q ? 128 : 64;
11141     int i;
11142
11143     if (opsz >= 3) {
11144         unallocated_encoding(s);
11145         return;
11146     }
11147
11148     if (!fp_access_check(s)) {
11149         return;
11150     }
11151
11152     if (size == 0) {
11153         /* Special case bytes, use bswap op on each group of elements */
11154         int groups = dsize / (8 << grp_size);
11155
11156         for (i = 0; i < groups; i++) {
11157             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
11158
11159             read_vec_element(s, tcg_tmp, rn, i, grp_size);
11160             switch (grp_size) {
11161             case MO_16:
11162                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
11163                 break;
11164             case MO_32:
11165                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
11166                 break;
11167             case MO_64:
11168                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
11169                 break;
11170             default:
11171                 g_assert_not_reached();
11172             }
11173             write_vec_element(s, tcg_tmp, rd, i, grp_size);
11174             tcg_temp_free_i64(tcg_tmp);
11175         }
11176         clear_vec_high(s, is_q, rd);
11177     } else {
11178         int revmask = (1 << grp_size) - 1;
11179         int esize = 8 << size;
11180         int elements = dsize / esize;
11181         TCGv_i64 tcg_rn = tcg_temp_new_i64();
11182         TCGv_i64 tcg_rd = tcg_const_i64(0);
11183         TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
11184
11185         for (i = 0; i < elements; i++) {
11186             int e_rev = (i & 0xf) ^ revmask;
11187             int off = e_rev * esize;
11188             read_vec_element(s, tcg_rn, rn, i, size);
11189             if (off >= 64) {
11190                 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
11191                                     tcg_rn, off - 64, esize);
11192             } else {
11193                 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
11194             }
11195         }
11196         write_vec_element(s, tcg_rd, rd, 0, MO_64);
11197         write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
11198
11199         tcg_temp_free_i64(tcg_rd_hi);
11200         tcg_temp_free_i64(tcg_rd);
11201         tcg_temp_free_i64(tcg_rn);
11202     }
11203 }
11204
11205 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
11206                                   bool is_q, int size, int rn, int rd)
11207 {
11208     /* Implement the pairwise operations from 2-misc:
11209      * SADDLP, UADDLP, SADALP, UADALP.
11210      * These all add pairs of elements in the input to produce a
11211      * double-width result element in the output (possibly accumulating).
11212      */
11213     bool accum = (opcode == 0x6);
11214     int maxpass = is_q ? 2 : 1;
11215     int pass;
11216     TCGv_i64 tcg_res[2];
11217
11218     if (size == 2) {
11219         /* 32 + 32 -> 64 op */
11220         TCGMemOp memop = size + (u ? 0 : MO_SIGN);
11221
11222         for (pass = 0; pass < maxpass; pass++) {
11223             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11224             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11225
11226             tcg_res[pass] = tcg_temp_new_i64();
11227
11228             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
11229             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
11230             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
11231             if (accum) {
11232                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
11233                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
11234             }
11235
11236             tcg_temp_free_i64(tcg_op1);
11237             tcg_temp_free_i64(tcg_op2);
11238         }
11239     } else {
11240         for (pass = 0; pass < maxpass; pass++) {
11241             TCGv_i64 tcg_op = tcg_temp_new_i64();
11242             NeonGenOneOpFn *genfn;
11243             static NeonGenOneOpFn * const fns[2][2] = {
11244                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
11245                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
11246             };
11247
11248             genfn = fns[size][u];
11249
11250             tcg_res[pass] = tcg_temp_new_i64();
11251
11252             read_vec_element(s, tcg_op, rn, pass, MO_64);
11253             genfn(tcg_res[pass], tcg_op);
11254
11255             if (accum) {
11256                 read_vec_element(s, tcg_op, rd, pass, MO_64);
11257                 if (size == 0) {
11258                     gen_helper_neon_addl_u16(tcg_res[pass],
11259                                              tcg_res[pass], tcg_op);
11260                 } else {
11261                     gen_helper_neon_addl_u32(tcg_res[pass],
11262                                              tcg_res[pass], tcg_op);
11263                 }
11264             }
11265             tcg_temp_free_i64(tcg_op);
11266         }
11267     }
11268     if (!is_q) {
11269         tcg_res[1] = tcg_const_i64(0);
11270     }
11271     for (pass = 0; pass < 2; pass++) {
11272         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11273         tcg_temp_free_i64(tcg_res[pass]);
11274     }
11275 }
11276
11277 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
11278 {
11279     /* Implement SHLL and SHLL2 */
11280     int pass;
11281     int part = is_q ? 2 : 0;
11282     TCGv_i64 tcg_res[2];
11283
11284     for (pass = 0; pass < 2; pass++) {
11285         static NeonGenWidenFn * const widenfns[3] = {
11286             gen_helper_neon_widen_u8,
11287             gen_helper_neon_widen_u16,
11288             tcg_gen_extu_i32_i64,
11289         };
11290         NeonGenWidenFn *widenfn = widenfns[size];
11291         TCGv_i32 tcg_op = tcg_temp_new_i32();
11292
11293         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
11294         tcg_res[pass] = tcg_temp_new_i64();
11295         widenfn(tcg_res[pass], tcg_op);
11296         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
11297
11298         tcg_temp_free_i32(tcg_op);
11299     }
11300
11301     for (pass = 0; pass < 2; pass++) {
11302         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11303         tcg_temp_free_i64(tcg_res[pass]);
11304     }
11305 }
11306
11307 /* AdvSIMD two reg misc
11308  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
11309  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11310  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11311  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11312  */
11313 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
11314 {
11315     int size = extract32(insn, 22, 2);
11316     int opcode = extract32(insn, 12, 5);
11317     bool u = extract32(insn, 29, 1);
11318     bool is_q = extract32(insn, 30, 1);
11319     int rn = extract32(insn, 5, 5);
11320     int rd = extract32(insn, 0, 5);
11321     bool need_fpstatus = false;
11322     bool need_rmode = false;
11323     int rmode = -1;
11324     TCGv_i32 tcg_rmode;
11325     TCGv_ptr tcg_fpstatus;
11326
11327     switch (opcode) {
11328     case 0x0: /* REV64, REV32 */
11329     case 0x1: /* REV16 */
11330         handle_rev(s, opcode, u, is_q, size, rn, rd);
11331         return;
11332     case 0x5: /* CNT, NOT, RBIT */
11333         if (u && size == 0) {
11334             /* NOT */
11335             break;
11336         } else if (u && size == 1) {
11337             /* RBIT */
11338             break;
11339         } else if (!u && size == 0) {
11340             /* CNT */
11341             break;
11342         }
11343         unallocated_encoding(s);
11344         return;
11345     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
11346     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
11347         if (size == 3) {
11348             unallocated_encoding(s);
11349             return;
11350         }
11351         if (!fp_access_check(s)) {
11352             return;
11353         }
11354
11355         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
11356         return;
11357     case 0x4: /* CLS, CLZ */
11358         if (size == 3) {
11359             unallocated_encoding(s);
11360             return;
11361         }
11362         break;
11363     case 0x2: /* SADDLP, UADDLP */
11364     case 0x6: /* SADALP, UADALP */
11365         if (size == 3) {
11366             unallocated_encoding(s);
11367             return;
11368         }
11369         if (!fp_access_check(s)) {
11370             return;
11371         }
11372         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
11373         return;
11374     case 0x13: /* SHLL, SHLL2 */
11375         if (u == 0 || size == 3) {
11376             unallocated_encoding(s);
11377             return;
11378         }
11379         if (!fp_access_check(s)) {
11380             return;
11381         }
11382         handle_shll(s, is_q, size, rn, rd);
11383         return;
11384     case 0xa: /* CMLT */
11385         if (u == 1) {
11386             unallocated_encoding(s);
11387             return;
11388         }
11389         /* fall through */
11390     case 0x8: /* CMGT, CMGE */
11391     case 0x9: /* CMEQ, CMLE */
11392     case 0xb: /* ABS, NEG */
11393         if (size == 3 && !is_q) {
11394             unallocated_encoding(s);
11395             return;
11396         }
11397         break;
11398     case 0x3: /* SUQADD, USQADD */
11399         if (size == 3 && !is_q) {
11400             unallocated_encoding(s);
11401             return;
11402         }
11403         if (!fp_access_check(s)) {
11404             return;
11405         }
11406         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
11407         return;
11408     case 0x7: /* SQABS, SQNEG */
11409         if (size == 3 && !is_q) {
11410             unallocated_encoding(s);
11411             return;
11412         }
11413         break;
11414     case 0xc ... 0xf:
11415     case 0x16 ... 0x1d:
11416     case 0x1f:
11417     {
11418         /* Floating point: U, size[1] and opcode indicate operation;
11419          * size[0] indicates single or double precision.
11420          */
11421         int is_double = extract32(size, 0, 1);
11422         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
11423         size = is_double ? 3 : 2;
11424         switch (opcode) {
11425         case 0x2f: /* FABS */
11426         case 0x6f: /* FNEG */
11427             if (size == 3 && !is_q) {
11428                 unallocated_encoding(s);
11429                 return;
11430             }
11431             break;
11432         case 0x1d: /* SCVTF */
11433         case 0x5d: /* UCVTF */
11434         {
11435             bool is_signed = (opcode == 0x1d) ? true : false;
11436             int elements = is_double ? 2 : is_q ? 4 : 2;
11437             if (is_double && !is_q) {
11438                 unallocated_encoding(s);
11439                 return;
11440             }
11441             if (!fp_access_check(s)) {
11442                 return;
11443             }
11444             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
11445             return;
11446         }
11447         case 0x2c: /* FCMGT (zero) */
11448         case 0x2d: /* FCMEQ (zero) */
11449         case 0x2e: /* FCMLT (zero) */
11450         case 0x6c: /* FCMGE (zero) */
11451         case 0x6d: /* FCMLE (zero) */
11452             if (size == 3 && !is_q) {
11453                 unallocated_encoding(s);
11454                 return;
11455             }
11456             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
11457             return;
11458         case 0x7f: /* FSQRT */
11459             if (size == 3 && !is_q) {
11460                 unallocated_encoding(s);
11461                 return;
11462             }
11463             break;
11464         case 0x1a: /* FCVTNS */
11465         case 0x1b: /* FCVTMS */
11466         case 0x3a: /* FCVTPS */
11467         case 0x3b: /* FCVTZS */
11468         case 0x5a: /* FCVTNU */
11469         case 0x5b: /* FCVTMU */
11470         case 0x7a: /* FCVTPU */
11471         case 0x7b: /* FCVTZU */
11472             need_fpstatus = true;
11473             need_rmode = true;
11474             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
11475             if (size == 3 && !is_q) {
11476                 unallocated_encoding(s);
11477                 return;
11478             }
11479             break;
11480         case 0x5c: /* FCVTAU */
11481         case 0x1c: /* FCVTAS */
11482             need_fpstatus = true;
11483             need_rmode = true;
11484             rmode = FPROUNDING_TIEAWAY;
11485             if (size == 3 && !is_q) {
11486                 unallocated_encoding(s);
11487                 return;
11488             }
11489             break;
11490         case 0x3c: /* URECPE */
11491             if (size == 3) {
11492                 unallocated_encoding(s);
11493                 return;
11494             }
11495             /* fall through */
11496         case 0x3d: /* FRECPE */
11497         case 0x7d: /* FRSQRTE */
11498             if (size == 3 && !is_q) {
11499                 unallocated_encoding(s);
11500                 return;
11501             }
11502             if (!fp_access_check(s)) {
11503                 return;
11504             }
11505             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
11506             return;
11507         case 0x56: /* FCVTXN, FCVTXN2 */
11508             if (size == 2) {
11509                 unallocated_encoding(s);
11510                 return;
11511             }
11512             /* fall through */
11513         case 0x16: /* FCVTN, FCVTN2 */
11514             /* handle_2misc_narrow does a 2*size -> size operation, but these
11515              * instructions encode the source size rather than dest size.
11516              */
11517             if (!fp_access_check(s)) {
11518                 return;
11519             }
11520             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
11521             return;
11522         case 0x17: /* FCVTL, FCVTL2 */
11523             if (!fp_access_check(s)) {
11524                 return;
11525             }
11526             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
11527             return;
11528         case 0x18: /* FRINTN */
11529         case 0x19: /* FRINTM */
11530         case 0x38: /* FRINTP */
11531         case 0x39: /* FRINTZ */
11532             need_rmode = true;
11533             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
11534             /* fall through */
11535         case 0x59: /* FRINTX */
11536         case 0x79: /* FRINTI */
11537             need_fpstatus = true;
11538             if (size == 3 && !is_q) {
11539                 unallocated_encoding(s);
11540                 return;
11541             }
11542             break;
11543         case 0x58: /* FRINTA */
11544             need_rmode = true;
11545             rmode = FPROUNDING_TIEAWAY;
11546             need_fpstatus = true;
11547             if (size == 3 && !is_q) {
11548                 unallocated_encoding(s);
11549                 return;
11550             }
11551             break;
11552         case 0x7c: /* URSQRTE */
11553             if (size == 3) {
11554                 unallocated_encoding(s);
11555                 return;
11556             }
11557             need_fpstatus = true;
11558             break;
11559         default:
11560             unallocated_encoding(s);
11561             return;
11562         }
11563         break;
11564     }
11565     default:
11566         unallocated_encoding(s);
11567         return;
11568     }
11569
11570     if (!fp_access_check(s)) {
11571         return;
11572     }
11573
11574     if (need_fpstatus || need_rmode) {
11575         tcg_fpstatus = get_fpstatus_ptr(false);
11576     } else {
11577         tcg_fpstatus = NULL;
11578     }
11579     if (need_rmode) {
11580         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
11581         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
11582     } else {
11583         tcg_rmode = NULL;
11584     }
11585
11586     switch (opcode) {
11587     case 0x5:
11588         if (u && size == 0) { /* NOT */
11589             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
11590             return;
11591         }
11592         break;
11593     case 0xb:
11594         if (u) { /* NEG */
11595             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
11596             return;
11597         }
11598         break;
11599     }
11600
11601     if (size == 3) {
11602         /* All 64-bit element operations can be shared with scalar 2misc */
11603         int pass;
11604
11605         /* Coverity claims (size == 3 && !is_q) has been eliminated
11606          * from all paths leading to here.
11607          */
11608         tcg_debug_assert(is_q);
11609         for (pass = 0; pass < 2; pass++) {
11610             TCGv_i64 tcg_op = tcg_temp_new_i64();
11611             TCGv_i64 tcg_res = tcg_temp_new_i64();
11612
11613             read_vec_element(s, tcg_op, rn, pass, MO_64);
11614
11615             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
11616                             tcg_rmode, tcg_fpstatus);
11617
11618             write_vec_element(s, tcg_res, rd, pass, MO_64);
11619
11620             tcg_temp_free_i64(tcg_res);
11621             tcg_temp_free_i64(tcg_op);
11622         }
11623     } else {
11624         int pass;
11625
11626         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
11627             TCGv_i32 tcg_op = tcg_temp_new_i32();
11628             TCGv_i32 tcg_res = tcg_temp_new_i32();
11629             TCGCond cond;
11630
11631             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
11632
11633             if (size == 2) {
11634                 /* Special cases for 32 bit elements */
11635                 switch (opcode) {
11636                 case 0xa: /* CMLT */
11637                     /* 32 bit integer comparison against zero, result is
11638                      * test ? (2^32 - 1) : 0. We implement via setcond(test)
11639                      * and inverting.
11640                      */
11641                     cond = TCG_COND_LT;
11642                 do_cmop:
11643                     tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
11644                     tcg_gen_neg_i32(tcg_res, tcg_res);
11645                     break;
11646                 case 0x8: /* CMGT, CMGE */
11647                     cond = u ? TCG_COND_GE : TCG_COND_GT;
11648                     goto do_cmop;
11649                 case 0x9: /* CMEQ, CMLE */
11650                     cond = u ? TCG_COND_LE : TCG_COND_EQ;
11651                     goto do_cmop;
11652                 case 0x4: /* CLS */
11653                     if (u) {
11654                         tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
11655                     } else {
11656                         tcg_gen_clrsb_i32(tcg_res, tcg_op);
11657                     }
11658                     break;
11659                 case 0x7: /* SQABS, SQNEG */
11660                     if (u) {
11661                         gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
11662                     } else {
11663                         gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
11664                     }
11665                     break;
11666                 case 0xb: /* ABS, NEG */
11667                     if (u) {
11668                         tcg_gen_neg_i32(tcg_res, tcg_op);
11669                     } else {
11670                         TCGv_i32 tcg_zero = tcg_const_i32(0);
11671                         tcg_gen_neg_i32(tcg_res, tcg_op);
11672                         tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
11673                                             tcg_zero, tcg_op, tcg_res);
11674                         tcg_temp_free_i32(tcg_zero);
11675                     }
11676                     break;
11677                 case 0x2f: /* FABS */
11678                     gen_helper_vfp_abss(tcg_res, tcg_op);
11679                     break;
11680                 case 0x6f: /* FNEG */
11681                     gen_helper_vfp_negs(tcg_res, tcg_op);
11682                     break;
11683                 case 0x7f: /* FSQRT */
11684                     gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
11685                     break;
11686                 case 0x1a: /* FCVTNS */
11687                 case 0x1b: /* FCVTMS */
11688                 case 0x1c: /* FCVTAS */
11689                 case 0x3a: /* FCVTPS */
11690                 case 0x3b: /* FCVTZS */
11691                 {
11692                     TCGv_i32 tcg_shift = tcg_const_i32(0);
11693                     gen_helper_vfp_tosls(tcg_res, tcg_op,
11694                                          tcg_shift, tcg_fpstatus);
11695                     tcg_temp_free_i32(tcg_shift);
11696                     break;
11697                 }
11698                 case 0x5a: /* FCVTNU */
11699                 case 0x5b: /* FCVTMU */
11700                 case 0x5c: /* FCVTAU */
11701                 case 0x7a: /* FCVTPU */
11702                 case 0x7b: /* FCVTZU */
11703                 {
11704                     TCGv_i32 tcg_shift = tcg_const_i32(0);
11705                     gen_helper_vfp_touls(tcg_res, tcg_op,
11706                                          tcg_shift, tcg_fpstatus);
11707                     tcg_temp_free_i32(tcg_shift);
11708                     break;
11709                 }
11710                 case 0x18: /* FRINTN */
11711                 case 0x19: /* FRINTM */
11712                 case 0x38: /* FRINTP */
11713                 case 0x39: /* FRINTZ */
11714                 case 0x58: /* FRINTA */
11715                 case 0x79: /* FRINTI */
11716                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
11717                     break;
11718                 case 0x59: /* FRINTX */
11719                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
11720                     break;
11721                 case 0x7c: /* URSQRTE */
11722                     gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
11723                     break;
11724                 default:
11725                     g_assert_not_reached();
11726                 }
11727             } else {
11728                 /* Use helpers for 8 and 16 bit elements */
11729                 switch (opcode) {
11730                 case 0x5: /* CNT, RBIT */
11731                     /* For these two insns size is part of the opcode specifier
11732                      * (handled earlier); they always operate on byte elements.
11733                      */
11734                     if (u) {
11735                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
11736                     } else {
11737                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
11738                     }
11739                     break;
11740                 case 0x7: /* SQABS, SQNEG */
11741                 {
11742                     NeonGenOneOpEnvFn *genfn;
11743                     static NeonGenOneOpEnvFn * const fns[2][2] = {
11744                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
11745                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
11746                     };
11747                     genfn = fns[size][u];
11748                     genfn(tcg_res, cpu_env, tcg_op);
11749                     break;
11750                 }
11751                 case 0x8: /* CMGT, CMGE */
11752                 case 0x9: /* CMEQ, CMLE */
11753                 case 0xa: /* CMLT */
11754                 {
11755                     static NeonGenTwoOpFn * const fns[3][2] = {
11756                         { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
11757                         { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
11758                         { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
11759                     };
11760                     NeonGenTwoOpFn *genfn;
11761                     int comp;
11762                     bool reverse;
11763                     TCGv_i32 tcg_zero = tcg_const_i32(0);
11764
11765                     /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
11766                     comp = (opcode - 0x8) * 2 + u;
11767                     /* ...but LE, LT are implemented as reverse GE, GT */
11768                     reverse = (comp > 2);
11769                     if (reverse) {
11770                         comp = 4 - comp;
11771                     }
11772                     genfn = fns[comp][size];
11773                     if (reverse) {
11774                         genfn(tcg_res, tcg_zero, tcg_op);
11775                     } else {
11776                         genfn(tcg_res, tcg_op, tcg_zero);
11777                     }
11778                     tcg_temp_free_i32(tcg_zero);
11779                     break;
11780                 }
11781                 case 0xb: /* ABS, NEG */
11782                     if (u) {
11783                         TCGv_i32 tcg_zero = tcg_const_i32(0);
11784                         if (size) {
11785                             gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
11786                         } else {
11787                             gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
11788                         }
11789                         tcg_temp_free_i32(tcg_zero);
11790                     } else {
11791                         if (size) {
11792                             gen_helper_neon_abs_s16(tcg_res, tcg_op);
11793                         } else {
11794                             gen_helper_neon_abs_s8(tcg_res, tcg_op);
11795                         }
11796                     }
11797                     break;
11798                 case 0x4: /* CLS, CLZ */
11799                     if (u) {
11800                         if (size == 0) {
11801                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
11802                         } else {
11803                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
11804                         }
11805                     } else {
11806                         if (size == 0) {
11807                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
11808                         } else {
11809                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
11810                         }
11811                     }
11812                     break;
11813                 default:
11814                     g_assert_not_reached();
11815                 }
11816             }
11817
11818             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
11819
11820             tcg_temp_free_i32(tcg_res);
11821             tcg_temp_free_i32(tcg_op);
11822         }
11823     }
11824     clear_vec_high(s, is_q, rd);
11825
11826     if (need_rmode) {
11827         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
11828         tcg_temp_free_i32(tcg_rmode);
11829     }
11830     if (need_fpstatus) {
11831         tcg_temp_free_ptr(tcg_fpstatus);
11832     }
11833 }
11834
11835 /* AdvSIMD [scalar] two register miscellaneous (FP16)
11836  *
11837  *   31  30  29 28  27     24  23 22 21       17 16    12 11 10 9    5 4    0
11838  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
11839  * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11840  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
11841  *   mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
11842  *   val:  0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
11843  *
11844  * This actually covers two groups where scalar access is governed by
11845  * bit 28. A bunch of the instructions (float to integral) only exist
11846  * in the vector form and are un-allocated for the scalar decode. Also
11847  * in the scalar decode Q is always 1.
11848  */
11849 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
11850 {
11851     int fpop, opcode, a, u;
11852     int rn, rd;
11853     bool is_q;
11854     bool is_scalar;
11855     bool only_in_vector = false;
11856
11857     int pass;
11858     TCGv_i32 tcg_rmode = NULL;
11859     TCGv_ptr tcg_fpstatus = NULL;
11860     bool need_rmode = false;
11861     bool need_fpst = true;
11862     int rmode;
11863
11864     if (!dc_isar_feature(aa64_fp16, s)) {
11865         unallocated_encoding(s);
11866         return;
11867     }
11868
11869     rd = extract32(insn, 0, 5);
11870     rn = extract32(insn, 5, 5);
11871
11872     a = extract32(insn, 23, 1);
11873     u = extract32(insn, 29, 1);
11874     is_scalar = extract32(insn, 28, 1);
11875     is_q = extract32(insn, 30, 1);
11876
11877     opcode = extract32(insn, 12, 5);
11878     fpop = deposit32(opcode, 5, 1, a);
11879     fpop = deposit32(fpop, 6, 1, u);
11880
11881     rd = extract32(insn, 0, 5);
11882     rn = extract32(insn, 5, 5);
11883
11884     switch (fpop) {
11885     case 0x1d: /* SCVTF */
11886     case 0x5d: /* UCVTF */
11887     {
11888         int elements;
11889
11890         if (is_scalar) {
11891             elements = 1;
11892         } else {
11893             elements = (is_q ? 8 : 4);
11894         }
11895
11896         if (!fp_access_check(s)) {
11897             return;
11898         }
11899         handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
11900         return;
11901     }
11902     break;
11903     case 0x2c: /* FCMGT (zero) */
11904     case 0x2d: /* FCMEQ (zero) */
11905     case 0x2e: /* FCMLT (zero) */
11906     case 0x6c: /* FCMGE (zero) */
11907     case 0x6d: /* FCMLE (zero) */
11908         handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
11909         return;
11910     case 0x3d: /* FRECPE */
11911     case 0x3f: /* FRECPX */
11912         break;
11913     case 0x18: /* FRINTN */
11914         need_rmode = true;
11915         only_in_vector = true;
11916         rmode = FPROUNDING_TIEEVEN;
11917         break;
11918     case 0x19: /* FRINTM */
11919         need_rmode = true;
11920         only_in_vector = true;
11921         rmode = FPROUNDING_NEGINF;
11922         break;
11923     case 0x38: /* FRINTP */
11924         need_rmode = true;
11925         only_in_vector = true;
11926         rmode = FPROUNDING_POSINF;
11927         break;
11928     case 0x39: /* FRINTZ */
11929         need_rmode = true;
11930         only_in_vector = true;
11931         rmode = FPROUNDING_ZERO;
11932         break;
11933     case 0x58: /* FRINTA */
11934         need_rmode = true;
11935         only_in_vector = true;
11936         rmode = FPROUNDING_TIEAWAY;
11937         break;
11938     case 0x59: /* FRINTX */
11939     case 0x79: /* FRINTI */
11940         only_in_vector = true;
11941         /* current rounding mode */
11942         break;
11943     case 0x1a: /* FCVTNS */
11944         need_rmode = true;
11945         rmode = FPROUNDING_TIEEVEN;
11946         break;
11947     case 0x1b: /* FCVTMS */
11948         need_rmode = true;
11949         rmode = FPROUNDING_NEGINF;
11950         break;
11951     case 0x1c: /* FCVTAS */
11952         need_rmode = true;
11953         rmode = FPROUNDING_TIEAWAY;
11954         break;
11955     case 0x3a: /* FCVTPS */
11956         need_rmode = true;
11957         rmode = FPROUNDING_POSINF;
11958         break;
11959     case 0x3b: /* FCVTZS */
11960         need_rmode = true;
11961         rmode = FPROUNDING_ZERO;
11962         break;
11963     case 0x5a: /* FCVTNU */
11964         need_rmode = true;
11965         rmode = FPROUNDING_TIEEVEN;
11966         break;
11967     case 0x5b: /* FCVTMU */
11968         need_rmode = true;
11969         rmode = FPROUNDING_NEGINF;
11970         break;
11971     case 0x5c: /* FCVTAU */
11972         need_rmode = true;
11973         rmode = FPROUNDING_TIEAWAY;
11974         break;
11975     case 0x7a: /* FCVTPU */
11976         need_rmode = true;
11977         rmode = FPROUNDING_POSINF;
11978         break;
11979     case 0x7b: /* FCVTZU */
11980         need_rmode = true;
11981         rmode = FPROUNDING_ZERO;
11982         break;
11983     case 0x2f: /* FABS */
11984     case 0x6f: /* FNEG */
11985         need_fpst = false;
11986         break;
11987     case 0x7d: /* FRSQRTE */
11988     case 0x7f: /* FSQRT (vector) */
11989         break;
11990     default:
11991         fprintf(stderr, "%s: insn %#04x fpop %#2x\n", __func__, insn, fpop);
11992         g_assert_not_reached();
11993     }
11994
11995
11996     /* Check additional constraints for the scalar encoding */
11997     if (is_scalar) {
11998         if (!is_q) {
11999             unallocated_encoding(s);
12000             return;
12001         }
12002         /* FRINTxx is only in the vector form */
12003         if (only_in_vector) {
12004             unallocated_encoding(s);
12005             return;
12006         }
12007     }
12008
12009     if (!fp_access_check(s)) {
12010         return;
12011     }
12012
12013     if (need_rmode || need_fpst) {
12014         tcg_fpstatus = get_fpstatus_ptr(true);
12015     }
12016
12017     if (need_rmode) {
12018         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
12019         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12020     }
12021
12022     if (is_scalar) {
12023         TCGv_i32 tcg_op = read_fp_hreg(s, rn);
12024         TCGv_i32 tcg_res = tcg_temp_new_i32();
12025
12026         switch (fpop) {
12027         case 0x1a: /* FCVTNS */
12028         case 0x1b: /* FCVTMS */
12029         case 0x1c: /* FCVTAS */
12030         case 0x3a: /* FCVTPS */
12031         case 0x3b: /* FCVTZS */
12032             gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12033             break;
12034         case 0x3d: /* FRECPE */
12035             gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12036             break;
12037         case 0x3f: /* FRECPX */
12038             gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
12039             break;
12040         case 0x5a: /* FCVTNU */
12041         case 0x5b: /* FCVTMU */
12042         case 0x5c: /* FCVTAU */
12043         case 0x7a: /* FCVTPU */
12044         case 0x7b: /* FCVTZU */
12045             gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12046             break;
12047         case 0x6f: /* FNEG */
12048             tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12049             break;
12050         case 0x7d: /* FRSQRTE */
12051             gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12052             break;
12053         default:
12054             g_assert_not_reached();
12055         }
12056
12057         /* limit any sign extension going on */
12058         tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
12059         write_fp_sreg(s, rd, tcg_res);
12060
12061         tcg_temp_free_i32(tcg_res);
12062         tcg_temp_free_i32(tcg_op);
12063     } else {
12064         for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
12065             TCGv_i32 tcg_op = tcg_temp_new_i32();
12066             TCGv_i32 tcg_res = tcg_temp_new_i32();
12067
12068             read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
12069
12070             switch (fpop) {
12071             case 0x1a: /* FCVTNS */
12072             case 0x1b: /* FCVTMS */
12073             case 0x1c: /* FCVTAS */
12074             case 0x3a: /* FCVTPS */
12075             case 0x3b: /* FCVTZS */
12076                 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12077                 break;
12078             case 0x3d: /* FRECPE */
12079                 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12080                 break;
12081             case 0x5a: /* FCVTNU */
12082             case 0x5b: /* FCVTMU */
12083             case 0x5c: /* FCVTAU */
12084             case 0x7a: /* FCVTPU */
12085             case 0x7b: /* FCVTZU */
12086                 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12087                 break;
12088             case 0x18: /* FRINTN */
12089             case 0x19: /* FRINTM */
12090             case 0x38: /* FRINTP */
12091             case 0x39: /* FRINTZ */
12092             case 0x58: /* FRINTA */
12093             case 0x79: /* FRINTI */
12094                 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
12095                 break;
12096             case 0x59: /* FRINTX */
12097                 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
12098                 break;
12099             case 0x2f: /* FABS */
12100                 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
12101                 break;
12102             case 0x6f: /* FNEG */
12103                 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12104                 break;
12105             case 0x7d: /* FRSQRTE */
12106                 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12107                 break;
12108             case 0x7f: /* FSQRT */
12109                 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
12110                 break;
12111             default:
12112                 g_assert_not_reached();
12113             }
12114
12115             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
12116
12117             tcg_temp_free_i32(tcg_res);
12118             tcg_temp_free_i32(tcg_op);
12119         }
12120
12121         clear_vec_high(s, is_q, rd);
12122     }
12123
12124     if (tcg_rmode) {
12125         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12126         tcg_temp_free_i32(tcg_rmode);
12127     }
12128
12129     if (tcg_fpstatus) {
12130         tcg_temp_free_ptr(tcg_fpstatus);
12131     }
12132 }
12133
12134 /* AdvSIMD scalar x indexed element
12135  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12136  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12137  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12138  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12139  * AdvSIMD vector x indexed element
12140  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12141  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12142  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12143  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12144  */
12145 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
12146 {
12147     /* This encoding has two kinds of instruction:
12148      *  normal, where we perform elt x idxelt => elt for each
12149      *     element in the vector
12150      *  long, where we perform elt x idxelt and generate a result of
12151      *     double the width of the input element
12152      * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
12153      */
12154     bool is_scalar = extract32(insn, 28, 1);
12155     bool is_q = extract32(insn, 30, 1);
12156     bool u = extract32(insn, 29, 1);
12157     int size = extract32(insn, 22, 2);
12158     int l = extract32(insn, 21, 1);
12159     int m = extract32(insn, 20, 1);
12160     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
12161     int rm = extract32(insn, 16, 4);
12162     int opcode = extract32(insn, 12, 4);
12163     int h = extract32(insn, 11, 1);
12164     int rn = extract32(insn, 5, 5);
12165     int rd = extract32(insn, 0, 5);
12166     bool is_long = false;
12167     int is_fp = 0;
12168     bool is_fp16 = false;
12169     int index;
12170     TCGv_ptr fpst;
12171
12172     switch (16 * u + opcode) {
12173     case 0x08: /* MUL */
12174     case 0x10: /* MLA */
12175     case 0x14: /* MLS */
12176         if (is_scalar) {
12177             unallocated_encoding(s);
12178             return;
12179         }
12180         break;
12181     case 0x02: /* SMLAL, SMLAL2 */
12182     case 0x12: /* UMLAL, UMLAL2 */
12183     case 0x06: /* SMLSL, SMLSL2 */
12184     case 0x16: /* UMLSL, UMLSL2 */
12185     case 0x0a: /* SMULL, SMULL2 */
12186     case 0x1a: /* UMULL, UMULL2 */
12187         if (is_scalar) {
12188             unallocated_encoding(s);
12189             return;
12190         }
12191         is_long = true;
12192         break;
12193     case 0x03: /* SQDMLAL, SQDMLAL2 */
12194     case 0x07: /* SQDMLSL, SQDMLSL2 */
12195     case 0x0b: /* SQDMULL, SQDMULL2 */
12196         is_long = true;
12197         break;
12198     case 0x0c: /* SQDMULH */
12199     case 0x0d: /* SQRDMULH */
12200         break;
12201     case 0x01: /* FMLA */
12202     case 0x05: /* FMLS */
12203     case 0x09: /* FMUL */
12204     case 0x19: /* FMULX */
12205         is_fp = 1;
12206         break;
12207     case 0x1d: /* SQRDMLAH */
12208     case 0x1f: /* SQRDMLSH */
12209         if (!dc_isar_feature(aa64_rdm, s)) {
12210             unallocated_encoding(s);
12211             return;
12212         }
12213         break;
12214     case 0x0e: /* SDOT */
12215     case 0x1e: /* UDOT */
12216         if (size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
12217             unallocated_encoding(s);
12218             return;
12219         }
12220         break;
12221     case 0x11: /* FCMLA #0 */
12222     case 0x13: /* FCMLA #90 */
12223     case 0x15: /* FCMLA #180 */
12224     case 0x17: /* FCMLA #270 */
12225         if (!dc_isar_feature(aa64_fcma, s)) {
12226             unallocated_encoding(s);
12227             return;
12228         }
12229         is_fp = 2;
12230         break;
12231     default:
12232         unallocated_encoding(s);
12233         return;
12234     }
12235
12236     switch (is_fp) {
12237     case 1: /* normal fp */
12238         /* convert insn encoded size to TCGMemOp size */
12239         switch (size) {
12240         case 0: /* half-precision */
12241             size = MO_16;
12242             is_fp16 = true;
12243             break;
12244         case MO_32: /* single precision */
12245         case MO_64: /* double precision */
12246             break;
12247         default:
12248             unallocated_encoding(s);
12249             return;
12250         }
12251         break;
12252
12253     case 2: /* complex fp */
12254         /* Each indexable element is a complex pair.  */
12255         size <<= 1;
12256         switch (size) {
12257         case MO_32:
12258             if (h && !is_q) {
12259                 unallocated_encoding(s);
12260                 return;
12261             }
12262             is_fp16 = true;
12263             break;
12264         case MO_64:
12265             break;
12266         default:
12267             unallocated_encoding(s);
12268             return;
12269         }
12270         break;
12271
12272     default: /* integer */
12273         switch (size) {
12274         case MO_8:
12275         case MO_64:
12276             unallocated_encoding(s);
12277             return;
12278         }
12279         break;
12280     }
12281     if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
12282         unallocated_encoding(s);
12283         return;
12284     }
12285
12286     /* Given TCGMemOp size, adjust register and indexing.  */
12287     switch (size) {
12288     case MO_16:
12289         index = h << 2 | l << 1 | m;
12290         break;
12291     case MO_32:
12292         index = h << 1 | l;
12293         rm |= m << 4;
12294         break;
12295     case MO_64:
12296         if (l || !is_q) {
12297             unallocated_encoding(s);
12298             return;
12299         }
12300         index = h;
12301         rm |= m << 4;
12302         break;
12303     default:
12304         g_assert_not_reached();
12305     }
12306
12307     if (!fp_access_check(s)) {
12308         return;
12309     }
12310
12311     if (is_fp) {
12312         fpst = get_fpstatus_ptr(is_fp16);
12313     } else {
12314         fpst = NULL;
12315     }
12316
12317     switch (16 * u + opcode) {
12318     case 0x0e: /* SDOT */
12319     case 0x1e: /* UDOT */
12320         gen_gvec_op3_ool(s, is_q, rd, rn, rm, index,
12321                          u ? gen_helper_gvec_udot_idx_b
12322                          : gen_helper_gvec_sdot_idx_b);
12323         return;
12324     case 0x11: /* FCMLA #0 */
12325     case 0x13: /* FCMLA #90 */
12326     case 0x15: /* FCMLA #180 */
12327     case 0x17: /* FCMLA #270 */
12328         {
12329             int rot = extract32(insn, 13, 2);
12330             int data = (index << 2) | rot;
12331             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
12332                                vec_full_reg_offset(s, rn),
12333                                vec_full_reg_offset(s, rm), fpst,
12334                                is_q ? 16 : 8, vec_full_reg_size(s), data,
12335                                size == MO_64
12336                                ? gen_helper_gvec_fcmlas_idx
12337                                : gen_helper_gvec_fcmlah_idx);
12338             tcg_temp_free_ptr(fpst);
12339         }
12340         return;
12341     }
12342
12343     if (size == 3) {
12344         TCGv_i64 tcg_idx = tcg_temp_new_i64();
12345         int pass;
12346
12347         assert(is_fp && is_q && !is_long);
12348
12349         read_vec_element(s, tcg_idx, rm, index, MO_64);
12350
12351         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
12352             TCGv_i64 tcg_op = tcg_temp_new_i64();
12353             TCGv_i64 tcg_res = tcg_temp_new_i64();
12354
12355             read_vec_element(s, tcg_op, rn, pass, MO_64);
12356
12357             switch (16 * u + opcode) {
12358             case 0x05: /* FMLS */
12359                 /* As usual for ARM, separate negation for fused multiply-add */
12360                 gen_helper_vfp_negd(tcg_op, tcg_op);
12361                 /* fall through */
12362             case 0x01: /* FMLA */
12363                 read_vec_element(s, tcg_res, rd, pass, MO_64);
12364                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
12365                 break;
12366             case 0x09: /* FMUL */
12367                 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
12368                 break;
12369             case 0x19: /* FMULX */
12370                 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
12371                 break;
12372             default:
12373                 g_assert_not_reached();
12374             }
12375
12376             write_vec_element(s, tcg_res, rd, pass, MO_64);
12377             tcg_temp_free_i64(tcg_op);
12378             tcg_temp_free_i64(tcg_res);
12379         }
12380
12381         tcg_temp_free_i64(tcg_idx);
12382         clear_vec_high(s, !is_scalar, rd);
12383     } else if (!is_long) {
12384         /* 32 bit floating point, or 16 or 32 bit integer.
12385          * For the 16 bit scalar case we use the usual Neon helpers and
12386          * rely on the fact that 0 op 0 == 0 with no side effects.
12387          */
12388         TCGv_i32 tcg_idx = tcg_temp_new_i32();
12389         int pass, maxpasses;
12390
12391         if (is_scalar) {
12392             maxpasses = 1;
12393         } else {
12394             maxpasses = is_q ? 4 : 2;
12395         }
12396
12397         read_vec_element_i32(s, tcg_idx, rm, index, size);
12398
12399         if (size == 1 && !is_scalar) {
12400             /* The simplest way to handle the 16x16 indexed ops is to duplicate
12401              * the index into both halves of the 32 bit tcg_idx and then use
12402              * the usual Neon helpers.
12403              */
12404             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
12405         }
12406
12407         for (pass = 0; pass < maxpasses; pass++) {
12408             TCGv_i32 tcg_op = tcg_temp_new_i32();
12409             TCGv_i32 tcg_res = tcg_temp_new_i32();
12410
12411             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
12412
12413             switch (16 * u + opcode) {
12414             case 0x08: /* MUL */
12415             case 0x10: /* MLA */
12416             case 0x14: /* MLS */
12417             {
12418                 static NeonGenTwoOpFn * const fns[2][2] = {
12419                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
12420                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
12421                 };
12422                 NeonGenTwoOpFn *genfn;
12423                 bool is_sub = opcode == 0x4;
12424
12425                 if (size == 1) {
12426                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
12427                 } else {
12428                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
12429                 }
12430                 if (opcode == 0x8) {
12431                     break;
12432                 }
12433                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
12434                 genfn = fns[size - 1][is_sub];
12435                 genfn(tcg_res, tcg_op, tcg_res);
12436                 break;
12437             }
12438             case 0x05: /* FMLS */
12439             case 0x01: /* FMLA */
12440                 read_vec_element_i32(s, tcg_res, rd, pass,
12441                                      is_scalar ? size : MO_32);
12442                 switch (size) {
12443                 case 1:
12444                     if (opcode == 0x5) {
12445                         /* As usual for ARM, separate negation for fused
12446                          * multiply-add */
12447                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
12448                     }
12449                     if (is_scalar) {
12450                         gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
12451                                                    tcg_res, fpst);
12452                     } else {
12453                         gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
12454                                                     tcg_res, fpst);
12455                     }
12456                     break;
12457                 case 2:
12458                     if (opcode == 0x5) {
12459                         /* As usual for ARM, separate negation for
12460                          * fused multiply-add */
12461                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
12462                     }
12463                     gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
12464                                            tcg_res, fpst);
12465                     break;
12466                 default:
12467                     g_assert_not_reached();
12468                 }
12469                 break;
12470             case 0x09: /* FMUL */
12471                 switch (size) {
12472                 case 1:
12473                     if (is_scalar) {
12474                         gen_helper_advsimd_mulh(tcg_res, tcg_op,
12475                                                 tcg_idx, fpst);
12476                     } else {
12477                         gen_helper_advsimd_mul2h(tcg_res, tcg_op,
12478                                                  tcg_idx, fpst);
12479                     }
12480                     break;
12481                 case 2:
12482                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
12483                     break;
12484                 default:
12485                     g_assert_not_reached();
12486                 }
12487                 break;
12488             case 0x19: /* FMULX */
12489                 switch (size) {
12490                 case 1:
12491                     if (is_scalar) {
12492                         gen_helper_advsimd_mulxh(tcg_res, tcg_op,
12493                                                  tcg_idx, fpst);
12494                     } else {
12495                         gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
12496                                                   tcg_idx, fpst);
12497                     }
12498                     break;
12499                 case 2:
12500                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
12501                     break;
12502                 default:
12503                     g_assert_not_reached();
12504                 }
12505                 break;
12506             case 0x0c: /* SQDMULH */
12507                 if (size == 1) {
12508                     gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
12509                                                tcg_op, tcg_idx);
12510                 } else {
12511                     gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
12512                                                tcg_op, tcg_idx);
12513                 }
12514                 break;
12515             case 0x0d: /* SQRDMULH */
12516                 if (size == 1) {
12517                     gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
12518                                                 tcg_op, tcg_idx);
12519                 } else {
12520                     gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
12521                                                 tcg_op, tcg_idx);
12522                 }
12523                 break;
12524             case 0x1d: /* SQRDMLAH */
12525                 read_vec_element_i32(s, tcg_res, rd, pass,
12526                                      is_scalar ? size : MO_32);
12527                 if (size == 1) {
12528                     gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env,
12529                                                 tcg_op, tcg_idx, tcg_res);
12530                 } else {
12531                     gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env,
12532                                                 tcg_op, tcg_idx, tcg_res);
12533                 }
12534                 break;
12535             case 0x1f: /* SQRDMLSH */
12536                 read_vec_element_i32(s, tcg_res, rd, pass,
12537                                      is_scalar ? size : MO_32);
12538                 if (size == 1) {
12539                     gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env,
12540                                                 tcg_op, tcg_idx, tcg_res);
12541                 } else {
12542                     gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env,
12543                                                 tcg_op, tcg_idx, tcg_res);
12544                 }
12545                 break;
12546             default:
12547                 g_assert_not_reached();
12548             }
12549
12550             if (is_scalar) {
12551                 write_fp_sreg(s, rd, tcg_res);
12552             } else {
12553                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12554             }
12555
12556             tcg_temp_free_i32(tcg_op);
12557             tcg_temp_free_i32(tcg_res);
12558         }
12559
12560         tcg_temp_free_i32(tcg_idx);
12561         clear_vec_high(s, is_q, rd);
12562     } else {
12563         /* long ops: 16x16->32 or 32x32->64 */
12564         TCGv_i64 tcg_res[2];
12565         int pass;
12566         bool satop = extract32(opcode, 0, 1);
12567         TCGMemOp memop = MO_32;
12568
12569         if (satop || !u) {
12570             memop |= MO_SIGN;
12571         }
12572
12573         if (size == 2) {
12574             TCGv_i64 tcg_idx = tcg_temp_new_i64();
12575
12576             read_vec_element(s, tcg_idx, rm, index, memop);
12577
12578             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
12579                 TCGv_i64 tcg_op = tcg_temp_new_i64();
12580                 TCGv_i64 tcg_passres;
12581                 int passelt;
12582
12583                 if (is_scalar) {
12584                     passelt = 0;
12585                 } else {
12586                     passelt = pass + (is_q * 2);
12587                 }
12588
12589                 read_vec_element(s, tcg_op, rn, passelt, memop);
12590
12591                 tcg_res[pass] = tcg_temp_new_i64();
12592
12593                 if (opcode == 0xa || opcode == 0xb) {
12594                     /* Non-accumulating ops */
12595                     tcg_passres = tcg_res[pass];
12596                 } else {
12597                     tcg_passres = tcg_temp_new_i64();
12598                 }
12599
12600                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
12601                 tcg_temp_free_i64(tcg_op);
12602
12603                 if (satop) {
12604                     /* saturating, doubling */
12605                     gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
12606                                                       tcg_passres, tcg_passres);
12607                 }
12608
12609                 if (opcode == 0xa || opcode == 0xb) {
12610                     continue;
12611                 }
12612
12613                 /* Accumulating op: handle accumulate step */
12614                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
12615
12616                 switch (opcode) {
12617                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
12618                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
12619                     break;
12620                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
12621                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
12622                     break;
12623                 case 0x7: /* SQDMLSL, SQDMLSL2 */
12624                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
12625                     /* fall through */
12626                 case 0x3: /* SQDMLAL, SQDMLAL2 */
12627                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
12628                                                       tcg_res[pass],
12629                                                       tcg_passres);
12630                     break;
12631                 default:
12632                     g_assert_not_reached();
12633                 }
12634                 tcg_temp_free_i64(tcg_passres);
12635             }
12636             tcg_temp_free_i64(tcg_idx);
12637
12638             clear_vec_high(s, !is_scalar, rd);
12639         } else {
12640             TCGv_i32 tcg_idx = tcg_temp_new_i32();
12641
12642             assert(size == 1);
12643             read_vec_element_i32(s, tcg_idx, rm, index, size);
12644
12645             if (!is_scalar) {
12646                 /* The simplest way to handle the 16x16 indexed ops is to
12647                  * duplicate the index into both halves of the 32 bit tcg_idx
12648                  * and then use the usual Neon helpers.
12649                  */
12650                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
12651             }
12652
12653             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
12654                 TCGv_i32 tcg_op = tcg_temp_new_i32();
12655                 TCGv_i64 tcg_passres;
12656
12657                 if (is_scalar) {
12658                     read_vec_element_i32(s, tcg_op, rn, pass, size);
12659                 } else {
12660                     read_vec_element_i32(s, tcg_op, rn,
12661                                          pass + (is_q * 2), MO_32);
12662                 }
12663
12664                 tcg_res[pass] = tcg_temp_new_i64();
12665
12666                 if (opcode == 0xa || opcode == 0xb) {
12667                     /* Non-accumulating ops */
12668                     tcg_passres = tcg_res[pass];
12669                 } else {
12670                     tcg_passres = tcg_temp_new_i64();
12671                 }
12672
12673                 if (memop & MO_SIGN) {
12674                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
12675                 } else {
12676                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
12677                 }
12678                 if (satop) {
12679                     gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
12680                                                       tcg_passres, tcg_passres);
12681                 }
12682                 tcg_temp_free_i32(tcg_op);
12683
12684                 if (opcode == 0xa || opcode == 0xb) {
12685                     continue;
12686                 }
12687
12688                 /* Accumulating op: handle accumulate step */
12689                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
12690
12691                 switch (opcode) {
12692                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
12693                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
12694                                              tcg_passres);
12695                     break;
12696                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
12697                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
12698                                              tcg_passres);
12699                     break;
12700                 case 0x7: /* SQDMLSL, SQDMLSL2 */
12701                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
12702                     /* fall through */
12703                 case 0x3: /* SQDMLAL, SQDMLAL2 */
12704                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
12705                                                       tcg_res[pass],
12706                                                       tcg_passres);
12707                     break;
12708                 default:
12709                     g_assert_not_reached();
12710                 }
12711                 tcg_temp_free_i64(tcg_passres);
12712             }
12713             tcg_temp_free_i32(tcg_idx);
12714
12715             if (is_scalar) {
12716                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
12717             }
12718         }
12719
12720         if (is_scalar) {
12721             tcg_res[1] = tcg_const_i64(0);
12722         }
12723
12724         for (pass = 0; pass < 2; pass++) {
12725             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
12726             tcg_temp_free_i64(tcg_res[pass]);
12727         }
12728     }
12729
12730     if (fpst) {
12731         tcg_temp_free_ptr(fpst);
12732     }
12733 }
12734
12735 /* Crypto AES
12736  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
12737  * +-----------------+------+-----------+--------+-----+------+------+
12738  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12739  * +-----------------+------+-----------+--------+-----+------+------+
12740  */
12741 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
12742 {
12743     int size = extract32(insn, 22, 2);
12744     int opcode = extract32(insn, 12, 5);
12745     int rn = extract32(insn, 5, 5);
12746     int rd = extract32(insn, 0, 5);
12747     int decrypt;
12748     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
12749     TCGv_i32 tcg_decrypt;
12750     CryptoThreeOpIntFn *genfn;
12751
12752     if (!dc_isar_feature(aa64_aes, s) || size != 0) {
12753         unallocated_encoding(s);
12754         return;
12755     }
12756
12757     switch (opcode) {
12758     case 0x4: /* AESE */
12759         decrypt = 0;
12760         genfn = gen_helper_crypto_aese;
12761         break;
12762     case 0x6: /* AESMC */
12763         decrypt = 0;
12764         genfn = gen_helper_crypto_aesmc;
12765         break;
12766     case 0x5: /* AESD */
12767         decrypt = 1;
12768         genfn = gen_helper_crypto_aese;
12769         break;
12770     case 0x7: /* AESIMC */
12771         decrypt = 1;
12772         genfn = gen_helper_crypto_aesmc;
12773         break;
12774     default:
12775         unallocated_encoding(s);
12776         return;
12777     }
12778
12779     if (!fp_access_check(s)) {
12780         return;
12781     }
12782
12783     tcg_rd_ptr = vec_full_reg_ptr(s, rd);
12784     tcg_rn_ptr = vec_full_reg_ptr(s, rn);
12785     tcg_decrypt = tcg_const_i32(decrypt);
12786
12787     genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_decrypt);
12788
12789     tcg_temp_free_ptr(tcg_rd_ptr);
12790     tcg_temp_free_ptr(tcg_rn_ptr);
12791     tcg_temp_free_i32(tcg_decrypt);
12792 }
12793
12794 /* Crypto three-reg SHA
12795  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
12796  * +-----------------+------+---+------+---+--------+-----+------+------+
12797  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
12798  * +-----------------+------+---+------+---+--------+-----+------+------+
12799  */
12800 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
12801 {
12802     int size = extract32(insn, 22, 2);
12803     int opcode = extract32(insn, 12, 3);
12804     int rm = extract32(insn, 16, 5);
12805     int rn = extract32(insn, 5, 5);
12806     int rd = extract32(insn, 0, 5);
12807     CryptoThreeOpFn *genfn;
12808     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
12809     bool feature;
12810
12811     if (size != 0) {
12812         unallocated_encoding(s);
12813         return;
12814     }
12815
12816     switch (opcode) {
12817     case 0: /* SHA1C */
12818     case 1: /* SHA1P */
12819     case 2: /* SHA1M */
12820     case 3: /* SHA1SU0 */
12821         genfn = NULL;
12822         feature = dc_isar_feature(aa64_sha1, s);
12823         break;
12824     case 4: /* SHA256H */
12825         genfn = gen_helper_crypto_sha256h;
12826         feature = dc_isar_feature(aa64_sha256, s);
12827         break;
12828     case 5: /* SHA256H2 */
12829         genfn = gen_helper_crypto_sha256h2;
12830         feature = dc_isar_feature(aa64_sha256, s);
12831         break;
12832     case 6: /* SHA256SU1 */
12833         genfn = gen_helper_crypto_sha256su1;
12834         feature = dc_isar_feature(aa64_sha256, s);
12835         break;
12836     default:
12837         unallocated_encoding(s);
12838         return;
12839     }
12840
12841     if (!feature) {
12842         unallocated_encoding(s);
12843         return;
12844     }
12845
12846     if (!fp_access_check(s)) {
12847         return;
12848     }
12849
12850     tcg_rd_ptr = vec_full_reg_ptr(s, rd);
12851     tcg_rn_ptr = vec_full_reg_ptr(s, rn);
12852     tcg_rm_ptr = vec_full_reg_ptr(s, rm);
12853
12854     if (genfn) {
12855         genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
12856     } else {
12857         TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
12858
12859         gen_helper_crypto_sha1_3reg(tcg_rd_ptr, tcg_rn_ptr,
12860                                     tcg_rm_ptr, tcg_opcode);
12861         tcg_temp_free_i32(tcg_opcode);
12862     }
12863
12864     tcg_temp_free_ptr(tcg_rd_ptr);
12865     tcg_temp_free_ptr(tcg_rn_ptr);
12866     tcg_temp_free_ptr(tcg_rm_ptr);
12867 }
12868
12869 /* Crypto two-reg SHA
12870  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
12871  * +-----------------+------+-----------+--------+-----+------+------+
12872  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12873  * +-----------------+------+-----------+--------+-----+------+------+
12874  */
12875 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
12876 {
12877     int size = extract32(insn, 22, 2);
12878     int opcode = extract32(insn, 12, 5);
12879     int rn = extract32(insn, 5, 5);
12880     int rd = extract32(insn, 0, 5);
12881     CryptoTwoOpFn *genfn;
12882     bool feature;
12883     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
12884
12885     if (size != 0) {
12886         unallocated_encoding(s);
12887         return;
12888     }
12889
12890     switch (opcode) {
12891     case 0: /* SHA1H */
12892         feature = dc_isar_feature(aa64_sha1, s);
12893         genfn = gen_helper_crypto_sha1h;
12894         break;
12895     case 1: /* SHA1SU1 */
12896         feature = dc_isar_feature(aa64_sha1, s);
12897         genfn = gen_helper_crypto_sha1su1;
12898         break;
12899     case 2: /* SHA256SU0 */
12900         feature = dc_isar_feature(aa64_sha256, s);
12901         genfn = gen_helper_crypto_sha256su0;
12902         break;
12903     default:
12904         unallocated_encoding(s);
12905         return;
12906     }
12907
12908     if (!feature) {
12909         unallocated_encoding(s);
12910         return;
12911     }
12912
12913     if (!fp_access_check(s)) {
12914         return;
12915     }
12916
12917     tcg_rd_ptr = vec_full_reg_ptr(s, rd);
12918     tcg_rn_ptr = vec_full_reg_ptr(s, rn);
12919
12920     genfn(tcg_rd_ptr, tcg_rn_ptr);
12921
12922     tcg_temp_free_ptr(tcg_rd_ptr);
12923     tcg_temp_free_ptr(tcg_rn_ptr);
12924 }
12925
12926 /* Crypto three-reg SHA512
12927  *  31                   21 20  16 15  14  13 12  11  10  9    5 4    0
12928  * +-----------------------+------+---+---+-----+--------+------+------+
12929  * | 1 1 0 0 1 1 1 0 0 1 1 |  Rm  | 1 | O | 0 0 | opcode |  Rn  |  Rd  |
12930  * +-----------------------+------+---+---+-----+--------+------+------+
12931  */
12932 static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
12933 {
12934     int opcode = extract32(insn, 10, 2);
12935     int o =  extract32(insn, 14, 1);
12936     int rm = extract32(insn, 16, 5);
12937     int rn = extract32(insn, 5, 5);
12938     int rd = extract32(insn, 0, 5);
12939     bool feature;
12940     CryptoThreeOpFn *genfn;
12941
12942     if (o == 0) {
12943         switch (opcode) {
12944         case 0: /* SHA512H */
12945             feature = dc_isar_feature(aa64_sha512, s);
12946             genfn = gen_helper_crypto_sha512h;
12947             break;
12948         case 1: /* SHA512H2 */
12949             feature = dc_isar_feature(aa64_sha512, s);
12950             genfn = gen_helper_crypto_sha512h2;
12951             break;
12952         case 2: /* SHA512SU1 */
12953             feature = dc_isar_feature(aa64_sha512, s);
12954             genfn = gen_helper_crypto_sha512su1;
12955             break;
12956         case 3: /* RAX1 */
12957             feature = dc_isar_feature(aa64_sha3, s);
12958             genfn = NULL;
12959             break;
12960         }
12961     } else {
12962         switch (opcode) {
12963         case 0: /* SM3PARTW1 */
12964             feature = dc_isar_feature(aa64_sm3, s);
12965             genfn = gen_helper_crypto_sm3partw1;
12966             break;
12967         case 1: /* SM3PARTW2 */
12968             feature = dc_isar_feature(aa64_sm3, s);
12969             genfn = gen_helper_crypto_sm3partw2;
12970             break;
12971         case 2: /* SM4EKEY */
12972             feature = dc_isar_feature(aa64_sm4, s);
12973             genfn = gen_helper_crypto_sm4ekey;
12974             break;
12975         default:
12976             unallocated_encoding(s);
12977             return;
12978         }
12979     }
12980
12981     if (!feature) {
12982         unallocated_encoding(s);
12983         return;
12984     }
12985
12986     if (!fp_access_check(s)) {
12987         return;
12988     }
12989
12990     if (genfn) {
12991         TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
12992
12993         tcg_rd_ptr = vec_full_reg_ptr(s, rd);
12994         tcg_rn_ptr = vec_full_reg_ptr(s, rn);
12995         tcg_rm_ptr = vec_full_reg_ptr(s, rm);
12996
12997         genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
12998
12999         tcg_temp_free_ptr(tcg_rd_ptr);
13000         tcg_temp_free_ptr(tcg_rn_ptr);
13001         tcg_temp_free_ptr(tcg_rm_ptr);
13002     } else {
13003         TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
13004         int pass;
13005
13006         tcg_op1 = tcg_temp_new_i64();
13007         tcg_op2 = tcg_temp_new_i64();
13008         tcg_res[0] = tcg_temp_new_i64();
13009         tcg_res[1] = tcg_temp_new_i64();
13010
13011         for (pass = 0; pass < 2; pass++) {
13012             read_vec_element(s, tcg_op1, rn, pass, MO_64);
13013             read_vec_element(s, tcg_op2, rm, pass, MO_64);
13014
13015             tcg_gen_rotli_i64(tcg_res[pass], tcg_op2, 1);
13016             tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13017         }
13018         write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13019         write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13020
13021         tcg_temp_free_i64(tcg_op1);
13022         tcg_temp_free_i64(tcg_op2);
13023         tcg_temp_free_i64(tcg_res[0]);
13024         tcg_temp_free_i64(tcg_res[1]);
13025     }
13026 }
13027
13028 /* Crypto two-reg SHA512
13029  *  31                                     12  11  10  9    5 4    0
13030  * +-----------------------------------------+--------+------+------+
13031  * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode |  Rn  |  Rd  |
13032  * +-----------------------------------------+--------+------+------+
13033  */
13034 static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
13035 {
13036     int opcode = extract32(insn, 10, 2);
13037     int rn = extract32(insn, 5, 5);
13038     int rd = extract32(insn, 0, 5);
13039     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
13040     bool feature;
13041     CryptoTwoOpFn *genfn;
13042
13043     switch (opcode) {
13044     case 0: /* SHA512SU0 */
13045         feature = dc_isar_feature(aa64_sha512, s);
13046         genfn = gen_helper_crypto_sha512su0;
13047         break;
13048     case 1: /* SM4E */
13049         feature = dc_isar_feature(aa64_sm4, s);
13050         genfn = gen_helper_crypto_sm4e;
13051         break;
13052     default:
13053         unallocated_encoding(s);
13054         return;
13055     }
13056
13057     if (!feature) {
13058         unallocated_encoding(s);
13059         return;
13060     }
13061
13062     if (!fp_access_check(s)) {
13063         return;
13064     }
13065
13066     tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13067     tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13068
13069     genfn(tcg_rd_ptr, tcg_rn_ptr);
13070
13071     tcg_temp_free_ptr(tcg_rd_ptr);
13072     tcg_temp_free_ptr(tcg_rn_ptr);
13073 }
13074
13075 /* Crypto four-register
13076  *  31               23 22 21 20  16 15  14  10 9    5 4    0
13077  * +-------------------+-----+------+---+------+------+------+
13078  * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
13079  * +-------------------+-----+------+---+------+------+------+
13080  */
13081 static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
13082 {
13083     int op0 = extract32(insn, 21, 2);
13084     int rm = extract32(insn, 16, 5);
13085     int ra = extract32(insn, 10, 5);
13086     int rn = extract32(insn, 5, 5);
13087     int rd = extract32(insn, 0, 5);
13088     bool feature;
13089
13090     switch (op0) {
13091     case 0: /* EOR3 */
13092     case 1: /* BCAX */
13093         feature = dc_isar_feature(aa64_sha3, s);
13094         break;
13095     case 2: /* SM3SS1 */
13096         feature = dc_isar_feature(aa64_sm3, s);
13097         break;
13098     default:
13099         unallocated_encoding(s);
13100         return;
13101     }
13102
13103     if (!feature) {
13104         unallocated_encoding(s);
13105         return;
13106     }
13107
13108     if (!fp_access_check(s)) {
13109         return;
13110     }
13111
13112     if (op0 < 2) {
13113         TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
13114         int pass;
13115
13116         tcg_op1 = tcg_temp_new_i64();
13117         tcg_op2 = tcg_temp_new_i64();
13118         tcg_op3 = tcg_temp_new_i64();
13119         tcg_res[0] = tcg_temp_new_i64();
13120         tcg_res[1] = tcg_temp_new_i64();
13121
13122         for (pass = 0; pass < 2; pass++) {
13123             read_vec_element(s, tcg_op1, rn, pass, MO_64);
13124             read_vec_element(s, tcg_op2, rm, pass, MO_64);
13125             read_vec_element(s, tcg_op3, ra, pass, MO_64);
13126
13127             if (op0 == 0) {
13128                 /* EOR3 */
13129                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
13130             } else {
13131                 /* BCAX */
13132                 tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
13133             }
13134             tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13135         }
13136         write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13137         write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13138
13139         tcg_temp_free_i64(tcg_op1);
13140         tcg_temp_free_i64(tcg_op2);
13141         tcg_temp_free_i64(tcg_op3);
13142         tcg_temp_free_i64(tcg_res[0]);
13143         tcg_temp_free_i64(tcg_res[1]);
13144     } else {
13145         TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
13146
13147         tcg_op1 = tcg_temp_new_i32();
13148         tcg_op2 = tcg_temp_new_i32();
13149         tcg_op3 = tcg_temp_new_i32();
13150         tcg_res = tcg_temp_new_i32();
13151         tcg_zero = tcg_const_i32(0);
13152
13153         read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
13154         read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
13155         read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
13156
13157         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
13158         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
13159         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
13160         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
13161
13162         write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
13163         write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
13164         write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
13165         write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
13166
13167         tcg_temp_free_i32(tcg_op1);
13168         tcg_temp_free_i32(tcg_op2);
13169         tcg_temp_free_i32(tcg_op3);
13170         tcg_temp_free_i32(tcg_res);
13171         tcg_temp_free_i32(tcg_zero);
13172     }
13173 }
13174
13175 /* Crypto XAR
13176  *  31                   21 20  16 15    10 9    5 4    0
13177  * +-----------------------+------+--------+------+------+
13178  * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
13179  * +-----------------------+------+--------+------+------+
13180  */
13181 static void disas_crypto_xar(DisasContext *s, uint32_t insn)
13182 {
13183     int rm = extract32(insn, 16, 5);
13184     int imm6 = extract32(insn, 10, 6);
13185     int rn = extract32(insn, 5, 5);
13186     int rd = extract32(insn, 0, 5);
13187     TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
13188     int pass;
13189
13190     if (!dc_isar_feature(aa64_sha3, s)) {
13191         unallocated_encoding(s);
13192         return;
13193     }
13194
13195     if (!fp_access_check(s)) {
13196         return;
13197     }
13198
13199     tcg_op1 = tcg_temp_new_i64();
13200     tcg_op2 = tcg_temp_new_i64();
13201     tcg_res[0] = tcg_temp_new_i64();
13202     tcg_res[1] = tcg_temp_new_i64();
13203
13204     for (pass = 0; pass < 2; pass++) {
13205         read_vec_element(s, tcg_op1, rn, pass, MO_64);
13206         read_vec_element(s, tcg_op2, rm, pass, MO_64);
13207
13208         tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
13209         tcg_gen_rotri_i64(tcg_res[pass], tcg_res[pass], imm6);
13210     }
13211     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13212     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13213
13214     tcg_temp_free_i64(tcg_op1);
13215     tcg_temp_free_i64(tcg_op2);
13216     tcg_temp_free_i64(tcg_res[0]);
13217     tcg_temp_free_i64(tcg_res[1]);
13218 }
13219
13220 /* Crypto three-reg imm2
13221  *  31                   21 20  16 15  14 13 12  11  10  9    5 4    0
13222  * +-----------------------+------+-----+------+--------+------+------+
13223  * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
13224  * +-----------------------+------+-----+------+--------+------+------+
13225  */
13226 static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
13227 {
13228     int opcode = extract32(insn, 10, 2);
13229     int imm2 = extract32(insn, 12, 2);
13230     int rm = extract32(insn, 16, 5);
13231     int rn = extract32(insn, 5, 5);
13232     int rd = extract32(insn, 0, 5);
13233     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
13234     TCGv_i32 tcg_imm2, tcg_opcode;
13235
13236     if (!dc_isar_feature(aa64_sm3, s)) {
13237         unallocated_encoding(s);
13238         return;
13239     }
13240
13241     if (!fp_access_check(s)) {
13242         return;
13243     }
13244
13245     tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13246     tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13247     tcg_rm_ptr = vec_full_reg_ptr(s, rm);
13248     tcg_imm2   = tcg_const_i32(imm2);
13249     tcg_opcode = tcg_const_i32(opcode);
13250
13251     gen_helper_crypto_sm3tt(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr, tcg_imm2,
13252                             tcg_opcode);
13253
13254     tcg_temp_free_ptr(tcg_rd_ptr);
13255     tcg_temp_free_ptr(tcg_rn_ptr);
13256     tcg_temp_free_ptr(tcg_rm_ptr);
13257     tcg_temp_free_i32(tcg_imm2);
13258     tcg_temp_free_i32(tcg_opcode);
13259 }
13260
/* C3.6 Data processing - SIMD, inc Crypto
 *
 * As the decode gets a little complex we are using a table based
 * approach for this part of the decode.
 *
 * Each entry is { pattern, mask, handler }.  The table is searched by
 * lookup_disas_fn() (see disas_data_proc_simd()) and is terminated by the
 * all-zero entry with a NULL handler.
 * NOTE(review): presumably an entry matches when (insn & mask) == pattern
 * and the first matching entry wins -- confirm against lookup_disas_fn().
 * Entry order is significant in at least one place (see the mod_imm note
 * below), so do not sort or regroup entries without checking for overlap.
 */
static const AArch64DecodeTable data_proc_simd[] = {
    /* pattern  ,  mask     ,  fn                        */
    /* Advanced SIMD, vector forms */
    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
    { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
    { 0x0e000400, 0x9fe08400, disas_simd_copy },
    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
    { 0x0e000000, 0xbf208c00, disas_simd_tb },
    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
    { 0x2e000000, 0xbf208400, disas_simd_ext },
    /* Advanced SIMD, scalar forms */
    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
    { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
    /* Crypto extensions */
    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
    { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
    { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
    { 0xce000000, 0xff808000, disas_crypto_four_reg },
    { 0xce800000, 0xffe00000, disas_crypto_xar },
    { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
    /* Half-precision (fp16) forms */
    { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
    { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
    { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
    /* End-of-table sentinel */
    { 0x00000000, 0x00000000, NULL }
};
13302
13303 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
13304 {
13305     /* Note that this is called with all non-FP cases from
13306      * table C3-6 so it must UNDEF for entries not specifically
13307      * allocated to instructions in that table.
13308      */
13309     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
13310     if (fn) {
13311         fn(s, insn);
13312     } else {
13313         unallocated_encoding(s);
13314     }
13315 }
13316
13317 /* C3.6 Data processing - SIMD and floating point */
13318 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
13319 {
13320     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
13321         disas_data_proc_fp(s, insn);
13322     } else {
13323         /* SIMD, including crypto */
13324         disas_data_proc_simd(s, insn);
13325     }
13326 }
13327
13328 /* C3.1 A64 instruction index by encoding */
13329 static void disas_a64_insn(CPUARMState *env, DisasContext *s)
13330 {
13331     uint32_t insn;
13332
13333     insn = arm_ldl_code(env, s->pc, s->sctlr_b);
13334     s->insn = insn;
13335     s->pc += 4;
13336
13337     s->fp_access_checked = false;
13338
13339     switch (extract32(insn, 25, 4)) {
13340     case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
13341         unallocated_encoding(s);
13342         break;
13343     case 0x2:
13344         if (!dc_isar_feature(aa64_sve, s) || !disas_sve(s, insn)) {
13345             unallocated_encoding(s);
13346         }
13347         break;
13348     case 0x8: case 0x9: /* Data processing - immediate */
13349         disas_data_proc_imm(s, insn);
13350         break;
13351     case 0xa: case 0xb: /* Branch, exception generation and system insns */
13352         disas_b_exc_sys(s, insn);
13353         break;
13354     case 0x4:
13355     case 0x6:
13356     case 0xc:
13357     case 0xe:      /* Loads and stores */
13358         disas_ldst(s, insn);
13359         break;
13360     case 0x5:
13361     case 0xd:      /* Data processing - register */
13362         disas_data_proc_reg(s, insn);
13363         break;
13364     case 0x7:
13365     case 0xf:      /* Data processing - SIMD and floating point */
13366         disas_data_proc_simd_fp(s, insn);
13367         break;
13368     default:
13369         assert(FALSE); /* all 15 cases should be handled above */
13370         break;
13371     }
13372
13373     /* if we allocated any temporaries, free them here */
13374     free_tmp_a64(s);
13375 }
13376
13377 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
13378                                           CPUState *cpu)
13379 {
13380     DisasContext *dc = container_of(dcbase, DisasContext, base);
13381     CPUARMState *env = cpu->env_ptr;
13382     ARMCPU *arm_cpu = arm_env_get_cpu(env);
13383     int bound;
13384
13385     dc->isar = &arm_cpu->isar;
13386     dc->pc = dc->base.pc_first;
13387     dc->condjmp = 0;
13388
13389     dc->aarch64 = 1;
13390     /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
13391      * there is no secure EL1, so we route exceptions to EL3.
13392      */
13393     dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
13394                                !arm_el_is_aa64(env, 3);
13395     dc->thumb = 0;
13396     dc->sctlr_b = 0;
13397     dc->be_data = ARM_TBFLAG_BE_DATA(dc->base.tb->flags) ? MO_BE : MO_LE;
13398     dc->condexec_mask = 0;
13399     dc->condexec_cond = 0;
13400     dc->mmu_idx = core_to_arm_mmu_idx(env, ARM_TBFLAG_MMUIDX(dc->base.tb->flags));
13401     dc->tbi0 = ARM_TBFLAG_TBI0(dc->base.tb->flags);
13402     dc->tbi1 = ARM_TBFLAG_TBI1(dc->base.tb->flags);
13403     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
13404 #if !defined(CONFIG_USER_ONLY)
13405     dc->user = (dc->current_el == 0);
13406 #endif
13407     dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(dc->base.tb->flags);
13408     dc->sve_excp_el = ARM_TBFLAG_SVEEXC_EL(dc->base.tb->flags);
13409     dc->sve_len = (ARM_TBFLAG_ZCR_LEN(dc->base.tb->flags) + 1) * 16;
13410     dc->vec_len = 0;
13411     dc->vec_stride = 0;
13412     dc->cp_regs = arm_cpu->cp_regs;
13413     dc->features = env->features;
13414
13415     /* Single step state. The code-generation logic here is:
13416      *  SS_ACTIVE == 0:
13417      *   generate code with no special handling for single-stepping (except
13418      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
13419      *   this happens anyway because those changes are all system register or
13420      *   PSTATE writes).
13421      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
13422      *   emit code for one insn
13423      *   emit code to clear PSTATE.SS
13424      *   emit code to generate software step exception for completed step
13425      *   end TB (as usual for having generated an exception)
13426      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
13427      *   emit code to generate a software step exception
13428      *   end the TB
13429      */
13430     dc->ss_active = ARM_TBFLAG_SS_ACTIVE(dc->base.tb->flags);
13431     dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(dc->base.tb->flags);
13432     dc->is_ldex = false;
13433     dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);
13434
13435     /* Bound the number of insns to execute to those left on the page.  */
13436     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
13437
13438     /* If architectural single step active, limit to 1.  */
13439     if (dc->ss_active) {
13440         bound = 1;
13441     }
13442     dc->base.max_insns = MIN(dc->base.max_insns, bound);
13443
13444     init_tmp_a64_array(dc);
13445 }
13446
/*
 * TranslatorOps.tb_start hook.  Intentionally empty: this front end emits
 * nothing at the start of a translation block.
 */
static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}
13450
13451 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
13452 {
13453     DisasContext *dc = container_of(dcbase, DisasContext, base);
13454
13455     tcg_gen_insn_start(dc->pc, 0, 0);
13456     dc->insn_start = tcg_last_op();
13457 }
13458
13459 static bool aarch64_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
13460                                         const CPUBreakpoint *bp)
13461 {
13462     DisasContext *dc = container_of(dcbase, DisasContext, base);
13463
13464     if (bp->flags & BP_CPU) {
13465         gen_a64_set_pc_im(dc->pc);
13466         gen_helper_check_breakpoints(cpu_env);
13467         /* End the TB early; it likely won't be executed */
13468         dc->base.is_jmp = DISAS_TOO_MANY;
13469     } else {
13470         gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
13471         /* The address covered by the breakpoint must be
13472            included in [tb->pc, tb->pc + tb->size) in order
13473            to for it to be properly cleared -- thus we
13474            increment the PC here so that the logic setting
13475            tb->size below does the right thing.  */
13476         dc->pc += 4;
13477         dc->base.is_jmp = DISAS_NORETURN;
13478     }
13479
13480     return true;
13481 }
13482
13483 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
13484 {
13485     DisasContext *dc = container_of(dcbase, DisasContext, base);
13486     CPUARMState *env = cpu->env_ptr;
13487
13488     if (dc->ss_active && !dc->pstate_ss) {
13489         /* Singlestep state is Active-pending.
13490          * If we're in this state at the start of a TB then either
13491          *  a) we just took an exception to an EL which is being debugged
13492          *     and this is the first insn in the exception handler
13493          *  b) debug exceptions were masked and we just unmasked them
13494          *     without changing EL (eg by clearing PSTATE.D)
13495          * In either case we're going to take a swstep exception in the
13496          * "did not step an insn" case, and so the syndrome ISV and EX
13497          * bits should be zero.
13498          */
13499         assert(dc->base.num_insns == 1);
13500         gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
13501                       default_exception_el(dc));
13502         dc->base.is_jmp = DISAS_NORETURN;
13503     } else {
13504         disas_a64_insn(env, dc);
13505     }
13506
13507     dc->base.pc_next = dc->pc;
13508     translator_loop_temp_check(&dc->base);
13509 }
13510
13511 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
13512 {
13513     DisasContext *dc = container_of(dcbase, DisasContext, base);
13514
13515     if (unlikely(dc->base.singlestep_enabled || dc->ss_active)) {
13516         /* Note that this means single stepping WFI doesn't halt the CPU.
13517          * For conditional branch insns this is harmless unreachable code as
13518          * gen_goto_tb() has already handled emitting the debug exception
13519          * (and thus a tb-jump is not possible when singlestepping).
13520          */
13521         switch (dc->base.is_jmp) {
13522         default:
13523             gen_a64_set_pc_im(dc->pc);
13524             /* fall through */
13525         case DISAS_EXIT:
13526         case DISAS_JUMP:
13527             if (dc->base.singlestep_enabled) {
13528                 gen_exception_internal(EXCP_DEBUG);
13529             } else {
13530                 gen_step_complete_exception(dc);
13531             }
13532             break;
13533         case DISAS_NORETURN:
13534             break;
13535         }
13536     } else {
13537         switch (dc->base.is_jmp) {
13538         case DISAS_NEXT:
13539         case DISAS_TOO_MANY:
13540             gen_goto_tb(dc, 1, dc->pc);
13541             break;
13542         default:
13543         case DISAS_UPDATE:
13544             gen_a64_set_pc_im(dc->pc);
13545             /* fall through */
13546         case DISAS_EXIT:
13547             tcg_gen_exit_tb(NULL, 0);
13548             break;
13549         case DISAS_JUMP:
13550             tcg_gen_lookup_and_goto_ptr();
13551             break;
13552         case DISAS_NORETURN:
13553         case DISAS_SWI:
13554             break;
13555         case DISAS_WFE:
13556             gen_a64_set_pc_im(dc->pc);
13557             gen_helper_wfe(cpu_env);
13558             break;
13559         case DISAS_YIELD:
13560             gen_a64_set_pc_im(dc->pc);
13561             gen_helper_yield(cpu_env);
13562             break;
13563         case DISAS_WFI:
13564         {
13565             /* This is a special case because we don't want to just halt the CPU
13566              * if trying to debug across a WFI.
13567              */
13568             TCGv_i32 tmp = tcg_const_i32(4);
13569
13570             gen_a64_set_pc_im(dc->pc);
13571             gen_helper_wfi(cpu_env, tmp);
13572             tcg_temp_free_i32(tmp);
13573             /* The helper doesn't necessarily throw an exception, but we
13574              * must go back to the main loop to check for interrupts anyway.
13575              */
13576             tcg_gen_exit_tb(NULL, 0);
13577             break;
13578         }
13579         }
13580     }
13581
13582     /* Functions above can change dc->pc, so re-align db->pc_next */
13583     dc->base.pc_next = dc->pc;
13584 }
13585
13586 static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
13587                                       CPUState *cpu)
13588 {
13589     DisasContext *dc = container_of(dcbase, DisasContext, base);
13590
13591     qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
13592     log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
13593 }
13594
/*
 * Table of AArch64 translation hooks.  Every entry points at one of the
 * aarch64_tr_* functions defined above; the object has external linkage
 * so that code outside this file can reference it.
 */
const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start           = aarch64_tr_tb_start,
    .insn_start         = aarch64_tr_insn_start,
    .breakpoint_check   = aarch64_tr_breakpoint_check,
    .translate_insn     = aarch64_tr_translate_insn,
    .tb_stop            = aarch64_tr_tb_stop,
    .disas_log          = aarch64_tr_disas_log,
};