target/arm/translate-vfp.c

   1 /*
   2  *  ARM translation: AArch32 VFP instructions
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *  Copyright (c) 2005-2007 CodeSourcery
   6  *  Copyright (c) 2007 OpenedHand, Ltd.
   7  *  Copyright (c) 2019 Linaro, Ltd.
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  21  */
  22
  23 #include "qemu/osdep.h"
  24 #include "tcg/tcg-op.h"
  25 #include "tcg/tcg-op-gvec.h"
  26 #include "exec/exec-all.h"
  27 #include "exec/gen-icount.h"
  28 #include "translate.h"
  29 #include "translate-a32.h"
  30
  31 /* Include the generated VFP decoder */
  32 #include "decode-vfp.c.inc"
  33 #include "decode-vfp-uncond.c.inc"
  34
  35 static inline void vfp_load_reg64(TCGv_i64 var, int reg)
  36 {
  37     tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(true, reg));
  38 }
  39
  40 static inline void vfp_store_reg64(TCGv_i64 var, int reg)
  41 {
  42     tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(true, reg));
  43 }
  44
  45 static inline void vfp_load_reg32(TCGv_i32 var, int reg)
  46 {
  47     tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
  48 }
  49
  50 static inline void vfp_store_reg32(TCGv_i32 var, int reg)
  51 {
  52     tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
  53 }
  54
  55 /*
  56  * The imm8 encodes the sign bit, enough bits to represent an exponent in
  57  * the range 01....1xx to 10....0xx, and the most significant 4 bits of
  58  * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
  59  */
  60 uint64_t vfp_expand_imm(int size, uint8_t imm8)
  61 {
  62     uint64_t imm;
  63
  64     switch (size) {
  65     case MO_64:
  66         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
  67             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
  68             extract32(imm8, 0, 6);
  69         imm <<= 48;
  70         break;
  71     case MO_32:
  72         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
  73             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
  74             (extract32(imm8, 0, 6) << 3);
  75         imm <<= 16;
  76         break;
  77     case MO_16:
  78         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
  79             (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
  80             (extract32(imm8, 0, 6) << 6);
  81         break;
  82     default:
  83         g_assert_not_reached();
  84     }
  85     return imm;
  86 }
  87
  88 /*
  89  * Return the offset of a 16-bit half of the specified VFP single-precision
  90  * register. If top is true, returns the top 16 bits; otherwise the bottom
  91  * 16 bits.
  92  */
  93 static inline long vfp_f16_offset(unsigned reg, bool top)
  94 {
  95     long offs = vfp_reg_offset(false, reg);
  96 #if HOST_BIG_ENDIAN
  97     if (!top) {
  98         offs += 2;
  99     }
 100 #else
 101     if (top) {
 102         offs += 2;
 103     }
 104 #endif
 105     return offs;
 106 }
 107
 108 /*
 109  * Generate code for M-profile lazy FP state preservation if needed;
 110  * this corresponds to the pseudocode PreserveFPState() function.
 111  */
 112 static void gen_preserve_fp_state(DisasContext *s, bool skip_context_update)
 113 {
 114     if (s->v7m_lspact) {
 115         /*
 116          * Lazy state saving affects external memory and also the NVIC,
 117          * so we must mark it as an IO operation for icount (and cause
 118          * this to be the last insn in the TB).
 119          */
 120         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
 121             s->base.is_jmp = DISAS_UPDATE_EXIT;
 122             gen_io_start();
 123         }
 124         gen_helper_v7m_preserve_fp_state(cpu_env);
 125         /*
 126          * If the preserve_fp_state helper doesn't throw an exception
 127          * then it will clear LSPACT; we don't need to repeat this for
 128          * any further FP insns in this TB.
 129          */
 130         s->v7m_lspact = false;
 131         /*
 132          * The helper might have zeroed VPR, so we do not know the
 133          * correct value for the MVE_NO_PRED TB flag any more.
 134          * If we're about to create a new fp context then that
 135          * will precisely determine the MVE_NO_PRED value (see
 136          * gen_update_fp_context()). Otherwise, we must:
 137          *  - set s->mve_no_pred to false, so this instruction
 138          *    is generated to use helper functions
 139          *  - end the TB now, without chaining to the next TB
 140          */
 141         if (skip_context_update || !s->v7m_new_fp_ctxt_needed) {
 142             s->mve_no_pred = false;
 143             s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
 144         }
 145     }
 146 }
 147
 148 /*
 149  * Generate code for M-profile FP context handling: update the
 150  * ownership of the FP context, and create a new context if
 151  * necessary. This corresponds to the parts of the pseudocode
 152  * ExecuteFPCheck() after the inital PreserveFPState() call.
 153  */
 154 static void gen_update_fp_context(DisasContext *s)
 155 {
 156     /* Update ownership of FP context: set FPCCR.S to match current state */
 157     if (s->v8m_fpccr_s_wrong) {
 158         TCGv_i32 tmp;
 159
 160         tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
 161         if (s->v8m_secure) {
 162             tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
 163         } else {
 164             tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
 165         }
 166         store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
 167         /* Don't need to do this for any further FP insns in this TB */
 168         s->v8m_fpccr_s_wrong = false;
 169     }
 170
 171     if (s->v7m_new_fp_ctxt_needed) {
 172         /*
 173          * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA,
 174          * the FPSCR, and VPR.
 175          */
 176         TCGv_i32 control, fpscr;
 177         uint32_t bits = R_V7M_CONTROL_FPCA_MASK;
 178
 179         fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
 180         gen_helper_vfp_set_fpscr(cpu_env, fpscr);
 181         tcg_temp_free_i32(fpscr);
 182         if (dc_isar_feature(aa32_mve, s)) {
 183             store_cpu_field(tcg_constant_i32(0), v7m.vpr);
 184         }
 185         /*
 186          * We just updated the FPSCR and VPR. Some of this state is cached
 187          * in the MVE_NO_PRED TB flag. We want to avoid having to end the
 188          * TB here, which means we need the new value of the MVE_NO_PRED
 189          * flag to be exactly known here and the same for all executions.
 190          * Luckily FPDSCR.LTPSIZE is always constant 4 and the VPR is
 191          * always set to 0, so the new MVE_NO_PRED flag is always 1
 192          * if and only if we have MVE.
 193          *
 194          * (The other FPSCR state cached in TB flags is VECLEN and VECSTRIDE,
 195          * but those do not exist for M-profile, so are not relevant here.)
 196          */
 197         s->mve_no_pred = dc_isar_feature(aa32_mve, s);
 198
 199         if (s->v8m_secure) {
 200             bits |= R_V7M_CONTROL_SFPA_MASK;
 201         }
 202         control = load_cpu_field(v7m.control[M_REG_S]);
 203         tcg_gen_ori_i32(control, control, bits);
 204         store_cpu_field(control, v7m.control[M_REG_S]);
 205         /* Don't need to do this for any further FP insns in this TB */
 206         s->v7m_new_fp_ctxt_needed = false;
 207     }
 208 }
 209
 210 /*
 211  * Check that VFP access is enabled, A-profile specific version.
 212  *
 213  * If VFP is enabled, return true. If not, emit code to generate an
 214  * appropriate exception and return false.
 215  * The ignore_vfp_enabled argument specifies that we should ignore
 216  * whether VFP is enabled via FPEXC.EN: this should be true for FMXR/FMRX
 217  * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
 218  */
 219 static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled)
 220 {
 221     if (s->fp_excp_el) {
 222         gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
 223                            syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
 224         return false;
 225     }
 226
 227     if (!s->vfp_enabled && !ignore_vfp_enabled) {
 228         assert(!arm_dc_feature(s, ARM_FEATURE_M));
 229         unallocated_encoding(s);
 230         return false;
 231     }
 232     return true;
 233 }
 234
 235 /*
 236  * Check that VFP access is enabled, M-profile specific version.
 237  *
 238  * If VFP is enabled, do the necessary M-profile lazy-FP handling and then
 239  * return true. If not, emit code to generate an appropriate exception and
 240  * return false.
 241  * skip_context_update is true to skip the "update FP context" part of this.
 242  */
 243 bool vfp_access_check_m(DisasContext *s, bool skip_context_update)
 244 {
 245     if (s->fp_excp_el) {
 246         /*
 247          * M-profile mostly catches the "FPU disabled" case early, in
 248          * disas_m_nocp(), but a few insns (eg LCTP, WLSTP, DLSTP)
 249          * which do coprocessor-checks are outside the large ranges of
 250          * the encoding space handled by the patterns in m-nocp.decode,
 251          * and for them we may need to raise NOCP here.
 252          */
 253         gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
 254                            syn_uncategorized(), s->fp_excp_el);
 255         return false;
 256     }
 257
 258     /* Handle M-profile lazy FP state mechanics */
 259
 260     /* Trigger lazy-state preservation if necessary */
 261     gen_preserve_fp_state(s, skip_context_update);
 262
 263     if (!skip_context_update) {
 264         /* Update ownership of FP context and create new FP context if needed */
 265         gen_update_fp_context(s);
 266     }
 267
 268     return true;
 269 }
 270
 271 /*
 272  * The most usual kind of VFP access check, for everything except
 273  * FMXR/FMRX to the always-available special registers.
 274  */
 275 bool vfp_access_check(DisasContext *s)
 276 {
 277     if (arm_dc_feature(s, ARM_FEATURE_M)) {
 278         return vfp_access_check_m(s, false);
 279     } else {
 280         return vfp_access_check_a(s, false);
 281     }
 282 }
 283
 284 static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
 285 {
 286     uint32_t rd, rn, rm;
 287     int sz = a->sz;
 288
 289     if (!dc_isar_feature(aa32_vsel, s)) {
 290         return false;
 291     }
 292
 293     if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
 294         return false;
 295     }
 296
 297     if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
 298         return false;
 299     }
 300
 301     /* UNDEF accesses to D16-D31 if they don't exist */
 302     if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
 303         ((a->vm | a->vn | a->vd) & 0x10)) {
 304         return false;
 305     }
 306
 307     rd = a->vd;
 308     rn = a->vn;
 309     rm = a->vm;
 310
 311     if (!vfp_access_check(s)) {
 312         return true;
 313     }
 314
 315     if (sz == 3) {
 316         TCGv_i64 frn, frm, dest;
 317         TCGv_i64 tmp, zero, zf, nf, vf;
 318
 319         zero = tcg_constant_i64(0);
 320
 321         frn = tcg_temp_new_i64();
 322         frm = tcg_temp_new_i64();
 323         dest = tcg_temp_new_i64();
 324
 325         zf = tcg_temp_new_i64();
 326         nf = tcg_temp_new_i64();
 327         vf = tcg_temp_new_i64();
 328
 329         tcg_gen_extu_i32_i64(zf, cpu_ZF);
 330         tcg_gen_ext_i32_i64(nf, cpu_NF);
 331         tcg_gen_ext_i32_i64(vf, cpu_VF);
 332
 333         vfp_load_reg64(frn, rn);
 334         vfp_load_reg64(frm, rm);
 335         switch (a->cc) {
 336         case 0: /* eq: Z */
 337             tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero, frn, frm);
 338             break;
 339         case 1: /* vs: V */
 340             tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero, frn, frm);
 341             break;
 342         case 2: /* ge: N == V -> N ^ V == 0 */
 343             tmp = tcg_temp_new_i64();
 344             tcg_gen_xor_i64(tmp, vf, nf);
 345             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, frn, frm);
 346             tcg_temp_free_i64(tmp);
 347             break;
 348         case 3: /* gt: !Z && N == V */
 349             tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero, frn, frm);
 350             tmp = tcg_temp_new_i64();
 351             tcg_gen_xor_i64(tmp, vf, nf);
 352             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, dest, frm);
 353             tcg_temp_free_i64(tmp);
 354             break;
 355         }
 356         vfp_store_reg64(dest, rd);
 357         tcg_temp_free_i64(frn);
 358         tcg_temp_free_i64(frm);
 359         tcg_temp_free_i64(dest);
 360
 361         tcg_temp_free_i64(zf);
 362         tcg_temp_free_i64(nf);
 363         tcg_temp_free_i64(vf);
 364     } else {
 365         TCGv_i32 frn, frm, dest;
 366         TCGv_i32 tmp, zero;
 367
 368         zero = tcg_constant_i32(0);
 369
 370         frn = tcg_temp_new_i32();
 371         frm = tcg_temp_new_i32();
 372         dest = tcg_temp_new_i32();
 373         vfp_load_reg32(frn, rn);
 374         vfp_load_reg32(frm, rm);
 375         switch (a->cc) {
 376         case 0: /* eq: Z */
 377             tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero, frn, frm);
 378             break;
 379         case 1: /* vs: V */
 380             tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero, frn, frm);
 381             break;
 382         case 2: /* ge: N == V -> N ^ V == 0 */
 383             tmp = tcg_temp_new_i32();
 384             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
 385             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, frn, frm);
 386             tcg_temp_free_i32(tmp);
 387             break;
 388         case 3: /* gt: !Z && N == V */
 389             tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero, frn, frm);
 390             tmp = tcg_temp_new_i32();
 391             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
 392             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, dest, frm);
 393             tcg_temp_free_i32(tmp);
 394             break;
 395         }
 396         /* For fp16 the top half is always zeroes */
 397         if (sz == 1) {
 398             tcg_gen_andi_i32(dest, dest, 0xffff);
 399         }
 400         vfp_store_reg32(dest, rd);
 401         tcg_temp_free_i32(frn);
 402         tcg_temp_free_i32(frm);
 403         tcg_temp_free_i32(dest);
 404     }
 405
 406     return true;
 407 }
 408
 409 /*
 410  * Table for converting the most common AArch32 encoding of
 411  * rounding mode to arm_fprounding order (which matches the
 412  * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
 413  */
 414 static const uint8_t fp_decode_rm[] = {
 415     FPROUNDING_TIEAWAY,
 416     FPROUNDING_TIEEVEN,
 417     FPROUNDING_POSINF,
 418     FPROUNDING_NEGINF,
 419 };
 420
 421 static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
 422 {
 423     uint32_t rd, rm;
 424     int sz = a->sz;
 425     TCGv_ptr fpst;
 426     TCGv_i32 tcg_rmode;
 427     int rounding = fp_decode_rm[a->rm];
 428
 429     if (!dc_isar_feature(aa32_vrint, s)) {
 430         return false;
 431     }
 432
 433     if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
 434         return false;
 435     }
 436
 437     if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
 438         return false;
 439     }
 440
 441     /* UNDEF accesses to D16-D31 if they don't exist */
 442     if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
 443         ((a->vm | a->vd) & 0x10)) {
 444         return false;
 445     }
 446
 447     rd = a->vd;
 448     rm = a->vm;
 449
 450     if (!vfp_access_check(s)) {
 451         return true;
 452     }
 453
 454     if (sz == 1) {
 455         fpst = fpstatus_ptr(FPST_FPCR_F16);
 456     } else {
 457         fpst = fpstatus_ptr(FPST_FPCR);
 458     }
 459
 460     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
 461     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 462
 463     if (sz == 3) {
 464         TCGv_i64 tcg_op;
 465         TCGv_i64 tcg_res;
 466         tcg_op = tcg_temp_new_i64();
 467         tcg_res = tcg_temp_new_i64();
 468         vfp_load_reg64(tcg_op, rm);
 469         gen_helper_rintd(tcg_res, tcg_op, fpst);
 470         vfp_store_reg64(tcg_res, rd);
 471         tcg_temp_free_i64(tcg_op);
 472         tcg_temp_free_i64(tcg_res);
 473     } else {
 474         TCGv_i32 tcg_op;
 475         TCGv_i32 tcg_res;
 476         tcg_op = tcg_temp_new_i32();
 477         tcg_res = tcg_temp_new_i32();
 478         vfp_load_reg32(tcg_op, rm);
 479         if (sz == 1) {
 480             gen_helper_rinth(tcg_res, tcg_op, fpst);
 481         } else {
 482             gen_helper_rints(tcg_res, tcg_op, fpst);
 483         }
 484         vfp_store_reg32(tcg_res, rd);
 485         tcg_temp_free_i32(tcg_op);
 486         tcg_temp_free_i32(tcg_res);
 487     }
 488
 489     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 490     tcg_temp_free_i32(tcg_rmode);
 491
 492     tcg_temp_free_ptr(fpst);
 493     return true;
 494 }
 495
 496 static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
 497 {
 498     uint32_t rd, rm;
 499     int sz = a->sz;
 500     TCGv_ptr fpst;
 501     TCGv_i32 tcg_rmode, tcg_shift;
 502     int rounding = fp_decode_rm[a->rm];
 503     bool is_signed = a->op;
 504
 505     if (!dc_isar_feature(aa32_vcvt_dr, s)) {
 506         return false;
 507     }
 508
 509     if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
 510         return false;
 511     }
 512
 513     if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
 514         return false;
 515     }
 516
 517     /* UNDEF accesses to D16-D31 if they don't exist */
 518     if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
 519         return false;
 520     }
 521
 522     rd = a->vd;
 523     rm = a->vm;
 524
 525     if (!vfp_access_check(s)) {
 526         return true;
 527     }
 528
 529     if (sz == 1) {
 530         fpst = fpstatus_ptr(FPST_FPCR_F16);
 531     } else {
 532         fpst = fpstatus_ptr(FPST_FPCR);
 533     }
 534
 535     tcg_shift = tcg_constant_i32(0);
 536
 537     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
 538     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 539
 540     if (sz == 3) {
 541         TCGv_i64 tcg_double, tcg_res;
 542         TCGv_i32 tcg_tmp;
 543         tcg_double = tcg_temp_new_i64();
 544         tcg_res = tcg_temp_new_i64();
 545         tcg_tmp = tcg_temp_new_i32();
 546         vfp_load_reg64(tcg_double, rm);
 547         if (is_signed) {
 548             gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
 549         } else {
 550             gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
 551         }
 552         tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
 553         vfp_store_reg32(tcg_tmp, rd);
 554         tcg_temp_free_i32(tcg_tmp);
 555         tcg_temp_free_i64(tcg_res);
 556         tcg_temp_free_i64(tcg_double);
 557     } else {
 558         TCGv_i32 tcg_single, tcg_res;
 559         tcg_single = tcg_temp_new_i32();
 560         tcg_res = tcg_temp_new_i32();
 561         vfp_load_reg32(tcg_single, rm);
 562         if (sz == 1) {
 563             if (is_signed) {
 564                 gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst);
 565             } else {
 566                 gen_helper_vfp_toulh(tcg_res, tcg_single, tcg_shift, fpst);
 567             }
 568         } else {
 569             if (is_signed) {
 570                 gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
 571             } else {
 572                 gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
 573             }
 574         }
 575         vfp_store_reg32(tcg_res, rd);
 576         tcg_temp_free_i32(tcg_res);
 577         tcg_temp_free_i32(tcg_single);
 578     }
 579
 580     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 581     tcg_temp_free_i32(tcg_rmode);
 582
 583     tcg_temp_free_ptr(fpst);
 584
 585     return true;
 586 }
 587
 588 bool mve_skip_vmov(DisasContext *s, int vn, int index, int size)
 589 {
 590     /*
 591      * In a CPU with MVE, the VMOV (vector lane to general-purpose register)
 592      * and VMOV (general-purpose register to vector lane) insns are not
 593      * predicated, but they are subject to beatwise execution if they are
 594      * not in an IT block.
 595      *
 596      * Since our implementation always executes all 4 beats in one tick,
 597      * this means only that if PSR.ECI says we should not be executing
 598      * the beat corresponding to the lane of the vector register being
 599      * accessed then we should skip performing the move, and that we need
 600      * to do the usual check for bad ECI state and advance of ECI state.
 601      *
 602      * Note that if PSR.ECI is non-zero then we cannot be in an IT block.
 603      *
 604      * Return true if this VMOV scalar <-> gpreg should be skipped because
 605      * the MVE PSR.ECI state says we skip the beat where the store happens.
 606      */
 607
 608     /* Calculate the byte offset into Qn which we're going to access */
 609     int ofs = (index << size) + ((vn & 1) * 8);
 610
 611     if (!dc_isar_feature(aa32_mve, s)) {
 612         return false;
 613     }
 614
 615     switch (s->eci) {
 616     case ECI_NONE:
 617         return false;
 618     case ECI_A0:
 619         return ofs < 4;
 620     case ECI_A0A1:
 621         return ofs < 8;
 622     case ECI_A0A1A2:
 623     case ECI_A0A1A2B0:
 624         return ofs < 12;
 625     default:
 626         g_assert_not_reached();
 627     }
 628 }
 629
 630 static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
 631 {
 632     /* VMOV scalar to general purpose register */
 633     TCGv_i32 tmp;
 634
 635     /*
 636      * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has
 637      * all sizes, whether the CPU has fp or not.
 638      */
 639     if (!dc_isar_feature(aa32_mve, s)) {
 640         if (a->size == MO_32
 641             ? !dc_isar_feature(aa32_fpsp_v2, s)
 642             : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
 643             return false;
 644         }
 645     }
 646
 647     /* UNDEF accesses to D16-D31 if they don't exist */
 648     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
 649         return false;
 650     }
 651
 652     if (dc_isar_feature(aa32_mve, s)) {
 653         if (!mve_eci_check(s)) {
 654             return true;
 655         }
 656     }
 657
 658     if (!vfp_access_check(s)) {
 659         return true;
 660     }
 661
 662     if (!mve_skip_vmov(s, a->vn, a->index, a->size)) {
 663         tmp = tcg_temp_new_i32();
 664         read_neon_element32(tmp, a->vn, a->index,
 665                             a->size | (a->u ? 0 : MO_SIGN));
 666         store_reg(s, a->rt, tmp);
 667     }
 668
 669     if (dc_isar_feature(aa32_mve, s)) {
 670         mve_update_and_store_eci(s);
 671     }
 672     return true;
 673 }
 674
 675 static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
 676 {
 677     /* VMOV general purpose register to scalar */
 678     TCGv_i32 tmp;
 679
 680     /*
 681      * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has
 682      * all sizes, whether the CPU has fp or not.
 683      */
 684     if (!dc_isar_feature(aa32_mve, s)) {
 685         if (a->size == MO_32
 686             ? !dc_isar_feature(aa32_fpsp_v2, s)
 687             : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
 688             return false;
 689         }
 690     }
 691
 692     /* UNDEF accesses to D16-D31 if they don't exist */
 693     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
 694         return false;
 695     }
 696
 697     if (dc_isar_feature(aa32_mve, s)) {
 698         if (!mve_eci_check(s)) {
 699             return true;
 700         }
 701     }
 702
 703     if (!vfp_access_check(s)) {
 704         return true;
 705     }
 706
 707     if (!mve_skip_vmov(s, a->vn, a->index, a->size)) {
 708         tmp = load_reg(s, a->rt);
 709         write_neon_element32(tmp, a->vn, a->index, a->size);
 710         tcg_temp_free_i32(tmp);
 711     }
 712
 713     if (dc_isar_feature(aa32_mve, s)) {
 714         mve_update_and_store_eci(s);
 715     }
 716     return true;
 717 }
 718
 719 static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
 720 {
 721     /* VDUP (general purpose register) */
 722     TCGv_i32 tmp;
 723     int size, vec_size;
 724
 725     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
 726         return false;
 727     }
 728
 729     /* UNDEF accesses to D16-D31 if they don't exist */
 730     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
 731         return false;
 732     }
 733
 734     if (a->b && a->e) {
 735         return false;
 736     }
 737
 738     if (a->q && (a->vn & 1)) {
 739         return false;
 740     }
 741
 742     vec_size = a->q ? 16 : 8;
 743     if (a->b) {
 744         size = 0;
 745     } else if (a->e) {
 746         size = 1;
 747     } else {
 748         size = 2;
 749     }
 750
 751     if (!vfp_access_check(s)) {
 752         return true;
 753     }
 754
 755     tmp = load_reg(s, a->rt);
 756     tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn),
 757                          vec_size, vec_size, tmp);
 758     tcg_temp_free_i32(tmp);
 759
 760     return true;
 761 }
 762
 763 static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
 764 {
 765     TCGv_i32 tmp;
 766     bool ignore_vfp_enabled = false;
 767
 768     if (arm_dc_feature(s, ARM_FEATURE_M)) {
 769         /* M profile version was already handled in m-nocp.decode */
 770         return false;
 771     }
 772
 773     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
 774         return false;
 775     }
 776
 777     switch (a->reg) {
 778     case ARM_VFP_FPSID:
 779         /*
 780          * VFPv2 allows access to FPSID from userspace; VFPv3 restricts
 781          * all ID registers to privileged access only.
 782          */
 783         if (IS_USER(s) && dc_isar_feature(aa32_fpsp_v3, s)) {
 784             return false;
 785         }
 786         ignore_vfp_enabled = true;
 787         break;
 788     case ARM_VFP_MVFR0:
 789     case ARM_VFP_MVFR1:
 790         if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
 791             return false;
 792         }
 793         ignore_vfp_enabled = true;
 794         break;
 795     case ARM_VFP_MVFR2:
 796         if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) {
 797             return false;
 798         }
 799         ignore_vfp_enabled = true;
 800         break;
 801     case ARM_VFP_FPSCR:
 802         break;
 803     case ARM_VFP_FPEXC:
 804         if (IS_USER(s)) {
 805             return false;
 806         }
 807         ignore_vfp_enabled = true;
 808         break;
 809     case ARM_VFP_FPINST:
 810     case ARM_VFP_FPINST2:
 811         /* Not present in VFPv3 */
 812         if (IS_USER(s) || dc_isar_feature(aa32_fpsp_v3, s)) {
 813             return false;
 814         }
 815         break;
 816     default:
 817         return false;
 818     }
 819
 820     /*
 821      * Call vfp_access_check_a() directly, because we need to tell
 822      * it to ignore FPEXC.EN for some register accesses.
 823      */
 824     if (!vfp_access_check_a(s, ignore_vfp_enabled)) {
 825         return true;
 826     }
 827
 828     if (a->l) {
 829         /* VMRS, move VFP special register to gp register */
 830         switch (a->reg) {
 831         case ARM_VFP_MVFR0:
 832         case ARM_VFP_MVFR1:
 833         case ARM_VFP_MVFR2:
 834         case ARM_VFP_FPSID:
 835             if (s->current_el == 1) {
 836                 gen_set_condexec(s);
 837                 gen_set_pc_im(s, s->pc_curr);
 838                 gen_helper_check_hcr_el2_trap(cpu_env,
 839                                               tcg_constant_i32(a->rt),
 840                                               tcg_constant_i32(a->reg));
 841             }
 842             /* fall through */
 843         case ARM_VFP_FPEXC:
 844         case ARM_VFP_FPINST:
 845         case ARM_VFP_FPINST2:
 846             tmp = load_cpu_field(vfp.xregs[a->reg]);
 847             break;
 848         case ARM_VFP_FPSCR:
 849             if (a->rt == 15) {
 850                 tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
 851                 tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
 852             } else {
 853                 tmp = tcg_temp_new_i32();
 854                 gen_helper_vfp_get_fpscr(tmp, cpu_env);
 855             }
 856             break;
 857         default:
 858             g_assert_not_reached();
 859         }
 860
 861         if (a->rt == 15) {
 862             /* Set the 4 flag bits in the CPSR.  */
 863             gen_set_nzcv(tmp);
 864             tcg_temp_free_i32(tmp);
 865         } else {
 866             store_reg(s, a->rt, tmp);
 867         }
 868     } else {
 869         /* VMSR, move gp register to VFP special register */
 870         switch (a->reg) {
 871         case ARM_VFP_FPSID:
 872         case ARM_VFP_MVFR0:
 873         case ARM_VFP_MVFR1:
 874         case ARM_VFP_MVFR2:
 875             /* Writes are ignored.  */
 876             break;
 877         case ARM_VFP_FPSCR:
 878             tmp = load_reg(s, a->rt);
 879             gen_helper_vfp_set_fpscr(cpu_env, tmp);
 880             tcg_temp_free_i32(tmp);
 881             gen_lookup_tb(s);
 882             break;
 883         case ARM_VFP_FPEXC:
 884             /*
 885              * TODO: VFP subarchitecture support.
 886              * For now, keep the EN bit only
 887              */
 888             tmp = load_reg(s, a->rt);
 889             tcg_gen_andi_i32(tmp, tmp, 1 << 30);
 890             store_cpu_field(tmp, vfp.xregs[a->reg]);
 891             gen_lookup_tb(s);
 892             break;
 893         case ARM_VFP_FPINST:
 894         case ARM_VFP_FPINST2:
 895             tmp = load_reg(s, a->rt);
 896             store_cpu_field(tmp, vfp.xregs[a->reg]);
 897             break;
 898         default:
 899             g_assert_not_reached();
 900         }
 901     }
 902
 903     return true;
 904 }
 905
 906
 907 static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
 908 {
 909     TCGv_i32 tmp;
 910
 911     if (!dc_isar_feature(aa32_fp16_arith, s)) {
 912         return false;
 913     }
 914
 915     if (a->rt == 15) {
 916         /* UNPREDICTABLE; we choose to UNDEF */
 917         return false;
 918     }
 919
 920     if (!vfp_access_check(s)) {
 921         return true;
 922     }
 923
 924     if (a->l) {
 925         /* VFP to general purpose register */
 926         tmp = tcg_temp_new_i32();
 927         vfp_load_reg32(tmp, a->vn);
 928         tcg_gen_andi_i32(tmp, tmp, 0xffff);
 929         store_reg(s, a->rt, tmp);
 930     } else {
 931         /* general purpose register to VFP */
 932         tmp = load_reg(s, a->rt);
 933         tcg_gen_andi_i32(tmp, tmp, 0xffff);
 934         vfp_store_reg32(tmp, a->vn);
 935         tcg_temp_free_i32(tmp);
 936     }
 937
 938     return true;
 939 }
 940
 941 static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
 942 {
 943     TCGv_i32 tmp;
 944
 945     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
 946         return false;
 947     }
 948
 949     if (!vfp_access_check(s)) {
 950         return true;
 951     }
 952
 953     if (a->l) {
 954         /* VFP to general purpose register */
 955         tmp = tcg_temp_new_i32();
 956         vfp_load_reg32(tmp, a->vn);
 957         if (a->rt == 15) {
 958             /* Set the 4 flag bits in the CPSR.  */
 959             gen_set_nzcv(tmp);
 960             tcg_temp_free_i32(tmp);
 961         } else {
 962             store_reg(s, a->rt, tmp);
 963         }
 964     } else {
 965         /* general purpose register to VFP */
 966         tmp = load_reg(s, a->rt);
 967         vfp_store_reg32(tmp, a->vn);
 968         tcg_temp_free_i32(tmp);
 969     }
 970
 971     return true;
 972 }
 973
 974 static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
 975 {
 976     TCGv_i32 tmp;
 977
 978     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
 979         return false;
 980     }
 981
 982     /*
 983      * VMOV between two general-purpose registers and two single precision
 984      * floating point registers
 985      */
 986     if (!vfp_access_check(s)) {
 987         return true;
 988     }
 989
 990     if (a->op) {
 991         /* fpreg to gpreg */
 992         tmp = tcg_temp_new_i32();
 993         vfp_load_reg32(tmp, a->vm);
 994         store_reg(s, a->rt, tmp);
 995         tmp = tcg_temp_new_i32();
 996         vfp_load_reg32(tmp, a->vm + 1);
 997         store_reg(s, a->rt2, tmp);
 998     } else {
 999         /* gpreg to fpreg */
1000         tmp = load_reg(s, a->rt);
1001         vfp_store_reg32(tmp, a->vm);
1002         tcg_temp_free_i32(tmp);
1003         tmp = load_reg(s, a->rt2);
1004         vfp_store_reg32(tmp, a->vm + 1);
1005         tcg_temp_free_i32(tmp);
1006     }
1007
1008     return true;
1009 }
1010
1011 static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
1012 {
1013     TCGv_i32 tmp;
1014
1015     /*
1016      * VMOV between two general-purpose registers and one double precision
1017      * floating point register.  Note that this does not require support
1018      * for double precision arithmetic.
1019      */
1020     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1021         return false;
1022     }
1023
1024     /* UNDEF accesses to D16-D31 if they don't exist */
1025     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
1026         return false;
1027     }
1028
1029     if (!vfp_access_check(s)) {
1030         return true;
1031     }
1032
1033     if (a->op) {
1034         /* fpreg to gpreg */
1035         tmp = tcg_temp_new_i32();
1036         vfp_load_reg32(tmp, a->vm * 2);
1037         store_reg(s, a->rt, tmp);
1038         tmp = tcg_temp_new_i32();
1039         vfp_load_reg32(tmp, a->vm * 2 + 1);
1040         store_reg(s, a->rt2, tmp);
1041     } else {
1042         /* gpreg to fpreg */
1043         tmp = load_reg(s, a->rt);
1044         vfp_store_reg32(tmp, a->vm * 2);
1045         tcg_temp_free_i32(tmp);
1046         tmp = load_reg(s, a->rt2);
1047         vfp_store_reg32(tmp, a->vm * 2 + 1);
1048         tcg_temp_free_i32(tmp);
1049     }
1050
1051     return true;
1052 }
1053
1054 static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a)
1055 {
1056     uint32_t offset;
1057     TCGv_i32 addr, tmp;
1058
1059     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1060         return false;
1061     }
1062
1063     if (!vfp_access_check(s)) {
1064         return true;
1065     }
1066
1067     /* imm8 field is offset/2 for fp16, unlike fp32 and fp64 */
1068     offset = a->imm << 1;
1069     if (!a->u) {
1070         offset = -offset;
1071     }
1072
1073     /* For thumb, use of PC is UNPREDICTABLE.  */
1074     addr = add_reg_for_lit(s, a->rn, offset);
1075     tmp = tcg_temp_new_i32();
1076     if (a->l) {
1077         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN);
1078         vfp_store_reg32(tmp, a->vd);
1079     } else {
1080         vfp_load_reg32(tmp, a->vd);
1081         gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN);
1082     }
1083     tcg_temp_free_i32(tmp);
1084     tcg_temp_free_i32(addr);
1085
1086     return true;
1087 }
1088
1089 static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
1090 {
1091     uint32_t offset;
1092     TCGv_i32 addr, tmp;
1093
1094     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1095         return false;
1096     }
1097
1098     if (!vfp_access_check(s)) {
1099         return true;
1100     }
1101
1102     offset = a->imm << 2;
1103     if (!a->u) {
1104         offset = -offset;
1105     }
1106
1107     /* For thumb, use of PC is UNPREDICTABLE.  */
1108     addr = add_reg_for_lit(s, a->rn, offset);
1109     tmp = tcg_temp_new_i32();
1110     if (a->l) {
1111         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
1112         vfp_store_reg32(tmp, a->vd);
1113     } else {
1114         vfp_load_reg32(tmp, a->vd);
1115         gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
1116     }
1117     tcg_temp_free_i32(tmp);
1118     tcg_temp_free_i32(addr);
1119
1120     return true;
1121 }
1122
1123 static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
1124 {
1125     uint32_t offset;
1126     TCGv_i32 addr;
1127     TCGv_i64 tmp;
1128
1129     /* Note that this does not require support for double arithmetic.  */
1130     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1131         return false;
1132     }
1133
1134     /* UNDEF accesses to D16-D31 if they don't exist */
1135     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
1136         return false;
1137     }
1138
1139     if (!vfp_access_check(s)) {
1140         return true;
1141     }
1142
1143     offset = a->imm << 2;
1144     if (!a->u) {
1145         offset = -offset;
1146     }
1147
1148     /* For thumb, use of PC is UNPREDICTABLE.  */
1149     addr = add_reg_for_lit(s, a->rn, offset);
1150     tmp = tcg_temp_new_i64();
1151     if (a->l) {
1152         gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4);
1153         vfp_store_reg64(tmp, a->vd);
1154     } else {
1155         vfp_load_reg64(tmp, a->vd);
1156         gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4);
1157     }
1158     tcg_temp_free_i64(tmp);
1159     tcg_temp_free_i32(addr);
1160
1161     return true;
1162 }
1163
1164 static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
1165 {
1166     uint32_t offset;
1167     TCGv_i32 addr, tmp;
1168     int i, n;
1169
1170     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1171         return false;
1172     }
1173
1174     n = a->imm;
1175
1176     if (n == 0 || (a->vd + n) > 32) {
1177         /*
1178          * UNPREDICTABLE cases for bad immediates: we choose to
1179          * UNDEF to avoid generating huge numbers of TCG ops
1180          */
1181         return false;
1182     }
1183     if (a->rn == 15 && a->w) {
1184         /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
1185         return false;
1186     }
1187
1188     s->eci_handled = true;
1189
1190     if (!vfp_access_check(s)) {
1191         return true;
1192     }
1193
1194     /* For thumb, use of PC is UNPREDICTABLE.  */
1195     addr = add_reg_for_lit(s, a->rn, 0);
1196     if (a->p) {
1197         /* pre-decrement */
1198         tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
1199     }
1200
1201     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1202         /*
1203          * Here 'addr' is the lowest address we will store to,
1204          * and is either the old SP (if post-increment) or
1205          * the new SP (if pre-decrement). For post-increment
1206          * where the old value is below the limit and the new
1207          * value is above, it is UNKNOWN whether the limit check
1208          * triggers; we choose to trigger.
1209          */
1210         gen_helper_v8m_stackcheck(cpu_env, addr);
1211     }
1212
1213     offset = 4;
1214     tmp = tcg_temp_new_i32();
1215     for (i = 0; i < n; i++) {
1216         if (a->l) {
1217             /* load */
1218             gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
1219             vfp_store_reg32(tmp, a->vd + i);
1220         } else {
1221             /* store */
1222             vfp_load_reg32(tmp, a->vd + i);
1223             gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
1224         }
1225         tcg_gen_addi_i32(addr, addr, offset);
1226     }
1227     tcg_temp_free_i32(tmp);
1228     if (a->w) {
1229         /* writeback */
1230         if (a->p) {
1231             offset = -offset * n;
1232             tcg_gen_addi_i32(addr, addr, offset);
1233         }
1234         store_reg(s, a->rn, addr);
1235     } else {
1236         tcg_temp_free_i32(addr);
1237     }
1238
1239     clear_eci_state(s);
1240     return true;
1241 }
1242
1243 static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
1244 {
1245     uint32_t offset;
1246     TCGv_i32 addr;
1247     TCGv_i64 tmp;
1248     int i, n;
1249
1250     /* Note that this does not require support for double arithmetic.  */
1251     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1252         return false;
1253     }
1254
1255     n = a->imm >> 1;
1256
1257     if (n == 0 || (a->vd + n) > 32 || n > 16) {
1258         /*
1259          * UNPREDICTABLE cases for bad immediates: we choose to
1260          * UNDEF to avoid generating huge numbers of TCG ops
1261          */
1262         return false;
1263     }
1264     if (a->rn == 15 && a->w) {
1265         /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
1266         return false;
1267     }
1268
1269     /* UNDEF accesses to D16-D31 if they don't exist */
1270     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) {
1271         return false;
1272     }
1273
1274     s->eci_handled = true;
1275
1276     if (!vfp_access_check(s)) {
1277         return true;
1278     }
1279
1280     /* For thumb, use of PC is UNPREDICTABLE.  */
1281     addr = add_reg_for_lit(s, a->rn, 0);
1282     if (a->p) {
1283         /* pre-decrement */
1284         tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
1285     }
1286
1287     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1288         /*
1289          * Here 'addr' is the lowest address we will store to,
1290          * and is either the old SP (if post-increment) or
1291          * the new SP (if pre-decrement). For post-increment
1292          * where the old value is below the limit and the new
1293          * value is above, it is UNKNOWN whether the limit check
1294          * triggers; we choose to trigger.
1295          */
1296         gen_helper_v8m_stackcheck(cpu_env, addr);
1297     }
1298
1299     offset = 8;
1300     tmp = tcg_temp_new_i64();
1301     for (i = 0; i < n; i++) {
1302         if (a->l) {
1303             /* load */
1304             gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4);
1305             vfp_store_reg64(tmp, a->vd + i);
1306         } else {
1307             /* store */
1308             vfp_load_reg64(tmp, a->vd + i);
1309             gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4);
1310         }
1311         tcg_gen_addi_i32(addr, addr, offset);
1312     }
1313     tcg_temp_free_i64(tmp);
1314     if (a->w) {
1315         /* writeback */
1316         if (a->p) {
1317             offset = -offset * n;
1318         } else if (a->imm & 1) {
1319             offset = 4;
1320         } else {
1321             offset = 0;
1322         }
1323
1324         if (offset != 0) {
1325             tcg_gen_addi_i32(addr, addr, offset);
1326         }
1327         store_reg(s, a->rn, addr);
1328     } else {
1329         tcg_temp_free_i32(addr);
1330     }
1331
1332     clear_eci_state(s);
1333     return true;
1334 }
1335
/*
 * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp().
 * The callback should emit code to write a value to vd. If
 * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd
 * will contain the old value of the relevant VFP register;
 * otherwise it must be written to only.
 *
 * vn and vm hold the two source operands already loaded from the
 * VFP register file, and fpst is the float status pointer the
 * caller obtained from fpstatus_ptr().  The SP callback type is
 * also used by do_vfp_3op_hp() for half-precision operations.
 */
typedef void VFPGen3OpSPFn(TCGv_i32 vd,
                           TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst);
typedef void VFPGen3OpDPFn(TCGv_i64 vd,
                           TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);
1347
/*
 * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp().
 * The callback should emit code to write a value to vd (which
 * should be written to only).
 *
 * vm holds the single source operand.  Note that do_vfp_2op_hp()
 * also takes the SP callback type and invokes it with the same
 * temporary as both vd and vm.
 */
typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);
1355
/*
 * Tell whether an S register lives in the scalar bank, i.e. whether
 * it is one of s0..s7 (bits [4:3] of the register number are clear).
 */
static inline bool vfp_sreg_is_scalar(int reg)
{
    const int bank_bits = reg & 0x18;

    return !bank_bits;
}
1364
/*
 * Tell whether a D register lives in a scalar bank, i.e. whether it
 * is one of d0..d3 or d16..d19 (bits [3:2] of the register number
 * are clear).
 */
static inline bool vfp_dreg_is_scalar(int reg)
{
    const int bank_bits = reg & 0xc;

    return !bank_bits;
}
1373
/*
 * Step an S register number forwards by delta, wrapping around
 * inside its bank of eight: only the low 3 bits change, the bank
 * selection in the upper bits is preserved.
 */
static inline int vfp_advance_sreg(int reg, int delta)
{
    const int bank = reg & ~0x7;
    const int index = (reg + delta) & 0x7;

    return index | bank;
}
1382
/*
 * Step a D register number forwards by delta, wrapping around
 * inside its bank of four: only the low 2 bits change, the bank
 * selection in the upper bits is preserved.
 */
static inline int vfp_advance_dreg(int reg, int delta)
{
    const int bank = reg & ~0x3;
    const int index = (reg + delta) & 0x3;

    return index | bank;
}
1391
1392 /*
1393  * Perform a 3-operand VFP data processing instruction. fn is the
1394  * callback to do the actual operation; this function deals with the
1395  * code to handle looping around for VFP vector processing.
1396  */
1397 static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
1398                           int vd, int vn, int vm, bool reads_vd)
1399 {
1400     uint32_t delta_m = 0;
1401     uint32_t delta_d = 0;
1402     int veclen = s->vec_len;
1403     TCGv_i32 f0, f1, fd;
1404     TCGv_ptr fpst;
1405
1406     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1407         return false;
1408     }
1409
1410     if (!dc_isar_feature(aa32_fpshvec, s) &&
1411         (veclen != 0 || s->vec_stride != 0)) {
1412         return false;
1413     }
1414
1415     if (!vfp_access_check(s)) {
1416         return true;
1417     }
1418
1419     if (veclen > 0) {
1420         /* Figure out what type of vector operation this is.  */
1421         if (vfp_sreg_is_scalar(vd)) {
1422             /* scalar */
1423             veclen = 0;
1424         } else {
1425             delta_d = s->vec_stride + 1;
1426
1427             if (vfp_sreg_is_scalar(vm)) {
1428                 /* mixed scalar/vector */
1429                 delta_m = 0;
1430             } else {
1431                 /* vector */
1432                 delta_m = delta_d;
1433             }
1434         }
1435     }
1436
1437     f0 = tcg_temp_new_i32();
1438     f1 = tcg_temp_new_i32();
1439     fd = tcg_temp_new_i32();
1440     fpst = fpstatus_ptr(FPST_FPCR);
1441
1442     vfp_load_reg32(f0, vn);
1443     vfp_load_reg32(f1, vm);
1444
1445     for (;;) {
1446         if (reads_vd) {
1447             vfp_load_reg32(fd, vd);
1448         }
1449         fn(fd, f0, f1, fpst);
1450         vfp_store_reg32(fd, vd);
1451
1452         if (veclen == 0) {
1453             break;
1454         }
1455
1456         /* Set up the operands for the next iteration */
1457         veclen--;
1458         vd = vfp_advance_sreg(vd, delta_d);
1459         vn = vfp_advance_sreg(vn, delta_d);
1460         vfp_load_reg32(f0, vn);
1461         if (delta_m) {
1462             vm = vfp_advance_sreg(vm, delta_m);
1463             vfp_load_reg32(f1, vm);
1464         }
1465     }
1466
1467     tcg_temp_free_i32(f0);
1468     tcg_temp_free_i32(f1);
1469     tcg_temp_free_i32(fd);
1470     tcg_temp_free_ptr(fpst);
1471
1472     return true;
1473 }
1474
1475 static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
1476                           int vd, int vn, int vm, bool reads_vd)
1477 {
1478     /*
1479      * Do a half-precision operation. Functionally this is
1480      * the same as do_vfp_3op_sp(), except:
1481      *  - it uses the FPST_FPCR_F16
1482      *  - it doesn't need the VFP vector handling (fp16 is a
1483      *    v8 feature, and in v8 VFP vectors don't exist)
1484      *  - it does the aa32_fp16_arith feature test
1485      */
1486     TCGv_i32 f0, f1, fd;
1487     TCGv_ptr fpst;
1488
1489     if (!dc_isar_feature(aa32_fp16_arith, s)) {
1490         return false;
1491     }
1492
1493     if (s->vec_len != 0 || s->vec_stride != 0) {
1494         return false;
1495     }
1496
1497     if (!vfp_access_check(s)) {
1498         return true;
1499     }
1500
1501     f0 = tcg_temp_new_i32();
1502     f1 = tcg_temp_new_i32();
1503     fd = tcg_temp_new_i32();
1504     fpst = fpstatus_ptr(FPST_FPCR_F16);
1505
1506     vfp_load_reg32(f0, vn);
1507     vfp_load_reg32(f1, vm);
1508
1509     if (reads_vd) {
1510         vfp_load_reg32(fd, vd);
1511     }
1512     fn(fd, f0, f1, fpst);
1513     vfp_store_reg32(fd, vd);
1514
1515     tcg_temp_free_i32(f0);
1516     tcg_temp_free_i32(f1);
1517     tcg_temp_free_i32(fd);
1518     tcg_temp_free_ptr(fpst);
1519
1520     return true;
1521 }
1522
1523 static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
1524                           int vd, int vn, int vm, bool reads_vd)
1525 {
1526     uint32_t delta_m = 0;
1527     uint32_t delta_d = 0;
1528     int veclen = s->vec_len;
1529     TCGv_i64 f0, f1, fd;
1530     TCGv_ptr fpst;
1531
1532     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
1533         return false;
1534     }
1535
1536     /* UNDEF accesses to D16-D31 if they don't exist */
1537     if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) {
1538         return false;
1539     }
1540
1541     if (!dc_isar_feature(aa32_fpshvec, s) &&
1542         (veclen != 0 || s->vec_stride != 0)) {
1543         return false;
1544     }
1545
1546     if (!vfp_access_check(s)) {
1547         return true;
1548     }
1549
1550     if (veclen > 0) {
1551         /* Figure out what type of vector operation this is.  */
1552         if (vfp_dreg_is_scalar(vd)) {
1553             /* scalar */
1554             veclen = 0;
1555         } else {
1556             delta_d = (s->vec_stride >> 1) + 1;
1557
1558             if (vfp_dreg_is_scalar(vm)) {
1559                 /* mixed scalar/vector */
1560                 delta_m = 0;
1561             } else {
1562                 /* vector */
1563                 delta_m = delta_d;
1564             }
1565         }
1566     }
1567
1568     f0 = tcg_temp_new_i64();
1569     f1 = tcg_temp_new_i64();
1570     fd = tcg_temp_new_i64();
1571     fpst = fpstatus_ptr(FPST_FPCR);
1572
1573     vfp_load_reg64(f0, vn);
1574     vfp_load_reg64(f1, vm);
1575
1576     for (;;) {
1577         if (reads_vd) {
1578             vfp_load_reg64(fd, vd);
1579         }
1580         fn(fd, f0, f1, fpst);
1581         vfp_store_reg64(fd, vd);
1582
1583         if (veclen == 0) {
1584             break;
1585         }
1586         /* Set up the operands for the next iteration */
1587         veclen--;
1588         vd = vfp_advance_dreg(vd, delta_d);
1589         vn = vfp_advance_dreg(vn, delta_d);
1590         vfp_load_reg64(f0, vn);
1591         if (delta_m) {
1592             vm = vfp_advance_dreg(vm, delta_m);
1593             vfp_load_reg64(f1, vm);
1594         }
1595     }
1596
1597     tcg_temp_free_i64(f0);
1598     tcg_temp_free_i64(f1);
1599     tcg_temp_free_i64(fd);
1600     tcg_temp_free_ptr(fpst);
1601
1602     return true;
1603 }
1604
1605 static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
1606 {
1607     uint32_t delta_m = 0;
1608     uint32_t delta_d = 0;
1609     int veclen = s->vec_len;
1610     TCGv_i32 f0, fd;
1611
1612     /* Note that the caller must check the aa32_fpsp_v2 feature. */
1613
1614     if (!dc_isar_feature(aa32_fpshvec, s) &&
1615         (veclen != 0 || s->vec_stride != 0)) {
1616         return false;
1617     }
1618
1619     if (!vfp_access_check(s)) {
1620         return true;
1621     }
1622
1623     if (veclen > 0) {
1624         /* Figure out what type of vector operation this is.  */
1625         if (vfp_sreg_is_scalar(vd)) {
1626             /* scalar */
1627             veclen = 0;
1628         } else {
1629             delta_d = s->vec_stride + 1;
1630
1631             if (vfp_sreg_is_scalar(vm)) {
1632                 /* mixed scalar/vector */
1633                 delta_m = 0;
1634             } else {
1635                 /* vector */
1636                 delta_m = delta_d;
1637             }
1638         }
1639     }
1640
1641     f0 = tcg_temp_new_i32();
1642     fd = tcg_temp_new_i32();
1643
1644     vfp_load_reg32(f0, vm);
1645
1646     for (;;) {
1647         fn(fd, f0);
1648         vfp_store_reg32(fd, vd);
1649
1650         if (veclen == 0) {
1651             break;
1652         }
1653
1654         if (delta_m == 0) {
1655             /* single source one-many */
1656             while (veclen--) {
1657                 vd = vfp_advance_sreg(vd, delta_d);
1658                 vfp_store_reg32(fd, vd);
1659             }
1660             break;
1661         }
1662
1663         /* Set up the operands for the next iteration */
1664         veclen--;
1665         vd = vfp_advance_sreg(vd, delta_d);
1666         vm = vfp_advance_sreg(vm, delta_m);
1667         vfp_load_reg32(f0, vm);
1668     }
1669
1670     tcg_temp_free_i32(f0);
1671     tcg_temp_free_i32(fd);
1672
1673     return true;
1674 }
1675
1676 static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
1677 {
1678     /*
1679      * Do a half-precision operation. Functionally this is
1680      * the same as do_vfp_2op_sp(), except:
1681      *  - it doesn't need the VFP vector handling (fp16 is a
1682      *    v8 feature, and in v8 VFP vectors don't exist)
1683      *  - it does the aa32_fp16_arith feature test
1684      */
1685     TCGv_i32 f0;
1686
1687     /* Note that the caller must check the aa32_fp16_arith feature */
1688
1689     if (!dc_isar_feature(aa32_fp16_arith, s)) {
1690         return false;
1691     }
1692
1693     if (s->vec_len != 0 || s->vec_stride != 0) {
1694         return false;
1695     }
1696
1697     if (!vfp_access_check(s)) {
1698         return true;
1699     }
1700
1701     f0 = tcg_temp_new_i32();
1702     vfp_load_reg32(f0, vm);
1703     fn(f0, f0);
1704     vfp_store_reg32(f0, vd);
1705     tcg_temp_free_i32(f0);
1706
1707     return true;
1708 }
1709
1710 static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
1711 {
1712     uint32_t delta_m = 0;
1713     uint32_t delta_d = 0;
1714     int veclen = s->vec_len;
1715     TCGv_i64 f0, fd;
1716
1717     /* Note that the caller must check the aa32_fpdp_v2 feature. */
1718
1719     /* UNDEF accesses to D16-D31 if they don't exist */
1720     if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) {
1721         return false;
1722     }
1723
1724     if (!dc_isar_feature(aa32_fpshvec, s) &&
1725         (veclen != 0 || s->vec_stride != 0)) {
1726         return false;
1727     }
1728
1729     if (!vfp_access_check(s)) {
1730         return true;
1731     }
1732
1733     if (veclen > 0) {
1734         /* Figure out what type of vector operation this is.  */
1735         if (vfp_dreg_is_scalar(vd)) {
1736             /* scalar */
1737             veclen = 0;
1738         } else {
1739             delta_d = (s->vec_stride >> 1) + 1;
1740
1741             if (vfp_dreg_is_scalar(vm)) {
1742                 /* mixed scalar/vector */
1743                 delta_m = 0;
1744             } else {
1745                 /* vector */
1746                 delta_m = delta_d;
1747             }
1748         }
1749     }
1750
1751     f0 = tcg_temp_new_i64();
1752     fd = tcg_temp_new_i64();
1753
1754     vfp_load_reg64(f0, vm);
1755
1756     for (;;) {
1757         fn(fd, f0);
1758         vfp_store_reg64(fd, vd);
1759
1760         if (veclen == 0) {
1761             break;
1762         }
1763
1764         if (delta_m == 0) {
1765             /* single source one-many */
1766             while (veclen--) {
1767                 vd = vfp_advance_dreg(vd, delta_d);
1768                 vfp_store_reg64(fd, vd);
1769             }
1770             break;
1771         }
1772
1773         /* Set up the operands for the next iteration */
1774         veclen--;
1775         vd = vfp_advance_dreg(vd, delta_d);
1776         vd = vfp_advance_dreg(vm, delta_m);
1777         vfp_load_reg64(f0, vm);
1778     }
1779
1780     tcg_temp_free_i64(f0);
1781     tcg_temp_free_i64(fd);
1782
1783     return true;
1784 }
1785
1786 static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1787 {
1788     /* Note that order of inputs to the add matters for NaNs */
1789     TCGv_i32 tmp = tcg_temp_new_i32();
1790
1791     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
1792     gen_helper_vfp_addh(vd, vd, tmp, fpst);
1793     tcg_temp_free_i32(tmp);
1794 }
1795
1796 static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
1797 {
1798     return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);
1799 }
1800
1801 static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1802 {
1803     /* Note that order of inputs to the add matters for NaNs */
1804     TCGv_i32 tmp = tcg_temp_new_i32();
1805
1806     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1807     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1808     tcg_temp_free_i32(tmp);
1809 }
1810
1811 static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
1812 {
1813     return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);
1814 }
1815
1816 static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1817 {
1818     /* Note that order of inputs to the add matters for NaNs */
1819     TCGv_i64 tmp = tcg_temp_new_i64();
1820
1821     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1822     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1823     tcg_temp_free_i64(tmp);
1824 }
1825
1826 static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
1827 {
1828     return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
1829 }
1830
1831 static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1832 {
1833     /*
1834      * VMLS: vd = vd + -(vn * vm)
1835      * Note that order of inputs to the add matters for NaNs.
1836      */
1837     TCGv_i32 tmp = tcg_temp_new_i32();
1838
1839     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
1840     gen_helper_vfp_negh(tmp, tmp);
1841     gen_helper_vfp_addh(vd, vd, tmp, fpst);
1842     tcg_temp_free_i32(tmp);
1843 }
1844
1845 static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
1846 {
1847     return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);
1848 }
1849
1850 static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1851 {
1852     /*
1853      * VMLS: vd = vd + -(vn * vm)
1854      * Note that order of inputs to the add matters for NaNs.
1855      */
1856     TCGv_i32 tmp = tcg_temp_new_i32();
1857
1858     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1859     gen_helper_vfp_negs(tmp, tmp);
1860     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1861     tcg_temp_free_i32(tmp);
1862 }
1863
1864 static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
1865 {
1866     return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);
1867 }
1868
1869 static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1870 {
1871     /*
1872      * VMLS: vd = vd + -(vn * vm)
1873      * Note that order of inputs to the add matters for NaNs.
1874      */
1875     TCGv_i64 tmp = tcg_temp_new_i64();
1876
1877     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1878     gen_helper_vfp_negd(tmp, tmp);
1879     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1880     tcg_temp_free_i64(tmp);
1881 }
1882
1883 static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
1884 {
1885     return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
1886 }
1887
1888 static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1889 {
1890     /*
1891      * VNMLS: -fd + (fn * fm)
1892      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
1893      * plausible looking simplifications because this will give wrong results
1894      * for NaNs.
1895      */
1896     TCGv_i32 tmp = tcg_temp_new_i32();
1897
1898     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
1899     gen_helper_vfp_negh(vd, vd);
1900     gen_helper_vfp_addh(vd, vd, tmp, fpst);
1901     tcg_temp_free_i32(tmp);
1902 }
1903
1904 static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
1905 {
1906     return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);
1907 }
1908
1909 static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1910 {
1911     /*
1912      * VNMLS: -fd + (fn * fm)
1913      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
1914      * plausible looking simplifications because this will give wrong results
1915      * for NaNs.
1916      */
1917     TCGv_i32 tmp = tcg_temp_new_i32();
1918
1919     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1920     gen_helper_vfp_negs(vd, vd);
1921     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1922     tcg_temp_free_i32(tmp);
1923 }
1924
1925 static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
1926 {
1927     return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);
1928 }
1929
1930 static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1931 {
1932     /*
1933      * VNMLS: -fd + (fn * fm)
1934      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
1935      * plausible looking simplifications because this will give wrong results
1936      * for NaNs.
1937      */
1938     TCGv_i64 tmp = tcg_temp_new_i64();
1939
1940     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1941     gen_helper_vfp_negd(vd, vd);
1942     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1943     tcg_temp_free_i64(tmp);
1944 }
1945
1946 static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
1947 {
1948     return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
1949 }
1950
1951 static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1952 {
1953     /* VNMLA: -fd + -(fn * fm) */
1954     TCGv_i32 tmp = tcg_temp_new_i32();
1955
1956     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
1957     gen_helper_vfp_negh(tmp, tmp);
1958     gen_helper_vfp_negh(vd, vd);
1959     gen_helper_vfp_addh(vd, vd, tmp, fpst);
1960     tcg_temp_free_i32(tmp);
1961 }
1962
1963 static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
1964 {
1965     return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);
1966 }
1967
1968 static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1969 {
1970     /* VNMLA: -fd + -(fn * fm) */
1971     TCGv_i32 tmp = tcg_temp_new_i32();
1972
1973     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1974     gen_helper_vfp_negs(tmp, tmp);
1975     gen_helper_vfp_negs(vd, vd);
1976     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1977     tcg_temp_free_i32(tmp);
1978 }
1979
1980 static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
1981 {
1982     return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);
1983 }
1984
1985 static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1986 {
1987     /* VNMLA: -fd + (fn * fm) */
1988     TCGv_i64 tmp = tcg_temp_new_i64();
1989
1990     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1991     gen_helper_vfp_negd(tmp, tmp);
1992     gen_helper_vfp_negd(vd, vd);
1993     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1994     tcg_temp_free_i64(tmp);
1995 }
1996
1997 static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
1998 {
1999     return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
2000 }
2001
2002 static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a)
2003 {
2004     return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false);
2005 }
2006
2007 static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
2008 {
2009     return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
2010 }
2011
2012 static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
2013 {
2014     return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
2015 }
2016
2017 static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2018 {
2019     /* VNMUL: -(fn * fm) */
2020     gen_helper_vfp_mulh(vd, vn, vm, fpst);
2021     gen_helper_vfp_negh(vd, vd);
2022 }
2023
2024 static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
2025 {
2026     return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);
2027 }
2028
2029 static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2030 {
2031     /* VNMUL: -(fn * fm) */
2032     gen_helper_vfp_muls(vd, vn, vm, fpst);
2033     gen_helper_vfp_negs(vd, vd);
2034 }
2035
2036 static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
2037 {
2038     return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false);
2039 }
2040
2041 static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
2042 {
2043     /* VNMUL: -(fn * fm) */
2044     gen_helper_vfp_muld(vd, vn, vm, fpst);
2045     gen_helper_vfp_negd(vd, vd);
2046 }
2047
2048 static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
2049 {
2050     return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
2051 }
2052
2053 static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a)
2054 {
2055     return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false);
2056 }
2057
2058 static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
2059 {
2060     return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
2061 }
2062
2063 static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
2064 {
2065     return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
2066 }
2067
2068 static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a)
2069 {
2070     return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false);
2071 }
2072
2073 static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
2074 {
2075     return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
2076 }
2077
2078 static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
2079 {
2080     return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
2081 }
2082
2083 static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a)
2084 {
2085     return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false);
2086 }
2087
2088 static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
2089 {
2090     return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
2091 }
2092
2093 static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
2094 {
2095     return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
2096 }
2097
2098 static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a)
2099 {
2100     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2101         return false;
2102     }
2103     return do_vfp_3op_hp(s, gen_helper_vfp_minnumh,
2104                          a->vd, a->vn, a->vm, false);
2105 }
2106
2107 static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a)
2108 {
2109     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2110         return false;
2111     }
2112     return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh,
2113                          a->vd, a->vn, a->vm, false);
2114 }
2115
2116 static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a)
2117 {
2118     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2119         return false;
2120     }
2121     return do_vfp_3op_sp(s, gen_helper_vfp_minnums,
2122                          a->vd, a->vn, a->vm, false);
2123 }
2124
2125 static bool trans_VMAXNM_sp(DisasContext *s, arg_VMAXNM_sp *a)
2126 {
2127     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2128         return false;
2129     }
2130     return do_vfp_3op_sp(s, gen_helper_vfp_maxnums,
2131                          a->vd, a->vn, a->vm, false);
2132 }
2133
2134 static bool trans_VMINNM_dp(DisasContext *s, arg_VMINNM_dp *a)
2135 {
2136     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2137         return false;
2138     }
2139     return do_vfp_3op_dp(s, gen_helper_vfp_minnumd,
2140                          a->vd, a->vn, a->vm, false);
2141 }
2142
2143 static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a)
2144 {
2145     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2146         return false;
2147     }
2148     return do_vfp_3op_dp(s, gen_helper_vfp_maxnumd,
2149                          a->vd, a->vn, a->vm, false);
2150 }
2151
2152 static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
2153 {
2154     /*
2155      * VFNMA : fd = muladd(-fd,  fn, fm)
2156      * VFNMS : fd = muladd(-fd, -fn, fm)
2157      * VFMA  : fd = muladd( fd,  fn, fm)
2158      * VFMS  : fd = muladd( fd, -fn, fm)
2159      *
2160      * These are fused multiply-add, and must be done as one floating
2161      * point operation with no rounding between the multiplication and
2162      * addition steps.  NB that doing the negations here as separate
2163      * steps is correct : an input NaN should come out with its sign
2164      * bit flipped if it is a negated-input.
2165      */
2166     TCGv_ptr fpst;
2167     TCGv_i32 vn, vm, vd;
2168
2169     /*
2170      * Present in VFPv4 only, and only with the FP16 extension.
2171      * Note that we can't rely on the SIMDFMAC check alone, because
2172      * in a Neon-no-VFP core that ID register field will be non-zero.
2173      */
2174     if (!dc_isar_feature(aa32_fp16_arith, s) ||
2175         !dc_isar_feature(aa32_simdfmac, s) ||
2176         !dc_isar_feature(aa32_fpsp_v2, s)) {
2177         return false;
2178     }
2179
2180     if (s->vec_len != 0 || s->vec_stride != 0) {
2181         return false;
2182     }
2183
2184     if (!vfp_access_check(s)) {
2185         return true;
2186     }
2187
2188     vn = tcg_temp_new_i32();
2189     vm = tcg_temp_new_i32();
2190     vd = tcg_temp_new_i32();
2191
2192     vfp_load_reg32(vn, a->vn);
2193     vfp_load_reg32(vm, a->vm);
2194     if (neg_n) {
2195         /* VFNMS, VFMS */
2196         gen_helper_vfp_negh(vn, vn);
2197     }
2198     vfp_load_reg32(vd, a->vd);
2199     if (neg_d) {
2200         /* VFNMA, VFNMS */
2201         gen_helper_vfp_negh(vd, vd);
2202     }
2203     fpst = fpstatus_ptr(FPST_FPCR_F16);
2204     gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
2205     vfp_store_reg32(vd, a->vd);
2206
2207     tcg_temp_free_ptr(fpst);
2208     tcg_temp_free_i32(vn);
2209     tcg_temp_free_i32(vm);
2210     tcg_temp_free_i32(vd);
2211
2212     return true;
2213 }
2214
2215 static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
2216 {
2217     /*
2218      * VFNMA : fd = muladd(-fd,  fn, fm)
2219      * VFNMS : fd = muladd(-fd, -fn, fm)
2220      * VFMA  : fd = muladd( fd,  fn, fm)
2221      * VFMS  : fd = muladd( fd, -fn, fm)
2222      *
2223      * These are fused multiply-add, and must be done as one floating
2224      * point operation with no rounding between the multiplication and
2225      * addition steps.  NB that doing the negations here as separate
2226      * steps is correct : an input NaN should come out with its sign
2227      * bit flipped if it is a negated-input.
2228      */
2229     TCGv_ptr fpst;
2230     TCGv_i32 vn, vm, vd;
2231
2232     /*
2233      * Present in VFPv4 only.
2234      * Note that we can't rely on the SIMDFMAC check alone, because
2235      * in a Neon-no-VFP core that ID register field will be non-zero.
2236      */
2237     if (!dc_isar_feature(aa32_simdfmac, s) ||
2238         !dc_isar_feature(aa32_fpsp_v2, s)) {
2239         return false;
2240     }
2241     /*
2242      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
2243      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
2244      */
2245     if (s->vec_len != 0 || s->vec_stride != 0) {
2246         return false;
2247     }
2248
2249     if (!vfp_access_check(s)) {
2250         return true;
2251     }
2252
2253     vn = tcg_temp_new_i32();
2254     vm = tcg_temp_new_i32();
2255     vd = tcg_temp_new_i32();
2256
2257     vfp_load_reg32(vn, a->vn);
2258     vfp_load_reg32(vm, a->vm);
2259     if (neg_n) {
2260         /* VFNMS, VFMS */
2261         gen_helper_vfp_negs(vn, vn);
2262     }
2263     vfp_load_reg32(vd, a->vd);
2264     if (neg_d) {
2265         /* VFNMA, VFNMS */
2266         gen_helper_vfp_negs(vd, vd);
2267     }
2268     fpst = fpstatus_ptr(FPST_FPCR);
2269     gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
2270     vfp_store_reg32(vd, a->vd);
2271
2272     tcg_temp_free_ptr(fpst);
2273     tcg_temp_free_i32(vn);
2274     tcg_temp_free_i32(vm);
2275     tcg_temp_free_i32(vd);
2276
2277     return true;
2278 }
2279
2280 static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
2281 {
2282     /*
2283      * VFNMA : fd = muladd(-fd,  fn, fm)
2284      * VFNMS : fd = muladd(-fd, -fn, fm)
2285      * VFMA  : fd = muladd( fd,  fn, fm)
2286      * VFMS  : fd = muladd( fd, -fn, fm)
2287      *
2288      * These are fused multiply-add, and must be done as one floating
2289      * point operation with no rounding between the multiplication and
2290      * addition steps.  NB that doing the negations here as separate
2291      * steps is correct : an input NaN should come out with its sign
2292      * bit flipped if it is a negated-input.
2293      */
2294     TCGv_ptr fpst;
2295     TCGv_i64 vn, vm, vd;
2296
2297     /*
2298      * Present in VFPv4 only.
2299      * Note that we can't rely on the SIMDFMAC check alone, because
2300      * in a Neon-no-VFP core that ID register field will be non-zero.
2301      */
2302     if (!dc_isar_feature(aa32_simdfmac, s) ||
2303         !dc_isar_feature(aa32_fpdp_v2, s)) {
2304         return false;
2305     }
2306     /*
2307      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
2308      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
2309      */
2310     if (s->vec_len != 0 || s->vec_stride != 0) {
2311         return false;
2312     }
2313
2314     /* UNDEF accesses to D16-D31 if they don't exist. */
2315     if (!dc_isar_feature(aa32_simd_r32, s) &&
2316         ((a->vd | a->vn | a->vm) & 0x10)) {
2317         return false;
2318     }
2319
2320     if (!vfp_access_check(s)) {
2321         return true;
2322     }
2323
2324     vn = tcg_temp_new_i64();
2325     vm = tcg_temp_new_i64();
2326     vd = tcg_temp_new_i64();
2327
2328     vfp_load_reg64(vn, a->vn);
2329     vfp_load_reg64(vm, a->vm);
2330     if (neg_n) {
2331         /* VFNMS, VFMS */
2332         gen_helper_vfp_negd(vn, vn);
2333     }
2334     vfp_load_reg64(vd, a->vd);
2335     if (neg_d) {
2336         /* VFNMA, VFNMS */
2337         gen_helper_vfp_negd(vd, vd);
2338     }
2339     fpst = fpstatus_ptr(FPST_FPCR);
2340     gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
2341     vfp_store_reg64(vd, a->vd);
2342
2343     tcg_temp_free_ptr(fpst);
2344     tcg_temp_free_i64(vn);
2345     tcg_temp_free_i64(vm);
2346     tcg_temp_free_i64(vd);
2347
2348     return true;
2349 }
2350
/*
 * Define trans_<OP>_<SZ>() as a thin wrapper which forwards to
 * do_vfm_<SZ>() with fixed negate-Vn (NEG_N) and negate-Vd (NEG_D)
 * flags.
 */
#define MAKE_ONE_VFM_TRANS_FN(OP, SZ, NEG_N, NEG_D)             \
    static bool trans_##OP##_##SZ(DisasContext *s,              \
                                  arg_##OP##_##SZ *a)           \
    {                                                           \
        return do_vfm_##SZ(s, a, NEG_N, NEG_D);                 \
    }
2357
2358 #define MAKE_VFM_TRANS_FNS(PREC) \
2359     MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \
2360     MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \
2361     MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \
2362     MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true)
2363
2364 MAKE_VFM_TRANS_FNS(hp)
2365 MAKE_VFM_TRANS_FNS(sp)
2366 MAKE_VFM_TRANS_FNS(dp)
2367
2368 static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a)
2369 {
2370     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2371         return false;
2372     }
2373
2374     if (s->vec_len != 0 || s->vec_stride != 0) {
2375         return false;
2376     }
2377
2378     if (!vfp_access_check(s)) {
2379         return true;
2380     }
2381
2382     vfp_store_reg32(tcg_constant_i32(vfp_expand_imm(MO_16, a->imm)), a->vd);
2383     return true;
2384 }
2385
2386 static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
2387 {
2388     uint32_t delta_d = 0;
2389     int veclen = s->vec_len;
2390     TCGv_i32 fd;
2391     uint32_t vd;
2392
2393     vd = a->vd;
2394
2395     if (!dc_isar_feature(aa32_fpsp_v3, s)) {
2396         return false;
2397     }
2398
2399     if (!dc_isar_feature(aa32_fpshvec, s) &&
2400         (veclen != 0 || s->vec_stride != 0)) {
2401         return false;
2402     }
2403
2404     if (!vfp_access_check(s)) {
2405         return true;
2406     }
2407
2408     if (veclen > 0) {
2409         /* Figure out what type of vector operation this is.  */
2410         if (vfp_sreg_is_scalar(vd)) {
2411             /* scalar */
2412             veclen = 0;
2413         } else {
2414             delta_d = s->vec_stride + 1;
2415         }
2416     }
2417
2418     fd = tcg_constant_i32(vfp_expand_imm(MO_32, a->imm));
2419
2420     for (;;) {
2421         vfp_store_reg32(fd, vd);
2422
2423         if (veclen == 0) {
2424             break;
2425         }
2426
2427         /* Set up the operands for the next iteration */
2428         veclen--;
2429         vd = vfp_advance_sreg(vd, delta_d);
2430     }
2431
2432     return true;
2433 }
2434
2435 static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
2436 {
2437     uint32_t delta_d = 0;
2438     int veclen = s->vec_len;
2439     TCGv_i64 fd;
2440     uint32_t vd;
2441
2442     vd = a->vd;
2443
2444     if (!dc_isar_feature(aa32_fpdp_v3, s)) {
2445         return false;
2446     }
2447
2448     /* UNDEF accesses to D16-D31 if they don't exist. */
2449     if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) {
2450         return false;
2451     }
2452
2453     if (!dc_isar_feature(aa32_fpshvec, s) &&
2454         (veclen != 0 || s->vec_stride != 0)) {
2455         return false;
2456     }
2457
2458     if (!vfp_access_check(s)) {
2459         return true;
2460     }
2461
2462     if (veclen > 0) {
2463         /* Figure out what type of vector operation this is.  */
2464         if (vfp_dreg_is_scalar(vd)) {
2465             /* scalar */
2466             veclen = 0;
2467         } else {
2468             delta_d = (s->vec_stride >> 1) + 1;
2469         }
2470     }
2471
2472     fd = tcg_constant_i64(vfp_expand_imm(MO_64, a->imm));
2473
2474     for (;;) {
2475         vfp_store_reg64(fd, vd);
2476
2477         if (veclen == 0) {
2478             break;
2479         }
2480
2481         /* Set up the operands for the next iteration */
2482         veclen--;
2483         vd = vfp_advance_dreg(vd, delta_d);
2484     }
2485
2486     return true;
2487 }
2488
/*
 * Define trans_<OP>_<SZ>() for a two-operand VFP instruction: reject
 * (return false) unless ISA feature FEAT is present, otherwise hand the
 * generator GEN to do_vfp_2op_<SZ>() with the Vd and Vm register numbers.
 */
#define DO_VFP_2OP(OP, SZ, GEN, FEAT)                       \
    static bool trans_##OP##_##SZ(DisasContext *s,          \
                                  arg_##OP##_##SZ *a)       \
    {                                                       \
        if (!dc_isar_feature(FEAT, s)) {                    \
            return false;                                   \
        }                                                   \
        return do_vfp_2op_##SZ(s, GEN, a->vd, a->vm);       \
    }
2498
2499 #define DO_VFP_VMOV(INSN, PREC, FN)                             \
2500     static bool trans_##INSN##_##PREC(DisasContext *s,          \
2501                                       arg_##INSN##_##PREC *a)   \
2502     {                                                           \
2503         if (!dc_isar_feature(aa32_fp##PREC##_v2, s) &&          \
2504             !dc_isar_feature(aa32_mve, s)) {                    \
2505             return false;                                       \
2506         }                                                       \
2507         return do_vfp_2op_##PREC(s, FN, a->vd, a->vm);          \
2508     }
2509
2510 DO_VFP_VMOV(VMOV_reg, sp, tcg_gen_mov_i32)
2511 DO_VFP_VMOV(VMOV_reg, dp, tcg_gen_mov_i64)
2512
/*
 * VABS: DO_VFP_2OP expands each line to a trans_VABS_<prec>() that
 * checks the named ISA feature and forwards the abs helper to
 * do_vfp_2op_<prec>().
 */
DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh, aa32_fp16_arith)
DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss, aa32_fpsp_v2)
DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd, aa32_fpdp_v2)

/* VNEG: same pattern, using the negate helpers. */
DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh, aa32_fp16_arith)
DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs, aa32_fpsp_v2)
DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd, aa32_fpdp_v2)
2520
2521 static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
2522 {
2523     gen_helper_vfp_sqrth(vd, vm, cpu_env);
2524 }
2525
2526 static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
2527 {
2528     gen_helper_vfp_sqrts(vd, vm, cpu_env);
2529 }
2530
2531 static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
2532 {
2533     gen_helper_vfp_sqrtd(vd, vm, cpu_env);
2534 }
2535
/*
 * VSQRT: DO_VFP_2OP expands each line to a trans_VSQRT_<prec>() that
 * checks the named ISA feature and forwards the gen_VSQRT_<prec>
 * adapter to do_vfp_2op_<prec>().
 */
DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp, aa32_fp16_arith)
DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp, aa32_fpsp_v2)
DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp, aa32_fpdp_v2)
2539
2540 static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
2541 {
2542     TCGv_i32 vd, vm;
2543
2544     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2545         return false;
2546     }
2547
2548     /* Vm/M bits must be zero for the Z variant */
2549     if (a->z && a->vm != 0) {
2550         return false;
2551     }
2552
2553     if (!vfp_access_check(s)) {
2554         return true;
2555     }
2556
2557     vd = tcg_temp_new_i32();
2558     vm = tcg_temp_new_i32();
2559
2560     vfp_load_reg32(vd, a->vd);
2561     if (a->z) {
2562         tcg_gen_movi_i32(vm, 0);
2563     } else {
2564         vfp_load_reg32(vm, a->vm);
2565     }
2566
2567     if (a->e) {
2568         gen_helper_vfp_cmpeh(vd, vm, cpu_env);
2569     } else {
2570         gen_helper_vfp_cmph(vd, vm, cpu_env);
2571     }
2572
2573     tcg_temp_free_i32(vd);
2574     tcg_temp_free_i32(vm);
2575
2576     return true;
2577 }
2578
2579 static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
2580 {
2581     TCGv_i32 vd, vm;
2582
2583     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
2584         return false;
2585     }
2586
2587     /* Vm/M bits must be zero for the Z variant */
2588     if (a->z && a->vm != 0) {
2589         return false;
2590     }
2591
2592     if (!vfp_access_check(s)) {
2593         return true;
2594     }
2595
2596     vd = tcg_temp_new_i32();
2597     vm = tcg_temp_new_i32();
2598
2599     vfp_load_reg32(vd, a->vd);
2600     if (a->z) {
2601         tcg_gen_movi_i32(vm, 0);
2602     } else {
2603         vfp_load_reg32(vm, a->vm);
2604     }
2605
2606     if (a->e) {
2607         gen_helper_vfp_cmpes(vd, vm, cpu_env);
2608     } else {
2609         gen_helper_vfp_cmps(vd, vm, cpu_env);
2610     }
2611
2612     tcg_temp_free_i32(vd);
2613     tcg_temp_free_i32(vm);
2614
2615     return true;
2616 }
2617
2618 static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
2619 {
2620     TCGv_i64 vd, vm;
2621
2622     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2623         return false;
2624     }
2625
2626     /* Vm/M bits must be zero for the Z variant */
2627     if (a->z && a->vm != 0) {
2628         return false;
2629     }
2630
2631     /* UNDEF accesses to D16-D31 if they don't exist. */
2632     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2633         return false;
2634     }
2635
2636     if (!vfp_access_check(s)) {
2637         return true;
2638     }
2639
2640     vd = tcg_temp_new_i64();
2641     vm = tcg_temp_new_i64();
2642
2643     vfp_load_reg64(vd, a->vd);
2644     if (a->z) {
2645         tcg_gen_movi_i64(vm, 0);
2646     } else {
2647         vfp_load_reg64(vm, a->vm);
2648     }
2649
2650     if (a->e) {
2651         gen_helper_vfp_cmped(vd, vm, cpu_env);
2652     } else {
2653         gen_helper_vfp_cmpd(vd, vm, cpu_env);
2654     }
2655
2656     tcg_temp_free_i64(vd);
2657     tcg_temp_free_i64(vm);
2658
2659     return true;
2660 }
2661
2662 static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
2663 {
2664     TCGv_ptr fpst;
2665     TCGv_i32 ahp_mode;
2666     TCGv_i32 tmp;
2667
2668     if (!dc_isar_feature(aa32_fp16_spconv, s)) {
2669         return false;
2670     }
2671
2672     if (!vfp_access_check(s)) {
2673         return true;
2674     }
2675
2676     fpst = fpstatus_ptr(FPST_FPCR);
2677     ahp_mode = get_ahp_flag();
2678     tmp = tcg_temp_new_i32();
2679     /* The T bit tells us if we want the low or high 16 bits of Vm */
2680     tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
2681     gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
2682     vfp_store_reg32(tmp, a->vd);
2683     tcg_temp_free_i32(ahp_mode);
2684     tcg_temp_free_ptr(fpst);
2685     tcg_temp_free_i32(tmp);
2686     return true;
2687 }
2688
2689 static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
2690 {
2691     TCGv_ptr fpst;
2692     TCGv_i32 ahp_mode;
2693     TCGv_i32 tmp;
2694     TCGv_i64 vd;
2695
2696     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2697         return false;
2698     }
2699
2700     if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
2701         return false;
2702     }
2703
2704     /* UNDEF accesses to D16-D31 if they don't exist. */
2705     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd  & 0x10)) {
2706         return false;
2707     }
2708
2709     if (!vfp_access_check(s)) {
2710         return true;
2711     }
2712
2713     fpst = fpstatus_ptr(FPST_FPCR);
2714     ahp_mode = get_ahp_flag();
2715     tmp = tcg_temp_new_i32();
2716     /* The T bit tells us if we want the low or high 16 bits of Vm */
2717     tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
2718     vd = tcg_temp_new_i64();
2719     gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
2720     vfp_store_reg64(vd, a->vd);
2721     tcg_temp_free_i32(ahp_mode);
2722     tcg_temp_free_ptr(fpst);
2723     tcg_temp_free_i32(tmp);
2724     tcg_temp_free_i64(vd);
2725     return true;
2726 }
2727
2728 static bool trans_VCVT_b16_f32(DisasContext *s, arg_VCVT_b16_f32 *a)
2729 {
2730     TCGv_ptr fpst;
2731     TCGv_i32 tmp;
2732
2733     if (!dc_isar_feature(aa32_bf16, s)) {
2734         return false;
2735     }
2736
2737     if (!vfp_access_check(s)) {
2738         return true;
2739     }
2740
2741     fpst = fpstatus_ptr(FPST_FPCR);
2742     tmp = tcg_temp_new_i32();
2743
2744     vfp_load_reg32(tmp, a->vm);
2745     gen_helper_bfcvt(tmp, tmp, fpst);
2746     tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
2747     tcg_temp_free_ptr(fpst);
2748     tcg_temp_free_i32(tmp);
2749     return true;
2750 }
2751
2752 static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
2753 {
2754     TCGv_ptr fpst;
2755     TCGv_i32 ahp_mode;
2756     TCGv_i32 tmp;
2757
2758     if (!dc_isar_feature(aa32_fp16_spconv, s)) {
2759         return false;
2760     }
2761
2762     if (!vfp_access_check(s)) {
2763         return true;
2764     }
2765
2766     fpst = fpstatus_ptr(FPST_FPCR);
2767     ahp_mode = get_ahp_flag();
2768     tmp = tcg_temp_new_i32();
2769
2770     vfp_load_reg32(tmp, a->vm);
2771     gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
2772     tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
2773     tcg_temp_free_i32(ahp_mode);
2774     tcg_temp_free_ptr(fpst);
2775     tcg_temp_free_i32(tmp);
2776     return true;
2777 }
2778
2779 static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
2780 {
2781     TCGv_ptr fpst;
2782     TCGv_i32 ahp_mode;
2783     TCGv_i32 tmp;
2784     TCGv_i64 vm;
2785
2786     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2787         return false;
2788     }
2789
2790     if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
2791         return false;
2792     }
2793
2794     /* UNDEF accesses to D16-D31 if they don't exist. */
2795     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm  & 0x10)) {
2796         return false;
2797     }
2798
2799     if (!vfp_access_check(s)) {
2800         return true;
2801     }
2802
2803     fpst = fpstatus_ptr(FPST_FPCR);
2804     ahp_mode = get_ahp_flag();
2805     tmp = tcg_temp_new_i32();
2806     vm = tcg_temp_new_i64();
2807
2808     vfp_load_reg64(vm, a->vm);
2809     gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
2810     tcg_temp_free_i64(vm);
2811     tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
2812     tcg_temp_free_i32(ahp_mode);
2813     tcg_temp_free_ptr(fpst);
2814     tcg_temp_free_i32(tmp);
2815     return true;
2816 }
2817
2818 static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
2819 {
2820     TCGv_ptr fpst;
2821     TCGv_i32 tmp;
2822
2823     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2824         return false;
2825     }
2826
2827     if (!vfp_access_check(s)) {
2828         return true;
2829     }
2830
2831     tmp = tcg_temp_new_i32();
2832     vfp_load_reg32(tmp, a->vm);
2833     fpst = fpstatus_ptr(FPST_FPCR_F16);
2834     gen_helper_rinth(tmp, tmp, fpst);
2835     vfp_store_reg32(tmp, a->vd);
2836     tcg_temp_free_ptr(fpst);
2837     tcg_temp_free_i32(tmp);
2838     return true;
2839 }
2840
2841 static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
2842 {
2843     TCGv_ptr fpst;
2844     TCGv_i32 tmp;
2845
2846     if (!dc_isar_feature(aa32_vrint, s)) {
2847         return false;
2848     }
2849
2850     if (!vfp_access_check(s)) {
2851         return true;
2852     }
2853
2854     tmp = tcg_temp_new_i32();
2855     vfp_load_reg32(tmp, a->vm);
2856     fpst = fpstatus_ptr(FPST_FPCR);
2857     gen_helper_rints(tmp, tmp, fpst);
2858     vfp_store_reg32(tmp, a->vd);
2859     tcg_temp_free_ptr(fpst);
2860     tcg_temp_free_i32(tmp);
2861     return true;
2862 }
2863
2864 static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
2865 {
2866     TCGv_ptr fpst;
2867     TCGv_i64 tmp;
2868
2869     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2870         return false;
2871     }
2872
2873     if (!dc_isar_feature(aa32_vrint, s)) {
2874         return false;
2875     }
2876
2877     /* UNDEF accesses to D16-D31 if they don't exist. */
2878     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2879         return false;
2880     }
2881
2882     if (!vfp_access_check(s)) {
2883         return true;
2884     }
2885
2886     tmp = tcg_temp_new_i64();
2887     vfp_load_reg64(tmp, a->vm);
2888     fpst = fpstatus_ptr(FPST_FPCR);
2889     gen_helper_rintd(tmp, tmp, fpst);
2890     vfp_store_reg64(tmp, a->vd);
2891     tcg_temp_free_ptr(fpst);
2892     tcg_temp_free_i64(tmp);
2893     return true;
2894 }
2895
2896 static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
2897 {
2898     TCGv_ptr fpst;
2899     TCGv_i32 tmp;
2900     TCGv_i32 tcg_rmode;
2901
2902     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2903         return false;
2904     }
2905
2906     if (!vfp_access_check(s)) {
2907         return true;
2908     }
2909
2910     tmp = tcg_temp_new_i32();
2911     vfp_load_reg32(tmp, a->vm);
2912     fpst = fpstatus_ptr(FPST_FPCR_F16);
2913     tcg_rmode = tcg_const_i32(float_round_to_zero);
2914     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
2915     gen_helper_rinth(tmp, tmp, fpst);
2916     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
2917     vfp_store_reg32(tmp, a->vd);
2918     tcg_temp_free_ptr(fpst);
2919     tcg_temp_free_i32(tcg_rmode);
2920     tcg_temp_free_i32(tmp);
2921     return true;
2922 }
2923
2924 static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
2925 {
2926     TCGv_ptr fpst;
2927     TCGv_i32 tmp;
2928     TCGv_i32 tcg_rmode;
2929
2930     if (!dc_isar_feature(aa32_vrint, s)) {
2931         return false;
2932     }
2933
2934     if (!vfp_access_check(s)) {
2935         return true;
2936     }
2937
2938     tmp = tcg_temp_new_i32();
2939     vfp_load_reg32(tmp, a->vm);
2940     fpst = fpstatus_ptr(FPST_FPCR);
2941     tcg_rmode = tcg_const_i32(float_round_to_zero);
2942     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
2943     gen_helper_rints(tmp, tmp, fpst);
2944     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
2945     vfp_store_reg32(tmp, a->vd);
2946     tcg_temp_free_ptr(fpst);
2947     tcg_temp_free_i32(tcg_rmode);
2948     tcg_temp_free_i32(tmp);
2949     return true;
2950 }
2951
2952 static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
2953 {
2954     TCGv_ptr fpst;
2955     TCGv_i64 tmp;
2956     TCGv_i32 tcg_rmode;
2957
2958     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2959         return false;
2960     }
2961
2962     if (!dc_isar_feature(aa32_vrint, s)) {
2963         return false;
2964     }
2965
2966     /* UNDEF accesses to D16-D31 if they don't exist. */
2967     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2968         return false;
2969     }
2970
2971     if (!vfp_access_check(s)) {
2972         return true;
2973     }
2974
2975     tmp = tcg_temp_new_i64();
2976     vfp_load_reg64(tmp, a->vm);
2977     fpst = fpstatus_ptr(FPST_FPCR);
2978     tcg_rmode = tcg_const_i32(float_round_to_zero);
2979     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
2980     gen_helper_rintd(tmp, tmp, fpst);
2981     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
2982     vfp_store_reg64(tmp, a->vd);
2983     tcg_temp_free_ptr(fpst);
2984     tcg_temp_free_i64(tmp);
2985     tcg_temp_free_i32(tcg_rmode);
2986     return true;
2987 }
2988
2989 static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
2990 {
2991     TCGv_ptr fpst;
2992     TCGv_i32 tmp;
2993
2994     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2995         return false;
2996     }
2997
2998     if (!vfp_access_check(s)) {
2999         return true;
3000     }
3001
3002     tmp = tcg_temp_new_i32();
3003     vfp_load_reg32(tmp, a->vm);
3004     fpst = fpstatus_ptr(FPST_FPCR_F16);
3005     gen_helper_rinth_exact(tmp, tmp, fpst);
3006     vfp_store_reg32(tmp, a->vd);
3007     tcg_temp_free_ptr(fpst);
3008     tcg_temp_free_i32(tmp);
3009     return true;
3010 }
3011
3012 static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
3013 {
3014     TCGv_ptr fpst;
3015     TCGv_i32 tmp;
3016
3017     if (!dc_isar_feature(aa32_vrint, s)) {
3018         return false;
3019     }
3020
3021     if (!vfp_access_check(s)) {
3022         return true;
3023     }
3024
3025     tmp = tcg_temp_new_i32();
3026     vfp_load_reg32(tmp, a->vm);
3027     fpst = fpstatus_ptr(FPST_FPCR);
3028     gen_helper_rints_exact(tmp, tmp, fpst);
3029     vfp_store_reg32(tmp, a->vd);
3030     tcg_temp_free_ptr(fpst);
3031     tcg_temp_free_i32(tmp);
3032     return true;
3033 }
3034
3035 static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
3036 {
3037     TCGv_ptr fpst;
3038     TCGv_i64 tmp;
3039
3040     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3041         return false;
3042     }
3043
3044     if (!dc_isar_feature(aa32_vrint, s)) {
3045         return false;
3046     }
3047
3048     /* UNDEF accesses to D16-D31 if they don't exist. */
3049     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
3050         return false;
3051     }
3052
3053     if (!vfp_access_check(s)) {
3054         return true;
3055     }
3056
3057     tmp = tcg_temp_new_i64();
3058     vfp_load_reg64(tmp, a->vm);
3059     fpst = fpstatus_ptr(FPST_FPCR);
3060     gen_helper_rintd_exact(tmp, tmp, fpst);
3061     vfp_store_reg64(tmp, a->vd);
3062     tcg_temp_free_ptr(fpst);
3063     tcg_temp_free_i64(tmp);
3064     return true;
3065 }
3066
3067 static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
3068 {
3069     TCGv_i64 vd;
3070     TCGv_i32 vm;
3071
3072     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3073         return false;
3074     }
3075
3076     /* UNDEF accesses to D16-D31 if they don't exist. */
3077     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
3078         return false;
3079     }
3080
3081     if (!vfp_access_check(s)) {
3082         return true;
3083     }
3084
3085     vm = tcg_temp_new_i32();
3086     vd = tcg_temp_new_i64();
3087     vfp_load_reg32(vm, a->vm);
3088     gen_helper_vfp_fcvtds(vd, vm, cpu_env);
3089     vfp_store_reg64(vd, a->vd);
3090     tcg_temp_free_i32(vm);
3091     tcg_temp_free_i64(vd);
3092     return true;
3093 }
3094
3095 static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
3096 {
3097     TCGv_i64 vm;
3098     TCGv_i32 vd;
3099
3100     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3101         return false;
3102     }
3103
3104     /* UNDEF accesses to D16-D31 if they don't exist. */
3105     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
3106         return false;
3107     }
3108
3109     if (!vfp_access_check(s)) {
3110         return true;
3111     }
3112
3113     vd = tcg_temp_new_i32();
3114     vm = tcg_temp_new_i64();
3115     vfp_load_reg64(vm, a->vm);
3116     gen_helper_vfp_fcvtsd(vd, vm, cpu_env);
3117     vfp_store_reg32(vd, a->vd);
3118     tcg_temp_free_i32(vd);
3119     tcg_temp_free_i64(vm);
3120     return true;
3121 }
3122
3123 static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
3124 {
3125     TCGv_i32 vm;
3126     TCGv_ptr fpst;
3127
3128     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3129         return false;
3130     }
3131
3132     if (!vfp_access_check(s)) {
3133         return true;
3134     }
3135
3136     vm = tcg_temp_new_i32();
3137     vfp_load_reg32(vm, a->vm);
3138     fpst = fpstatus_ptr(FPST_FPCR_F16);
3139     if (a->s) {
3140         /* i32 -> f16 */
3141         gen_helper_vfp_sitoh(vm, vm, fpst);
3142     } else {
3143         /* u32 -> f16 */
3144         gen_helper_vfp_uitoh(vm, vm, fpst);
3145     }
3146     vfp_store_reg32(vm, a->vd);
3147     tcg_temp_free_i32(vm);
3148     tcg_temp_free_ptr(fpst);
3149     return true;
3150 }
3151
3152 static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
3153 {
3154     TCGv_i32 vm;
3155     TCGv_ptr fpst;
3156
3157     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
3158         return false;
3159     }
3160
3161     if (!vfp_access_check(s)) {
3162         return true;
3163     }
3164
3165     vm = tcg_temp_new_i32();
3166     vfp_load_reg32(vm, a->vm);
3167     fpst = fpstatus_ptr(FPST_FPCR);
3168     if (a->s) {
3169         /* i32 -> f32 */
3170         gen_helper_vfp_sitos(vm, vm, fpst);
3171     } else {
3172         /* u32 -> f32 */
3173         gen_helper_vfp_uitos(vm, vm, fpst);
3174     }
3175     vfp_store_reg32(vm, a->vd);
3176     tcg_temp_free_i32(vm);
3177     tcg_temp_free_ptr(fpst);
3178     return true;
3179 }
3180
3181 static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
3182 {
3183     TCGv_i32 vm;
3184     TCGv_i64 vd;
3185     TCGv_ptr fpst;
3186
3187     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3188         return false;
3189     }
3190
3191     /* UNDEF accesses to D16-D31 if they don't exist. */
3192     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
3193         return false;
3194     }
3195
3196     if (!vfp_access_check(s)) {
3197         return true;
3198     }
3199
3200     vm = tcg_temp_new_i32();
3201     vd = tcg_temp_new_i64();
3202     vfp_load_reg32(vm, a->vm);
3203     fpst = fpstatus_ptr(FPST_FPCR);
3204     if (a->s) {
3205         /* i32 -> f64 */
3206         gen_helper_vfp_sitod(vd, vm, fpst);
3207     } else {
3208         /* u32 -> f64 */
3209         gen_helper_vfp_uitod(vd, vm, fpst);
3210     }
3211     vfp_store_reg64(vd, a->vd);
3212     tcg_temp_free_i32(vm);
3213     tcg_temp_free_i64(vd);
3214     tcg_temp_free_ptr(fpst);
3215     return true;
3216 }
3217
3218 static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
3219 {
3220     TCGv_i32 vd;
3221     TCGv_i64 vm;
3222
3223     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3224         return false;
3225     }
3226
3227     if (!dc_isar_feature(aa32_jscvt, s)) {
3228         return false;
3229     }
3230
3231     /* UNDEF accesses to D16-D31 if they don't exist. */
3232     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
3233         return false;
3234     }
3235
3236     if (!vfp_access_check(s)) {
3237         return true;
3238     }
3239
3240     vm = tcg_temp_new_i64();
3241     vd = tcg_temp_new_i32();
3242     vfp_load_reg64(vm, a->vm);
3243     gen_helper_vjcvt(vd, vm, cpu_env);
3244     vfp_store_reg32(vd, a->vd);
3245     tcg_temp_free_i64(vm);
3246     tcg_temp_free_i32(vd);
3247     return true;
3248 }
3249
3250 static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
3251 {
3252     TCGv_i32 vd, shift;
3253     TCGv_ptr fpst;
3254     int frac_bits;
3255
3256     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3257         return false;
3258     }
3259
3260     if (!vfp_access_check(s)) {
3261         return true;
3262     }
3263
3264     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
3265
3266     vd = tcg_temp_new_i32();
3267     vfp_load_reg32(vd, a->vd);
3268
3269     fpst = fpstatus_ptr(FPST_FPCR_F16);
3270     shift = tcg_constant_i32(frac_bits);
3271
3272     /* Switch on op:U:sx bits */
3273     switch (a->opc) {
3274     case 0:
3275         gen_helper_vfp_shtoh_round_to_nearest(vd, vd, shift, fpst);
3276         break;
3277     case 1:
3278         gen_helper_vfp_sltoh_round_to_nearest(vd, vd, shift, fpst);
3279         break;
3280     case 2:
3281         gen_helper_vfp_uhtoh_round_to_nearest(vd, vd, shift, fpst);
3282         break;
3283     case 3:
3284         gen_helper_vfp_ultoh_round_to_nearest(vd, vd, shift, fpst);
3285         break;
3286     case 4:
3287         gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst);
3288         break;
3289     case 5:
3290         gen_helper_vfp_toslh_round_to_zero(vd, vd, shift, fpst);
3291         break;
3292     case 6:
3293         gen_helper_vfp_touhh_round_to_zero(vd, vd, shift, fpst);
3294         break;
3295     case 7:
3296         gen_helper_vfp_toulh_round_to_zero(vd, vd, shift, fpst);
3297         break;
3298     default:
3299         g_assert_not_reached();
3300     }
3301
3302     vfp_store_reg32(vd, a->vd);
3303     tcg_temp_free_i32(vd);
3304     tcg_temp_free_ptr(fpst);
3305     return true;
3306 }
3307
3308 static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
3309 {
3310     TCGv_i32 vd, shift;
3311     TCGv_ptr fpst;
3312     int frac_bits;
3313
3314     if (!dc_isar_feature(aa32_fpsp_v3, s)) {
3315         return false;
3316     }
3317
3318     if (!vfp_access_check(s)) {
3319         return true;
3320     }
3321
3322     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
3323
3324     vd = tcg_temp_new_i32();
3325     vfp_load_reg32(vd, a->vd);
3326
3327     fpst = fpstatus_ptr(FPST_FPCR);
3328     shift = tcg_constant_i32(frac_bits);
3329
3330     /* Switch on op:U:sx bits */
3331     switch (a->opc) {
3332     case 0:
3333         gen_helper_vfp_shtos_round_to_nearest(vd, vd, shift, fpst);
3334         break;
3335     case 1:
3336         gen_helper_vfp_sltos_round_to_nearest(vd, vd, shift, fpst);
3337         break;
3338     case 2:
3339         gen_helper_vfp_uhtos_round_to_nearest(vd, vd, shift, fpst);
3340         break;
3341     case 3:
3342         gen_helper_vfp_ultos_round_to_nearest(vd, vd, shift, fpst);
3343         break;
3344     case 4:
3345         gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst);
3346         break;
3347     case 5:
3348         gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst);
3349         break;
3350     case 6:
3351         gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst);
3352         break;
3353     case 7:
3354         gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst);
3355         break;
3356     default:
3357         g_assert_not_reached();
3358     }
3359
3360     vfp_store_reg32(vd, a->vd);
3361     tcg_temp_free_i32(vd);
3362     tcg_temp_free_ptr(fpst);
3363     return true;
3364 }
3365
3366 static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
3367 {
3368     TCGv_i64 vd;
3369     TCGv_i32 shift;
3370     TCGv_ptr fpst;
3371     int frac_bits;
3372
3373     if (!dc_isar_feature(aa32_fpdp_v3, s)) {
3374         return false;
3375     }
3376
3377     /* UNDEF accesses to D16-D31 if they don't exist. */
3378     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
3379         return false;
3380     }
3381
3382     if (!vfp_access_check(s)) {
3383         return true;
3384     }
3385
3386     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
3387
3388     vd = tcg_temp_new_i64();
3389     vfp_load_reg64(vd, a->vd);
3390
3391     fpst = fpstatus_ptr(FPST_FPCR);
3392     shift = tcg_constant_i32(frac_bits);
3393
3394     /* Switch on op:U:sx bits */
3395     switch (a->opc) {
3396     case 0:
3397         gen_helper_vfp_shtod_round_to_nearest(vd, vd, shift, fpst);
3398         break;
3399     case 1:
3400         gen_helper_vfp_sltod_round_to_nearest(vd, vd, shift, fpst);
3401         break;
3402     case 2:
3403         gen_helper_vfp_uhtod_round_to_nearest(vd, vd, shift, fpst);
3404         break;
3405     case 3:
3406         gen_helper_vfp_ultod_round_to_nearest(vd, vd, shift, fpst);
3407         break;
3408     case 4:
3409         gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst);
3410         break;
3411     case 5:
3412         gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst);
3413         break;
3414     case 6:
3415         gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst);
3416         break;
3417     case 7:
3418         gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst);
3419         break;
3420     default:
3421         g_assert_not_reached();
3422     }
3423
3424     vfp_store_reg64(vd, a->vd);
3425     tcg_temp_free_i64(vd);
3426     tcg_temp_free_ptr(fpst);
3427     return true;
3428 }
3429
3430 static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
3431 {
3432     TCGv_i32 vm;
3433     TCGv_ptr fpst;
3434
3435     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3436         return false;
3437     }
3438
3439     if (!vfp_access_check(s)) {
3440         return true;
3441     }
3442
3443     fpst = fpstatus_ptr(FPST_FPCR_F16);
3444     vm = tcg_temp_new_i32();
3445     vfp_load_reg32(vm, a->vm);
3446
3447     if (a->s) {
3448         if (a->rz) {
3449             gen_helper_vfp_tosizh(vm, vm, fpst);
3450         } else {
3451             gen_helper_vfp_tosih(vm, vm, fpst);
3452         }
3453     } else {
3454         if (a->rz) {
3455             gen_helper_vfp_touizh(vm, vm, fpst);
3456         } else {
3457             gen_helper_vfp_touih(vm, vm, fpst);
3458         }
3459     }
3460     vfp_store_reg32(vm, a->vd);
3461     tcg_temp_free_i32(vm);
3462     tcg_temp_free_ptr(fpst);
3463     return true;
3464 }
3465
3466 static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
3467 {
3468     TCGv_i32 vm;
3469     TCGv_ptr fpst;
3470
3471     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
3472         return false;
3473     }
3474
3475     if (!vfp_access_check(s)) {
3476         return true;
3477     }
3478
3479     fpst = fpstatus_ptr(FPST_FPCR);
3480     vm = tcg_temp_new_i32();
3481     vfp_load_reg32(vm, a->vm);
3482
3483     if (a->s) {
3484         if (a->rz) {
3485             gen_helper_vfp_tosizs(vm, vm, fpst);
3486         } else {
3487             gen_helper_vfp_tosis(vm, vm, fpst);
3488         }
3489     } else {
3490         if (a->rz) {
3491             gen_helper_vfp_touizs(vm, vm, fpst);
3492         } else {
3493             gen_helper_vfp_touis(vm, vm, fpst);
3494         }
3495     }
3496     vfp_store_reg32(vm, a->vd);
3497     tcg_temp_free_i32(vm);
3498     tcg_temp_free_ptr(fpst);
3499     return true;
3500 }
3501
3502 static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
3503 {
3504     TCGv_i32 vd;
3505     TCGv_i64 vm;
3506     TCGv_ptr fpst;
3507
3508     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3509         return false;
3510     }
3511
3512     /* UNDEF accesses to D16-D31 if they don't exist. */
3513     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
3514         return false;
3515     }
3516
3517     if (!vfp_access_check(s)) {
3518         return true;
3519     }
3520
3521     fpst = fpstatus_ptr(FPST_FPCR);
3522     vm = tcg_temp_new_i64();
3523     vd = tcg_temp_new_i32();
3524     vfp_load_reg64(vm, a->vm);
3525
3526     if (a->s) {
3527         if (a->rz) {
3528             gen_helper_vfp_tosizd(vd, vm, fpst);
3529         } else {
3530             gen_helper_vfp_tosid(vd, vm, fpst);
3531         }
3532     } else {
3533         if (a->rz) {
3534             gen_helper_vfp_touizd(vd, vm, fpst);
3535         } else {
3536             gen_helper_vfp_touid(vd, vm, fpst);
3537         }
3538     }
3539     vfp_store_reg32(vd, a->vd);
3540     tcg_temp_free_i32(vd);
3541     tcg_temp_free_i64(vm);
3542     tcg_temp_free_ptr(fpst);
3543     return true;
3544 }
3545
3546 static bool trans_VINS(DisasContext *s, arg_VINS *a)
3547 {
3548     TCGv_i32 rd, rm;
3549
3550     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3551         return false;
3552     }
3553
3554     if (s->vec_len != 0 || s->vec_stride != 0) {
3555         return false;
3556     }
3557
3558     if (!vfp_access_check(s)) {
3559         return true;
3560     }
3561
3562     /* Insert low half of Vm into high half of Vd */
3563     rm = tcg_temp_new_i32();
3564     rd = tcg_temp_new_i32();
3565     vfp_load_reg32(rm, a->vm);
3566     vfp_load_reg32(rd, a->vd);
3567     tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
3568     vfp_store_reg32(rd, a->vd);
3569     tcg_temp_free_i32(rm);
3570     tcg_temp_free_i32(rd);
3571     return true;
3572 }
3573
3574 static bool trans_VMOVX(DisasContext *s, arg_VINS *a)
3575 {
3576     TCGv_i32 rm;
3577
3578     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3579         return false;
3580     }
3581
3582     if (s->vec_len != 0 || s->vec_stride != 0) {
3583         return false;
3584     }
3585
3586     if (!vfp_access_check(s)) {
3587         return true;
3588     }
3589
3590     /* Set Vd to high half of Vm */
3591     rm = tcg_temp_new_i32();
3592     vfp_load_reg32(rm, a->vm);
3593     tcg_gen_shri_i32(rm, rm, 16);
3594     vfp_store_reg32(rm, a->vd);
3595     tcg_temp_free_i32(rm);
3596     return true;
3597 }