target/arm/translate-vfp.c

   1 /*
   2  *  ARM translation: AArch32 VFP instructions
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *  Copyright (c) 2005-2007 CodeSourcery
   6  *  Copyright (c) 2007 OpenedHand, Ltd.
   7  *  Copyright (c) 2019 Linaro, Ltd.
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  21  */
  22
  23 #include "qemu/osdep.h"
  24 #include "tcg/tcg-op.h"
  25 #include "tcg/tcg-op-gvec.h"
  26 #include "exec/exec-all.h"
  27 #include "exec/gen-icount.h"
  28 #include "translate.h"
  29 #include "translate-a32.h"
  30
  31 /* Include the generated VFP decoder */
  32 #include "decode-vfp.c.inc"
  33 #include "decode-vfp-uncond.c.inc"
  34
  35 static inline void vfp_load_reg64(TCGv_i64 var, int reg)
  36 {
  37     tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(true, reg));
  38 }
  39
  40 static inline void vfp_store_reg64(TCGv_i64 var, int reg)
  41 {
  42     tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(true, reg));
  43 }
  44
  45 static inline void vfp_load_reg32(TCGv_i32 var, int reg)
  46 {
  47     tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
  48 }
  49
  50 static inline void vfp_store_reg32(TCGv_i32 var, int reg)
  51 {
  52     tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
  53 }
  54
  55 /*
  56  * The imm8 encodes the sign bit, enough bits to represent an exponent in
  57  * the range 01....1xx to 10....0xx, and the most significant 4 bits of
  58  * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
  59  */
  60 uint64_t vfp_expand_imm(int size, uint8_t imm8)
  61 {
  62     uint64_t imm;
  63
  64     switch (size) {
  65     case MO_64:
  66         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
  67             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
  68             extract32(imm8, 0, 6);
  69         imm <<= 48;
  70         break;
  71     case MO_32:
  72         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
  73             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
  74             (extract32(imm8, 0, 6) << 3);
  75         imm <<= 16;
  76         break;
  77     case MO_16:
  78         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
  79             (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
  80             (extract32(imm8, 0, 6) << 6);
  81         break;
  82     default:
  83         g_assert_not_reached();
  84     }
  85     return imm;
  86 }
  87
  88 /*
  89  * Return the offset of a 16-bit half of the specified VFP single-precision
  90  * register. If top is true, returns the top 16 bits; otherwise the bottom
  91  * 16 bits.
  92  */
  93 static inline long vfp_f16_offset(unsigned reg, bool top)
  94 {
  95     long offs = vfp_reg_offset(false, reg);
  96 #ifdef HOST_WORDS_BIGENDIAN
  97     if (!top) {
  98         offs += 2;
  99     }
 100 #else
 101     if (top) {
 102         offs += 2;
 103     }
 104 #endif
 105     return offs;
 106 }
 107
 108 /*
 109  * Generate code for M-profile lazy FP state preservation if needed;
 110  * this corresponds to the pseudocode PreserveFPState() function.
 111  */
 112 static void gen_preserve_fp_state(DisasContext *s)
 113 {
 114     if (s->v7m_lspact) {
 115         /*
 116          * Lazy state saving affects external memory and also the NVIC,
 117          * so we must mark it as an IO operation for icount (and cause
 118          * this to be the last insn in the TB).
 119          */
 120         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
 121             s->base.is_jmp = DISAS_UPDATE_EXIT;
 122             gen_io_start();
 123         }
 124         gen_helper_v7m_preserve_fp_state(cpu_env);
 125         /*
 126          * If the preserve_fp_state helper doesn't throw an exception
 127          * then it will clear LSPACT; we don't need to repeat this for
 128          * any further FP insns in this TB.
 129          */
 130         s->v7m_lspact = false;
 131     }
 132 }
 133
 134 /*
 135  * Check that VFP access is enabled. If it is, do the necessary
 136  * M-profile lazy-FP handling and then return true.
 137  * If not, emit code to generate an appropriate exception and
 138  * return false.
 139  * The ignore_vfp_enabled argument specifies that we should ignore
 140  * whether VFP is enabled via FPEXC[EN]: this should be true for FMXR/FMRX
 141  * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
 142  */
 143 static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
 144 {
 145     if (s->fp_excp_el) {
 146         /* M-profile handled this earlier, in disas_m_nocp() */
 147         assert (!arm_dc_feature(s, ARM_FEATURE_M));
 148         gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
 149                            syn_fp_access_trap(1, 0xe, false),
 150                            s->fp_excp_el);
 151         return false;
 152     }
 153
 154     if (!s->vfp_enabled && !ignore_vfp_enabled) {
 155         assert(!arm_dc_feature(s, ARM_FEATURE_M));
 156         unallocated_encoding(s);
 157         return false;
 158     }
 159
 160     if (arm_dc_feature(s, ARM_FEATURE_M)) {
 161         /* Handle M-profile lazy FP state mechanics */
 162
 163         /* Trigger lazy-state preservation if necessary */
 164         gen_preserve_fp_state(s);
 165
 166         /* Update ownership of FP context: set FPCCR.S to match current state */
 167         if (s->v8m_fpccr_s_wrong) {
 168             TCGv_i32 tmp;
 169
 170             tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
 171             if (s->v8m_secure) {
 172                 tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
 173             } else {
 174                 tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
 175             }
 176             store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
 177             /* Don't need to do this for any further FP insns in this TB */
 178             s->v8m_fpccr_s_wrong = false;
 179         }
 180
 181         if (s->v7m_new_fp_ctxt_needed) {
 182             /*
 183              * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA
 184              * and the FPSCR.
 185              */
 186             TCGv_i32 control, fpscr;
 187             uint32_t bits = R_V7M_CONTROL_FPCA_MASK;
 188
 189             fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
 190             gen_helper_vfp_set_fpscr(cpu_env, fpscr);
 191             tcg_temp_free_i32(fpscr);
 192             /*
 193              * We don't need to arrange to end the TB, because the only
 194              * parts of FPSCR which we cache in the TB flags are the VECLEN
 195              * and VECSTRIDE, and those don't exist for M-profile.
 196              */
 197
 198             if (s->v8m_secure) {
 199                 bits |= R_V7M_CONTROL_SFPA_MASK;
 200             }
 201             control = load_cpu_field(v7m.control[M_REG_S]);
 202             tcg_gen_ori_i32(control, control, bits);
 203             store_cpu_field(control, v7m.control[M_REG_S]);
 204             /* Don't need to do this for any further FP insns in this TB */
 205             s->v7m_new_fp_ctxt_needed = false;
 206         }
 207     }
 208
 209     return true;
 210 }
 211
 212 /*
 213  * The most usual kind of VFP access check, for everything except
 214  * FMXR/FMRX to the always-available special registers.
 215  */
 216 bool vfp_access_check(DisasContext *s)
 217 {
 218     return full_vfp_access_check(s, false);
 219 }
 220
 221 static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
 222 {
 223     uint32_t rd, rn, rm;
 224     int sz = a->sz;
 225
 226     if (!dc_isar_feature(aa32_vsel, s)) {
 227         return false;
 228     }
 229
 230     if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
 231         return false;
 232     }
 233
 234     if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
 235         return false;
 236     }
 237
 238     /* UNDEF accesses to D16-D31 if they don't exist */
 239     if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
 240         ((a->vm | a->vn | a->vd) & 0x10)) {
 241         return false;
 242     }
 243
 244     rd = a->vd;
 245     rn = a->vn;
 246     rm = a->vm;
 247
 248     if (!vfp_access_check(s)) {
 249         return true;
 250     }
 251
 252     if (sz == 3) {
 253         TCGv_i64 frn, frm, dest;
 254         TCGv_i64 tmp, zero, zf, nf, vf;
 255
 256         zero = tcg_const_i64(0);
 257
 258         frn = tcg_temp_new_i64();
 259         frm = tcg_temp_new_i64();
 260         dest = tcg_temp_new_i64();
 261
 262         zf = tcg_temp_new_i64();
 263         nf = tcg_temp_new_i64();
 264         vf = tcg_temp_new_i64();
 265
 266         tcg_gen_extu_i32_i64(zf, cpu_ZF);
 267         tcg_gen_ext_i32_i64(nf, cpu_NF);
 268         tcg_gen_ext_i32_i64(vf, cpu_VF);
 269
 270         vfp_load_reg64(frn, rn);
 271         vfp_load_reg64(frm, rm);
 272         switch (a->cc) {
 273         case 0: /* eq: Z */
 274             tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
 275                                 frn, frm);
 276             break;
 277         case 1: /* vs: V */
 278             tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
 279                                 frn, frm);
 280             break;
 281         case 2: /* ge: N == V -> N ^ V == 0 */
 282             tmp = tcg_temp_new_i64();
 283             tcg_gen_xor_i64(tmp, vf, nf);
 284             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
 285                                 frn, frm);
 286             tcg_temp_free_i64(tmp);
 287             break;
 288         case 3: /* gt: !Z && N == V */
 289             tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
 290                                 frn, frm);
 291             tmp = tcg_temp_new_i64();
 292             tcg_gen_xor_i64(tmp, vf, nf);
 293             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
 294                                 dest, frm);
 295             tcg_temp_free_i64(tmp);
 296             break;
 297         }
 298         vfp_store_reg64(dest, rd);
 299         tcg_temp_free_i64(frn);
 300         tcg_temp_free_i64(frm);
 301         tcg_temp_free_i64(dest);
 302
 303         tcg_temp_free_i64(zf);
 304         tcg_temp_free_i64(nf);
 305         tcg_temp_free_i64(vf);
 306
 307         tcg_temp_free_i64(zero);
 308     } else {
 309         TCGv_i32 frn, frm, dest;
 310         TCGv_i32 tmp, zero;
 311
 312         zero = tcg_const_i32(0);
 313
 314         frn = tcg_temp_new_i32();
 315         frm = tcg_temp_new_i32();
 316         dest = tcg_temp_new_i32();
 317         vfp_load_reg32(frn, rn);
 318         vfp_load_reg32(frm, rm);
 319         switch (a->cc) {
 320         case 0: /* eq: Z */
 321             tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
 322                                 frn, frm);
 323             break;
 324         case 1: /* vs: V */
 325             tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
 326                                 frn, frm);
 327             break;
 328         case 2: /* ge: N == V -> N ^ V == 0 */
 329             tmp = tcg_temp_new_i32();
 330             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
 331             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
 332                                 frn, frm);
 333             tcg_temp_free_i32(tmp);
 334             break;
 335         case 3: /* gt: !Z && N == V */
 336             tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
 337                                 frn, frm);
 338             tmp = tcg_temp_new_i32();
 339             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
 340             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
 341                                 dest, frm);
 342             tcg_temp_free_i32(tmp);
 343             break;
 344         }
 345         /* For fp16 the top half is always zeroes */
 346         if (sz == 1) {
 347             tcg_gen_andi_i32(dest, dest, 0xffff);
 348         }
 349         vfp_store_reg32(dest, rd);
 350         tcg_temp_free_i32(frn);
 351         tcg_temp_free_i32(frm);
 352         tcg_temp_free_i32(dest);
 353
 354         tcg_temp_free_i32(zero);
 355     }
 356
 357     return true;
 358 }
 359
 360 /*
 361  * Table for converting the most common AArch32 encoding of
 362  * rounding mode to arm_fprounding order (which matches the
 363  * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
 364  */
 365 static const uint8_t fp_decode_rm[] = {
 366     FPROUNDING_TIEAWAY,
 367     FPROUNDING_TIEEVEN,
 368     FPROUNDING_POSINF,
 369     FPROUNDING_NEGINF,
 370 };
 371
 372 static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
 373 {
 374     uint32_t rd, rm;
 375     int sz = a->sz;
 376     TCGv_ptr fpst;
 377     TCGv_i32 tcg_rmode;
 378     int rounding = fp_decode_rm[a->rm];
 379
 380     if (!dc_isar_feature(aa32_vrint, s)) {
 381         return false;
 382     }
 383
 384     if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
 385         return false;
 386     }
 387
 388     if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
 389         return false;
 390     }
 391
 392     /* UNDEF accesses to D16-D31 if they don't exist */
 393     if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
 394         ((a->vm | a->vd) & 0x10)) {
 395         return false;
 396     }
 397
 398     rd = a->vd;
 399     rm = a->vm;
 400
 401     if (!vfp_access_check(s)) {
 402         return true;
 403     }
 404
 405     if (sz == 1) {
 406         fpst = fpstatus_ptr(FPST_FPCR_F16);
 407     } else {
 408         fpst = fpstatus_ptr(FPST_FPCR);
 409     }
 410
 411     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
 412     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 413
 414     if (sz == 3) {
 415         TCGv_i64 tcg_op;
 416         TCGv_i64 tcg_res;
 417         tcg_op = tcg_temp_new_i64();
 418         tcg_res = tcg_temp_new_i64();
 419         vfp_load_reg64(tcg_op, rm);
 420         gen_helper_rintd(tcg_res, tcg_op, fpst);
 421         vfp_store_reg64(tcg_res, rd);
 422         tcg_temp_free_i64(tcg_op);
 423         tcg_temp_free_i64(tcg_res);
 424     } else {
 425         TCGv_i32 tcg_op;
 426         TCGv_i32 tcg_res;
 427         tcg_op = tcg_temp_new_i32();
 428         tcg_res = tcg_temp_new_i32();
 429         vfp_load_reg32(tcg_op, rm);
 430         if (sz == 1) {
 431             gen_helper_rinth(tcg_res, tcg_op, fpst);
 432         } else {
 433             gen_helper_rints(tcg_res, tcg_op, fpst);
 434         }
 435         vfp_store_reg32(tcg_res, rd);
 436         tcg_temp_free_i32(tcg_op);
 437         tcg_temp_free_i32(tcg_res);
 438     }
 439
 440     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 441     tcg_temp_free_i32(tcg_rmode);
 442
 443     tcg_temp_free_ptr(fpst);
 444     return true;
 445 }
 446
 447 static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
 448 {
 449     uint32_t rd, rm;
 450     int sz = a->sz;
 451     TCGv_ptr fpst;
 452     TCGv_i32 tcg_rmode, tcg_shift;
 453     int rounding = fp_decode_rm[a->rm];
 454     bool is_signed = a->op;
 455
 456     if (!dc_isar_feature(aa32_vcvt_dr, s)) {
 457         return false;
 458     }
 459
 460     if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
 461         return false;
 462     }
 463
 464     if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
 465         return false;
 466     }
 467
 468     /* UNDEF accesses to D16-D31 if they don't exist */
 469     if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
 470         return false;
 471     }
 472
 473     rd = a->vd;
 474     rm = a->vm;
 475
 476     if (!vfp_access_check(s)) {
 477         return true;
 478     }
 479
 480     if (sz == 1) {
 481         fpst = fpstatus_ptr(FPST_FPCR_F16);
 482     } else {
 483         fpst = fpstatus_ptr(FPST_FPCR);
 484     }
 485
 486     tcg_shift = tcg_const_i32(0);
 487
 488     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
 489     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 490
 491     if (sz == 3) {
 492         TCGv_i64 tcg_double, tcg_res;
 493         TCGv_i32 tcg_tmp;
 494         tcg_double = tcg_temp_new_i64();
 495         tcg_res = tcg_temp_new_i64();
 496         tcg_tmp = tcg_temp_new_i32();
 497         vfp_load_reg64(tcg_double, rm);
 498         if (is_signed) {
 499             gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
 500         } else {
 501             gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
 502         }
 503         tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
 504         vfp_store_reg32(tcg_tmp, rd);
 505         tcg_temp_free_i32(tcg_tmp);
 506         tcg_temp_free_i64(tcg_res);
 507         tcg_temp_free_i64(tcg_double);
 508     } else {
 509         TCGv_i32 tcg_single, tcg_res;
 510         tcg_single = tcg_temp_new_i32();
 511         tcg_res = tcg_temp_new_i32();
 512         vfp_load_reg32(tcg_single, rm);
 513         if (sz == 1) {
 514             if (is_signed) {
 515                 gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst);
 516             } else {
 517                 gen_helper_vfp_toulh(tcg_res, tcg_single, tcg_shift, fpst);
 518             }
 519         } else {
 520             if (is_signed) {
 521                 gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
 522             } else {
 523                 gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
 524             }
 525         }
 526         vfp_store_reg32(tcg_res, rd);
 527         tcg_temp_free_i32(tcg_res);
 528         tcg_temp_free_i32(tcg_single);
 529     }
 530
 531     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 532     tcg_temp_free_i32(tcg_rmode);
 533
 534     tcg_temp_free_i32(tcg_shift);
 535
 536     tcg_temp_free_ptr(fpst);
 537
 538     return true;
 539 }
 540
 541 static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
 542 {
 543     /* VMOV scalar to general purpose register */
 544     TCGv_i32 tmp;
 545
 546     /*
 547      * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has
 548      * all sizes, whether the CPU has fp or not.
 549      */
 550     if (!dc_isar_feature(aa32_mve, s)) {
 551         if (a->size == MO_32
 552             ? !dc_isar_feature(aa32_fpsp_v2, s)
 553             : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
 554             return false;
 555         }
 556     }
 557
 558     /* UNDEF accesses to D16-D31 if they don't exist */
 559     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
 560         return false;
 561     }
 562
 563     if (!vfp_access_check(s)) {
 564         return true;
 565     }
 566
 567     tmp = tcg_temp_new_i32();
 568     read_neon_element32(tmp, a->vn, a->index, a->size | (a->u ? 0 : MO_SIGN));
 569     store_reg(s, a->rt, tmp);
 570
 571     return true;
 572 }
 573
 574 static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
 575 {
 576     /* VMOV general purpose register to scalar */
 577     TCGv_i32 tmp;
 578
 579     /*
 580      * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has
 581      * all sizes, whether the CPU has fp or not.
 582      */
 583     if (!dc_isar_feature(aa32_mve, s)) {
 584         if (a->size == MO_32
 585             ? !dc_isar_feature(aa32_fpsp_v2, s)
 586             : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
 587             return false;
 588         }
 589     }
 590
 591     /* UNDEF accesses to D16-D31 if they don't exist */
 592     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
 593         return false;
 594     }
 595
 596     if (!vfp_access_check(s)) {
 597         return true;
 598     }
 599
 600     tmp = load_reg(s, a->rt);
 601     write_neon_element32(tmp, a->vn, a->index, a->size);
 602     tcg_temp_free_i32(tmp);
 603
 604     return true;
 605 }
 606
 607 static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
 608 {
 609     /* VDUP (general purpose register) */
 610     TCGv_i32 tmp;
 611     int size, vec_size;
 612
 613     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
 614         return false;
 615     }
 616
 617     /* UNDEF accesses to D16-D31 if they don't exist */
 618     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
 619         return false;
 620     }
 621
 622     if (a->b && a->e) {
 623         return false;
 624     }
 625
 626     if (a->q && (a->vn & 1)) {
 627         return false;
 628     }
 629
 630     vec_size = a->q ? 16 : 8;
 631     if (a->b) {
 632         size = 0;
 633     } else if (a->e) {
 634         size = 1;
 635     } else {
 636         size = 2;
 637     }
 638
 639     if (!vfp_access_check(s)) {
 640         return true;
 641     }
 642
 643     tmp = load_reg(s, a->rt);
 644     tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn),
 645                          vec_size, vec_size, tmp);
 646     tcg_temp_free_i32(tmp);
 647
 648     return true;
 649 }
 650
 651 /*
 652  * M-profile provides two different sets of instructions that can
 653  * access floating point system registers: VMSR/VMRS (which move
 654  * to/from a general purpose register) and VLDR/VSTR sysreg (which
 655  * move directly to/from memory). In some cases there are also side
 656  * effects which must happen after any write to memory (which could
 657  * cause an exception). So we implement the common logic for the
 658  * sysreg access in gen_M_fp_sysreg_write() and gen_M_fp_sysreg_read(),
 659  * which take pointers to callback functions which will perform the
 660  * actual "read/write general purpose register" and "read/write
 661  * memory" operations.
 662  */
 663
 664 /*
 665  * Emit code to store the sysreg to its final destination; frees the
 666  * TCG temp 'value' it is passed.
 667  */
 668 typedef void fp_sysreg_storefn(DisasContext *s, void *opaque, TCGv_i32 value);
 669 /*
 670  * Emit code to load the value to be copied to the sysreg; returns
 671  * a new TCG temporary
 672  */
 673 typedef TCGv_i32 fp_sysreg_loadfn(DisasContext *s, void *opaque);
 674
 675 /* Common decode/access checks for fp sysreg read/write */
 676 typedef enum FPSysRegCheckResult {
 677     FPSysRegCheckFailed, /* caller should return false */
 678     FPSysRegCheckDone, /* caller should return true */
 679     FPSysRegCheckContinue, /* caller should continue generating code */
 680 } FPSysRegCheckResult;
 681
 682 static FPSysRegCheckResult fp_sysreg_checks(DisasContext *s, int regno)
 683 {
 684     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
 685         return FPSysRegCheckFailed;
 686     }
 687
 688     switch (regno) {
 689     case ARM_VFP_FPSCR:
 690     case QEMU_VFP_FPSCR_NZCV:
 691         break;
 692     case ARM_VFP_FPSCR_NZCVQC:
 693         if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
 694             return FPSysRegCheckFailed;
 695         }
 696         break;
 697     case ARM_VFP_FPCXT_S:
 698     case ARM_VFP_FPCXT_NS:
 699         if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
 700             return FPSysRegCheckFailed;
 701         }
 702         if (!s->v8m_secure) {
 703             return FPSysRegCheckFailed;
 704         }
 705         break;
 706     case ARM_VFP_VPR:
 707     case ARM_VFP_P0:
 708         if (!dc_isar_feature(aa32_mve, s)) {
 709             return FPSysRegCheckFailed;
 710         }
 711         break;
 712     default:
 713         return FPSysRegCheckFailed;
 714     }
 715
 716     /*
 717      * FPCXT_NS is a special case: it has specific handling for
 718      * "current FP state is inactive", and must do the PreserveFPState()
 719      * but not the usual full set of actions done by ExecuteFPCheck().
 720      * So we don't call vfp_access_check() and the callers must handle this.
 721      */
 722     if (regno != ARM_VFP_FPCXT_NS && !vfp_access_check(s)) {
 723         return FPSysRegCheckDone;
 724     }
 725     return FPSysRegCheckContinue;
 726 }
 727
 728 static void gen_branch_fpInactive(DisasContext *s, TCGCond cond,
 729                                   TCGLabel *label)
 730 {
 731     /*
 732      * FPCXT_NS is a special case: it has specific handling for
 733      * "current FP state is inactive", and must do the PreserveFPState()
 734      * but not the usual full set of actions done by ExecuteFPCheck().
 735      * We don't have a TB flag that matches the fpInactive check, so we
 736      * do it at runtime as we don't expect FPCXT_NS accesses to be frequent.
 737      *
 738      * Emit code that checks fpInactive and does a conditional
 739      * branch to label based on it:
 740      *  if cond is TCG_COND_NE then branch if fpInactive != 0 (ie if inactive)
 741      *  if cond is TCG_COND_EQ then branch if fpInactive == 0 (ie if active)
 742      */
 743     assert(cond == TCG_COND_EQ || cond == TCG_COND_NE);
 744
 745     /* fpInactive = FPCCR_NS.ASPEN == 1 && CONTROL.FPCA == 0 */
 746     TCGv_i32 aspen, fpca;
 747     aspen = load_cpu_field(v7m.fpccr[M_REG_NS]);
 748     fpca = load_cpu_field(v7m.control[M_REG_S]);
 749     tcg_gen_andi_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
 750     tcg_gen_xori_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
 751     tcg_gen_andi_i32(fpca, fpca, R_V7M_CONTROL_FPCA_MASK);
 752     tcg_gen_or_i32(fpca, fpca, aspen);
 753     tcg_gen_brcondi_i32(tcg_invert_cond(cond), fpca, 0, label);
 754     tcg_temp_free_i32(aspen);
 755     tcg_temp_free_i32(fpca);
 756 }
 757
 758 static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
 759
 760                                   fp_sysreg_loadfn *loadfn,
 761                                  void *opaque)
 762 {
 763     /* Do a write to an M-profile floating point system register */
 764     TCGv_i32 tmp;
 765     TCGLabel *lab_end = NULL;
 766
 767     switch (fp_sysreg_checks(s, regno)) {
 768     case FPSysRegCheckFailed:
 769         return false;
 770     case FPSysRegCheckDone:
 771         return true;
 772     case FPSysRegCheckContinue:
 773         break;
 774     }
 775
 776     switch (regno) {
 777     case ARM_VFP_FPSCR:
 778         tmp = loadfn(s, opaque);
 779         gen_helper_vfp_set_fpscr(cpu_env, tmp);
 780         tcg_temp_free_i32(tmp);
 781         gen_lookup_tb(s);
 782         break;
 783     case ARM_VFP_FPSCR_NZCVQC:
 784     {
 785         TCGv_i32 fpscr;
 786         tmp = loadfn(s, opaque);
 787         if (dc_isar_feature(aa32_mve, s)) {
 788             /* QC is only present for MVE; otherwise RES0 */
 789             TCGv_i32 qc = tcg_temp_new_i32();
 790             tcg_gen_andi_i32(qc, tmp, FPCR_QC);
 791             /*
 792              * The 4 vfp.qc[] fields need only be "zero" vs "non-zero";
 793              * here writing the same value into all elements is simplest.
 794              */
 795             tcg_gen_gvec_dup_i32(MO_32, offsetof(CPUARMState, vfp.qc),
 796                                  16, 16, qc);
 797         }
 798         tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
 799         fpscr = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
 800         tcg_gen_andi_i32(fpscr, fpscr, ~FPCR_NZCV_MASK);
 801         tcg_gen_or_i32(fpscr, fpscr, tmp);
 802         store_cpu_field(fpscr, vfp.xregs[ARM_VFP_FPSCR]);
 803         tcg_temp_free_i32(tmp);
 804         break;
 805     }
 806     case ARM_VFP_FPCXT_NS:
 807         lab_end = gen_new_label();
 808         /* fpInactive case: write is a NOP, so branch to end */
 809         gen_branch_fpInactive(s, TCG_COND_NE, lab_end);
 810         /* !fpInactive: PreserveFPState(), and reads same as FPCXT_S */
 811         gen_preserve_fp_state(s);
 812         /* fall through */
 813     case ARM_VFP_FPCXT_S:
 814     {
 815         TCGv_i32 sfpa, control;
 816         /*
 817          * Set FPSCR and CONTROL.SFPA from value; the new FPSCR takes
 818          * bits [27:0] from value and zeroes bits [31:28].
 819          */
 820         tmp = loadfn(s, opaque);
 821         sfpa = tcg_temp_new_i32();
 822         tcg_gen_shri_i32(sfpa, tmp, 31);
 823         control = load_cpu_field(v7m.control[M_REG_S]);
 824         tcg_gen_deposit_i32(control, control, sfpa,
 825                             R_V7M_CONTROL_SFPA_SHIFT, 1);
 826         store_cpu_field(control, v7m.control[M_REG_S]);
 827         tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
 828         gen_helper_vfp_set_fpscr(cpu_env, tmp);
 829         tcg_temp_free_i32(tmp);
 830         tcg_temp_free_i32(sfpa);
 831         break;
 832     }
 833     case ARM_VFP_VPR:
 834         /* Behaves as NOP if not privileged */
 835         if (IS_USER(s)) {
 836             break;
 837         }
 838         tmp = loadfn(s, opaque);
 839         store_cpu_field(tmp, v7m.vpr);
 840         break;
 841     case ARM_VFP_P0:
 842     {
 843         TCGv_i32 vpr;
 844         tmp = loadfn(s, opaque);
 845         vpr = load_cpu_field(v7m.vpr);
 846         tcg_gen_deposit_i32(vpr, vpr, tmp,
 847                             R_V7M_VPR_P0_SHIFT, R_V7M_VPR_P0_LENGTH);
 848         store_cpu_field(vpr, v7m.vpr);
 849         tcg_temp_free_i32(tmp);
 850         break;
 851     }
 852     default:
 853         g_assert_not_reached();
 854     }
 855     if (lab_end) {
 856         gen_set_label(lab_end);
 857     }
 858     return true;
 859 }
 860
 861 static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
 862                                 fp_sysreg_storefn *storefn,
 863                                 void *opaque)
 864 {
 865     /* Do a read from an M-profile floating point system register */
 866     TCGv_i32 tmp;
 867     TCGLabel *lab_end = NULL;
 868     bool lookup_tb = false;
 869
 870     switch (fp_sysreg_checks(s, regno)) {
 871     case FPSysRegCheckFailed:
 872         return false;
 873     case FPSysRegCheckDone:
 874         return true;
 875     case FPSysRegCheckContinue:
 876         break;
 877     }
 878
 879     if (regno == ARM_VFP_FPSCR_NZCVQC && !dc_isar_feature(aa32_mve, s)) {
 880         /* QC is RES0 without MVE, so NZCVQC simplifies to NZCV */
 881         regno = QEMU_VFP_FPSCR_NZCV;
 882     }
 883
 884     switch (regno) {
 885     case ARM_VFP_FPSCR:
 886         tmp = tcg_temp_new_i32();
 887         gen_helper_vfp_get_fpscr(tmp, cpu_env);
 888         storefn(s, opaque, tmp);
 889         break;
 890     case ARM_VFP_FPSCR_NZCVQC:
 891         tmp = tcg_temp_new_i32();
 892         gen_helper_vfp_get_fpscr(tmp, cpu_env);
 893         tcg_gen_andi_i32(tmp, tmp, FPCR_NZCVQC_MASK);
 894         storefn(s, opaque, tmp);
 895         break;
 896     case QEMU_VFP_FPSCR_NZCV:
 897         /*
 898          * Read just NZCV; this is a special case to avoid the
 899          * helper call for the "VMRS to CPSR.NZCV" insn.
 900          */
 901         tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
 902         tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
 903         storefn(s, opaque, tmp);
 904         break;
 905     case ARM_VFP_FPCXT_S:
 906     {
 907         TCGv_i32 control, sfpa, fpscr;
 908         /* Bits [27:0] from FPSCR, bit [31] from CONTROL.SFPA */
 909         tmp = tcg_temp_new_i32();
 910         sfpa = tcg_temp_new_i32();
 911         gen_helper_vfp_get_fpscr(tmp, cpu_env);
 912         tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
 913         control = load_cpu_field(v7m.control[M_REG_S]);
 914         tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
 915         tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
 916         tcg_gen_or_i32(tmp, tmp, sfpa);
 917         tcg_temp_free_i32(sfpa);
 918         /*
 919          * Store result before updating FPSCR etc, in case
 920          * it is a memory write which causes an exception.
 921          */
 922         storefn(s, opaque, tmp);
 923         /*
 924          * Now we must reset FPSCR from FPDSCR_NS, and clear
 925          * CONTROL.SFPA; so we'll end the TB here.
 926          */
 927         tcg_gen_andi_i32(control, control, ~R_V7M_CONTROL_SFPA_MASK);
 928         store_cpu_field(control, v7m.control[M_REG_S]);
 929         fpscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
 930         gen_helper_vfp_set_fpscr(cpu_env, fpscr);
 931         tcg_temp_free_i32(fpscr);
 932         lookup_tb = true;
 933         break;
 934     }
 935     case ARM_VFP_FPCXT_NS:
 936     {
 937         TCGv_i32 control, sfpa, fpscr, fpdscr, zero;
 938         TCGLabel *lab_active = gen_new_label();
 939
 940         lookup_tb = true;
 941
 942         gen_branch_fpInactive(s, TCG_COND_EQ, lab_active);
 943         /* fpInactive case: reads as FPDSCR_NS */
 944         TCGv_i32 tmp = load_cpu_field(v7m.fpdscr[M_REG_NS]);
 945         storefn(s, opaque, tmp);
 946         lab_end = gen_new_label();
 947         tcg_gen_br(lab_end);
 948
 949         gen_set_label(lab_active);
 950         /* !fpInactive: Reads the same as FPCXT_S, but side effects differ */
 951         gen_preserve_fp_state(s);
 952         tmp = tcg_temp_new_i32();
 953         sfpa = tcg_temp_new_i32();
 954         fpscr = tcg_temp_new_i32();
 955         gen_helper_vfp_get_fpscr(fpscr, cpu_env);
 956         tcg_gen_andi_i32(tmp, fpscr, ~FPCR_NZCV_MASK);
 957         control = load_cpu_field(v7m.control[M_REG_S]);
 958         tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
 959         tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
 960         tcg_gen_or_i32(tmp, tmp, sfpa);
 961         tcg_temp_free_i32(control);
 962         /* Store result before updating FPSCR, in case it faults */
 963         storefn(s, opaque, tmp);
 964         /* If SFPA is zero then set FPSCR from FPDSCR_NS */
 965         fpdscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
 966         zero = tcg_const_i32(0);
 967         tcg_gen_movcond_i32(TCG_COND_EQ, fpscr, sfpa, zero, fpdscr, fpscr);
 968         gen_helper_vfp_set_fpscr(cpu_env, fpscr);
 969         tcg_temp_free_i32(zero);
 970         tcg_temp_free_i32(sfpa);
 971         tcg_temp_free_i32(fpdscr);
 972         tcg_temp_free_i32(fpscr);
 973         break;
 974     }
 975     case ARM_VFP_VPR:
 976         /* Behaves as NOP if not privileged */
 977         if (IS_USER(s)) {
 978             break;
 979         }
 980         tmp = load_cpu_field(v7m.vpr);
 981         storefn(s, opaque, tmp);
 982         break;
 983     case ARM_VFP_P0:
 984         tmp = load_cpu_field(v7m.vpr);
 985         tcg_gen_extract_i32(tmp, tmp, R_V7M_VPR_P0_SHIFT, R_V7M_VPR_P0_LENGTH);
 986         storefn(s, opaque, tmp);
 987         break;
 988     default:
 989         g_assert_not_reached();
 990     }
 991
 992     if (lab_end) {
 993         gen_set_label(lab_end);
 994     }
 995     if (lookup_tb) {
 996         gen_lookup_tb(s);
 997     }
 998     return true;
 999 }
1000
1001 static void fp_sysreg_to_gpr(DisasContext *s, void *opaque, TCGv_i32 value)
1002 {
1003     arg_VMSR_VMRS *a = opaque;
1004
1005     if (a->rt == 15) {
1006         /* Set the 4 flag bits in the CPSR */
1007         gen_set_nzcv(value);
1008         tcg_temp_free_i32(value);
1009     } else {
1010         store_reg(s, a->rt, value);
1011     }
1012 }
1013
1014 static TCGv_i32 gpr_to_fp_sysreg(DisasContext *s, void *opaque)
1015 {
1016     arg_VMSR_VMRS *a = opaque;
1017
1018     return load_reg(s, a->rt);
1019 }
1020
1021 static bool gen_M_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
1022 {
1023     /*
1024      * Accesses to R15 are UNPREDICTABLE; we choose to undef.
1025      * FPSCR -> r15 is a special case which writes to the PSR flags;
1026      * set a->reg to a special value to tell gen_M_fp_sysreg_read()
1027      * we only care about the top 4 bits of FPSCR there.
1028      */
1029     if (a->rt == 15) {
1030         if (a->l && a->reg == ARM_VFP_FPSCR) {
1031             a->reg = QEMU_VFP_FPSCR_NZCV;
1032         } else {
1033             return false;
1034         }
1035     }
1036
1037     if (a->l) {
1038         /* VMRS, move FP system register to gp register */
1039         return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_gpr, a);
1040     } else {
1041         /* VMSR, move gp register to FP system register */
1042         return gen_M_fp_sysreg_write(s, a->reg, gpr_to_fp_sysreg, a);
1043     }
1044 }
1045
/*
 * Translate VMSR/VMRS: move between a general purpose register (a->rt)
 * and a VFP system register (a->reg). a->l selects the direction:
 * set for VMRS (system register to gpr), clear for VMSR.
 *
 * M-profile CPUs are handed off to gen_M_VMSR_VMRS(); the rest of the
 * function handles the other profiles.
 *
 * Returns false when the access is rejected (missing feature, unknown
 * register number, or insufficient privilege); returns true once the
 * instruction has been dealt with, which includes the case where
 * full_vfp_access_check() fails.
 */
static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
{
    TCGv_i32 tmp;
    /* Forwarded to full_vfp_access_check(); set for ID regs and FPEXC */
    bool ignore_vfp_enabled = false;

    if (arm_dc_feature(s, ARM_FEATURE_M)) {
        return gen_M_VMSR_VMRS(s, a);
    }

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /*
     * First pass over the register number: reject accesses that are
     * not permitted and decide whether the enable check is relaxed.
     */
    switch (a->reg) {
    case ARM_VFP_FPSID:
        /*
         * VFPv2 allows access to FPSID from userspace; VFPv3 restricts
         * all ID registers to privileged access only.
         */
        if (IS_USER(s) && dc_isar_feature(aa32_fpsp_v3, s)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_MVFR0:
    case ARM_VFP_MVFR1:
        if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_MVFR2:
        if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_FPSCR:
        /* No privilege or feature restriction beyond the checks above */
        break;
    case ARM_VFP_FPEXC:
        if (IS_USER(s)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_FPINST:
    case ARM_VFP_FPINST2:
        /* Not present in VFPv3 */
        if (IS_USER(s) || dc_isar_feature(aa32_fpsp_v3, s)) {
            return false;
        }
        break;
    default:
        return false;
    }

    if (!full_vfp_access_check(s, ignore_vfp_enabled)) {
        return true;
    }

    /*
     * Second pass: emit the transfer. Every register number that
     * survived the first switch has a case in both switches below,
     * so the default cases are unreachable.
     */
    if (a->l) {
        /* VMRS, move VFP special register to gp register */
        switch (a->reg) {
        case ARM_VFP_MVFR0:
        case ARM_VFP_MVFR1:
        case ARM_VFP_MVFR2:
        case ARM_VFP_FPSID:
            if (s->current_el == 1) {
                TCGv_i32 tcg_reg, tcg_rt;

                /*
                 * Sync condexec state and the PC of this insn before
                 * calling the helper (presumably so that it can raise
                 * an exception with correct state).
                 */
                gen_set_condexec(s);
                gen_set_pc_im(s, s->pc_curr);
                tcg_reg = tcg_const_i32(a->reg);
                tcg_rt = tcg_const_i32(a->rt);
                gen_helper_check_hcr_el2_trap(cpu_env, tcg_rt, tcg_reg);
                tcg_temp_free_i32(tcg_reg);
                tcg_temp_free_i32(tcg_rt);
            }
            /* fall through */
        case ARM_VFP_FPEXC:
        case ARM_VFP_FPINST:
        case ARM_VFP_FPINST2:
            /* These are read straight out of the xregs[] array */
            tmp = load_cpu_field(vfp.xregs[a->reg]);
            break;
        case ARM_VFP_FPSCR:
            if (a->rt == 15) {
                /* Only NZCV is wanted: read the field and mask it */
                tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
                tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
            } else {
                /* The full FPSCR value is obtained through the helper */
                tmp = tcg_temp_new_i32();
                gen_helper_vfp_get_fpscr(tmp, cpu_env);
            }
            break;
        default:
            g_assert_not_reached();
        }

        if (a->rt == 15) {
            /* Set the 4 flag bits in the CPSR.  */
            gen_set_nzcv(tmp);
            tcg_temp_free_i32(tmp);
        } else {
            /* tmp is not freed on this path */
            store_reg(s, a->rt, tmp);
        }
    } else {
        /* VMSR, move gp register to VFP special register */
        switch (a->reg) {
        case ARM_VFP_FPSID:
        case ARM_VFP_MVFR0:
        case ARM_VFP_MVFR1:
        case ARM_VFP_MVFR2:
            /* Writes are ignored.  */
            break;
        case ARM_VFP_FPSCR:
            tmp = load_reg(s, a->rt);
            gen_helper_vfp_set_fpscr(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            /* NOTE(review): presumably ends the TB because FPSCR changed */
            gen_lookup_tb(s);
            break;
        case ARM_VFP_FPEXC:
            /*
             * TODO: VFP subarchitecture support.
             * For now, keep the EN bit only
             */
            tmp = load_reg(s, a->rt);
            /* Bit 30 is the only bit of the written value that is kept */
            tcg_gen_andi_i32(tmp, tmp, 1 << 30);
            store_cpu_field(tmp, vfp.xregs[a->reg]);
            gen_lookup_tb(s);
            break;
        case ARM_VFP_FPINST:
        case ARM_VFP_FPINST2:
            /* Stored unmodified */
            tmp = load_reg(s, a->rt);
            store_cpu_field(tmp, vfp.xregs[a->reg]);
            break;
        default:
            g_assert_not_reached();
        }
    }

    return true;
}
1187
1188 static void fp_sysreg_to_memory(DisasContext *s, void *opaque, TCGv_i32 value)
1189 {
1190     arg_vldr_sysreg *a = opaque;
1191     uint32_t offset = a->imm;
1192     TCGv_i32 addr;
1193
1194     if (!a->a) {
1195         offset = - offset;
1196     }
1197
1198     addr = load_reg(s, a->rn);
1199     if (a->p) {
1200         tcg_gen_addi_i32(addr, addr, offset);
1201     }
1202
1203     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1204         gen_helper_v8m_stackcheck(cpu_env, addr);
1205     }
1206
1207     gen_aa32_st_i32(s, value, addr, get_mem_index(s),
1208                     MO_UL | MO_ALIGN | s->be_data);
1209     tcg_temp_free_i32(value);
1210
1211     if (a->w) {
1212         /* writeback */
1213         if (!a->p) {
1214             tcg_gen_addi_i32(addr, addr, offset);
1215         }
1216         store_reg(s, a->rn, addr);
1217     } else {
1218         tcg_temp_free_i32(addr);
1219     }
1220 }
1221
1222 static TCGv_i32 memory_to_fp_sysreg(DisasContext *s, void *opaque)
1223 {
1224     arg_vldr_sysreg *a = opaque;
1225     uint32_t offset = a->imm;
1226     TCGv_i32 addr;
1227     TCGv_i32 value = tcg_temp_new_i32();
1228
1229     if (!a->a) {
1230         offset = - offset;
1231     }
1232
1233     addr = load_reg(s, a->rn);
1234     if (a->p) {
1235         tcg_gen_addi_i32(addr, addr, offset);
1236     }
1237
1238     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1239         gen_helper_v8m_stackcheck(cpu_env, addr);
1240     }
1241
1242     gen_aa32_ld_i32(s, value, addr, get_mem_index(s),
1243                     MO_UL | MO_ALIGN | s->be_data);
1244
1245     if (a->w) {
1246         /* writeback */
1247         if (!a->p) {
1248             tcg_gen_addi_i32(addr, addr, offset);
1249         }
1250         store_reg(s, a->rn, addr);
1251     } else {
1252         tcg_temp_free_i32(addr);
1253     }
1254     return value;
1255 }
1256
1257 static bool trans_VLDR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
1258 {
1259     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
1260         return false;
1261     }
1262     if (a->rn == 15) {
1263         return false;
1264     }
1265     return gen_M_fp_sysreg_write(s, a->reg, memory_to_fp_sysreg, a);
1266 }
1267
1268 static bool trans_VSTR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
1269 {
1270     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
1271         return false;
1272     }
1273     if (a->rn == 15) {
1274         return false;
1275     }
1276     return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_memory, a);
1277 }
1278
1279 static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
1280 {
1281     TCGv_i32 tmp;
1282
1283     if (!dc_isar_feature(aa32_fp16_arith, s)) {
1284         return false;
1285     }
1286
1287     if (a->rt == 15) {
1288         /* UNPREDICTABLE; we choose to UNDEF */
1289         return false;
1290     }
1291
1292     if (!vfp_access_check(s)) {
1293         return true;
1294     }
1295
1296     if (a->l) {
1297         /* VFP to general purpose register */
1298         tmp = tcg_temp_new_i32();
1299         vfp_load_reg32(tmp, a->vn);
1300         tcg_gen_andi_i32(tmp, tmp, 0xffff);
1301         store_reg(s, a->rt, tmp);
1302     } else {
1303         /* general purpose register to VFP */
1304         tmp = load_reg(s, a->rt);
1305         tcg_gen_andi_i32(tmp, tmp, 0xffff);
1306         vfp_store_reg32(tmp, a->vn);
1307         tcg_temp_free_i32(tmp);
1308     }
1309
1310     return true;
1311 }
1312
1313 static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
1314 {
1315     TCGv_i32 tmp;
1316
1317     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1318         return false;
1319     }
1320
1321     if (!vfp_access_check(s)) {
1322         return true;
1323     }
1324
1325     if (a->l) {
1326         /* VFP to general purpose register */
1327         tmp = tcg_temp_new_i32();
1328         vfp_load_reg32(tmp, a->vn);
1329         if (a->rt == 15) {
1330             /* Set the 4 flag bits in the CPSR.  */
1331             gen_set_nzcv(tmp);
1332             tcg_temp_free_i32(tmp);
1333         } else {
1334             store_reg(s, a->rt, tmp);
1335         }
1336     } else {
1337         /* general purpose register to VFP */
1338         tmp = load_reg(s, a->rt);
1339         vfp_store_reg32(tmp, a->vn);
1340         tcg_temp_free_i32(tmp);
1341     }
1342
1343     return true;
1344 }
1345
1346 static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
1347 {
1348     TCGv_i32 tmp;
1349
1350     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1351         return false;
1352     }
1353
1354     /*
1355      * VMOV between two general-purpose registers and two single precision
1356      * floating point registers
1357      */
1358     if (!vfp_access_check(s)) {
1359         return true;
1360     }
1361
1362     if (a->op) {
1363         /* fpreg to gpreg */
1364         tmp = tcg_temp_new_i32();
1365         vfp_load_reg32(tmp, a->vm);
1366         store_reg(s, a->rt, tmp);
1367         tmp = tcg_temp_new_i32();
1368         vfp_load_reg32(tmp, a->vm + 1);
1369         store_reg(s, a->rt2, tmp);
1370     } else {
1371         /* gpreg to fpreg */
1372         tmp = load_reg(s, a->rt);
1373         vfp_store_reg32(tmp, a->vm);
1374         tcg_temp_free_i32(tmp);
1375         tmp = load_reg(s, a->rt2);
1376         vfp_store_reg32(tmp, a->vm + 1);
1377         tcg_temp_free_i32(tmp);
1378     }
1379
1380     return true;
1381 }
1382
1383 static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
1384 {
1385     TCGv_i32 tmp;
1386
1387     /*
1388      * VMOV between two general-purpose registers and one double precision
1389      * floating point register.  Note that this does not require support
1390      * for double precision arithmetic.
1391      */
1392     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1393         return false;
1394     }
1395
1396     /* UNDEF accesses to D16-D31 if they don't exist */
1397     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
1398         return false;
1399     }
1400
1401     if (!vfp_access_check(s)) {
1402         return true;
1403     }
1404
1405     if (a->op) {
1406         /* fpreg to gpreg */
1407         tmp = tcg_temp_new_i32();
1408         vfp_load_reg32(tmp, a->vm * 2);
1409         store_reg(s, a->rt, tmp);
1410         tmp = tcg_temp_new_i32();
1411         vfp_load_reg32(tmp, a->vm * 2 + 1);
1412         store_reg(s, a->rt2, tmp);
1413     } else {
1414         /* gpreg to fpreg */
1415         tmp = load_reg(s, a->rt);
1416         vfp_store_reg32(tmp, a->vm * 2);
1417         tcg_temp_free_i32(tmp);
1418         tmp = load_reg(s, a->rt2);
1419         vfp_store_reg32(tmp, a->vm * 2 + 1);
1420         tcg_temp_free_i32(tmp);
1421     }
1422
1423     return true;
1424 }
1425
1426 static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a)
1427 {
1428     uint32_t offset;
1429     TCGv_i32 addr, tmp;
1430
1431     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1432         return false;
1433     }
1434
1435     if (!vfp_access_check(s)) {
1436         return true;
1437     }
1438
1439     /* imm8 field is offset/2 for fp16, unlike fp32 and fp64 */
1440     offset = a->imm << 1;
1441     if (!a->u) {
1442         offset = -offset;
1443     }
1444
1445     /* For thumb, use of PC is UNPREDICTABLE.  */
1446     addr = add_reg_for_lit(s, a->rn, offset);
1447     tmp = tcg_temp_new_i32();
1448     if (a->l) {
1449         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN);
1450         vfp_store_reg32(tmp, a->vd);
1451     } else {
1452         vfp_load_reg32(tmp, a->vd);
1453         gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN);
1454     }
1455     tcg_temp_free_i32(tmp);
1456     tcg_temp_free_i32(addr);
1457
1458     return true;
1459 }
1460
1461 static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
1462 {
1463     uint32_t offset;
1464     TCGv_i32 addr, tmp;
1465
1466     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1467         return false;
1468     }
1469
1470     if (!vfp_access_check(s)) {
1471         return true;
1472     }
1473
1474     offset = a->imm << 2;
1475     if (!a->u) {
1476         offset = -offset;
1477     }
1478
1479     /* For thumb, use of PC is UNPREDICTABLE.  */
1480     addr = add_reg_for_lit(s, a->rn, offset);
1481     tmp = tcg_temp_new_i32();
1482     if (a->l) {
1483         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
1484         vfp_store_reg32(tmp, a->vd);
1485     } else {
1486         vfp_load_reg32(tmp, a->vd);
1487         gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
1488     }
1489     tcg_temp_free_i32(tmp);
1490     tcg_temp_free_i32(addr);
1491
1492     return true;
1493 }
1494
1495 static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
1496 {
1497     uint32_t offset;
1498     TCGv_i32 addr;
1499     TCGv_i64 tmp;
1500
1501     /* Note that this does not require support for double arithmetic.  */
1502     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1503         return false;
1504     }
1505
1506     /* UNDEF accesses to D16-D31 if they don't exist */
1507     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
1508         return false;
1509     }
1510
1511     if (!vfp_access_check(s)) {
1512         return true;
1513     }
1514
1515     offset = a->imm << 2;
1516     if (!a->u) {
1517         offset = -offset;
1518     }
1519
1520     /* For thumb, use of PC is UNPREDICTABLE.  */
1521     addr = add_reg_for_lit(s, a->rn, offset);
1522     tmp = tcg_temp_new_i64();
1523     if (a->l) {
1524         gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4);
1525         vfp_store_reg64(tmp, a->vd);
1526     } else {
1527         vfp_load_reg64(tmp, a->vd);
1528         gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4);
1529     }
1530     tcg_temp_free_i64(tmp);
1531     tcg_temp_free_i32(addr);
1532
1533     return true;
1534 }
1535
1536 static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
1537 {
1538     uint32_t offset;
1539     TCGv_i32 addr, tmp;
1540     int i, n;
1541
1542     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1543         return false;
1544     }
1545
1546     n = a->imm;
1547
1548     if (n == 0 || (a->vd + n) > 32) {
1549         /*
1550          * UNPREDICTABLE cases for bad immediates: we choose to
1551          * UNDEF to avoid generating huge numbers of TCG ops
1552          */
1553         return false;
1554     }
1555     if (a->rn == 15 && a->w) {
1556         /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
1557         return false;
1558     }
1559
1560     if (!vfp_access_check(s)) {
1561         return true;
1562     }
1563
1564     /* For thumb, use of PC is UNPREDICTABLE.  */
1565     addr = add_reg_for_lit(s, a->rn, 0);
1566     if (a->p) {
1567         /* pre-decrement */
1568         tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
1569     }
1570
1571     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1572         /*
1573          * Here 'addr' is the lowest address we will store to,
1574          * and is either the old SP (if post-increment) or
1575          * the new SP (if pre-decrement). For post-increment
1576          * where the old value is below the limit and the new
1577          * value is above, it is UNKNOWN whether the limit check
1578          * triggers; we choose to trigger.
1579          */
1580         gen_helper_v8m_stackcheck(cpu_env, addr);
1581     }
1582
1583     offset = 4;
1584     tmp = tcg_temp_new_i32();
1585     for (i = 0; i < n; i++) {
1586         if (a->l) {
1587             /* load */
1588             gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
1589             vfp_store_reg32(tmp, a->vd + i);
1590         } else {
1591             /* store */
1592             vfp_load_reg32(tmp, a->vd + i);
1593             gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
1594         }
1595         tcg_gen_addi_i32(addr, addr, offset);
1596     }
1597     tcg_temp_free_i32(tmp);
1598     if (a->w) {
1599         /* writeback */
1600         if (a->p) {
1601             offset = -offset * n;
1602             tcg_gen_addi_i32(addr, addr, offset);
1603         }
1604         store_reg(s, a->rn, addr);
1605     } else {
1606         tcg_temp_free_i32(addr);
1607     }
1608
1609     return true;
1610 }
1611
1612 static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
1613 {
1614     uint32_t offset;
1615     TCGv_i32 addr;
1616     TCGv_i64 tmp;
1617     int i, n;
1618
1619     /* Note that this does not require support for double arithmetic.  */
1620     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1621         return false;
1622     }
1623
1624     n = a->imm >> 1;
1625
1626     if (n == 0 || (a->vd + n) > 32 || n > 16) {
1627         /*
1628          * UNPREDICTABLE cases for bad immediates: we choose to
1629          * UNDEF to avoid generating huge numbers of TCG ops
1630          */
1631         return false;
1632     }
1633     if (a->rn == 15 && a->w) {
1634         /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
1635         return false;
1636     }
1637
1638     /* UNDEF accesses to D16-D31 if they don't exist */
1639     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) {
1640         return false;
1641     }
1642
1643     if (!vfp_access_check(s)) {
1644         return true;
1645     }
1646
1647     /* For thumb, use of PC is UNPREDICTABLE.  */
1648     addr = add_reg_for_lit(s, a->rn, 0);
1649     if (a->p) {
1650         /* pre-decrement */
1651         tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
1652     }
1653
1654     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1655         /*
1656          * Here 'addr' is the lowest address we will store to,
1657          * and is either the old SP (if post-increment) or
1658          * the new SP (if pre-decrement). For post-increment
1659          * where the old value is below the limit and the new
1660          * value is above, it is UNKNOWN whether the limit check
1661          * triggers; we choose to trigger.
1662          */
1663         gen_helper_v8m_stackcheck(cpu_env, addr);
1664     }
1665
1666     offset = 8;
1667     tmp = tcg_temp_new_i64();
1668     for (i = 0; i < n; i++) {
1669         if (a->l) {
1670             /* load */
1671             gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4);
1672             vfp_store_reg64(tmp, a->vd + i);
1673         } else {
1674             /* store */
1675             vfp_load_reg64(tmp, a->vd + i);
1676             gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4);
1677         }
1678         tcg_gen_addi_i32(addr, addr, offset);
1679     }
1680     tcg_temp_free_i64(tmp);
1681     if (a->w) {
1682         /* writeback */
1683         if (a->p) {
1684             offset = -offset * n;
1685         } else if (a->imm & 1) {
1686             offset = 4;
1687         } else {
1688             offset = 0;
1689         }
1690
1691         if (offset != 0) {
1692             tcg_gen_addi_i32(addr, addr, offset);
1693         }
1694         store_reg(s, a->rn, addr);
1695     } else {
1696         tcg_temp_free_i32(addr);
1697     }
1698
1699     return true;
1700 }
1701
/*
 * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp().
 * The callback should emit code to write a value to vd. If
 * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd
 * will contain the old value of the relevant VFP register;
 * otherwise it must be written to only.
 *
 * vn and vm are the two source operands and fpst is the float status
 * pointer obtained by the caller from fpstatus_ptr(). The SP variant
 * is also the callback type taken by do_vfp_3op_hp().
 */
/* Operands held in 32-bit TCG values */
typedef void VFPGen3OpSPFn(TCGv_i32 vd,
                           TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst);
/* Operands held in 64-bit TCG values */
typedef void VFPGen3OpDPFn(TCGv_i64 vd,
                           TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);
1713
/*
 * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp().
 * The callback should emit code to write a value to vd (which
 * should be written to only). vm is the single source operand;
 * unlike the 3-operand callbacks, no float status pointer is passed.
 */
/* Operands held in 32-bit TCG values */
typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
/* Operands held in 64-bit TCG values */
typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);
1721
/*
 * Scalar-bank test for an S register number: true exactly when
 * bits 3 and 4 of the number are both clear, i.e. for s0..s7.
 */
static inline bool vfp_sreg_is_scalar(int reg)
{
    const int bank_bits = reg & 0x18;

    return !bank_bits;
}
1730
/*
 * Scalar-bank test for a D register number: true exactly when
 * bits 2 and 3 of the number are both clear, i.e. for d0..d3
 * and d16..d19.
 */
static inline bool vfp_dreg_is_scalar(int reg)
{
    const int bank_bits = reg & 0xc;

    return !bank_bits;
}
1739
/*
 * Step an S register number forwards by delta, wrapping around
 * inside its bank: only the low 3 bits change, the remaining
 * bits of the register number are carried over untouched.
 */
static inline int vfp_advance_sreg(int reg, int delta)
{
    const int bank = reg & ~0x7;
    const int index = (reg + delta) & 0x7;

    return index | bank;
}
1748
/*
 * Step a D register number forwards by delta, wrapping around
 * inside its bank: only the low 2 bits change, the remaining
 * bits of the register number are carried over untouched.
 */
static inline int vfp_advance_dreg(int reg, int delta)
{
    const int bank = reg & ~0x3;
    const int index = (reg + delta) & 0x3;

    return index | bank;
}
1757
/*
 * Perform a 3-operand VFP data processing instruction. fn is the
 * callback to do the actual operation; this function deals with the
 * code to handle looping around for VFP vector processing.
 *
 * vd, vn and vm are S register numbers for the destination and the
 * two sources. If reads_vd is true the old value of vd is loaded
 * before each call to fn.
 *
 * Returns false if the aa32_fpsp_v2 feature is absent, or if a vector
 * length/stride is configured without aa32_fpshvec. Returns true
 * otherwise, including when vfp_access_check() fails (in which case
 * no operation is emitted here).
 */
static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
{
    /* Per-iteration register number increments; 0 means "don't advance" */
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    /* Number of further iterations after the first one */
    int veclen = s->vec_len;
    TCGv_i32 f0, f1, fd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is.  */
        if (vfp_sreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = s->vec_stride + 1;

            if (vfp_sreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i32();
    f1 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();
    fpst = fpstatus_ptr(FPST_FPCR);

    /* Operands for the first iteration */
    vfp_load_reg32(f0, vn);
    vfp_load_reg32(f1, vm);

    for (;;) {
        if (reads_vd) {
            vfp_load_reg32(fd, vd);
        }
        fn(fd, f0, f1, fpst);
        vfp_store_reg32(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        /* vd and vn both step by delta_d, wrapping within their bank */
        vd = vfp_advance_sreg(vd, delta_d);
        vn = vfp_advance_sreg(vn, delta_d);
        vfp_load_reg32(f0, vn);
        if (delta_m) {
            /* vm only moves (and is only reloaded) in the full vector case */
            vm = vfp_advance_sreg(vm, delta_m);
            vfp_load_reg32(f1, vm);
        }
    }

    tcg_temp_free_i32(f0);
    tcg_temp_free_i32(f1);
    tcg_temp_free_i32(fd);
    tcg_temp_free_ptr(fpst);

    return true;
}
1840
1841 static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
1842                           int vd, int vn, int vm, bool reads_vd)
1843 {
1844     /*
1845      * Do a half-precision operation. Functionally this is
1846      * the same as do_vfp_3op_sp(), except:
1847      *  - it uses the FPST_FPCR_F16
1848      *  - it doesn't need the VFP vector handling (fp16 is a
1849      *    v8 feature, and in v8 VFP vectors don't exist)
1850      *  - it does the aa32_fp16_arith feature test
1851      */
1852     TCGv_i32 f0, f1, fd;
1853     TCGv_ptr fpst;
1854
1855     if (!dc_isar_feature(aa32_fp16_arith, s)) {
1856         return false;
1857     }
1858
1859     if (s->vec_len != 0 || s->vec_stride != 0) {
1860         return false;
1861     }
1862
1863     if (!vfp_access_check(s)) {
1864         return true;
1865     }
1866
1867     f0 = tcg_temp_new_i32();
1868     f1 = tcg_temp_new_i32();
1869     fd = tcg_temp_new_i32();
1870     fpst = fpstatus_ptr(FPST_FPCR_F16);
1871
1872     vfp_load_reg32(f0, vn);
1873     vfp_load_reg32(f1, vm);
1874
1875     if (reads_vd) {
1876         vfp_load_reg32(fd, vd);
1877     }
1878     fn(fd, f0, f1, fpst);
1879     vfp_store_reg32(fd, vd);
1880
1881     tcg_temp_free_i32(f0);
1882     tcg_temp_free_i32(f1);
1883     tcg_temp_free_i32(fd);
1884     tcg_temp_free_ptr(fpst);
1885
1886     return true;
1887 }
1888
/*
 * Double-precision 3-operand VFP data processing instruction. fn is
 * the callback that emits the actual operation; this function handles
 * the VFP vector looping. vd, vn and vm are D register numbers; if
 * reads_vd is true the old value of vd is loaded before each call
 * to fn.
 *
 * Returns false if aa32_fpdp_v2 is absent, if any operand is in
 * D16-D31 without aa32_simd_r32, or if a vector length/stride is
 * configured without aa32_fpshvec. Returns true otherwise, including
 * when vfp_access_check() fails (in which case no operation is
 * emitted here).
 */
static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
{
    /* Per-iteration register number increments; 0 means "don't advance" */
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    /* Number of further iterations after the first one */
    int veclen = s->vec_len;
    TCGv_i64 f0, f1, fd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is.  */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            /* Note the stride is halved here, unlike in do_vfp_3op_sp() */
            delta_d = (s->vec_stride >> 1) + 1;

            if (vfp_dreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i64();
    f1 = tcg_temp_new_i64();
    fd = tcg_temp_new_i64();
    fpst = fpstatus_ptr(FPST_FPCR);

    /* Operands for the first iteration */
    vfp_load_reg64(f0, vn);
    vfp_load_reg64(f1, vm);

    for (;;) {
        if (reads_vd) {
            vfp_load_reg64(fd, vd);
        }
        fn(fd, f0, f1, fpst);
        vfp_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }
        /* Set up the operands for the next iteration */
        veclen--;
        /* vd and vn both step by delta_d, wrapping within their bank */
        vd = vfp_advance_dreg(vd, delta_d);
        vn = vfp_advance_dreg(vn, delta_d);
        vfp_load_reg64(f0, vn);
        if (delta_m) {
            /* vm only moves (and is only reloaded) in the full vector case */
            vm = vfp_advance_dreg(vm, delta_m);
            vfp_load_reg64(f1, vm);
        }
    }

    tcg_temp_free_i64(f0);
    tcg_temp_free_i64(f1);
    tcg_temp_free_i64(fd);
    tcg_temp_free_ptr(fpst);

    return true;
}
1970
1971 static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
1972 {
1973     uint32_t delta_m = 0;
1974     uint32_t delta_d = 0;
1975     int veclen = s->vec_len;
1976     TCGv_i32 f0, fd;
1977
1978     /* Note that the caller must check the aa32_fpsp_v2 feature. */
1979
1980     if (!dc_isar_feature(aa32_fpshvec, s) &&
1981         (veclen != 0 || s->vec_stride != 0)) {
1982         return false;
1983     }
1984
1985     if (!vfp_access_check(s)) {
1986         return true;
1987     }
1988
1989     if (veclen > 0) {
1990         /* Figure out what type of vector operation this is.  */
1991         if (vfp_sreg_is_scalar(vd)) {
1992             /* scalar */
1993             veclen = 0;
1994         } else {
1995             delta_d = s->vec_stride + 1;
1996
1997             if (vfp_sreg_is_scalar(vm)) {
1998                 /* mixed scalar/vector */
1999                 delta_m = 0;
2000             } else {
2001                 /* vector */
2002                 delta_m = delta_d;
2003             }
2004         }
2005     }
2006
2007     f0 = tcg_temp_new_i32();
2008     fd = tcg_temp_new_i32();
2009
2010     vfp_load_reg32(f0, vm);
2011
2012     for (;;) {
2013         fn(fd, f0);
2014         vfp_store_reg32(fd, vd);
2015
2016         if (veclen == 0) {
2017             break;
2018         }
2019
2020         if (delta_m == 0) {
2021             /* single source one-many */
2022             while (veclen--) {
2023                 vd = vfp_advance_sreg(vd, delta_d);
2024                 vfp_store_reg32(fd, vd);
2025             }
2026             break;
2027         }
2028
2029         /* Set up the operands for the next iteration */
2030         veclen--;
2031         vd = vfp_advance_sreg(vd, delta_d);
2032         vm = vfp_advance_sreg(vm, delta_m);
2033         vfp_load_reg32(f0, vm);
2034     }
2035
2036     tcg_temp_free_i32(f0);
2037     tcg_temp_free_i32(fd);
2038
2039     return true;
2040 }
2041
2042 static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
2043 {
2044     /*
2045      * Do a half-precision operation. Functionally this is
2046      * the same as do_vfp_2op_sp(), except:
2047      *  - it doesn't need the VFP vector handling (fp16 is a
2048      *    v8 feature, and in v8 VFP vectors don't exist)
2049      *  - it does the aa32_fp16_arith feature test
2050      */
2051     TCGv_i32 f0;
2052
2053     /* Note that the caller must check the aa32_fp16_arith feature */
2054
2055     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2056         return false;
2057     }
2058
2059     if (s->vec_len != 0 || s->vec_stride != 0) {
2060         return false;
2061     }
2062
2063     if (!vfp_access_check(s)) {
2064         return true;
2065     }
2066
2067     f0 = tcg_temp_new_i32();
2068     vfp_load_reg32(f0, vm);
2069     fn(f0, f0);
2070     vfp_store_reg32(f0, vd);
2071     tcg_temp_free_i32(f0);
2072
2073     return true;
2074 }
2075
2076 static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
2077 {
2078     uint32_t delta_m = 0;
2079     uint32_t delta_d = 0;
2080     int veclen = s->vec_len;
2081     TCGv_i64 f0, fd;
2082
2083     /* Note that the caller must check the aa32_fpdp_v2 feature. */
2084
2085     /* UNDEF accesses to D16-D31 if they don't exist */
2086     if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) {
2087         return false;
2088     }
2089
2090     if (!dc_isar_feature(aa32_fpshvec, s) &&
2091         (veclen != 0 || s->vec_stride != 0)) {
2092         return false;
2093     }
2094
2095     if (!vfp_access_check(s)) {
2096         return true;
2097     }
2098
2099     if (veclen > 0) {
2100         /* Figure out what type of vector operation this is.  */
2101         if (vfp_dreg_is_scalar(vd)) {
2102             /* scalar */
2103             veclen = 0;
2104         } else {
2105             delta_d = (s->vec_stride >> 1) + 1;
2106
2107             if (vfp_dreg_is_scalar(vm)) {
2108                 /* mixed scalar/vector */
2109                 delta_m = 0;
2110             } else {
2111                 /* vector */
2112                 delta_m = delta_d;
2113             }
2114         }
2115     }
2116
2117     f0 = tcg_temp_new_i64();
2118     fd = tcg_temp_new_i64();
2119
2120     vfp_load_reg64(f0, vm);
2121
2122     for (;;) {
2123         fn(fd, f0);
2124         vfp_store_reg64(fd, vd);
2125
2126         if (veclen == 0) {
2127             break;
2128         }
2129
2130         if (delta_m == 0) {
2131             /* single source one-many */
2132             while (veclen--) {
2133                 vd = vfp_advance_dreg(vd, delta_d);
2134                 vfp_store_reg64(fd, vd);
2135             }
2136             break;
2137         }
2138
2139         /* Set up the operands for the next iteration */
2140         veclen--;
2141         vd = vfp_advance_dreg(vd, delta_d);
2142         vd = vfp_advance_dreg(vm, delta_m);
2143         vfp_load_reg64(f0, vm);
2144     }
2145
2146     tcg_temp_free_i64(f0);
2147     tcg_temp_free_i64(fd);
2148
2149     return true;
2150 }
2151
2152 static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2153 {
2154     /* Note that order of inputs to the add matters for NaNs */
2155     TCGv_i32 tmp = tcg_temp_new_i32();
2156
2157     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
2158     gen_helper_vfp_addh(vd, vd, tmp, fpst);
2159     tcg_temp_free_i32(tmp);
2160 }
2161
2162 static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
2163 {
2164     return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);
2165 }
2166
2167 static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2168 {
2169     /* Note that order of inputs to the add matters for NaNs */
2170     TCGv_i32 tmp = tcg_temp_new_i32();
2171
2172     gen_helper_vfp_muls(tmp, vn, vm, fpst);
2173     gen_helper_vfp_adds(vd, vd, tmp, fpst);
2174     tcg_temp_free_i32(tmp);
2175 }
2176
2177 static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
2178 {
2179     return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);
2180 }
2181
2182 static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
2183 {
2184     /* Note that order of inputs to the add matters for NaNs */
2185     TCGv_i64 tmp = tcg_temp_new_i64();
2186
2187     gen_helper_vfp_muld(tmp, vn, vm, fpst);
2188     gen_helper_vfp_addd(vd, vd, tmp, fpst);
2189     tcg_temp_free_i64(tmp);
2190 }
2191
2192 static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
2193 {
2194     return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
2195 }
2196
2197 static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2198 {
2199     /*
2200      * VMLS: vd = vd + -(vn * vm)
2201      * Note that order of inputs to the add matters for NaNs.
2202      */
2203     TCGv_i32 tmp = tcg_temp_new_i32();
2204
2205     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
2206     gen_helper_vfp_negh(tmp, tmp);
2207     gen_helper_vfp_addh(vd, vd, tmp, fpst);
2208     tcg_temp_free_i32(tmp);
2209 }
2210
2211 static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
2212 {
2213     return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);
2214 }
2215
2216 static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2217 {
2218     /*
2219      * VMLS: vd = vd + -(vn * vm)
2220      * Note that order of inputs to the add matters for NaNs.
2221      */
2222     TCGv_i32 tmp = tcg_temp_new_i32();
2223
2224     gen_helper_vfp_muls(tmp, vn, vm, fpst);
2225     gen_helper_vfp_negs(tmp, tmp);
2226     gen_helper_vfp_adds(vd, vd, tmp, fpst);
2227     tcg_temp_free_i32(tmp);
2228 }
2229
2230 static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
2231 {
2232     return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);
2233 }
2234
2235 static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
2236 {
2237     /*
2238      * VMLS: vd = vd + -(vn * vm)
2239      * Note that order of inputs to the add matters for NaNs.
2240      */
2241     TCGv_i64 tmp = tcg_temp_new_i64();
2242
2243     gen_helper_vfp_muld(tmp, vn, vm, fpst);
2244     gen_helper_vfp_negd(tmp, tmp);
2245     gen_helper_vfp_addd(vd, vd, tmp, fpst);
2246     tcg_temp_free_i64(tmp);
2247 }
2248
2249 static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
2250 {
2251     return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
2252 }
2253
2254 static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2255 {
2256     /*
2257      * VNMLS: -fd + (fn * fm)
2258      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
2259      * plausible looking simplifications because this will give wrong results
2260      * for NaNs.
2261      */
2262     TCGv_i32 tmp = tcg_temp_new_i32();
2263
2264     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
2265     gen_helper_vfp_negh(vd, vd);
2266     gen_helper_vfp_addh(vd, vd, tmp, fpst);
2267     tcg_temp_free_i32(tmp);
2268 }
2269
2270 static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
2271 {
2272     return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);
2273 }
2274
2275 static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2276 {
2277     /*
2278      * VNMLS: -fd + (fn * fm)
2279      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
2280      * plausible looking simplifications because this will give wrong results
2281      * for NaNs.
2282      */
2283     TCGv_i32 tmp = tcg_temp_new_i32();
2284
2285     gen_helper_vfp_muls(tmp, vn, vm, fpst);
2286     gen_helper_vfp_negs(vd, vd);
2287     gen_helper_vfp_adds(vd, vd, tmp, fpst);
2288     tcg_temp_free_i32(tmp);
2289 }
2290
2291 static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
2292 {
2293     return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);
2294 }
2295
2296 static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
2297 {
2298     /*
2299      * VNMLS: -fd + (fn * fm)
2300      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
2301      * plausible looking simplifications because this will give wrong results
2302      * for NaNs.
2303      */
2304     TCGv_i64 tmp = tcg_temp_new_i64();
2305
2306     gen_helper_vfp_muld(tmp, vn, vm, fpst);
2307     gen_helper_vfp_negd(vd, vd);
2308     gen_helper_vfp_addd(vd, vd, tmp, fpst);
2309     tcg_temp_free_i64(tmp);
2310 }
2311
2312 static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
2313 {
2314     return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
2315 }
2316
2317 static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2318 {
2319     /* VNMLA: -fd + -(fn * fm) */
2320     TCGv_i32 tmp = tcg_temp_new_i32();
2321
2322     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
2323     gen_helper_vfp_negh(tmp, tmp);
2324     gen_helper_vfp_negh(vd, vd);
2325     gen_helper_vfp_addh(vd, vd, tmp, fpst);
2326     tcg_temp_free_i32(tmp);
2327 }
2328
2329 static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
2330 {
2331     return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);
2332 }
2333
2334 static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2335 {
2336     /* VNMLA: -fd + -(fn * fm) */
2337     TCGv_i32 tmp = tcg_temp_new_i32();
2338
2339     gen_helper_vfp_muls(tmp, vn, vm, fpst);
2340     gen_helper_vfp_negs(tmp, tmp);
2341     gen_helper_vfp_negs(vd, vd);
2342     gen_helper_vfp_adds(vd, vd, tmp, fpst);
2343     tcg_temp_free_i32(tmp);
2344 }
2345
2346 static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
2347 {
2348     return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);
2349 }
2350
2351 static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
2352 {
2353     /* VNMLA: -fd + (fn * fm) */
2354     TCGv_i64 tmp = tcg_temp_new_i64();
2355
2356     gen_helper_vfp_muld(tmp, vn, vm, fpst);
2357     gen_helper_vfp_negd(tmp, tmp);
2358     gen_helper_vfp_negd(vd, vd);
2359     gen_helper_vfp_addd(vd, vd, tmp, fpst);
2360     tcg_temp_free_i64(tmp);
2361 }
2362
2363 static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
2364 {
2365     return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
2366 }
2367
2368 static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a)
2369 {
2370     return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false);
2371 }
2372
2373 static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
2374 {
2375     return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
2376 }
2377
2378 static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
2379 {
2380     return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
2381 }
2382
2383 static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2384 {
2385     /* VNMUL: -(fn * fm) */
2386     gen_helper_vfp_mulh(vd, vn, vm, fpst);
2387     gen_helper_vfp_negh(vd, vd);
2388 }
2389
2390 static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
2391 {
2392     return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);
2393 }
2394
2395 static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2396 {
2397     /* VNMUL: -(fn * fm) */
2398     gen_helper_vfp_muls(vd, vn, vm, fpst);
2399     gen_helper_vfp_negs(vd, vd);
2400 }
2401
2402 static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
2403 {
2404     return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false);
2405 }
2406
2407 static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
2408 {
2409     /* VNMUL: -(fn * fm) */
2410     gen_helper_vfp_muld(vd, vn, vm, fpst);
2411     gen_helper_vfp_negd(vd, vd);
2412 }
2413
2414 static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
2415 {
2416     return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
2417 }
2418
2419 static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a)
2420 {
2421     return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false);
2422 }
2423
2424 static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
2425 {
2426     return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
2427 }
2428
2429 static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
2430 {
2431     return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
2432 }
2433
2434 static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a)
2435 {
2436     return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false);
2437 }
2438
2439 static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
2440 {
2441     return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
2442 }
2443
2444 static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
2445 {
2446     return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
2447 }
2448
2449 static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a)
2450 {
2451     return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false);
2452 }
2453
2454 static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
2455 {
2456     return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
2457 }
2458
2459 static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
2460 {
2461     return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
2462 }
2463
2464 static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a)
2465 {
2466     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2467         return false;
2468     }
2469     return do_vfp_3op_hp(s, gen_helper_vfp_minnumh,
2470                          a->vd, a->vn, a->vm, false);
2471 }
2472
2473 static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a)
2474 {
2475     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2476         return false;
2477     }
2478     return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh,
2479                          a->vd, a->vn, a->vm, false);
2480 }
2481
2482 static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a)
2483 {
2484     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2485         return false;
2486     }
2487     return do_vfp_3op_sp(s, gen_helper_vfp_minnums,
2488                          a->vd, a->vn, a->vm, false);
2489 }
2490
2491 static bool trans_VMAXNM_sp(DisasContext *s, arg_VMAXNM_sp *a)
2492 {
2493     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2494         return false;
2495     }
2496     return do_vfp_3op_sp(s, gen_helper_vfp_maxnums,
2497                          a->vd, a->vn, a->vm, false);
2498 }
2499
2500 static bool trans_VMINNM_dp(DisasContext *s, arg_VMINNM_dp *a)
2501 {
2502     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2503         return false;
2504     }
2505     return do_vfp_3op_dp(s, gen_helper_vfp_minnumd,
2506                          a->vd, a->vn, a->vm, false);
2507 }
2508
2509 static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a)
2510 {
2511     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2512         return false;
2513     }
2514     return do_vfp_3op_dp(s, gen_helper_vfp_maxnumd,
2515                          a->vd, a->vn, a->vm, false);
2516 }
2517
2518 static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
2519 {
2520     /*
2521      * VFNMA : fd = muladd(-fd,  fn, fm)
2522      * VFNMS : fd = muladd(-fd, -fn, fm)
2523      * VFMA  : fd = muladd( fd,  fn, fm)
2524      * VFMS  : fd = muladd( fd, -fn, fm)
2525      *
2526      * These are fused multiply-add, and must be done as one floating
2527      * point operation with no rounding between the multiplication and
2528      * addition steps.  NB that doing the negations here as separate
2529      * steps is correct : an input NaN should come out with its sign
2530      * bit flipped if it is a negated-input.
2531      */
2532     TCGv_ptr fpst;
2533     TCGv_i32 vn, vm, vd;
2534
2535     /*
2536      * Present in VFPv4 only, and only with the FP16 extension.
2537      * Note that we can't rely on the SIMDFMAC check alone, because
2538      * in a Neon-no-VFP core that ID register field will be non-zero.
2539      */
2540     if (!dc_isar_feature(aa32_fp16_arith, s) ||
2541         !dc_isar_feature(aa32_simdfmac, s) ||
2542         !dc_isar_feature(aa32_fpsp_v2, s)) {
2543         return false;
2544     }
2545
2546     if (s->vec_len != 0 || s->vec_stride != 0) {
2547         return false;
2548     }
2549
2550     if (!vfp_access_check(s)) {
2551         return true;
2552     }
2553
2554     vn = tcg_temp_new_i32();
2555     vm = tcg_temp_new_i32();
2556     vd = tcg_temp_new_i32();
2557
2558     vfp_load_reg32(vn, a->vn);
2559     vfp_load_reg32(vm, a->vm);
2560     if (neg_n) {
2561         /* VFNMS, VFMS */
2562         gen_helper_vfp_negh(vn, vn);
2563     }
2564     vfp_load_reg32(vd, a->vd);
2565     if (neg_d) {
2566         /* VFNMA, VFNMS */
2567         gen_helper_vfp_negh(vd, vd);
2568     }
2569     fpst = fpstatus_ptr(FPST_FPCR_F16);
2570     gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
2571     vfp_store_reg32(vd, a->vd);
2572
2573     tcg_temp_free_ptr(fpst);
2574     tcg_temp_free_i32(vn);
2575     tcg_temp_free_i32(vm);
2576     tcg_temp_free_i32(vd);
2577
2578     return true;
2579 }
2580
2581 static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
2582 {
2583     /*
2584      * VFNMA : fd = muladd(-fd,  fn, fm)
2585      * VFNMS : fd = muladd(-fd, -fn, fm)
2586      * VFMA  : fd = muladd( fd,  fn, fm)
2587      * VFMS  : fd = muladd( fd, -fn, fm)
2588      *
2589      * These are fused multiply-add, and must be done as one floating
2590      * point operation with no rounding between the multiplication and
2591      * addition steps.  NB that doing the negations here as separate
2592      * steps is correct : an input NaN should come out with its sign
2593      * bit flipped if it is a negated-input.
2594      */
2595     TCGv_ptr fpst;
2596     TCGv_i32 vn, vm, vd;
2597
2598     /*
2599      * Present in VFPv4 only.
2600      * Note that we can't rely on the SIMDFMAC check alone, because
2601      * in a Neon-no-VFP core that ID register field will be non-zero.
2602      */
2603     if (!dc_isar_feature(aa32_simdfmac, s) ||
2604         !dc_isar_feature(aa32_fpsp_v2, s)) {
2605         return false;
2606     }
2607     /*
2608      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
2609      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
2610      */
2611     if (s->vec_len != 0 || s->vec_stride != 0) {
2612         return false;
2613     }
2614
2615     if (!vfp_access_check(s)) {
2616         return true;
2617     }
2618
2619     vn = tcg_temp_new_i32();
2620     vm = tcg_temp_new_i32();
2621     vd = tcg_temp_new_i32();
2622
2623     vfp_load_reg32(vn, a->vn);
2624     vfp_load_reg32(vm, a->vm);
2625     if (neg_n) {
2626         /* VFNMS, VFMS */
2627         gen_helper_vfp_negs(vn, vn);
2628     }
2629     vfp_load_reg32(vd, a->vd);
2630     if (neg_d) {
2631         /* VFNMA, VFNMS */
2632         gen_helper_vfp_negs(vd, vd);
2633     }
2634     fpst = fpstatus_ptr(FPST_FPCR);
2635     gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
2636     vfp_store_reg32(vd, a->vd);
2637
2638     tcg_temp_free_ptr(fpst);
2639     tcg_temp_free_i32(vn);
2640     tcg_temp_free_i32(vm);
2641     tcg_temp_free_i32(vd);
2642
2643     return true;
2644 }
2645
2646 static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
2647 {
2648     /*
2649      * VFNMA : fd = muladd(-fd,  fn, fm)
2650      * VFNMS : fd = muladd(-fd, -fn, fm)
2651      * VFMA  : fd = muladd( fd,  fn, fm)
2652      * VFMS  : fd = muladd( fd, -fn, fm)
2653      *
2654      * These are fused multiply-add, and must be done as one floating
2655      * point operation with no rounding between the multiplication and
2656      * addition steps.  NB that doing the negations here as separate
2657      * steps is correct : an input NaN should come out with its sign
2658      * bit flipped if it is a negated-input.
2659      */
2660     TCGv_ptr fpst;
2661     TCGv_i64 vn, vm, vd;
2662
2663     /*
2664      * Present in VFPv4 only.
2665      * Note that we can't rely on the SIMDFMAC check alone, because
2666      * in a Neon-no-VFP core that ID register field will be non-zero.
2667      */
2668     if (!dc_isar_feature(aa32_simdfmac, s) ||
2669         !dc_isar_feature(aa32_fpdp_v2, s)) {
2670         return false;
2671     }
2672     /*
2673      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
2674      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
2675      */
2676     if (s->vec_len != 0 || s->vec_stride != 0) {
2677         return false;
2678     }
2679
2680     /* UNDEF accesses to D16-D31 if they don't exist. */
2681     if (!dc_isar_feature(aa32_simd_r32, s) &&
2682         ((a->vd | a->vn | a->vm) & 0x10)) {
2683         return false;
2684     }
2685
2686     if (!vfp_access_check(s)) {
2687         return true;
2688     }
2689
2690     vn = tcg_temp_new_i64();
2691     vm = tcg_temp_new_i64();
2692     vd = tcg_temp_new_i64();
2693
2694     vfp_load_reg64(vn, a->vn);
2695     vfp_load_reg64(vm, a->vm);
2696     if (neg_n) {
2697         /* VFNMS, VFMS */
2698         gen_helper_vfp_negd(vn, vn);
2699     }
2700     vfp_load_reg64(vd, a->vd);
2701     if (neg_d) {
2702         /* VFNMA, VFNMS */
2703         gen_helper_vfp_negd(vd, vd);
2704     }
2705     fpst = fpstatus_ptr(FPST_FPCR);
2706     gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
2707     vfp_store_reg64(vd, a->vd);
2708
2709     tcg_temp_free_ptr(fpst);
2710     tcg_temp_free_i64(vn);
2711     tcg_temp_free_i64(vm);
2712     tcg_temp_free_i64(vd);
2713
2714     return true;
2715 }
2716
/*
 * Define trans_<OP>_<SZ>(), which simply forwards to do_vfm_<SZ>()
 * with the given negate-Vn (NEG_N) and negate-Vd (NEG_D) flags.
 */
#define MAKE_ONE_VFM_TRANS_FN(OP, SZ, NEG_N, NEG_D)                     \
    static bool trans_##OP##_##SZ(DisasContext *s, arg_##OP##_##SZ *a)  \
    {                                                                   \
        return do_vfm_##SZ(s, a, NEG_N, NEG_D);                         \
    }
2723
/*
 * Define all four fused multiply-add translators for one precision.
 * The two flags are (negate Vn, negate Vd).
 */
#define MAKE_VFM_TRANS_FNS(SZ)                          \
    MAKE_ONE_VFM_TRANS_FN(VFMA, SZ, false, false)       \
    MAKE_ONE_VFM_TRANS_FN(VFMS, SZ, true, false)        \
    MAKE_ONE_VFM_TRANS_FN(VFNMA, SZ, false, true)       \
    MAKE_ONE_VFM_TRANS_FN(VFNMS, SZ, true, true)
2729
/* Instantiate the VFMA/VFMS/VFNMA/VFNMS translators for each precision. */
MAKE_VFM_TRANS_FNS(hp)
MAKE_VFM_TRANS_FNS(sp)
MAKE_VFM_TRANS_FNS(dp)
2733
2734 static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a)
2735 {
2736     TCGv_i32 fd;
2737
2738     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2739         return false;
2740     }
2741
2742     if (s->vec_len != 0 || s->vec_stride != 0) {
2743         return false;
2744     }
2745
2746     if (!vfp_access_check(s)) {
2747         return true;
2748     }
2749
2750     fd = tcg_const_i32(vfp_expand_imm(MO_16, a->imm));
2751     vfp_store_reg32(fd, a->vd);
2752     tcg_temp_free_i32(fd);
2753     return true;
2754 }
2755
2756 static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
2757 {
2758     uint32_t delta_d = 0;
2759     int veclen = s->vec_len;
2760     TCGv_i32 fd;
2761     uint32_t vd;
2762
2763     vd = a->vd;
2764
2765     if (!dc_isar_feature(aa32_fpsp_v3, s)) {
2766         return false;
2767     }
2768
2769     if (!dc_isar_feature(aa32_fpshvec, s) &&
2770         (veclen != 0 || s->vec_stride != 0)) {
2771         return false;
2772     }
2773
2774     if (!vfp_access_check(s)) {
2775         return true;
2776     }
2777
2778     if (veclen > 0) {
2779         /* Figure out what type of vector operation this is.  */
2780         if (vfp_sreg_is_scalar(vd)) {
2781             /* scalar */
2782             veclen = 0;
2783         } else {
2784             delta_d = s->vec_stride + 1;
2785         }
2786     }
2787
2788     fd = tcg_const_i32(vfp_expand_imm(MO_32, a->imm));
2789
2790     for (;;) {
2791         vfp_store_reg32(fd, vd);
2792
2793         if (veclen == 0) {
2794             break;
2795         }
2796
2797         /* Set up the operands for the next iteration */
2798         veclen--;
2799         vd = vfp_advance_sreg(vd, delta_d);
2800     }
2801
2802     tcg_temp_free_i32(fd);
2803     return true;
2804 }
2805
2806 static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
2807 {
2808     uint32_t delta_d = 0;
2809     int veclen = s->vec_len;
2810     TCGv_i64 fd;
2811     uint32_t vd;
2812
2813     vd = a->vd;
2814
2815     if (!dc_isar_feature(aa32_fpdp_v3, s)) {
2816         return false;
2817     }
2818
2819     /* UNDEF accesses to D16-D31 if they don't exist. */
2820     if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) {
2821         return false;
2822     }
2823
2824     if (!dc_isar_feature(aa32_fpshvec, s) &&
2825         (veclen != 0 || s->vec_stride != 0)) {
2826         return false;
2827     }
2828
2829     if (!vfp_access_check(s)) {
2830         return true;
2831     }
2832
2833     if (veclen > 0) {
2834         /* Figure out what type of vector operation this is.  */
2835         if (vfp_dreg_is_scalar(vd)) {
2836             /* scalar */
2837             veclen = 0;
2838         } else {
2839             delta_d = (s->vec_stride >> 1) + 1;
2840         }
2841     }
2842
2843     fd = tcg_const_i64(vfp_expand_imm(MO_64, a->imm));
2844
2845     for (;;) {
2846         vfp_store_reg64(fd, vd);
2847
2848         if (veclen == 0) {
2849             break;
2850         }
2851
2852         /* Set up the operands for the next iteration */
2853         veclen--;
2854         vd = vfp_advance_dreg(vd, delta_d);
2855     }
2856
2857     tcg_temp_free_i64(fd);
2858     return true;
2859 }
2860
/*
 * Define trans_<OP>_<SZ>() for a two-operand insn: UNDEF unless the
 * GATE feature is present, otherwise hand GEN to do_vfp_2op_<SZ>().
 */
#define DO_VFP_2OP(OP, SZ, GEN, GATE)                                   \
    static bool trans_##OP##_##SZ(DisasContext *s, arg_##OP##_##SZ *a)  \
    {                                                                   \
        return dc_isar_feature(GATE, s) &&                              \
               do_vfp_2op_##SZ(s, GEN, a->vd, a->vm);                   \
    }
2870
/*
 * Define trans_<OP>_<SZ>() for a register move: permitted when either
 * the matching aa32_fp<SZ>_v2 feature or aa32_mve is present, and
 * UNDEF otherwise.  GEN is handed to do_vfp_2op_<SZ>().
 */
#define DO_VFP_VMOV(OP, SZ, GEN)                                        \
    static bool trans_##OP##_##SZ(DisasContext *s, arg_##OP##_##SZ *a)  \
    {                                                                   \
        if (!(dc_isar_feature(aa32_fp##SZ##_v2, s) ||                   \
              dc_isar_feature(aa32_mve, s))) {                          \
            return false;                                               \
        }                                                               \
        return do_vfp_2op_##SZ(s, GEN, a->vd, a->vm);                   \
    }
2881
/* Register-to-register VMOV: a plain TCG move run through the 2-op helpers. */
DO_VFP_VMOV(VMOV_reg, sp, tcg_gen_mov_i32)
DO_VFP_VMOV(VMOV_reg, dp, tcg_gen_mov_i64)

/* VABS: one translator per precision, each gated on its own feature. */
DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh, aa32_fp16_arith)
DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss, aa32_fpsp_v2)
DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd, aa32_fpdp_v2)

/* VNEG: one translator per precision, each gated on its own feature. */
DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh, aa32_fp16_arith)
DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs, aa32_fpsp_v2)
DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd, aa32_fpdp_v2)
2892
2893 static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
2894 {
2895     gen_helper_vfp_sqrth(vd, vm, cpu_env);
2896 }
2897
2898 static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
2899 {
2900     gen_helper_vfp_sqrts(vd, vm, cpu_env);
2901 }
2902
2903 static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
2904 {
2905     gen_helper_vfp_sqrtd(vd, vm, cpu_env);
2906 }
2907
/* VSQRT: one translator per precision, using the gen_VSQRT_* adapters. */
DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp, aa32_fp16_arith)
DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp, aa32_fpsp_v2)
DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp, aa32_fpdp_v2)
2911
2912 static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
2913 {
2914     TCGv_i32 vd, vm;
2915
2916     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2917         return false;
2918     }
2919
2920     /* Vm/M bits must be zero for the Z variant */
2921     if (a->z && a->vm != 0) {
2922         return false;
2923     }
2924
2925     if (!vfp_access_check(s)) {
2926         return true;
2927     }
2928
2929     vd = tcg_temp_new_i32();
2930     vm = tcg_temp_new_i32();
2931
2932     vfp_load_reg32(vd, a->vd);
2933     if (a->z) {
2934         tcg_gen_movi_i32(vm, 0);
2935     } else {
2936         vfp_load_reg32(vm, a->vm);
2937     }
2938
2939     if (a->e) {
2940         gen_helper_vfp_cmpeh(vd, vm, cpu_env);
2941     } else {
2942         gen_helper_vfp_cmph(vd, vm, cpu_env);
2943     }
2944
2945     tcg_temp_free_i32(vd);
2946     tcg_temp_free_i32(vm);
2947
2948     return true;
2949 }
2950
2951 static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
2952 {
2953     TCGv_i32 vd, vm;
2954
2955     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
2956         return false;
2957     }
2958
2959     /* Vm/M bits must be zero for the Z variant */
2960     if (a->z && a->vm != 0) {
2961         return false;
2962     }
2963
2964     if (!vfp_access_check(s)) {
2965         return true;
2966     }
2967
2968     vd = tcg_temp_new_i32();
2969     vm = tcg_temp_new_i32();
2970
2971     vfp_load_reg32(vd, a->vd);
2972     if (a->z) {
2973         tcg_gen_movi_i32(vm, 0);
2974     } else {
2975         vfp_load_reg32(vm, a->vm);
2976     }
2977
2978     if (a->e) {
2979         gen_helper_vfp_cmpes(vd, vm, cpu_env);
2980     } else {
2981         gen_helper_vfp_cmps(vd, vm, cpu_env);
2982     }
2983
2984     tcg_temp_free_i32(vd);
2985     tcg_temp_free_i32(vm);
2986
2987     return true;
2988 }
2989
2990 static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
2991 {
2992     TCGv_i64 vd, vm;
2993
2994     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2995         return false;
2996     }
2997
2998     /* Vm/M bits must be zero for the Z variant */
2999     if (a->z && a->vm != 0) {
3000         return false;
3001     }
3002
3003     /* UNDEF accesses to D16-D31 if they don't exist. */
3004     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
3005         return false;
3006     }
3007
3008     if (!vfp_access_check(s)) {
3009         return true;
3010     }
3011
3012     vd = tcg_temp_new_i64();
3013     vm = tcg_temp_new_i64();
3014
3015     vfp_load_reg64(vd, a->vd);
3016     if (a->z) {
3017         tcg_gen_movi_i64(vm, 0);
3018     } else {
3019         vfp_load_reg64(vm, a->vm);
3020     }
3021
3022     if (a->e) {
3023         gen_helper_vfp_cmped(vd, vm, cpu_env);
3024     } else {
3025         gen_helper_vfp_cmpd(vd, vm, cpu_env);
3026     }
3027
3028     tcg_temp_free_i64(vd);
3029     tcg_temp_free_i64(vm);
3030
3031     return true;
3032 }
3033
3034 static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
3035 {
3036     TCGv_ptr fpst;
3037     TCGv_i32 ahp_mode;
3038     TCGv_i32 tmp;
3039
3040     if (!dc_isar_feature(aa32_fp16_spconv, s)) {
3041         return false;
3042     }
3043
3044     if (!vfp_access_check(s)) {
3045         return true;
3046     }
3047
3048     fpst = fpstatus_ptr(FPST_FPCR);
3049     ahp_mode = get_ahp_flag();
3050     tmp = tcg_temp_new_i32();
3051     /* The T bit tells us if we want the low or high 16 bits of Vm */
3052     tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
3053     gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
3054     vfp_store_reg32(tmp, a->vd);
3055     tcg_temp_free_i32(ahp_mode);
3056     tcg_temp_free_ptr(fpst);
3057     tcg_temp_free_i32(tmp);
3058     return true;
3059 }
3060
3061 static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
3062 {
3063     TCGv_ptr fpst;
3064     TCGv_i32 ahp_mode;
3065     TCGv_i32 tmp;
3066     TCGv_i64 vd;
3067
3068     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3069         return false;
3070     }
3071
3072     if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
3073         return false;
3074     }
3075
3076     /* UNDEF accesses to D16-D31 if they don't exist. */
3077     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd  & 0x10)) {
3078         return false;
3079     }
3080
3081     if (!vfp_access_check(s)) {
3082         return true;
3083     }
3084
3085     fpst = fpstatus_ptr(FPST_FPCR);
3086     ahp_mode = get_ahp_flag();
3087     tmp = tcg_temp_new_i32();
3088     /* The T bit tells us if we want the low or high 16 bits of Vm */
3089     tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
3090     vd = tcg_temp_new_i64();
3091     gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
3092     vfp_store_reg64(vd, a->vd);
3093     tcg_temp_free_i32(ahp_mode);
3094     tcg_temp_free_ptr(fpst);
3095     tcg_temp_free_i32(tmp);
3096     tcg_temp_free_i64(vd);
3097     return true;
3098 }
3099
3100 static bool trans_VCVT_b16_f32(DisasContext *s, arg_VCVT_b16_f32 *a)
3101 {
3102     TCGv_ptr fpst;
3103     TCGv_i32 tmp;
3104
3105     if (!dc_isar_feature(aa32_bf16, s)) {
3106         return false;
3107     }
3108
3109     if (!vfp_access_check(s)) {
3110         return true;
3111     }
3112
3113     fpst = fpstatus_ptr(FPST_FPCR);
3114     tmp = tcg_temp_new_i32();
3115
3116     vfp_load_reg32(tmp, a->vm);
3117     gen_helper_bfcvt(tmp, tmp, fpst);
3118     tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
3119     tcg_temp_free_ptr(fpst);
3120     tcg_temp_free_i32(tmp);
3121     return true;
3122 }
3123
3124 static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
3125 {
3126     TCGv_ptr fpst;
3127     TCGv_i32 ahp_mode;
3128     TCGv_i32 tmp;
3129
3130     if (!dc_isar_feature(aa32_fp16_spconv, s)) {
3131         return false;
3132     }
3133
3134     if (!vfp_access_check(s)) {
3135         return true;
3136     }
3137
3138     fpst = fpstatus_ptr(FPST_FPCR);
3139     ahp_mode = get_ahp_flag();
3140     tmp = tcg_temp_new_i32();
3141
3142     vfp_load_reg32(tmp, a->vm);
3143     gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
3144     tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
3145     tcg_temp_free_i32(ahp_mode);
3146     tcg_temp_free_ptr(fpst);
3147     tcg_temp_free_i32(tmp);
3148     return true;
3149 }
3150
3151 static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
3152 {
3153     TCGv_ptr fpst;
3154     TCGv_i32 ahp_mode;
3155     TCGv_i32 tmp;
3156     TCGv_i64 vm;
3157
3158     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3159         return false;
3160     }
3161
3162     if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
3163         return false;
3164     }
3165
3166     /* UNDEF accesses to D16-D31 if they don't exist. */
3167     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm  & 0x10)) {
3168         return false;
3169     }
3170
3171     if (!vfp_access_check(s)) {
3172         return true;
3173     }
3174
3175     fpst = fpstatus_ptr(FPST_FPCR);
3176     ahp_mode = get_ahp_flag();
3177     tmp = tcg_temp_new_i32();
3178     vm = tcg_temp_new_i64();
3179
3180     vfp_load_reg64(vm, a->vm);
3181     gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
3182     tcg_temp_free_i64(vm);
3183     tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
3184     tcg_temp_free_i32(ahp_mode);
3185     tcg_temp_free_ptr(fpst);
3186     tcg_temp_free_i32(tmp);
3187     return true;
3188 }
3189
3190 static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
3191 {
3192     TCGv_ptr fpst;
3193     TCGv_i32 tmp;
3194
3195     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3196         return false;
3197     }
3198
3199     if (!vfp_access_check(s)) {
3200         return true;
3201     }
3202
3203     tmp = tcg_temp_new_i32();
3204     vfp_load_reg32(tmp, a->vm);
3205     fpst = fpstatus_ptr(FPST_FPCR_F16);
3206     gen_helper_rinth(tmp, tmp, fpst);
3207     vfp_store_reg32(tmp, a->vd);
3208     tcg_temp_free_ptr(fpst);
3209     tcg_temp_free_i32(tmp);
3210     return true;
3211 }
3212
3213 static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
3214 {
3215     TCGv_ptr fpst;
3216     TCGv_i32 tmp;
3217
3218     if (!dc_isar_feature(aa32_vrint, s)) {
3219         return false;
3220     }
3221
3222     if (!vfp_access_check(s)) {
3223         return true;
3224     }
3225
3226     tmp = tcg_temp_new_i32();
3227     vfp_load_reg32(tmp, a->vm);
3228     fpst = fpstatus_ptr(FPST_FPCR);
3229     gen_helper_rints(tmp, tmp, fpst);
3230     vfp_store_reg32(tmp, a->vd);
3231     tcg_temp_free_ptr(fpst);
3232     tcg_temp_free_i32(tmp);
3233     return true;
3234 }
3235
3236 static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
3237 {
3238     TCGv_ptr fpst;
3239     TCGv_i64 tmp;
3240
3241     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3242         return false;
3243     }
3244
3245     if (!dc_isar_feature(aa32_vrint, s)) {
3246         return false;
3247     }
3248
3249     /* UNDEF accesses to D16-D31 if they don't exist. */
3250     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
3251         return false;
3252     }
3253
3254     if (!vfp_access_check(s)) {
3255         return true;
3256     }
3257
3258     tmp = tcg_temp_new_i64();
3259     vfp_load_reg64(tmp, a->vm);
3260     fpst = fpstatus_ptr(FPST_FPCR);
3261     gen_helper_rintd(tmp, tmp, fpst);
3262     vfp_store_reg64(tmp, a->vd);
3263     tcg_temp_free_ptr(fpst);
3264     tcg_temp_free_i64(tmp);
3265     return true;
3266 }
3267
3268 static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
3269 {
3270     TCGv_ptr fpst;
3271     TCGv_i32 tmp;
3272     TCGv_i32 tcg_rmode;
3273
3274     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3275         return false;
3276     }
3277
3278     if (!vfp_access_check(s)) {
3279         return true;
3280     }
3281
3282     tmp = tcg_temp_new_i32();
3283     vfp_load_reg32(tmp, a->vm);
3284     fpst = fpstatus_ptr(FPST_FPCR_F16);
3285     tcg_rmode = tcg_const_i32(float_round_to_zero);
3286     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3287     gen_helper_rinth(tmp, tmp, fpst);
3288     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3289     vfp_store_reg32(tmp, a->vd);
3290     tcg_temp_free_ptr(fpst);
3291     tcg_temp_free_i32(tcg_rmode);
3292     tcg_temp_free_i32(tmp);
3293     return true;
3294 }
3295
3296 static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
3297 {
3298     TCGv_ptr fpst;
3299     TCGv_i32 tmp;
3300     TCGv_i32 tcg_rmode;
3301
3302     if (!dc_isar_feature(aa32_vrint, s)) {
3303         return false;
3304     }
3305
3306     if (!vfp_access_check(s)) {
3307         return true;
3308     }
3309
3310     tmp = tcg_temp_new_i32();
3311     vfp_load_reg32(tmp, a->vm);
3312     fpst = fpstatus_ptr(FPST_FPCR);
3313     tcg_rmode = tcg_const_i32(float_round_to_zero);
3314     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3315     gen_helper_rints(tmp, tmp, fpst);
3316     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3317     vfp_store_reg32(tmp, a->vd);
3318     tcg_temp_free_ptr(fpst);
3319     tcg_temp_free_i32(tcg_rmode);
3320     tcg_temp_free_i32(tmp);
3321     return true;
3322 }
3323
3324 static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
3325 {
3326     TCGv_ptr fpst;
3327     TCGv_i64 tmp;
3328     TCGv_i32 tcg_rmode;
3329
3330     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3331         return false;
3332     }
3333
3334     if (!dc_isar_feature(aa32_vrint, s)) {
3335         return false;
3336     }
3337
3338     /* UNDEF accesses to D16-D31 if they don't exist. */
3339     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
3340         return false;
3341     }
3342
3343     if (!vfp_access_check(s)) {
3344         return true;
3345     }
3346
3347     tmp = tcg_temp_new_i64();
3348     vfp_load_reg64(tmp, a->vm);
3349     fpst = fpstatus_ptr(FPST_FPCR);
3350     tcg_rmode = tcg_const_i32(float_round_to_zero);
3351     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3352     gen_helper_rintd(tmp, tmp, fpst);
3353     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3354     vfp_store_reg64(tmp, a->vd);
3355     tcg_temp_free_ptr(fpst);
3356     tcg_temp_free_i64(tmp);
3357     tcg_temp_free_i32(tcg_rmode);
3358     return true;
3359 }
3360
3361 static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
3362 {
3363     TCGv_ptr fpst;
3364     TCGv_i32 tmp;
3365
3366     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3367         return false;
3368     }
3369
3370     if (!vfp_access_check(s)) {
3371         return true;
3372     }
3373
3374     tmp = tcg_temp_new_i32();
3375     vfp_load_reg32(tmp, a->vm);
3376     fpst = fpstatus_ptr(FPST_FPCR_F16);
3377     gen_helper_rinth_exact(tmp, tmp, fpst);
3378     vfp_store_reg32(tmp, a->vd);
3379     tcg_temp_free_ptr(fpst);
3380     tcg_temp_free_i32(tmp);
3381     return true;
3382 }
3383
3384 static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
3385 {
3386     TCGv_ptr fpst;
3387     TCGv_i32 tmp;
3388
3389     if (!dc_isar_feature(aa32_vrint, s)) {
3390         return false;
3391     }
3392
3393     if (!vfp_access_check(s)) {
3394         return true;
3395     }
3396
3397     tmp = tcg_temp_new_i32();
3398     vfp_load_reg32(tmp, a->vm);
3399     fpst = fpstatus_ptr(FPST_FPCR);
3400     gen_helper_rints_exact(tmp, tmp, fpst);
3401     vfp_store_reg32(tmp, a->vd);
3402     tcg_temp_free_ptr(fpst);
3403     tcg_temp_free_i32(tmp);
3404     return true;
3405 }
3406
3407 static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
3408 {
3409     TCGv_ptr fpst;
3410     TCGv_i64 tmp;
3411
3412     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3413         return false;
3414     }
3415
3416     if (!dc_isar_feature(aa32_vrint, s)) {
3417         return false;
3418     }
3419
3420     /* UNDEF accesses to D16-D31 if they don't exist. */
3421     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
3422         return false;
3423     }
3424
3425     if (!vfp_access_check(s)) {
3426         return true;
3427     }
3428
3429     tmp = tcg_temp_new_i64();
3430     vfp_load_reg64(tmp, a->vm);
3431     fpst = fpstatus_ptr(FPST_FPCR);
3432     gen_helper_rintd_exact(tmp, tmp, fpst);
3433     vfp_store_reg64(tmp, a->vd);
3434     tcg_temp_free_ptr(fpst);
3435     tcg_temp_free_i64(tmp);
3436     return true;
3437 }
3438
3439 static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
3440 {
3441     TCGv_i64 vd;
3442     TCGv_i32 vm;
3443
3444     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3445         return false;
3446     }
3447
3448     /* UNDEF accesses to D16-D31 if they don't exist. */
3449     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
3450         return false;
3451     }
3452
3453     if (!vfp_access_check(s)) {
3454         return true;
3455     }
3456
3457     vm = tcg_temp_new_i32();
3458     vd = tcg_temp_new_i64();
3459     vfp_load_reg32(vm, a->vm);
3460     gen_helper_vfp_fcvtds(vd, vm, cpu_env);
3461     vfp_store_reg64(vd, a->vd);
3462     tcg_temp_free_i32(vm);
3463     tcg_temp_free_i64(vd);
3464     return true;
3465 }
3466
3467 static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
3468 {
3469     TCGv_i64 vm;
3470     TCGv_i32 vd;
3471
3472     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3473         return false;
3474     }
3475
3476     /* UNDEF accesses to D16-D31 if they don't exist. */
3477     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
3478         return false;
3479     }
3480
3481     if (!vfp_access_check(s)) {
3482         return true;
3483     }
3484
3485     vd = tcg_temp_new_i32();
3486     vm = tcg_temp_new_i64();
3487     vfp_load_reg64(vm, a->vm);
3488     gen_helper_vfp_fcvtsd(vd, vm, cpu_env);
3489     vfp_store_reg32(vd, a->vd);
3490     tcg_temp_free_i32(vd);
3491     tcg_temp_free_i64(vm);
3492     return true;
3493 }
3494
3495 static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
3496 {
3497     TCGv_i32 vm;
3498     TCGv_ptr fpst;
3499
3500     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3501         return false;
3502     }
3503
3504     if (!vfp_access_check(s)) {
3505         return true;
3506     }
3507
3508     vm = tcg_temp_new_i32();
3509     vfp_load_reg32(vm, a->vm);
3510     fpst = fpstatus_ptr(FPST_FPCR_F16);
3511     if (a->s) {
3512         /* i32 -> f16 */
3513         gen_helper_vfp_sitoh(vm, vm, fpst);
3514     } else {
3515         /* u32 -> f16 */
3516         gen_helper_vfp_uitoh(vm, vm, fpst);
3517     }
3518     vfp_store_reg32(vm, a->vd);
3519     tcg_temp_free_i32(vm);
3520     tcg_temp_free_ptr(fpst);
3521     return true;
3522 }
3523
3524 static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
3525 {
3526     TCGv_i32 vm;
3527     TCGv_ptr fpst;
3528
3529     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
3530         return false;
3531     }
3532
3533     if (!vfp_access_check(s)) {
3534         return true;
3535     }
3536
3537     vm = tcg_temp_new_i32();
3538     vfp_load_reg32(vm, a->vm);
3539     fpst = fpstatus_ptr(FPST_FPCR);
3540     if (a->s) {
3541         /* i32 -> f32 */
3542         gen_helper_vfp_sitos(vm, vm, fpst);
3543     } else {
3544         /* u32 -> f32 */
3545         gen_helper_vfp_uitos(vm, vm, fpst);
3546     }
3547     vfp_store_reg32(vm, a->vd);
3548     tcg_temp_free_i32(vm);
3549     tcg_temp_free_ptr(fpst);
3550     return true;
3551 }
3552
3553 static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
3554 {
3555     TCGv_i32 vm;
3556     TCGv_i64 vd;
3557     TCGv_ptr fpst;
3558
3559     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3560         return false;
3561     }
3562
3563     /* UNDEF accesses to D16-D31 if they don't exist. */
3564     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
3565         return false;
3566     }
3567
3568     if (!vfp_access_check(s)) {
3569         return true;
3570     }
3571
3572     vm = tcg_temp_new_i32();
3573     vd = tcg_temp_new_i64();
3574     vfp_load_reg32(vm, a->vm);
3575     fpst = fpstatus_ptr(FPST_FPCR);
3576     if (a->s) {
3577         /* i32 -> f64 */
3578         gen_helper_vfp_sitod(vd, vm, fpst);
3579     } else {
3580         /* u32 -> f64 */
3581         gen_helper_vfp_uitod(vd, vm, fpst);
3582     }
3583     vfp_store_reg64(vd, a->vd);
3584     tcg_temp_free_i32(vm);
3585     tcg_temp_free_i64(vd);
3586     tcg_temp_free_ptr(fpst);
3587     return true;
3588 }
3589
3590 static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
3591 {
3592     TCGv_i32 vd;
3593     TCGv_i64 vm;
3594
3595     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3596         return false;
3597     }
3598
3599     if (!dc_isar_feature(aa32_jscvt, s)) {
3600         return false;
3601     }
3602
3603     /* UNDEF accesses to D16-D31 if they don't exist. */
3604     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
3605         return false;
3606     }
3607
3608     if (!vfp_access_check(s)) {
3609         return true;
3610     }
3611
3612     vm = tcg_temp_new_i64();
3613     vd = tcg_temp_new_i32();
3614     vfp_load_reg64(vm, a->vm);
3615     gen_helper_vjcvt(vd, vm, cpu_env);
3616     vfp_store_reg32(vd, a->vd);
3617     tcg_temp_free_i64(vm);
3618     tcg_temp_free_i32(vd);
3619     return true;
3620 }
3621
3622 static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
3623 {
3624     TCGv_i32 vd, shift;
3625     TCGv_ptr fpst;
3626     int frac_bits;
3627
3628     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3629         return false;
3630     }
3631
3632     if (!vfp_access_check(s)) {
3633         return true;
3634     }
3635
3636     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
3637
3638     vd = tcg_temp_new_i32();
3639     vfp_load_reg32(vd, a->vd);
3640
3641     fpst = fpstatus_ptr(FPST_FPCR_F16);
3642     shift = tcg_const_i32(frac_bits);
3643
3644     /* Switch on op:U:sx bits */
3645     switch (a->opc) {
3646     case 0:
3647         gen_helper_vfp_shtoh_round_to_nearest(vd, vd, shift, fpst);
3648         break;
3649     case 1:
3650         gen_helper_vfp_sltoh_round_to_nearest(vd, vd, shift, fpst);
3651         break;
3652     case 2:
3653         gen_helper_vfp_uhtoh_round_to_nearest(vd, vd, shift, fpst);
3654         break;
3655     case 3:
3656         gen_helper_vfp_ultoh_round_to_nearest(vd, vd, shift, fpst);
3657         break;
3658     case 4:
3659         gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst);
3660         break;
3661     case 5:
3662         gen_helper_vfp_toslh_round_to_zero(vd, vd, shift, fpst);
3663         break;
3664     case 6:
3665         gen_helper_vfp_touhh_round_to_zero(vd, vd, shift, fpst);
3666         break;
3667     case 7:
3668         gen_helper_vfp_toulh_round_to_zero(vd, vd, shift, fpst);
3669         break;
3670     default:
3671         g_assert_not_reached();
3672     }
3673
3674     vfp_store_reg32(vd, a->vd);
3675     tcg_temp_free_i32(vd);
3676     tcg_temp_free_i32(shift);
3677     tcg_temp_free_ptr(fpst);
3678     return true;
3679 }
3680
3681 static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
3682 {
3683     TCGv_i32 vd, shift;
3684     TCGv_ptr fpst;
3685     int frac_bits;
3686
3687     if (!dc_isar_feature(aa32_fpsp_v3, s)) {
3688         return false;
3689     }
3690
3691     if (!vfp_access_check(s)) {
3692         return true;
3693     }
3694
3695     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
3696
3697     vd = tcg_temp_new_i32();
3698     vfp_load_reg32(vd, a->vd);
3699
3700     fpst = fpstatus_ptr(FPST_FPCR);
3701     shift = tcg_const_i32(frac_bits);
3702
3703     /* Switch on op:U:sx bits */
3704     switch (a->opc) {
3705     case 0:
3706         gen_helper_vfp_shtos_round_to_nearest(vd, vd, shift, fpst);
3707         break;
3708     case 1:
3709         gen_helper_vfp_sltos_round_to_nearest(vd, vd, shift, fpst);
3710         break;
3711     case 2:
3712         gen_helper_vfp_uhtos_round_to_nearest(vd, vd, shift, fpst);
3713         break;
3714     case 3:
3715         gen_helper_vfp_ultos_round_to_nearest(vd, vd, shift, fpst);
3716         break;
3717     case 4:
3718         gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst);
3719         break;
3720     case 5:
3721         gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst);
3722         break;
3723     case 6:
3724         gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst);
3725         break;
3726     case 7:
3727         gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst);
3728         break;
3729     default:
3730         g_assert_not_reached();
3731     }
3732
3733     vfp_store_reg32(vd, a->vd);
3734     tcg_temp_free_i32(vd);
3735     tcg_temp_free_i32(shift);
3736     tcg_temp_free_ptr(fpst);
3737     return true;
3738 }
3739
3740 static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
3741 {
3742     TCGv_i64 vd;
3743     TCGv_i32 shift;
3744     TCGv_ptr fpst;
3745     int frac_bits;
3746
3747     if (!dc_isar_feature(aa32_fpdp_v3, s)) {
3748         return false;
3749     }
3750
3751     /* UNDEF accesses to D16-D31 if they don't exist. */
3752     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
3753         return false;
3754     }
3755
3756     if (!vfp_access_check(s)) {
3757         return true;
3758     }
3759
3760     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
3761
3762     vd = tcg_temp_new_i64();
3763     vfp_load_reg64(vd, a->vd);
3764
3765     fpst = fpstatus_ptr(FPST_FPCR);
3766     shift = tcg_const_i32(frac_bits);
3767
3768     /* Switch on op:U:sx bits */
3769     switch (a->opc) {
3770     case 0:
3771         gen_helper_vfp_shtod_round_to_nearest(vd, vd, shift, fpst);
3772         break;
3773     case 1:
3774         gen_helper_vfp_sltod_round_to_nearest(vd, vd, shift, fpst);
3775         break;
3776     case 2:
3777         gen_helper_vfp_uhtod_round_to_nearest(vd, vd, shift, fpst);
3778         break;
3779     case 3:
3780         gen_helper_vfp_ultod_round_to_nearest(vd, vd, shift, fpst);
3781         break;
3782     case 4:
3783         gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst);
3784         break;
3785     case 5:
3786         gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst);
3787         break;
3788     case 6:
3789         gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst);
3790         break;
3791     case 7:
3792         gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst);
3793         break;
3794     default:
3795         g_assert_not_reached();
3796     }
3797
3798     vfp_store_reg64(vd, a->vd);
3799     tcg_temp_free_i64(vd);
3800     tcg_temp_free_i32(shift);
3801     tcg_temp_free_ptr(fpst);
3802     return true;
3803 }
3804
3805 static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
3806 {
3807     TCGv_i32 vm;
3808     TCGv_ptr fpst;
3809
3810     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3811         return false;
3812     }
3813
3814     if (!vfp_access_check(s)) {
3815         return true;
3816     }
3817
3818     fpst = fpstatus_ptr(FPST_FPCR_F16);
3819     vm = tcg_temp_new_i32();
3820     vfp_load_reg32(vm, a->vm);
3821
3822     if (a->s) {
3823         if (a->rz) {
3824             gen_helper_vfp_tosizh(vm, vm, fpst);
3825         } else {
3826             gen_helper_vfp_tosih(vm, vm, fpst);
3827         }
3828     } else {
3829         if (a->rz) {
3830             gen_helper_vfp_touizh(vm, vm, fpst);
3831         } else {
3832             gen_helper_vfp_touih(vm, vm, fpst);
3833         }
3834     }
3835     vfp_store_reg32(vm, a->vd);
3836     tcg_temp_free_i32(vm);
3837     tcg_temp_free_ptr(fpst);
3838     return true;
3839 }
3840
3841 static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
3842 {
3843     TCGv_i32 vm;
3844     TCGv_ptr fpst;
3845
3846     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
3847         return false;
3848     }
3849
3850     if (!vfp_access_check(s)) {
3851         return true;
3852     }
3853
3854     fpst = fpstatus_ptr(FPST_FPCR);
3855     vm = tcg_temp_new_i32();
3856     vfp_load_reg32(vm, a->vm);
3857
3858     if (a->s) {
3859         if (a->rz) {
3860             gen_helper_vfp_tosizs(vm, vm, fpst);
3861         } else {
3862             gen_helper_vfp_tosis(vm, vm, fpst);
3863         }
3864     } else {
3865         if (a->rz) {
3866             gen_helper_vfp_touizs(vm, vm, fpst);
3867         } else {
3868             gen_helper_vfp_touis(vm, vm, fpst);
3869         }
3870     }
3871     vfp_store_reg32(vm, a->vd);
3872     tcg_temp_free_i32(vm);
3873     tcg_temp_free_ptr(fpst);
3874     return true;
3875 }
3876
3877 static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
3878 {
3879     TCGv_i32 vd;
3880     TCGv_i64 vm;
3881     TCGv_ptr fpst;
3882
3883     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3884         return false;
3885     }
3886
3887     /* UNDEF accesses to D16-D31 if they don't exist. */
3888     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
3889         return false;
3890     }
3891
3892     if (!vfp_access_check(s)) {
3893         return true;
3894     }
3895
3896     fpst = fpstatus_ptr(FPST_FPCR);
3897     vm = tcg_temp_new_i64();
3898     vd = tcg_temp_new_i32();
3899     vfp_load_reg64(vm, a->vm);
3900
3901     if (a->s) {
3902         if (a->rz) {
3903             gen_helper_vfp_tosizd(vd, vm, fpst);
3904         } else {
3905             gen_helper_vfp_tosid(vd, vm, fpst);
3906         }
3907     } else {
3908         if (a->rz) {
3909             gen_helper_vfp_touizd(vd, vm, fpst);
3910         } else {
3911             gen_helper_vfp_touid(vd, vm, fpst);
3912         }
3913     }
3914     vfp_store_reg32(vd, a->vd);
3915     tcg_temp_free_i32(vd);
3916     tcg_temp_free_i64(vm);
3917     tcg_temp_free_ptr(fpst);
3918     return true;
3919 }
3920
3921 static bool trans_VINS(DisasContext *s, arg_VINS *a)
3922 {
3923     TCGv_i32 rd, rm;
3924
3925     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3926         return false;
3927     }
3928
3929     if (s->vec_len != 0 || s->vec_stride != 0) {
3930         return false;
3931     }
3932
3933     if (!vfp_access_check(s)) {
3934         return true;
3935     }
3936
3937     /* Insert low half of Vm into high half of Vd */
3938     rm = tcg_temp_new_i32();
3939     rd = tcg_temp_new_i32();
3940     vfp_load_reg32(rm, a->vm);
3941     vfp_load_reg32(rd, a->vd);
3942     tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
3943     vfp_store_reg32(rd, a->vd);
3944     tcg_temp_free_i32(rm);
3945     tcg_temp_free_i32(rd);
3946     return true;
3947 }
3948
3949 static bool trans_VMOVX(DisasContext *s, arg_VINS *a)
3950 {
3951     TCGv_i32 rm;
3952
3953     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3954         return false;
3955     }
3956
3957     if (s->vec_len != 0 || s->vec_stride != 0) {
3958         return false;
3959     }
3960
3961     if (!vfp_access_check(s)) {
3962         return true;
3963     }
3964
3965     /* Set Vd to high half of Vm */
3966     rm = tcg_temp_new_i32();
3967     vfp_load_reg32(rm, a->vm);
3968     tcg_gen_shri_i32(rm, rm, 16);
3969     vfp_store_reg32(rm, a->vd);
3970     tcg_temp_free_i32(rm);
3971     return true;
3972 }