target/arm/translate-vfp.c

   1 /*
   2  *  ARM translation: AArch32 VFP instructions
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *  Copyright (c) 2005-2007 CodeSourcery
   6  *  Copyright (c) 2007 OpenedHand, Ltd.
   7  *  Copyright (c) 2019 Linaro, Ltd.
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  21  */
  22
  23 #include "qemu/osdep.h"
  24 #include "tcg/tcg-op.h"
  25 #include "tcg/tcg-op-gvec.h"
  26 #include "exec/exec-all.h"
  27 #include "exec/gen-icount.h"
  28 #include "translate.h"
  29 #include "translate-a32.h"
  30
  31 /* Include the generated VFP decoder */
  32 #include "decode-vfp.c.inc"
  33 #include "decode-vfp-uncond.c.inc"
  34
  35 static inline void vfp_load_reg64(TCGv_i64 var, int reg)
  36 {
  37     tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(true, reg));
  38 }
  39
  40 static inline void vfp_store_reg64(TCGv_i64 var, int reg)
  41 {
  42     tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(true, reg));
  43 }
  44
  45 static inline void vfp_load_reg32(TCGv_i32 var, int reg)
  46 {
  47     tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
  48 }
  49
  50 static inline void vfp_store_reg32(TCGv_i32 var, int reg)
  51 {
  52     tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
  53 }
  54
  55 /*
  56  * The imm8 encodes the sign bit, enough bits to represent an exponent in
  57  * the range 01....1xx to 10....0xx, and the most significant 4 bits of
  58  * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
  59  */
  60 uint64_t vfp_expand_imm(int size, uint8_t imm8)
  61 {
  62     uint64_t imm;
  63
  64     switch (size) {
  65     case MO_64:
  66         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
  67             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
  68             extract32(imm8, 0, 6);
  69         imm <<= 48;
  70         break;
  71     case MO_32:
  72         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
  73             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
  74             (extract32(imm8, 0, 6) << 3);
  75         imm <<= 16;
  76         break;
  77     case MO_16:
  78         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
  79             (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
  80             (extract32(imm8, 0, 6) << 6);
  81         break;
  82     default:
  83         g_assert_not_reached();
  84     }
  85     return imm;
  86 }
  87
  88 /*
  89  * Return the offset of a 16-bit half of the specified VFP single-precision
  90  * register. If top is true, returns the top 16 bits; otherwise the bottom
  91  * 16 bits.
  92  */
  93 static inline long vfp_f16_offset(unsigned reg, bool top)
  94 {
  95     long offs = vfp_reg_offset(false, reg);
  96 #ifdef HOST_WORDS_BIGENDIAN
  97     if (!top) {
  98         offs += 2;
  99     }
 100 #else
 101     if (top) {
 102         offs += 2;
 103     }
 104 #endif
 105     return offs;
 106 }
 107
 108 /*
 109  * Generate code for M-profile lazy FP state preservation if needed;
 110  * this corresponds to the pseudocode PreserveFPState() function.
 111  */
 112 static void gen_preserve_fp_state(DisasContext *s)
 113 {
 114     if (s->v7m_lspact) {
 115         /*
 116          * Lazy state saving affects external memory and also the NVIC,
 117          * so we must mark it as an IO operation for icount (and cause
 118          * this to be the last insn in the TB).
 119          */
 120         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
 121             s->base.is_jmp = DISAS_UPDATE_EXIT;
 122             gen_io_start();
 123         }
 124         gen_helper_v7m_preserve_fp_state(cpu_env);
 125         /*
 126          * If the preserve_fp_state helper doesn't throw an exception
 127          * then it will clear LSPACT; we don't need to repeat this for
 128          * any further FP insns in this TB.
 129          */
 130         s->v7m_lspact = false;
 131     }
 132 }
 133
 134 /*
 135  * Check that VFP access is enabled. If it is, do the necessary
 136  * M-profile lazy-FP handling and then return true.
 137  * If not, emit code to generate an appropriate exception and
 138  * return false.
 139  * The ignore_vfp_enabled argument specifies that we should ignore
 140  * whether VFP is enabled via FPEXC[EN]: this should be true for FMXR/FMRX
 141  * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
 142  */
 143 static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
 144 {
 145     if (s->fp_excp_el) {
 146         /* M-profile handled this earlier, in disas_m_nocp() */
 147         assert (!arm_dc_feature(s, ARM_FEATURE_M));
 148         gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
 149                            syn_fp_access_trap(1, 0xe, false),
 150                            s->fp_excp_el);
 151         return false;
 152     }
 153
 154     if (!s->vfp_enabled && !ignore_vfp_enabled) {
 155         assert(!arm_dc_feature(s, ARM_FEATURE_M));
 156         unallocated_encoding(s);
 157         return false;
 158     }
 159
 160     if (arm_dc_feature(s, ARM_FEATURE_M)) {
 161         /* Handle M-profile lazy FP state mechanics */
 162
 163         /* Trigger lazy-state preservation if necessary */
 164         gen_preserve_fp_state(s);
 165
 166         /* Update ownership of FP context: set FPCCR.S to match current state */
 167         if (s->v8m_fpccr_s_wrong) {
 168             TCGv_i32 tmp;
 169
 170             tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
 171             if (s->v8m_secure) {
 172                 tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
 173             } else {
 174                 tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
 175             }
 176             store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
 177             /* Don't need to do this for any further FP insns in this TB */
 178             s->v8m_fpccr_s_wrong = false;
 179         }
 180
 181         if (s->v7m_new_fp_ctxt_needed) {
 182             /*
 183              * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA
 184              * and the FPSCR.
 185              */
 186             TCGv_i32 control, fpscr;
 187             uint32_t bits = R_V7M_CONTROL_FPCA_MASK;
 188
 189             fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
 190             gen_helper_vfp_set_fpscr(cpu_env, fpscr);
 191             tcg_temp_free_i32(fpscr);
 192             /*
 193              * We don't need to arrange to end the TB, because the only
 194              * parts of FPSCR which we cache in the TB flags are the VECLEN
 195              * and VECSTRIDE, and those don't exist for M-profile.
 196              */
 197
 198             if (s->v8m_secure) {
 199                 bits |= R_V7M_CONTROL_SFPA_MASK;
 200             }
 201             control = load_cpu_field(v7m.control[M_REG_S]);
 202             tcg_gen_ori_i32(control, control, bits);
 203             store_cpu_field(control, v7m.control[M_REG_S]);
 204             /* Don't need to do this for any further FP insns in this TB */
 205             s->v7m_new_fp_ctxt_needed = false;
 206         }
 207     }
 208
 209     return true;
 210 }
 211
 212 /*
 213  * The most usual kind of VFP access check, for everything except
 214  * FMXR/FMRX to the always-available special registers.
 215  */
 216 bool vfp_access_check(DisasContext *s)
 217 {
 218     return full_vfp_access_check(s, false);
 219 }
 220
 221 static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
 222 {
 223     uint32_t rd, rn, rm;
 224     int sz = a->sz;
 225
 226     if (!dc_isar_feature(aa32_vsel, s)) {
 227         return false;
 228     }
 229
 230     if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
 231         return false;
 232     }
 233
 234     if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
 235         return false;
 236     }
 237
 238     /* UNDEF accesses to D16-D31 if they don't exist */
 239     if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
 240         ((a->vm | a->vn | a->vd) & 0x10)) {
 241         return false;
 242     }
 243
 244     rd = a->vd;
 245     rn = a->vn;
 246     rm = a->vm;
 247
 248     if (!vfp_access_check(s)) {
 249         return true;
 250     }
 251
 252     if (sz == 3) {
 253         TCGv_i64 frn, frm, dest;
 254         TCGv_i64 tmp, zero, zf, nf, vf;
 255
 256         zero = tcg_const_i64(0);
 257
 258         frn = tcg_temp_new_i64();
 259         frm = tcg_temp_new_i64();
 260         dest = tcg_temp_new_i64();
 261
 262         zf = tcg_temp_new_i64();
 263         nf = tcg_temp_new_i64();
 264         vf = tcg_temp_new_i64();
 265
 266         tcg_gen_extu_i32_i64(zf, cpu_ZF);
 267         tcg_gen_ext_i32_i64(nf, cpu_NF);
 268         tcg_gen_ext_i32_i64(vf, cpu_VF);
 269
 270         vfp_load_reg64(frn, rn);
 271         vfp_load_reg64(frm, rm);
 272         switch (a->cc) {
 273         case 0: /* eq: Z */
 274             tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
 275                                 frn, frm);
 276             break;
 277         case 1: /* vs: V */
 278             tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
 279                                 frn, frm);
 280             break;
 281         case 2: /* ge: N == V -> N ^ V == 0 */
 282             tmp = tcg_temp_new_i64();
 283             tcg_gen_xor_i64(tmp, vf, nf);
 284             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
 285                                 frn, frm);
 286             tcg_temp_free_i64(tmp);
 287             break;
 288         case 3: /* gt: !Z && N == V */
 289             tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
 290                                 frn, frm);
 291             tmp = tcg_temp_new_i64();
 292             tcg_gen_xor_i64(tmp, vf, nf);
 293             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
 294                                 dest, frm);
 295             tcg_temp_free_i64(tmp);
 296             break;
 297         }
 298         vfp_store_reg64(dest, rd);
 299         tcg_temp_free_i64(frn);
 300         tcg_temp_free_i64(frm);
 301         tcg_temp_free_i64(dest);
 302
 303         tcg_temp_free_i64(zf);
 304         tcg_temp_free_i64(nf);
 305         tcg_temp_free_i64(vf);
 306
 307         tcg_temp_free_i64(zero);
 308     } else {
 309         TCGv_i32 frn, frm, dest;
 310         TCGv_i32 tmp, zero;
 311
 312         zero = tcg_const_i32(0);
 313
 314         frn = tcg_temp_new_i32();
 315         frm = tcg_temp_new_i32();
 316         dest = tcg_temp_new_i32();
 317         vfp_load_reg32(frn, rn);
 318         vfp_load_reg32(frm, rm);
 319         switch (a->cc) {
 320         case 0: /* eq: Z */
 321             tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
 322                                 frn, frm);
 323             break;
 324         case 1: /* vs: V */
 325             tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
 326                                 frn, frm);
 327             break;
 328         case 2: /* ge: N == V -> N ^ V == 0 */
 329             tmp = tcg_temp_new_i32();
 330             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
 331             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
 332                                 frn, frm);
 333             tcg_temp_free_i32(tmp);
 334             break;
 335         case 3: /* gt: !Z && N == V */
 336             tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
 337                                 frn, frm);
 338             tmp = tcg_temp_new_i32();
 339             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
 340             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
 341                                 dest, frm);
 342             tcg_temp_free_i32(tmp);
 343             break;
 344         }
 345         /* For fp16 the top half is always zeroes */
 346         if (sz == 1) {
 347             tcg_gen_andi_i32(dest, dest, 0xffff);
 348         }
 349         vfp_store_reg32(dest, rd);
 350         tcg_temp_free_i32(frn);
 351         tcg_temp_free_i32(frm);
 352         tcg_temp_free_i32(dest);
 353
 354         tcg_temp_free_i32(zero);
 355     }
 356
 357     return true;
 358 }
 359
 360 /*
 361  * Table for converting the most common AArch32 encoding of
 362  * rounding mode to arm_fprounding order (which matches the
 363  * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
 364  */
 365 static const uint8_t fp_decode_rm[] = {
 366     FPROUNDING_TIEAWAY,
 367     FPROUNDING_TIEEVEN,
 368     FPROUNDING_POSINF,
 369     FPROUNDING_NEGINF,
 370 };
 371
 372 static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
 373 {
 374     uint32_t rd, rm;
 375     int sz = a->sz;
 376     TCGv_ptr fpst;
 377     TCGv_i32 tcg_rmode;
 378     int rounding = fp_decode_rm[a->rm];
 379
 380     if (!dc_isar_feature(aa32_vrint, s)) {
 381         return false;
 382     }
 383
 384     if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
 385         return false;
 386     }
 387
 388     if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
 389         return false;
 390     }
 391
 392     /* UNDEF accesses to D16-D31 if they don't exist */
 393     if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
 394         ((a->vm | a->vd) & 0x10)) {
 395         return false;
 396     }
 397
 398     rd = a->vd;
 399     rm = a->vm;
 400
 401     if (!vfp_access_check(s)) {
 402         return true;
 403     }
 404
 405     if (sz == 1) {
 406         fpst = fpstatus_ptr(FPST_FPCR_F16);
 407     } else {
 408         fpst = fpstatus_ptr(FPST_FPCR);
 409     }
 410
 411     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
 412     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 413
 414     if (sz == 3) {
 415         TCGv_i64 tcg_op;
 416         TCGv_i64 tcg_res;
 417         tcg_op = tcg_temp_new_i64();
 418         tcg_res = tcg_temp_new_i64();
 419         vfp_load_reg64(tcg_op, rm);
 420         gen_helper_rintd(tcg_res, tcg_op, fpst);
 421         vfp_store_reg64(tcg_res, rd);
 422         tcg_temp_free_i64(tcg_op);
 423         tcg_temp_free_i64(tcg_res);
 424     } else {
 425         TCGv_i32 tcg_op;
 426         TCGv_i32 tcg_res;
 427         tcg_op = tcg_temp_new_i32();
 428         tcg_res = tcg_temp_new_i32();
 429         vfp_load_reg32(tcg_op, rm);
 430         if (sz == 1) {
 431             gen_helper_rinth(tcg_res, tcg_op, fpst);
 432         } else {
 433             gen_helper_rints(tcg_res, tcg_op, fpst);
 434         }
 435         vfp_store_reg32(tcg_res, rd);
 436         tcg_temp_free_i32(tcg_op);
 437         tcg_temp_free_i32(tcg_res);
 438     }
 439
 440     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 441     tcg_temp_free_i32(tcg_rmode);
 442
 443     tcg_temp_free_ptr(fpst);
 444     return true;
 445 }
 446
 447 static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
 448 {
 449     uint32_t rd, rm;
 450     int sz = a->sz;
 451     TCGv_ptr fpst;
 452     TCGv_i32 tcg_rmode, tcg_shift;
 453     int rounding = fp_decode_rm[a->rm];
 454     bool is_signed = a->op;
 455
 456     if (!dc_isar_feature(aa32_vcvt_dr, s)) {
 457         return false;
 458     }
 459
 460     if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
 461         return false;
 462     }
 463
 464     if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
 465         return false;
 466     }
 467
 468     /* UNDEF accesses to D16-D31 if they don't exist */
 469     if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
 470         return false;
 471     }
 472
 473     rd = a->vd;
 474     rm = a->vm;
 475
 476     if (!vfp_access_check(s)) {
 477         return true;
 478     }
 479
 480     if (sz == 1) {
 481         fpst = fpstatus_ptr(FPST_FPCR_F16);
 482     } else {
 483         fpst = fpstatus_ptr(FPST_FPCR);
 484     }
 485
 486     tcg_shift = tcg_const_i32(0);
 487
 488     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
 489     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 490
 491     if (sz == 3) {
 492         TCGv_i64 tcg_double, tcg_res;
 493         TCGv_i32 tcg_tmp;
 494         tcg_double = tcg_temp_new_i64();
 495         tcg_res = tcg_temp_new_i64();
 496         tcg_tmp = tcg_temp_new_i32();
 497         vfp_load_reg64(tcg_double, rm);
 498         if (is_signed) {
 499             gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
 500         } else {
 501             gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
 502         }
 503         tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
 504         vfp_store_reg32(tcg_tmp, rd);
 505         tcg_temp_free_i32(tcg_tmp);
 506         tcg_temp_free_i64(tcg_res);
 507         tcg_temp_free_i64(tcg_double);
 508     } else {
 509         TCGv_i32 tcg_single, tcg_res;
 510         tcg_single = tcg_temp_new_i32();
 511         tcg_res = tcg_temp_new_i32();
 512         vfp_load_reg32(tcg_single, rm);
 513         if (sz == 1) {
 514             if (is_signed) {
 515                 gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst);
 516             } else {
 517                 gen_helper_vfp_toulh(tcg_res, tcg_single, tcg_shift, fpst);
 518             }
 519         } else {
 520             if (is_signed) {
 521                 gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
 522             } else {
 523                 gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
 524             }
 525         }
 526         vfp_store_reg32(tcg_res, rd);
 527         tcg_temp_free_i32(tcg_res);
 528         tcg_temp_free_i32(tcg_single);
 529     }
 530
 531     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 532     tcg_temp_free_i32(tcg_rmode);
 533
 534     tcg_temp_free_i32(tcg_shift);
 535
 536     tcg_temp_free_ptr(fpst);
 537
 538     return true;
 539 }
 540
 541 static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
 542 {
 543     /* VMOV scalar to general purpose register */
 544     TCGv_i32 tmp;
 545
 546     /*
 547      * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has
 548      * all sizes, whether the CPU has fp or not.
 549      */
 550     if (!dc_isar_feature(aa32_mve, s)) {
 551         if (a->size == MO_32
 552             ? !dc_isar_feature(aa32_fpsp_v2, s)
 553             : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
 554             return false;
 555         }
 556     }
 557
 558     /* UNDEF accesses to D16-D31 if they don't exist */
 559     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
 560         return false;
 561     }
 562
 563     if (!vfp_access_check(s)) {
 564         return true;
 565     }
 566
 567     tmp = tcg_temp_new_i32();
 568     read_neon_element32(tmp, a->vn, a->index, a->size | (a->u ? 0 : MO_SIGN));
 569     store_reg(s, a->rt, tmp);
 570
 571     return true;
 572 }
 573
 574 static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
 575 {
 576     /* VMOV general purpose register to scalar */
 577     TCGv_i32 tmp;
 578
 579     /*
 580      * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has
 581      * all sizes, whether the CPU has fp or not.
 582      */
 583     if (!dc_isar_feature(aa32_mve, s)) {
 584         if (a->size == MO_32
 585             ? !dc_isar_feature(aa32_fpsp_v2, s)
 586             : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
 587             return false;
 588         }
 589     }
 590
 591     /* UNDEF accesses to D16-D31 if they don't exist */
 592     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
 593         return false;
 594     }
 595
 596     if (!vfp_access_check(s)) {
 597         return true;
 598     }
 599
 600     tmp = load_reg(s, a->rt);
 601     write_neon_element32(tmp, a->vn, a->index, a->size);
 602     tcg_temp_free_i32(tmp);
 603
 604     return true;
 605 }
 606
 607 static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
 608 {
 609     /* VDUP (general purpose register) */
 610     TCGv_i32 tmp;
 611     int size, vec_size;
 612
 613     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
 614         return false;
 615     }
 616
 617     /* UNDEF accesses to D16-D31 if they don't exist */
 618     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
 619         return false;
 620     }
 621
 622     if (a->b && a->e) {
 623         return false;
 624     }
 625
 626     if (a->q && (a->vn & 1)) {
 627         return false;
 628     }
 629
 630     vec_size = a->q ? 16 : 8;
 631     if (a->b) {
 632         size = 0;
 633     } else if (a->e) {
 634         size = 1;
 635     } else {
 636         size = 2;
 637     }
 638
 639     if (!vfp_access_check(s)) {
 640         return true;
 641     }
 642
 643     tmp = load_reg(s, a->rt);
 644     tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn),
 645                          vec_size, vec_size, tmp);
 646     tcg_temp_free_i32(tmp);
 647
 648     return true;
 649 }
 650
 651 /*
 652  * M-profile provides two different sets of instructions that can
 653  * access floating point system registers: VMSR/VMRS (which move
 654  * to/from a general purpose register) and VLDR/VSTR sysreg (which
 655  * move directly to/from memory). In some cases there are also side
 656  * effects which must happen after any write to memory (which could
 657  * cause an exception). So we implement the common logic for the
 658  * sysreg access in gen_M_fp_sysreg_write() and gen_M_fp_sysreg_read(),
 659  * which take pointers to callback functions which will perform the
 660  * actual "read/write general purpose register" and "read/write
 661  * memory" operations.
 662  */
 663
 664 /*
 665  * Emit code to store the sysreg to its final destination; frees the
 666  * TCG temp 'value' it is passed.
 667  */
 668 typedef void fp_sysreg_storefn(DisasContext *s, void *opaque, TCGv_i32 value);
 669 /*
 670  * Emit code to load the value to be copied to the sysreg; returns
 671  * a new TCG temporary
 672  */
 673 typedef TCGv_i32 fp_sysreg_loadfn(DisasContext *s, void *opaque);
 674
 675 /* Common decode/access checks for fp sysreg read/write */
 676 typedef enum FPSysRegCheckResult {
 677     FPSysRegCheckFailed, /* caller should return false */
 678     FPSysRegCheckDone, /* caller should return true */
 679     FPSysRegCheckContinue, /* caller should continue generating code */
 680 } FPSysRegCheckResult;
 681
 682 static FPSysRegCheckResult fp_sysreg_checks(DisasContext *s, int regno)
 683 {
 684     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
 685         return FPSysRegCheckFailed;
 686     }
 687
 688     switch (regno) {
 689     case ARM_VFP_FPSCR:
 690     case QEMU_VFP_FPSCR_NZCV:
 691         break;
 692     case ARM_VFP_FPSCR_NZCVQC:
 693         if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
 694             return FPSysRegCheckFailed;
 695         }
 696         break;
 697     case ARM_VFP_FPCXT_S:
 698     case ARM_VFP_FPCXT_NS:
 699         if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
 700             return FPSysRegCheckFailed;
 701         }
 702         if (!s->v8m_secure) {
 703             return FPSysRegCheckFailed;
 704         }
 705         break;
 706     case ARM_VFP_VPR:
 707     case ARM_VFP_P0:
 708         if (!dc_isar_feature(aa32_mve, s)) {
 709             return FPSysRegCheckFailed;
 710         }
 711         break;
 712     default:
 713         return FPSysRegCheckFailed;
 714     }
 715
 716     /*
 717      * FPCXT_NS is a special case: it has specific handling for
 718      * "current FP state is inactive", and must do the PreserveFPState()
 719      * but not the usual full set of actions done by ExecuteFPCheck().
 720      * So we don't call vfp_access_check() and the callers must handle this.
 721      */
 722     if (regno != ARM_VFP_FPCXT_NS && !vfp_access_check(s)) {
 723         return FPSysRegCheckDone;
 724     }
 725     return FPSysRegCheckContinue;
 726 }
 727
 728 static void gen_branch_fpInactive(DisasContext *s, TCGCond cond,
 729                                   TCGLabel *label)
 730 {
 731     /*
 732      * FPCXT_NS is a special case: it has specific handling for
 733      * "current FP state is inactive", and must do the PreserveFPState()
 734      * but not the usual full set of actions done by ExecuteFPCheck().
 735      * We don't have a TB flag that matches the fpInactive check, so we
 736      * do it at runtime as we don't expect FPCXT_NS accesses to be frequent.
 737      *
 738      * Emit code that checks fpInactive and does a conditional
 739      * branch to label based on it:
 740      *  if cond is TCG_COND_NE then branch if fpInactive != 0 (ie if inactive)
 741      *  if cond is TCG_COND_EQ then branch if fpInactive == 0 (ie if active)
 742      */
 743     assert(cond == TCG_COND_EQ || cond == TCG_COND_NE);
 744
 745     /* fpInactive = FPCCR_NS.ASPEN == 1 && CONTROL.FPCA == 0 */
 746     TCGv_i32 aspen, fpca;
 747     aspen = load_cpu_field(v7m.fpccr[M_REG_NS]);
 748     fpca = load_cpu_field(v7m.control[M_REG_S]);
 749     tcg_gen_andi_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
 750     tcg_gen_xori_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
 751     tcg_gen_andi_i32(fpca, fpca, R_V7M_CONTROL_FPCA_MASK);
 752     tcg_gen_or_i32(fpca, fpca, aspen);
 753     tcg_gen_brcondi_i32(tcg_invert_cond(cond), fpca, 0, label);
 754     tcg_temp_free_i32(aspen);
 755     tcg_temp_free_i32(fpca);
 756 }
 757
 758 static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
 759
 760                                   fp_sysreg_loadfn *loadfn,
 761                                  void *opaque)
 762 {
 763     /* Do a write to an M-profile floating point system register */
 764     TCGv_i32 tmp;
 765     TCGLabel *lab_end = NULL;
 766
 767     switch (fp_sysreg_checks(s, regno)) {
 768     case FPSysRegCheckFailed:
 769         return false;
 770     case FPSysRegCheckDone:
 771         return true;
 772     case FPSysRegCheckContinue:
 773         break;
 774     }
 775
 776     switch (regno) {
 777     case ARM_VFP_FPSCR:
 778         tmp = loadfn(s, opaque);
 779         gen_helper_vfp_set_fpscr(cpu_env, tmp);
 780         tcg_temp_free_i32(tmp);
 781         gen_lookup_tb(s);
 782         break;
 783     case ARM_VFP_FPSCR_NZCVQC:
 784     {
 785         TCGv_i32 fpscr;
 786         tmp = loadfn(s, opaque);
 787         /*
 788          * TODO: when we implement MVE, write the QC bit.
 789          * For non-MVE, QC is RES0.
 790          */
 791         tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
 792         fpscr = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
 793         tcg_gen_andi_i32(fpscr, fpscr, ~FPCR_NZCV_MASK);
 794         tcg_gen_or_i32(fpscr, fpscr, tmp);
 795         store_cpu_field(fpscr, vfp.xregs[ARM_VFP_FPSCR]);
 796         tcg_temp_free_i32(tmp);
 797         break;
 798     }
 799     case ARM_VFP_FPCXT_NS:
 800         lab_end = gen_new_label();
 801         /* fpInactive case: write is a NOP, so branch to end */
 802         gen_branch_fpInactive(s, TCG_COND_NE, lab_end);
 803         /* !fpInactive: PreserveFPState(), and reads same as FPCXT_S */
 804         gen_preserve_fp_state(s);
 805         /* fall through */
 806     case ARM_VFP_FPCXT_S:
 807     {
 808         TCGv_i32 sfpa, control;
 809         /*
 810          * Set FPSCR and CONTROL.SFPA from value; the new FPSCR takes
 811          * bits [27:0] from value and zeroes bits [31:28].
 812          */
 813         tmp = loadfn(s, opaque);
 814         sfpa = tcg_temp_new_i32();
 815         tcg_gen_shri_i32(sfpa, tmp, 31);
 816         control = load_cpu_field(v7m.control[M_REG_S]);
 817         tcg_gen_deposit_i32(control, control, sfpa,
 818                             R_V7M_CONTROL_SFPA_SHIFT, 1);
 819         store_cpu_field(control, v7m.control[M_REG_S]);
 820         tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
 821         gen_helper_vfp_set_fpscr(cpu_env, tmp);
 822         tcg_temp_free_i32(tmp);
 823         tcg_temp_free_i32(sfpa);
 824         break;
 825     }
 826     case ARM_VFP_VPR:
 827         /* Behaves as NOP if not privileged */
 828         if (IS_USER(s)) {
 829             break;
 830         }
 831         tmp = loadfn(s, opaque);
 832         store_cpu_field(tmp, v7m.vpr);
 833         break;
 834     case ARM_VFP_P0:
 835     {
 836         TCGv_i32 vpr;
 837         tmp = loadfn(s, opaque);
 838         vpr = load_cpu_field(v7m.vpr);
 839         tcg_gen_deposit_i32(vpr, vpr, tmp,
 840                             R_V7M_VPR_P0_SHIFT, R_V7M_VPR_P0_LENGTH);
 841         store_cpu_field(vpr, v7m.vpr);
 842         tcg_temp_free_i32(tmp);
 843         break;
 844     }
 845     default:
 846         g_assert_not_reached();
 847     }
 848     if (lab_end) {
 849         gen_set_label(lab_end);
 850     }
 851     return true;
 852 }
 853
 854 static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
 855                                 fp_sysreg_storefn *storefn,
 856                                 void *opaque)
 857 {
 858     /* Do a read from an M-profile floating point system register */
 859     TCGv_i32 tmp;
 860     TCGLabel *lab_end = NULL;
 861     bool lookup_tb = false;
 862
 863     switch (fp_sysreg_checks(s, regno)) {
 864     case FPSysRegCheckFailed:
 865         return false;
 866     case FPSysRegCheckDone:
 867         return true;
 868     case FPSysRegCheckContinue:
 869         break;
 870     }
 871
 872     switch (regno) {
 873     case ARM_VFP_FPSCR:
 874         tmp = tcg_temp_new_i32();
 875         gen_helper_vfp_get_fpscr(tmp, cpu_env);
 876         storefn(s, opaque, tmp);
 877         break;
 878     case ARM_VFP_FPSCR_NZCVQC:
 879         /*
 880          * TODO: MVE has a QC bit, which we probably won't store
 881          * in the xregs[] field. For non-MVE, where QC is RES0,
 882          * we can just fall through to the FPSCR_NZCV case.
 883          */
 884     case QEMU_VFP_FPSCR_NZCV:
 885         /*
 886          * Read just NZCV; this is a special case to avoid the
 887          * helper call for the "VMRS to CPSR.NZCV" insn.
 888          */
 889         tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
 890         tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
 891         storefn(s, opaque, tmp);
 892         break;
 893     case ARM_VFP_FPCXT_S:
 894     {
 895         TCGv_i32 control, sfpa, fpscr;
 896         /* Bits [27:0] from FPSCR, bit [31] from CONTROL.SFPA */
 897         tmp = tcg_temp_new_i32();
 898         sfpa = tcg_temp_new_i32();
 899         gen_helper_vfp_get_fpscr(tmp, cpu_env);
 900         tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
 901         control = load_cpu_field(v7m.control[M_REG_S]);
 902         tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
 903         tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
 904         tcg_gen_or_i32(tmp, tmp, sfpa);
 905         tcg_temp_free_i32(sfpa);
 906         /*
 907          * Store result before updating FPSCR etc, in case
 908          * it is a memory write which causes an exception.
 909          */
 910         storefn(s, opaque, tmp);
 911         /*
 912          * Now we must reset FPSCR from FPDSCR_NS, and clear
 913          * CONTROL.SFPA; so we'll end the TB here.
 914          */
 915         tcg_gen_andi_i32(control, control, ~R_V7M_CONTROL_SFPA_MASK);
 916         store_cpu_field(control, v7m.control[M_REG_S]);
 917         fpscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
 918         gen_helper_vfp_set_fpscr(cpu_env, fpscr);
 919         tcg_temp_free_i32(fpscr);
 920         lookup_tb = true;
 921         break;
 922     }
 923     case ARM_VFP_FPCXT_NS:
 924     {
 925         TCGv_i32 control, sfpa, fpscr, fpdscr, zero;
 926         TCGLabel *lab_active = gen_new_label();
 927
 928         lookup_tb = true;
 929
 930         gen_branch_fpInactive(s, TCG_COND_EQ, lab_active);
 931         /* fpInactive case: reads as FPDSCR_NS */
 932         TCGv_i32 tmp = load_cpu_field(v7m.fpdscr[M_REG_NS]);
 933         storefn(s, opaque, tmp);
 934         lab_end = gen_new_label();
 935         tcg_gen_br(lab_end);
 936
 937         gen_set_label(lab_active);
 938         /* !fpInactive: Reads the same as FPCXT_S, but side effects differ */
 939         gen_preserve_fp_state(s);
 940         tmp = tcg_temp_new_i32();
 941         sfpa = tcg_temp_new_i32();
 942         fpscr = tcg_temp_new_i32();
 943         gen_helper_vfp_get_fpscr(fpscr, cpu_env);
 944         tcg_gen_andi_i32(tmp, fpscr, ~FPCR_NZCV_MASK);
 945         control = load_cpu_field(v7m.control[M_REG_S]);
 946         tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
 947         tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
 948         tcg_gen_or_i32(tmp, tmp, sfpa);
 949         tcg_temp_free_i32(control);
 950         /* Store result before updating FPSCR, in case it faults */
 951         storefn(s, opaque, tmp);
 952         /* If SFPA is zero then set FPSCR from FPDSCR_NS */
 953         fpdscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
 954         zero = tcg_const_i32(0);
 955         tcg_gen_movcond_i32(TCG_COND_EQ, fpscr, sfpa, zero, fpdscr, fpscr);
 956         gen_helper_vfp_set_fpscr(cpu_env, fpscr);
 957         tcg_temp_free_i32(zero);
 958         tcg_temp_free_i32(sfpa);
 959         tcg_temp_free_i32(fpdscr);
 960         tcg_temp_free_i32(fpscr);
 961         break;
 962     }
 963     case ARM_VFP_VPR:
 964         /* Behaves as NOP if not privileged */
 965         if (IS_USER(s)) {
 966             break;
 967         }
 968         tmp = load_cpu_field(v7m.vpr);
 969         storefn(s, opaque, tmp);
 970         break;
 971     case ARM_VFP_P0:
 972         tmp = load_cpu_field(v7m.vpr);
 973         tcg_gen_extract_i32(tmp, tmp, R_V7M_VPR_P0_SHIFT, R_V7M_VPR_P0_LENGTH);
 974         storefn(s, opaque, tmp);
 975         break;
 976     default:
 977         g_assert_not_reached();
 978     }
 979
 980     if (lab_end) {
 981         gen_set_label(lab_end);
 982     }
 983     if (lookup_tb) {
 984         gen_lookup_tb(s);
 985     }
 986     return true;
 987 }
 988
 989 static void fp_sysreg_to_gpr(DisasContext *s, void *opaque, TCGv_i32 value)
 990 {
 991     arg_VMSR_VMRS *a = opaque;
 992
 993     if (a->rt == 15) {
 994         /* Set the 4 flag bits in the CPSR */
 995         gen_set_nzcv(value);
 996         tcg_temp_free_i32(value);
 997     } else {
 998         store_reg(s, a->rt, value);
 999     }
1000 }
1001
1002 static TCGv_i32 gpr_to_fp_sysreg(DisasContext *s, void *opaque)
1003 {
1004     arg_VMSR_VMRS *a = opaque;
1005
1006     return load_reg(s, a->rt);
1007 }
1008
1009 static bool gen_M_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
1010 {
1011     /*
1012      * Accesses to R15 are UNPREDICTABLE; we choose to undef.
1013      * FPSCR -> r15 is a special case which writes to the PSR flags;
1014      * set a->reg to a special value to tell gen_M_fp_sysreg_read()
1015      * we only care about the top 4 bits of FPSCR there.
1016      */
1017     if (a->rt == 15) {
1018         if (a->l && a->reg == ARM_VFP_FPSCR) {
1019             a->reg = QEMU_VFP_FPSCR_NZCV;
1020         } else {
1021             return false;
1022         }
1023     }
1024
1025     if (a->l) {
1026         /* VMRS, move FP system register to gp register */
1027         return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_gpr, a);
1028     } else {
1029         /* VMSR, move gp register to FP system register */
1030         return gen_M_fp_sysreg_write(s, a->reg, gpr_to_fp_sysreg, a);
1031     }
1032 }
1033
1034 static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
1035 {
1036     TCGv_i32 tmp;
1037     bool ignore_vfp_enabled = false;
1038
1039     if (arm_dc_feature(s, ARM_FEATURE_M)) {
1040         return gen_M_VMSR_VMRS(s, a);
1041     }
1042
1043     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1044         return false;
1045     }
1046
1047     switch (a->reg) {
1048     case ARM_VFP_FPSID:
1049         /*
1050          * VFPv2 allows access to FPSID from userspace; VFPv3 restricts
1051          * all ID registers to privileged access only.
1052          */
1053         if (IS_USER(s) && dc_isar_feature(aa32_fpsp_v3, s)) {
1054             return false;
1055         }
1056         ignore_vfp_enabled = true;
1057         break;
1058     case ARM_VFP_MVFR0:
1059     case ARM_VFP_MVFR1:
1060         if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
1061             return false;
1062         }
1063         ignore_vfp_enabled = true;
1064         break;
1065     case ARM_VFP_MVFR2:
1066         if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) {
1067             return false;
1068         }
1069         ignore_vfp_enabled = true;
1070         break;
1071     case ARM_VFP_FPSCR:
1072         break;
1073     case ARM_VFP_FPEXC:
1074         if (IS_USER(s)) {
1075             return false;
1076         }
1077         ignore_vfp_enabled = true;
1078         break;
1079     case ARM_VFP_FPINST:
1080     case ARM_VFP_FPINST2:
1081         /* Not present in VFPv3 */
1082         if (IS_USER(s) || dc_isar_feature(aa32_fpsp_v3, s)) {
1083             return false;
1084         }
1085         break;
1086     default:
1087         return false;
1088     }
1089
1090     if (!full_vfp_access_check(s, ignore_vfp_enabled)) {
1091         return true;
1092     }
1093
1094     if (a->l) {
1095         /* VMRS, move VFP special register to gp register */
1096         switch (a->reg) {
1097         case ARM_VFP_MVFR0:
1098         case ARM_VFP_MVFR1:
1099         case ARM_VFP_MVFR2:
1100         case ARM_VFP_FPSID:
1101             if (s->current_el == 1) {
1102                 TCGv_i32 tcg_reg, tcg_rt;
1103
1104                 gen_set_condexec(s);
1105                 gen_set_pc_im(s, s->pc_curr);
1106                 tcg_reg = tcg_const_i32(a->reg);
1107                 tcg_rt = tcg_const_i32(a->rt);
1108                 gen_helper_check_hcr_el2_trap(cpu_env, tcg_rt, tcg_reg);
1109                 tcg_temp_free_i32(tcg_reg);
1110                 tcg_temp_free_i32(tcg_rt);
1111             }
1112             /* fall through */
1113         case ARM_VFP_FPEXC:
1114         case ARM_VFP_FPINST:
1115         case ARM_VFP_FPINST2:
1116             tmp = load_cpu_field(vfp.xregs[a->reg]);
1117             break;
1118         case ARM_VFP_FPSCR:
1119             if (a->rt == 15) {
1120                 tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
1121                 tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
1122             } else {
1123                 tmp = tcg_temp_new_i32();
1124                 gen_helper_vfp_get_fpscr(tmp, cpu_env);
1125             }
1126             break;
1127         default:
1128             g_assert_not_reached();
1129         }
1130
1131         if (a->rt == 15) {
1132             /* Set the 4 flag bits in the CPSR.  */
1133             gen_set_nzcv(tmp);
1134             tcg_temp_free_i32(tmp);
1135         } else {
1136             store_reg(s, a->rt, tmp);
1137         }
1138     } else {
1139         /* VMSR, move gp register to VFP special register */
1140         switch (a->reg) {
1141         case ARM_VFP_FPSID:
1142         case ARM_VFP_MVFR0:
1143         case ARM_VFP_MVFR1:
1144         case ARM_VFP_MVFR2:
1145             /* Writes are ignored.  */
1146             break;
1147         case ARM_VFP_FPSCR:
1148             tmp = load_reg(s, a->rt);
1149             gen_helper_vfp_set_fpscr(cpu_env, tmp);
1150             tcg_temp_free_i32(tmp);
1151             gen_lookup_tb(s);
1152             break;
1153         case ARM_VFP_FPEXC:
1154             /*
1155              * TODO: VFP subarchitecture support.
1156              * For now, keep the EN bit only
1157              */
1158             tmp = load_reg(s, a->rt);
1159             tcg_gen_andi_i32(tmp, tmp, 1 << 30);
1160             store_cpu_field(tmp, vfp.xregs[a->reg]);
1161             gen_lookup_tb(s);
1162             break;
1163         case ARM_VFP_FPINST:
1164         case ARM_VFP_FPINST2:
1165             tmp = load_reg(s, a->rt);
1166             store_cpu_field(tmp, vfp.xregs[a->reg]);
1167             break;
1168         default:
1169             g_assert_not_reached();
1170         }
1171     }
1172
1173     return true;
1174 }
1175
1176 static void fp_sysreg_to_memory(DisasContext *s, void *opaque, TCGv_i32 value)
1177 {
1178     arg_vldr_sysreg *a = opaque;
1179     uint32_t offset = a->imm;
1180     TCGv_i32 addr;
1181
1182     if (!a->a) {
1183         offset = - offset;
1184     }
1185
1186     addr = load_reg(s, a->rn);
1187     if (a->p) {
1188         tcg_gen_addi_i32(addr, addr, offset);
1189     }
1190
1191     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1192         gen_helper_v8m_stackcheck(cpu_env, addr);
1193     }
1194
1195     gen_aa32_st_i32(s, value, addr, get_mem_index(s),
1196                     MO_UL | MO_ALIGN | s->be_data);
1197     tcg_temp_free_i32(value);
1198
1199     if (a->w) {
1200         /* writeback */
1201         if (!a->p) {
1202             tcg_gen_addi_i32(addr, addr, offset);
1203         }
1204         store_reg(s, a->rn, addr);
1205     } else {
1206         tcg_temp_free_i32(addr);
1207     }
1208 }
1209
1210 static TCGv_i32 memory_to_fp_sysreg(DisasContext *s, void *opaque)
1211 {
1212     arg_vldr_sysreg *a = opaque;
1213     uint32_t offset = a->imm;
1214     TCGv_i32 addr;
1215     TCGv_i32 value = tcg_temp_new_i32();
1216
1217     if (!a->a) {
1218         offset = - offset;
1219     }
1220
1221     addr = load_reg(s, a->rn);
1222     if (a->p) {
1223         tcg_gen_addi_i32(addr, addr, offset);
1224     }
1225
1226     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1227         gen_helper_v8m_stackcheck(cpu_env, addr);
1228     }
1229
1230     gen_aa32_ld_i32(s, value, addr, get_mem_index(s),
1231                     MO_UL | MO_ALIGN | s->be_data);
1232
1233     if (a->w) {
1234         /* writeback */
1235         if (!a->p) {
1236             tcg_gen_addi_i32(addr, addr, offset);
1237         }
1238         store_reg(s, a->rn, addr);
1239     } else {
1240         tcg_temp_free_i32(addr);
1241     }
1242     return value;
1243 }
1244
1245 static bool trans_VLDR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
1246 {
1247     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
1248         return false;
1249     }
1250     if (a->rn == 15) {
1251         return false;
1252     }
1253     return gen_M_fp_sysreg_write(s, a->reg, memory_to_fp_sysreg, a);
1254 }
1255
1256 static bool trans_VSTR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
1257 {
1258     if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
1259         return false;
1260     }
1261     if (a->rn == 15) {
1262         return false;
1263     }
1264     return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_memory, a);
1265 }
1266
1267 static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
1268 {
1269     TCGv_i32 tmp;
1270
1271     if (!dc_isar_feature(aa32_fp16_arith, s)) {
1272         return false;
1273     }
1274
1275     if (a->rt == 15) {
1276         /* UNPREDICTABLE; we choose to UNDEF */
1277         return false;
1278     }
1279
1280     if (!vfp_access_check(s)) {
1281         return true;
1282     }
1283
1284     if (a->l) {
1285         /* VFP to general purpose register */
1286         tmp = tcg_temp_new_i32();
1287         vfp_load_reg32(tmp, a->vn);
1288         tcg_gen_andi_i32(tmp, tmp, 0xffff);
1289         store_reg(s, a->rt, tmp);
1290     } else {
1291         /* general purpose register to VFP */
1292         tmp = load_reg(s, a->rt);
1293         tcg_gen_andi_i32(tmp, tmp, 0xffff);
1294         vfp_store_reg32(tmp, a->vn);
1295         tcg_temp_free_i32(tmp);
1296     }
1297
1298     return true;
1299 }
1300
1301 static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
1302 {
1303     TCGv_i32 tmp;
1304
1305     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1306         return false;
1307     }
1308
1309     if (!vfp_access_check(s)) {
1310         return true;
1311     }
1312
1313     if (a->l) {
1314         /* VFP to general purpose register */
1315         tmp = tcg_temp_new_i32();
1316         vfp_load_reg32(tmp, a->vn);
1317         if (a->rt == 15) {
1318             /* Set the 4 flag bits in the CPSR.  */
1319             gen_set_nzcv(tmp);
1320             tcg_temp_free_i32(tmp);
1321         } else {
1322             store_reg(s, a->rt, tmp);
1323         }
1324     } else {
1325         /* general purpose register to VFP */
1326         tmp = load_reg(s, a->rt);
1327         vfp_store_reg32(tmp, a->vn);
1328         tcg_temp_free_i32(tmp);
1329     }
1330
1331     return true;
1332 }
1333
1334 static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
1335 {
1336     TCGv_i32 tmp;
1337
1338     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1339         return false;
1340     }
1341
1342     /*
1343      * VMOV between two general-purpose registers and two single precision
1344      * floating point registers
1345      */
1346     if (!vfp_access_check(s)) {
1347         return true;
1348     }
1349
1350     if (a->op) {
1351         /* fpreg to gpreg */
1352         tmp = tcg_temp_new_i32();
1353         vfp_load_reg32(tmp, a->vm);
1354         store_reg(s, a->rt, tmp);
1355         tmp = tcg_temp_new_i32();
1356         vfp_load_reg32(tmp, a->vm + 1);
1357         store_reg(s, a->rt2, tmp);
1358     } else {
1359         /* gpreg to fpreg */
1360         tmp = load_reg(s, a->rt);
1361         vfp_store_reg32(tmp, a->vm);
1362         tcg_temp_free_i32(tmp);
1363         tmp = load_reg(s, a->rt2);
1364         vfp_store_reg32(tmp, a->vm + 1);
1365         tcg_temp_free_i32(tmp);
1366     }
1367
1368     return true;
1369 }
1370
1371 static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
1372 {
1373     TCGv_i32 tmp;
1374
1375     /*
1376      * VMOV between two general-purpose registers and one double precision
1377      * floating point register.  Note that this does not require support
1378      * for double precision arithmetic.
1379      */
1380     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1381         return false;
1382     }
1383
1384     /* UNDEF accesses to D16-D31 if they don't exist */
1385     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
1386         return false;
1387     }
1388
1389     if (!vfp_access_check(s)) {
1390         return true;
1391     }
1392
1393     if (a->op) {
1394         /* fpreg to gpreg */
1395         tmp = tcg_temp_new_i32();
1396         vfp_load_reg32(tmp, a->vm * 2);
1397         store_reg(s, a->rt, tmp);
1398         tmp = tcg_temp_new_i32();
1399         vfp_load_reg32(tmp, a->vm * 2 + 1);
1400         store_reg(s, a->rt2, tmp);
1401     } else {
1402         /* gpreg to fpreg */
1403         tmp = load_reg(s, a->rt);
1404         vfp_store_reg32(tmp, a->vm * 2);
1405         tcg_temp_free_i32(tmp);
1406         tmp = load_reg(s, a->rt2);
1407         vfp_store_reg32(tmp, a->vm * 2 + 1);
1408         tcg_temp_free_i32(tmp);
1409     }
1410
1411     return true;
1412 }
1413
1414 static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a)
1415 {
1416     uint32_t offset;
1417     TCGv_i32 addr, tmp;
1418
1419     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1420         return false;
1421     }
1422
1423     if (!vfp_access_check(s)) {
1424         return true;
1425     }
1426
1427     /* imm8 field is offset/2 for fp16, unlike fp32 and fp64 */
1428     offset = a->imm << 1;
1429     if (!a->u) {
1430         offset = -offset;
1431     }
1432
1433     /* For thumb, use of PC is UNPREDICTABLE.  */
1434     addr = add_reg_for_lit(s, a->rn, offset);
1435     tmp = tcg_temp_new_i32();
1436     if (a->l) {
1437         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN);
1438         vfp_store_reg32(tmp, a->vd);
1439     } else {
1440         vfp_load_reg32(tmp, a->vd);
1441         gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN);
1442     }
1443     tcg_temp_free_i32(tmp);
1444     tcg_temp_free_i32(addr);
1445
1446     return true;
1447 }
1448
1449 static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
1450 {
1451     uint32_t offset;
1452     TCGv_i32 addr, tmp;
1453
1454     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1455         return false;
1456     }
1457
1458     if (!vfp_access_check(s)) {
1459         return true;
1460     }
1461
1462     offset = a->imm << 2;
1463     if (!a->u) {
1464         offset = -offset;
1465     }
1466
1467     /* For thumb, use of PC is UNPREDICTABLE.  */
1468     addr = add_reg_for_lit(s, a->rn, offset);
1469     tmp = tcg_temp_new_i32();
1470     if (a->l) {
1471         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
1472         vfp_store_reg32(tmp, a->vd);
1473     } else {
1474         vfp_load_reg32(tmp, a->vd);
1475         gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
1476     }
1477     tcg_temp_free_i32(tmp);
1478     tcg_temp_free_i32(addr);
1479
1480     return true;
1481 }
1482
1483 static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
1484 {
1485     uint32_t offset;
1486     TCGv_i32 addr;
1487     TCGv_i64 tmp;
1488
1489     /* Note that this does not require support for double arithmetic.  */
1490     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1491         return false;
1492     }
1493
1494     /* UNDEF accesses to D16-D31 if they don't exist */
1495     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
1496         return false;
1497     }
1498
1499     if (!vfp_access_check(s)) {
1500         return true;
1501     }
1502
1503     offset = a->imm << 2;
1504     if (!a->u) {
1505         offset = -offset;
1506     }
1507
1508     /* For thumb, use of PC is UNPREDICTABLE.  */
1509     addr = add_reg_for_lit(s, a->rn, offset);
1510     tmp = tcg_temp_new_i64();
1511     if (a->l) {
1512         gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4);
1513         vfp_store_reg64(tmp, a->vd);
1514     } else {
1515         vfp_load_reg64(tmp, a->vd);
1516         gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4);
1517     }
1518     tcg_temp_free_i64(tmp);
1519     tcg_temp_free_i32(addr);
1520
1521     return true;
1522 }
1523
1524 static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
1525 {
1526     uint32_t offset;
1527     TCGv_i32 addr, tmp;
1528     int i, n;
1529
1530     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1531         return false;
1532     }
1533
1534     n = a->imm;
1535
1536     if (n == 0 || (a->vd + n) > 32) {
1537         /*
1538          * UNPREDICTABLE cases for bad immediates: we choose to
1539          * UNDEF to avoid generating huge numbers of TCG ops
1540          */
1541         return false;
1542     }
1543     if (a->rn == 15 && a->w) {
1544         /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
1545         return false;
1546     }
1547
1548     if (!vfp_access_check(s)) {
1549         return true;
1550     }
1551
1552     /* For thumb, use of PC is UNPREDICTABLE.  */
1553     addr = add_reg_for_lit(s, a->rn, 0);
1554     if (a->p) {
1555         /* pre-decrement */
1556         tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
1557     }
1558
1559     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1560         /*
1561          * Here 'addr' is the lowest address we will store to,
1562          * and is either the old SP (if post-increment) or
1563          * the new SP (if pre-decrement). For post-increment
1564          * where the old value is below the limit and the new
1565          * value is above, it is UNKNOWN whether the limit check
1566          * triggers; we choose to trigger.
1567          */
1568         gen_helper_v8m_stackcheck(cpu_env, addr);
1569     }
1570
1571     offset = 4;
1572     tmp = tcg_temp_new_i32();
1573     for (i = 0; i < n; i++) {
1574         if (a->l) {
1575             /* load */
1576             gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
1577             vfp_store_reg32(tmp, a->vd + i);
1578         } else {
1579             /* store */
1580             vfp_load_reg32(tmp, a->vd + i);
1581             gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
1582         }
1583         tcg_gen_addi_i32(addr, addr, offset);
1584     }
1585     tcg_temp_free_i32(tmp);
1586     if (a->w) {
1587         /* writeback */
1588         if (a->p) {
1589             offset = -offset * n;
1590             tcg_gen_addi_i32(addr, addr, offset);
1591         }
1592         store_reg(s, a->rn, addr);
1593     } else {
1594         tcg_temp_free_i32(addr);
1595     }
1596
1597     return true;
1598 }
1599
1600 static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
1601 {
1602     uint32_t offset;
1603     TCGv_i32 addr;
1604     TCGv_i64 tmp;
1605     int i, n;
1606
1607     /* Note that this does not require support for double arithmetic.  */
1608     if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
1609         return false;
1610     }
1611
1612     n = a->imm >> 1;
1613
1614     if (n == 0 || (a->vd + n) > 32 || n > 16) {
1615         /*
1616          * UNPREDICTABLE cases for bad immediates: we choose to
1617          * UNDEF to avoid generating huge numbers of TCG ops
1618          */
1619         return false;
1620     }
1621     if (a->rn == 15 && a->w) {
1622         /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
1623         return false;
1624     }
1625
1626     /* UNDEF accesses to D16-D31 if they don't exist */
1627     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) {
1628         return false;
1629     }
1630
1631     if (!vfp_access_check(s)) {
1632         return true;
1633     }
1634
1635     /* For thumb, use of PC is UNPREDICTABLE.  */
1636     addr = add_reg_for_lit(s, a->rn, 0);
1637     if (a->p) {
1638         /* pre-decrement */
1639         tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
1640     }
1641
1642     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1643         /*
1644          * Here 'addr' is the lowest address we will store to,
1645          * and is either the old SP (if post-increment) or
1646          * the new SP (if pre-decrement). For post-increment
1647          * where the old value is below the limit and the new
1648          * value is above, it is UNKNOWN whether the limit check
1649          * triggers; we choose to trigger.
1650          */
1651         gen_helper_v8m_stackcheck(cpu_env, addr);
1652     }
1653
1654     offset = 8;
1655     tmp = tcg_temp_new_i64();
1656     for (i = 0; i < n; i++) {
1657         if (a->l) {
1658             /* load */
1659             gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4);
1660             vfp_store_reg64(tmp, a->vd + i);
1661         } else {
1662             /* store */
1663             vfp_load_reg64(tmp, a->vd + i);
1664             gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4);
1665         }
1666         tcg_gen_addi_i32(addr, addr, offset);
1667     }
1668     tcg_temp_free_i64(tmp);
1669     if (a->w) {
1670         /* writeback */
1671         if (a->p) {
1672             offset = -offset * n;
1673         } else if (a->imm & 1) {
1674             offset = 4;
1675         } else {
1676             offset = 0;
1677         }
1678
1679         if (offset != 0) {
1680             tcg_gen_addi_i32(addr, addr, offset);
1681         }
1682         store_reg(s, a->rn, addr);
1683     } else {
1684         tcg_temp_free_i32(addr);
1685     }
1686
1687     return true;
1688 }
1689
1690 /*
1691  * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp().
1692  * The callback should emit code to write a value to vd. If
1693  * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd
1694  * will contain the old value of the relevant VFP register;
1695  * otherwise it must be written to only.
1696  */
1697 typedef void VFPGen3OpSPFn(TCGv_i32 vd,
1698                            TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst);
1699 typedef void VFPGen3OpDPFn(TCGv_i64 vd,
1700                            TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);
1701
1702 /*
1703  * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp().
1704  * The callback should emit code to write a value to vd (which
1705  * should be written to only).
1706  */
1707 typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
1708 typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);
1709
/*
 * Tell whether S register number @reg lies in a scalar bank, i.e. is
 * one of s0..s7: true when bits [4:3] of the number are both clear.
 */
static inline bool vfp_sreg_is_scalar(int reg)
{
    const int bank_bits = reg & 0x18;

    return !bank_bits;
}
1718
/*
 * Tell whether D register number @reg lies in a scalar bank, i.e. is
 * one of d0..d3 or d16..d19: true when bits [3:2] of the number are
 * both clear.
 */
static inline bool vfp_dreg_is_scalar(int reg)
{
    const int bank_bits = reg & 0xc;

    return !bank_bits;
}
1727
/*
 * Step S register number @reg forwards by @delta without leaving its
 * bank: the low 3 bits are incremented (wrapping modulo 8) and all
 * higher bits are kept as they were.
 */
static inline int vfp_advance_sreg(int reg, int delta)
{
    const int bank = reg & ~0x7;
    const int slot = (reg + delta) & 0x7;

    return slot | bank;
}
1736
/*
 * Step D register number @reg forwards by @delta without leaving its
 * bank: the low 2 bits are incremented (wrapping modulo 4) and all
 * higher bits are kept as they were.
 */
static inline int vfp_advance_dreg(int reg, int delta)
{
    const int bank = reg & ~0x3;
    const int slot = (reg + delta) & 0x3;

    return slot | bank;
}
1745
1746 /*
1747  * Perform a 3-operand VFP data processing instruction. fn is the
1748  * callback to do the actual operation; this function deals with the
1749  * code to handle looping around for VFP vector processing.
1750  */
1751 static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
1752                           int vd, int vn, int vm, bool reads_vd)
1753 {
1754     uint32_t delta_m = 0;
1755     uint32_t delta_d = 0;
1756     int veclen = s->vec_len;
1757     TCGv_i32 f0, f1, fd;
1758     TCGv_ptr fpst;
1759
1760     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1761         return false;
1762     }
1763
1764     if (!dc_isar_feature(aa32_fpshvec, s) &&
1765         (veclen != 0 || s->vec_stride != 0)) {
1766         return false;
1767     }
1768
1769     if (!vfp_access_check(s)) {
1770         return true;
1771     }
1772
1773     if (veclen > 0) {
1774         /* Figure out what type of vector operation this is.  */
1775         if (vfp_sreg_is_scalar(vd)) {
1776             /* scalar */
1777             veclen = 0;
1778         } else {
1779             delta_d = s->vec_stride + 1;
1780
1781             if (vfp_sreg_is_scalar(vm)) {
1782                 /* mixed scalar/vector */
1783                 delta_m = 0;
1784             } else {
1785                 /* vector */
1786                 delta_m = delta_d;
1787             }
1788         }
1789     }
1790
1791     f0 = tcg_temp_new_i32();
1792     f1 = tcg_temp_new_i32();
1793     fd = tcg_temp_new_i32();
1794     fpst = fpstatus_ptr(FPST_FPCR);
1795
1796     vfp_load_reg32(f0, vn);
1797     vfp_load_reg32(f1, vm);
1798
1799     for (;;) {
1800         if (reads_vd) {
1801             vfp_load_reg32(fd, vd);
1802         }
1803         fn(fd, f0, f1, fpst);
1804         vfp_store_reg32(fd, vd);
1805
1806         if (veclen == 0) {
1807             break;
1808         }
1809
1810         /* Set up the operands for the next iteration */
1811         veclen--;
1812         vd = vfp_advance_sreg(vd, delta_d);
1813         vn = vfp_advance_sreg(vn, delta_d);
1814         vfp_load_reg32(f0, vn);
1815         if (delta_m) {
1816             vm = vfp_advance_sreg(vm, delta_m);
1817             vfp_load_reg32(f1, vm);
1818         }
1819     }
1820
1821     tcg_temp_free_i32(f0);
1822     tcg_temp_free_i32(f1);
1823     tcg_temp_free_i32(fd);
1824     tcg_temp_free_ptr(fpst);
1825
1826     return true;
1827 }
1828
1829 static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
1830                           int vd, int vn, int vm, bool reads_vd)
1831 {
1832     /*
1833      * Do a half-precision operation. Functionally this is
1834      * the same as do_vfp_3op_sp(), except:
1835      *  - it uses the FPST_FPCR_F16
1836      *  - it doesn't need the VFP vector handling (fp16 is a
1837      *    v8 feature, and in v8 VFP vectors don't exist)
1838      *  - it does the aa32_fp16_arith feature test
1839      */
1840     TCGv_i32 f0, f1, fd;
1841     TCGv_ptr fpst;
1842
1843     if (!dc_isar_feature(aa32_fp16_arith, s)) {
1844         return false;
1845     }
1846
1847     if (s->vec_len != 0 || s->vec_stride != 0) {
1848         return false;
1849     }
1850
1851     if (!vfp_access_check(s)) {
1852         return true;
1853     }
1854
1855     f0 = tcg_temp_new_i32();
1856     f1 = tcg_temp_new_i32();
1857     fd = tcg_temp_new_i32();
1858     fpst = fpstatus_ptr(FPST_FPCR_F16);
1859
1860     vfp_load_reg32(f0, vn);
1861     vfp_load_reg32(f1, vm);
1862
1863     if (reads_vd) {
1864         vfp_load_reg32(fd, vd);
1865     }
1866     fn(fd, f0, f1, fpst);
1867     vfp_store_reg32(fd, vd);
1868
1869     tcg_temp_free_i32(f0);
1870     tcg_temp_free_i32(f1);
1871     tcg_temp_free_i32(fd);
1872     tcg_temp_free_ptr(fpst);
1873
1874     return true;
1875 }
1876
1877 static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
1878                           int vd, int vn, int vm, bool reads_vd)
1879 {
1880     uint32_t delta_m = 0;
1881     uint32_t delta_d = 0;
1882     int veclen = s->vec_len;
1883     TCGv_i64 f0, f1, fd;
1884     TCGv_ptr fpst;
1885
1886     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
1887         return false;
1888     }
1889
1890     /* UNDEF accesses to D16-D31 if they don't exist */
1891     if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) {
1892         return false;
1893     }
1894
1895     if (!dc_isar_feature(aa32_fpshvec, s) &&
1896         (veclen != 0 || s->vec_stride != 0)) {
1897         return false;
1898     }
1899
1900     if (!vfp_access_check(s)) {
1901         return true;
1902     }
1903
1904     if (veclen > 0) {
1905         /* Figure out what type of vector operation this is.  */
1906         if (vfp_dreg_is_scalar(vd)) {
1907             /* scalar */
1908             veclen = 0;
1909         } else {
1910             delta_d = (s->vec_stride >> 1) + 1;
1911
1912             if (vfp_dreg_is_scalar(vm)) {
1913                 /* mixed scalar/vector */
1914                 delta_m = 0;
1915             } else {
1916                 /* vector */
1917                 delta_m = delta_d;
1918             }
1919         }
1920     }
1921
1922     f0 = tcg_temp_new_i64();
1923     f1 = tcg_temp_new_i64();
1924     fd = tcg_temp_new_i64();
1925     fpst = fpstatus_ptr(FPST_FPCR);
1926
1927     vfp_load_reg64(f0, vn);
1928     vfp_load_reg64(f1, vm);
1929
1930     for (;;) {
1931         if (reads_vd) {
1932             vfp_load_reg64(fd, vd);
1933         }
1934         fn(fd, f0, f1, fpst);
1935         vfp_store_reg64(fd, vd);
1936
1937         if (veclen == 0) {
1938             break;
1939         }
1940         /* Set up the operands for the next iteration */
1941         veclen--;
1942         vd = vfp_advance_dreg(vd, delta_d);
1943         vn = vfp_advance_dreg(vn, delta_d);
1944         vfp_load_reg64(f0, vn);
1945         if (delta_m) {
1946             vm = vfp_advance_dreg(vm, delta_m);
1947             vfp_load_reg64(f1, vm);
1948         }
1949     }
1950
1951     tcg_temp_free_i64(f0);
1952     tcg_temp_free_i64(f1);
1953     tcg_temp_free_i64(fd);
1954     tcg_temp_free_ptr(fpst);
1955
1956     return true;
1957 }
1958
1959 static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
1960 {
1961     uint32_t delta_m = 0;
1962     uint32_t delta_d = 0;
1963     int veclen = s->vec_len;
1964     TCGv_i32 f0, fd;
1965
1966     /* Note that the caller must check the aa32_fpsp_v2 feature. */
1967
1968     if (!dc_isar_feature(aa32_fpshvec, s) &&
1969         (veclen != 0 || s->vec_stride != 0)) {
1970         return false;
1971     }
1972
1973     if (!vfp_access_check(s)) {
1974         return true;
1975     }
1976
1977     if (veclen > 0) {
1978         /* Figure out what type of vector operation this is.  */
1979         if (vfp_sreg_is_scalar(vd)) {
1980             /* scalar */
1981             veclen = 0;
1982         } else {
1983             delta_d = s->vec_stride + 1;
1984
1985             if (vfp_sreg_is_scalar(vm)) {
1986                 /* mixed scalar/vector */
1987                 delta_m = 0;
1988             } else {
1989                 /* vector */
1990                 delta_m = delta_d;
1991             }
1992         }
1993     }
1994
1995     f0 = tcg_temp_new_i32();
1996     fd = tcg_temp_new_i32();
1997
1998     vfp_load_reg32(f0, vm);
1999
2000     for (;;) {
2001         fn(fd, f0);
2002         vfp_store_reg32(fd, vd);
2003
2004         if (veclen == 0) {
2005             break;
2006         }
2007
2008         if (delta_m == 0) {
2009             /* single source one-many */
2010             while (veclen--) {
2011                 vd = vfp_advance_sreg(vd, delta_d);
2012                 vfp_store_reg32(fd, vd);
2013             }
2014             break;
2015         }
2016
2017         /* Set up the operands for the next iteration */
2018         veclen--;
2019         vd = vfp_advance_sreg(vd, delta_d);
2020         vm = vfp_advance_sreg(vm, delta_m);
2021         vfp_load_reg32(f0, vm);
2022     }
2023
2024     tcg_temp_free_i32(f0);
2025     tcg_temp_free_i32(fd);
2026
2027     return true;
2028 }
2029
2030 static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
2031 {
2032     /*
2033      * Do a half-precision operation. Functionally this is
2034      * the same as do_vfp_2op_sp(), except:
2035      *  - it doesn't need the VFP vector handling (fp16 is a
2036      *    v8 feature, and in v8 VFP vectors don't exist)
2037      *  - it does the aa32_fp16_arith feature test
2038      */
2039     TCGv_i32 f0;
2040
2041     /* Note that the caller must check the aa32_fp16_arith feature */
2042
2043     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2044         return false;
2045     }
2046
2047     if (s->vec_len != 0 || s->vec_stride != 0) {
2048         return false;
2049     }
2050
2051     if (!vfp_access_check(s)) {
2052         return true;
2053     }
2054
2055     f0 = tcg_temp_new_i32();
2056     vfp_load_reg32(f0, vm);
2057     fn(f0, f0);
2058     vfp_store_reg32(f0, vd);
2059     tcg_temp_free_i32(f0);
2060
2061     return true;
2062 }
2063
2064 static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
2065 {
2066     uint32_t delta_m = 0;
2067     uint32_t delta_d = 0;
2068     int veclen = s->vec_len;
2069     TCGv_i64 f0, fd;
2070
2071     /* Note that the caller must check the aa32_fpdp_v2 feature. */
2072
2073     /* UNDEF accesses to D16-D31 if they don't exist */
2074     if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) {
2075         return false;
2076     }
2077
2078     if (!dc_isar_feature(aa32_fpshvec, s) &&
2079         (veclen != 0 || s->vec_stride != 0)) {
2080         return false;
2081     }
2082
2083     if (!vfp_access_check(s)) {
2084         return true;
2085     }
2086
2087     if (veclen > 0) {
2088         /* Figure out what type of vector operation this is.  */
2089         if (vfp_dreg_is_scalar(vd)) {
2090             /* scalar */
2091             veclen = 0;
2092         } else {
2093             delta_d = (s->vec_stride >> 1) + 1;
2094
2095             if (vfp_dreg_is_scalar(vm)) {
2096                 /* mixed scalar/vector */
2097                 delta_m = 0;
2098             } else {
2099                 /* vector */
2100                 delta_m = delta_d;
2101             }
2102         }
2103     }
2104
2105     f0 = tcg_temp_new_i64();
2106     fd = tcg_temp_new_i64();
2107
2108     vfp_load_reg64(f0, vm);
2109
2110     for (;;) {
2111         fn(fd, f0);
2112         vfp_store_reg64(fd, vd);
2113
2114         if (veclen == 0) {
2115             break;
2116         }
2117
2118         if (delta_m == 0) {
2119             /* single source one-many */
2120             while (veclen--) {
2121                 vd = vfp_advance_dreg(vd, delta_d);
2122                 vfp_store_reg64(fd, vd);
2123             }
2124             break;
2125         }
2126
2127         /* Set up the operands for the next iteration */
2128         veclen--;
2129         vd = vfp_advance_dreg(vd, delta_d);
2130         vd = vfp_advance_dreg(vm, delta_m);
2131         vfp_load_reg64(f0, vm);
2132     }
2133
2134     tcg_temp_free_i64(f0);
2135     tcg_temp_free_i64(fd);
2136
2137     return true;
2138 }
2139
2140 static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2141 {
2142     /* Note that order of inputs to the add matters for NaNs */
2143     TCGv_i32 tmp = tcg_temp_new_i32();
2144
2145     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
2146     gen_helper_vfp_addh(vd, vd, tmp, fpst);
2147     tcg_temp_free_i32(tmp);
2148 }
2149
2150 static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
2151 {
2152     return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);
2153 }
2154
2155 static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2156 {
2157     /* Note that order of inputs to the add matters for NaNs */
2158     TCGv_i32 tmp = tcg_temp_new_i32();
2159
2160     gen_helper_vfp_muls(tmp, vn, vm, fpst);
2161     gen_helper_vfp_adds(vd, vd, tmp, fpst);
2162     tcg_temp_free_i32(tmp);
2163 }
2164
2165 static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
2166 {
2167     return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);
2168 }
2169
2170 static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
2171 {
2172     /* Note that order of inputs to the add matters for NaNs */
2173     TCGv_i64 tmp = tcg_temp_new_i64();
2174
2175     gen_helper_vfp_muld(tmp, vn, vm, fpst);
2176     gen_helper_vfp_addd(vd, vd, tmp, fpst);
2177     tcg_temp_free_i64(tmp);
2178 }
2179
2180 static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
2181 {
2182     return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
2183 }
2184
2185 static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2186 {
2187     /*
2188      * VMLS: vd = vd + -(vn * vm)
2189      * Note that order of inputs to the add matters for NaNs.
2190      */
2191     TCGv_i32 tmp = tcg_temp_new_i32();
2192
2193     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
2194     gen_helper_vfp_negh(tmp, tmp);
2195     gen_helper_vfp_addh(vd, vd, tmp, fpst);
2196     tcg_temp_free_i32(tmp);
2197 }
2198
2199 static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
2200 {
2201     return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);
2202 }
2203
2204 static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2205 {
2206     /*
2207      * VMLS: vd = vd + -(vn * vm)
2208      * Note that order of inputs to the add matters for NaNs.
2209      */
2210     TCGv_i32 tmp = tcg_temp_new_i32();
2211
2212     gen_helper_vfp_muls(tmp, vn, vm, fpst);
2213     gen_helper_vfp_negs(tmp, tmp);
2214     gen_helper_vfp_adds(vd, vd, tmp, fpst);
2215     tcg_temp_free_i32(tmp);
2216 }
2217
2218 static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
2219 {
2220     return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);
2221 }
2222
2223 static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
2224 {
2225     /*
2226      * VMLS: vd = vd + -(vn * vm)
2227      * Note that order of inputs to the add matters for NaNs.
2228      */
2229     TCGv_i64 tmp = tcg_temp_new_i64();
2230
2231     gen_helper_vfp_muld(tmp, vn, vm, fpst);
2232     gen_helper_vfp_negd(tmp, tmp);
2233     gen_helper_vfp_addd(vd, vd, tmp, fpst);
2234     tcg_temp_free_i64(tmp);
2235 }
2236
2237 static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
2238 {
2239     return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
2240 }
2241
2242 static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2243 {
2244     /*
2245      * VNMLS: -fd + (fn * fm)
2246      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
2247      * plausible looking simplifications because this will give wrong results
2248      * for NaNs.
2249      */
2250     TCGv_i32 tmp = tcg_temp_new_i32();
2251
2252     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
2253     gen_helper_vfp_negh(vd, vd);
2254     gen_helper_vfp_addh(vd, vd, tmp, fpst);
2255     tcg_temp_free_i32(tmp);
2256 }
2257
2258 static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
2259 {
2260     return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);
2261 }
2262
2263 static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2264 {
2265     /*
2266      * VNMLS: -fd + (fn * fm)
2267      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
2268      * plausible looking simplifications because this will give wrong results
2269      * for NaNs.
2270      */
2271     TCGv_i32 tmp = tcg_temp_new_i32();
2272
2273     gen_helper_vfp_muls(tmp, vn, vm, fpst);
2274     gen_helper_vfp_negs(vd, vd);
2275     gen_helper_vfp_adds(vd, vd, tmp, fpst);
2276     tcg_temp_free_i32(tmp);
2277 }
2278
2279 static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
2280 {
2281     return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);
2282 }
2283
2284 static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
2285 {
2286     /*
2287      * VNMLS: -fd + (fn * fm)
2288      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
2289      * plausible looking simplifications because this will give wrong results
2290      * for NaNs.
2291      */
2292     TCGv_i64 tmp = tcg_temp_new_i64();
2293
2294     gen_helper_vfp_muld(tmp, vn, vm, fpst);
2295     gen_helper_vfp_negd(vd, vd);
2296     gen_helper_vfp_addd(vd, vd, tmp, fpst);
2297     tcg_temp_free_i64(tmp);
2298 }
2299
2300 static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
2301 {
2302     return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
2303 }
2304
2305 static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2306 {
2307     /* VNMLA: -fd + -(fn * fm) */
2308     TCGv_i32 tmp = tcg_temp_new_i32();
2309
2310     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
2311     gen_helper_vfp_negh(tmp, tmp);
2312     gen_helper_vfp_negh(vd, vd);
2313     gen_helper_vfp_addh(vd, vd, tmp, fpst);
2314     tcg_temp_free_i32(tmp);
2315 }
2316
2317 static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
2318 {
2319     return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);
2320 }
2321
2322 static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2323 {
2324     /* VNMLA: -fd + -(fn * fm) */
2325     TCGv_i32 tmp = tcg_temp_new_i32();
2326
2327     gen_helper_vfp_muls(tmp, vn, vm, fpst);
2328     gen_helper_vfp_negs(tmp, tmp);
2329     gen_helper_vfp_negs(vd, vd);
2330     gen_helper_vfp_adds(vd, vd, tmp, fpst);
2331     tcg_temp_free_i32(tmp);
2332 }
2333
2334 static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
2335 {
2336     return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);
2337 }
2338
2339 static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
2340 {
2341     /* VNMLA: -fd + (fn * fm) */
2342     TCGv_i64 tmp = tcg_temp_new_i64();
2343
2344     gen_helper_vfp_muld(tmp, vn, vm, fpst);
2345     gen_helper_vfp_negd(tmp, tmp);
2346     gen_helper_vfp_negd(vd, vd);
2347     gen_helper_vfp_addd(vd, vd, tmp, fpst);
2348     tcg_temp_free_i64(tmp);
2349 }
2350
2351 static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
2352 {
2353     return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
2354 }
2355
2356 static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a)
2357 {
2358     return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false);
2359 }
2360
2361 static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
2362 {
2363     return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
2364 }
2365
2366 static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
2367 {
2368     return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
2369 }
2370
2371 static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2372 {
2373     /* VNMUL: -(fn * fm) */
2374     gen_helper_vfp_mulh(vd, vn, vm, fpst);
2375     gen_helper_vfp_negh(vd, vd);
2376 }
2377
2378 static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
2379 {
2380     return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);
2381 }
2382
2383 static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
2384 {
2385     /* VNMUL: -(fn * fm) */
2386     gen_helper_vfp_muls(vd, vn, vm, fpst);
2387     gen_helper_vfp_negs(vd, vd);
2388 }
2389
2390 static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
2391 {
2392     return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false);
2393 }
2394
2395 static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
2396 {
2397     /* VNMUL: -(fn * fm) */
2398     gen_helper_vfp_muld(vd, vn, vm, fpst);
2399     gen_helper_vfp_negd(vd, vd);
2400 }
2401
2402 static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
2403 {
2404     return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
2405 }
2406
2407 static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a)
2408 {
2409     return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false);
2410 }
2411
2412 static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
2413 {
2414     return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
2415 }
2416
2417 static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
2418 {
2419     return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
2420 }
2421
2422 static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a)
2423 {
2424     return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false);
2425 }
2426
2427 static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
2428 {
2429     return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
2430 }
2431
2432 static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
2433 {
2434     return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
2435 }
2436
2437 static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a)
2438 {
2439     return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false);
2440 }
2441
2442 static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
2443 {
2444     return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
2445 }
2446
2447 static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
2448 {
2449     return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
2450 }
2451
2452 static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a)
2453 {
2454     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2455         return false;
2456     }
2457     return do_vfp_3op_hp(s, gen_helper_vfp_minnumh,
2458                          a->vd, a->vn, a->vm, false);
2459 }
2460
2461 static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a)
2462 {
2463     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2464         return false;
2465     }
2466     return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh,
2467                          a->vd, a->vn, a->vm, false);
2468 }
2469
2470 static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a)
2471 {
2472     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2473         return false;
2474     }
2475     return do_vfp_3op_sp(s, gen_helper_vfp_minnums,
2476                          a->vd, a->vn, a->vm, false);
2477 }
2478
2479 static bool trans_VMAXNM_sp(DisasContext *s, arg_VMAXNM_sp *a)
2480 {
2481     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2482         return false;
2483     }
2484     return do_vfp_3op_sp(s, gen_helper_vfp_maxnums,
2485                          a->vd, a->vn, a->vm, false);
2486 }
2487
2488 static bool trans_VMINNM_dp(DisasContext *s, arg_VMINNM_dp *a)
2489 {
2490     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2491         return false;
2492     }
2493     return do_vfp_3op_dp(s, gen_helper_vfp_minnumd,
2494                          a->vd, a->vn, a->vm, false);
2495 }
2496
2497 static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a)
2498 {
2499     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2500         return false;
2501     }
2502     return do_vfp_3op_dp(s, gen_helper_vfp_maxnumd,
2503                          a->vd, a->vn, a->vm, false);
2504 }
2505
2506 static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
2507 {
2508     /*
2509      * VFNMA : fd = muladd(-fd,  fn, fm)
2510      * VFNMS : fd = muladd(-fd, -fn, fm)
2511      * VFMA  : fd = muladd( fd,  fn, fm)
2512      * VFMS  : fd = muladd( fd, -fn, fm)
2513      *
2514      * These are fused multiply-add, and must be done as one floating
2515      * point operation with no rounding between the multiplication and
2516      * addition steps.  NB that doing the negations here as separate
2517      * steps is correct : an input NaN should come out with its sign
2518      * bit flipped if it is a negated-input.
2519      */
2520     TCGv_ptr fpst;
2521     TCGv_i32 vn, vm, vd;
2522
2523     /*
2524      * Present in VFPv4 only, and only with the FP16 extension.
2525      * Note that we can't rely on the SIMDFMAC check alone, because
2526      * in a Neon-no-VFP core that ID register field will be non-zero.
2527      */
2528     if (!dc_isar_feature(aa32_fp16_arith, s) ||
2529         !dc_isar_feature(aa32_simdfmac, s) ||
2530         !dc_isar_feature(aa32_fpsp_v2, s)) {
2531         return false;
2532     }
2533
2534     if (s->vec_len != 0 || s->vec_stride != 0) {
2535         return false;
2536     }
2537
2538     if (!vfp_access_check(s)) {
2539         return true;
2540     }
2541
2542     vn = tcg_temp_new_i32();
2543     vm = tcg_temp_new_i32();
2544     vd = tcg_temp_new_i32();
2545
2546     vfp_load_reg32(vn, a->vn);
2547     vfp_load_reg32(vm, a->vm);
2548     if (neg_n) {
2549         /* VFNMS, VFMS */
2550         gen_helper_vfp_negh(vn, vn);
2551     }
2552     vfp_load_reg32(vd, a->vd);
2553     if (neg_d) {
2554         /* VFNMA, VFNMS */
2555         gen_helper_vfp_negh(vd, vd);
2556     }
2557     fpst = fpstatus_ptr(FPST_FPCR_F16);
2558     gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
2559     vfp_store_reg32(vd, a->vd);
2560
2561     tcg_temp_free_ptr(fpst);
2562     tcg_temp_free_i32(vn);
2563     tcg_temp_free_i32(vm);
2564     tcg_temp_free_i32(vd);
2565
2566     return true;
2567 }
2568
2569 static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
2570 {
2571     /*
2572      * VFNMA : fd = muladd(-fd,  fn, fm)
2573      * VFNMS : fd = muladd(-fd, -fn, fm)
2574      * VFMA  : fd = muladd( fd,  fn, fm)
2575      * VFMS  : fd = muladd( fd, -fn, fm)
2576      *
2577      * These are fused multiply-add, and must be done as one floating
2578      * point operation with no rounding between the multiplication and
2579      * addition steps.  NB that doing the negations here as separate
2580      * steps is correct : an input NaN should come out with its sign
2581      * bit flipped if it is a negated-input.
2582      */
2583     TCGv_ptr fpst;
2584     TCGv_i32 vn, vm, vd;
2585
2586     /*
2587      * Present in VFPv4 only.
2588      * Note that we can't rely on the SIMDFMAC check alone, because
2589      * in a Neon-no-VFP core that ID register field will be non-zero.
2590      */
2591     if (!dc_isar_feature(aa32_simdfmac, s) ||
2592         !dc_isar_feature(aa32_fpsp_v2, s)) {
2593         return false;
2594     }
2595     /*
2596      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
2597      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
2598      */
2599     if (s->vec_len != 0 || s->vec_stride != 0) {
2600         return false;
2601     }
2602
2603     if (!vfp_access_check(s)) {
2604         return true;
2605     }
2606
2607     vn = tcg_temp_new_i32();
2608     vm = tcg_temp_new_i32();
2609     vd = tcg_temp_new_i32();
2610
2611     vfp_load_reg32(vn, a->vn);
2612     vfp_load_reg32(vm, a->vm);
2613     if (neg_n) {
2614         /* VFNMS, VFMS */
2615         gen_helper_vfp_negs(vn, vn);
2616     }
2617     vfp_load_reg32(vd, a->vd);
2618     if (neg_d) {
2619         /* VFNMA, VFNMS */
2620         gen_helper_vfp_negs(vd, vd);
2621     }
2622     fpst = fpstatus_ptr(FPST_FPCR);
2623     gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
2624     vfp_store_reg32(vd, a->vd);
2625
2626     tcg_temp_free_ptr(fpst);
2627     tcg_temp_free_i32(vn);
2628     tcg_temp_free_i32(vm);
2629     tcg_temp_free_i32(vd);
2630
2631     return true;
2632 }
2633
2634 static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
2635 {
2636     /*
2637      * VFNMA : fd = muladd(-fd,  fn, fm)
2638      * VFNMS : fd = muladd(-fd, -fn, fm)
2639      * VFMA  : fd = muladd( fd,  fn, fm)
2640      * VFMS  : fd = muladd( fd, -fn, fm)
2641      *
2642      * These are fused multiply-add, and must be done as one floating
2643      * point operation with no rounding between the multiplication and
2644      * addition steps.  NB that doing the negations here as separate
2645      * steps is correct : an input NaN should come out with its sign
2646      * bit flipped if it is a negated-input.
2647      */
2648     TCGv_ptr fpst;
2649     TCGv_i64 vn, vm, vd;
2650
2651     /*
2652      * Present in VFPv4 only.
2653      * Note that we can't rely on the SIMDFMAC check alone, because
2654      * in a Neon-no-VFP core that ID register field will be non-zero.
2655      */
2656     if (!dc_isar_feature(aa32_simdfmac, s) ||
2657         !dc_isar_feature(aa32_fpdp_v2, s)) {
2658         return false;
2659     }
2660     /*
2661      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
2662      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
2663      */
2664     if (s->vec_len != 0 || s->vec_stride != 0) {
2665         return false;
2666     }
2667
2668     /* UNDEF accesses to D16-D31 if they don't exist. */
2669     if (!dc_isar_feature(aa32_simd_r32, s) &&
2670         ((a->vd | a->vn | a->vm) & 0x10)) {
2671         return false;
2672     }
2673
2674     if (!vfp_access_check(s)) {
2675         return true;
2676     }
2677
2678     vn = tcg_temp_new_i64();
2679     vm = tcg_temp_new_i64();
2680     vd = tcg_temp_new_i64();
2681
2682     vfp_load_reg64(vn, a->vn);
2683     vfp_load_reg64(vm, a->vm);
2684     if (neg_n) {
2685         /* VFNMS, VFMS */
2686         gen_helper_vfp_negd(vn, vn);
2687     }
2688     vfp_load_reg64(vd, a->vd);
2689     if (neg_d) {
2690         /* VFNMA, VFNMS */
2691         gen_helper_vfp_negd(vd, vd);
2692     }
2693     fpst = fpstatus_ptr(FPST_FPCR);
2694     gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
2695     vfp_store_reg64(vd, a->vd);
2696
2697     tcg_temp_free_ptr(fpst);
2698     tcg_temp_free_i64(vn);
2699     tcg_temp_free_i64(vm);
2700     tcg_temp_free_i64(vd);
2701
2702     return true;
2703 }
2704
2705 #define MAKE_ONE_VFM_TRANS_FN(INSN, PREC, NEGN, NEGD)                   \
2706     static bool trans_##INSN##_##PREC(DisasContext *s,                  \
2707                                       arg_##INSN##_##PREC *a)           \
2708     {                                                                   \
2709         return do_vfm_##PREC(s, a, NEGN, NEGD);                         \
2710     }
2711
2712 #define MAKE_VFM_TRANS_FNS(PREC) \
2713     MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \
2714     MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \
2715     MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \
2716     MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true)
2717
2718 MAKE_VFM_TRANS_FNS(hp)
2719 MAKE_VFM_TRANS_FNS(sp)
2720 MAKE_VFM_TRANS_FNS(dp)
2721
2722 static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a)
2723 {
2724     TCGv_i32 fd;
2725
2726     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2727         return false;
2728     }
2729
2730     if (s->vec_len != 0 || s->vec_stride != 0) {
2731         return false;
2732     }
2733
2734     if (!vfp_access_check(s)) {
2735         return true;
2736     }
2737
2738     fd = tcg_const_i32(vfp_expand_imm(MO_16, a->imm));
2739     vfp_store_reg32(fd, a->vd);
2740     tcg_temp_free_i32(fd);
2741     return true;
2742 }
2743
2744 static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
2745 {
2746     uint32_t delta_d = 0;
2747     int veclen = s->vec_len;
2748     TCGv_i32 fd;
2749     uint32_t vd;
2750
2751     vd = a->vd;
2752
2753     if (!dc_isar_feature(aa32_fpsp_v3, s)) {
2754         return false;
2755     }
2756
2757     if (!dc_isar_feature(aa32_fpshvec, s) &&
2758         (veclen != 0 || s->vec_stride != 0)) {
2759         return false;
2760     }
2761
2762     if (!vfp_access_check(s)) {
2763         return true;
2764     }
2765
2766     if (veclen > 0) {
2767         /* Figure out what type of vector operation this is.  */
2768         if (vfp_sreg_is_scalar(vd)) {
2769             /* scalar */
2770             veclen = 0;
2771         } else {
2772             delta_d = s->vec_stride + 1;
2773         }
2774     }
2775
2776     fd = tcg_const_i32(vfp_expand_imm(MO_32, a->imm));
2777
2778     for (;;) {
2779         vfp_store_reg32(fd, vd);
2780
2781         if (veclen == 0) {
2782             break;
2783         }
2784
2785         /* Set up the operands for the next iteration */
2786         veclen--;
2787         vd = vfp_advance_sreg(vd, delta_d);
2788     }
2789
2790     tcg_temp_free_i32(fd);
2791     return true;
2792 }
2793
2794 static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
2795 {
2796     uint32_t delta_d = 0;
2797     int veclen = s->vec_len;
2798     TCGv_i64 fd;
2799     uint32_t vd;
2800
2801     vd = a->vd;
2802
2803     if (!dc_isar_feature(aa32_fpdp_v3, s)) {
2804         return false;
2805     }
2806
2807     /* UNDEF accesses to D16-D31 if they don't exist. */
2808     if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) {
2809         return false;
2810     }
2811
2812     if (!dc_isar_feature(aa32_fpshvec, s) &&
2813         (veclen != 0 || s->vec_stride != 0)) {
2814         return false;
2815     }
2816
2817     if (!vfp_access_check(s)) {
2818         return true;
2819     }
2820
2821     if (veclen > 0) {
2822         /* Figure out what type of vector operation this is.  */
2823         if (vfp_dreg_is_scalar(vd)) {
2824             /* scalar */
2825             veclen = 0;
2826         } else {
2827             delta_d = (s->vec_stride >> 1) + 1;
2828         }
2829     }
2830
2831     fd = tcg_const_i64(vfp_expand_imm(MO_64, a->imm));
2832
2833     for (;;) {
2834         vfp_store_reg64(fd, vd);
2835
2836         if (veclen == 0) {
2837             break;
2838         }
2839
2840         /* Set up the operands for the next iteration */
2841         veclen--;
2842         vd = vfp_advance_dreg(vd, delta_d);
2843     }
2844
2845     tcg_temp_free_i64(fd);
2846     return true;
2847 }
2848
2849 #define DO_VFP_2OP(INSN, PREC, FN, CHECK)                       \
2850     static bool trans_##INSN##_##PREC(DisasContext *s,          \
2851                                       arg_##INSN##_##PREC *a)   \
2852     {                                                           \
2853         if (!dc_isar_feature(CHECK, s)) {                       \
2854             return false;                                       \
2855         }                                                       \
2856         return do_vfp_2op_##PREC(s, FN, a->vd, a->vm);          \
2857     }
2858
2859 #define DO_VFP_VMOV(INSN, PREC, FN)                             \
2860     static bool trans_##INSN##_##PREC(DisasContext *s,          \
2861                                       arg_##INSN##_##PREC *a)   \
2862     {                                                           \
2863         if (!dc_isar_feature(aa32_fp##PREC##_v2, s) &&          \
2864             !dc_isar_feature(aa32_mve, s)) {                    \
2865             return false;                                       \
2866         }                                                       \
2867         return do_vfp_2op_##PREC(s, FN, a->vd, a->vm);          \
2868     }
2869
2870 DO_VFP_VMOV(VMOV_reg, sp, tcg_gen_mov_i32)
2871 DO_VFP_VMOV(VMOV_reg, dp, tcg_gen_mov_i64)
2872
2873 DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh, aa32_fp16_arith)
2874 DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss, aa32_fpsp_v2)
2875 DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd, aa32_fpdp_v2)
2876
2877 DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh, aa32_fp16_arith)
2878 DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs, aa32_fpsp_v2)
2879 DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd, aa32_fpdp_v2)
2880
2881 static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
2882 {
2883     gen_helper_vfp_sqrth(vd, vm, cpu_env);
2884 }
2885
2886 static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
2887 {
2888     gen_helper_vfp_sqrts(vd, vm, cpu_env);
2889 }
2890
2891 static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
2892 {
2893     gen_helper_vfp_sqrtd(vd, vm, cpu_env);
2894 }
2895
2896 DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp, aa32_fp16_arith)
2897 DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp, aa32_fpsp_v2)
2898 DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp, aa32_fpdp_v2)
2899
2900 static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
2901 {
2902     TCGv_i32 vd, vm;
2903
2904     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2905         return false;
2906     }
2907
2908     /* Vm/M bits must be zero for the Z variant */
2909     if (a->z && a->vm != 0) {
2910         return false;
2911     }
2912
2913     if (!vfp_access_check(s)) {
2914         return true;
2915     }
2916
2917     vd = tcg_temp_new_i32();
2918     vm = tcg_temp_new_i32();
2919
2920     vfp_load_reg32(vd, a->vd);
2921     if (a->z) {
2922         tcg_gen_movi_i32(vm, 0);
2923     } else {
2924         vfp_load_reg32(vm, a->vm);
2925     }
2926
2927     if (a->e) {
2928         gen_helper_vfp_cmpeh(vd, vm, cpu_env);
2929     } else {
2930         gen_helper_vfp_cmph(vd, vm, cpu_env);
2931     }
2932
2933     tcg_temp_free_i32(vd);
2934     tcg_temp_free_i32(vm);
2935
2936     return true;
2937 }
2938
2939 static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
2940 {
2941     TCGv_i32 vd, vm;
2942
2943     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
2944         return false;
2945     }
2946
2947     /* Vm/M bits must be zero for the Z variant */
2948     if (a->z && a->vm != 0) {
2949         return false;
2950     }
2951
2952     if (!vfp_access_check(s)) {
2953         return true;
2954     }
2955
2956     vd = tcg_temp_new_i32();
2957     vm = tcg_temp_new_i32();
2958
2959     vfp_load_reg32(vd, a->vd);
2960     if (a->z) {
2961         tcg_gen_movi_i32(vm, 0);
2962     } else {
2963         vfp_load_reg32(vm, a->vm);
2964     }
2965
2966     if (a->e) {
2967         gen_helper_vfp_cmpes(vd, vm, cpu_env);
2968     } else {
2969         gen_helper_vfp_cmps(vd, vm, cpu_env);
2970     }
2971
2972     tcg_temp_free_i32(vd);
2973     tcg_temp_free_i32(vm);
2974
2975     return true;
2976 }
2977
2978 static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
2979 {
2980     TCGv_i64 vd, vm;
2981
2982     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2983         return false;
2984     }
2985
2986     /* Vm/M bits must be zero for the Z variant */
2987     if (a->z && a->vm != 0) {
2988         return false;
2989     }
2990
2991     /* UNDEF accesses to D16-D31 if they don't exist. */
2992     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2993         return false;
2994     }
2995
2996     if (!vfp_access_check(s)) {
2997         return true;
2998     }
2999
3000     vd = tcg_temp_new_i64();
3001     vm = tcg_temp_new_i64();
3002
3003     vfp_load_reg64(vd, a->vd);
3004     if (a->z) {
3005         tcg_gen_movi_i64(vm, 0);
3006     } else {
3007         vfp_load_reg64(vm, a->vm);
3008     }
3009
3010     if (a->e) {
3011         gen_helper_vfp_cmped(vd, vm, cpu_env);
3012     } else {
3013         gen_helper_vfp_cmpd(vd, vm, cpu_env);
3014     }
3015
3016     tcg_temp_free_i64(vd);
3017     tcg_temp_free_i64(vm);
3018
3019     return true;
3020 }
3021
3022 static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
3023 {
3024     TCGv_ptr fpst;
3025     TCGv_i32 ahp_mode;
3026     TCGv_i32 tmp;
3027
3028     if (!dc_isar_feature(aa32_fp16_spconv, s)) {
3029         return false;
3030     }
3031
3032     if (!vfp_access_check(s)) {
3033         return true;
3034     }
3035
3036     fpst = fpstatus_ptr(FPST_FPCR);
3037     ahp_mode = get_ahp_flag();
3038     tmp = tcg_temp_new_i32();
3039     /* The T bit tells us if we want the low or high 16 bits of Vm */
3040     tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
3041     gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
3042     vfp_store_reg32(tmp, a->vd);
3043     tcg_temp_free_i32(ahp_mode);
3044     tcg_temp_free_ptr(fpst);
3045     tcg_temp_free_i32(tmp);
3046     return true;
3047 }
3048
3049 static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
3050 {
3051     TCGv_ptr fpst;
3052     TCGv_i32 ahp_mode;
3053     TCGv_i32 tmp;
3054     TCGv_i64 vd;
3055
3056     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3057         return false;
3058     }
3059
3060     if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
3061         return false;
3062     }
3063
3064     /* UNDEF accesses to D16-D31 if they don't exist. */
3065     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd  & 0x10)) {
3066         return false;
3067     }
3068
3069     if (!vfp_access_check(s)) {
3070         return true;
3071     }
3072
3073     fpst = fpstatus_ptr(FPST_FPCR);
3074     ahp_mode = get_ahp_flag();
3075     tmp = tcg_temp_new_i32();
3076     /* The T bit tells us if we want the low or high 16 bits of Vm */
3077     tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
3078     vd = tcg_temp_new_i64();
3079     gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
3080     vfp_store_reg64(vd, a->vd);
3081     tcg_temp_free_i32(ahp_mode);
3082     tcg_temp_free_ptr(fpst);
3083     tcg_temp_free_i32(tmp);
3084     tcg_temp_free_i64(vd);
3085     return true;
3086 }
3087
3088 static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
3089 {
3090     TCGv_ptr fpst;
3091     TCGv_i32 ahp_mode;
3092     TCGv_i32 tmp;
3093
3094     if (!dc_isar_feature(aa32_fp16_spconv, s)) {
3095         return false;
3096     }
3097
3098     if (!vfp_access_check(s)) {
3099         return true;
3100     }
3101
3102     fpst = fpstatus_ptr(FPST_FPCR);
3103     ahp_mode = get_ahp_flag();
3104     tmp = tcg_temp_new_i32();
3105
3106     vfp_load_reg32(tmp, a->vm);
3107     gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
3108     tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
3109     tcg_temp_free_i32(ahp_mode);
3110     tcg_temp_free_ptr(fpst);
3111     tcg_temp_free_i32(tmp);
3112     return true;
3113 }
3114
3115 static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
3116 {
3117     TCGv_ptr fpst;
3118     TCGv_i32 ahp_mode;
3119     TCGv_i32 tmp;
3120     TCGv_i64 vm;
3121
3122     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3123         return false;
3124     }
3125
3126     if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
3127         return false;
3128     }
3129
3130     /* UNDEF accesses to D16-D31 if they don't exist. */
3131     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm  & 0x10)) {
3132         return false;
3133     }
3134
3135     if (!vfp_access_check(s)) {
3136         return true;
3137     }
3138
3139     fpst = fpstatus_ptr(FPST_FPCR);
3140     ahp_mode = get_ahp_flag();
3141     tmp = tcg_temp_new_i32();
3142     vm = tcg_temp_new_i64();
3143
3144     vfp_load_reg64(vm, a->vm);
3145     gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
3146     tcg_temp_free_i64(vm);
3147     tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
3148     tcg_temp_free_i32(ahp_mode);
3149     tcg_temp_free_ptr(fpst);
3150     tcg_temp_free_i32(tmp);
3151     return true;
3152 }
3153
3154 static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
3155 {
3156     TCGv_ptr fpst;
3157     TCGv_i32 tmp;
3158
3159     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3160         return false;
3161     }
3162
3163     if (!vfp_access_check(s)) {
3164         return true;
3165     }
3166
3167     tmp = tcg_temp_new_i32();
3168     vfp_load_reg32(tmp, a->vm);
3169     fpst = fpstatus_ptr(FPST_FPCR_F16);
3170     gen_helper_rinth(tmp, tmp, fpst);
3171     vfp_store_reg32(tmp, a->vd);
3172     tcg_temp_free_ptr(fpst);
3173     tcg_temp_free_i32(tmp);
3174     return true;
3175 }
3176
3177 static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
3178 {
3179     TCGv_ptr fpst;
3180     TCGv_i32 tmp;
3181
3182     if (!dc_isar_feature(aa32_vrint, s)) {
3183         return false;
3184     }
3185
3186     if (!vfp_access_check(s)) {
3187         return true;
3188     }
3189
3190     tmp = tcg_temp_new_i32();
3191     vfp_load_reg32(tmp, a->vm);
3192     fpst = fpstatus_ptr(FPST_FPCR);
3193     gen_helper_rints(tmp, tmp, fpst);
3194     vfp_store_reg32(tmp, a->vd);
3195     tcg_temp_free_ptr(fpst);
3196     tcg_temp_free_i32(tmp);
3197     return true;
3198 }
3199
3200 static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
3201 {
3202     TCGv_ptr fpst;
3203     TCGv_i64 tmp;
3204
3205     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3206         return false;
3207     }
3208
3209     if (!dc_isar_feature(aa32_vrint, s)) {
3210         return false;
3211     }
3212
3213     /* UNDEF accesses to D16-D31 if they don't exist. */
3214     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
3215         return false;
3216     }
3217
3218     if (!vfp_access_check(s)) {
3219         return true;
3220     }
3221
3222     tmp = tcg_temp_new_i64();
3223     vfp_load_reg64(tmp, a->vm);
3224     fpst = fpstatus_ptr(FPST_FPCR);
3225     gen_helper_rintd(tmp, tmp, fpst);
3226     vfp_store_reg64(tmp, a->vd);
3227     tcg_temp_free_ptr(fpst);
3228     tcg_temp_free_i64(tmp);
3229     return true;
3230 }
3231
3232 static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
3233 {
3234     TCGv_ptr fpst;
3235     TCGv_i32 tmp;
3236     TCGv_i32 tcg_rmode;
3237
3238     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3239         return false;
3240     }
3241
3242     if (!vfp_access_check(s)) {
3243         return true;
3244     }
3245
3246     tmp = tcg_temp_new_i32();
3247     vfp_load_reg32(tmp, a->vm);
3248     fpst = fpstatus_ptr(FPST_FPCR_F16);
3249     tcg_rmode = tcg_const_i32(float_round_to_zero);
3250     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3251     gen_helper_rinth(tmp, tmp, fpst);
3252     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3253     vfp_store_reg32(tmp, a->vd);
3254     tcg_temp_free_ptr(fpst);
3255     tcg_temp_free_i32(tcg_rmode);
3256     tcg_temp_free_i32(tmp);
3257     return true;
3258 }
3259
3260 static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
3261 {
3262     TCGv_ptr fpst;
3263     TCGv_i32 tmp;
3264     TCGv_i32 tcg_rmode;
3265
3266     if (!dc_isar_feature(aa32_vrint, s)) {
3267         return false;
3268     }
3269
3270     if (!vfp_access_check(s)) {
3271         return true;
3272     }
3273
3274     tmp = tcg_temp_new_i32();
3275     vfp_load_reg32(tmp, a->vm);
3276     fpst = fpstatus_ptr(FPST_FPCR);
3277     tcg_rmode = tcg_const_i32(float_round_to_zero);
3278     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3279     gen_helper_rints(tmp, tmp, fpst);
3280     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3281     vfp_store_reg32(tmp, a->vd);
3282     tcg_temp_free_ptr(fpst);
3283     tcg_temp_free_i32(tcg_rmode);
3284     tcg_temp_free_i32(tmp);
3285     return true;
3286 }
3287
3288 static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
3289 {
3290     TCGv_ptr fpst;
3291     TCGv_i64 tmp;
3292     TCGv_i32 tcg_rmode;
3293
3294     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3295         return false;
3296     }
3297
3298     if (!dc_isar_feature(aa32_vrint, s)) {
3299         return false;
3300     }
3301
3302     /* UNDEF accesses to D16-D31 if they don't exist. */
3303     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
3304         return false;
3305     }
3306
3307     if (!vfp_access_check(s)) {
3308         return true;
3309     }
3310
3311     tmp = tcg_temp_new_i64();
3312     vfp_load_reg64(tmp, a->vm);
3313     fpst = fpstatus_ptr(FPST_FPCR);
3314     tcg_rmode = tcg_const_i32(float_round_to_zero);
3315     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3316     gen_helper_rintd(tmp, tmp, fpst);
3317     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3318     vfp_store_reg64(tmp, a->vd);
3319     tcg_temp_free_ptr(fpst);
3320     tcg_temp_free_i64(tmp);
3321     tcg_temp_free_i32(tcg_rmode);
3322     return true;
3323 }
3324
3325 static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
3326 {
3327     TCGv_ptr fpst;
3328     TCGv_i32 tmp;
3329
3330     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3331         return false;
3332     }
3333
3334     if (!vfp_access_check(s)) {
3335         return true;
3336     }
3337
3338     tmp = tcg_temp_new_i32();
3339     vfp_load_reg32(tmp, a->vm);
3340     fpst = fpstatus_ptr(FPST_FPCR_F16);
3341     gen_helper_rinth_exact(tmp, tmp, fpst);
3342     vfp_store_reg32(tmp, a->vd);
3343     tcg_temp_free_ptr(fpst);
3344     tcg_temp_free_i32(tmp);
3345     return true;
3346 }
3347
3348 static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
3349 {
3350     TCGv_ptr fpst;
3351     TCGv_i32 tmp;
3352
3353     if (!dc_isar_feature(aa32_vrint, s)) {
3354         return false;
3355     }
3356
3357     if (!vfp_access_check(s)) {
3358         return true;
3359     }
3360
3361     tmp = tcg_temp_new_i32();
3362     vfp_load_reg32(tmp, a->vm);
3363     fpst = fpstatus_ptr(FPST_FPCR);
3364     gen_helper_rints_exact(tmp, tmp, fpst);
3365     vfp_store_reg32(tmp, a->vd);
3366     tcg_temp_free_ptr(fpst);
3367     tcg_temp_free_i32(tmp);
3368     return true;
3369 }
3370
3371 static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
3372 {
3373     TCGv_ptr fpst;
3374     TCGv_i64 tmp;
3375
3376     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3377         return false;
3378     }
3379
3380     if (!dc_isar_feature(aa32_vrint, s)) {
3381         return false;
3382     }
3383
3384     /* UNDEF accesses to D16-D31 if they don't exist. */
3385     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
3386         return false;
3387     }
3388
3389     if (!vfp_access_check(s)) {
3390         return true;
3391     }
3392
3393     tmp = tcg_temp_new_i64();
3394     vfp_load_reg64(tmp, a->vm);
3395     fpst = fpstatus_ptr(FPST_FPCR);
3396     gen_helper_rintd_exact(tmp, tmp, fpst);
3397     vfp_store_reg64(tmp, a->vd);
3398     tcg_temp_free_ptr(fpst);
3399     tcg_temp_free_i64(tmp);
3400     return true;
3401 }
3402
3403 static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
3404 {
3405     TCGv_i64 vd;
3406     TCGv_i32 vm;
3407
3408     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3409         return false;
3410     }
3411
3412     /* UNDEF accesses to D16-D31 if they don't exist. */
3413     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
3414         return false;
3415     }
3416
3417     if (!vfp_access_check(s)) {
3418         return true;
3419     }
3420
3421     vm = tcg_temp_new_i32();
3422     vd = tcg_temp_new_i64();
3423     vfp_load_reg32(vm, a->vm);
3424     gen_helper_vfp_fcvtds(vd, vm, cpu_env);
3425     vfp_store_reg64(vd, a->vd);
3426     tcg_temp_free_i32(vm);
3427     tcg_temp_free_i64(vd);
3428     return true;
3429 }
3430
3431 static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
3432 {
3433     TCGv_i64 vm;
3434     TCGv_i32 vd;
3435
3436     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3437         return false;
3438     }
3439
3440     /* UNDEF accesses to D16-D31 if they don't exist. */
3441     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
3442         return false;
3443     }
3444
3445     if (!vfp_access_check(s)) {
3446         return true;
3447     }
3448
3449     vd = tcg_temp_new_i32();
3450     vm = tcg_temp_new_i64();
3451     vfp_load_reg64(vm, a->vm);
3452     gen_helper_vfp_fcvtsd(vd, vm, cpu_env);
3453     vfp_store_reg32(vd, a->vd);
3454     tcg_temp_free_i32(vd);
3455     tcg_temp_free_i64(vm);
3456     return true;
3457 }
3458
3459 static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
3460 {
3461     TCGv_i32 vm;
3462     TCGv_ptr fpst;
3463
3464     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3465         return false;
3466     }
3467
3468     if (!vfp_access_check(s)) {
3469         return true;
3470     }
3471
3472     vm = tcg_temp_new_i32();
3473     vfp_load_reg32(vm, a->vm);
3474     fpst = fpstatus_ptr(FPST_FPCR_F16);
3475     if (a->s) {
3476         /* i32 -> f16 */
3477         gen_helper_vfp_sitoh(vm, vm, fpst);
3478     } else {
3479         /* u32 -> f16 */
3480         gen_helper_vfp_uitoh(vm, vm, fpst);
3481     }
3482     vfp_store_reg32(vm, a->vd);
3483     tcg_temp_free_i32(vm);
3484     tcg_temp_free_ptr(fpst);
3485     return true;
3486 }
3487
3488 static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
3489 {
3490     TCGv_i32 vm;
3491     TCGv_ptr fpst;
3492
3493     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
3494         return false;
3495     }
3496
3497     if (!vfp_access_check(s)) {
3498         return true;
3499     }
3500
3501     vm = tcg_temp_new_i32();
3502     vfp_load_reg32(vm, a->vm);
3503     fpst = fpstatus_ptr(FPST_FPCR);
3504     if (a->s) {
3505         /* i32 -> f32 */
3506         gen_helper_vfp_sitos(vm, vm, fpst);
3507     } else {
3508         /* u32 -> f32 */
3509         gen_helper_vfp_uitos(vm, vm, fpst);
3510     }
3511     vfp_store_reg32(vm, a->vd);
3512     tcg_temp_free_i32(vm);
3513     tcg_temp_free_ptr(fpst);
3514     return true;
3515 }
3516
3517 static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
3518 {
3519     TCGv_i32 vm;
3520     TCGv_i64 vd;
3521     TCGv_ptr fpst;
3522
3523     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3524         return false;
3525     }
3526
3527     /* UNDEF accesses to D16-D31 if they don't exist. */
3528     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
3529         return false;
3530     }
3531
3532     if (!vfp_access_check(s)) {
3533         return true;
3534     }
3535
3536     vm = tcg_temp_new_i32();
3537     vd = tcg_temp_new_i64();
3538     vfp_load_reg32(vm, a->vm);
3539     fpst = fpstatus_ptr(FPST_FPCR);
3540     if (a->s) {
3541         /* i32 -> f64 */
3542         gen_helper_vfp_sitod(vd, vm, fpst);
3543     } else {
3544         /* u32 -> f64 */
3545         gen_helper_vfp_uitod(vd, vm, fpst);
3546     }
3547     vfp_store_reg64(vd, a->vd);
3548     tcg_temp_free_i32(vm);
3549     tcg_temp_free_i64(vd);
3550     tcg_temp_free_ptr(fpst);
3551     return true;
3552 }
3553
3554 static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
3555 {
3556     TCGv_i32 vd;
3557     TCGv_i64 vm;
3558
3559     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3560         return false;
3561     }
3562
3563     if (!dc_isar_feature(aa32_jscvt, s)) {
3564         return false;
3565     }
3566
3567     /* UNDEF accesses to D16-D31 if they don't exist. */
3568     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
3569         return false;
3570     }
3571
3572     if (!vfp_access_check(s)) {
3573         return true;
3574     }
3575
3576     vm = tcg_temp_new_i64();
3577     vd = tcg_temp_new_i32();
3578     vfp_load_reg64(vm, a->vm);
3579     gen_helper_vjcvt(vd, vm, cpu_env);
3580     vfp_store_reg32(vd, a->vd);
3581     tcg_temp_free_i64(vm);
3582     tcg_temp_free_i32(vd);
3583     return true;
3584 }
3585
3586 static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
3587 {
3588     TCGv_i32 vd, shift;
3589     TCGv_ptr fpst;
3590     int frac_bits;
3591
3592     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3593         return false;
3594     }
3595
3596     if (!vfp_access_check(s)) {
3597         return true;
3598     }
3599
3600     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
3601
3602     vd = tcg_temp_new_i32();
3603     vfp_load_reg32(vd, a->vd);
3604
3605     fpst = fpstatus_ptr(FPST_FPCR_F16);
3606     shift = tcg_const_i32(frac_bits);
3607
3608     /* Switch on op:U:sx bits */
3609     switch (a->opc) {
3610     case 0:
3611         gen_helper_vfp_shtoh_round_to_nearest(vd, vd, shift, fpst);
3612         break;
3613     case 1:
3614         gen_helper_vfp_sltoh_round_to_nearest(vd, vd, shift, fpst);
3615         break;
3616     case 2:
3617         gen_helper_vfp_uhtoh_round_to_nearest(vd, vd, shift, fpst);
3618         break;
3619     case 3:
3620         gen_helper_vfp_ultoh_round_to_nearest(vd, vd, shift, fpst);
3621         break;
3622     case 4:
3623         gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst);
3624         break;
3625     case 5:
3626         gen_helper_vfp_toslh_round_to_zero(vd, vd, shift, fpst);
3627         break;
3628     case 6:
3629         gen_helper_vfp_touhh_round_to_zero(vd, vd, shift, fpst);
3630         break;
3631     case 7:
3632         gen_helper_vfp_toulh_round_to_zero(vd, vd, shift, fpst);
3633         break;
3634     default:
3635         g_assert_not_reached();
3636     }
3637
3638     vfp_store_reg32(vd, a->vd);
3639     tcg_temp_free_i32(vd);
3640     tcg_temp_free_i32(shift);
3641     tcg_temp_free_ptr(fpst);
3642     return true;
3643 }
3644
3645 static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
3646 {
3647     TCGv_i32 vd, shift;
3648     TCGv_ptr fpst;
3649     int frac_bits;
3650
3651     if (!dc_isar_feature(aa32_fpsp_v3, s)) {
3652         return false;
3653     }
3654
3655     if (!vfp_access_check(s)) {
3656         return true;
3657     }
3658
3659     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
3660
3661     vd = tcg_temp_new_i32();
3662     vfp_load_reg32(vd, a->vd);
3663
3664     fpst = fpstatus_ptr(FPST_FPCR);
3665     shift = tcg_const_i32(frac_bits);
3666
3667     /* Switch on op:U:sx bits */
3668     switch (a->opc) {
3669     case 0:
3670         gen_helper_vfp_shtos_round_to_nearest(vd, vd, shift, fpst);
3671         break;
3672     case 1:
3673         gen_helper_vfp_sltos_round_to_nearest(vd, vd, shift, fpst);
3674         break;
3675     case 2:
3676         gen_helper_vfp_uhtos_round_to_nearest(vd, vd, shift, fpst);
3677         break;
3678     case 3:
3679         gen_helper_vfp_ultos_round_to_nearest(vd, vd, shift, fpst);
3680         break;
3681     case 4:
3682         gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst);
3683         break;
3684     case 5:
3685         gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst);
3686         break;
3687     case 6:
3688         gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst);
3689         break;
3690     case 7:
3691         gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst);
3692         break;
3693     default:
3694         g_assert_not_reached();
3695     }
3696
3697     vfp_store_reg32(vd, a->vd);
3698     tcg_temp_free_i32(vd);
3699     tcg_temp_free_i32(shift);
3700     tcg_temp_free_ptr(fpst);
3701     return true;
3702 }
3703
3704 static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
3705 {
3706     TCGv_i64 vd;
3707     TCGv_i32 shift;
3708     TCGv_ptr fpst;
3709     int frac_bits;
3710
3711     if (!dc_isar_feature(aa32_fpdp_v3, s)) {
3712         return false;
3713     }
3714
3715     /* UNDEF accesses to D16-D31 if they don't exist. */
3716     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
3717         return false;
3718     }
3719
3720     if (!vfp_access_check(s)) {
3721         return true;
3722     }
3723
3724     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
3725
3726     vd = tcg_temp_new_i64();
3727     vfp_load_reg64(vd, a->vd);
3728
3729     fpst = fpstatus_ptr(FPST_FPCR);
3730     shift = tcg_const_i32(frac_bits);
3731
3732     /* Switch on op:U:sx bits */
3733     switch (a->opc) {
3734     case 0:
3735         gen_helper_vfp_shtod_round_to_nearest(vd, vd, shift, fpst);
3736         break;
3737     case 1:
3738         gen_helper_vfp_sltod_round_to_nearest(vd, vd, shift, fpst);
3739         break;
3740     case 2:
3741         gen_helper_vfp_uhtod_round_to_nearest(vd, vd, shift, fpst);
3742         break;
3743     case 3:
3744         gen_helper_vfp_ultod_round_to_nearest(vd, vd, shift, fpst);
3745         break;
3746     case 4:
3747         gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst);
3748         break;
3749     case 5:
3750         gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst);
3751         break;
3752     case 6:
3753         gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst);
3754         break;
3755     case 7:
3756         gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst);
3757         break;
3758     default:
3759         g_assert_not_reached();
3760     }
3761
3762     vfp_store_reg64(vd, a->vd);
3763     tcg_temp_free_i64(vd);
3764     tcg_temp_free_i32(shift);
3765     tcg_temp_free_ptr(fpst);
3766     return true;
3767 }
3768
3769 static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
3770 {
3771     TCGv_i32 vm;
3772     TCGv_ptr fpst;
3773
3774     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3775         return false;
3776     }
3777
3778     if (!vfp_access_check(s)) {
3779         return true;
3780     }
3781
3782     fpst = fpstatus_ptr(FPST_FPCR_F16);
3783     vm = tcg_temp_new_i32();
3784     vfp_load_reg32(vm, a->vm);
3785
3786     if (a->s) {
3787         if (a->rz) {
3788             gen_helper_vfp_tosizh(vm, vm, fpst);
3789         } else {
3790             gen_helper_vfp_tosih(vm, vm, fpst);
3791         }
3792     } else {
3793         if (a->rz) {
3794             gen_helper_vfp_touizh(vm, vm, fpst);
3795         } else {
3796             gen_helper_vfp_touih(vm, vm, fpst);
3797         }
3798     }
3799     vfp_store_reg32(vm, a->vd);
3800     tcg_temp_free_i32(vm);
3801     tcg_temp_free_ptr(fpst);
3802     return true;
3803 }
3804
3805 static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
3806 {
3807     TCGv_i32 vm;
3808     TCGv_ptr fpst;
3809
3810     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
3811         return false;
3812     }
3813
3814     if (!vfp_access_check(s)) {
3815         return true;
3816     }
3817
3818     fpst = fpstatus_ptr(FPST_FPCR);
3819     vm = tcg_temp_new_i32();
3820     vfp_load_reg32(vm, a->vm);
3821
3822     if (a->s) {
3823         if (a->rz) {
3824             gen_helper_vfp_tosizs(vm, vm, fpst);
3825         } else {
3826             gen_helper_vfp_tosis(vm, vm, fpst);
3827         }
3828     } else {
3829         if (a->rz) {
3830             gen_helper_vfp_touizs(vm, vm, fpst);
3831         } else {
3832             gen_helper_vfp_touis(vm, vm, fpst);
3833         }
3834     }
3835     vfp_store_reg32(vm, a->vd);
3836     tcg_temp_free_i32(vm);
3837     tcg_temp_free_ptr(fpst);
3838     return true;
3839 }
3840
3841 static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
3842 {
3843     TCGv_i32 vd;
3844     TCGv_i64 vm;
3845     TCGv_ptr fpst;
3846
3847     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3848         return false;
3849     }
3850
3851     /* UNDEF accesses to D16-D31 if they don't exist. */
3852     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
3853         return false;
3854     }
3855
3856     if (!vfp_access_check(s)) {
3857         return true;
3858     }
3859
3860     fpst = fpstatus_ptr(FPST_FPCR);
3861     vm = tcg_temp_new_i64();
3862     vd = tcg_temp_new_i32();
3863     vfp_load_reg64(vm, a->vm);
3864
3865     if (a->s) {
3866         if (a->rz) {
3867             gen_helper_vfp_tosizd(vd, vm, fpst);
3868         } else {
3869             gen_helper_vfp_tosid(vd, vm, fpst);
3870         }
3871     } else {
3872         if (a->rz) {
3873             gen_helper_vfp_touizd(vd, vm, fpst);
3874         } else {
3875             gen_helper_vfp_touid(vd, vm, fpst);
3876         }
3877     }
3878     vfp_store_reg32(vd, a->vd);
3879     tcg_temp_free_i32(vd);
3880     tcg_temp_free_i64(vm);
3881     tcg_temp_free_ptr(fpst);
3882     return true;
3883 }
3884
3885 static bool trans_VINS(DisasContext *s, arg_VINS *a)
3886 {
3887     TCGv_i32 rd, rm;
3888
3889     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3890         return false;
3891     }
3892
3893     if (s->vec_len != 0 || s->vec_stride != 0) {
3894         return false;
3895     }
3896
3897     if (!vfp_access_check(s)) {
3898         return true;
3899     }
3900
3901     /* Insert low half of Vm into high half of Vd */
3902     rm = tcg_temp_new_i32();
3903     rd = tcg_temp_new_i32();
3904     vfp_load_reg32(rm, a->vm);
3905     vfp_load_reg32(rd, a->vd);
3906     tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
3907     vfp_store_reg32(rd, a->vd);
3908     tcg_temp_free_i32(rm);
3909     tcg_temp_free_i32(rd);
3910     return true;
3911 }
3912
3913 static bool trans_VMOVX(DisasContext *s, arg_VINS *a)
3914 {
3915     TCGv_i32 rm;
3916
3917     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3918         return false;
3919     }
3920
3921     if (s->vec_len != 0 || s->vec_stride != 0) {
3922         return false;
3923     }
3924
3925     if (!vfp_access_check(s)) {
3926         return true;
3927     }
3928
3929     /* Set Vd to high half of Vm */
3930     rm = tcg_temp_new_i32();
3931     vfp_load_reg32(rm, a->vm);
3932     tcg_gen_shri_i32(rm, rm, 16);
3933     vfp_store_reg32(rm, a->vd);
3934     tcg_temp_free_i32(rm);
3935     return true;
3936 }