/*
 * ARM translation: AArch32 VFP instructions
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 * Copyright (c) 2019 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

/*
 * This file is intended to be included from translate.c; it uses
 * some macros and definitions provided by that file.
 * It might be possible to convert it to a standalone .c file eventually.
 */
/* Include the generated VFP decoder */
#include "decode-vfp.c.inc"
#include "decode-vfp-uncond.c.inc"
/*
 * The imm8 encodes the sign bit, enough bits to represent an exponent in
 * the range 01....1xx to 10....0xx, and the most significant 4 bits of
 * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
 */
uint64_t vfp_expand_imm(int size, uint8_t imm8)
{
    uint64_t imm;

    switch (size) {
    case MO_64:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
              (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
              extract32(imm8, 0, 6);
        imm <<= 48;
        break;
    case MO_32:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
              (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
              (extract32(imm8, 0, 6) << 3);
        imm <<= 16;
        break;
    case MO_16:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
              (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
              (extract32(imm8, 0, 6) << 6);
        break;
    default:
        g_assert_not_reached();
    }
    return imm;
}
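/*
 * Worked example (added; not in the original source): for the common
 * encoding of 1.0, imm8 == 0x70, vfp_expand_imm(MO_32, 0x70) computes
 * (0x3e00 | (0x30 << 3)) << 16 == 0x3f800000, which is the IEEE
 * single-precision bit pattern of 1.0.
 */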
/*
 * Return the offset of a 16-bit half of the specified VFP single-precision
 * register. If top is true, returns the top 16 bits; otherwise the bottom
 * 16 bits.
 */
static inline long vfp_f16_offset(unsigned reg, bool top)
{
    long offs = vfp_reg_offset(false, reg);
#ifdef HOST_WORDS_BIGENDIAN
    if (!top) {
        offs += 2;
    }
#else
    if (top) {
        offs += 2;
    }
#endif
    return offs;
}
/*
 * Generate code for M-profile lazy FP state preservation if needed;
 * this corresponds to the pseudocode PreserveFPState() function.
 */
static void gen_preserve_fp_state(DisasContext *s)
{
    if (s->v7m_lspact) {
        /*
         * Lazy state saving affects external memory and also the NVIC,
         * so we must mark it as an IO operation for icount (and cause
         * this to be the last insn in the TB).
         */
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            s->base.is_jmp = DISAS_UPDATE_EXIT;
            gen_io_start();
        }
        gen_helper_v7m_preserve_fp_state(cpu_env);
        /*
         * If the preserve_fp_state helper doesn't throw an exception
         * then it will clear LSPACT; we don't need to repeat this for
         * any further FP insns in this TB.
         */
        s->v7m_lspact = false;
    }
}
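/*
 * Note (added): s->v7m_lspact in gen_preserve_fp_state() above is the
 * translation-time copy of FPCCR.LSPACT; when it is already clear there
 * is no lazy FP state pending and the function emits no code at all.
 */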
/*
 * Check that VFP access is enabled. If it is, do the necessary
 * M-profile lazy-FP handling and then return true.
 * If not, emit code to generate an appropriate exception and
 * return false.
 * The ignore_vfp_enabled argument specifies that we should ignore
 * whether VFP is enabled via FPEXC[EN]: this should be true for FMXR/FMRX
 * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
 */
static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
{
    if (s->fp_excp_el) {
        /* M-profile handled this earlier, in disas_m_nocp() */
        assert(!arm_dc_feature(s, ARM_FEATURE_M));
        gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
                           syn_fp_access_trap(1, 0xe, false),
                           s->fp_excp_el);
        return false;
    }

    if (!s->vfp_enabled && !ignore_vfp_enabled) {
        assert(!arm_dc_feature(s, ARM_FEATURE_M));
        unallocated_encoding(s);
        return false;
    }

    if (arm_dc_feature(s, ARM_FEATURE_M)) {
        /* Handle M-profile lazy FP state mechanics */

        /* Trigger lazy-state preservation if necessary */
        gen_preserve_fp_state(s);

        /* Update ownership of FP context: set FPCCR.S to match current state */
        if (s->v8m_fpccr_s_wrong) {
            TCGv_i32 tmp;

            tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
            if (s->v8m_secure) {
                tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
            } else {
                tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
            }
            store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
            /* Don't need to do this for any further FP insns in this TB */
            s->v8m_fpccr_s_wrong = false;
        }

        if (s->v7m_new_fp_ctxt_needed) {
            /*
             * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA
             * and the FPSCR.
             */
            TCGv_i32 control, fpscr;
            uint32_t bits = R_V7M_CONTROL_FPCA_MASK;

            fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
            gen_helper_vfp_set_fpscr(cpu_env, fpscr);
            tcg_temp_free_i32(fpscr);
            /*
             * We don't need to arrange to end the TB, because the only
             * parts of FPSCR which we cache in the TB flags are the VECLEN
             * and VECSTRIDE, and those don't exist for M-profile.
             */

            if (s->v8m_secure) {
                bits |= R_V7M_CONTROL_SFPA_MASK;
            }
            control = load_cpu_field(v7m.control[M_REG_S]);
            tcg_gen_ori_i32(control, control, bits);
            store_cpu_field(control, v7m.control[M_REG_S]);
            /* Don't need to do this for any further FP insns in this TB */
            s->v7m_new_fp_ctxt_needed = false;
        }
    }

    return true;
}
/*
 * The most usual kind of VFP access check, for everything except
 * FMXR/FMRX to the always-available special registers.
 */
static bool vfp_access_check(DisasContext *s)
{
    return full_vfp_access_check(s, false);
}
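/*
 * Added note: VSEL below writes Vn to Vd when the encoded condition
 * holds for the current CPSR.NZCV flags and Vm otherwise; for example
 * (assuming standard Arm VSEL semantics) VSELGE.F32 s0, s1, s2 puts
 * s1 into s0 when N == V and s2 into s0 otherwise.
 */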
static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
{
    uint32_t rd, rn, rm;
    int sz = a->sz;

    if (!dc_isar_feature(aa32_vsel, s)) {
        return false;
    }

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vm | a->vn | a->vd) & 0x10)) {
        return false;
    }

    rd = a->vd;
    rn = a->vn;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    if (sz == 3) {
        TCGv_i64 frn, frm, dest;
        TCGv_i64 tmp, zero, zf, nf, vf;

        zero = tcg_const_i64(0);

        frn = tcg_temp_new_i64();
        frm = tcg_temp_new_i64();
        dest = tcg_temp_new_i64();

        zf = tcg_temp_new_i64();
        nf = tcg_temp_new_i64();
        vf = tcg_temp_new_i64();

        tcg_gen_extu_i32_i64(zf, cpu_ZF);
        tcg_gen_ext_i32_i64(nf, cpu_NF);
        tcg_gen_ext_i32_i64(vf, cpu_VF);

        vfp_load_reg64(frn, rn);
        vfp_load_reg64(frm, rm);
        switch (a->cc) {
        case 0: /* eq: Z */
            tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
                                frn, frm);
            break;
        case 1: /* vs: V */
            tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
                                frn, frm);
            break;
        case 2: /* ge: N == V -> N ^ V == 0 */
            tmp = tcg_temp_new_i64();
            tcg_gen_xor_i64(tmp, vf, nf);
            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
                                frn, frm);
            tcg_temp_free_i64(tmp);
            break;
        case 3: /* gt: !Z && N == V */
            tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
                                frn, frm);
            tmp = tcg_temp_new_i64();
            tcg_gen_xor_i64(tmp, vf, nf);
            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
                                dest, frm);
            tcg_temp_free_i64(tmp);
            break;
        }
        vfp_store_reg64(dest, rd);
        tcg_temp_free_i64(frn);
        tcg_temp_free_i64(frm);
        tcg_temp_free_i64(dest);

        tcg_temp_free_i64(zf);
        tcg_temp_free_i64(nf);
        tcg_temp_free_i64(vf);

        tcg_temp_free_i64(zero);
    } else {
        TCGv_i32 frn, frm, dest;
        TCGv_i32 tmp, zero;

        zero = tcg_const_i32(0);

        frn = tcg_temp_new_i32();
        frm = tcg_temp_new_i32();
        dest = tcg_temp_new_i32();
        vfp_load_reg32(frn, rn);
        vfp_load_reg32(frm, rm);
        switch (a->cc) {
        case 0: /* eq: Z */
            tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
                                frn, frm);
            break;
        case 1: /* vs: V */
            tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
                                frn, frm);
            break;
        case 2: /* ge: N == V -> N ^ V == 0 */
            tmp = tcg_temp_new_i32();
            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
                                frn, frm);
            tcg_temp_free_i32(tmp);
            break;
        case 3: /* gt: !Z && N == V */
            tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
                                frn, frm);
            tmp = tcg_temp_new_i32();
            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
                                dest, frm);
            tcg_temp_free_i32(tmp);
            break;
        }
        /* For fp16 the top half is always zeroes */
        if (sz == 1) {
            tcg_gen_andi_i32(dest, dest, 0xffff);
        }
        vfp_store_reg32(dest, rd);
        tcg_temp_free_i32(frn);
        tcg_temp_free_i32(frm);
        tcg_temp_free_i32(dest);
        tcg_temp_free_i32(zero);
    }

    return true;
}
/*
 * Table for converting the most common AArch32 encoding of
 * rounding mode to arm_fprounding order (which matches the
 * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
 */
static const uint8_t fp_decode_rm[] = {
    FPROUNDING_TIEAWAY,
    FPROUNDING_TIEEVEN,
    FPROUNDING_POSINF,
    FPROUNDING_NEGINF,
};
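/*
 * Added example: VRINTA/VRINTN/VRINTP/VRINTM encode rm as 0/1/2/3, so
 * fp_decode_rm maps them to tie-away, tie-even, +Inf and -Inf rounding
 * respectively.
 */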
static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
{
    uint32_t rd, rm;
    int sz = a->sz;
    TCGv_ptr fpst;
    TCGv_i32 tcg_rmode;
    int rounding = fp_decode_rm[a->rm];

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vm | a->vd) & 0x10)) {
        return false;
    }

    rd = a->vd;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    if (sz == 1) {
        fpst = fpstatus_ptr(FPST_FPCR_F16);
    } else {
        fpst = fpstatus_ptr(FPST_FPCR);
    }

    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
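    /*
     * Added note: gen_helper_set_rmode() installs the new rounding mode
     * in fpst and returns the previous one in tcg_rmode, so the second
     * call at the end of the function restores the original mode.
     */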
    if (sz == 3) {
        TCGv_i64 tcg_op;
        TCGv_i64 tcg_res;
        tcg_op = tcg_temp_new_i64();
        tcg_res = tcg_temp_new_i64();
        vfp_load_reg64(tcg_op, rm);
        gen_helper_rintd(tcg_res, tcg_op, fpst);
        vfp_store_reg64(tcg_res, rd);
        tcg_temp_free_i64(tcg_op);
        tcg_temp_free_i64(tcg_res);
    } else {
        TCGv_i32 tcg_op;
        TCGv_i32 tcg_res;
        tcg_op = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        vfp_load_reg32(tcg_op, rm);
        if (sz == 1) {
            gen_helper_rinth(tcg_res, tcg_op, fpst);
        } else {
            gen_helper_rints(tcg_res, tcg_op, fpst);
        }
        vfp_store_reg32(tcg_res, rd);
        tcg_temp_free_i32(tcg_op);
        tcg_temp_free_i32(tcg_res);
    }

    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    tcg_temp_free_i32(tcg_rmode);

    tcg_temp_free_ptr(fpst);
    return true;
}
static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
{
    uint32_t rd, rm;
    int sz = a->sz;
    TCGv_ptr fpst;
    TCGv_i32 tcg_rmode, tcg_shift;
    int rounding = fp_decode_rm[a->rm];
    bool is_signed = a->op;

    if (!dc_isar_feature(aa32_vcvt_dr, s)) {
        return false;
    }

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    rd = a->vd;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    if (sz == 1) {
        fpst = fpstatus_ptr(FPST_FPCR_F16);
    } else {
        fpst = fpstatus_ptr(FPST_FPCR);
    }

    tcg_shift = tcg_const_i32(0);
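    /*
     * Added note: the conversion helpers used below are the fixed-point
     * variants; a shift count of zero turns them into the plain
     * float-to-integer conversions these VCVT encodings need.
     */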
    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);

    if (sz == 3) {
        TCGv_i64 tcg_double, tcg_res;
        TCGv_i32 tcg_tmp;
        tcg_double = tcg_temp_new_i64();
        tcg_res = tcg_temp_new_i64();
        tcg_tmp = tcg_temp_new_i32();
        vfp_load_reg64(tcg_double, rm);
        if (is_signed) {
            gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
        } else {
            gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
        }
        tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
        vfp_store_reg32(tcg_tmp, rd);
        tcg_temp_free_i32(tcg_tmp);
        tcg_temp_free_i64(tcg_res);
        tcg_temp_free_i64(tcg_double);
    } else {
        TCGv_i32 tcg_single, tcg_res;
        tcg_single = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        vfp_load_reg32(tcg_single, rm);
        if (sz == 1) {
            if (is_signed) {
                gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst);
            } else {
                gen_helper_vfp_toulh(tcg_res, tcg_single, tcg_shift, fpst);
            }
        } else {
            if (is_signed) {
                gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
            } else {
                gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
            }
        }
        vfp_store_reg32(tcg_res, rd);
        tcg_temp_free_i32(tcg_res);
        tcg_temp_free_i32(tcg_single);
    }

    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    tcg_temp_free_i32(tcg_rmode);

    tcg_temp_free_i32(tcg_shift);

    tcg_temp_free_ptr(fpst);
    return true;
}
static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
{
    /* VMOV scalar to general purpose register */
    TCGv_i32 tmp;

    /* SIZE == MO_32 is a VFP instruction; otherwise NEON. */
    if (a->size == MO_32
        ? !dc_isar_feature(aa32_fpsp_v2, s)
        : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    read_neon_element32(tmp, a->vn, a->index, a->size | (a->u ? 0 : MO_SIGN));
    store_reg(s, a->rt, tmp);

    return true;
}
static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
{
    /* VMOV general purpose register to scalar */
    TCGv_i32 tmp;

    /* SIZE == MO_32 is a VFP instruction; otherwise NEON. */
    if (a->size == MO_32
        ? !dc_isar_feature(aa32_fpsp_v2, s)
        : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = load_reg(s, a->rt);
    write_neon_element32(tmp, a->vn, a->index, a->size);
    tcg_temp_free_i32(tmp);

    return true;
}
static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    /* VDUP (general purpose register) */
    TCGv_i32 tmp;
    int size, vec_size;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
        return false;
    }

    if (a->b && a->e) {
        return false;
    }

    if (a->q && (a->vn & 1)) {
        return false;
    }

    vec_size = a->q ? 16 : 8;
    if (a->b) {
        size = 0;
    } else if (a->e) {
        size = 1;
    } else {
        size = 2;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = load_reg(s, a->rt);
    tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn),
                         vec_size, vec_size, tmp);
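    /*
     * Added note: tcg_gen_gvec_dup_i32() replicates the low 2^size bytes
     * of tmp across all vec_size bytes of the destination, i.e. across
     * one D register or a whole Q register.
     */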
    tcg_temp_free_i32(tmp);

    return true;
}
/*
 * M-profile provides two different sets of instructions that can
 * access floating point system registers: VMSR/VMRS (which move
 * to/from a general purpose register) and VLDR/VSTR sysreg (which
 * move directly to/from memory). In some cases there are also side
 * effects which must happen after any write to memory (which could
 * cause an exception). So we implement the common logic for the
 * sysreg access in gen_M_fp_sysreg_write() and gen_M_fp_sysreg_read(),
 * which take pointers to callback functions which will perform the
 * actual "read/write general purpose register" and "read/write
 * memory" operations.
 */

/*
 * Emit code to store the sysreg to its final destination; frees the
 * TCG temp 'value' it is passed.
 */
typedef void fp_sysreg_storefn(DisasContext *s, void *opaque, TCGv_i32 value);
/*
 * Emit code to load the value to be copied to the sysreg; returns
 * a new TCG temporary
 */
typedef TCGv_i32 fp_sysreg_loadfn(DisasContext *s, void *opaque);
/* Common decode/access checks for fp sysreg read/write */
typedef enum FPSysRegCheckResult {
    FPSysRegCheckFailed, /* caller should return false */
    FPSysRegCheckDone, /* caller should return true */
    FPSysRegCheckContinue, /* caller should continue generating code */
} FPSysRegCheckResult;
static FPSysRegCheckResult fp_sysreg_checks(DisasContext *s, int regno)
{
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return FPSysRegCheckFailed;
    }

    switch (regno) {
    case ARM_VFP_FPSCR:
    case QEMU_VFP_FPSCR_NZCV:
        break;
    case ARM_VFP_FPSCR_NZCVQC:
        if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
            return FPSysRegCheckFailed;
        }
        break;
    case ARM_VFP_FPCXT_S:
    case ARM_VFP_FPCXT_NS:
        if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
            return FPSysRegCheckFailed;
        }
        if (!s->v8m_secure) {
            return FPSysRegCheckFailed;
        }
        break;
    default:
        return FPSysRegCheckFailed;
    }

    /*
     * FPCXT_NS is a special case: it has specific handling for
     * "current FP state is inactive", and must do the PreserveFPState()
     * but not the usual full set of actions done by ExecuteFPCheck().
     * So we don't call vfp_access_check() and the callers must handle this.
     */
    if (regno != ARM_VFP_FPCXT_NS && !vfp_access_check(s)) {
        return FPSysRegCheckDone;
    }

    return FPSysRegCheckContinue;
}
static void gen_branch_fpInactive(DisasContext *s, TCGCond cond,
                                  TCGLabel *label)
{
    /*
     * FPCXT_NS is a special case: it has specific handling for
     * "current FP state is inactive", and must do the PreserveFPState()
     * but not the usual full set of actions done by ExecuteFPCheck().
     * We don't have a TB flag that matches the fpInactive check, so we
     * do it at runtime as we don't expect FPCXT_NS accesses to be frequent.
     *
     * Emit code that checks fpInactive and does a conditional
     * branch to label based on it:
     *  if cond is TCG_COND_NE then branch if fpInactive != 0 (ie if inactive)
     *  if cond is TCG_COND_EQ then branch if fpInactive == 0 (ie if active)
     */
    assert(cond == TCG_COND_EQ || cond == TCG_COND_NE);

    /* fpInactive = FPCCR_NS.ASPEN == 1 && CONTROL.FPCA == 0 */
    TCGv_i32 aspen, fpca;
    aspen = load_cpu_field(v7m.fpccr[M_REG_NS]);
    fpca = load_cpu_field(v7m.control[M_REG_S]);
    tcg_gen_andi_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
    tcg_gen_xori_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
    tcg_gen_andi_i32(fpca, fpca, R_V7M_CONTROL_FPCA_MASK);
    tcg_gen_or_i32(fpca, fpca, aspen);
    tcg_gen_brcondi_i32(tcg_invert_cond(cond), fpca, 0, label);
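    /*
     * Added note: after the masking above, aspen is zero iff ASPEN == 1
     * and fpca is zero iff FPCA == 0, so their OR is zero exactly when
     * fpInactive is true; inverting the condition for the brcond then
     * gives the TCG_COND_NE/TCG_COND_EQ contract described above.
     */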
    tcg_temp_free_i32(aspen);
    tcg_temp_free_i32(fpca);
}
static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
                                  fp_sysreg_loadfn *loadfn,
                                  void *opaque)
{
    /* Do a write to an M-profile floating point system register */
    TCGv_i32 tmp;
    TCGLabel *lab_end = NULL;

    switch (fp_sysreg_checks(s, regno)) {
    case FPSysRegCheckFailed:
        return false;
    case FPSysRegCheckDone:
        return true;
    case FPSysRegCheckContinue:
        break;
    }

    switch (regno) {
    case ARM_VFP_FPSCR:
        tmp = loadfn(s, opaque);
        gen_helper_vfp_set_fpscr(cpu_env, tmp);
        tcg_temp_free_i32(tmp);
        gen_lookup_tb(s);
        break;
    case ARM_VFP_FPSCR_NZCVQC:
    {
        TCGv_i32 fpscr;
        tmp = loadfn(s, opaque);
        /*
         * TODO: when we implement MVE, write the QC bit.
         * For non-MVE, QC is RES0.
         */
        tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
        fpscr = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
        tcg_gen_andi_i32(fpscr, fpscr, ~FPCR_NZCV_MASK);
        tcg_gen_or_i32(fpscr, fpscr, tmp);
        store_cpu_field(fpscr, vfp.xregs[ARM_VFP_FPSCR]);
        tcg_temp_free_i32(tmp);
        break;
    }
    case ARM_VFP_FPCXT_NS:
        lab_end = gen_new_label();
        /* fpInactive case: write is a NOP, so branch to end */
        gen_branch_fpInactive(s, TCG_COND_NE, lab_end);
        /* !fpInactive: PreserveFPState(), and write behaves the same as FPCXT_S */
        gen_preserve_fp_state(s);
        /* fall through */
    case ARM_VFP_FPCXT_S:
    {
        TCGv_i32 sfpa, control;
        /*
         * Set FPSCR and CONTROL.SFPA from value; the new FPSCR takes
         * bits [27:0] from value and zeroes bits [31:28].
         */
        tmp = loadfn(s, opaque);
        sfpa = tcg_temp_new_i32();
        tcg_gen_shri_i32(sfpa, tmp, 31);
        control = load_cpu_field(v7m.control[M_REG_S]);
        tcg_gen_deposit_i32(control, control, sfpa,
                            R_V7M_CONTROL_SFPA_SHIFT, 1);
        store_cpu_field(control, v7m.control[M_REG_S]);
        tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
        gen_helper_vfp_set_fpscr(cpu_env, tmp);
        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(sfpa);
        break;
    }
    default:
        g_assert_not_reached();
    }
    if (lab_end) {
        gen_set_label(lab_end);
    }
    return true;
}
static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
                                 fp_sysreg_storefn *storefn,
                                 void *opaque)
{
    /* Do a read from an M-profile floating point system register */
    TCGv_i32 tmp;
    TCGLabel *lab_end = NULL;
    bool lookup_tb = false;

    switch (fp_sysreg_checks(s, regno)) {
    case FPSysRegCheckFailed:
        return false;
    case FPSysRegCheckDone:
        return true;
    case FPSysRegCheckContinue:
        break;
    }

    switch (regno) {
    case ARM_VFP_FPSCR:
        tmp = tcg_temp_new_i32();
        gen_helper_vfp_get_fpscr(tmp, cpu_env);
        storefn(s, opaque, tmp);
        break;
    case ARM_VFP_FPSCR_NZCVQC:
        /*
         * TODO: MVE has a QC bit, which we probably won't store
         * in the xregs[] field. For non-MVE, where QC is RES0,
         * we can just fall through to the FPSCR_NZCV case.
         */
    case QEMU_VFP_FPSCR_NZCV:
        /*
         * Read just NZCV; this is a special case to avoid the
         * helper call for the "VMRS to CPSR.NZCV" insn.
         */
        tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
        tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
        storefn(s, opaque, tmp);
        break;
    case ARM_VFP_FPCXT_S:
    {
        TCGv_i32 control, sfpa, fpscr;
        /* Bits [27:0] from FPSCR, bit [31] from CONTROL.SFPA */
        tmp = tcg_temp_new_i32();
        sfpa = tcg_temp_new_i32();
        gen_helper_vfp_get_fpscr(tmp, cpu_env);
        tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
        control = load_cpu_field(v7m.control[M_REG_S]);
        tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
        tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
        tcg_gen_or_i32(tmp, tmp, sfpa);
        tcg_temp_free_i32(sfpa);
        /*
         * Store result before updating FPSCR etc, in case
         * it is a memory write which causes an exception.
         */
        storefn(s, opaque, tmp);
        /*
         * Now we must reset FPSCR from FPDSCR_NS, and clear
         * CONTROL.SFPA; so we'll end the TB here.
         */
        tcg_gen_andi_i32(control, control, ~R_V7M_CONTROL_SFPA_MASK);
        store_cpu_field(control, v7m.control[M_REG_S]);
        fpscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
        gen_helper_vfp_set_fpscr(cpu_env, fpscr);
        tcg_temp_free_i32(fpscr);
        lookup_tb = true;
        break;
    }
    case ARM_VFP_FPCXT_NS:
    {
        TCGv_i32 control, sfpa, fpscr, fpdscr, zero;
        TCGLabel *lab_active = gen_new_label();

        lookup_tb = true;

        gen_branch_fpInactive(s, TCG_COND_EQ, lab_active);
        /* fpInactive case: reads as FPDSCR_NS */
        TCGv_i32 tmp = load_cpu_field(v7m.fpdscr[M_REG_NS]);
        storefn(s, opaque, tmp);
        lab_end = gen_new_label();
        tcg_gen_br(lab_end);

        gen_set_label(lab_active);
        /* !fpInactive: Reads the same as FPCXT_S, but side effects differ */
        gen_preserve_fp_state(s);
        tmp = tcg_temp_new_i32();
        sfpa = tcg_temp_new_i32();
        fpscr = tcg_temp_new_i32();
        gen_helper_vfp_get_fpscr(fpscr, cpu_env);
        tcg_gen_andi_i32(tmp, fpscr, ~FPCR_NZCV_MASK);
        control = load_cpu_field(v7m.control[M_REG_S]);
        tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
        tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
        tcg_gen_or_i32(tmp, tmp, sfpa);
        tcg_temp_free_i32(control);
        /* Store result before updating FPSCR, in case it faults */
        storefn(s, opaque, tmp);
        /* If SFPA is zero then set FPSCR from FPDSCR_NS */
        fpdscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
        zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_EQ, fpscr, sfpa, zero, fpdscr, fpscr);
        gen_helper_vfp_set_fpscr(cpu_env, fpscr);
        tcg_temp_free_i32(zero);
        tcg_temp_free_i32(sfpa);
        tcg_temp_free_i32(fpdscr);
        tcg_temp_free_i32(fpscr);
        break;
    }
    default:
        g_assert_not_reached();
    }

    if (lab_end) {
        gen_set_label(lab_end);
    }
    if (lookup_tb) {
        gen_lookup_tb(s);
    }
    return true;
}
static void fp_sysreg_to_gpr(DisasContext *s, void *opaque, TCGv_i32 value)
{
    arg_VMSR_VMRS *a = opaque;

    if (a->rt == 15) {
        /* Set the 4 flag bits in the CPSR */
        gen_set_nzcv(value);
        tcg_temp_free_i32(value);
    } else {
        store_reg(s, a->rt, value);
    }
}

static TCGv_i32 gpr_to_fp_sysreg(DisasContext *s, void *opaque)
{
    arg_VMSR_VMRS *a = opaque;

    return load_reg(s, a->rt);
}
static bool gen_M_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
{
    /*
     * Accesses to R15 are UNPREDICTABLE; we choose to undef.
     * FPSCR -> r15 is a special case which writes to the PSR flags;
     * set a->reg to a special value to tell gen_M_fp_sysreg_read()
     * we only care about the top 4 bits of FPSCR there.
     */
    if (a->rt == 15) {
        if (a->l && a->reg == ARM_VFP_FPSCR) {
            a->reg = QEMU_VFP_FPSCR_NZCV;
        } else {
            return false;
        }
    }

    if (a->l) {
        /* VMRS, move FP system register to gp register */
        return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_gpr, a);
    } else {
        /* VMSR, move gp register to FP system register */
        return gen_M_fp_sysreg_write(s, a->reg, gpr_to_fp_sysreg, a);
    }
}
static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
{
    TCGv_i32 tmp;
    bool ignore_vfp_enabled = false;

    if (arm_dc_feature(s, ARM_FEATURE_M)) {
        return gen_M_VMSR_VMRS(s, a);
    }

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    switch (a->reg) {
    case ARM_VFP_FPSID:
        /*
         * VFPv2 allows access to FPSID from userspace; VFPv3 restricts
         * all ID registers to privileged access only.
         */
        if (IS_USER(s) && dc_isar_feature(aa32_fpsp_v3, s)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_MVFR0:
    case ARM_VFP_MVFR1:
        if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_MVFR2:
        if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_FPSCR:
        break;
    case ARM_VFP_FPEXC:
        if (IS_USER(s)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_FPINST:
    case ARM_VFP_FPINST2:
        /* Not present in VFPv3 */
        if (IS_USER(s) || dc_isar_feature(aa32_fpsp_v3, s)) {
            return false;
        }
        break;
    default:
        return false;
    }

    if (!full_vfp_access_check(s, ignore_vfp_enabled)) {
        return true;
    }

    if (a->l) {
        /* VMRS, move VFP special register to gp register */
        switch (a->reg) {
        case ARM_VFP_MVFR0:
        case ARM_VFP_MVFR1:
        case ARM_VFP_MVFR2:
        case ARM_VFP_FPSID:
            if (s->current_el == 1) {
                TCGv_i32 tcg_reg, tcg_rt;

                gen_set_condexec(s);
                gen_set_pc_im(s, s->pc_curr);
                tcg_reg = tcg_const_i32(a->reg);
                tcg_rt = tcg_const_i32(a->rt);
                gen_helper_check_hcr_el2_trap(cpu_env, tcg_rt, tcg_reg);
                tcg_temp_free_i32(tcg_reg);
                tcg_temp_free_i32(tcg_rt);
            }
            /* fall through */
        case ARM_VFP_FPEXC:
        case ARM_VFP_FPINST:
        case ARM_VFP_FPINST2:
            tmp = load_cpu_field(vfp.xregs[a->reg]);
            break;
        case ARM_VFP_FPSCR:
            if (a->rt == 15) {
                tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
                tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
            } else {
                tmp = tcg_temp_new_i32();
                gen_helper_vfp_get_fpscr(tmp, cpu_env);
            }
            break;
        default:
            g_assert_not_reached();
        }

        if (a->rt == 15) {
            /* Set the 4 flag bits in the CPSR. */
            gen_set_nzcv(tmp);
            tcg_temp_free_i32(tmp);
        } else {
            store_reg(s, a->rt, tmp);
        }
    } else {
        /* VMSR, move gp register to VFP special register */
        switch (a->reg) {
        case ARM_VFP_FPSID:
        case ARM_VFP_MVFR0:
        case ARM_VFP_MVFR1:
        case ARM_VFP_MVFR2:
            /* Writes are ignored. */
            break;
        case ARM_VFP_FPSCR:
            tmp = load_reg(s, a->rt);
            gen_helper_vfp_set_fpscr(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            gen_lookup_tb(s);
            break;
        case ARM_VFP_FPEXC:
            /*
             * TODO: VFP subarchitecture support.
             * For now, keep the EN bit only
             */
            tmp = load_reg(s, a->rt);
            tcg_gen_andi_i32(tmp, tmp, 1 << 30);
            store_cpu_field(tmp, vfp.xregs[a->reg]);
            gen_lookup_tb(s);
            break;
        case ARM_VFP_FPINST:
        case ARM_VFP_FPINST2:
            tmp = load_reg(s, a->rt);
            store_cpu_field(tmp, vfp.xregs[a->reg]);
            break;
        default:
            g_assert_not_reached();
        }
    }

    return true;
}
static void fp_sysreg_to_memory(DisasContext *s, void *opaque, TCGv_i32 value)
{
    arg_vldr_sysreg *a = opaque;
    uint32_t offset = a->imm;
    TCGv_i32 addr;

    if (!a->a) {
        offset = -offset;
    }

    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        gen_helper_v8m_stackcheck(cpu_env, addr);
    }

    gen_aa32_st_i32(s, value, addr, get_mem_index(s),
                    MO_UL | MO_ALIGN | s->be_data);
    tcg_temp_free_i32(value);

    if (a->w) {
        /* writeback */
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
}
static TCGv_i32 memory_to_fp_sysreg(DisasContext *s, void *opaque)
{
    arg_vldr_sysreg *a = opaque;
    uint32_t offset = a->imm;
    TCGv_i32 addr;
    TCGv_i32 value = tcg_temp_new_i32();

    if (!a->a) {
        offset = -offset;
    }

    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        gen_helper_v8m_stackcheck(cpu_env, addr);
    }

    gen_aa32_ld_i32(s, value, addr, get_mem_index(s),
                    MO_UL | MO_ALIGN | s->be_data);

    if (a->w) {
        /* writeback */
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    return value;
}
static bool trans_VLDR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
        return false;
    }
    if (a->rn == 15) {
        return false;
    }
    return gen_M_fp_sysreg_write(s, a->reg, memory_to_fp_sysreg, a);
}

static bool trans_VSTR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
        return false;
    }
    if (a->rn == 15) {
        return false;
    }
    return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_memory, a);
}
static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
{
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->l) {
        /* VFP to general purpose register */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vn);
        tcg_gen_andi_i32(tmp, tmp, 0xffff);
        store_reg(s, a->rt, tmp);
    } else {
        /* general purpose register to VFP */
        tmp = load_reg(s, a->rt);
        tcg_gen_andi_i32(tmp, tmp, 0xffff);
        vfp_store_reg32(tmp, a->vn);
        tcg_temp_free_i32(tmp);
    }

    return true;
}
static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
{
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->l) {
        /* VFP to general purpose register */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vn);
        if (a->rt == 15) {
            /* Set the 4 flag bits in the CPSR. */
            gen_set_nzcv(tmp);
            tcg_temp_free_i32(tmp);
        } else {
            store_reg(s, a->rt, tmp);
        }
    } else {
        /* general purpose register to VFP */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vn);
        tcg_temp_free_i32(tmp);
    }

    return true;
}
static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
{
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /*
     * VMOV between two general-purpose registers and two single precision
     * floating point registers
     */
    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->op) {
        /* fpreg to gpreg */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm);
        store_reg(s, a->rt, tmp);
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm + 1);
        store_reg(s, a->rt2, tmp);
    } else {
        /* gpreg to fpreg */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vm);
        tcg_temp_free_i32(tmp);
        tmp = load_reg(s, a->rt2);
        vfp_store_reg32(tmp, a->vm + 1);
        tcg_temp_free_i32(tmp);
    }

    return true;
}
static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
{
    TCGv_i32 tmp;

    /*
     * VMOV between two general-purpose registers and one double precision
     * floating point register. Note that this does not require support
     * for double precision arithmetic.
     */
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->op) {
        /* fpreg to gpreg */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm * 2);
        store_reg(s, a->rt, tmp);
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm * 2 + 1);
        store_reg(s, a->rt2, tmp);
    } else {
        /* gpreg to fpreg */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vm * 2);
        tcg_temp_free_i32(tmp);
        tmp = load_reg(s, a->rt2);
        vfp_store_reg32(tmp, a->vm * 2 + 1);
        tcg_temp_free_i32(tmp);
    }

    return true;
}
static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a)
{
    uint32_t offset;
    TCGv_i32 addr, tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* imm8 field is offset/2 for fp16, unlike fp32 and fp64 */
    offset = a->imm << 1;
    if (!a->u) {
        offset = -offset;
    }

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i32();
    if (a->l) {
        gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
        vfp_store_reg32(tmp, a->vd);
    } else {
        vfp_load_reg32(tmp, a->vd);
        gen_aa32_st16(s, tmp, addr, get_mem_index(s));
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(addr);

    return true;
}
static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
{
    uint32_t offset;
    TCGv_i32 addr, tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << 2;
    if (!a->u) {
        offset = -offset;
    }

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i32();
    if (a->l) {
        gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
        vfp_store_reg32(tmp, a->vd);
    } else {
        vfp_load_reg32(tmp, a->vd);
        gen_aa32_st32(s, tmp, addr, get_mem_index(s));
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(addr);

    return true;
}
static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
{
    uint32_t offset;
    TCGv_i32 addr;
    TCGv_i64 tmp;

    /* Note that this does not require support for double arithmetic. */
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << 2;
    if (!a->u) {
        offset = -offset;
    }

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i64();
    if (a->l) {
        gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
        vfp_store_reg64(tmp, a->vd);
    } else {
        vfp_load_reg64(tmp, a->vd);
        gen_aa32_st64(s, tmp, addr, get_mem_index(s));
    }
    tcg_temp_free_i64(tmp);
    tcg_temp_free_i32(addr);

    return true;
}
static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
{
    uint32_t offset;
    TCGv_i32 addr, tmp;
    int i, n;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    n = a->imm;

    if (n == 0 || (a->vd + n) > 32) {
        /*
         * UNPREDICTABLE cases for bad immediates: we choose to
         * UNDEF to avoid generating huge numbers of TCG ops
         */
        return false;
    }
    if (a->rn == 15 && a->w) {
        /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, 0);
    if (a->p) {
        /* pre-decrement */
        tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
    }

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        /*
         * Here 'addr' is the lowest address we will store to,
         * and is either the old SP (if post-increment) or
         * the new SP (if pre-decrement). For post-increment
         * where the old value is below the limit and the new
         * value is above, it is UNKNOWN whether the limit check
         * triggers; we choose to trigger.
         */
        gen_helper_v8m_stackcheck(cpu_env, addr);
    }

    offset = 4;
    tmp = tcg_temp_new_i32();
    for (i = 0; i < n; i++) {
        if (a->l) {
            /* load */
            gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
            vfp_store_reg32(tmp, a->vd + i);
        } else {
            /* store */
            vfp_load_reg32(tmp, a->vd + i);
            gen_aa32_st32(s, tmp, addr, get_mem_index(s));
        }
        tcg_gen_addi_i32(addr, addr, offset);
    }
    tcg_temp_free_i32(tmp);
    if (a->w) {
        /* writeback */
        if (a->p) {
            offset = -offset * n;
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }

    return true;
}
static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
{
    uint32_t offset;
    TCGv_i32 addr;
    TCGv_i64 tmp;
    int i, n;

    /* Note that this does not require support for double arithmetic. */
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    n = a->imm >> 1;

    if (n == 0 || (a->vd + n) > 32 || n > 16) {
        /*
         * UNPREDICTABLE cases for bad immediates: we choose to
         * UNDEF to avoid generating huge numbers of TCG ops
         */
        return false;
    }
    if (a->rn == 15 && a->w) {
        /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, 0);
    if (a->p) {
        /* pre-decrement */
        tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
    }

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        /*
         * Here 'addr' is the lowest address we will store to,
         * and is either the old SP (if post-increment) or
         * the new SP (if pre-decrement). For post-increment
         * where the old value is below the limit and the new
         * value is above, it is UNKNOWN whether the limit check
         * triggers; we choose to trigger.
         */
        gen_helper_v8m_stackcheck(cpu_env, addr);
    }

    offset = 8;
    tmp = tcg_temp_new_i64();
    for (i = 0; i < n; i++) {
        if (a->l) {
            /* load */
            gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
            vfp_store_reg64(tmp, a->vd + i);
        } else {
            /* store */
            vfp_load_reg64(tmp, a->vd + i);
            gen_aa32_st64(s, tmp, addr, get_mem_index(s));
        }
        tcg_gen_addi_i32(addr, addr, offset);
    }
    tcg_temp_free_i64(tmp);
    if (a->w) {
        /* writeback */
        if (a->p) {
            offset = -offset * n;
        } else if (a->imm & 1) {
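            /*
             * Added note: an odd imm is the FLDMX/FSTMX form, which
             * transfers one extra word, so the writeback below adds a
             * final 4 bytes on top of the 8 bytes per register.
             */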
            offset = 4;
        } else {
            offset = 0;
        }

        if (offset != 0) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }

    return true;
}
/*
 * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp().
 * The callback should emit code to write a value to vd. If
 * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd
 * will contain the old value of the relevant VFP register;
 * otherwise it must be written to only.
 */
typedef void VFPGen3OpSPFn(TCGv_i32 vd,
                           TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst);
typedef void VFPGen3OpDPFn(TCGv_i64 vd,
                           TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);

/*
 * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp().
 * The callback should emit code to write a value to vd (which
 * should be written to only).
 */
typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);
/*
 * Return true if the specified S reg is in a scalar bank
 * (ie if it is s0..s7)
 */
static inline bool vfp_sreg_is_scalar(int reg)
{
    return (reg & 0x18) == 0;
}

/*
 * Return true if the specified D reg is in a scalar bank
 * (ie if it is d0..d3 or d16..d19)
 */
static inline bool vfp_dreg_is_scalar(int reg)
{
    return (reg & 0xc) == 0;
}

/*
 * Advance the S reg number forwards by delta within its bank
 * (ie increment the low 3 bits but leave the rest the same)
 */
static inline int vfp_advance_sreg(int reg, int delta)
{
    return ((reg + delta) & 0x7) | (reg & ~0x7);
}

/*
 * Advance the D reg number forwards by delta within its bank
 * (ie increment the low 2 bits but leave the rest the same)
 */
static inline int vfp_advance_dreg(int reg, int delta)
{
    return ((reg + delta) & 0x3) | (reg & ~0x3);
}
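/*
 * Added example: the bank/advance arithmetic above wraps within a bank,
 * e.g. vfp_advance_sreg(14, 3) is ((14 + 3) & 7) | 8 == 9, staying inside
 * the s8..s15 bank rather than advancing into s16.
 */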
/*
 * Perform a 3-operand VFP data processing instruction. fn is the
 * callback to do the actual operation; this function deals with the
 * code to handle looping around for VFP vector processing.
 */
static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 f0, f1, fd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_sreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = s->vec_stride + 1;

            if (vfp_sreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i32();
    f1 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();
    fpst = fpstatus_ptr(FPST_FPCR);

    vfp_load_reg32(f0, vn);
    vfp_load_reg32(f1, vm);

    for (;;) {
        if (reads_vd) {
            vfp_load_reg32(fd, vd);
        }
        fn(fd, f0, f1, fpst);
        vfp_store_reg32(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_sreg(vd, delta_d);
        vn = vfp_advance_sreg(vn, delta_d);
        vfp_load_reg32(f0, vn);
        if (delta_m) {
            vm = vfp_advance_sreg(vm, delta_m);
            vfp_load_reg32(f1, vm);
        }
    }

    tcg_temp_free_i32(f0);
    tcg_temp_free_i32(f1);
    tcg_temp_free_i32(fd);
    tcg_temp_free_ptr(fpst);

    return true;
}
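/*
 * Added example: if the TB flags give veclen == 3 and delta_d == 1
 * (a short-vector length of 4, stride 1), a VADD.F32 with vd == s8
 * loops four times over consecutive registers starting at s8.
 */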
static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
{
    /*
     * Do a half-precision operation. Functionally this is
     * the same as do_vfp_3op_sp(), except:
     *  - it uses the FPST_FPCR_F16
     *  - it doesn't need the VFP vector handling (fp16 is a
     *    v8 feature, and in v8 VFP vectors don't exist)
     *  - it does the aa32_fp16_arith feature test
     */
    TCGv_i32 f0, f1, fd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    f0 = tcg_temp_new_i32();
    f1 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();
    fpst = fpstatus_ptr(FPST_FPCR_F16);

    vfp_load_reg32(f0, vn);
    vfp_load_reg32(f1, vm);

    if (reads_vd) {
        vfp_load_reg32(fd, vd);
    }
    fn(fd, f0, f1, fpst);
    vfp_store_reg32(fd, vd);

    tcg_temp_free_i32(f0);
    tcg_temp_free_i32(f1);
    tcg_temp_free_i32(fd);
    tcg_temp_free_ptr(fpst);

    return true;
}
static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 f0, f1, fd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = (s->vec_stride >> 1) + 1;

            if (vfp_dreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i64();
    f1 = tcg_temp_new_i64();
    fd = tcg_temp_new_i64();
    fpst = fpstatus_ptr(FPST_FPCR);

    vfp_load_reg64(f0, vn);
    vfp_load_reg64(f1, vm);

    for (;;) {
        if (reads_vd) {
            vfp_load_reg64(fd, vd);
        }
        fn(fd, f0, f1, fpst);
        vfp_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
        vn = vfp_advance_dreg(vn, delta_d);
        vfp_load_reg64(f0, vn);
        if (delta_m) {
            vm = vfp_advance_dreg(vm, delta_m);
            vfp_load_reg64(f1, vm);
        }
    }

    tcg_temp_free_i64(f0);
    tcg_temp_free_i64(f1);
    tcg_temp_free_i64(fd);
    tcg_temp_free_ptr(fpst);

    return true;
}
static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 f0, fd;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_sreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = s->vec_stride + 1;

            if (vfp_sreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();

    vfp_load_reg32(f0, vm);

    for (;;) {
        fn(fd, f0);
        vfp_store_reg32(fd, vd);

        if (veclen == 0) {
            break;
        }

        if (delta_m == 0) {
            /* single source one-many */
            while (veclen--) {
                vd = vfp_advance_sreg(vd, delta_d);
                vfp_store_reg32(fd, vd);
            }
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_sreg(vd, delta_d);
        vm = vfp_advance_sreg(vm, delta_m);
        vfp_load_reg32(f0, vm);
    }

    tcg_temp_free_i32(f0);
    tcg_temp_free_i32(fd);

    return true;
}
static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
{
    /*
     * Do a half-precision operation. Functionally this is
     * the same as do_vfp_2op_sp(), except:
     *  - it doesn't need the VFP vector handling (fp16 is a
     *    v8 feature, and in v8 VFP vectors don't exist)
     *  - it does the aa32_fp16_arith feature test
     */
    TCGv_i32 f0;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    f0 = tcg_temp_new_i32();
    vfp_load_reg32(f0, vm);
    fn(f0, f0);
    vfp_store_reg32(f0, vd);
    tcg_temp_free_i32(f0);

    return true;
}
static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 f0, fd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = (s->vec_stride >> 1) + 1;

            if (vfp_dreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i64();
    fd = tcg_temp_new_i64();

    vfp_load_reg64(f0, vm);

    for (;;) {
        fn(fd, f0);
        vfp_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }

        if (delta_m == 0) {
            /* single source one-many */
            while (veclen--) {
                vd = vfp_advance_dreg(vd, delta_d);
                vfp_store_reg64(fd, vd);
            }
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
        vm = vfp_advance_dreg(vm, delta_m);
        vfp_load_reg64(f0, vm);
    }

    tcg_temp_free_i64(f0);
    tcg_temp_free_i64(fd);

    return true;
}
static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
{
    return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
{
    return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
{
    return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(tmp, tmp);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
{
    return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(tmp, tmp);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
{
    return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(tmp, tmp);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
{
    return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
}
static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(vd, vd);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(vd, vd);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(vd, vd);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
}
static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(tmp, tmp);
    gen_helper_vfp_negh(vd, vd);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(tmp, tmp);
    gen_helper_vfp_negs(vd, vd);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(tmp, tmp);
    gen_helper_vfp_negd(vd, vd);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
}
static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false);
}

static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
}

static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_mulh(vd, vn, vm, fpst);
    gen_helper_vfp_negh(vd, vd);
}

static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_muls(vd, vn, vm, fpst);
    gen_helper_vfp_negs(vd, vd);
}

static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_muld(vd, vn, vm, fpst);
    gen_helper_vfp_negd(vd, vd);
}

static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
}

static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false);
}

static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
}

static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
}

static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false);
}

static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
}

static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
}

static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false);
}

static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
}

static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
}
static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_hp(s, gen_helper_vfp_minnumh,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_sp(s, gen_helper_vfp_minnums,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMAXNM_sp(DisasContext *s, arg_VMAXNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_sp(s, gen_helper_vfp_maxnums,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMINNM_dp(DisasContext *s, arg_VMINNM_dp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_dp(s, gen_helper_vfp_minnumd,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_dp(s, gen_helper_vfp_maxnumd,
                         a->vd, a->vn, a->vm, false);
}
static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
{
    /*
     * VFNMA : fd = muladd(-fd,  fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA  : fd = muladd( fd,  fn, fm)
     * VFMS  : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps. NB that doing the negations here as separate
     * steps is correct: an input NaN should come out with its sign
     * bit flipped if it is a negated-input.
     */
    TCGv_ptr fpst;
    TCGv_i32 vn, vm, vd;

    /*
     * Present in VFPv4 only, and only with the FP16 extension.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_fp16_arith, s) ||
        !dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vn = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i32();

    vfp_load_reg32(vn, a->vn);
    vfp_load_reg32(vm, a->vm);
    if (neg_n) {
        /* VFNMS, VFMS */
        gen_helper_vfp_negh(vn, vn);
    }
    vfp_load_reg32(vd, a->vd);
    if (neg_d) {
        /* VFNMA, VFNMS */
        gen_helper_vfp_negh(vd, vd);
    }
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
    vfp_store_reg32(vd, a->vd);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(vn);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i32(vd);

    return true;
}
static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
{
    /*
     * VFNMA : fd = muladd(-fd,  fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA  : fd = muladd( fd,  fn, fm)
     * VFMS  : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps. NB that doing the negations here as separate
     * steps is correct: an input NaN should come out with its sign
     * bit flipped if it is a negated-input.
     */
    TCGv_ptr fpst;
    TCGv_i32 vn, vm, vd;

    /*
     * Present in VFPv4 only.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }
    /*
     * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
     * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
     */
    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vn = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i32();

    vfp_load_reg32(vn, a->vn);
    vfp_load_reg32(vm, a->vm);
    if (neg_n) {
        /* VFNMS, VFMS */
        gen_helper_vfp_negs(vn, vn);
    }
    vfp_load_reg32(vd, a->vd);
    if (neg_d) {
        /* VFNMA, VFNMS */
        gen_helper_vfp_negs(vd, vd);
    }
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
    vfp_store_reg32(vd, a->vd);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(vn);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i32(vd);

    return true;
}
static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
{
    /*
     * VFNMA : fd = muladd(-fd,  fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA  : fd = muladd( fd,  fn, fm)
     * VFMS  : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps. NB that doing the negations here as separate
     * steps is correct: an input NaN should come out with its sign
     * bit flipped if it is a negated-input.
     */
    TCGv_ptr fpst;
    TCGv_i64 vn, vm, vd;

    /*
     * Present in VFPv4 only.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }
    /*
     * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
     * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
     */
    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vn = tcg_temp_new_i64();
    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i64();

    vfp_load_reg64(vn, a->vn);
    vfp_load_reg64(vm, a->vm);
    if (neg_n) {
        /* VFNMS, VFMS */
        gen_helper_vfp_negd(vn, vn);
    }
    vfp_load_reg64(vd, a->vd);
    if (neg_d) {
        /* VFNMA, VFNMS */
        gen_helper_vfp_negd(vd, vd);
    }
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
    vfp_store_reg64(vd, a->vd);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(vn);
    tcg_temp_free_i64(vm);
    tcg_temp_free_i64(vd);

    return true;
}

#define MAKE_ONE_VFM_TRANS_FN(INSN, PREC, NEGN, NEGD)           \
    static bool trans_##INSN##_##PREC(DisasContext *s,          \
                                      arg_##INSN##_##PREC *a)   \
    {                                                           \
        return do_vfm_##PREC(s, a, NEGN, NEGD);                 \
    }

#define MAKE_VFM_TRANS_FNS(PREC) \
    MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \
    MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \
    MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \
    MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true)

MAKE_VFM_TRANS_FNS(hp)
MAKE_VFM_TRANS_FNS(sp)
MAKE_VFM_TRANS_FNS(dp)
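
/*
 * For reference, each instantiation of the macro above is a trivial
 * trampoline; e.g. MAKE_ONE_VFM_TRANS_FN(VFMA, sp, false, false)
 * expands to:
 *
 *   static bool trans_VFMA_sp(DisasContext *s, arg_VFMA_sp *a)
 *   {
 *       return do_vfm_sp(s, a, false, false);
 *   }
 */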

static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a)
{
    TCGv_i32 fd;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fd = tcg_const_i32(vfp_expand_imm(MO_16, a->imm));
    vfp_store_reg32(fd, a->vd);
    tcg_temp_free_i32(fd);
    return true;
}
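
/*
 * Worked example: imm8 = 0x70 has sign 0, bit 6 set and low bits 0x30,
 * so for MO_16 vfp_expand_imm() yields 0x3000 | (0x30 << 6) = 0x3c00,
 * which is 1.0 in half precision; the analogous MO_32 expansion
 * produces the bit pattern for 1.0f (0x3f800000).
 */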

static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
{
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 fd;
    uint32_t vd;

    vd = a->vd;

    if (!dc_isar_feature(aa32_fpsp_v3, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_sreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = s->vec_stride + 1;
        }
    }

    fd = tcg_const_i32(vfp_expand_imm(MO_32, a->imm));

    for (;;) {
        vfp_store_reg32(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_sreg(vd, delta_d);
    }

    tcg_temp_free_i32(fd);
    return true;
}

static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
{
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 fd;
    uint32_t vd;

    vd = a->vd;

    if (!dc_isar_feature(aa32_fpdp_v3, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = (s->vec_stride >> 1) + 1;
        }
    }

    fd = tcg_const_i64(vfp_expand_imm(MO_64, a->imm));

    for (;;) {
        vfp_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
    }

    tcg_temp_free_i64(fd);
    return true;
}

#define DO_VFP_2OP(INSN, PREC, FN)                              \
    static bool trans_##INSN##_##PREC(DisasContext *s,          \
                                      arg_##INSN##_##PREC *a)   \
    {                                                           \
        return do_vfp_2op_##PREC(s, FN, a->vd, a->vm);          \
    }

DO_VFP_2OP(VMOV_reg, sp, tcg_gen_mov_i32)
DO_VFP_2OP(VMOV_reg, dp, tcg_gen_mov_i64)

DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh)
DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss)
DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd)

DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh)
DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs)
DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd)

static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
{
    gen_helper_vfp_sqrth(vd, vm, cpu_env);
}

static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
{
    gen_helper_vfp_sqrts(vd, vm, cpu_env);
}

static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
{
    gen_helper_vfp_sqrtd(vd, vm, cpu_env);
}

DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp)
DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp)
DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp)
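
/*
 * VSQRT needs the small wrappers above because the sqrt helpers take
 * cpu_env (they fetch the live FP status from there), while the
 * do_vfp_2op_* callbacks are invoked with just (vd, vm); the wrappers
 * adapt one signature to the other.
 */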

static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
{
    TCGv_i32 vd, vm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();

    vfp_load_reg32(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i32(vm, 0);
    } else {
        vfp_load_reg32(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmpeh(vd, vm, cpu_env);
    } else {
        gen_helper_vfp_cmph(vd, vm, cpu_env);
    }

    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(vm);

    return true;
}

static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
{
    TCGv_i32 vd, vm;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();

    vfp_load_reg32(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i32(vm, 0);
    } else {
        vfp_load_reg32(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmpes(vd, vm, cpu_env);
    } else {
        gen_helper_vfp_cmps(vd, vm, cpu_env);
    }

    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(vm);

    return true;
}
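
/*
 * The E bit selects the signalling comparison: the cmpe helpers
 * (VCMPE) raise Invalid Operation for any NaN input, while the plain
 * cmp helpers (VCMP) do so only for signalling NaNs. Both take
 * cpu_env because they write the FP condition flags into FPSCR.NZCV.
 */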

static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
{
    TCGv_i64 vd, vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i64();
    vm = tcg_temp_new_i64();

    vfp_load_reg64(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i64(vm, 0);
    } else {
        vfp_load_reg64(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmped(vd, vm, cpu_env);
    } else {
        gen_helper_vfp_cmpd(vd, vm, cpu_env);
    }

    tcg_temp_free_i64(vd);
    tcg_temp_free_i64(vm);

    return true;
}

static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    /* The T bit tells us if we want the low or high 16 bits of Vm */
    tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;
    TCGv_i64 vd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    /* The T bit tells us if we want the low or high 16 bits of Vm */
    tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
    vd = tcg_temp_new_i64();
    gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i64(vd);
    return true;
}

static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();

    vfp_load_reg32(tmp, a->vm);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
    tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;
    TCGv_i64 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    vm = tcg_temp_new_i64();

    vfp_load_reg64(vm, a->vm);
    gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
    tcg_temp_free_i64(vm);
    tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}
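
/*
 * All four f16 conversions above share the same pattern: the
 * half-precision value lives in one 16-bit lane of a single-precision
 * register, so we use a 16-bit load or store at vfp_f16_offset()
 * selected by the T bit, and pass get_ahp_flag() through so the helper
 * honours FPSCR.AHP (alternative half-precision) at runtime.
 */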

static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_rinth(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rints(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rintd(tmp, tmp, fpst);
    vfp_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    return true;
}
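
/*
 * The three VRINT flavours differ only in rounding-mode handling:
 * VRINTR (above) rounds using whatever mode is currently in FPSCR,
 * VRINTZ (below) forces round-to-zero, and VRINTX (further below)
 * also uses the FPSCR mode but additionally raises Inexact when the
 * result differs from the input.
 */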

static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rinth(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_rmode);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rints(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_rmode);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rintd(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    vfp_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    tcg_temp_free_i32(tcg_rmode);
    return true;
}
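
/*
 * The VRINTZ functions use the standard temporary-rounding-mode dance:
 * gen_helper_set_rmode() installs the new mode and hands the old one
 * back in tcg_rmode, so a second call with the same temporary restores
 * the previous FPSCR rounding mode:
 *
 *   tcg_rmode = tcg_const_i32(float_round_to_zero);
 *   gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);   (save + set)
 *   ... rounding operation ...
 *   gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);   (restore)
 */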

static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_rinth_exact(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rints_exact(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rintd_exact(tmp, tmp, fpst);
    vfp_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    return true;
}

static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
{
    TCGv_i64 vd;
    TCGv_i32 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i64();
    vfp_load_reg32(vm, a->vm);
    gen_helper_vfp_fcvtds(vd, vm, cpu_env);
    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i64(vd);
    return true;
}

static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
{
    TCGv_i64 vm;
    TCGv_i32 vd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i64();
    vfp_load_reg64(vm, a->vm);
    gen_helper_vfp_fcvtsd(vd, vm, cpu_env);
    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i64(vm);
    return true;
}

static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    if (a->s) {
        /* i32 -> f16 */
        gen_helper_vfp_sitoh(vm, vm, fpst);
    } else {
        /* u32 -> f16 */
        gen_helper_vfp_uitoh(vm, vm, fpst);
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    if (a->s) {
        /* i32 -> f32 */
        gen_helper_vfp_sitos(vm, vm, fpst);
    } else {
        /* u32 -> f32 */
        gen_helper_vfp_uitos(vm, vm, fpst);
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
{
    TCGv_i32 vm;
    TCGv_i64 vd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i64();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    if (a->s) {
        /* i32 -> f64 */
        gen_helper_vfp_sitod(vd, vm, fpst);
    } else {
        /* u32 -> f64 */
        gen_helper_vfp_uitod(vd, vm, fpst);
    }
    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i64(vd);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
{
    TCGv_i32 vd;
    TCGv_i64 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_jscvt, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i32();
    vfp_load_reg64(vm, a->vm);
    gen_helper_vjcvt(vd, vm, cpu_env);
    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i64(vm);
    tcg_temp_free_i32(vd);
    return true;
}
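
/*
 * gen_helper_vjcvt() implements the architected JavaScript conversion:
 * round the double toward zero and, unlike the saturating conversions
 * elsewhere in this file, produce the low 32 bits of the true integer
 * on overflow (NaNs convert to zero). The helper also updates the
 * condition flags, which is one reason it takes cpu_env rather than
 * an fpst pointer.
 */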

static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
{
    TCGv_i32 vd, shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i32();
    vfp_load_reg32(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR_F16);
    shift = tcg_const_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_toslh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_toulh_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(shift);
    tcg_temp_free_ptr(fpst);
    return true;
}
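
/*
 * The instruction encodes the fixed-point position as (size - fbits),
 * so frac_bits recovers fbits from the immediate field: e.g. a 16-bit
 * conversion (sx = 0) with 8 fraction bits arrives with a->imm == 8,
 * giving frac_bits = 16 - 8 = 8.
 */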

static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
{
    TCGv_i32 vd, shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fpsp_v3, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i32();
    vfp_load_reg32(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR);
    shift = tcg_const_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(shift);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
{
    TCGv_i64 vd;
    TCGv_i32 shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fpdp_v3, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i64();
    vfp_load_reg64(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR);
    shift = tcg_const_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i64(vd);
    tcg_temp_free_i32(shift);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR_F16);
    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizh(vm, vm, fpst);
        } else {
            gen_helper_vfp_tosih(vm, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizh(vm, vm, fpst);
        } else {
            gen_helper_vfp_touih(vm, vm, fpst);
        }
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizs(vm, vm, fpst);
        } else {
            gen_helper_vfp_tosis(vm, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizs(vm, vm, fpst);
        } else {
            gen_helper_vfp_touis(vm, vm, fpst);
        }
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
{
    TCGv_i32 vd;
    TCGv_i64 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i32();
    vfp_load_reg64(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizd(vd, vm, fpst);
        } else {
            gen_helper_vfp_tosid(vd, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizd(vd, vm, fpst);
        } else {
            gen_helper_vfp_touid(vd, vm, fpst);
        }
    }
    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i64(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}
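
/*
 * In the float-to-integer direction the rz flag distinguishes VCVT,
 * which always truncates (the "z" round-to-zero helpers), from VCVTR,
 * which rounds using the current FPSCR rounding mode.
 */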

/*
 * The decode of VLLDM and VLSTM is nonstandard because:
 *  * if there is no FPU then these insns must NOP in
 *    Secure state and UNDEF in Nonsecure state
 *  * if there is an FPU then these insns do not have
 *    the usual behaviour that vfp_access_check() provides of
 *    being controlled by CPACR/NSACR enable bits or the
 *    lazy-stacking logic.
 */
static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a)
{
    TCGv_i32 fptr;

    if (!arm_dc_feature(s, ARM_FEATURE_M) ||
        !arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    if (a->op) {
        /*
         * T2 encoding ({D0-D31} reglist): v8.1M and up. We choose not
         * to take the IMPDEF option to make memory accesses to the stack
         * slots that correspond to the D16-D31 registers (discarding
         * read data and writing UNKNOWN values), so for us the T2
         * encoding behaves identically to the T1 encoding.
         */
        if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
            return false;
        }
    } else {
        /*
         * T1 encoding ({D0-D15} reglist); undef if we have 32 Dregs.
         * This is currently architecturally impossible, but we add the
         * check to stay in line with the pseudocode. Note that we must
         * emit code for the UNDEF so it takes precedence over the NOCP.
         */
        if (dc_isar_feature(aa32_simd_r32, s)) {
            unallocated_encoding(s);
            return true;
        }
    }

    /*
     * If not secure, UNDEF. We must emit code for this
     * rather than returning false so that this takes
     * precedence over the m-nocp.decode NOCP fallback.
     */
    if (!s->v8m_secure) {
        unallocated_encoding(s);
        return true;
    }
    /* If no fpu, NOP. */
    if (!dc_isar_feature(aa32_vfp, s)) {
        return true;
    }

    fptr = load_reg(s, a->rn);
    if (a->l) {
        gen_helper_v7m_vlldm(cpu_env, fptr);
    } else {
        gen_helper_v7m_vlstm(cpu_env, fptr);
    }
    tcg_temp_free_i32(fptr);

    /* End the TB, because we have updated FP control bits */
    s->base.is_jmp = DISAS_UPDATE_EXIT;
    return true;
}

static bool trans_VSCCLRM(DisasContext *s, arg_VSCCLRM *a)
{
    int btmreg, topreg;
    TCGv_i64 zero;
    TCGv_i32 aspen, sfpa;

    if (!dc_isar_feature(aa32_m_sec_state, s)) {
        /* Before v8.1M, fall through in decode to NOCP check */
        return false;
    }

    /* Explicitly UNDEF because this takes precedence over NOCP */
    if (!arm_dc_feature(s, ARM_FEATURE_M_MAIN) || !s->v8m_secure) {
        unallocated_encoding(s);
        return true;
    }

    if (!dc_isar_feature(aa32_vfp_simd, s)) {
        /* NOP if we have neither FP nor MVE */
        return true;
    }

    /*
     * If FPCCR.ASPEN != 0 && CONTROL_S.SFPA == 0 then there is no
     * active floating point context so we must NOP (without doing
     * any lazy state preservation or the NOCP check).
     */
    aspen = load_cpu_field(v7m.fpccr[M_REG_S]);
    sfpa = load_cpu_field(v7m.control[M_REG_S]);
    tcg_gen_andi_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
    tcg_gen_xori_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
    tcg_gen_andi_i32(sfpa, sfpa, R_V7M_CONTROL_SFPA_MASK);
    tcg_gen_or_i32(sfpa, sfpa, aspen);
    arm_gen_condlabel(s);
    tcg_gen_brcondi_i32(TCG_COND_EQ, sfpa, 0, s->condlabel);

    if (s->fp_excp_el != 0) {
        gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
                           syn_uncategorized(), s->fp_excp_el);
        return true;
    }

    topreg = a->vd + a->imm - 1;
    btmreg = a->vd;

    /* Convert to Sreg numbers if the insn was specified in Dregs */
    if (a->size == 3) {
        topreg = topreg * 2 + 1;
        btmreg *= 2;
    }

    if (topreg > 63 || (topreg > 31 && !(topreg & 1))) {
        /* UNPREDICTABLE: we choose to undef */
        unallocated_encoding(s);
        return true;
    }

    /* Silently ignore requests to clear D16-D31 if they don't exist */
    if (topreg > 31 && !dc_isar_feature(aa32_simd_r32, s)) {
        topreg = 31;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Zero the Sregs from btmreg to topreg inclusive. */
    zero = tcg_const_i64(0);
    if (btmreg & 1) {
        write_neon_element64(zero, btmreg >> 1, 1, MO_32);
        btmreg++;
    }
    for (; btmreg + 1 <= topreg; btmreg += 2) {
        write_neon_element64(zero, btmreg >> 1, 0, MO_64);
    }
    if (btmreg == topreg) {
        write_neon_element64(zero, btmreg >> 1, 0, MO_32);
        btmreg++;
    }
    assert(btmreg == topreg + 1);
    /* TODO: when MVE is implemented, zero VPR here */
    return true;
}
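
/*
 * Worked example of the zeroing loop above (register numbers chosen
 * for illustration): with btmreg = 1 and topreg = 6 we first clear the
 * odd low Sreg (s1, the high half of d0), then clear d1 and d2 as
 * whole 64-bit elements (covering s2-s5), and finally clear s6 as the
 * low half of d3, leaving btmreg == topreg + 1 == 7 for the assert.
 */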

static bool trans_NOCP(DisasContext *s, arg_nocp *a)
{
    /*
     * Handle M-profile early check for disabled coprocessor:
     * all we need to do here is emit the NOCP exception if
     * the coprocessor is disabled. Otherwise we return false
     * and the real VFP/etc decode will handle the insn.
     */
    assert(arm_dc_feature(s, ARM_FEATURE_M));

    if (a->cp == 11) {
        a->cp = 10;
    }
    if (arm_dc_feature(s, ARM_FEATURE_V8_1M) &&
        (a->cp == 8 || a->cp == 9 || a->cp == 14 || a->cp == 15)) {
        /* in v8.1M cp 8, 9, 14, 15 also are governed by the cp10 enable */
        a->cp = 10;
    }

    if (a->cp != 10) {
        gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
                           syn_uncategorized(), default_exception_el(s));
        return true;
    }

    if (s->fp_excp_el != 0) {
        gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
                           syn_uncategorized(), s->fp_excp_el);
        return true;
    }

    return false;
}

static bool trans_NOCP_8_1(DisasContext *s, arg_nocp *a)
{
    /* This range needs a coprocessor check for v8.1M and later only */
    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
        return false;
    }
    return trans_NOCP(s, a);
}

static bool trans_VINS(DisasContext *s, arg_VINS *a)
{
    TCGv_i32 rd, rm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Insert low half of Vm into high half of Vd */
    rm = tcg_temp_new_i32();
    rd = tcg_temp_new_i32();
    vfp_load_reg32(rm, a->vm);
    vfp_load_reg32(rd, a->vd);
    tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
    vfp_store_reg32(rd, a->vd);
    tcg_temp_free_i32(rm);
    tcg_temp_free_i32(rd);
    return true;
}
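
/*
 * tcg_gen_deposit_i32(rd, rd, rm, 16, 16) writes the low 16 bits of rm
 * into bits [31:16] of rd, i.e. the result is
 * ((vm & 0xffff) << 16) | (vd & 0xffff), which is exactly the VINS
 * "insert low half of Vm into high half of Vd" operation.
 */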

static bool trans_VMOVX(DisasContext *s, arg_VINS *a)
{
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Set Vd to high half of Vm */
    rm = tcg_temp_new_i32();
    vfp_load_reg32(rm, a->vm);
    tcg_gen_shri_i32(rm, rm, 16);
    vfp_store_reg32(rm, a->vd);
    tcg_temp_free_i32(rm);
    return true;
}