/*
 * ARM translation: AArch32 VFP instructions
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 * Copyright (c) 2019 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

/*
 * This file is intended to be included from translate.c; it uses
 * some macros and definitions provided by that file.
 * It might be possible to convert it to a standalone .c file eventually.
 */

/* Include the generated VFP decoder */
#include "decode-vfp.c.inc"
#include "decode-vfp-uncond.c.inc"
/*
 * The imm8 encodes the sign bit, enough bits to represent an exponent in
 * the range 01....1xx to 10....0xx, and the most significant 4 bits of
 * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
 */
uint64_t vfp_expand_imm(int size, uint8_t imm8)
{
    uint64_t imm;

    switch (size) {
    case MO_64:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
              (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
              extract32(imm8, 0, 6);
        imm <<= 48;
        break;
    case MO_32:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
              (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
              (extract32(imm8, 0, 6) << 3);
        imm <<= 16;
        break;
    case MO_16:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
              (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
              (extract32(imm8, 0, 6) << 6);
        break;
    default:
        g_assert_not_reached();
    }
    return imm;
}
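/*
 * Worked example (illustrative only, not part of the decoder): imm8 = 0x70
 * has sign 0, high exponent bit 1 and mantissa bits 110000, so the MO_32
 * case computes 0x3e00 | (0x30 << 3) = 0x3f80, and the shift into the high
 * half gives 0x3f800000, the IEEE-754 single-precision pattern for 1.0f:
 *
 *     assert(vfp_expand_imm(MO_32, 0x70) == 0x3f800000ULL);
 */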
/*
 * Return the offset of a 16-bit half of the specified VFP single-precision
 * register. If top is true, returns the top 16 bits; otherwise the bottom
 * 16 bits.
 */
static inline long vfp_f16_offset(unsigned reg, bool top)
    long offs = vfp_reg_offset(false, reg);
#ifdef HOST_WORDS_BIGENDIAN
/*
 * Generate code for M-profile lazy FP state preservation if needed;
 * this corresponds to the pseudocode PreserveFPState() function.
 */
static void gen_preserve_fp_state(DisasContext *s)
        /*
         * Lazy state saving affects external memory and also the NVIC,
         * so we must mark it as an IO operation for icount (and cause
         * this to be the last insn in the TB).
         */
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            s->base.is_jmp = DISAS_UPDATE_EXIT;
        gen_helper_v7m_preserve_fp_state(cpu_env);
        /*
         * If the preserve_fp_state helper doesn't throw an exception
         * then it will clear LSPACT; we don't need to repeat this for
         * any further FP insns in this TB.
         */
        s->v7m_lspact = false;
/*
 * Check that VFP access is enabled. If it is, do the necessary
 * M-profile lazy-FP handling and then return true.
 * If not, emit code to generate an appropriate exception and
 * return false.
 * The ignore_vfp_enabled argument specifies that we should ignore
 * whether VFP is enabled via FPEXC[EN]: this should be true for FMXR/FMRX
 * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
 */
static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
        /* M-profile handled this earlier, in disas_m_nocp() */
        assert(!arm_dc_feature(s, ARM_FEATURE_M));
        gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
                           syn_fp_access_trap(1, 0xe, false),

    if (!s->vfp_enabled && !ignore_vfp_enabled) {
        assert(!arm_dc_feature(s, ARM_FEATURE_M));
        unallocated_encoding(s);

    if (arm_dc_feature(s, ARM_FEATURE_M)) {
        /* Handle M-profile lazy FP state mechanics */

        /* Trigger lazy-state preservation if necessary */
        gen_preserve_fp_state(s);

        /* Update ownership of FP context: set FPCCR.S to match current state */
        if (s->v8m_fpccr_s_wrong) {
            tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
                tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
                tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
            store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
            /* Don't need to do this for any further FP insns in this TB */
            s->v8m_fpccr_s_wrong = false;

        if (s->v7m_new_fp_ctxt_needed) {
            /*
             * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA
             */
            TCGv_i32 control, fpscr;
            uint32_t bits = R_V7M_CONTROL_FPCA_MASK;

            fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
            gen_helper_vfp_set_fpscr(cpu_env, fpscr);
            tcg_temp_free_i32(fpscr);
            /*
             * We don't need to arrange to end the TB, because the only
             * parts of FPSCR which we cache in the TB flags are the VECLEN
             * and VECSTRIDE, and those don't exist for M-profile.
             */
                bits |= R_V7M_CONTROL_SFPA_MASK;
            control = load_cpu_field(v7m.control[M_REG_S]);
            tcg_gen_ori_i32(control, control, bits);
            store_cpu_field(control, v7m.control[M_REG_S]);
            /* Don't need to do this for any further FP insns in this TB */
            s->v7m_new_fp_ctxt_needed = false;

/*
 * The most usual kind of VFP access check, for everything except
 * FMXR/FMRX to the always-available special registers.
 */
static bool vfp_access_check(DisasContext *s)
    return full_vfp_access_check(s, false);
static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
    if (!dc_isar_feature(aa32_vsel, s)) {

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vm | a->vn | a->vd) & 0x10)) {

    if (!vfp_access_check(s)) {

        TCGv_i64 frn, frm, dest;
        TCGv_i64 tmp, zero, zf, nf, vf;

        zero = tcg_const_i64(0);

        frn = tcg_temp_new_i64();
        frm = tcg_temp_new_i64();
        dest = tcg_temp_new_i64();

        zf = tcg_temp_new_i64();
        nf = tcg_temp_new_i64();
        vf = tcg_temp_new_i64();

        tcg_gen_extu_i32_i64(zf, cpu_ZF);
        tcg_gen_ext_i32_i64(nf, cpu_NF);
        tcg_gen_ext_i32_i64(vf, cpu_VF);

        vfp_load_reg64(frn, rn);
        vfp_load_reg64(frm, rm);

            tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
            tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
        case 2: /* ge: N == V -> N ^ V == 0 */
            tmp = tcg_temp_new_i64();
            tcg_gen_xor_i64(tmp, vf, nf);
            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
            tcg_temp_free_i64(tmp);
        case 3: /* gt: !Z && N == V */
            tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
            tmp = tcg_temp_new_i64();
            tcg_gen_xor_i64(tmp, vf, nf);
            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
            tcg_temp_free_i64(tmp);

        vfp_store_reg64(dest, rd);
        tcg_temp_free_i64(frn);
        tcg_temp_free_i64(frm);
        tcg_temp_free_i64(dest);

        tcg_temp_free_i64(zf);
        tcg_temp_free_i64(nf);
        tcg_temp_free_i64(vf);

        tcg_temp_free_i64(zero);

        TCGv_i32 frn, frm, dest;

        zero = tcg_const_i32(0);

        frn = tcg_temp_new_i32();
        frm = tcg_temp_new_i32();
        dest = tcg_temp_new_i32();
        vfp_load_reg32(frn, rn);
        vfp_load_reg32(frm, rm);

            tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
            tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
        case 2: /* ge: N == V -> N ^ V == 0 */
            tmp = tcg_temp_new_i32();
            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
            tcg_temp_free_i32(tmp);
        case 3: /* gt: !Z && N == V */
            tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
            tmp = tcg_temp_new_i32();
            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
            tcg_temp_free_i32(tmp);

            /* For fp16 the top half is always zeroes */
            tcg_gen_andi_i32(dest, dest, 0xffff);
        vfp_store_reg32(dest, rd);
        tcg_temp_free_i32(frn);
        tcg_temp_free_i32(frm);
        tcg_temp_free_i32(dest);

        tcg_temp_free_i32(zero);
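/*
 * Reference model of the selection above (illustrative sketch only; the
 * translator emits movcond ops rather than calling anything like this).
 * 'cc' is a->cc, and n/z/v are the CPSR condition flags.
 */
static inline uint32_t vsel_ref(int cc, bool n, bool z, bool v,
                                uint32_t vn, uint32_t vm)
{
    bool take_n;

    switch (cc) {
    case 0: /* eq: Z set */
        take_n = z;
        break;
    case 1: /* vs: V set */
        take_n = v;
        break;
    case 2: /* ge: N == V */
        take_n = (n == v);
        break;
    default: /* 3, gt: !Z && N == V */
        take_n = !z && (n == v);
        break;
    }
    return take_n ? vn : vm;
}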
/*
 * Table for converting the most common AArch32 encoding of
 * rounding mode to arm_fprounding order (which matches the
 * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
 */
static const uint8_t fp_decode_rm[] = {
    FPROUNDING_TIEAWAY,
    FPROUNDING_TIEEVEN,
    FPROUNDING_POSINF,
    FPROUNDING_NEGINF,
};
static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
    int rounding = fp_decode_rm[a->rm];

    if (!dc_isar_feature(aa32_vrint, s)) {

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vm | a->vd) & 0x10)) {

    if (!vfp_access_check(s)) {

        fpst = fpstatus_ptr(FPST_FPCR_F16);
        fpst = fpstatus_ptr(FPST_FPCR);

    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);

        tcg_op = tcg_temp_new_i64();
        tcg_res = tcg_temp_new_i64();
        vfp_load_reg64(tcg_op, rm);
        gen_helper_rintd(tcg_res, tcg_op, fpst);
        vfp_store_reg64(tcg_res, rd);
        tcg_temp_free_i64(tcg_op);
        tcg_temp_free_i64(tcg_res);

        tcg_op = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        vfp_load_reg32(tcg_op, rm);
            gen_helper_rinth(tcg_res, tcg_op, fpst);
            gen_helper_rints(tcg_res, tcg_op, fpst);
        vfp_store_reg32(tcg_res, rd);
        tcg_temp_free_i32(tcg_op);
        tcg_temp_free_i32(tcg_res);

    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    tcg_temp_free_i32(tcg_rmode);

    tcg_temp_free_ptr(fpst);
static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
    TCGv_i32 tcg_rmode, tcg_shift;
    int rounding = fp_decode_rm[a->rm];
    bool is_signed = a->op;

    if (!dc_isar_feature(aa32_vcvt_dr, s)) {

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {

    if (!vfp_access_check(s)) {

        fpst = fpstatus_ptr(FPST_FPCR_F16);
        fpst = fpstatus_ptr(FPST_FPCR);

    tcg_shift = tcg_const_i32(0);

    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);

        TCGv_i64 tcg_double, tcg_res;

        tcg_double = tcg_temp_new_i64();
        tcg_res = tcg_temp_new_i64();
        tcg_tmp = tcg_temp_new_i32();
        vfp_load_reg64(tcg_double, rm);
            gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
            gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
        tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
        vfp_store_reg32(tcg_tmp, rd);
        tcg_temp_free_i32(tcg_tmp);
        tcg_temp_free_i64(tcg_res);
        tcg_temp_free_i64(tcg_double);

        TCGv_i32 tcg_single, tcg_res;
        tcg_single = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        vfp_load_reg32(tcg_single, rm);
                gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst);
                gen_helper_vfp_toulh(tcg_res, tcg_single, tcg_shift, fpst);
                gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
                gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
        vfp_store_reg32(tcg_res, rd);
        tcg_temp_free_i32(tcg_res);
        tcg_temp_free_i32(tcg_single);

    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    tcg_temp_free_i32(tcg_rmode);

    tcg_temp_free_i32(tcg_shift);

    tcg_temp_free_ptr(fpst);
static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
    /* VMOV scalar to general purpose register */

    /* SIZE == MO_32 is a VFP instruction; otherwise NEON. */
    if (a->size == MO_32
        ? !dc_isar_feature(aa32_fpsp_v2, s)
        : !arm_dc_feature(s, ARM_FEATURE_NEON)) {

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {

    if (!vfp_access_check(s)) {

    tmp = tcg_temp_new_i32();
    read_neon_element32(tmp, a->vn, a->index, a->size | (a->u ? 0 : MO_SIGN));
    store_reg(s, a->rt, tmp);

static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
    /* VMOV general purpose register to scalar */

    /* SIZE == MO_32 is a VFP instruction; otherwise NEON. */
    if (a->size == MO_32
        ? !dc_isar_feature(aa32_fpsp_v2, s)
        : !arm_dc_feature(s, ARM_FEATURE_NEON)) {

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {

    if (!vfp_access_check(s)) {

    tmp = load_reg(s, a->rt);
    write_neon_element32(tmp, a->vn, a->index, a->size);
    tcg_temp_free_i32(tmp);

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
    /* VDUP (general purpose register) */

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {

    if (a->q && (a->vn & 1)) {

    vec_size = a->q ? 16 : 8;

    if (!vfp_access_check(s)) {

    tmp = load_reg(s, a->rt);
    tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn),
                         vec_size, vec_size, tmp);
    tcg_temp_free_i32(tmp);
/*
 * M-profile provides two different sets of instructions that can
 * access floating point system registers: VMSR/VMRS (which move
 * to/from a general purpose register) and VLDR/VSTR sysreg (which
 * move directly to/from memory). In some cases there are also side
 * effects which must happen after any write to memory (which could
 * cause an exception). So we implement the common logic for the
 * sysreg access in gen_M_fp_sysreg_write() and gen_M_fp_sysreg_read(),
 * which take pointers to callback functions which will perform the
 * actual "read/write general purpose register" and "read/write
 * memory" operations.
 */

/*
 * Emit code to store the sysreg to its final destination; frees the
 * TCG temp 'value' it is passed.
 */
typedef void fp_sysreg_storefn(DisasContext *s, void *opaque, TCGv_i32 value);

/*
 * Emit code to load the value to be copied to the sysreg; returns
 * a new TCG temporary
 */
typedef TCGv_i32 fp_sysreg_loadfn(DisasContext *s, void *opaque);
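/*
 * Illustrative sketch of the callback contract (a hypothetical helper,
 * not one of the callbacks this file actually registers): a storefn
 * takes ownership of 'value' and must free it, even when it has nowhere
 * to put the result.
 */
static inline void fp_sysreg_discard(DisasContext *s, void *opaque,
                                     TCGv_i32 value)
{
    /* The storefn owns 'value', so it is responsible for freeing it. */
    tcg_temp_free_i32(value);
}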
/* Common decode/access checks for fp sysreg read/write */
typedef enum FPSysRegCheckResult {
    FPSysRegCheckFailed, /* caller should return false */
    FPSysRegCheckDone, /* caller should return true */
    FPSysRegCheckContinue, /* caller should continue generating code */
} FPSysRegCheckResult;

static FPSysRegCheckResult fp_sysreg_checks(DisasContext *s, int regno)
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return FPSysRegCheckFailed;

    case QEMU_VFP_FPSCR_NZCV:
    case ARM_VFP_FPSCR_NZCVQC:
        if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
    case ARM_VFP_FPCXT_S:
        if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
        if (!s->v8m_secure) {
        return FPSysRegCheckFailed;

    if (!vfp_access_check(s)) {
        return FPSysRegCheckDone;

    return FPSysRegCheckContinue;
static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
                                  fp_sysreg_loadfn *loadfn,
    /* Do a write to an M-profile floating point system register */

    switch (fp_sysreg_checks(s, regno)) {
    case FPSysRegCheckFailed:
    case FPSysRegCheckDone:
    case FPSysRegCheckContinue:

        tmp = loadfn(s, opaque);
        gen_helper_vfp_set_fpscr(cpu_env, tmp);
        tcg_temp_free_i32(tmp);
    case ARM_VFP_FPSCR_NZCVQC:
        tmp = loadfn(s, opaque);
        /*
         * TODO: when we implement MVE, write the QC bit.
         * For non-MVE, QC is RES0.
         */
        tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
        fpscr = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
        tcg_gen_andi_i32(fpscr, fpscr, ~FPCR_NZCV_MASK);
        tcg_gen_or_i32(fpscr, fpscr, tmp);
        store_cpu_field(fpscr, vfp.xregs[ARM_VFP_FPSCR]);
        tcg_temp_free_i32(tmp);
    case ARM_VFP_FPCXT_S:
        TCGv_i32 sfpa, control, fpscr;
        /* Set FPSCR[27:0] and CONTROL.SFPA from value */
        tmp = loadfn(s, opaque);
        sfpa = tcg_temp_new_i32();
        tcg_gen_shri_i32(sfpa, tmp, 31);
        control = load_cpu_field(v7m.control[M_REG_S]);
        tcg_gen_deposit_i32(control, control, sfpa,
                            R_V7M_CONTROL_SFPA_SHIFT, 1);
        store_cpu_field(control, v7m.control[M_REG_S]);
        fpscr = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
        tcg_gen_andi_i32(fpscr, fpscr, FPCR_NZCV_MASK);
        tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
        tcg_gen_or_i32(fpscr, fpscr, tmp);
        store_cpu_field(fpscr, vfp.xregs[ARM_VFP_FPSCR]);
        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(sfpa);
        g_assert_not_reached();
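/*
 * FPCXT_S layout recap (informational): bit [31] of the transferred value
 * is CONTROL.SFPA and bits [27:0] are FPSCR. The write above deliberately
 * leaves FPSCR.NZCV (bits [31:28]) untouched, which is why both sides are
 * masked with FPCR_NZCV_MASK before being OR'd together.
 */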
static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
                                 fp_sysreg_storefn *storefn,
    /* Do a read from an M-profile floating point system register */

    switch (fp_sysreg_checks(s, regno)) {
    case FPSysRegCheckFailed:
    case FPSysRegCheckDone:
    case FPSysRegCheckContinue:

        tmp = tcg_temp_new_i32();
        gen_helper_vfp_get_fpscr(tmp, cpu_env);
        storefn(s, opaque, tmp);
    case ARM_VFP_FPSCR_NZCVQC:
        /*
         * TODO: MVE has a QC bit, which we probably won't store
         * in the xregs[] field. For non-MVE, where QC is RES0,
         * we can just fall through to the FPSCR_NZCV case.
         */
    case QEMU_VFP_FPSCR_NZCV:
        /*
         * Read just NZCV; this is a special case to avoid the
         * helper call for the "VMRS to CPSR.NZCV" insn.
         */
        tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
        tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
        storefn(s, opaque, tmp);
    case ARM_VFP_FPCXT_S:
        TCGv_i32 control, sfpa, fpscr;
        /* Bits [27:0] from FPSCR, bit [31] from CONTROL.SFPA */
        tmp = tcg_temp_new_i32();
        sfpa = tcg_temp_new_i32();
        gen_helper_vfp_get_fpscr(tmp, cpu_env);
        tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
        control = load_cpu_field(v7m.control[M_REG_S]);
        tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
        tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
        tcg_gen_or_i32(tmp, tmp, sfpa);
        tcg_temp_free_i32(sfpa);
        /*
         * Store result before updating FPSCR etc, in case
         * it is a memory write which causes an exception.
         */
        storefn(s, opaque, tmp);
        /*
         * Now we must reset FPSCR from FPDSCR_NS, and clear
         * CONTROL.SFPA; so we'll end the TB here.
         */
        tcg_gen_andi_i32(control, control, ~R_V7M_CONTROL_SFPA_MASK);
        store_cpu_field(control, v7m.control[M_REG_S]);
        fpscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
        gen_helper_vfp_set_fpscr(cpu_env, fpscr);
        tcg_temp_free_i32(fpscr);
        g_assert_not_reached();
static void fp_sysreg_to_gpr(DisasContext *s, void *opaque, TCGv_i32 value)
    arg_VMSR_VMRS *a = opaque;

    if (a->rt == 15) {
        /* Set the 4 flag bits in the CPSR */
        gen_set_nzcv(value);
        tcg_temp_free_i32(value);
    } else {
        store_reg(s, a->rt, value);
    }

static TCGv_i32 gpr_to_fp_sysreg(DisasContext *s, void *opaque)
    arg_VMSR_VMRS *a = opaque;

    return load_reg(s, a->rt);

static bool gen_M_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
    /*
     * Accesses to R15 are UNPREDICTABLE; we choose to UNDEF.
     * FPSCR -> r15 is a special case which writes to the PSR flags;
     * set a->reg to a special value to tell gen_M_fp_sysreg_read()
     * we only care about the top 4 bits of FPSCR there.
     */
    if (a->l && a->reg == ARM_VFP_FPSCR) {
        a->reg = QEMU_VFP_FPSCR_NZCV;

        /* VMRS, move FP system register to gp register */
        return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_gpr, a);
        /* VMSR, move gp register to FP system register */
        return gen_M_fp_sysreg_write(s, a->reg, gpr_to_fp_sysreg, a);
static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
    bool ignore_vfp_enabled = false;

    if (arm_dc_feature(s, ARM_FEATURE_M)) {
        return gen_M_VMSR_VMRS(s, a);

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {

        /*
         * VFPv2 allows access to FPSID from userspace; VFPv3 restricts
         * all ID registers to privileged access only.
         */
        if (IS_USER(s) && dc_isar_feature(aa32_fpsp_v3, s)) {
        ignore_vfp_enabled = true;
        if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
        ignore_vfp_enabled = true;
        if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) {
        ignore_vfp_enabled = true;
        ignore_vfp_enabled = true;
    case ARM_VFP_FPINST2:
        /* Not present in VFPv3 */
        if (IS_USER(s) || dc_isar_feature(aa32_fpsp_v3, s)) {

    if (!full_vfp_access_check(s, ignore_vfp_enabled)) {

        /* VMRS, move VFP special register to gp register */
            if (s->current_el == 1) {
                TCGv_i32 tcg_reg, tcg_rt;

                gen_set_pc_im(s, s->pc_curr);
                tcg_reg = tcg_const_i32(a->reg);
                tcg_rt = tcg_const_i32(a->rt);
                gen_helper_check_hcr_el2_trap(cpu_env, tcg_rt, tcg_reg);
                tcg_temp_free_i32(tcg_reg);
                tcg_temp_free_i32(tcg_rt);
        case ARM_VFP_FPINST2:
            tmp = load_cpu_field(vfp.xregs[a->reg]);
                tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
                tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
                tmp = tcg_temp_new_i32();
                gen_helper_vfp_get_fpscr(tmp, cpu_env);
            g_assert_not_reached();

        if (a->rt == 15) {
            /* Set the 4 flag bits in the CPSR. */
            gen_set_nzcv(tmp);
            tcg_temp_free_i32(tmp);
        } else {
            store_reg(s, a->rt, tmp);
        }
        /* VMSR, move gp register to VFP special register */
            /* Writes are ignored. */
            tmp = load_reg(s, a->rt);
            gen_helper_vfp_set_fpscr(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            /*
             * TODO: VFP subarchitecture support.
             * For now, keep the EN bit only.
             */
            tmp = load_reg(s, a->rt);
            tcg_gen_andi_i32(tmp, tmp, 1 << 30);
            store_cpu_field(tmp, vfp.xregs[a->reg]);
        case ARM_VFP_FPINST2:
            tmp = load_reg(s, a->rt);
            store_cpu_field(tmp, vfp.xregs[a->reg]);
            g_assert_not_reached();
static void fp_sysreg_to_memory(DisasContext *s, void *opaque, TCGv_i32 value)
    arg_vldr_sysreg *a = opaque;
    uint32_t offset = a->imm;

    addr = load_reg(s, a->rn);
        tcg_gen_addi_i32(addr, addr, offset);

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        gen_helper_v8m_stackcheck(cpu_env, addr);

    gen_aa32_st_i32(s, value, addr, get_mem_index(s),
                    MO_UL | MO_ALIGN | s->be_data);
    tcg_temp_free_i32(value);

            tcg_gen_addi_i32(addr, addr, offset);
        store_reg(s, a->rn, addr);
        tcg_temp_free_i32(addr);

static TCGv_i32 memory_to_fp_sysreg(DisasContext *s, void *opaque)
    arg_vldr_sysreg *a = opaque;
    uint32_t offset = a->imm;
    TCGv_i32 value = tcg_temp_new_i32();

    addr = load_reg(s, a->rn);
        tcg_gen_addi_i32(addr, addr, offset);

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        gen_helper_v8m_stackcheck(cpu_env, addr);

    gen_aa32_ld_i32(s, value, addr, get_mem_index(s),
                    MO_UL | MO_ALIGN | s->be_data);

            tcg_gen_addi_i32(addr, addr, offset);
        store_reg(s, a->rn, addr);
        tcg_temp_free_i32(addr);

static bool trans_VLDR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
    return gen_M_fp_sysreg_write(s, a->reg, memory_to_fp_sysreg, a);

static bool trans_VSTR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
    return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_memory, a);
static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
    if (!dc_isar_feature(aa32_fp16_arith, s)) {

        /* UNPREDICTABLE; we choose to UNDEF */

    if (!vfp_access_check(s)) {

        /* VFP to general purpose register */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vn);
        tcg_gen_andi_i32(tmp, tmp, 0xffff);
        store_reg(s, a->rt, tmp);
        /* general purpose register to VFP */
        tmp = load_reg(s, a->rt);
        tcg_gen_andi_i32(tmp, tmp, 0xffff);
        vfp_store_reg32(tmp, a->vn);
        tcg_temp_free_i32(tmp);

static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {

    if (!vfp_access_check(s)) {

        /* VFP to general purpose register */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vn);
        if (a->rt == 15) {
            /* Set the 4 flag bits in the CPSR. */
            gen_set_nzcv(tmp);
            tcg_temp_free_i32(tmp);
        } else {
            store_reg(s, a->rt, tmp);
        }
        /* general purpose register to VFP */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vn);
        tcg_temp_free_i32(tmp);
static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {

    /*
     * VMOV between two general-purpose registers and two single precision
     * floating point registers
     */
    if (!vfp_access_check(s)) {

        /* fpreg to gpreg */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm);
        store_reg(s, a->rt, tmp);
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm + 1);
        store_reg(s, a->rt2, tmp);
        /* gpreg to fpreg */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vm);
        tcg_temp_free_i32(tmp);
        tmp = load_reg(s, a->rt2);
        vfp_store_reg32(tmp, a->vm + 1);
        tcg_temp_free_i32(tmp);

static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
    /*
     * VMOV between two general-purpose registers and one double precision
     * floating point register. Note that this does not require support
     * for double precision arithmetic.
     */
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {

    if (!vfp_access_check(s)) {

        /* fpreg to gpreg */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm * 2);
        store_reg(s, a->rt, tmp);
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm * 2 + 1);
        store_reg(s, a->rt2, tmp);
        /* gpreg to fpreg */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vm * 2);
        tcg_temp_free_i32(tmp);
        tmp = load_reg(s, a->rt2);
        vfp_store_reg32(tmp, a->vm * 2 + 1);
        tcg_temp_free_i32(tmp);
static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a)
    if (!dc_isar_feature(aa32_fp16_arith, s)) {

    if (!vfp_access_check(s)) {

    /* imm8 field is offset/2 for fp16, unlike fp32 and fp64 */
    offset = a->imm << 1;

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i32();
        gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
        vfp_store_reg32(tmp, a->vd);
        vfp_load_reg32(tmp, a->vd);
        gen_aa32_st16(s, tmp, addr, get_mem_index(s));
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(addr);

static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {

    if (!vfp_access_check(s)) {

    offset = a->imm << 2;

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i32();
        gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
        vfp_store_reg32(tmp, a->vd);
        vfp_load_reg32(tmp, a->vd);
        gen_aa32_st32(s, tmp, addr, get_mem_index(s));
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(addr);

static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
    /* Note that this does not require support for double arithmetic. */
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {

    if (!vfp_access_check(s)) {

    offset = a->imm << 2;

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i64();
        gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
        vfp_store_reg64(tmp, a->vd);
        vfp_load_reg64(tmp, a->vd);
        gen_aa32_st64(s, tmp, addr, get_mem_index(s));
    tcg_temp_free_i64(tmp);
    tcg_temp_free_i32(addr);
static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {

    if (n == 0 || (a->vd + n) > 32) {
        /*
         * UNPREDICTABLE cases for bad immediates: we choose to
         * UNDEF to avoid generating huge numbers of TCG ops
         */
    if (a->rn == 15 && a->w) {
        /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */

    if (!vfp_access_check(s)) {

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, 0);
        tcg_gen_addi_i32(addr, addr, -(a->imm << 2));

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        /*
         * Here 'addr' is the lowest address we will store to,
         * and is either the old SP (if post-increment) or
         * the new SP (if pre-decrement). For post-increment
         * where the old value is below the limit and the new
         * value is above, it is UNKNOWN whether the limit check
         * triggers; we choose to trigger.
         */
        gen_helper_v8m_stackcheck(cpu_env, addr);

    tmp = tcg_temp_new_i32();
    for (i = 0; i < n; i++) {
            gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
            vfp_store_reg32(tmp, a->vd + i);
            vfp_load_reg32(tmp, a->vd + i);
            gen_aa32_st32(s, tmp, addr, get_mem_index(s));
        tcg_gen_addi_i32(addr, addr, offset);
    tcg_temp_free_i32(tmp);

            offset = -offset * n;
        tcg_gen_addi_i32(addr, addr, offset);
        store_reg(s, a->rn, addr);
        tcg_temp_free_i32(addr);
static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
    /* Note that this does not require support for double arithmetic. */
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {

    if (n == 0 || (a->vd + n) > 32 || n > 16) {
        /*
         * UNPREDICTABLE cases for bad immediates: we choose to
         * UNDEF to avoid generating huge numbers of TCG ops
         */
    if (a->rn == 15 && a->w) {
        /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) {

    if (!vfp_access_check(s)) {

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, 0);
        tcg_gen_addi_i32(addr, addr, -(a->imm << 2));

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        /*
         * Here 'addr' is the lowest address we will store to,
         * and is either the old SP (if post-increment) or
         * the new SP (if pre-decrement). For post-increment
         * where the old value is below the limit and the new
         * value is above, it is UNKNOWN whether the limit check
         * triggers; we choose to trigger.
         */
        gen_helper_v8m_stackcheck(cpu_env, addr);

    tmp = tcg_temp_new_i64();
    for (i = 0; i < n; i++) {
            gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
            vfp_store_reg64(tmp, a->vd + i);
            vfp_load_reg64(tmp, a->vd + i);
            gen_aa32_st64(s, tmp, addr, get_mem_index(s));
        tcg_gen_addi_i32(addr, addr, offset);
    tcg_temp_free_i64(tmp);

            offset = -offset * n;
        } else if (a->imm & 1) {
            tcg_gen_addi_i32(addr, addr, offset);
        store_reg(s, a->rn, addr);
        tcg_temp_free_i32(addr);
/*
 * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp().
 * The callback should emit code to write a value to vd. If
 * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd
 * will contain the old value of the relevant VFP register;
 * otherwise it must be written to only.
 */
typedef void VFPGen3OpSPFn(TCGv_i32 vd,
                           TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst);
typedef void VFPGen3OpDPFn(TCGv_i64 vd,
                           TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);

/*
 * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp().
 * The callback should emit code to write a value to vd (which
 * should be written to only).
 */
typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);

/*
 * Return true if the specified S reg is in a scalar bank
 * (ie if it is s0..s7)
 */
static inline bool vfp_sreg_is_scalar(int reg)
    return (reg & 0x18) == 0;

/*
 * Return true if the specified D reg is in a scalar bank
 * (ie if it is d0..d3 or d16..d19)
 */
static inline bool vfp_dreg_is_scalar(int reg)
    return (reg & 0xc) == 0;

/*
 * Advance the S reg number forwards by delta within its bank
 * (ie increment the low 3 bits but leave the rest the same)
 */
static inline int vfp_advance_sreg(int reg, int delta)
    return ((reg + delta) & 0x7) | (reg & ~0x7);

/*
 * Advance the D reg number forwards by delta within its bank
 * (ie increment the low 2 bits but leave the rest the same)
 */
static inline int vfp_advance_dreg(int reg, int delta)
    return ((reg + delta) & 0x3) | (reg & ~0x3);
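/*
 * Worked example (informational): the short-vector bank containing s14
 * is s8..s15, so advancing by 3 wraps around within that bank:
 *   vfp_advance_sreg(14, 3) == ((14 + 3) & 0x7) | (14 & ~0x7)
 *                           == 1 | 8 == 9
 * i.e. s14 advances to s9 rather than crossing into s16..s23.
 */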
/*
 * Perform a 3-operand VFP data processing instruction. fn is the
 * callback to do the actual operation; this function deals with the
 * code to handle looping around for VFP vector processing.
 */
static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 f0, f1, fd;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {

    if (!vfp_access_check(s)) {

        /* Figure out what type of vector operation this is. */
        if (vfp_sreg_is_scalar(vd)) {
            delta_d = s->vec_stride + 1;
            if (vfp_sreg_is_scalar(vm)) {
                /* mixed scalar/vector */

    f0 = tcg_temp_new_i32();
    f1 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();
    fpst = fpstatus_ptr(FPST_FPCR);

    vfp_load_reg32(f0, vn);
    vfp_load_reg32(f1, vm);

            vfp_load_reg32(fd, vd);
        fn(fd, f0, f1, fpst);
        vfp_store_reg32(fd, vd);

        /* Set up the operands for the next iteration */
        vd = vfp_advance_sreg(vd, delta_d);
        vn = vfp_advance_sreg(vn, delta_d);
        vfp_load_reg32(f0, vn);
            vm = vfp_advance_sreg(vm, delta_m);
            vfp_load_reg32(f1, vm);

    tcg_temp_free_i32(f0);
    tcg_temp_free_i32(f1);
    tcg_temp_free_i32(fd);
    tcg_temp_free_ptr(fpst);
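/*
 * Short-vector example (informational): with a vector length of four and
 * a stride of one (veclen = 3 and delta_d = 1 in the code above), a
 * VADD.F32 with vd = s8, vn = s16, vm = s24 performs four additions,
 * advancing each register within its bank: s8..s11 = s16..s19 + s24..s27.
 * Had vm been in s0..s7 it would have been treated as a scalar and reused
 * for every element (the "mixed scalar/vector" case above).
 */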
static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
    /*
     * Do a half-precision operation. Functionally this is
     * the same as do_vfp_3op_sp(), except:
     * - it uses the FPST_FPCR_F16 float status
     * - it doesn't need the VFP vector handling (fp16 is a
     *   v8 feature, and in v8 VFP vectors don't exist)
     * - it does the aa32_fp16_arith feature test
     */
    TCGv_i32 f0, f1, fd;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {

    if (s->vec_len != 0 || s->vec_stride != 0) {

    if (!vfp_access_check(s)) {

    f0 = tcg_temp_new_i32();
    f1 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();
    fpst = fpstatus_ptr(FPST_FPCR_F16);

    vfp_load_reg32(f0, vn);
    vfp_load_reg32(f1, vm);

        vfp_load_reg32(fd, vd);
    fn(fd, f0, f1, fpst);
    vfp_store_reg32(fd, vd);

    tcg_temp_free_i32(f0);
    tcg_temp_free_i32(f1);
    tcg_temp_free_i32(fd);
    tcg_temp_free_ptr(fpst);
static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 f0, f1, fd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) {

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {

    if (!vfp_access_check(s)) {

        /* Figure out what type of vector operation this is. */
        if (vfp_dreg_is_scalar(vd)) {
            delta_d = (s->vec_stride >> 1) + 1;
            if (vfp_dreg_is_scalar(vm)) {
                /* mixed scalar/vector */

    f0 = tcg_temp_new_i64();
    f1 = tcg_temp_new_i64();
    fd = tcg_temp_new_i64();
    fpst = fpstatus_ptr(FPST_FPCR);

    vfp_load_reg64(f0, vn);
    vfp_load_reg64(f1, vm);

            vfp_load_reg64(fd, vd);
        fn(fd, f0, f1, fpst);
        vfp_store_reg64(fd, vd);

        /* Set up the operands for the next iteration */
        vd = vfp_advance_dreg(vd, delta_d);
        vn = vfp_advance_dreg(vn, delta_d);
        vfp_load_reg64(f0, vn);
            vm = vfp_advance_dreg(vm, delta_m);
            vfp_load_reg64(f1, vm);

    tcg_temp_free_i64(f0);
    tcg_temp_free_i64(f1);
    tcg_temp_free_i64(fd);
    tcg_temp_free_ptr(fpst);
static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {

    if (!vfp_access_check(s)) {

        /* Figure out what type of vector operation this is. */
        if (vfp_sreg_is_scalar(vd)) {
            delta_d = s->vec_stride + 1;
            if (vfp_sreg_is_scalar(vm)) {
                /* mixed scalar/vector */

    f0 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();

    vfp_load_reg32(f0, vm);

        vfp_store_reg32(fd, vd);

            /* single source one-many */
                vd = vfp_advance_sreg(vd, delta_d);
                vfp_store_reg32(fd, vd);

        /* Set up the operands for the next iteration */
        vd = vfp_advance_sreg(vd, delta_d);
        vm = vfp_advance_sreg(vm, delta_m);
        vfp_load_reg32(f0, vm);

    tcg_temp_free_i32(f0);
    tcg_temp_free_i32(fd);
static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
    /*
     * Do a half-precision operation. Functionally this is
     * the same as do_vfp_2op_sp(), except:
     * - it doesn't need the VFP vector handling (fp16 is a
     *   v8 feature, and in v8 VFP vectors don't exist)
     * - it does the aa32_fp16_arith feature test
     */

    if (!dc_isar_feature(aa32_fp16_arith, s)) {

    if (s->vec_len != 0 || s->vec_stride != 0) {

    if (!vfp_access_check(s)) {

    f0 = tcg_temp_new_i32();
    vfp_load_reg32(f0, vm);
    fn(f0, f0);
    vfp_store_reg32(f0, vd);
    tcg_temp_free_i32(f0);
static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) {

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {

    if (!vfp_access_check(s)) {

        /* Figure out what type of vector operation this is. */
        if (vfp_dreg_is_scalar(vd)) {
            delta_d = (s->vec_stride >> 1) + 1;
            if (vfp_dreg_is_scalar(vm)) {
                /* mixed scalar/vector */

    f0 = tcg_temp_new_i64();
    fd = tcg_temp_new_i64();

    vfp_load_reg64(f0, vm);

        vfp_store_reg64(fd, vd);

            /* single source one-many */
                vd = vfp_advance_dreg(vd, delta_d);
                vfp_store_reg64(fd, vd);

        /* Set up the operands for the next iteration */
        vd = vfp_advance_dreg(vd, delta_d);
        vm = vfp_advance_dreg(vm, delta_m);
        vfp_load_reg64(f0, vm);

    tcg_temp_free_i64(f0);
    tcg_temp_free_i64(fd);
static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);

static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
    return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);

static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);

static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
    return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);

static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);

static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
    return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
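/*
 * Why the operand order matters (informational): Arm NaN propagation
 * prefers the first operand, so with two quiet-NaN inputs
 * FPAdd(fd, FPMul(fn, fm)) and FPAdd(FPMul(fn, fm), fd) can return
 * different payloads. Passing vd as the first addend above is what keeps
 * VMLA's result bit-exact with the pseudocode.
 */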
static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(tmp, tmp);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);

static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
    return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);

static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(tmp, tmp);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);

static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
    return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);

static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(tmp, tmp);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);

static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
    return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(vd, vd);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);

static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
    return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);

static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(vd, vd);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);

static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
    return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);

static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(vd, vd);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);

static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
    return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
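/*
 * Concretely (informational): if A is a NaN, FPNeg(A) flips only its sign
 * bit, and (-A) + B then propagates A's payload because A is the first
 * operand of the add; B - A would propagate B's payload instead when both
 * inputs are NaNs, so the "simplified" form is observably different.
 */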
static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(tmp, tmp);
    gen_helper_vfp_negh(vd, vd);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);

static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
    return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);

static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(tmp, tmp);
    gen_helper_vfp_negs(vd, vd);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);

static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
    return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);

static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(tmp, tmp);
    gen_helper_vfp_negd(vd, vd);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);

static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
    return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a)
    return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false);

static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
    return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);

static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
    return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);

static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_mulh(vd, vn, vm, fpst);
    gen_helper_vfp_negh(vd, vd);

static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
    return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);

static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_muls(vd, vn, vm, fpst);
    gen_helper_vfp_negs(vd, vd);

static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
    return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false);

static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_muld(vd, vn, vm, fpst);
    gen_helper_vfp_negd(vd, vd);

static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
    return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);

static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a)
    return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false);

static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
    return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);

static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
    return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);

static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a)
    return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false);

static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
    return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);

static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
    return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);

static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a)
    return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false);

static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
    return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);

static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
    return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a)
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
    return do_vfp_3op_hp(s, gen_helper_vfp_minnumh,
                         a->vd, a->vn, a->vm, false);

static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a)
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
    return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh,
                         a->vd, a->vn, a->vm, false);

static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a)
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
    return do_vfp_3op_sp(s, gen_helper_vfp_minnums,
                         a->vd, a->vn, a->vm, false);

static bool trans_VMAXNM_sp(DisasContext *s, arg_VMAXNM_sp *a)
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
    return do_vfp_3op_sp(s, gen_helper_vfp_maxnums,
                         a->vd, a->vn, a->vm, false);

static bool trans_VMINNM_dp(DisasContext *s, arg_VMINNM_dp *a)
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
    return do_vfp_3op_dp(s, gen_helper_vfp_minnumd,
                         a->vd, a->vn, a->vm, false);

static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a)
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
    return do_vfp_3op_dp(s, gen_helper_vfp_maxnumd,
                         a->vd, a->vn, a->vm, false);
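/*
 * Note (informational): the minnum/maxnum helpers implement the
 * IEEE 754-2008 minNum/maxNum operations, so a single quiet-NaN operand
 * loses: e.g. VMINNM of (3.0, QNaN) returns 3.0 rather than the NaN.
 */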
static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
    /*
     * VFNMA : fd = muladd(-fd, fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA : fd = muladd( fd, fn, fm)
     * VFMS : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps. NB that doing the negations here as separate
     * steps is correct: an input NaN should come out with its sign
     * bit flipped if it is a negated input.
     */
    TCGv_i32 vn, vm, vd;

    /*
     * Present in VFPv4 only, and only with the FP16 extension.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_fp16_arith, s) ||
        !dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpsp_v2, s)) {

    if (s->vec_len != 0 || s->vec_stride != 0) {

    if (!vfp_access_check(s)) {

    vn = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i32();

    vfp_load_reg32(vn, a->vn);
    vfp_load_reg32(vm, a->vm);
        gen_helper_vfp_negh(vn, vn);
    vfp_load_reg32(vd, a->vd);
        gen_helper_vfp_negh(vd, vd);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
    vfp_store_reg32(vd, a->vd);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(vn);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i32(vd);
static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
    /*
     * VFNMA : fd = muladd(-fd, fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA : fd = muladd( fd, fn, fm)
     * VFMS : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps. NB that doing the negations here as separate
     * steps is correct: an input NaN should come out with its sign
     * bit flipped if it is a negated input.
     */
    TCGv_i32 vn, vm, vd;

    /*
     * Present in VFPv4 only.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpsp_v2, s)) {
    /*
     * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
     * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
     */
    if (s->vec_len != 0 || s->vec_stride != 0) {

    if (!vfp_access_check(s)) {

    vn = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i32();

    vfp_load_reg32(vn, a->vn);
    vfp_load_reg32(vm, a->vm);
        gen_helper_vfp_negs(vn, vn);
    vfp_load_reg32(vd, a->vd);
        gen_helper_vfp_negs(vd, vd);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
    vfp_store_reg32(vd, a->vd);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(vn);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i32(vd);
static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
    /*
     * VFNMA : fd = muladd(-fd, fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA : fd = muladd( fd, fn, fm)
     * VFMS : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps. NB that doing the negations here as separate
     * steps is correct: an input NaN should come out with its sign
     * bit flipped if it is a negated input.
     */
    TCGv_i64 vn, vm, vd;

    /*
     * Present in VFPv4 only.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpdp_v2, s)) {
    /*
     * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
     * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
     */
    if (s->vec_len != 0 || s->vec_stride != 0) {

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {

    if (!vfp_access_check(s)) {

    vn = tcg_temp_new_i64();
    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i64();

    vfp_load_reg64(vn, a->vn);
    vfp_load_reg64(vm, a->vm);
        gen_helper_vfp_negd(vn, vn);
    vfp_load_reg64(vd, a->vd);
        gen_helper_vfp_negd(vd, vd);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
    vfp_store_reg64(vd, a->vd);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(vn);
    tcg_temp_free_i64(vm);
    tcg_temp_free_i64(vd);
#define MAKE_ONE_VFM_TRANS_FN(INSN, PREC, NEGN, NEGD) \
    static bool trans_##INSN##_##PREC(DisasContext *s, \
                                      arg_##INSN##_##PREC *a) \
        return do_vfm_##PREC(s, a, NEGN, NEGD); \

#define MAKE_VFM_TRANS_FNS(PREC) \
    MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \
    MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \
    MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \
    MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true)

MAKE_VFM_TRANS_FNS(hp)
MAKE_VFM_TRANS_FNS(sp)
MAKE_VFM_TRANS_FNS(dp)
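/*
 * What "fused" buys you (informational, a host-float illustration rather
 * than translator code): for float a = 1.0f + 0x1p-12f, the product
 * a * a = 1 + 2^-11 + 2^-24 rounds to 1 + 2^-11 when computed as a
 * separate multiply, so (a * a) - 1.0f yields exactly 2^-11, whereas
 * fmaf(a, a, -1.0f) keeps the 2^-24 term and returns 2^-11 + 2^-24.
 * The vfp_muladd* helpers must preserve that single-rounding behaviour.
 */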
static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a)
    if (!dc_isar_feature(aa32_fp16_arith, s)) {

    if (s->vec_len != 0 || s->vec_stride != 0) {

    if (!vfp_access_check(s)) {

    fd = tcg_const_i32(vfp_expand_imm(MO_16, a->imm));
    vfp_store_reg32(fd, a->vd);
    tcg_temp_free_i32(fd);

static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
    uint32_t delta_d = 0;
    int veclen = s->vec_len;

    if (!dc_isar_feature(aa32_fpsp_v3, s)) {

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {

    if (!vfp_access_check(s)) {

        /* Figure out what type of vector operation this is. */
        if (vfp_sreg_is_scalar(vd)) {
            delta_d = s->vec_stride + 1;

    fd = tcg_const_i32(vfp_expand_imm(MO_32, a->imm));

        vfp_store_reg32(fd, vd);

        /* Set up the operands for the next iteration */
        vd = vfp_advance_sreg(vd, delta_d);

    tcg_temp_free_i32(fd);

static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
    uint32_t delta_d = 0;
    int veclen = s->vec_len;

    if (!dc_isar_feature(aa32_fpdp_v3, s)) {

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) {

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {

    if (!vfp_access_check(s)) {

        /* Figure out what type of vector operation this is. */
        if (vfp_dreg_is_scalar(vd)) {
            delta_d = (s->vec_stride >> 1) + 1;

    fd = tcg_const_i64(vfp_expand_imm(MO_64, a->imm));

        vfp_store_reg64(fd, vd);

        /* Set up the operands for the next iteration */
        vd = vfp_advance_dreg(vd, delta_d);

    tcg_temp_free_i64(fd);
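/*
 * Worked example (informational): VMOV.F64 d0, #2.0 encodes imm8 = 0x00,
 * and vfp_expand_imm(MO_64, 0x00) yields 0x4000 shifted up by 48, i.e.
 * 0x4000000000000000, the double-precision bit pattern for 2.0.
 */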
#define DO_VFP_2OP(INSN, PREC, FN) \
    static bool trans_##INSN##_##PREC(DisasContext *s, \
                                      arg_##INSN##_##PREC *a) \
        return do_vfp_2op_##PREC(s, FN, a->vd, a->vm); \

DO_VFP_2OP(VMOV_reg, sp, tcg_gen_mov_i32)
DO_VFP_2OP(VMOV_reg, dp, tcg_gen_mov_i64)

DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh)
DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss)
DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd)

DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh)
DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs)
DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd)

static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
    gen_helper_vfp_sqrth(vd, vm, cpu_env);

static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
    gen_helper_vfp_sqrts(vd, vm, cpu_env);

static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
    gen_helper_vfp_sqrtd(vd, vm, cpu_env);

DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp)
DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp)
DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp)
2722 static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
2726 if (!dc_isar_feature(aa32_fp16_arith, s)) {
2730 /* Vm/M bits must be zero for the Z variant */
2731 if (a->z && a->vm != 0) {
2735 if (!vfp_access_check(s)) {
2739 vd = tcg_temp_new_i32();
2740 vm = tcg_temp_new_i32();
2742 vfp_load_reg32(vd, a->vd);
2744 tcg_gen_movi_i32(vm, 0);
2746 vfp_load_reg32(vm, a->vm);
2750 gen_helper_vfp_cmpeh(vd, vm, cpu_env);
2752 gen_helper_vfp_cmph(vd, vm, cpu_env);
2755 tcg_temp_free_i32(vd);
2756 tcg_temp_free_i32(vm);

static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
{
    TCGv_i32 vd, vm;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();

    vfp_load_reg32(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i32(vm, 0);
    } else {
        vfp_load_reg32(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmpes(vd, vm, cpu_env);
    } else {
        gen_helper_vfp_cmps(vd, vm, cpu_env);
    }

    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(vm);
    return true;
}

static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
{
    TCGv_i64 vd, vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i64();
    vm = tcg_temp_new_i64();

    vfp_load_reg64(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i64(vm, 0);
    } else {
        vfp_load_reg64(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmped(vd, vm, cpu_env);
    } else {
        gen_helper_vfp_cmpd(vd, vm, cpu_env);
    }

    tcg_temp_free_i64(vd);
    tcg_temp_free_i64(vm);
    return true;
}
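
/*
 * f16 <-> f32/f64 conversions: get_ahp_flag() fetches FPSCR.AHP,
 * which selects the Alternative Half-Precision format (no infinities
 * or NaNs) instead of IEEE half precision, and the T bit picks the
 * low or high 16 bits of the single-precision register holding the
 * f16 value.
 */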

static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    /* The T bit tells us if we want the low or high 16 bits of Vm */
    tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;
    TCGv_i64 vd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    /* The T bit tells us if we want the low or high 16 bits of Vm */
    tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
    vd = tcg_temp_new_i64();
    gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i64(vd);
    return true;
}

static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();

    vfp_load_reg32(tmp, a->vm);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
    tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;
    TCGv_i64 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    vm = tcg_temp_new_i64();

    vfp_load_reg64(vm, a->vm);
    gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
    tcg_temp_free_i64(vm);
    tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}
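
/*
 * VRINTR rounds to an integral value in the floating-point format,
 * using the current rounding mode from FPSCR. Note that the
 * half-precision form gates on fp16 arithmetic support while the
 * sp/dp forms gate on the VRINT feature.
 */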

static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_rinth(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rints(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rintd(tmp, tmp, fpst);
    vfp_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    return true;
}
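
/*
 * VRINTZ always rounds towards zero regardless of FPSCR. The idiom
 * below relies on gen_helper_set_rmode() writing the previous
 * rounding mode back into tcg_rmode, so a second call with the same
 * temporary restores the original mode around the rint operation.
 */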

static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rinth(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_rmode);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rints(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_rmode);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rintd(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    vfp_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    tcg_temp_free_i32(tcg_rmode);
    return true;
}
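
/*
 * VRINTX differs from VRINTR only in using the _exact helpers, which
 * also raise the Inexact exception when the result is not numerically
 * equal to the input.
 */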

static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_rinth_exact(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rints_exact(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rintd_exact(tmp, tmp, fpst);
    vfp_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    return true;
}
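
/*
 * Note that the _sp/_dp suffix here names the source precision:
 * trans_VCVT_sp widens an f32 source to f64, while trans_VCVT_dp
 * narrows an f64 source to f32.
 */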

static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
{
    TCGv_i64 vd;
    TCGv_i32 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i64();
    vfp_load_reg32(vm, a->vm);
    gen_helper_vfp_fcvtds(vd, vm, cpu_env);
    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i64(vd);
    return true;
}

static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
{
    TCGv_i64 vm;
    TCGv_i32 vd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i64();
    vfp_load_reg64(vm, a->vm);
    gen_helper_vfp_fcvtsd(vd, vm, cpu_env);
    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i64(vm);
    return true;
}
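
/*
 * Integer-to-float conversions: the source is always a 32-bit value
 * held in an Sreg, and the S bit selects the signed (sito*) versus
 * unsigned (uito*) helper.
 */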

static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    if (a->s) {
        /* i32 -> f16 */
        gen_helper_vfp_sitoh(vm, vm, fpst);
    } else {
        /* u32 -> f16 */
        gen_helper_vfp_uitoh(vm, vm, fpst);
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    if (a->s) {
        /* i32 -> f32 */
        gen_helper_vfp_sitos(vm, vm, fpst);
    } else {
        /* u32 -> f32 */
        gen_helper_vfp_uitos(vm, vm, fpst);
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
{
    TCGv_i32 vm;
    TCGv_i64 vd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i64();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    if (a->s) {
        /* i32 -> f64 */
        gen_helper_vfp_sitod(vd, vm, fpst);
    } else {
        /* u32 -> f64 */
        gen_helper_vfp_uitod(vd, vm, fpst);
    }
    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i64(vd);
    tcg_temp_free_ptr(fpst);
    return true;
}
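
/*
 * VJCVT converts an f64 to a 32-bit signed integer with round-to-zero
 * and, per the v8.3 JSCVT semantics used by JavaScript engines,
 * out-of-range results wrap modulo 2^32 rather than saturating; hence
 * the dedicated helper and the aa32_jscvt feature gate.
 */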

static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
{
    TCGv_i32 vd;
    TCGv_i64 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_jscvt, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i32();
    vfp_load_reg64(vm, a->vm);
    gen_helper_vjcvt(vd, vm, cpu_env);
    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i64(vm);
    tcg_temp_free_i32(vd);
    return true;
}
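
/*
 * Fixed-point conversions. The three opc bits are op:U:sx: op selects
 * float-to-fixed (round to zero) versus fixed-to-float (round to
 * nearest), U selects unsigned, and sx selects a 16-bit versus 32-bit
 * fixed-point value. frac_bits is the implied number of fraction
 * bits: e.g. with sx == 1 an encoded imm of 28 gives frac_bits == 4.
 */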

static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
{
    TCGv_i32 vd, shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i32();
    vfp_load_reg32(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR_F16);
    shift = tcg_const_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_toslh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_toulh_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(shift);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
{
    TCGv_i32 vd, shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fpsp_v3, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i32();
    vfp_load_reg32(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR);
    shift = tcg_const_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(shift);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
{
    TCGv_i64 vd;
    TCGv_i32 shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fpdp_v3, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i64();
    vfp_load_reg64(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR);
    shift = tcg_const_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i64(vd);
    tcg_temp_free_i32(shift);
    tcg_temp_free_ptr(fpst);
    return true;
}
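
/*
 * Float-to-integer conversions: the RZ bit selects round-to-zero (the
 * tosiz/touiz helpers) instead of the rounding mode currently in
 * FPSCR, and the S bit selects a signed versus unsigned result.
 */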

static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR_F16);
    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizh(vm, vm, fpst);
        } else {
            gen_helper_vfp_tosih(vm, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizh(vm, vm, fpst);
        } else {
            gen_helper_vfp_touih(vm, vm, fpst);
        }
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizs(vm, vm, fpst);
        } else {
            gen_helper_vfp_tosis(vm, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizs(vm, vm, fpst);
        } else {
            gen_helper_vfp_touis(vm, vm, fpst);
        }
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
{
    TCGv_i32 vd;
    TCGv_i64 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i32();
    vfp_load_reg64(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizd(vd, vm, fpst);
        } else {
            gen_helper_vfp_tosid(vd, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizd(vd, vm, fpst);
        } else {
            gen_helper_vfp_touid(vd, vm, fpst);
        }
    }
    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i64(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

/*
 * Decoding of VLLDM and VLSTM is nonstandard because:
 *  * if there is no FPU then these insns must NOP in
 *    Secure state and UNDEF in Nonsecure state
 *  * if there is an FPU then these insns do not have
 *    the usual behaviour that vfp_access_check() provides of
 *    being controlled by CPACR/NSACR enable bits or the
 *    lazy-stacking logic.
 */

static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a)
{
    TCGv_i32 fptr;

    if (!arm_dc_feature(s, ARM_FEATURE_M) ||
        !arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    if (a->op) {
        /*
         * T2 encoding ({D0-D31} reglist): v8.1M and up. We choose not
         * to take the IMPDEF option to make memory accesses to the stack
         * slots that correspond to the D16-D31 registers (discarding
         * read data and writing UNKNOWN values), so for us the T2
         * encoding behaves identically to the T1 encoding.
         */
        if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
            return false;
        }
    } else {
        /*
         * T1 encoding ({D0-D15} reglist): undef if we have 32 Dregs.
         * This is currently architecturally impossible, but we add the
         * check to stay in line with the pseudocode. Note that we must
         * emit code for the UNDEF so it takes precedence over the NOCP.
         */
        if (dc_isar_feature(aa32_simd_r32, s)) {
            unallocated_encoding(s);
            return true;
        }
    }

    /*
     * If not secure, UNDEF. We must emit code for this
     * rather than returning false so that this takes
     * precedence over the m-nocp.decode NOCP fallback.
     */
    if (!s->v8m_secure) {
        unallocated_encoding(s);
        return true;
    }

    /* If no fpu, NOP. */
    if (!dc_isar_feature(aa32_vfp, s)) {
        return true;
    }

    fptr = load_reg(s, a->rn);
    if (a->l) {
        gen_helper_v7m_vlldm(cpu_env, fptr);
    } else {
        gen_helper_v7m_vlstm(cpu_env, fptr);
    }
    tcg_temp_free_i32(fptr);

    /* End the TB, because we have updated FP control bits */
    s->base.is_jmp = DISAS_UPDATE_EXIT;
    return true;
}
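
/*
 * Note on the VSCCLRM zeroing loop below: the register list is first
 * normalised to S-register numbers; an odd-numbered bottom Sreg is
 * cleared as the high half of its Dreg, aligned pairs are cleared
 * with 64-bit stores, and a trailing even-numbered Sreg is cleared
 * as a low half.
 */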

static bool trans_VSCCLRM(DisasContext *s, arg_VSCCLRM *a)
{
    int btmreg, topreg;
    TCGv_i64 zero;
    TCGv_i32 aspen, sfpa;

    if (!dc_isar_feature(aa32_m_sec_state, s)) {
        /* Before v8.1M, fall through in decode to NOCP check */
        return false;
    }

    /* Explicitly UNDEF because this takes precedence over NOCP */
    if (!arm_dc_feature(s, ARM_FEATURE_M_MAIN) || !s->v8m_secure) {
        unallocated_encoding(s);
        return true;
    }

    if (!dc_isar_feature(aa32_vfp_simd, s)) {
        /* NOP if we have neither FP nor MVE */
        return true;
    }

    /*
     * If FPCCR.ASPEN != 0 && CONTROL_S.SFPA == 0 then there is no
     * active floating point context so we must NOP (without doing
     * any lazy state preservation or the NOCP check).
     */
    aspen = load_cpu_field(v7m.fpccr[M_REG_S]);
    sfpa = load_cpu_field(v7m.control[M_REG_S]);
    tcg_gen_andi_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
    tcg_gen_xori_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
    tcg_gen_andi_i32(sfpa, sfpa, R_V7M_CONTROL_SFPA_MASK);
    tcg_gen_or_i32(sfpa, sfpa, aspen);
    arm_gen_condlabel(s);
    tcg_gen_brcondi_i32(TCG_COND_EQ, sfpa, 0, s->condlabel);

    if (s->fp_excp_el != 0) {
        gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
                           syn_uncategorized(), s->fp_excp_el);
        return true;
    }

    topreg = a->vd + a->imm - 1;
    btmreg = a->vd;

    /* Convert to Sreg numbers if the insn specified the list in Dregs */
    if (a->size == 3) {
        topreg = topreg * 2 + 1;
        btmreg *= 2;
    }

    if (topreg > 63 || (topreg > 31 && !(topreg & 1))) {
        /* UNPREDICTABLE: we choose to undef */
        unallocated_encoding(s);
        return true;
    }

    /* Silently ignore requests to clear D16-D31 if they don't exist */
    if (topreg > 31 && !dc_isar_feature(aa32_simd_r32, s)) {
        topreg = 31;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Zero the Sregs from btmreg to topreg inclusive. */
    zero = tcg_const_i64(0);
    if (btmreg & 1) {
        write_neon_element64(zero, btmreg >> 1, 1, MO_32);
        btmreg++;
    }
    for (; btmreg + 1 <= topreg; btmreg += 2) {
        write_neon_element64(zero, btmreg >> 1, 0, MO_64);
    }
    if (btmreg == topreg) {
        write_neon_element64(zero, btmreg >> 1, 0, MO_32);
        btmreg++;
    }
    assert(btmreg == topreg + 1);
    /* TODO: when MVE is implemented, zero VPR here */
    return true;
}

static bool trans_NOCP(DisasContext *s, arg_nocp *a)
{
    /*
     * Handle M-profile early check for disabled coprocessor:
     * all we need to do here is emit the NOCP exception if
     * the coprocessor is disabled. Otherwise we return false
     * and the real VFP/etc decode will handle the insn.
     */
    assert(arm_dc_feature(s, ARM_FEATURE_M));

    if (a->cp == 11) {
        a->cp = 10; /* cp11 is governed by the cp10 enable */
    }
    if (arm_dc_feature(s, ARM_FEATURE_V8_1M) &&
        (a->cp == 8 || a->cp == 9 || a->cp == 14 || a->cp == 15)) {
        /* in v8.1M cp 8, 9, 14, 15 also are governed by the cp10 enable */
        a->cp = 10;
    }

    if (a->cp != 10) {
        gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
                           syn_uncategorized(), default_exception_el(s));
        return true;
    }

    if (s->fp_excp_el != 0) {
        gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
                           syn_uncategorized(), s->fp_excp_el);
        return true;
    }

    return false;
}

static bool trans_NOCP_8_1(DisasContext *s, arg_nocp *a)
{
    /* This range needs a coprocessor check for v8.1M and later only */
    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
        return false;
    }
    return trans_NOCP(s, a);
}
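
/*
 * VINS and VMOVX operate on the 16-bit halves of an Sreg: VINS
 * deposits the low half of Vm into the high half of Vd, and VMOVX
 * extracts the high half of Vm into Vd. Neither participates in
 * short vectors, so a nonzero vector length or stride is rejected.
 * VMOVX reuses arg_VINS since the two encodings have identical fields.
 */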

static bool trans_VINS(DisasContext *s, arg_VINS *a)
{
    TCGv_i32 rd, rm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Insert low half of Vm into high half of Vd */
    rm = tcg_temp_new_i32();
    rd = tcg_temp_new_i32();
    vfp_load_reg32(rm, a->vm);
    vfp_load_reg32(rd, a->vd);
    tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
    vfp_store_reg32(rd, a->vd);
    tcg_temp_free_i32(rm);
    tcg_temp_free_i32(rd);
    return true;
}

static bool trans_VMOVX(DisasContext *s, arg_VINS *a)
{
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Set Vd to high half of Vm */
    rm = tcg_temp_new_i32();
    vfp_load_reg32(rm, a->vm);
    tcg_gen_shri_i32(rm, rm, 16);
    vfp_store_reg32(rm, a->vd);
    tcg_temp_free_i32(rm);
    return true;
}