/*
 * ARM translation: AArch32 VFP instructions
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 * Copyright (c) 2019 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

/*
 * This file is intended to be included from translate.c; it uses
 * some macros and definitions provided by that file.
 * It might be possible to convert it to a standalone .c file eventually.
 */

/* Include the generated VFP decoder */
#include "decode-vfp.inc.c"
#include "decode-vfp-uncond.inc.c"
/*
 * The imm8 encodes the sign bit, enough bits to represent an exponent in
 * the range 01....1xx to 10....0xx, and the most significant 4 bits of
 * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
 */
uint64_t vfp_expand_imm(int size, uint8_t imm8)
{
    uint64_t imm;

    switch (size) {
    case MO_64:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
            (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
            extract32(imm8, 0, 6);
        imm <<= 48;
        break;
    case MO_32:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
            (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
            (extract32(imm8, 0, 6) << 3);
        imm <<= 16;
        break;
    case MO_16:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
            (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
            (extract32(imm8, 0, 6) << 6);
        break;
    default:
        g_assert_not_reached();
    }
    return imm;
}
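
/*
 * Worked example (illustrative, not from the ARM ARM text): for MO_32,
 * imm8 == 0x70 has sign 0, exponent bit 6 set and mantissa bits 0x30,
 * giving (0x3e00 | (0x30 << 3)) << 16 == 0x3f800000, i.e. 1.0f.
 */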
/*
 * Return the offset of a 16-bit half of the specified VFP single-precision
 * register. If top is true, returns the top 16 bits; otherwise the bottom
 * 16 bits.
 */
static inline long vfp_f16_offset(unsigned reg, bool top)
{
    long offs = vfp_reg_offset(false, reg);
#ifdef HOST_WORDS_BIGENDIAN
    if (!top) {
        offs += 2;
    }
#else
    if (top) {
        offs += 2;
    }
#endif
    return offs;
}
/*
 * Check that VFP access is enabled. If it is, do the necessary
 * M-profile lazy-FP handling and then return true.
 * If not, emit code to generate an appropriate exception and
 * return false.
 * The ignore_vfp_enabled argument specifies that we should ignore
 * whether VFP is enabled via FPEXC[EN]: this should be true for FMXR/FMRX
 * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
 */
static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
{
    if (s->fp_excp_el) {
        if (arm_dc_feature(s, ARM_FEATURE_M)) {
            gen_exception_insn(s, s->pc_curr, EXCP_NOCP, syn_uncategorized(),
                               s->fp_excp_el);
        } else {
            gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
                               syn_fp_access_trap(1, 0xe, false),
                               s->fp_excp_el);
        }
        return false;
    }

    if (!s->vfp_enabled && !ignore_vfp_enabled) {
        assert(!arm_dc_feature(s, ARM_FEATURE_M));
        unallocated_encoding(s);
        return false;
    }

    if (arm_dc_feature(s, ARM_FEATURE_M)) {
        /* Handle M-profile lazy FP state mechanics */

        /* Trigger lazy-state preservation if necessary */
        if (s->v7m_lspact) {
            /*
             * Lazy state saving affects external memory and also the NVIC,
             * so we must mark it as an IO operation for icount.
             */
            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                gen_io_start();
            }
            gen_helper_v7m_preserve_fp_state(cpu_env);
            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                gen_io_end();
            }
            /*
             * If the preserve_fp_state helper doesn't throw an exception
             * then it will clear LSPACT; we don't need to repeat this for
             * any further FP insns in this TB.
             */
            s->v7m_lspact = false;
        }

        /* Update ownership of FP context: set FPCCR.S to match current state */
        if (s->v8m_fpccr_s_wrong) {
            TCGv_i32 tmp;

            tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
            if (s->v8m_secure) {
                tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
            } else {
                tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
            }
            store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
            /* Don't need to do this for any further FP insns in this TB */
            s->v8m_fpccr_s_wrong = false;
        }

        if (s->v7m_new_fp_ctxt_needed) {
            /*
             * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA
             * and the FPSCR.
             */
            TCGv_i32 control, fpscr;
            uint32_t bits = R_V7M_CONTROL_FPCA_MASK;

            fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
            gen_helper_vfp_set_fpscr(cpu_env, fpscr);
            tcg_temp_free_i32(fpscr);
            /*
             * We don't need to arrange to end the TB, because the only
             * parts of FPSCR which we cache in the TB flags are the VECLEN
             * and VECSTRIDE, and those don't exist for M-profile.
             */

            if (s->v8m_secure) {
                bits |= R_V7M_CONTROL_SFPA_MASK;
            }
            control = load_cpu_field(v7m.control[M_REG_S]);
            tcg_gen_ori_i32(control, control, bits);
            store_cpu_field(control, v7m.control[M_REG_S]);
            /* Don't need to do this for any further FP insns in this TB */
            s->v7m_new_fp_ctxt_needed = false;
        }
    }

    return true;
}
/*
 * The most usual kind of VFP access check, for everything except
 * FMXR/FMRX to the always-available special registers.
 */
static bool vfp_access_check(DisasContext *s)
{
    return full_vfp_access_check(s, false);
}
static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
{
    uint32_t rd, rn, rm;
    bool dp = a->dp;

    if (!dc_isar_feature(aa32_vsel, s)) {
        return false;
    }

    if (dp && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (dp && !dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vm | a->vn | a->vd) & 0x10)) {
        return false;
    }

    rd = a->vd;
    rn = a->vn;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    if (dp) {
        TCGv_i64 frn, frm, dest;
        TCGv_i64 tmp, zero, zf, nf, vf;

        zero = tcg_const_i64(0);

        frn = tcg_temp_new_i64();
        frm = tcg_temp_new_i64();
        dest = tcg_temp_new_i64();

        zf = tcg_temp_new_i64();
        nf = tcg_temp_new_i64();
        vf = tcg_temp_new_i64();

        tcg_gen_extu_i32_i64(zf, cpu_ZF);
        tcg_gen_ext_i32_i64(nf, cpu_NF);
        tcg_gen_ext_i32_i64(vf, cpu_VF);
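
        /*
         * Note the asymmetry above: ZF is only ever tested with EQ/NE
         * against zero, so a zero-extension suffices, while NF and VF
         * are tested with the signed LT/GE conditions below and so must
         * be sign-extended to keep the flag in bit 63.
         */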
        neon_load_reg64(frn, rn);
        neon_load_reg64(frm, rm);
        switch (a->cc) {
        case 0: /* eq: Z */
            tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
                                frn, frm);
            break;
        case 1: /* vs: V */
            tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
                                frn, frm);
            break;
        case 2: /* ge: N == V -> N ^ V == 0 */
            tmp = tcg_temp_new_i64();
            tcg_gen_xor_i64(tmp, vf, nf);
            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
                                frn, frm);
            tcg_temp_free_i64(tmp);
            break;
        case 3: /* gt: !Z && N == V */
            tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
                                frn, frm);
            tmp = tcg_temp_new_i64();
            tcg_gen_xor_i64(tmp, vf, nf);
            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
                                dest, frm);
            tcg_temp_free_i64(tmp);
            break;
        }
        neon_store_reg64(dest, rd);
        tcg_temp_free_i64(frn);
        tcg_temp_free_i64(frm);
        tcg_temp_free_i64(dest);

        tcg_temp_free_i64(zf);
        tcg_temp_free_i64(nf);
        tcg_temp_free_i64(vf);

        tcg_temp_free_i64(zero);
    } else {
        TCGv_i32 frn, frm, dest;
        TCGv_i32 tmp, zero;

        zero = tcg_const_i32(0);

        frn = tcg_temp_new_i32();
        frm = tcg_temp_new_i32();
        dest = tcg_temp_new_i32();
        neon_load_reg32(frn, rn);
        neon_load_reg32(frm, rm);
        switch (a->cc) {
        case 0: /* eq: Z */
            tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
                                frn, frm);
            break;
        case 1: /* vs: V */
            tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
                                frn, frm);
            break;
        case 2: /* ge: N == V -> N ^ V == 0 */
            tmp = tcg_temp_new_i32();
            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
                                frn, frm);
            tcg_temp_free_i32(tmp);
            break;
        case 3: /* gt: !Z && N == V */
            tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
                                frn, frm);
            tmp = tcg_temp_new_i32();
            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
                                dest, frm);
            tcg_temp_free_i32(tmp);
            break;
        }
        neon_store_reg32(dest, rd);
        tcg_temp_free_i32(frn);
        tcg_temp_free_i32(frm);
        tcg_temp_free_i32(dest);

        tcg_temp_free_i32(zero);
    }

    return true;
}
/*
 * Table for converting the most common AArch32 encoding of
 * rounding mode to arm_fprounding order (which matches the
 * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
 */
static const uint8_t fp_decode_rm[] = {
    FPROUNDING_TIEAWAY,
    FPROUNDING_TIEEVEN,
    FPROUNDING_POSINF,
    FPROUNDING_NEGINF,
};
static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
{
    uint32_t rd, rm;
    bool dp = a->dp;
    TCGv_ptr fpst;
    TCGv_i32 tcg_rmode;
    int rounding = fp_decode_rm[a->rm];

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (dp && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (dp && !dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vm | a->vd) & 0x10)) {
        return false;
    }

    rd = a->vd;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = get_fpstatus_ptr(0);

    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
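
    /*
     * set_rmode writes the previous rounding mode back into tcg_rmode,
     * so the second call below restores whatever mode the guest had in
     * force before this insn.
     */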
    if (dp) {
        TCGv_i64 tcg_op;
        TCGv_i64 tcg_res;
        tcg_op = tcg_temp_new_i64();
        tcg_res = tcg_temp_new_i64();
        neon_load_reg64(tcg_op, rm);
        gen_helper_rintd(tcg_res, tcg_op, fpst);
        neon_store_reg64(tcg_res, rd);
        tcg_temp_free_i64(tcg_op);
        tcg_temp_free_i64(tcg_res);
    } else {
        TCGv_i32 tcg_op;
        TCGv_i32 tcg_res;
        tcg_op = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        neon_load_reg32(tcg_op, rm);
        gen_helper_rints(tcg_res, tcg_op, fpst);
        neon_store_reg32(tcg_res, rd);
        tcg_temp_free_i32(tcg_op);
        tcg_temp_free_i32(tcg_res);
    }

    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    tcg_temp_free_i32(tcg_rmode);

    tcg_temp_free_ptr(fpst);
    return true;
}
static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
{
    uint32_t rd, rm;
    bool dp = a->dp;
    TCGv_ptr fpst;
    TCGv_i32 tcg_rmode, tcg_shift;
    int rounding = fp_decode_rm[a->rm];
    bool is_signed = a->op;

    if (!dc_isar_feature(aa32_vcvt_dr, s)) {
        return false;
    }

    if (dp && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (dp && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    rd = a->vd;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = get_fpstatus_ptr(0);

    tcg_shift = tcg_const_i32(0);
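
    /*
     * The to-integer helpers below are shared with the VCVT fixed-point
     * encodings, so they take a fraction-bits shift count; a shift of
     * zero gives a plain float-to-integer conversion.
     */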
    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);

    if (dp) {
        TCGv_i64 tcg_double, tcg_res;
        TCGv_i32 tcg_tmp;
        tcg_double = tcg_temp_new_i64();
        tcg_res = tcg_temp_new_i64();
        tcg_tmp = tcg_temp_new_i32();
        neon_load_reg64(tcg_double, rm);
        if (is_signed) {
            gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
        } else {
            gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
        }
        tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
        neon_store_reg32(tcg_tmp, rd);
        tcg_temp_free_i32(tcg_tmp);
        tcg_temp_free_i64(tcg_res);
        tcg_temp_free_i64(tcg_double);
    } else {
        TCGv_i32 tcg_single, tcg_res;
        tcg_single = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        neon_load_reg32(tcg_single, rm);
        if (is_signed) {
            gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
        } else {
            gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
        }
        neon_store_reg32(tcg_res, rd);
        tcg_temp_free_i32(tcg_res);
        tcg_temp_free_i32(tcg_single);
    }

    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    tcg_temp_free_i32(tcg_rmode);

    tcg_temp_free_i32(tcg_shift);

    tcg_temp_free_ptr(fpst);

    return true;
}
static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
{
    /* VMOV scalar to general purpose register */
    TCGv_i32 tmp;
    int pass;
    uint32_t offset;

    /* SIZE == 2 is a VFP instruction; otherwise NEON.  */
    if (a->size == 2
        ? !dc_isar_feature(aa32_fpsp_v2, s)
        : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
        return false;
    }

    offset = a->index << a->size;
    pass = extract32(offset, 2, 1);
    offset = extract32(offset, 0, 2) * 8;
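
    /*
     * 'offset' starts out as a byte offset into the 64-bit D register:
     * bit 2 selects the 32-bit half ('pass') and the low two bits,
     * scaled by 8, become the bit offset within that half.
     */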
    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = neon_load_reg(a->vn, pass);
    switch (a->size) {
    case 0:
        if (offset) {
            tcg_gen_shri_i32(tmp, tmp, offset);
        }
        if (a->u) {
            gen_uxtb(tmp);
        } else {
            gen_sxtb(tmp);
        }
        break;
    case 1:
        if (a->u) {
            if (offset) {
                tcg_gen_shri_i32(tmp, tmp, 16);
            } else {
                gen_uxth(tmp);
            }
        } else {
            if (offset) {
                tcg_gen_sari_i32(tmp, tmp, 16);
            } else {
                gen_sxth(tmp);
            }
        }
        break;
    case 2:
        break;
    }
    store_reg(s, a->rt, tmp);

    return true;
}
static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
{
    /* VMOV general purpose register to scalar */
    TCGv_i32 tmp, tmp2;
    int pass;
    uint32_t offset;

    /* SIZE == 2 is a VFP instruction; otherwise NEON.  */
    if (a->size == 2
        ? !dc_isar_feature(aa32_fpsp_v2, s)
        : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
        return false;
    }

    offset = a->index << a->size;
    pass = extract32(offset, 2, 1);
    offset = extract32(offset, 0, 2) * 8;

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = load_reg(s, a->rt);
    switch (a->size) {
    case 0:
        tmp2 = neon_load_reg(a->vn, pass);
        tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8);
        tcg_temp_free_i32(tmp2);
        break;
    case 1:
        tmp2 = neon_load_reg(a->vn, pass);
        tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16);
        tcg_temp_free_i32(tmp2);
        break;
    case 2:
        break;
    }
    neon_store_reg(a->vn, pass, tmp);

    return true;
}
static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    /* VDUP (general purpose register) */
    TCGv_i32 tmp;
    int size, vec_size;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
        return false;
    }

    if (a->b && a->e) {
        return false;
    }

    if (a->q && (a->vn & 1)) {
        return false;
    }

    vec_size = a->q ? 16 : 8;
    if (a->b) {
        size = 0;
    } else if (a->e) {
        size = 1;
    } else {
        size = 2;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = load_reg(s, a->rt);
    tcg_gen_gvec_dup_i32(size, neon_reg_offset(a->vn, 0),
                         vec_size, vec_size, tmp);
    tcg_temp_free_i32(tmp);

    return true;
}
static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
{
    TCGv_i32 tmp;
    bool ignore_vfp_enabled = false;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (arm_dc_feature(s, ARM_FEATURE_M)) {
        /*
         * The only M-profile VFP vmrs/vmsr sysreg is FPSCR.
         * Accesses to R15 are UNPREDICTABLE; we choose to undef.
         * (FPSCR -> r15 is a special case which writes to the PSR flags.)
         */
        if (a->rt == 15 && (!a->l || a->reg != ARM_VFP_FPSCR)) {
            return false;
        }
    }

    switch (a->reg) {
    case ARM_VFP_FPSID:
        /*
         * VFPv2 allows access to FPSID from userspace; VFPv3 restricts
         * all ID registers to privileged access only.
         */
        if (IS_USER(s) && dc_isar_feature(aa32_fpsp_v3, s)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_MVFR0:
    case ARM_VFP_MVFR1:
        if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_MVFR2:
        if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_FPSCR:
        break;
    case ARM_VFP_FPEXC:
        if (IS_USER(s)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_FPINST:
    case ARM_VFP_FPINST2:
        /* Not present in VFPv3 */
        if (IS_USER(s) || dc_isar_feature(aa32_fpsp_v3, s)) {
            return false;
        }
        break;
    default:
        return false;
    }

    if (!full_vfp_access_check(s, ignore_vfp_enabled)) {
        return true;
    }

    if (a->l) {
        /* VMRS, move VFP special register to gp register */
        switch (a->reg) {
        case ARM_VFP_MVFR0:
        case ARM_VFP_MVFR1:
        case ARM_VFP_MVFR2:
        case ARM_VFP_FPSID:
            if (s->current_el == 1) {
                TCGv_i32 tcg_reg, tcg_rt;

                gen_set_condexec(s);
                gen_set_pc_im(s, s->pc_curr);
                tcg_reg = tcg_const_i32(a->reg);
                tcg_rt = tcg_const_i32(a->rt);
                gen_helper_check_hcr_el2_trap(cpu_env, tcg_rt, tcg_reg);
                tcg_temp_free_i32(tcg_reg);
                tcg_temp_free_i32(tcg_rt);
            }
            /* fall through */
        case ARM_VFP_FPEXC:
        case ARM_VFP_FPINST:
        case ARM_VFP_FPINST2:
            tmp = load_cpu_field(vfp.xregs[a->reg]);
            break;
        case ARM_VFP_FPSCR:
            if (a->rt == 15) {
                tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
                tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
            } else {
                tmp = tcg_temp_new_i32();
                gen_helper_vfp_get_fpscr(tmp, cpu_env);
            }
            break;
        default:
            g_assert_not_reached();
        }

        if (a->rt == 15) {
            /* Set the 4 flag bits in the CPSR.  */
            gen_set_nzcv(tmp);
            tcg_temp_free_i32(tmp);
        } else {
            store_reg(s, a->rt, tmp);
        }
    } else {
        /* VMSR, move gp register to VFP special register */
        switch (a->reg) {
        case ARM_VFP_FPSID:
        case ARM_VFP_MVFR0:
        case ARM_VFP_MVFR1:
        case ARM_VFP_MVFR2:
            /* Writes are ignored.  */
            break;
        case ARM_VFP_FPSCR:
            tmp = load_reg(s, a->rt);
            gen_helper_vfp_set_fpscr(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            gen_lookup_tb(s);
            break;
        case ARM_VFP_FPEXC:
            /*
             * TODO: VFP subarchitecture support.
             * For now, keep the EN bit only
             */
            tmp = load_reg(s, a->rt);
            tcg_gen_andi_i32(tmp, tmp, 1 << 30);
            store_cpu_field(tmp, vfp.xregs[a->reg]);
            gen_lookup_tb(s);
            break;
        case ARM_VFP_FPINST:
        case ARM_VFP_FPINST2:
            tmp = load_reg(s, a->rt);
            store_cpu_field(tmp, vfp.xregs[a->reg]);
            break;
        default:
            g_assert_not_reached();
        }
    }

    return true;
}
static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
{
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->l) {
        /* VFP to general purpose register */
        tmp = tcg_temp_new_i32();
        neon_load_reg32(tmp, a->vn);
        if (a->rt == 15) {
            /* Set the 4 flag bits in the CPSR.  */
            gen_set_nzcv(tmp);
            tcg_temp_free_i32(tmp);
        } else {
            store_reg(s, a->rt, tmp);
        }
    } else {
        /* general purpose register to VFP */
        tmp = load_reg(s, a->rt);
        neon_store_reg32(tmp, a->vn);
        tcg_temp_free_i32(tmp);
    }

    return true;
}
static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
{
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /*
     * VMOV between two general-purpose registers and two single precision
     * floating point registers
     */
    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->op) {
        /* fpreg to gpreg */
        tmp = tcg_temp_new_i32();
        neon_load_reg32(tmp, a->vm);
        store_reg(s, a->rt, tmp);
        tmp = tcg_temp_new_i32();
        neon_load_reg32(tmp, a->vm + 1);
        store_reg(s, a->rt2, tmp);
    } else {
        /* gpreg to fpreg */
        tmp = load_reg(s, a->rt);
        neon_store_reg32(tmp, a->vm);
        tcg_temp_free_i32(tmp);
        tmp = load_reg(s, a->rt2);
        neon_store_reg32(tmp, a->vm + 1);
        tcg_temp_free_i32(tmp);
    }

    return true;
}
static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
{
    TCGv_i32 tmp;

    /*
     * VMOV between two general-purpose registers and one double precision
     * floating point register.  Note that this does not require support
     * for double precision arithmetic.
     */
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->op) {
        /* fpreg to gpreg */
        tmp = tcg_temp_new_i32();
        neon_load_reg32(tmp, a->vm * 2);
        store_reg(s, a->rt, tmp);
        tmp = tcg_temp_new_i32();
        neon_load_reg32(tmp, a->vm * 2 + 1);
        store_reg(s, a->rt2, tmp);
    } else {
        /* gpreg to fpreg */
        tmp = load_reg(s, a->rt);
        neon_store_reg32(tmp, a->vm * 2);
        tcg_temp_free_i32(tmp);
        tmp = load_reg(s, a->rt2);
        neon_store_reg32(tmp, a->vm * 2 + 1);
        tcg_temp_free_i32(tmp);
    }

    return true;
}
static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
{
    uint32_t offset;
    TCGv_i32 addr, tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << 2;
    if (!a->u) {
        offset = -offset;
    }

    /* For thumb, use of PC is UNPREDICTABLE.  */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i32();
    if (a->l) {
        gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
        neon_store_reg32(tmp, a->vd);
    } else {
        neon_load_reg32(tmp, a->vd);
        gen_aa32_st32(s, tmp, addr, get_mem_index(s));
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(addr);

    return true;
}
static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
{
    uint32_t offset;
    TCGv_i32 addr;
    TCGv_i64 tmp;

    /* Note that this does not require support for double arithmetic.  */
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << 2;
    if (!a->u) {
        offset = -offset;
    }

    /* For thumb, use of PC is UNPREDICTABLE.  */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i64();
    if (a->l) {
        gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
        neon_store_reg64(tmp, a->vd);
    } else {
        neon_load_reg64(tmp, a->vd);
        gen_aa32_st64(s, tmp, addr, get_mem_index(s));
    }
    tcg_temp_free_i64(tmp);
    tcg_temp_free_i32(addr);

    return true;
}
static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
{
    uint32_t offset;
    TCGv_i32 addr, tmp;
    int i, n;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    n = a->imm;

    if (n == 0 || (a->vd + n) > 32) {
        /*
         * UNPREDICTABLE cases for bad immediates: we choose to
         * UNDEF to avoid generating huge numbers of TCG ops
         */
        return false;
    }
    if (a->rn == 15 && a->w) {
        /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For thumb, use of PC is UNPREDICTABLE.  */
    addr = add_reg_for_lit(s, a->rn, 0);
    if (a->p) {
        /* pre-decrement */
        tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
    }

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        /*
         * Here 'addr' is the lowest address we will store to,
         * and is either the old SP (if post-increment) or
         * the new SP (if pre-decrement). For post-increment
         * where the old value is below the limit and the new
         * value is above, it is UNKNOWN whether the limit check
         * triggers; we choose to trigger.
         */
        gen_helper_v8m_stackcheck(cpu_env, addr);
    }

    offset = 4;
    tmp = tcg_temp_new_i32();
    for (i = 0; i < n; i++) {
        if (a->l) {
            /* load */
            gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
            neon_store_reg32(tmp, a->vd + i);
        } else {
            /* store */
            neon_load_reg32(tmp, a->vd + i);
            gen_aa32_st32(s, tmp, addr, get_mem_index(s));
        }
        tcg_gen_addi_i32(addr, addr, offset);
    }
    tcg_temp_free_i32(tmp);
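
    /*
     * Writeback: in the pre-decrement case the loop above has advanced
     * addr by 4 * n from the lowered base, so stepping it back by the
     * same amount leaves the decremented base address in addr.
     */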
    if (a->w) {
        /* writeback */
        if (a->p) {
            offset = -offset * n;
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }

    return true;
}
static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
{
    uint32_t offset;
    TCGv_i32 addr;
    TCGv_i64 tmp;
    int i, n;

    /* Note that this does not require support for double arithmetic.  */
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    n = a->imm >> 1;

    if (n == 0 || (a->vd + n) > 32 || n > 16) {
        /*
         * UNPREDICTABLE cases for bad immediates: we choose to
         * UNDEF to avoid generating huge numbers of TCG ops
         */
        return false;
    }
    if (a->rn == 15 && a->w) {
        /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For thumb, use of PC is UNPREDICTABLE.  */
    addr = add_reg_for_lit(s, a->rn, 0);
    if (a->p) {
        /* pre-decrement */
        tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
    }

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        /*
         * Here 'addr' is the lowest address we will store to,
         * and is either the old SP (if post-increment) or
         * the new SP (if pre-decrement). For post-increment
         * where the old value is below the limit and the new
         * value is above, it is UNKNOWN whether the limit check
         * triggers; we choose to trigger.
         */
        gen_helper_v8m_stackcheck(cpu_env, addr);
    }

    offset = 8;
    tmp = tcg_temp_new_i64();
    for (i = 0; i < n; i++) {
        if (a->l) {
            /* load */
            gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
            neon_store_reg64(tmp, a->vd + i);
        } else {
            /* store */
            neon_load_reg64(tmp, a->vd + i);
            gen_aa32_st64(s, tmp, addr, get_mem_index(s));
        }
        tcg_gen_addi_i32(addr, addr, offset);
    }
    tcg_temp_free_i64(tmp);
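
    /*
     * Writeback: an odd imm marks the legacy FLDMX/FSTMX form, whose
     * post-increment writeback covers one extra word beyond the 8 * n
     * bytes actually transferred.
     */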
    if (a->w) {
        /* writeback */
        if (a->p) {
            offset = -offset * n;
        } else if (a->imm & 1) {
            offset = 4;
        } else {
            offset = 0;
        }

        if (offset != 0) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }

    return true;
}
/*
 * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp().
 * The callback should emit code to write a value to vd. If
 * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd
 * will contain the old value of the relevant VFP register;
 * otherwise it must be written to only.
 */
typedef void VFPGen3OpSPFn(TCGv_i32 vd,
                           TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst);
typedef void VFPGen3OpDPFn(TCGv_i64 vd,
                           TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);

/*
 * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp().
 * The callback should emit code to write a value to vd (which
 * should be written to only).
 */
typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);

/*
 * Return true if the specified S reg is in a scalar bank
 * (ie if it is s0..s7)
 */
static inline bool vfp_sreg_is_scalar(int reg)
{
    return (reg & 0x18) == 0;
}

/*
 * Return true if the specified D reg is in a scalar bank
 * (ie if it is d0..d3 or d16..d19)
 */
static inline bool vfp_dreg_is_scalar(int reg)
{
    return (reg & 0xc) == 0;
}

/*
 * Advance the S reg number forwards by delta within its bank
 * (ie increment the low 3 bits but leave the rest the same)
 */
static inline int vfp_advance_sreg(int reg, int delta)
{
    return ((reg + delta) & 0x7) | (reg & ~0x7);
}

/*
 * Advance the D reg number forwards by delta within its bank
 * (ie increment the low 2 bits but leave the rest the same)
 */
static inline int vfp_advance_dreg(int reg, int delta)
{
    return ((reg + delta) & 0x3) | (reg & ~0x3);
}
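
/*
 * Example: vfp_advance_sreg(7, 1) yields 0, i.e. advancing past s7
 * wraps back to s0 rather than escaping into the next bank, which is
 * the architected wrap-around behaviour for VFP short vectors.
 */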
/*
 * Perform a 3-operand VFP data processing instruction. fn is the
 * callback to do the actual operation; this function deals with the
 * code to handle looping around for VFP vector processing.
 */
static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 f0, f1, fd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is.  */
        if (vfp_sreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = s->vec_stride + 1;

            if (vfp_sreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i32();
    f1 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();
    fpst = get_fpstatus_ptr(0);

    neon_load_reg32(f0, vn);
    neon_load_reg32(f1, vm);

    for (;;) {
        if (reads_vd) {
            neon_load_reg32(fd, vd);
        }
        fn(fd, f0, f1, fpst);
        neon_store_reg32(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_sreg(vd, delta_d);
        vn = vfp_advance_sreg(vn, delta_d);
        neon_load_reg32(f0, vn);
        if (delta_m) {
            vm = vfp_advance_sreg(vm, delta_m);
            neon_load_reg32(f1, vm);
        }
    }

    tcg_temp_free_i32(f0);
    tcg_temp_free_i32(f1);
    tcg_temp_free_i32(fd);
    tcg_temp_free_ptr(fpst);

    return true;
}
static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 f0, f1, fd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is.  */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = (s->vec_stride >> 1) + 1;

            if (vfp_dreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i64();
    f1 = tcg_temp_new_i64();
    fd = tcg_temp_new_i64();
    fpst = get_fpstatus_ptr(0);

    neon_load_reg64(f0, vn);
    neon_load_reg64(f1, vm);

    for (;;) {
        if (reads_vd) {
            neon_load_reg64(fd, vd);
        }
        fn(fd, f0, f1, fpst);
        neon_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }
        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
        vn = vfp_advance_dreg(vn, delta_d);
        neon_load_reg64(f0, vn);
        if (delta_m) {
            vm = vfp_advance_dreg(vm, delta_m);
            neon_load_reg64(f1, vm);
        }
    }

    tcg_temp_free_i64(f0);
    tcg_temp_free_i64(f1);
    tcg_temp_free_i64(fd);
    tcg_temp_free_ptr(fpst);

    return true;
}
static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 f0, fd;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is.  */
        if (vfp_sreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = s->vec_stride + 1;

            if (vfp_sreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();

    neon_load_reg32(f0, vm);

    for (;;) {
        fn(fd, f0);
        neon_store_reg32(fd, vd);

        if (veclen == 0) {
            break;
        }
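
        /*
         * With a scalar source (delta_m == 0) every iteration computes
         * the same value, so fn is run once and the result is simply
         * replicated into the remaining destination registers.
         */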
        if (delta_m == 0) {
            /* single source one-many */
            while (veclen--) {
                vd = vfp_advance_sreg(vd, delta_d);
                neon_store_reg32(fd, vd);
            }
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_sreg(vd, delta_d);
        vm = vfp_advance_sreg(vm, delta_m);
        neon_load_reg32(f0, vm);
    }

    tcg_temp_free_i32(f0);
    tcg_temp_free_i32(fd);

    return true;
}
static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 f0, fd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is.  */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = (s->vec_stride >> 1) + 1;

            if (vfp_dreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i64();
    fd = tcg_temp_new_i64();

    neon_load_reg64(f0, vm);

    for (;;) {
        fn(fd, f0);
        neon_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }

        if (delta_m == 0) {
            /* single source one-many */
            while (veclen--) {
                vd = vfp_advance_dreg(vd, delta_d);
                neon_store_reg64(fd, vd);
            }
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
        vm = vfp_advance_dreg(vm, delta_m);
        neon_load_reg64(f0, vm);
    }

    tcg_temp_free_i64(f0);
    tcg_temp_free_i64(fd);

    return true;
}
static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
{
    return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
{
    return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
}
static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(tmp, tmp);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
{
    return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(tmp, tmp);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
{
    return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
}
static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(vd, vd);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(vd, vd);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
}
static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(tmp, tmp);
    gen_helper_vfp_negs(vd, vd);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);
}
static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(tmp, tmp);
    gen_helper_vfp_negd(vd, vd);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
}
static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
}

static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_muls(vd, vn, vm, fpst);
    gen_helper_vfp_negs(vd, vd);
}

static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_muld(vd, vn, vm, fpst);
    gen_helper_vfp_negd(vd, vd);
}

static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
}
static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
}

static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
}

static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
}

static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
}

static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
}

static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
}
static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_sp(s, gen_helper_vfp_minnums,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMAXNM_sp(DisasContext *s, arg_VMAXNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_sp(s, gen_helper_vfp_maxnums,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMINNM_dp(DisasContext *s, arg_VMINNM_dp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_dp(s, gen_helper_vfp_minnumd,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_dp(s, gen_helper_vfp_maxnumd,
                         a->vd, a->vn, a->vm, false);
}
static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
{
    /*
     * VFNMA : fd = muladd(-fd,  fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA  : fd = muladd( fd,  fn, fm)
     * VFMS  : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps.  NB that doing the negations here as separate
     * steps is correct : an input NaN should come out with its sign
     * bit flipped if it is a negated-input.
     */
    TCGv_ptr fpst;
    TCGv_i32 vn, vm, vd;

    /*
     * Present in VFPv4 only.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }
    /*
     * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
     * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
     */
    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vn = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i32();

    neon_load_reg32(vn, a->vn);
    neon_load_reg32(vm, a->vm);
    if (neg_n) {
        /* VFNMS, VFMS */
        gen_helper_vfp_negs(vn, vn);
    }
    neon_load_reg32(vd, a->vd);
    if (neg_d) {
        /* VFNMA, VFNMS */
        gen_helper_vfp_negs(vd, vd);
    }
    fpst = get_fpstatus_ptr(0);
    gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
    neon_store_reg32(vd, a->vd);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(vn);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i32(vd);

    return true;
}

static bool trans_VFMA_sp(DisasContext *s, arg_VFMA_sp *a)
{
    return do_vfm_sp(s, a, false, false);
}

static bool trans_VFMS_sp(DisasContext *s, arg_VFMS_sp *a)
{
    return do_vfm_sp(s, a, true, false);
}

static bool trans_VFNMA_sp(DisasContext *s, arg_VFNMA_sp *a)
{
    return do_vfm_sp(s, a, false, true);
}

static bool trans_VFNMS_sp(DisasContext *s, arg_VFNMS_sp *a)
{
    return do_vfm_sp(s, a, true, true);
}
static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
{
    /*
     * VFNMA : fd = muladd(-fd,  fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA  : fd = muladd( fd,  fn, fm)
     * VFMS  : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps.  NB that doing the negations here as separate
     * steps is correct : an input NaN should come out with its sign
     * bit flipped if it is a negated-input.
     */
    TCGv_ptr fpst;
    TCGv_i64 vn, vm, vd;

    /*
     * Present in VFPv4 only.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }
    /*
     * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
     * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
     */
    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vn = tcg_temp_new_i64();
    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i64();

    neon_load_reg64(vn, a->vn);
    neon_load_reg64(vm, a->vm);
    if (neg_n) {
        /* VFNMS, VFMS */
        gen_helper_vfp_negd(vn, vn);
    }
    neon_load_reg64(vd, a->vd);
    if (neg_d) {
        /* VFNMA, VFNMS */
        gen_helper_vfp_negd(vd, vd);
    }
    fpst = get_fpstatus_ptr(0);
    gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
    neon_store_reg64(vd, a->vd);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(vn);
    tcg_temp_free_i64(vm);
    tcg_temp_free_i64(vd);

    return true;
}

static bool trans_VFMA_dp(DisasContext *s, arg_VFMA_dp *a)
{
    return do_vfm_dp(s, a, false, false);
}

static bool trans_VFMS_dp(DisasContext *s, arg_VFMS_dp *a)
{
    return do_vfm_dp(s, a, true, false);
}

static bool trans_VFNMA_dp(DisasContext *s, arg_VFNMA_dp *a)
{
    return do_vfm_dp(s, a, false, true);
}

static bool trans_VFNMS_dp(DisasContext *s, arg_VFNMS_dp *a)
{
    return do_vfm_dp(s, a, true, true);
}
static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
{
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 fd;
    uint32_t vd;

    vd = a->vd;

    if (!dc_isar_feature(aa32_fpsp_v3, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is.  */
        if (vfp_sreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = s->vec_stride + 1;
        }
    }

    fd = tcg_const_i32(vfp_expand_imm(MO_32, a->imm));

    for (;;) {
        neon_store_reg32(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_sreg(vd, delta_d);
    }

    tcg_temp_free_i32(fd);
    return true;
}
static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
{
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 fd;
    uint32_t vd;

    vd = a->vd;

    if (!dc_isar_feature(aa32_fpdp_v3, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is.  */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = (s->vec_stride >> 1) + 1;
        }
    }

    fd = tcg_const_i64(vfp_expand_imm(MO_64, a->imm));

    for (;;) {
        neon_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
    }

    tcg_temp_free_i64(fd);
    return true;
}
static bool trans_VMOV_reg_sp(DisasContext *s, arg_VMOV_reg_sp *a)
{
    return do_vfp_2op_sp(s, tcg_gen_mov_i32, a->vd, a->vm);
}

static bool trans_VMOV_reg_dp(DisasContext *s, arg_VMOV_reg_dp *a)
{
    return do_vfp_2op_dp(s, tcg_gen_mov_i64, a->vd, a->vm);
}

static bool trans_VABS_sp(DisasContext *s, arg_VABS_sp *a)
{
    return do_vfp_2op_sp(s, gen_helper_vfp_abss, a->vd, a->vm);
}

static bool trans_VABS_dp(DisasContext *s, arg_VABS_dp *a)
{
    return do_vfp_2op_dp(s, gen_helper_vfp_absd, a->vd, a->vm);
}

static bool trans_VNEG_sp(DisasContext *s, arg_VNEG_sp *a)
{
    return do_vfp_2op_sp(s, gen_helper_vfp_negs, a->vd, a->vm);
}

static bool trans_VNEG_dp(DisasContext *s, arg_VNEG_dp *a)
{
    return do_vfp_2op_dp(s, gen_helper_vfp_negd, a->vd, a->vm);
}

static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
{
    gen_helper_vfp_sqrts(vd, vm, cpu_env);
}

static bool trans_VSQRT_sp(DisasContext *s, arg_VSQRT_sp *a)
{
    return do_vfp_2op_sp(s, gen_VSQRT_sp, a->vd, a->vm);
}

static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
{
    gen_helper_vfp_sqrtd(vd, vm, cpu_env);
}

static bool trans_VSQRT_dp(DisasContext *s, arg_VSQRT_dp *a)
{
    return do_vfp_2op_dp(s, gen_VSQRT_dp, a->vd, a->vm);
}
static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
{
    TCGv_i32 vd, vm;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();

    neon_load_reg32(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i32(vm, 0);
    } else {
        neon_load_reg32(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmpes(vd, vm, cpu_env);
    } else {
        gen_helper_vfp_cmps(vd, vm, cpu_env);
    }
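
    /*
     * The E variant (a->e) uses the "raise exceptions" compare helper,
     * which signals Invalid Operation for any NaN input; the plain
     * compare signals only for signalling NaNs.
     */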

    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(vm);

    return true;
}
static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
{
    TCGv_i64 vd, vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i64();
    vm = tcg_temp_new_i64();

    neon_load_reg64(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i64(vm, 0);
    } else {
        neon_load_reg64(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmped(vd, vm, cpu_env);
    } else {
        gen_helper_vfp_cmpd(vd, vm, cpu_env);
    }

    tcg_temp_free_i64(vd);
    tcg_temp_free_i64(vm);

    return true;
}
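
/*
 * The f16 conversions below pass FPSCR.AHP (via get_ahp_flag()) to the
 * helpers: it selects between IEEE half-precision and the ARM
 * Alternative Half Precision format, which trades infinities and NaNs
 * for extra numeric range.
 */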
static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = get_fpstatus_ptr(false);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    /* The T bit tells us if we want the low or high 16 bits of Vm */
    tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
    neon_store_reg32(tmp, a->vd);
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}
static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;
    TCGv_i64 vd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = get_fpstatus_ptr(false);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    /* The T bit tells us if we want the low or high 16 bits of Vm */
    tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
    vd = tcg_temp_new_i64();
    gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
    neon_store_reg64(vd, a->vd);
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i64(vd);
    return true;
}
static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = get_fpstatus_ptr(false);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();

    neon_load_reg32(tmp, a->vm);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
    tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}
static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;
    TCGv_i64 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = get_fpstatus_ptr(false);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    vm = tcg_temp_new_i64();

    neon_load_reg64(vm, a->vm);
    gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
    tcg_temp_free_i64(vm);
    tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}
static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    neon_load_reg32(tmp, a->vm);
    fpst = get_fpstatus_ptr(false);
    gen_helper_rints(tmp, tmp, fpst);
    neon_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    neon_load_reg64(tmp, a->vm);
    fpst = get_fpstatus_ptr(false);
    gen_helper_rintd(tmp, tmp, fpst);
    neon_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    return true;
}
static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    neon_load_reg32(tmp, a->vm);
    fpst = get_fpstatus_ptr(false);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rints(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    neon_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_rmode);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    neon_load_reg64(tmp, a->vm);
    fpst = get_fpstatus_ptr(false);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rintd(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    neon_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    tcg_temp_free_i32(tcg_rmode);
    return true;
}
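
/*
 * VRINTX ("round to integral, raising Inexact") differs from VRINTR
 * above only in using the _exact helpers, which set the Inexact
 * cumulative flag when the result is not numerically equal to the
 * input.
 */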
static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    neon_load_reg32(tmp, a->vm);
    fpst = get_fpstatus_ptr(false);
    gen_helper_rints_exact(tmp, tmp, fpst);
    neon_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    neon_load_reg64(tmp, a->vm);
    fpst = get_fpstatus_ptr(false);
    gen_helper_rintd_exact(tmp, tmp, fpst);
    neon_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    return true;
}

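/*
 * Note the naming: trans_VCVT_sp() converts *from* single precision
 * (f32 -> f64) and trans_VCVT_dp() from double (f64 -> f32), so in
 * each case only the double-precision operand (Vd here, Vm below)
 * needs the D16-D31 existence check.
 */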
static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
{
    TCGv_i64 vd;
    TCGv_i32 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i64();
    neon_load_reg32(vm, a->vm);
    gen_helper_vfp_fcvtds(vd, vm, cpu_env);
    neon_store_reg64(vd, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i64(vd);
    return true;
}

static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
{
    TCGv_i64 vm;
    TCGv_i32 vd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i64();
    neon_load_reg64(vm, a->vm);
    gen_helper_vfp_fcvtsd(vd, vm, cpu_env);
    neon_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i64(vm);
    return true;
}

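/*
 * Integer-to-float conversions: a->s selects a signed source. These go
 * through fpst because the result can be inexact (for example,
 * 0xffffffff is not representable as an f32) and so must honour the
 * current rounding mode.
 */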
static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    neon_load_reg32(vm, a->vm);
    fpst = get_fpstatus_ptr(false);
    if (a->s) {
        /* i32 -> f32 */
        gen_helper_vfp_sitos(vm, vm, fpst);
    } else {
        /* u32 -> f32 */
        gen_helper_vfp_uitos(vm, vm, fpst);
    }
    neon_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
{
    TCGv_i32 vm;
    TCGv_i64 vd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i64();
    neon_load_reg32(vm, a->vm);
    fpst = get_fpstatus_ptr(false);
    if (a->s) {
        /* i32 -> f64 */
        gen_helper_vfp_sitod(vd, vm, fpst);
    } else {
        /* u32 -> f64 */
        gen_helper_vfp_uitod(vd, vm, fpst);
    }
    neon_store_reg64(vd, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i64(vd);
    tcg_temp_free_ptr(fpst);
    return true;
}

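/*
 * VJCVT converts f64 to a 32-bit signed integer with the semantics of
 * Javascript's ToInt32: round towards zero, take the result modulo
 * 2^32, and convert NaN and infinities to 0. As a rough sketch of the
 * arithmetic (illustrative only; the real work, including setting
 * FPSCR.NZCV to report whether the conversion was exact, is done in
 * the vjcvt helper):
 *
 *   m = fmod(trunc(x), 4294967296.0);     fold into (-2^32, 2^32)
 *   if (m < 0) m += 4294967296.0;         then into [0, 2^32)
 *   result = (int32_t)(uint32_t)m;        reinterpret as signed
 */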
static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
{
    TCGv_i32 vd;
    TCGv_i64 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_jscvt, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i32();
    neon_load_reg64(vm, a->vm);
    gen_helper_vjcvt(vd, vm, cpu_env);
    neon_store_reg32(vd, a->vd);
    tcg_temp_free_i64(vm);
    tcg_temp_free_i32(vd);
    return true;
}

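/*
 * VCVT between floating point and fixed point is destructive: Vd is
 * both source and destination, which is why the value is loaded from
 * a->vd below even in the fixed-to-float direction. frac_bits is the
 * number of fraction bits implied by the immediate: for example
 * (illustrative numbers), a 16-bit fixed-point operand (a->opc & 1
 * clear) with a->imm == 12 gives frac_bits == 4, so the raw value
 * 0x0018 (24) is read as 24 / 2^4 = 1.5.
 */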
static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
{
    TCGv_i32 vd, shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fpsp_v3, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i32();
    neon_load_reg32(vd, a->vd);

    fpst = get_fpstatus_ptr(false);
    shift = tcg_const_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtos(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltos(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtos(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultos(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    neon_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(shift);
    tcg_temp_free_ptr(fpst);
    return true;
}

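/*
 * The double-precision variant below is structurally identical; the
 * op:U:sx switch is repeated because each combination of fixed-point
 * type and direction has its own typed helper.
 */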
static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
{
    TCGv_i64 vd;
    TCGv_i32 shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fpdp_v3, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i64();
    neon_load_reg64(vd, a->vd);

    fpst = get_fpstatus_ptr(false);
    shift = tcg_const_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtod(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltod(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtod(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultod(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    neon_store_reg64(vd, a->vd);
    tcg_temp_free_i64(vd);
    tcg_temp_free_i32(shift);
    tcg_temp_free_ptr(fpst);
    return true;
}

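/*
 * Float-to-integer: a->rz set selects the VCVT encoding, which always
 * rounds towards zero; clear selects VCVTR, which rounds according to
 * the FPSCR rounding mode. a->s again selects a signed result.
 */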
static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = get_fpstatus_ptr(false);
    vm = tcg_temp_new_i32();
    neon_load_reg32(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizs(vm, vm, fpst);
        } else {
            gen_helper_vfp_tosis(vm, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizs(vm, vm, fpst);
        } else {
            gen_helper_vfp_touis(vm, vm, fpst);
        }
    }
    neon_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
{
    TCGv_i32 vd;
    TCGv_i64 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = get_fpstatus_ptr(false);
    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i32();
    neon_load_reg64(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizd(vd, vm, fpst);
        } else {
            gen_helper_vfp_tosid(vd, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizd(vd, vm, fpst);
        } else {
            gen_helper_vfp_touid(vd, vm, fpst);
        }
    }
    neon_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i64(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

/*
 * The decoding of VLLDM and VLSTM is nonstandard because:
 *  * if there is no FPU then these insns must NOP in
 *    Secure state and UNDEF in Nonsecure state
 *  * if there is an FPU then these insns do not have
 *    the usual behaviour that vfp_access_check() provides of
 *    being controlled by CPACR/NSACR enable bits or the
 *    lazy-stacking logic.
 */
static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a)
{
    TCGv_i32 fptr;

    if (!arm_dc_feature(s, ARM_FEATURE_M) ||
        !arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }
    /* If not secure, UNDEF. */
    if (!s->v8m_secure) {
        return false;
    }
    /* If there is no FPU, NOP. */
    if (!dc_isar_feature(aa32_vfp, s)) {
        return true;
    }

    fptr = load_reg(s, a->rn);
    if (a->l) {
        gen_helper_v7m_vlldm(cpu_env, fptr);
    } else {
        gen_helper_v7m_vlstm(cpu_env, fptr);
    }
    tcg_temp_free_i32(fptr);

    /* End the TB, because we have updated FP control bits */
    s->base.is_jmp = DISAS_UPDATE;
    return true;
}
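
/*
 * Note: ending the TB via DISAS_UPDATE matters here because state
 * cached in DisasContext at translation time (for example the lazy-FP
 * flags) may no longer match the CPU state once the vlldm/vlstm helper
 * has run.
 */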