target/arm/translate-vfp.inc.c

   1 /*
   2  *  ARM translation: AArch32 VFP instructions
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *  Copyright (c) 2005-2007 CodeSourcery
   6  *  Copyright (c) 2007 OpenedHand, Ltd.
   7  *  Copyright (c) 2019 Linaro, Ltd.
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  21  */
  22
  23 /*
  24  * This file is intended to be included from translate.c; it uses
  25  * some macros and definitions provided by that file.
  26  * It might be possible to convert it to a standalone .c file eventually.
  27  */
  28
  29 /* Include the generated VFP decoder */
  30 #include "decode-vfp.inc.c"
  31 #include "decode-vfp-uncond.inc.c"
  32
  33 /*
  34  * The imm8 encodes the sign bit, enough bits to represent an exponent in
  35  * the range 01....1xx to 10....0xx, and the most significant 4 bits of
  36  * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
  37  */
  38 uint64_t vfp_expand_imm(int size, uint8_t imm8)
  39 {
  40     uint64_t imm;
  41
  42     switch (size) {
  43     case MO_64:
  44         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
  45             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
  46             extract32(imm8, 0, 6);
  47         imm <<= 48;
  48         break;
  49     case MO_32:
  50         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
  51             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
  52             (extract32(imm8, 0, 6) << 3);
  53         imm <<= 16;
  54         break;
  55     case MO_16:
  56         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
  57             (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
  58             (extract32(imm8, 0, 6) << 6);
  59         break;
  60     default:
  61         g_assert_not_reached();
  62     }
  63     return imm;
  64 }
  65
  66 /*
  67  * Return the offset of a 16-bit half of the specified VFP single-precision
  68  * register. If top is true, returns the top 16 bits; otherwise the bottom
  69  * 16 bits.
  70  */
  71 static inline long vfp_f16_offset(unsigned reg, bool top)
  72 {
  73     long offs = vfp_reg_offset(false, reg);
  74 #ifdef HOST_WORDS_BIGENDIAN
  75     if (!top) {
  76         offs += 2;
  77     }
  78 #else
  79     if (top) {
  80         offs += 2;
  81     }
  82 #endif
  83     return offs;
  84 }
  85
  86 /*
  87  * Check that VFP access is enabled. If it is, do the necessary
  88  * M-profile lazy-FP handling and then return true.
  89  * If not, emit code to generate an appropriate exception and
  90  * return false.
  91  * The ignore_vfp_enabled argument specifies that we should ignore
  92  * whether VFP is enabled via FPEXC[EN]: this should be true for FMXR/FMRX
  93  * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
  94  */
  95 static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
  96 {
  97     if (s->fp_excp_el) {
  98         if (arm_dc_feature(s, ARM_FEATURE_M)) {
  99             gen_exception_insn(s, s->pc_curr, EXCP_NOCP, syn_uncategorized(),
 100                                s->fp_excp_el);
 101         } else {
 102             gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
 103                                syn_fp_access_trap(1, 0xe, false),
 104                                s->fp_excp_el);
 105         }
 106         return false;
 107     }
 108
 109     if (!s->vfp_enabled && !ignore_vfp_enabled) {
 110         assert(!arm_dc_feature(s, ARM_FEATURE_M));
 111         unallocated_encoding(s);
 112         return false;
 113     }
 114
 115     if (arm_dc_feature(s, ARM_FEATURE_M)) {
 116         /* Handle M-profile lazy FP state mechanics */
 117
 118         /* Trigger lazy-state preservation if necessary */
 119         if (s->v7m_lspact) {
 120             /*
 121              * Lazy state saving affects external memory and also the NVIC,
 122              * so we must mark it as an IO operation for icount.
 123              */
 124             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
 125                 gen_io_start();
 126             }
 127             gen_helper_v7m_preserve_fp_state(cpu_env);
 128             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
 129                 gen_io_end();
 130             }
 131             /*
 132              * If the preserve_fp_state helper doesn't throw an exception
 133              * then it will clear LSPACT; we don't need to repeat this for
 134              * any further FP insns in this TB.
 135              */
 136             s->v7m_lspact = false;
 137         }
 138
 139         /* Update ownership of FP context: set FPCCR.S to match current state */
 140         if (s->v8m_fpccr_s_wrong) {
 141             TCGv_i32 tmp;
 142
 143             tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
 144             if (s->v8m_secure) {
 145                 tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
 146             } else {
 147                 tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
 148             }
 149             store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
 150             /* Don't need to do this for any further FP insns in this TB */
 151             s->v8m_fpccr_s_wrong = false;
 152         }
 153
 154         if (s->v7m_new_fp_ctxt_needed) {
 155             /*
 156              * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA
 157              * and the FPSCR.
 158              */
 159             TCGv_i32 control, fpscr;
 160             uint32_t bits = R_V7M_CONTROL_FPCA_MASK;
 161
 162             fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
 163             gen_helper_vfp_set_fpscr(cpu_env, fpscr);
 164             tcg_temp_free_i32(fpscr);
 165             /*
 166              * We don't need to arrange to end the TB, because the only
 167              * parts of FPSCR which we cache in the TB flags are the VECLEN
 168              * and VECSTRIDE, and those don't exist for M-profile.
 169              */
 170
 171             if (s->v8m_secure) {
 172                 bits |= R_V7M_CONTROL_SFPA_MASK;
 173             }
 174             control = load_cpu_field(v7m.control[M_REG_S]);
 175             tcg_gen_ori_i32(control, control, bits);
 176             store_cpu_field(control, v7m.control[M_REG_S]);
 177             /* Don't need to do this for any further FP insns in this TB */
 178             s->v7m_new_fp_ctxt_needed = false;
 179         }
 180     }
 181
 182     return true;
 183 }
 184
 185 /*
 186  * The most usual kind of VFP access check, for everything except
 187  * FMXR/FMRX to the always-available special registers.
 188  */
 189 static bool vfp_access_check(DisasContext *s)
 190 {
 191     return full_vfp_access_check(s, false);
 192 }
 193
 194 static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
 195 {
 196     uint32_t rd, rn, rm;
 197     bool dp = a->dp;
 198
 199     if (!dc_isar_feature(aa32_vsel, s)) {
 200         return false;
 201     }
 202
 203     if (dp && !dc_isar_feature(aa32_fpdp_v2, s)) {
 204         return false;
 205     }
 206
 207     /* UNDEF accesses to D16-D31 if they don't exist */
 208     if (dp && !dc_isar_feature(aa32_simd_r32, s) &&
 209         ((a->vm | a->vn | a->vd) & 0x10)) {
 210         return false;
 211     }
 212
 213     rd = a->vd;
 214     rn = a->vn;
 215     rm = a->vm;
 216
 217     if (!vfp_access_check(s)) {
 218         return true;
 219     }
 220
 221     if (dp) {
 222         TCGv_i64 frn, frm, dest;
 223         TCGv_i64 tmp, zero, zf, nf, vf;
 224
 225         zero = tcg_const_i64(0);
 226
 227         frn = tcg_temp_new_i64();
 228         frm = tcg_temp_new_i64();
 229         dest = tcg_temp_new_i64();
 230
 231         zf = tcg_temp_new_i64();
 232         nf = tcg_temp_new_i64();
 233         vf = tcg_temp_new_i64();
 234
 235         tcg_gen_extu_i32_i64(zf, cpu_ZF);
 236         tcg_gen_ext_i32_i64(nf, cpu_NF);
 237         tcg_gen_ext_i32_i64(vf, cpu_VF);
 238
 239         neon_load_reg64(frn, rn);
 240         neon_load_reg64(frm, rm);
 241         switch (a->cc) {
 242         case 0: /* eq: Z */
 243             tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
 244                                 frn, frm);
 245             break;
 246         case 1: /* vs: V */
 247             tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
 248                                 frn, frm);
 249             break;
 250         case 2: /* ge: N == V -> N ^ V == 0 */
 251             tmp = tcg_temp_new_i64();
 252             tcg_gen_xor_i64(tmp, vf, nf);
 253             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
 254                                 frn, frm);
 255             tcg_temp_free_i64(tmp);
 256             break;
 257         case 3: /* gt: !Z && N == V */
 258             tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
 259                                 frn, frm);
 260             tmp = tcg_temp_new_i64();
 261             tcg_gen_xor_i64(tmp, vf, nf);
 262             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
 263                                 dest, frm);
 264             tcg_temp_free_i64(tmp);
 265             break;
 266         }
 267         neon_store_reg64(dest, rd);
 268         tcg_temp_free_i64(frn);
 269         tcg_temp_free_i64(frm);
 270         tcg_temp_free_i64(dest);
 271
 272         tcg_temp_free_i64(zf);
 273         tcg_temp_free_i64(nf);
 274         tcg_temp_free_i64(vf);
 275
 276         tcg_temp_free_i64(zero);
 277     } else {
 278         TCGv_i32 frn, frm, dest;
 279         TCGv_i32 tmp, zero;
 280
 281         zero = tcg_const_i32(0);
 282
 283         frn = tcg_temp_new_i32();
 284         frm = tcg_temp_new_i32();
 285         dest = tcg_temp_new_i32();
 286         neon_load_reg32(frn, rn);
 287         neon_load_reg32(frm, rm);
 288         switch (a->cc) {
 289         case 0: /* eq: Z */
 290             tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
 291                                 frn, frm);
 292             break;
 293         case 1: /* vs: V */
 294             tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
 295                                 frn, frm);
 296             break;
 297         case 2: /* ge: N == V -> N ^ V == 0 */
 298             tmp = tcg_temp_new_i32();
 299             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
 300             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
 301                                 frn, frm);
 302             tcg_temp_free_i32(tmp);
 303             break;
 304         case 3: /* gt: !Z && N == V */
 305             tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
 306                                 frn, frm);
 307             tmp = tcg_temp_new_i32();
 308             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
 309             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
 310                                 dest, frm);
 311             tcg_temp_free_i32(tmp);
 312             break;
 313         }
 314         neon_store_reg32(dest, rd);
 315         tcg_temp_free_i32(frn);
 316         tcg_temp_free_i32(frm);
 317         tcg_temp_free_i32(dest);
 318
 319         tcg_temp_free_i32(zero);
 320     }
 321
 322     return true;
 323 }
 324
 325 /*
 326  * Table for converting the most common AArch32 encoding of
 327  * rounding mode to arm_fprounding order (which matches the
 328  * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
 329  */
 330 static const uint8_t fp_decode_rm[] = {
 331     FPROUNDING_TIEAWAY,
 332     FPROUNDING_TIEEVEN,
 333     FPROUNDING_POSINF,
 334     FPROUNDING_NEGINF,
 335 };
 336
 337 static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
 338 {
 339     uint32_t rd, rm;
 340     bool dp = a->dp;
 341     TCGv_ptr fpst;
 342     TCGv_i32 tcg_rmode;
 343     int rounding = fp_decode_rm[a->rm];
 344
 345     if (!dc_isar_feature(aa32_vrint, s)) {
 346         return false;
 347     }
 348
 349     if (dp && !dc_isar_feature(aa32_fpdp_v2, s)) {
 350         return false;
 351     }
 352
 353     /* UNDEF accesses to D16-D31 if they don't exist */
 354     if (dp && !dc_isar_feature(aa32_simd_r32, s) &&
 355         ((a->vm | a->vd) & 0x10)) {
 356         return false;
 357     }
 358
 359     rd = a->vd;
 360     rm = a->vm;
 361
 362     if (!vfp_access_check(s)) {
 363         return true;
 364     }
 365
 366     fpst = get_fpstatus_ptr(0);
 367
 368     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
 369     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 370
 371     if (dp) {
 372         TCGv_i64 tcg_op;
 373         TCGv_i64 tcg_res;
 374         tcg_op = tcg_temp_new_i64();
 375         tcg_res = tcg_temp_new_i64();
 376         neon_load_reg64(tcg_op, rm);
 377         gen_helper_rintd(tcg_res, tcg_op, fpst);
 378         neon_store_reg64(tcg_res, rd);
 379         tcg_temp_free_i64(tcg_op);
 380         tcg_temp_free_i64(tcg_res);
 381     } else {
 382         TCGv_i32 tcg_op;
 383         TCGv_i32 tcg_res;
 384         tcg_op = tcg_temp_new_i32();
 385         tcg_res = tcg_temp_new_i32();
 386         neon_load_reg32(tcg_op, rm);
 387         gen_helper_rints(tcg_res, tcg_op, fpst);
 388         neon_store_reg32(tcg_res, rd);
 389         tcg_temp_free_i32(tcg_op);
 390         tcg_temp_free_i32(tcg_res);
 391     }
 392
 393     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 394     tcg_temp_free_i32(tcg_rmode);
 395
 396     tcg_temp_free_ptr(fpst);
 397     return true;
 398 }
 399
 400 static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
 401 {
 402     uint32_t rd, rm;
 403     bool dp = a->dp;
 404     TCGv_ptr fpst;
 405     TCGv_i32 tcg_rmode, tcg_shift;
 406     int rounding = fp_decode_rm[a->rm];
 407     bool is_signed = a->op;
 408
 409     if (!dc_isar_feature(aa32_vcvt_dr, s)) {
 410         return false;
 411     }
 412
 413     if (dp && !dc_isar_feature(aa32_fpdp_v2, s)) {
 414         return false;
 415     }
 416
 417     /* UNDEF accesses to D16-D31 if they don't exist */
 418     if (dp && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
 419         return false;
 420     }
 421
 422     rd = a->vd;
 423     rm = a->vm;
 424
 425     if (!vfp_access_check(s)) {
 426         return true;
 427     }
 428
 429     fpst = get_fpstatus_ptr(0);
 430
 431     tcg_shift = tcg_const_i32(0);
 432
 433     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
 434     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 435
 436     if (dp) {
 437         TCGv_i64 tcg_double, tcg_res;
 438         TCGv_i32 tcg_tmp;
 439         tcg_double = tcg_temp_new_i64();
 440         tcg_res = tcg_temp_new_i64();
 441         tcg_tmp = tcg_temp_new_i32();
 442         neon_load_reg64(tcg_double, rm);
 443         if (is_signed) {
 444             gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
 445         } else {
 446             gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
 447         }
 448         tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
 449         neon_store_reg32(tcg_tmp, rd);
 450         tcg_temp_free_i32(tcg_tmp);
 451         tcg_temp_free_i64(tcg_res);
 452         tcg_temp_free_i64(tcg_double);
 453     } else {
 454         TCGv_i32 tcg_single, tcg_res;
 455         tcg_single = tcg_temp_new_i32();
 456         tcg_res = tcg_temp_new_i32();
 457         neon_load_reg32(tcg_single, rm);
 458         if (is_signed) {
 459             gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
 460         } else {
 461             gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
 462         }
 463         neon_store_reg32(tcg_res, rd);
 464         tcg_temp_free_i32(tcg_res);
 465         tcg_temp_free_i32(tcg_single);
 466     }
 467
 468     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 469     tcg_temp_free_i32(tcg_rmode);
 470
 471     tcg_temp_free_i32(tcg_shift);
 472
 473     tcg_temp_free_ptr(fpst);
 474
 475     return true;
 476 }
 477
 478 static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
 479 {
 480     /* VMOV scalar to general purpose register */
 481     TCGv_i32 tmp;
 482     int pass;
 483     uint32_t offset;
 484
 485     /* SIZE == 2 is a VFP instruction; otherwise NEON.  */
 486     if (a->size == 2
 487         ? !dc_isar_feature(aa32_fpsp_v2, s)
 488         : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
 489         return false;
 490     }
 491
 492     /* UNDEF accesses to D16-D31 if they don't exist */
 493     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
 494         return false;
 495     }
 496
 497     offset = a->index << a->size;
 498     pass = extract32(offset, 2, 1);
 499     offset = extract32(offset, 0, 2) * 8;
 500
 501     if (!vfp_access_check(s)) {
 502         return true;
 503     }
 504
 505     tmp = neon_load_reg(a->vn, pass);
 506     switch (a->size) {
 507     case 0:
 508         if (offset) {
 509             tcg_gen_shri_i32(tmp, tmp, offset);
 510         }
 511         if (a->u) {
 512             gen_uxtb(tmp);
 513         } else {
 514             gen_sxtb(tmp);
 515         }
 516         break;
 517     case 1:
 518         if (a->u) {
 519             if (offset) {
 520                 tcg_gen_shri_i32(tmp, tmp, 16);
 521             } else {
 522                 gen_uxth(tmp);
 523             }
 524         } else {
 525             if (offset) {
 526                 tcg_gen_sari_i32(tmp, tmp, 16);
 527             } else {
 528                 gen_sxth(tmp);
 529             }
 530         }
 531         break;
 532     case 2:
 533         break;
 534     }
 535     store_reg(s, a->rt, tmp);
 536
 537     return true;
 538 }
 539
 540 static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
 541 {
 542     /* VMOV general purpose register to scalar */
 543     TCGv_i32 tmp, tmp2;
 544     int pass;
 545     uint32_t offset;
 546
 547     /* SIZE == 2 is a VFP instruction; otherwise NEON.  */
 548     if (a->size == 2
 549         ? !dc_isar_feature(aa32_fpsp_v2, s)
 550         : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
 551         return false;
 552     }
 553
 554     /* UNDEF accesses to D16-D31 if they don't exist */
 555     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
 556         return false;
 557     }
 558
 559     offset = a->index << a->size;
 560     pass = extract32(offset, 2, 1);
 561     offset = extract32(offset, 0, 2) * 8;
 562
 563     if (!vfp_access_check(s)) {
 564         return true;
 565     }
 566
 567     tmp = load_reg(s, a->rt);
 568     switch (a->size) {
 569     case 0:
 570         tmp2 = neon_load_reg(a->vn, pass);
 571         tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8);
 572         tcg_temp_free_i32(tmp2);
 573         break;
 574     case 1:
 575         tmp2 = neon_load_reg(a->vn, pass);
 576         tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16);
 577         tcg_temp_free_i32(tmp2);
 578         break;
 579     case 2:
 580         break;
 581     }
 582     neon_store_reg(a->vn, pass, tmp);
 583
 584     return true;
 585 }
 586
 587 static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
 588 {
 589     /* VDUP (general purpose register) */
 590     TCGv_i32 tmp;
 591     int size, vec_size;
 592
 593     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
 594         return false;
 595     }
 596
 597     /* UNDEF accesses to D16-D31 if they don't exist */
 598     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
 599         return false;
 600     }
 601
 602     if (a->b && a->e) {
 603         return false;
 604     }
 605
 606     if (a->q && (a->vn & 1)) {
 607         return false;
 608     }
 609
 610     vec_size = a->q ? 16 : 8;
 611     if (a->b) {
 612         size = 0;
 613     } else if (a->e) {
 614         size = 1;
 615     } else {
 616         size = 2;
 617     }
 618
 619     if (!vfp_access_check(s)) {
 620         return true;
 621     }
 622
 623     tmp = load_reg(s, a->rt);
 624     tcg_gen_gvec_dup_i32(size, neon_reg_offset(a->vn, 0),
 625                          vec_size, vec_size, tmp);
 626     tcg_temp_free_i32(tmp);
 627
 628     return true;
 629 }
 630
 631 static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
 632 {
 633     TCGv_i32 tmp;
 634     bool ignore_vfp_enabled = false;
 635
 636     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
 637         return false;
 638     }
 639
 640     if (arm_dc_feature(s, ARM_FEATURE_M)) {
 641         /*
 642          * The only M-profile VFP vmrs/vmsr sysreg is FPSCR.
 643          * Accesses to R15 are UNPREDICTABLE; we choose to undef.
 644          * (FPSCR -> r15 is a special case which writes to the PSR flags.)
 645          */
 646         if (a->rt == 15 && (!a->l || a->reg != ARM_VFP_FPSCR)) {
 647             return false;
 648         }
 649     }
 650
 651     switch (a->reg) {
 652     case ARM_VFP_FPSID:
 653         /*
 654          * VFPv2 allows access to FPSID from userspace; VFPv3 restricts
 655          * all ID registers to privileged access only.
 656          */
 657         if (IS_USER(s) && dc_isar_feature(aa32_fpsp_v3, s)) {
 658             return false;
 659         }
 660         ignore_vfp_enabled = true;
 661         break;
 662     case ARM_VFP_MVFR0:
 663     case ARM_VFP_MVFR1:
 664         if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
 665             return false;
 666         }
 667         ignore_vfp_enabled = true;
 668         break;
 669     case ARM_VFP_MVFR2:
 670         if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) {
 671             return false;
 672         }
 673         ignore_vfp_enabled = true;
 674         break;
 675     case ARM_VFP_FPSCR:
 676         break;
 677     case ARM_VFP_FPEXC:
 678         if (IS_USER(s)) {
 679             return false;
 680         }
 681         ignore_vfp_enabled = true;
 682         break;
 683     case ARM_VFP_FPINST:
 684     case ARM_VFP_FPINST2:
 685         /* Not present in VFPv3 */
 686         if (IS_USER(s) || dc_isar_feature(aa32_fpsp_v3, s)) {
 687             return false;
 688         }
 689         break;
 690     default:
 691         return false;
 692     }
 693
 694     if (!full_vfp_access_check(s, ignore_vfp_enabled)) {
 695         return true;
 696     }
 697
 698     if (a->l) {
 699         /* VMRS, move VFP special register to gp register */
 700         switch (a->reg) {
 701         case ARM_VFP_MVFR0:
 702         case ARM_VFP_MVFR1:
 703         case ARM_VFP_MVFR2:
 704         case ARM_VFP_FPSID:
 705             if (s->current_el == 1) {
 706                 TCGv_i32 tcg_reg, tcg_rt;
 707
 708                 gen_set_condexec(s);
 709                 gen_set_pc_im(s, s->pc_curr);
 710                 tcg_reg = tcg_const_i32(a->reg);
 711                 tcg_rt = tcg_const_i32(a->rt);
 712                 gen_helper_check_hcr_el2_trap(cpu_env, tcg_rt, tcg_reg);
 713                 tcg_temp_free_i32(tcg_reg);
 714                 tcg_temp_free_i32(tcg_rt);
 715             }
 716             /* fall through */
 717         case ARM_VFP_FPEXC:
 718         case ARM_VFP_FPINST:
 719         case ARM_VFP_FPINST2:
 720             tmp = load_cpu_field(vfp.xregs[a->reg]);
 721             break;
 722         case ARM_VFP_FPSCR:
 723             if (a->rt == 15) {
 724                 tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
 725                 tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
 726             } else {
 727                 tmp = tcg_temp_new_i32();
 728                 gen_helper_vfp_get_fpscr(tmp, cpu_env);
 729             }
 730             break;
 731         default:
 732             g_assert_not_reached();
 733         }
 734
 735         if (a->rt == 15) {
 736             /* Set the 4 flag bits in the CPSR.  */
 737             gen_set_nzcv(tmp);
 738             tcg_temp_free_i32(tmp);
 739         } else {
 740             store_reg(s, a->rt, tmp);
 741         }
 742     } else {
 743         /* VMSR, move gp register to VFP special register */
 744         switch (a->reg) {
 745         case ARM_VFP_FPSID:
 746         case ARM_VFP_MVFR0:
 747         case ARM_VFP_MVFR1:
 748         case ARM_VFP_MVFR2:
 749             /* Writes are ignored.  */
 750             break;
 751         case ARM_VFP_FPSCR:
 752             tmp = load_reg(s, a->rt);
 753             gen_helper_vfp_set_fpscr(cpu_env, tmp);
 754             tcg_temp_free_i32(tmp);
 755             gen_lookup_tb(s);
 756             break;
 757         case ARM_VFP_FPEXC:
 758             /*
 759              * TODO: VFP subarchitecture support.
 760              * For now, keep the EN bit only
 761              */
 762             tmp = load_reg(s, a->rt);
 763             tcg_gen_andi_i32(tmp, tmp, 1 << 30);
 764             store_cpu_field(tmp, vfp.xregs[a->reg]);
 765             gen_lookup_tb(s);
 766             break;
 767         case ARM_VFP_FPINST:
 768         case ARM_VFP_FPINST2:
 769             tmp = load_reg(s, a->rt);
 770             store_cpu_field(tmp, vfp.xregs[a->reg]);
 771             break;
 772         default:
 773             g_assert_not_reached();
 774         }
 775     }
 776
 777     return true;
 778 }
 779
 780 static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
 781 {
 782     TCGv_i32 tmp;
 783
 784     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
 785         return false;
 786     }
 787
 788     if (!vfp_access_check(s)) {
 789         return true;
 790     }
 791
 792     if (a->l) {
 793         /* VFP to general purpose register */
 794         tmp = tcg_temp_new_i32();
 795         neon_load_reg32(tmp, a->vn);
 796         if (a->rt == 15) {
 797             /* Set the 4 flag bits in the CPSR.  */
 798             gen_set_nzcv(tmp);
 799             tcg_temp_free_i32(tmp);
 800         } else {
 801             store_reg(s, a->rt, tmp);
 802         }
 803     } else {
 804         /* general purpose register to VFP */
 805         tmp = load_reg(s, a->rt);
 806         neon_store_reg32(tmp, a->vn);
 807         tcg_temp_free_i32(tmp);
 808     }
 809
 810     return true;
 811 }
 812
 813 static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
 814 {
 815     TCGv_i32 tmp;
 816
 817     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
 818         return false;
 819     }
 820
 821     /*
 822      * VMOV between two general-purpose registers and two single precision
 823      * floating point registers
 824      */
 825     if (!vfp_access_check(s)) {
 826         return true;
 827     }
 828
 829     if (a->op) {
 830         /* fpreg to gpreg */
 831         tmp = tcg_temp_new_i32();
 832         neon_load_reg32(tmp, a->vm);
 833         store_reg(s, a->rt, tmp);
 834         tmp = tcg_temp_new_i32();
 835         neon_load_reg32(tmp, a->vm + 1);
 836         store_reg(s, a->rt2, tmp);
 837     } else {
 838         /* gpreg to fpreg */
 839         tmp = load_reg(s, a->rt);
 840         neon_store_reg32(tmp, a->vm);
 841         tcg_temp_free_i32(tmp);
 842         tmp = load_reg(s, a->rt2);
 843         neon_store_reg32(tmp, a->vm + 1);
 844         tcg_temp_free_i32(tmp);
 845     }
 846
 847     return true;
 848 }
 849
 850 static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
 851 {
 852     TCGv_i32 tmp;
 853
 854     /*
 855      * VMOV between two general-purpose registers and one double precision
 856      * floating point register.  Note that this does not require support
 857      * for double precision arithmetic.
 858      */
 859     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
 860         return false;
 861     }
 862
 863     /* UNDEF accesses to D16-D31 if they don't exist */
 864     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
 865         return false;
 866     }
 867
 868     if (!vfp_access_check(s)) {
 869         return true;
 870     }
 871
 872     if (a->op) {
 873         /* fpreg to gpreg */
 874         tmp = tcg_temp_new_i32();
 875         neon_load_reg32(tmp, a->vm * 2);
 876         store_reg(s, a->rt, tmp);
 877         tmp = tcg_temp_new_i32();
 878         neon_load_reg32(tmp, a->vm * 2 + 1);
 879         store_reg(s, a->rt2, tmp);
 880     } else {
 881         /* gpreg to fpreg */
 882         tmp = load_reg(s, a->rt);
 883         neon_store_reg32(tmp, a->vm * 2);
 884         tcg_temp_free_i32(tmp);
 885         tmp = load_reg(s, a->rt2);
 886         neon_store_reg32(tmp, a->vm * 2 + 1);
 887         tcg_temp_free_i32(tmp);
 888     }
 889
 890     return true;
 891 }
 892
 893 static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
 894 {
 895     uint32_t offset;
 896     TCGv_i32 addr, tmp;
 897
 898     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
 899         return false;
 900     }
 901
 902     if (!vfp_access_check(s)) {
 903         return true;
 904     }
 905
 906     offset = a->imm << 2;
 907     if (!a->u) {
 908         offset = -offset;
 909     }
 910
 911     /* For thumb, use of PC is UNPREDICTABLE.  */
 912     addr = add_reg_for_lit(s, a->rn, offset);
 913     tmp = tcg_temp_new_i32();
 914     if (a->l) {
 915         gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
 916         neon_store_reg32(tmp, a->vd);
 917     } else {
 918         neon_load_reg32(tmp, a->vd);
 919         gen_aa32_st32(s, tmp, addr, get_mem_index(s));
 920     }
 921     tcg_temp_free_i32(tmp);
 922     tcg_temp_free_i32(addr);
 923
 924     return true;
 925 }
 926
 927 static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
 928 {
 929     uint32_t offset;
 930     TCGv_i32 addr;
 931     TCGv_i64 tmp;
 932
 933     /* Note that this does not require support for double arithmetic.  */
 934     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
 935         return false;
 936     }
 937
 938     /* UNDEF accesses to D16-D31 if they don't exist */
 939     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
 940         return false;
 941     }
 942
 943     if (!vfp_access_check(s)) {
 944         return true;
 945     }
 946
 947     offset = a->imm << 2;
 948     if (!a->u) {
 949         offset = -offset;
 950     }
 951
 952     /* For thumb, use of PC is UNPREDICTABLE.  */
 953     addr = add_reg_for_lit(s, a->rn, offset);
 954     tmp = tcg_temp_new_i64();
 955     if (a->l) {
 956         gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
 957         neon_store_reg64(tmp, a->vd);
 958     } else {
 959         neon_load_reg64(tmp, a->vd);
 960         gen_aa32_st64(s, tmp, addr, get_mem_index(s));
 961     }
 962     tcg_temp_free_i64(tmp);
 963     tcg_temp_free_i32(addr);
 964
 965     return true;
 966 }
 967
 968 static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
 969 {
 970     uint32_t offset;
 971     TCGv_i32 addr, tmp;
 972     int i, n;
 973
 974     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
 975         return false;
 976     }
 977
 978     n = a->imm;
 979
 980     if (n == 0 || (a->vd + n) > 32) {
 981         /*
 982          * UNPREDICTABLE cases for bad immediates: we choose to
 983          * UNDEF to avoid generating huge numbers of TCG ops
 984          */
 985         return false;
 986     }
 987     if (a->rn == 15 && a->w) {
 988         /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
 989         return false;
 990     }
 991
 992     if (!vfp_access_check(s)) {
 993         return true;
 994     }
 995
 996     /* For thumb, use of PC is UNPREDICTABLE.  */
 997     addr = add_reg_for_lit(s, a->rn, 0);
 998     if (a->p) {
 999         /* pre-decrement */
1000         tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
1001     }
1002
1003     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1004         /*
1005          * Here 'addr' is the lowest address we will store to,
1006          * and is either the old SP (if post-increment) or
1007          * the new SP (if pre-decrement). For post-increment
1008          * where the old value is below the limit and the new
1009          * value is above, it is UNKNOWN whether the limit check
1010          * triggers; we choose to trigger.
1011          */
1012         gen_helper_v8m_stackcheck(cpu_env, addr);
1013     }
1014
1015     offset = 4;
1016     tmp = tcg_temp_new_i32();
1017     for (i = 0; i < n; i++) {
1018         if (a->l) {
1019             /* load */
1020             gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1021             neon_store_reg32(tmp, a->vd + i);
1022         } else {
1023             /* store */
1024             neon_load_reg32(tmp, a->vd + i);
1025             gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1026         }
1027         tcg_gen_addi_i32(addr, addr, offset);
1028     }
1029     tcg_temp_free_i32(tmp);
1030     if (a->w) {
1031         /* writeback */
1032         if (a->p) {
1033             offset = -offset * n;
1034             tcg_gen_addi_i32(addr, addr, offset);
1035         }
1036         store_reg(s, a->rn, addr);
1037     } else {
1038         tcg_temp_free_i32(addr);
1039     }
1040
1041     return true;
1042 }
1043
1044 static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
1045 {
1046     uint32_t offset;
1047     TCGv_i32 addr;
1048     TCGv_i64 tmp;
1049     int i, n;
1050
1051     /* Note that this does not require support for double arithmetic.  */
1052     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1053         return false;
1054     }
1055
1056     n = a->imm >> 1;
1057
1058     if (n == 0 || (a->vd + n) > 32 || n > 16) {
1059         /*
1060          * UNPREDICTABLE cases for bad immediates: we choose to
1061          * UNDEF to avoid generating huge numbers of TCG ops
1062          */
1063         return false;
1064     }
1065     if (a->rn == 15 && a->w) {
1066         /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
1067         return false;
1068     }
1069
1070     /* UNDEF accesses to D16-D31 if they don't exist */
1071     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) {
1072         return false;
1073     }
1074
1075     if (!vfp_access_check(s)) {
1076         return true;
1077     }
1078
1079     /* For thumb, use of PC is UNPREDICTABLE.  */
1080     addr = add_reg_for_lit(s, a->rn, 0);
1081     if (a->p) {
1082         /* pre-decrement */
1083         tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
1084     }
1085
1086     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1087         /*
1088          * Here 'addr' is the lowest address we will store to,
1089          * and is either the old SP (if post-increment) or
1090          * the new SP (if pre-decrement). For post-increment
1091          * where the old value is below the limit and the new
1092          * value is above, it is UNKNOWN whether the limit check
1093          * triggers; we choose to trigger.
1094          */
1095         gen_helper_v8m_stackcheck(cpu_env, addr);
1096     }
1097
1098     offset = 8;
1099     tmp = tcg_temp_new_i64();
1100     for (i = 0; i < n; i++) {
1101         if (a->l) {
1102             /* load */
1103             gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
1104             neon_store_reg64(tmp, a->vd + i);
1105         } else {
1106             /* store */
1107             neon_load_reg64(tmp, a->vd + i);
1108             gen_aa32_st64(s, tmp, addr, get_mem_index(s));
1109         }
1110         tcg_gen_addi_i32(addr, addr, offset);
1111     }
1112     tcg_temp_free_i64(tmp);
1113     if (a->w) {
1114         /* writeback */
1115         if (a->p) {
1116             offset = -offset * n;
1117         } else if (a->imm & 1) {
1118             offset = 4;
1119         } else {
1120             offset = 0;
1121         }
1122
1123         if (offset != 0) {
1124             tcg_gen_addi_i32(addr, addr, offset);
1125         }
1126         store_reg(s, a->rn, addr);
1127     } else {
1128         tcg_temp_free_i32(addr);
1129     }
1130
1131     return true;
1132 }
1133
1134 /*
1135  * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp().
1136  * The callback should emit code to write a value to vd. If
1137  * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd
1138  * will contain the old value of the relevant VFP register;
1139  * otherwise it must be written to only.
1140  */
1141 typedef void VFPGen3OpSPFn(TCGv_i32 vd,
1142                            TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst);
1143 typedef void VFPGen3OpDPFn(TCGv_i64 vd,
1144                            TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);
1145
1146 /*
1147  * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp().
1148  * The callback should emit code to write a value to vd (which
1149  * should be written to only).
1150  */
1151 typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
1152 typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);
1153
/*
 * Tell whether S register number 'reg' belongs to the scalar bank,
 * i.e. is one of s0..s7. That is the case exactly when bits 3 and 4
 * of the register number are both clear.
 */
static inline bool vfp_sreg_is_scalar(int reg)
{
    const int bank_bits = reg & 0x18;

    return !bank_bits;
}
1162
/*
 * Tell whether D register number 'reg' belongs to a scalar bank,
 * i.e. is one of d0..d3 or d16..d19. That is the case exactly when
 * bits 2 and 3 of the register number are both clear.
 */
static inline bool vfp_dreg_is_scalar(int reg)
{
    const int bank_bits = reg & 0xc;

    return !bank_bits;
}
1171
/*
 * Step S register number 'reg' forwards by 'delta', wrapping around
 * inside its bank of eight: only the low 3 bits of the number change,
 * all higher bits are carried over untouched.
 */
static inline int vfp_advance_sreg(int reg, int delta)
{
    const int bank = reg & ~0x7;
    const int slot = (reg + delta) & 0x7;

    return slot | bank;
}
1180
/*
 * Step D register number 'reg' forwards by 'delta', wrapping around
 * inside its bank of four: only the low 2 bits of the number change,
 * all higher bits are carried over untouched.
 */
static inline int vfp_advance_dreg(int reg, int delta)
{
    const int bank = reg & ~0x3;
    const int slot = (reg + delta) & 0x3;

    return slot | bank;
}
1189
1190 /*
1191  * Perform a 3-operand VFP data processing instruction. fn is the
1192  * callback to do the actual operation; this function deals with the
1193  * code to handle looping around for VFP vector processing.
1194  */
1195 static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
1196                           int vd, int vn, int vm, bool reads_vd)
1197 {
1198     uint32_t delta_m = 0;
1199     uint32_t delta_d = 0;
1200     int veclen = s->vec_len;
1201     TCGv_i32 f0, f1, fd;
1202     TCGv_ptr fpst;
1203
1204     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1205         return false;
1206     }
1207
1208     if (!dc_isar_feature(aa32_fpshvec, s) &&
1209         (veclen != 0 || s->vec_stride != 0)) {
1210         return false;
1211     }
1212
1213     if (!vfp_access_check(s)) {
1214         return true;
1215     }
1216
1217     if (veclen > 0) {
1218         /* Figure out what type of vector operation this is.  */
1219         if (vfp_sreg_is_scalar(vd)) {
1220             /* scalar */
1221             veclen = 0;
1222         } else {
1223             delta_d = s->vec_stride + 1;
1224
1225             if (vfp_sreg_is_scalar(vm)) {
1226                 /* mixed scalar/vector */
1227                 delta_m = 0;
1228             } else {
1229                 /* vector */
1230                 delta_m = delta_d;
1231             }
1232         }
1233     }
1234
1235     f0 = tcg_temp_new_i32();
1236     f1 = tcg_temp_new_i32();
1237     fd = tcg_temp_new_i32();
1238     fpst = get_fpstatus_ptr(0);
1239
1240     neon_load_reg32(f0, vn);
1241     neon_load_reg32(f1, vm);
1242
1243     for (;;) {
1244         if (reads_vd) {
1245             neon_load_reg32(fd, vd);
1246         }
1247         fn(fd, f0, f1, fpst);
1248         neon_store_reg32(fd, vd);
1249
1250         if (veclen == 0) {
1251             break;
1252         }
1253
1254         /* Set up the operands for the next iteration */
1255         veclen--;
1256         vd = vfp_advance_sreg(vd, delta_d);
1257         vn = vfp_advance_sreg(vn, delta_d);
1258         neon_load_reg32(f0, vn);
1259         if (delta_m) {
1260             vm = vfp_advance_sreg(vm, delta_m);
1261             neon_load_reg32(f1, vm);
1262         }
1263     }
1264
1265     tcg_temp_free_i32(f0);
1266     tcg_temp_free_i32(f1);
1267     tcg_temp_free_i32(fd);
1268     tcg_temp_free_ptr(fpst);
1269
1270     return true;
1271 }
1272
1273 static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
1274                           int vd, int vn, int vm, bool reads_vd)
1275 {
1276     uint32_t delta_m = 0;
1277     uint32_t delta_d = 0;
1278     int veclen = s->vec_len;
1279     TCGv_i64 f0, f1, fd;
1280     TCGv_ptr fpst;
1281
1282     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
1283         return false;
1284     }
1285
1286     /* UNDEF accesses to D16-D31 if they don't exist */
1287     if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) {
1288         return false;
1289     }
1290
1291     if (!dc_isar_feature(aa32_fpshvec, s) &&
1292         (veclen != 0 || s->vec_stride != 0)) {
1293         return false;
1294     }
1295
1296     if (!vfp_access_check(s)) {
1297         return true;
1298     }
1299
1300     if (veclen > 0) {
1301         /* Figure out what type of vector operation this is.  */
1302         if (vfp_dreg_is_scalar(vd)) {
1303             /* scalar */
1304             veclen = 0;
1305         } else {
1306             delta_d = (s->vec_stride >> 1) + 1;
1307
1308             if (vfp_dreg_is_scalar(vm)) {
1309                 /* mixed scalar/vector */
1310                 delta_m = 0;
1311             } else {
1312                 /* vector */
1313                 delta_m = delta_d;
1314             }
1315         }
1316     }
1317
1318     f0 = tcg_temp_new_i64();
1319     f1 = tcg_temp_new_i64();
1320     fd = tcg_temp_new_i64();
1321     fpst = get_fpstatus_ptr(0);
1322
1323     neon_load_reg64(f0, vn);
1324     neon_load_reg64(f1, vm);
1325
1326     for (;;) {
1327         if (reads_vd) {
1328             neon_load_reg64(fd, vd);
1329         }
1330         fn(fd, f0, f1, fpst);
1331         neon_store_reg64(fd, vd);
1332
1333         if (veclen == 0) {
1334             break;
1335         }
1336         /* Set up the operands for the next iteration */
1337         veclen--;
1338         vd = vfp_advance_dreg(vd, delta_d);
1339         vn = vfp_advance_dreg(vn, delta_d);
1340         neon_load_reg64(f0, vn);
1341         if (delta_m) {
1342             vm = vfp_advance_dreg(vm, delta_m);
1343             neon_load_reg64(f1, vm);
1344         }
1345     }
1346
1347     tcg_temp_free_i64(f0);
1348     tcg_temp_free_i64(f1);
1349     tcg_temp_free_i64(fd);
1350     tcg_temp_free_ptr(fpst);
1351
1352     return true;
1353 }
1354
1355 static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
1356 {
1357     uint32_t delta_m = 0;
1358     uint32_t delta_d = 0;
1359     int veclen = s->vec_len;
1360     TCGv_i32 f0, fd;
1361
1362     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1363         return false;
1364     }
1365
1366     if (!dc_isar_feature(aa32_fpshvec, s) &&
1367         (veclen != 0 || s->vec_stride != 0)) {
1368         return false;
1369     }
1370
1371     if (!vfp_access_check(s)) {
1372         return true;
1373     }
1374
1375     if (veclen > 0) {
1376         /* Figure out what type of vector operation this is.  */
1377         if (vfp_sreg_is_scalar(vd)) {
1378             /* scalar */
1379             veclen = 0;
1380         } else {
1381             delta_d = s->vec_stride + 1;
1382
1383             if (vfp_sreg_is_scalar(vm)) {
1384                 /* mixed scalar/vector */
1385                 delta_m = 0;
1386             } else {
1387                 /* vector */
1388                 delta_m = delta_d;
1389             }
1390         }
1391     }
1392
1393     f0 = tcg_temp_new_i32();
1394     fd = tcg_temp_new_i32();
1395
1396     neon_load_reg32(f0, vm);
1397
1398     for (;;) {
1399         fn(fd, f0);
1400         neon_store_reg32(fd, vd);
1401
1402         if (veclen == 0) {
1403             break;
1404         }
1405
1406         if (delta_m == 0) {
1407             /* single source one-many */
1408             while (veclen--) {
1409                 vd = vfp_advance_sreg(vd, delta_d);
1410                 neon_store_reg32(fd, vd);
1411             }
1412             break;
1413         }
1414
1415         /* Set up the operands for the next iteration */
1416         veclen--;
1417         vd = vfp_advance_sreg(vd, delta_d);
1418         vm = vfp_advance_sreg(vm, delta_m);
1419         neon_load_reg32(f0, vm);
1420     }
1421
1422     tcg_temp_free_i32(f0);
1423     tcg_temp_free_i32(fd);
1424
1425     return true;
1426 }
1427
1428 static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
1429 {
1430     uint32_t delta_m = 0;
1431     uint32_t delta_d = 0;
1432     int veclen = s->vec_len;
1433     TCGv_i64 f0, fd;
1434
1435     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
1436         return false;
1437     }
1438
1439     /* UNDEF accesses to D16-D31 if they don't exist */
1440     if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) {
1441         return false;
1442     }
1443
1444     if (!dc_isar_feature(aa32_fpshvec, s) &&
1445         (veclen != 0 || s->vec_stride != 0)) {
1446         return false;
1447     }
1448
1449     if (!vfp_access_check(s)) {
1450         return true;
1451     }
1452
1453     if (veclen > 0) {
1454         /* Figure out what type of vector operation this is.  */
1455         if (vfp_dreg_is_scalar(vd)) {
1456             /* scalar */
1457             veclen = 0;
1458         } else {
1459             delta_d = (s->vec_stride >> 1) + 1;
1460
1461             if (vfp_dreg_is_scalar(vm)) {
1462                 /* mixed scalar/vector */
1463                 delta_m = 0;
1464             } else {
1465                 /* vector */
1466                 delta_m = delta_d;
1467             }
1468         }
1469     }
1470
1471     f0 = tcg_temp_new_i64();
1472     fd = tcg_temp_new_i64();
1473
1474     neon_load_reg64(f0, vm);
1475
1476     for (;;) {
1477         fn(fd, f0);
1478         neon_store_reg64(fd, vd);
1479
1480         if (veclen == 0) {
1481             break;
1482         }
1483
1484         if (delta_m == 0) {
1485             /* single source one-many */
1486             while (veclen--) {
1487                 vd = vfp_advance_dreg(vd, delta_d);
1488                 neon_store_reg64(fd, vd);
1489             }
1490             break;
1491         }
1492
1493         /* Set up the operands for the next iteration */
1494         veclen--;
1495         vd = vfp_advance_dreg(vd, delta_d);
1496         vd = vfp_advance_dreg(vm, delta_m);
1497         neon_load_reg64(f0, vm);
1498     }
1499
1500     tcg_temp_free_i64(f0);
1501     tcg_temp_free_i64(fd);
1502
1503     return true;
1504 }
1505
1506 static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1507 {
1508     /* Note that order of inputs to the add matters for NaNs */
1509     TCGv_i32 tmp = tcg_temp_new_i32();
1510
1511     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1512     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1513     tcg_temp_free_i32(tmp);
1514 }
1515
1516 static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
1517 {
1518     return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);
1519 }
1520
1521 static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1522 {
1523     /* Note that order of inputs to the add matters for NaNs */
1524     TCGv_i64 tmp = tcg_temp_new_i64();
1525
1526     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1527     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1528     tcg_temp_free_i64(tmp);
1529 }
1530
1531 static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
1532 {
1533     return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
1534 }
1535
1536 static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1537 {
1538     /*
1539      * VMLS: vd = vd + -(vn * vm)
1540      * Note that order of inputs to the add matters for NaNs.
1541      */
1542     TCGv_i32 tmp = tcg_temp_new_i32();
1543
1544     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1545     gen_helper_vfp_negs(tmp, tmp);
1546     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1547     tcg_temp_free_i32(tmp);
1548 }
1549
1550 static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
1551 {
1552     return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);
1553 }
1554
1555 static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1556 {
1557     /*
1558      * VMLS: vd = vd + -(vn * vm)
1559      * Note that order of inputs to the add matters for NaNs.
1560      */
1561     TCGv_i64 tmp = tcg_temp_new_i64();
1562
1563     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1564     gen_helper_vfp_negd(tmp, tmp);
1565     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1566     tcg_temp_free_i64(tmp);
1567 }
1568
1569 static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
1570 {
1571     return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
1572 }
1573
1574 static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1575 {
1576     /*
1577      * VNMLS: -fd + (fn * fm)
1578      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
1579      * plausible looking simplifications because this will give wrong results
1580      * for NaNs.
1581      */
1582     TCGv_i32 tmp = tcg_temp_new_i32();
1583
1584     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1585     gen_helper_vfp_negs(vd, vd);
1586     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1587     tcg_temp_free_i32(tmp);
1588 }
1589
1590 static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
1591 {
1592     return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);
1593 }
1594
1595 static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1596 {
1597     /*
1598      * VNMLS: -fd + (fn * fm)
1599      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
1600      * plausible looking simplifications because this will give wrong results
1601      * for NaNs.
1602      */
1603     TCGv_i64 tmp = tcg_temp_new_i64();
1604
1605     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1606     gen_helper_vfp_negd(vd, vd);
1607     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1608     tcg_temp_free_i64(tmp);
1609 }
1610
1611 static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
1612 {
1613     return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
1614 }
1615
1616 static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1617 {
1618     /* VNMLA: -fd + -(fn * fm) */
1619     TCGv_i32 tmp = tcg_temp_new_i32();
1620
1621     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1622     gen_helper_vfp_negs(tmp, tmp);
1623     gen_helper_vfp_negs(vd, vd);
1624     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1625     tcg_temp_free_i32(tmp);
1626 }
1627
1628 static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
1629 {
1630     return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);
1631 }
1632
1633 static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1634 {
1635     /* VNMLA: -fd + (fn * fm) */
1636     TCGv_i64 tmp = tcg_temp_new_i64();
1637
1638     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1639     gen_helper_vfp_negd(tmp, tmp);
1640     gen_helper_vfp_negd(vd, vd);
1641     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1642     tcg_temp_free_i64(tmp);
1643 }
1644
1645 static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
1646 {
1647     return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
1648 }
1649
1650 static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
1651 {
1652     return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
1653 }
1654
1655 static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
1656 {
1657     return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
1658 }
1659
1660 static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1661 {
1662     /* VNMUL: -(fn * fm) */
1663     gen_helper_vfp_muls(vd, vn, vm, fpst);
1664     gen_helper_vfp_negs(vd, vd);
1665 }
1666
1667 static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
1668 {
1669     return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false);
1670 }
1671
1672 static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1673 {
1674     /* VNMUL: -(fn * fm) */
1675     gen_helper_vfp_muld(vd, vn, vm, fpst);
1676     gen_helper_vfp_negd(vd, vd);
1677 }
1678
1679 static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
1680 {
1681     return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
1682 }
1683
1684 static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
1685 {
1686     return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
1687 }
1688
1689 static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
1690 {
1691     return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
1692 }
1693
1694 static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
1695 {
1696     return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
1697 }
1698
1699 static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
1700 {
1701     return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
1702 }
1703
1704 static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
1705 {
1706     return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
1707 }
1708
1709 static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
1710 {
1711     return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
1712 }
1713
1714 static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a)
1715 {
1716     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
1717         return false;
1718     }
1719     return do_vfp_3op_sp(s, gen_helper_vfp_minnums,
1720                          a->vd, a->vn, a->vm, false);
1721 }
1722
1723 static bool trans_VMAXNM_sp(DisasContext *s, arg_VMAXNM_sp *a)
1724 {
1725     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
1726         return false;
1727     }
1728     return do_vfp_3op_sp(s, gen_helper_vfp_maxnums,
1729                          a->vd, a->vn, a->vm, false);
1730 }
1731
1732 static bool trans_VMINNM_dp(DisasContext *s, arg_VMINNM_dp *a)
1733 {
1734     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
1735         return false;
1736     }
1737     return do_vfp_3op_dp(s, gen_helper_vfp_minnumd,
1738                          a->vd, a->vn, a->vm, false);
1739 }
1740
1741 static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a)
1742 {
1743     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
1744         return false;
1745     }
1746     return do_vfp_3op_dp(s, gen_helper_vfp_maxnumd,
1747                          a->vd, a->vn, a->vm, false);
1748 }
1749
1750 static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
1751 {
1752     /*
1753      * VFNMA : fd = muladd(-fd,  fn, fm)
1754      * VFNMS : fd = muladd(-fd, -fn, fm)
1755      * VFMA  : fd = muladd( fd,  fn, fm)
1756      * VFMS  : fd = muladd( fd, -fn, fm)
1757      *
1758      * These are fused multiply-add, and must be done as one floating
1759      * point operation with no rounding between the multiplication and
1760      * addition steps.  NB that doing the negations here as separate
1761      * steps is correct : an input NaN should come out with its sign
1762      * bit flipped if it is a negated-input.
1763      */
1764     TCGv_ptr fpst;
1765     TCGv_i32 vn, vm, vd;
1766
1767     /*
1768      * Present in VFPv4 only.
1769      * Note that we can't rely on the SIMDFMAC check alone, because
1770      * in a Neon-no-VFP core that ID register field will be non-zero.
1771      */
1772     if (!dc_isar_feature(aa32_simdfmac, s) ||
1773         !dc_isar_feature(aa32_fpsp_v2, s)) {
1774         return false;
1775     }
1776     /*
1777      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
1778      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
1779      */
1780     if (s->vec_len != 0 || s->vec_stride != 0) {
1781         return false;
1782     }
1783
1784     if (!vfp_access_check(s)) {
1785         return true;
1786     }
1787
1788     vn = tcg_temp_new_i32();
1789     vm = tcg_temp_new_i32();
1790     vd = tcg_temp_new_i32();
1791
1792     neon_load_reg32(vn, a->vn);
1793     neon_load_reg32(vm, a->vm);
1794     if (neg_n) {
1795         /* VFNMS, VFMS */
1796         gen_helper_vfp_negs(vn, vn);
1797     }
1798     neon_load_reg32(vd, a->vd);
1799     if (neg_d) {
1800         /* VFNMA, VFNMS */
1801         gen_helper_vfp_negs(vd, vd);
1802     }
1803     fpst = get_fpstatus_ptr(0);
1804     gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
1805     neon_store_reg32(vd, a->vd);
1806
1807     tcg_temp_free_ptr(fpst);
1808     tcg_temp_free_i32(vn);
1809     tcg_temp_free_i32(vm);
1810     tcg_temp_free_i32(vd);
1811
1812     return true;
1813 }
1814
1815 static bool trans_VFMA_sp(DisasContext *s, arg_VFMA_sp *a)
1816 {
1817     return do_vfm_sp(s, a, false, false);
1818 }
1819
1820 static bool trans_VFMS_sp(DisasContext *s, arg_VFMS_sp *a)
1821 {
1822     return do_vfm_sp(s, a, true, false);
1823 }
1824
1825 static bool trans_VFNMA_sp(DisasContext *s, arg_VFNMA_sp *a)
1826 {
1827     return do_vfm_sp(s, a, false, true);
1828 }
1829
1830 static bool trans_VFNMS_sp(DisasContext *s, arg_VFNMS_sp *a)
1831 {
1832     return do_vfm_sp(s, a, true, true);
1833 }
1834
1835 static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
1836 {
1837     /*
1838      * VFNMA : fd = muladd(-fd,  fn, fm)
1839      * VFNMS : fd = muladd(-fd, -fn, fm)
1840      * VFMA  : fd = muladd( fd,  fn, fm)
1841      * VFMS  : fd = muladd( fd, -fn, fm)
1842      *
1843      * These are fused multiply-add, and must be done as one floating
1844      * point operation with no rounding between the multiplication and
1845      * addition steps.  NB that doing the negations here as separate
1846      * steps is correct : an input NaN should come out with its sign
1847      * bit flipped if it is a negated-input.
1848      */
1849     TCGv_ptr fpst;
1850     TCGv_i64 vn, vm, vd;
1851
1852     /*
1853      * Present in VFPv4 only.
1854      * Note that we can't rely on the SIMDFMAC check alone, because
1855      * in a Neon-no-VFP core that ID register field will be non-zero.
1856      */
1857     if (!dc_isar_feature(aa32_simdfmac, s) ||
1858         !dc_isar_feature(aa32_fpdp_v2, s)) {
1859         return false;
1860     }
1861     /*
1862      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
1863      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
1864      */
1865     if (s->vec_len != 0 || s->vec_stride != 0) {
1866         return false;
1867     }
1868
1869     /* UNDEF accesses to D16-D31 if they don't exist. */
1870     if (!dc_isar_feature(aa32_simd_r32, s) &&
1871         ((a->vd | a->vn | a->vm) & 0x10)) {
1872         return false;
1873     }
1874
1875     /* UNDEF accesses to D16-D31 if they don't exist. */
1876     if (!dc_isar_feature(aa32_simd_r32, s) &&
1877         ((a->vd | a->vn | a->vm) & 0x10)) {
1878         return false;
1879     }
1880
1881     if (!vfp_access_check(s)) {
1882         return true;
1883     }
1884
1885     vn = tcg_temp_new_i64();
1886     vm = tcg_temp_new_i64();
1887     vd = tcg_temp_new_i64();
1888
1889     neon_load_reg64(vn, a->vn);
1890     neon_load_reg64(vm, a->vm);
1891     if (neg_n) {
1892         /* VFNMS, VFMS */
1893         gen_helper_vfp_negd(vn, vn);
1894     }
1895     neon_load_reg64(vd, a->vd);
1896     if (neg_d) {
1897         /* VFNMA, VFNMS */
1898         gen_helper_vfp_negd(vd, vd);
1899     }
1900     fpst = get_fpstatus_ptr(0);
1901     gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
1902     neon_store_reg64(vd, a->vd);
1903
1904     tcg_temp_free_ptr(fpst);
1905     tcg_temp_free_i64(vn);
1906     tcg_temp_free_i64(vm);
1907     tcg_temp_free_i64(vd);
1908
1909     return true;
1910 }
1911
1912 static bool trans_VFMA_dp(DisasContext *s, arg_VFMA_dp *a)
1913 {
1914     return do_vfm_dp(s, a, false, false);
1915 }
1916
1917 static bool trans_VFMS_dp(DisasContext *s, arg_VFMS_dp *a)
1918 {
1919     return do_vfm_dp(s, a, true, false);
1920 }
1921
1922 static bool trans_VFNMA_dp(DisasContext *s, arg_VFNMA_dp *a)
1923 {
1924     return do_vfm_dp(s, a, false, true);
1925 }
1926
1927 static bool trans_VFNMS_dp(DisasContext *s, arg_VFNMS_dp *a)
1928 {
1929     return do_vfm_dp(s, a, true, true);
1930 }
1931
1932 static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
1933 {
1934     uint32_t delta_d = 0;
1935     int veclen = s->vec_len;
1936     TCGv_i32 fd;
1937     uint32_t vd;
1938
1939     vd = a->vd;
1940
1941     if (!dc_isar_feature(aa32_fpsp_v3, s)) {
1942         return false;
1943     }
1944
1945     if (!dc_isar_feature(aa32_fpshvec, s) &&
1946         (veclen != 0 || s->vec_stride != 0)) {
1947         return false;
1948     }
1949
1950     if (!vfp_access_check(s)) {
1951         return true;
1952     }
1953
1954     if (veclen > 0) {
1955         /* Figure out what type of vector operation this is.  */
1956         if (vfp_sreg_is_scalar(vd)) {
1957             /* scalar */
1958             veclen = 0;
1959         } else {
1960             delta_d = s->vec_stride + 1;
1961         }
1962     }
1963
1964     fd = tcg_const_i32(vfp_expand_imm(MO_32, a->imm));
1965
1966     for (;;) {
1967         neon_store_reg32(fd, vd);
1968
1969         if (veclen == 0) {
1970             break;
1971         }
1972
1973         /* Set up the operands for the next iteration */
1974         veclen--;
1975         vd = vfp_advance_sreg(vd, delta_d);
1976     }
1977
1978     tcg_temp_free_i32(fd);
1979     return true;
1980 }
1981
1982 static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
1983 {
1984     uint32_t delta_d = 0;
1985     int veclen = s->vec_len;
1986     TCGv_i64 fd;
1987     uint32_t vd;
1988
1989     vd = a->vd;
1990
1991     if (!dc_isar_feature(aa32_fpdp_v3, s)) {
1992         return false;
1993     }
1994
1995     /* UNDEF accesses to D16-D31 if they don't exist. */
1996     if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) {
1997         return false;
1998     }
1999
2000     if (!dc_isar_feature(aa32_fpshvec, s) &&
2001         (veclen != 0 || s->vec_stride != 0)) {
2002         return false;
2003     }
2004
2005     if (!vfp_access_check(s)) {
2006         return true;
2007     }
2008
2009     if (veclen > 0) {
2010         /* Figure out what type of vector operation this is.  */
2011         if (vfp_dreg_is_scalar(vd)) {
2012             /* scalar */
2013             veclen = 0;
2014         } else {
2015             delta_d = (s->vec_stride >> 1) + 1;
2016         }
2017     }
2018
2019     fd = tcg_const_i64(vfp_expand_imm(MO_64, a->imm));
2020
2021     for (;;) {
2022         neon_store_reg64(fd, vd);
2023
2024         if (veclen == 0) {
2025             break;
2026         }
2027
2028         /* Set up the operands for the next iteration */
2029         veclen--;
2030         vd = vfp_advance_dreg(vd, delta_d);
2031     }
2032
2033     tcg_temp_free_i64(fd);
2034     return true;
2035 }
2036
2037 static bool trans_VMOV_reg_sp(DisasContext *s, arg_VMOV_reg_sp *a)
2038 {
2039     return do_vfp_2op_sp(s, tcg_gen_mov_i32, a->vd, a->vm);
2040 }
2041
2042 static bool trans_VMOV_reg_dp(DisasContext *s, arg_VMOV_reg_dp *a)
2043 {
2044     return do_vfp_2op_dp(s, tcg_gen_mov_i64, a->vd, a->vm);
2045 }
2046
2047 static bool trans_VABS_sp(DisasContext *s, arg_VABS_sp *a)
2048 {
2049     return do_vfp_2op_sp(s, gen_helper_vfp_abss, a->vd, a->vm);
2050 }
2051
2052 static bool trans_VABS_dp(DisasContext *s, arg_VABS_dp *a)
2053 {
2054     return do_vfp_2op_dp(s, gen_helper_vfp_absd, a->vd, a->vm);
2055 }
2056
2057 static bool trans_VNEG_sp(DisasContext *s, arg_VNEG_sp *a)
2058 {
2059     return do_vfp_2op_sp(s, gen_helper_vfp_negs, a->vd, a->vm);
2060 }
2061
2062 static bool trans_VNEG_dp(DisasContext *s, arg_VNEG_dp *a)
2063 {
2064     return do_vfp_2op_dp(s, gen_helper_vfp_negd, a->vd, a->vm);
2065 }
2066
2067 static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
2068 {
2069     gen_helper_vfp_sqrts(vd, vm, cpu_env);
2070 }
2071
2072 static bool trans_VSQRT_sp(DisasContext *s, arg_VSQRT_sp *a)
2073 {
2074     return do_vfp_2op_sp(s, gen_VSQRT_sp, a->vd, a->vm);
2075 }
2076
2077 static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
2078 {
2079     gen_helper_vfp_sqrtd(vd, vm, cpu_env);
2080 }
2081
2082 static bool trans_VSQRT_dp(DisasContext *s, arg_VSQRT_dp *a)
2083 {
2084     return do_vfp_2op_dp(s, gen_VSQRT_dp, a->vd, a->vm);
2085 }
2086
2087 static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
2088 {
2089     TCGv_i32 vd, vm;
2090
2091     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
2092         return false;
2093     }
2094
2095     /* Vm/M bits must be zero for the Z variant */
2096     if (a->z && a->vm != 0) {
2097         return false;
2098     }
2099
2100     if (!vfp_access_check(s)) {
2101         return true;
2102     }
2103
2104     vd = tcg_temp_new_i32();
2105     vm = tcg_temp_new_i32();
2106
2107     neon_load_reg32(vd, a->vd);
2108     if (a->z) {
2109         tcg_gen_movi_i32(vm, 0);
2110     } else {
2111         neon_load_reg32(vm, a->vm);
2112     }
2113
2114     if (a->e) {
2115         gen_helper_vfp_cmpes(vd, vm, cpu_env);
2116     } else {
2117         gen_helper_vfp_cmps(vd, vm, cpu_env);
2118     }
2119
2120     tcg_temp_free_i32(vd);
2121     tcg_temp_free_i32(vm);
2122
2123     return true;
2124 }
2125
2126 static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
2127 {
2128     TCGv_i64 vd, vm;
2129
2130     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2131         return false;
2132     }
2133
2134     /* Vm/M bits must be zero for the Z variant */
2135     if (a->z && a->vm != 0) {
2136         return false;
2137     }
2138
2139     /* UNDEF accesses to D16-D31 if they don't exist. */
2140     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2141         return false;
2142     }
2143
2144     if (!vfp_access_check(s)) {
2145         return true;
2146     }
2147
2148     vd = tcg_temp_new_i64();
2149     vm = tcg_temp_new_i64();
2150
2151     neon_load_reg64(vd, a->vd);
2152     if (a->z) {
2153         tcg_gen_movi_i64(vm, 0);
2154     } else {
2155         neon_load_reg64(vm, a->vm);
2156     }
2157
2158     if (a->e) {
2159         gen_helper_vfp_cmped(vd, vm, cpu_env);
2160     } else {
2161         gen_helper_vfp_cmpd(vd, vm, cpu_env);
2162     }
2163
2164     tcg_temp_free_i64(vd);
2165     tcg_temp_free_i64(vm);
2166
2167     return true;
2168 }
2169
2170 static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
2171 {
2172     TCGv_ptr fpst;
2173     TCGv_i32 ahp_mode;
2174     TCGv_i32 tmp;
2175
2176     if (!dc_isar_feature(aa32_fp16_spconv, s)) {
2177         return false;
2178     }
2179
2180     if (!vfp_access_check(s)) {
2181         return true;
2182     }
2183
2184     fpst = get_fpstatus_ptr(false);
2185     ahp_mode = get_ahp_flag();
2186     tmp = tcg_temp_new_i32();
2187     /* The T bit tells us if we want the low or high 16 bits of Vm */
2188     tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
2189     gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
2190     neon_store_reg32(tmp, a->vd);
2191     tcg_temp_free_i32(ahp_mode);
2192     tcg_temp_free_ptr(fpst);
2193     tcg_temp_free_i32(tmp);
2194     return true;
2195 }
2196
2197 static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
2198 {
2199     TCGv_ptr fpst;
2200     TCGv_i32 ahp_mode;
2201     TCGv_i32 tmp;
2202     TCGv_i64 vd;
2203
2204     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2205         return false;
2206     }
2207
2208     if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
2209         return false;
2210     }
2211
2212     /* UNDEF accesses to D16-D31 if they don't exist. */
2213     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd  & 0x10)) {
2214         return false;
2215     }
2216
2217     if (!vfp_access_check(s)) {
2218         return true;
2219     }
2220
2221     fpst = get_fpstatus_ptr(false);
2222     ahp_mode = get_ahp_flag();
2223     tmp = tcg_temp_new_i32();
2224     /* The T bit tells us if we want the low or high 16 bits of Vm */
2225     tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
2226     vd = tcg_temp_new_i64();
2227     gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
2228     neon_store_reg64(vd, a->vd);
2229     tcg_temp_free_i32(ahp_mode);
2230     tcg_temp_free_ptr(fpst);
2231     tcg_temp_free_i32(tmp);
2232     tcg_temp_free_i64(vd);
2233     return true;
2234 }
2235
2236 static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
2237 {
2238     TCGv_ptr fpst;
2239     TCGv_i32 ahp_mode;
2240     TCGv_i32 tmp;
2241
2242     if (!dc_isar_feature(aa32_fp16_spconv, s)) {
2243         return false;
2244     }
2245
2246     if (!vfp_access_check(s)) {
2247         return true;
2248     }
2249
2250     fpst = get_fpstatus_ptr(false);
2251     ahp_mode = get_ahp_flag();
2252     tmp = tcg_temp_new_i32();
2253
2254     neon_load_reg32(tmp, a->vm);
2255     gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
2256     tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
2257     tcg_temp_free_i32(ahp_mode);
2258     tcg_temp_free_ptr(fpst);
2259     tcg_temp_free_i32(tmp);
2260     return true;
2261 }
2262
2263 static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
2264 {
2265     TCGv_ptr fpst;
2266     TCGv_i32 ahp_mode;
2267     TCGv_i32 tmp;
2268     TCGv_i64 vm;
2269
2270     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2271         return false;
2272     }
2273
2274     if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
2275         return false;
2276     }
2277
2278     /* UNDEF accesses to D16-D31 if they don't exist. */
2279     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm  & 0x10)) {
2280         return false;
2281     }
2282
2283     if (!vfp_access_check(s)) {
2284         return true;
2285     }
2286
2287     fpst = get_fpstatus_ptr(false);
2288     ahp_mode = get_ahp_flag();
2289     tmp = tcg_temp_new_i32();
2290     vm = tcg_temp_new_i64();
2291
2292     neon_load_reg64(vm, a->vm);
2293     gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
2294     tcg_temp_free_i64(vm);
2295     tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
2296     tcg_temp_free_i32(ahp_mode);
2297     tcg_temp_free_ptr(fpst);
2298     tcg_temp_free_i32(tmp);
2299     return true;
2300 }
2301
2302 static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
2303 {
2304     TCGv_ptr fpst;
2305     TCGv_i32 tmp;
2306
2307     if (!dc_isar_feature(aa32_vrint, s)) {
2308         return false;
2309     }
2310
2311     if (!vfp_access_check(s)) {
2312         return true;
2313     }
2314
2315     tmp = tcg_temp_new_i32();
2316     neon_load_reg32(tmp, a->vm);
2317     fpst = get_fpstatus_ptr(false);
2318     gen_helper_rints(tmp, tmp, fpst);
2319     neon_store_reg32(tmp, a->vd);
2320     tcg_temp_free_ptr(fpst);
2321     tcg_temp_free_i32(tmp);
2322     return true;
2323 }
2324
2325 static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
2326 {
2327     TCGv_ptr fpst;
2328     TCGv_i64 tmp;
2329
2330     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2331         return false;
2332     }
2333
2334     if (!dc_isar_feature(aa32_vrint, s)) {
2335         return false;
2336     }
2337
2338     /* UNDEF accesses to D16-D31 if they don't exist. */
2339     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2340         return false;
2341     }
2342
2343     if (!vfp_access_check(s)) {
2344         return true;
2345     }
2346
2347     tmp = tcg_temp_new_i64();
2348     neon_load_reg64(tmp, a->vm);
2349     fpst = get_fpstatus_ptr(false);
2350     gen_helper_rintd(tmp, tmp, fpst);
2351     neon_store_reg64(tmp, a->vd);
2352     tcg_temp_free_ptr(fpst);
2353     tcg_temp_free_i64(tmp);
2354     return true;
2355 }
2356
2357 static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
2358 {
2359     TCGv_ptr fpst;
2360     TCGv_i32 tmp;
2361     TCGv_i32 tcg_rmode;
2362
2363     if (!dc_isar_feature(aa32_vrint, s)) {
2364         return false;
2365     }
2366
2367     if (!vfp_access_check(s)) {
2368         return true;
2369     }
2370
2371     tmp = tcg_temp_new_i32();
2372     neon_load_reg32(tmp, a->vm);
2373     fpst = get_fpstatus_ptr(false);
2374     tcg_rmode = tcg_const_i32(float_round_to_zero);
2375     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
2376     gen_helper_rints(tmp, tmp, fpst);
2377     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
2378     neon_store_reg32(tmp, a->vd);
2379     tcg_temp_free_ptr(fpst);
2380     tcg_temp_free_i32(tcg_rmode);
2381     tcg_temp_free_i32(tmp);
2382     return true;
2383 }
2384
2385 static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
2386 {
2387     TCGv_ptr fpst;
2388     TCGv_i64 tmp;
2389     TCGv_i32 tcg_rmode;
2390
2391     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2392         return false;
2393     }
2394
2395     if (!dc_isar_feature(aa32_vrint, s)) {
2396         return false;
2397     }
2398
2399     /* UNDEF accesses to D16-D31 if they don't exist. */
2400     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2401         return false;
2402     }
2403
2404     if (!vfp_access_check(s)) {
2405         return true;
2406     }
2407
2408     tmp = tcg_temp_new_i64();
2409     neon_load_reg64(tmp, a->vm);
2410     fpst = get_fpstatus_ptr(false);
2411     tcg_rmode = tcg_const_i32(float_round_to_zero);
2412     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
2413     gen_helper_rintd(tmp, tmp, fpst);
2414     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
2415     neon_store_reg64(tmp, a->vd);
2416     tcg_temp_free_ptr(fpst);
2417     tcg_temp_free_i64(tmp);
2418     tcg_temp_free_i32(tcg_rmode);
2419     return true;
2420 }
2421
2422 static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
2423 {
2424     TCGv_ptr fpst;
2425     TCGv_i32 tmp;
2426
2427     if (!dc_isar_feature(aa32_vrint, s)) {
2428         return false;
2429     }
2430
2431     if (!vfp_access_check(s)) {
2432         return true;
2433     }
2434
2435     tmp = tcg_temp_new_i32();
2436     neon_load_reg32(tmp, a->vm);
2437     fpst = get_fpstatus_ptr(false);
2438     gen_helper_rints_exact(tmp, tmp, fpst);
2439     neon_store_reg32(tmp, a->vd);
2440     tcg_temp_free_ptr(fpst);
2441     tcg_temp_free_i32(tmp);
2442     return true;
2443 }
2444
2445 static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
2446 {
2447     TCGv_ptr fpst;
2448     TCGv_i64 tmp;
2449
2450     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2451         return false;
2452     }
2453
2454     if (!dc_isar_feature(aa32_vrint, s)) {
2455         return false;
2456     }
2457
2458     /* UNDEF accesses to D16-D31 if they don't exist. */
2459     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2460         return false;
2461     }
2462
2463     if (!vfp_access_check(s)) {
2464         return true;
2465     }
2466
2467     tmp = tcg_temp_new_i64();
2468     neon_load_reg64(tmp, a->vm);
2469     fpst = get_fpstatus_ptr(false);
2470     gen_helper_rintd_exact(tmp, tmp, fpst);
2471     neon_store_reg64(tmp, a->vd);
2472     tcg_temp_free_ptr(fpst);
2473     tcg_temp_free_i64(tmp);
2474     return true;
2475 }
2476
2477 static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
2478 {
2479     TCGv_i64 vd;
2480     TCGv_i32 vm;
2481
2482     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2483         return false;
2484     }
2485
2486     /* UNDEF accesses to D16-D31 if they don't exist. */
2487     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
2488         return false;
2489     }
2490
2491     if (!vfp_access_check(s)) {
2492         return true;
2493     }
2494
2495     vm = tcg_temp_new_i32();
2496     vd = tcg_temp_new_i64();
2497     neon_load_reg32(vm, a->vm);
2498     gen_helper_vfp_fcvtds(vd, vm, cpu_env);
2499     neon_store_reg64(vd, a->vd);
2500     tcg_temp_free_i32(vm);
2501     tcg_temp_free_i64(vd);
2502     return true;
2503 }
2504
2505 static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
2506 {
2507     TCGv_i64 vm;
2508     TCGv_i32 vd;
2509
2510     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2511         return false;
2512     }
2513
2514     /* UNDEF accesses to D16-D31 if they don't exist. */
2515     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
2516         return false;
2517     }
2518
2519     if (!vfp_access_check(s)) {
2520         return true;
2521     }
2522
2523     vd = tcg_temp_new_i32();
2524     vm = tcg_temp_new_i64();
2525     neon_load_reg64(vm, a->vm);
2526     gen_helper_vfp_fcvtsd(vd, vm, cpu_env);
2527     neon_store_reg32(vd, a->vd);
2528     tcg_temp_free_i32(vd);
2529     tcg_temp_free_i64(vm);
2530     return true;
2531 }
2532
2533 static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
2534 {
2535     TCGv_i32 vm;
2536     TCGv_ptr fpst;
2537
2538     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
2539         return false;
2540     }
2541
2542     if (!vfp_access_check(s)) {
2543         return true;
2544     }
2545
2546     vm = tcg_temp_new_i32();
2547     neon_load_reg32(vm, a->vm);
2548     fpst = get_fpstatus_ptr(false);
2549     if (a->s) {
2550         /* i32 -> f32 */
2551         gen_helper_vfp_sitos(vm, vm, fpst);
2552     } else {
2553         /* u32 -> f32 */
2554         gen_helper_vfp_uitos(vm, vm, fpst);
2555     }
2556     neon_store_reg32(vm, a->vd);
2557     tcg_temp_free_i32(vm);
2558     tcg_temp_free_ptr(fpst);
2559     return true;
2560 }
2561
2562 static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
2563 {
2564     TCGv_i32 vm;
2565     TCGv_i64 vd;
2566     TCGv_ptr fpst;
2567
2568     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2569         return false;
2570     }
2571
2572     /* UNDEF accesses to D16-D31 if they don't exist. */
2573     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
2574         return false;
2575     }
2576
2577     if (!vfp_access_check(s)) {
2578         return true;
2579     }
2580
2581     vm = tcg_temp_new_i32();
2582     vd = tcg_temp_new_i64();
2583     neon_load_reg32(vm, a->vm);
2584     fpst = get_fpstatus_ptr(false);
2585     if (a->s) {
2586         /* i32 -> f64 */
2587         gen_helper_vfp_sitod(vd, vm, fpst);
2588     } else {
2589         /* u32 -> f64 */
2590         gen_helper_vfp_uitod(vd, vm, fpst);
2591     }
2592     neon_store_reg64(vd, a->vd);
2593     tcg_temp_free_i32(vm);
2594     tcg_temp_free_i64(vd);
2595     tcg_temp_free_ptr(fpst);
2596     return true;
2597 }
2598
2599 static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
2600 {
2601     TCGv_i32 vd;
2602     TCGv_i64 vm;
2603
2604     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2605         return false;
2606     }
2607
2608     if (!dc_isar_feature(aa32_jscvt, s)) {
2609         return false;
2610     }
2611
2612     /* UNDEF accesses to D16-D31 if they don't exist. */
2613     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
2614         return false;
2615     }
2616
2617     if (!vfp_access_check(s)) {
2618         return true;
2619     }
2620
2621     vm = tcg_temp_new_i64();
2622     vd = tcg_temp_new_i32();
2623     neon_load_reg64(vm, a->vm);
2624     gen_helper_vjcvt(vd, vm, cpu_env);
2625     neon_store_reg32(vd, a->vd);
2626     tcg_temp_free_i64(vm);
2627     tcg_temp_free_i32(vd);
2628     return true;
2629 }
2630
2631 static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
2632 {
2633     TCGv_i32 vd, shift;
2634     TCGv_ptr fpst;
2635     int frac_bits;
2636
2637     if (!dc_isar_feature(aa32_fpsp_v3, s)) {
2638         return false;
2639     }
2640
2641     if (!vfp_access_check(s)) {
2642         return true;
2643     }
2644
2645     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
2646
2647     vd = tcg_temp_new_i32();
2648     neon_load_reg32(vd, a->vd);
2649
2650     fpst = get_fpstatus_ptr(false);
2651     shift = tcg_const_i32(frac_bits);
2652
2653     /* Switch on op:U:sx bits */
2654     switch (a->opc) {
2655     case 0:
2656         gen_helper_vfp_shtos(vd, vd, shift, fpst);
2657         break;
2658     case 1:
2659         gen_helper_vfp_sltos(vd, vd, shift, fpst);
2660         break;
2661     case 2:
2662         gen_helper_vfp_uhtos(vd, vd, shift, fpst);
2663         break;
2664     case 3:
2665         gen_helper_vfp_ultos(vd, vd, shift, fpst);
2666         break;
2667     case 4:
2668         gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst);
2669         break;
2670     case 5:
2671         gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst);
2672         break;
2673     case 6:
2674         gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst);
2675         break;
2676     case 7:
2677         gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst);
2678         break;
2679     default:
2680         g_assert_not_reached();
2681     }
2682
2683     neon_store_reg32(vd, a->vd);
2684     tcg_temp_free_i32(vd);
2685     tcg_temp_free_i32(shift);
2686     tcg_temp_free_ptr(fpst);
2687     return true;
2688 }
2689
2690 static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
2691 {
2692     TCGv_i64 vd;
2693     TCGv_i32 shift;
2694     TCGv_ptr fpst;
2695     int frac_bits;
2696
2697     if (!dc_isar_feature(aa32_fpdp_v3, s)) {
2698         return false;
2699     }
2700
2701     /* UNDEF accesses to D16-D31 if they don't exist. */
2702     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
2703         return false;
2704     }
2705
2706     if (!vfp_access_check(s)) {
2707         return true;
2708     }
2709
2710     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
2711
2712     vd = tcg_temp_new_i64();
2713     neon_load_reg64(vd, a->vd);
2714
2715     fpst = get_fpstatus_ptr(false);
2716     shift = tcg_const_i32(frac_bits);
2717
2718     /* Switch on op:U:sx bits */
2719     switch (a->opc) {
2720     case 0:
2721         gen_helper_vfp_shtod(vd, vd, shift, fpst);
2722         break;
2723     case 1:
2724         gen_helper_vfp_sltod(vd, vd, shift, fpst);
2725         break;
2726     case 2:
2727         gen_helper_vfp_uhtod(vd, vd, shift, fpst);
2728         break;
2729     case 3:
2730         gen_helper_vfp_ultod(vd, vd, shift, fpst);
2731         break;
2732     case 4:
2733         gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst);
2734         break;
2735     case 5:
2736         gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst);
2737         break;
2738     case 6:
2739         gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst);
2740         break;
2741     case 7:
2742         gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst);
2743         break;
2744     default:
2745         g_assert_not_reached();
2746     }
2747
2748     neon_store_reg64(vd, a->vd);
2749     tcg_temp_free_i64(vd);
2750     tcg_temp_free_i32(shift);
2751     tcg_temp_free_ptr(fpst);
2752     return true;
2753 }
2754
2755 static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
2756 {
2757     TCGv_i32 vm;
2758     TCGv_ptr fpst;
2759
2760     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
2761         return false;
2762     }
2763
2764     if (!vfp_access_check(s)) {
2765         return true;
2766     }
2767
2768     fpst = get_fpstatus_ptr(false);
2769     vm = tcg_temp_new_i32();
2770     neon_load_reg32(vm, a->vm);
2771
2772     if (a->s) {
2773         if (a->rz) {
2774             gen_helper_vfp_tosizs(vm, vm, fpst);
2775         } else {
2776             gen_helper_vfp_tosis(vm, vm, fpst);
2777         }
2778     } else {
2779         if (a->rz) {
2780             gen_helper_vfp_touizs(vm, vm, fpst);
2781         } else {
2782             gen_helper_vfp_touis(vm, vm, fpst);
2783         }
2784     }
2785     neon_store_reg32(vm, a->vd);
2786     tcg_temp_free_i32(vm);
2787     tcg_temp_free_ptr(fpst);
2788     return true;
2789 }
2790
2791 static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
2792 {
2793     TCGv_i32 vd;
2794     TCGv_i64 vm;
2795     TCGv_ptr fpst;
2796
2797     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2798         return false;
2799     }
2800
2801     /* UNDEF accesses to D16-D31 if they don't exist. */
2802     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
2803         return false;
2804     }
2805
2806     if (!vfp_access_check(s)) {
2807         return true;
2808     }
2809
2810     fpst = get_fpstatus_ptr(false);
2811     vm = tcg_temp_new_i64();
2812     vd = tcg_temp_new_i32();
2813     neon_load_reg64(vm, a->vm);
2814
2815     if (a->s) {
2816         if (a->rz) {
2817             gen_helper_vfp_tosizd(vd, vm, fpst);
2818         } else {
2819             gen_helper_vfp_tosid(vd, vm, fpst);
2820         }
2821     } else {
2822         if (a->rz) {
2823             gen_helper_vfp_touizd(vd, vm, fpst);
2824         } else {
2825             gen_helper_vfp_touid(vd, vm, fpst);
2826         }
2827     }
2828     neon_store_reg32(vd, a->vd);
2829     tcg_temp_free_i32(vd);
2830     tcg_temp_free_i64(vm);
2831     tcg_temp_free_ptr(fpst);
2832     return true;
2833 }
2834
2835 /*
2836  * Decode VLLDM and VLSTM are nonstandard because:
2837  *  * if there is no FPU then these insns must NOP in
2838  *    Secure state and UNDEF in Nonsecure state
2839  *  * if there is an FPU then these insns do not have
2840  *    the usual behaviour that vfp_access_check() provides of
2841  *    being controlled by CPACR/NSACR enable bits or the
2842  *    lazy-stacking logic.
2843  */
2844 static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a)
2845 {
2846     TCGv_i32 fptr;
2847
2848     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
2849         !arm_dc_feature(s, ARM_FEATURE_V8)) {
2850         return false;
2851     }
2852     /* If not secure, UNDEF. */
2853     if (!s->v8m_secure) {
2854         return false;
2855     }
2856     /* If no fpu, NOP. */
2857     if (!dc_isar_feature(aa32_vfp, s)) {
2858         return true;
2859     }
2860
2861     fptr = load_reg(s, a->rn);
2862     if (a->l) {
2863         gen_helper_v7m_vlldm(cpu_env, fptr);
2864     } else {
2865         gen_helper_v7m_vlstm(cpu_env, fptr);
2866     }
2867     tcg_temp_free_i32(fptr);
2868
2869     /* End the TB, because we have updated FP control bits */
2870     s->base.is_jmp = DISAS_UPDATE;
2871     return true;
2872 }