target/arm/translate-vfp.inc.c

   1 /*
   2  *  ARM translation: AArch32 VFP instructions
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *  Copyright (c) 2005-2007 CodeSourcery
   6  *  Copyright (c) 2007 OpenedHand, Ltd.
   7  *  Copyright (c) 2019 Linaro, Ltd.
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  21  */
  22
  23 /*
  24  * This file is intended to be included from translate.c; it uses
  25  * some macros and definitions provided by that file.
  26  * It might be possible to convert it to a standalone .c file eventually.
  27  */
  28
  29 /* Include the generated VFP decoder */
  30 #include "decode-vfp.inc.c"
  31 #include "decode-vfp-uncond.inc.c"
  32
  33 /*
  34  * The imm8 encodes the sign bit, enough bits to represent an exponent in
  35  * the range 01....1xx to 10....0xx, and the most significant 4 bits of
  36  * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
  37  */
  38 uint64_t vfp_expand_imm(int size, uint8_t imm8)
  39 {
  40     uint64_t imm;
  41
  42     switch (size) {
  43     case MO_64:
  44         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
  45             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
  46             extract32(imm8, 0, 6);
  47         imm <<= 48;
  48         break;
  49     case MO_32:
  50         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
  51             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
  52             (extract32(imm8, 0, 6) << 3);
  53         imm <<= 16;
  54         break;
  55     case MO_16:
  56         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
  57             (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
  58             (extract32(imm8, 0, 6) << 6);
  59         break;
  60     default:
  61         g_assert_not_reached();
  62     }
  63     return imm;
  64 }
  65
  66 /*
  67  * Return the offset of a 16-bit half of the specified VFP single-precision
  68  * register. If top is true, returns the top 16 bits; otherwise the bottom
  69  * 16 bits.
  70  */
  71 static inline long vfp_f16_offset(unsigned reg, bool top)
  72 {
  73     long offs = vfp_reg_offset(false, reg);
  74 #ifdef HOST_WORDS_BIGENDIAN
  75     if (!top) {
  76         offs += 2;
  77     }
  78 #else
  79     if (top) {
  80         offs += 2;
  81     }
  82 #endif
  83     return offs;
  84 }
  85
  86 /*
  87  * Check that VFP access is enabled. If it is, do the necessary
  88  * M-profile lazy-FP handling and then return true.
  89  * If not, emit code to generate an appropriate exception and
  90  * return false.
  91  * The ignore_vfp_enabled argument specifies that we should ignore
  92  * whether VFP is enabled via FPEXC[EN]: this should be true for FMXR/FMRX
  93  * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
  94  */
  95 static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
  96 {
  97     if (s->fp_excp_el) {
  98         if (arm_dc_feature(s, ARM_FEATURE_M)) {
  99             gen_exception_insn(s, s->pc_curr, EXCP_NOCP, syn_uncategorized(),
 100                                s->fp_excp_el);
 101         } else {
 102             gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
 103                                syn_fp_access_trap(1, 0xe, false),
 104                                s->fp_excp_el);
 105         }
 106         return false;
 107     }
 108
 109     if (!s->vfp_enabled && !ignore_vfp_enabled) {
 110         assert(!arm_dc_feature(s, ARM_FEATURE_M));
 111         unallocated_encoding(s);
 112         return false;
 113     }
 114
 115     if (arm_dc_feature(s, ARM_FEATURE_M)) {
 116         /* Handle M-profile lazy FP state mechanics */
 117
 118         /* Trigger lazy-state preservation if necessary */
 119         if (s->v7m_lspact) {
 120             /*
 121              * Lazy state saving affects external memory and also the NVIC,
 122              * so we must mark it as an IO operation for icount (and cause
 123              * this to be the last insn in the TB).
 124              */
 125             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
 126                 s->base.is_jmp = DISAS_UPDATE_EXIT;
 127                 gen_io_start();
 128             }
 129             gen_helper_v7m_preserve_fp_state(cpu_env);
 130             /*
 131              * If the preserve_fp_state helper doesn't throw an exception
 132              * then it will clear LSPACT; we don't need to repeat this for
 133              * any further FP insns in this TB.
 134              */
 135             s->v7m_lspact = false;
 136         }
 137
 138         /* Update ownership of FP context: set FPCCR.S to match current state */
 139         if (s->v8m_fpccr_s_wrong) {
 140             TCGv_i32 tmp;
 141
 142             tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
 143             if (s->v8m_secure) {
 144                 tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
 145             } else {
 146                 tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
 147             }
 148             store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
 149             /* Don't need to do this for any further FP insns in this TB */
 150             s->v8m_fpccr_s_wrong = false;
 151         }
 152
 153         if (s->v7m_new_fp_ctxt_needed) {
 154             /*
 155              * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA
 156              * and the FPSCR.
 157              */
 158             TCGv_i32 control, fpscr;
 159             uint32_t bits = R_V7M_CONTROL_FPCA_MASK;
 160
 161             fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
 162             gen_helper_vfp_set_fpscr(cpu_env, fpscr);
 163             tcg_temp_free_i32(fpscr);
 164             /*
 165              * We don't need to arrange to end the TB, because the only
 166              * parts of FPSCR which we cache in the TB flags are the VECLEN
 167              * and VECSTRIDE, and those don't exist for M-profile.
 168              */
 169
 170             if (s->v8m_secure) {
 171                 bits |= R_V7M_CONTROL_SFPA_MASK;
 172             }
 173             control = load_cpu_field(v7m.control[M_REG_S]);
 174             tcg_gen_ori_i32(control, control, bits);
 175             store_cpu_field(control, v7m.control[M_REG_S]);
 176             /* Don't need to do this for any further FP insns in this TB */
 177             s->v7m_new_fp_ctxt_needed = false;
 178         }
 179     }
 180
 181     return true;
 182 }
 183
 184 /*
 185  * The most usual kind of VFP access check, for everything except
 186  * FMXR/FMRX to the always-available special registers.
 187  */
 188 static bool vfp_access_check(DisasContext *s)
 189 {
 190     return full_vfp_access_check(s, false);
 191 }
 192
 193 static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
 194 {
 195     uint32_t rd, rn, rm;
 196     bool dp = a->dp;
 197
 198     if (!dc_isar_feature(aa32_vsel, s)) {
 199         return false;
 200     }
 201
 202     if (dp && !dc_isar_feature(aa32_fpdp_v2, s)) {
 203         return false;
 204     }
 205
 206     /* UNDEF accesses to D16-D31 if they don't exist */
 207     if (dp && !dc_isar_feature(aa32_simd_r32, s) &&
 208         ((a->vm | a->vn | a->vd) & 0x10)) {
 209         return false;
 210     }
 211
 212     rd = a->vd;
 213     rn = a->vn;
 214     rm = a->vm;
 215
 216     if (!vfp_access_check(s)) {
 217         return true;
 218     }
 219
 220     if (dp) {
 221         TCGv_i64 frn, frm, dest;
 222         TCGv_i64 tmp, zero, zf, nf, vf;
 223
 224         zero = tcg_const_i64(0);
 225
 226         frn = tcg_temp_new_i64();
 227         frm = tcg_temp_new_i64();
 228         dest = tcg_temp_new_i64();
 229
 230         zf = tcg_temp_new_i64();
 231         nf = tcg_temp_new_i64();
 232         vf = tcg_temp_new_i64();
 233
 234         tcg_gen_extu_i32_i64(zf, cpu_ZF);
 235         tcg_gen_ext_i32_i64(nf, cpu_NF);
 236         tcg_gen_ext_i32_i64(vf, cpu_VF);
 237
 238         neon_load_reg64(frn, rn);
 239         neon_load_reg64(frm, rm);
 240         switch (a->cc) {
 241         case 0: /* eq: Z */
 242             tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
 243                                 frn, frm);
 244             break;
 245         case 1: /* vs: V */
 246             tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
 247                                 frn, frm);
 248             break;
 249         case 2: /* ge: N == V -> N ^ V == 0 */
 250             tmp = tcg_temp_new_i64();
 251             tcg_gen_xor_i64(tmp, vf, nf);
 252             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
 253                                 frn, frm);
 254             tcg_temp_free_i64(tmp);
 255             break;
 256         case 3: /* gt: !Z && N == V */
 257             tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
 258                                 frn, frm);
 259             tmp = tcg_temp_new_i64();
 260             tcg_gen_xor_i64(tmp, vf, nf);
 261             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
 262                                 dest, frm);
 263             tcg_temp_free_i64(tmp);
 264             break;
 265         }
 266         neon_store_reg64(dest, rd);
 267         tcg_temp_free_i64(frn);
 268         tcg_temp_free_i64(frm);
 269         tcg_temp_free_i64(dest);
 270
 271         tcg_temp_free_i64(zf);
 272         tcg_temp_free_i64(nf);
 273         tcg_temp_free_i64(vf);
 274
 275         tcg_temp_free_i64(zero);
 276     } else {
 277         TCGv_i32 frn, frm, dest;
 278         TCGv_i32 tmp, zero;
 279
 280         zero = tcg_const_i32(0);
 281
 282         frn = tcg_temp_new_i32();
 283         frm = tcg_temp_new_i32();
 284         dest = tcg_temp_new_i32();
 285         neon_load_reg32(frn, rn);
 286         neon_load_reg32(frm, rm);
 287         switch (a->cc) {
 288         case 0: /* eq: Z */
 289             tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
 290                                 frn, frm);
 291             break;
 292         case 1: /* vs: V */
 293             tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
 294                                 frn, frm);
 295             break;
 296         case 2: /* ge: N == V -> N ^ V == 0 */
 297             tmp = tcg_temp_new_i32();
 298             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
 299             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
 300                                 frn, frm);
 301             tcg_temp_free_i32(tmp);
 302             break;
 303         case 3: /* gt: !Z && N == V */
 304             tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
 305                                 frn, frm);
 306             tmp = tcg_temp_new_i32();
 307             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
 308             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
 309                                 dest, frm);
 310             tcg_temp_free_i32(tmp);
 311             break;
 312         }
 313         neon_store_reg32(dest, rd);
 314         tcg_temp_free_i32(frn);
 315         tcg_temp_free_i32(frm);
 316         tcg_temp_free_i32(dest);
 317
 318         tcg_temp_free_i32(zero);
 319     }
 320
 321     return true;
 322 }
 323
 324 /*
 325  * Table for converting the most common AArch32 encoding of
 326  * rounding mode to arm_fprounding order (which matches the
 327  * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
 328  */
 329 static const uint8_t fp_decode_rm[] = {
 330     FPROUNDING_TIEAWAY,
 331     FPROUNDING_TIEEVEN,
 332     FPROUNDING_POSINF,
 333     FPROUNDING_NEGINF,
 334 };
 335
 336 static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
 337 {
 338     uint32_t rd, rm;
 339     bool dp = a->dp;
 340     TCGv_ptr fpst;
 341     TCGv_i32 tcg_rmode;
 342     int rounding = fp_decode_rm[a->rm];
 343
 344     if (!dc_isar_feature(aa32_vrint, s)) {
 345         return false;
 346     }
 347
 348     if (dp && !dc_isar_feature(aa32_fpdp_v2, s)) {
 349         return false;
 350     }
 351
 352     /* UNDEF accesses to D16-D31 if they don't exist */
 353     if (dp && !dc_isar_feature(aa32_simd_r32, s) &&
 354         ((a->vm | a->vd) & 0x10)) {
 355         return false;
 356     }
 357
 358     rd = a->vd;
 359     rm = a->vm;
 360
 361     if (!vfp_access_check(s)) {
 362         return true;
 363     }
 364
 365     fpst = get_fpstatus_ptr(0);
 366
 367     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
 368     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 369
 370     if (dp) {
 371         TCGv_i64 tcg_op;
 372         TCGv_i64 tcg_res;
 373         tcg_op = tcg_temp_new_i64();
 374         tcg_res = tcg_temp_new_i64();
 375         neon_load_reg64(tcg_op, rm);
 376         gen_helper_rintd(tcg_res, tcg_op, fpst);
 377         neon_store_reg64(tcg_res, rd);
 378         tcg_temp_free_i64(tcg_op);
 379         tcg_temp_free_i64(tcg_res);
 380     } else {
 381         TCGv_i32 tcg_op;
 382         TCGv_i32 tcg_res;
 383         tcg_op = tcg_temp_new_i32();
 384         tcg_res = tcg_temp_new_i32();
 385         neon_load_reg32(tcg_op, rm);
 386         gen_helper_rints(tcg_res, tcg_op, fpst);
 387         neon_store_reg32(tcg_res, rd);
 388         tcg_temp_free_i32(tcg_op);
 389         tcg_temp_free_i32(tcg_res);
 390     }
 391
 392     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 393     tcg_temp_free_i32(tcg_rmode);
 394
 395     tcg_temp_free_ptr(fpst);
 396     return true;
 397 }
 398
 399 static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
 400 {
 401     uint32_t rd, rm;
 402     bool dp = a->dp;
 403     TCGv_ptr fpst;
 404     TCGv_i32 tcg_rmode, tcg_shift;
 405     int rounding = fp_decode_rm[a->rm];
 406     bool is_signed = a->op;
 407
 408     if (!dc_isar_feature(aa32_vcvt_dr, s)) {
 409         return false;
 410     }
 411
 412     if (dp && !dc_isar_feature(aa32_fpdp_v2, s)) {
 413         return false;
 414     }
 415
 416     /* UNDEF accesses to D16-D31 if they don't exist */
 417     if (dp && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
 418         return false;
 419     }
 420
 421     rd = a->vd;
 422     rm = a->vm;
 423
 424     if (!vfp_access_check(s)) {
 425         return true;
 426     }
 427
 428     fpst = get_fpstatus_ptr(0);
 429
 430     tcg_shift = tcg_const_i32(0);
 431
 432     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
 433     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 434
 435     if (dp) {
 436         TCGv_i64 tcg_double, tcg_res;
 437         TCGv_i32 tcg_tmp;
 438         tcg_double = tcg_temp_new_i64();
 439         tcg_res = tcg_temp_new_i64();
 440         tcg_tmp = tcg_temp_new_i32();
 441         neon_load_reg64(tcg_double, rm);
 442         if (is_signed) {
 443             gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
 444         } else {
 445             gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
 446         }
 447         tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
 448         neon_store_reg32(tcg_tmp, rd);
 449         tcg_temp_free_i32(tcg_tmp);
 450         tcg_temp_free_i64(tcg_res);
 451         tcg_temp_free_i64(tcg_double);
 452     } else {
 453         TCGv_i32 tcg_single, tcg_res;
 454         tcg_single = tcg_temp_new_i32();
 455         tcg_res = tcg_temp_new_i32();
 456         neon_load_reg32(tcg_single, rm);
 457         if (is_signed) {
 458             gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
 459         } else {
 460             gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
 461         }
 462         neon_store_reg32(tcg_res, rd);
 463         tcg_temp_free_i32(tcg_res);
 464         tcg_temp_free_i32(tcg_single);
 465     }
 466
 467     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 468     tcg_temp_free_i32(tcg_rmode);
 469
 470     tcg_temp_free_i32(tcg_shift);
 471
 472     tcg_temp_free_ptr(fpst);
 473
 474     return true;
 475 }
 476
 477 static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
 478 {
 479     /* VMOV scalar to general purpose register */
 480     TCGv_i32 tmp;
 481     int pass;
 482     uint32_t offset;
 483
 484     /* SIZE == 2 is a VFP instruction; otherwise NEON.  */
 485     if (a->size == 2
 486         ? !dc_isar_feature(aa32_fpsp_v2, s)
 487         : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
 488         return false;
 489     }
 490
 491     /* UNDEF accesses to D16-D31 if they don't exist */
 492     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
 493         return false;
 494     }
 495
 496     offset = a->index << a->size;
 497     pass = extract32(offset, 2, 1);
 498     offset = extract32(offset, 0, 2) * 8;
 499
 500     if (!vfp_access_check(s)) {
 501         return true;
 502     }
 503
 504     tmp = neon_load_reg(a->vn, pass);
 505     switch (a->size) {
 506     case 0:
 507         if (offset) {
 508             tcg_gen_shri_i32(tmp, tmp, offset);
 509         }
 510         if (a->u) {
 511             gen_uxtb(tmp);
 512         } else {
 513             gen_sxtb(tmp);
 514         }
 515         break;
 516     case 1:
 517         if (a->u) {
 518             if (offset) {
 519                 tcg_gen_shri_i32(tmp, tmp, 16);
 520             } else {
 521                 gen_uxth(tmp);
 522             }
 523         } else {
 524             if (offset) {
 525                 tcg_gen_sari_i32(tmp, tmp, 16);
 526             } else {
 527                 gen_sxth(tmp);
 528             }
 529         }
 530         break;
 531     case 2:
 532         break;
 533     }
 534     store_reg(s, a->rt, tmp);
 535
 536     return true;
 537 }
 538
 539 static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
 540 {
 541     /* VMOV general purpose register to scalar */
 542     TCGv_i32 tmp, tmp2;
 543     int pass;
 544     uint32_t offset;
 545
 546     /* SIZE == 2 is a VFP instruction; otherwise NEON.  */
 547     if (a->size == 2
 548         ? !dc_isar_feature(aa32_fpsp_v2, s)
 549         : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
 550         return false;
 551     }
 552
 553     /* UNDEF accesses to D16-D31 if they don't exist */
 554     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
 555         return false;
 556     }
 557
 558     offset = a->index << a->size;
 559     pass = extract32(offset, 2, 1);
 560     offset = extract32(offset, 0, 2) * 8;
 561
 562     if (!vfp_access_check(s)) {
 563         return true;
 564     }
 565
 566     tmp = load_reg(s, a->rt);
 567     switch (a->size) {
 568     case 0:
 569         tmp2 = neon_load_reg(a->vn, pass);
 570         tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8);
 571         tcg_temp_free_i32(tmp2);
 572         break;
 573     case 1:
 574         tmp2 = neon_load_reg(a->vn, pass);
 575         tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16);
 576         tcg_temp_free_i32(tmp2);
 577         break;
 578     case 2:
 579         break;
 580     }
 581     neon_store_reg(a->vn, pass, tmp);
 582
 583     return true;
 584 }
 585
 586 static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
 587 {
 588     /* VDUP (general purpose register) */
 589     TCGv_i32 tmp;
 590     int size, vec_size;
 591
 592     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
 593         return false;
 594     }
 595
 596     /* UNDEF accesses to D16-D31 if they don't exist */
 597     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
 598         return false;
 599     }
 600
 601     if (a->b && a->e) {
 602         return false;
 603     }
 604
 605     if (a->q && (a->vn & 1)) {
 606         return false;
 607     }
 608
 609     vec_size = a->q ? 16 : 8;
 610     if (a->b) {
 611         size = 0;
 612     } else if (a->e) {
 613         size = 1;
 614     } else {
 615         size = 2;
 616     }
 617
 618     if (!vfp_access_check(s)) {
 619         return true;
 620     }
 621
 622     tmp = load_reg(s, a->rt);
 623     tcg_gen_gvec_dup_i32(size, neon_reg_offset(a->vn, 0),
 624                          vec_size, vec_size, tmp);
 625     tcg_temp_free_i32(tmp);
 626
 627     return true;
 628 }
 629
 630 static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
 631 {
 632     TCGv_i32 tmp;
 633     bool ignore_vfp_enabled = false;
 634
 635     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
 636         return false;
 637     }
 638
 639     if (arm_dc_feature(s, ARM_FEATURE_M)) {
 640         /*
 641          * The only M-profile VFP vmrs/vmsr sysreg is FPSCR.
 642          * Accesses to R15 are UNPREDICTABLE; we choose to undef.
 643          * (FPSCR -> r15 is a special case which writes to the PSR flags.)
 644          */
 645         if (a->rt == 15 && (!a->l || a->reg != ARM_VFP_FPSCR)) {
 646             return false;
 647         }
 648     }
 649
 650     switch (a->reg) {
 651     case ARM_VFP_FPSID:
 652         /*
 653          * VFPv2 allows access to FPSID from userspace; VFPv3 restricts
 654          * all ID registers to privileged access only.
 655          */
 656         if (IS_USER(s) && dc_isar_feature(aa32_fpsp_v3, s)) {
 657             return false;
 658         }
 659         ignore_vfp_enabled = true;
 660         break;
 661     case ARM_VFP_MVFR0:
 662     case ARM_VFP_MVFR1:
 663         if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
 664             return false;
 665         }
 666         ignore_vfp_enabled = true;
 667         break;
 668     case ARM_VFP_MVFR2:
 669         if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) {
 670             return false;
 671         }
 672         ignore_vfp_enabled = true;
 673         break;
 674     case ARM_VFP_FPSCR:
 675         break;
 676     case ARM_VFP_FPEXC:
 677         if (IS_USER(s)) {
 678             return false;
 679         }
 680         ignore_vfp_enabled = true;
 681         break;
 682     case ARM_VFP_FPINST:
 683     case ARM_VFP_FPINST2:
 684         /* Not present in VFPv3 */
 685         if (IS_USER(s) || dc_isar_feature(aa32_fpsp_v3, s)) {
 686             return false;
 687         }
 688         break;
 689     default:
 690         return false;
 691     }
 692
 693     if (!full_vfp_access_check(s, ignore_vfp_enabled)) {
 694         return true;
 695     }
 696
 697     if (a->l) {
 698         /* VMRS, move VFP special register to gp register */
 699         switch (a->reg) {
 700         case ARM_VFP_MVFR0:
 701         case ARM_VFP_MVFR1:
 702         case ARM_VFP_MVFR2:
 703         case ARM_VFP_FPSID:
 704             if (s->current_el == 1) {
 705                 TCGv_i32 tcg_reg, tcg_rt;
 706
 707                 gen_set_condexec(s);
 708                 gen_set_pc_im(s, s->pc_curr);
 709                 tcg_reg = tcg_const_i32(a->reg);
 710                 tcg_rt = tcg_const_i32(a->rt);
 711                 gen_helper_check_hcr_el2_trap(cpu_env, tcg_rt, tcg_reg);
 712                 tcg_temp_free_i32(tcg_reg);
 713                 tcg_temp_free_i32(tcg_rt);
 714             }
 715             /* fall through */
 716         case ARM_VFP_FPEXC:
 717         case ARM_VFP_FPINST:
 718         case ARM_VFP_FPINST2:
 719             tmp = load_cpu_field(vfp.xregs[a->reg]);
 720             break;
 721         case ARM_VFP_FPSCR:
 722             if (a->rt == 15) {
 723                 tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
 724                 tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
 725             } else {
 726                 tmp = tcg_temp_new_i32();
 727                 gen_helper_vfp_get_fpscr(tmp, cpu_env);
 728             }
 729             break;
 730         default:
 731             g_assert_not_reached();
 732         }
 733
 734         if (a->rt == 15) {
 735             /* Set the 4 flag bits in the CPSR.  */
 736             gen_set_nzcv(tmp);
 737             tcg_temp_free_i32(tmp);
 738         } else {
 739             store_reg(s, a->rt, tmp);
 740         }
 741     } else {
 742         /* VMSR, move gp register to VFP special register */
 743         switch (a->reg) {
 744         case ARM_VFP_FPSID:
 745         case ARM_VFP_MVFR0:
 746         case ARM_VFP_MVFR1:
 747         case ARM_VFP_MVFR2:
 748             /* Writes are ignored.  */
 749             break;
 750         case ARM_VFP_FPSCR:
 751             tmp = load_reg(s, a->rt);
 752             gen_helper_vfp_set_fpscr(cpu_env, tmp);
 753             tcg_temp_free_i32(tmp);
 754             gen_lookup_tb(s);
 755             break;
 756         case ARM_VFP_FPEXC:
 757             /*
 758              * TODO: VFP subarchitecture support.
 759              * For now, keep the EN bit only
 760              */
 761             tmp = load_reg(s, a->rt);
 762             tcg_gen_andi_i32(tmp, tmp, 1 << 30);
 763             store_cpu_field(tmp, vfp.xregs[a->reg]);
 764             gen_lookup_tb(s);
 765             break;
 766         case ARM_VFP_FPINST:
 767         case ARM_VFP_FPINST2:
 768             tmp = load_reg(s, a->rt);
 769             store_cpu_field(tmp, vfp.xregs[a->reg]);
 770             break;
 771         default:
 772             g_assert_not_reached();
 773         }
 774     }
 775
 776     return true;
 777 }
 778
 779 static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
 780 {
 781     TCGv_i32 tmp;
 782
 783     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
 784         return false;
 785     }
 786
 787     if (!vfp_access_check(s)) {
 788         return true;
 789     }
 790
 791     if (a->l) {
 792         /* VFP to general purpose register */
 793         tmp = tcg_temp_new_i32();
 794         neon_load_reg32(tmp, a->vn);
 795         if (a->rt == 15) {
 796             /* Set the 4 flag bits in the CPSR.  */
 797             gen_set_nzcv(tmp);
 798             tcg_temp_free_i32(tmp);
 799         } else {
 800             store_reg(s, a->rt, tmp);
 801         }
 802     } else {
 803         /* general purpose register to VFP */
 804         tmp = load_reg(s, a->rt);
 805         neon_store_reg32(tmp, a->vn);
 806         tcg_temp_free_i32(tmp);
 807     }
 808
 809     return true;
 810 }
 811
 812 static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
 813 {
 814     TCGv_i32 tmp;
 815
 816     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
 817         return false;
 818     }
 819
 820     /*
 821      * VMOV between two general-purpose registers and two single precision
 822      * floating point registers
 823      */
 824     if (!vfp_access_check(s)) {
 825         return true;
 826     }
 827
 828     if (a->op) {
 829         /* fpreg to gpreg */
 830         tmp = tcg_temp_new_i32();
 831         neon_load_reg32(tmp, a->vm);
 832         store_reg(s, a->rt, tmp);
 833         tmp = tcg_temp_new_i32();
 834         neon_load_reg32(tmp, a->vm + 1);
 835         store_reg(s, a->rt2, tmp);
 836     } else {
 837         /* gpreg to fpreg */
 838         tmp = load_reg(s, a->rt);
 839         neon_store_reg32(tmp, a->vm);
 840         tcg_temp_free_i32(tmp);
 841         tmp = load_reg(s, a->rt2);
 842         neon_store_reg32(tmp, a->vm + 1);
 843         tcg_temp_free_i32(tmp);
 844     }
 845
 846     return true;
 847 }
 848
 849 static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
 850 {
 851     TCGv_i32 tmp;
 852
 853     /*
 854      * VMOV between two general-purpose registers and one double precision
 855      * floating point register.  Note that this does not require support
 856      * for double precision arithmetic.
 857      */
 858     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
 859         return false;
 860     }
 861
 862     /* UNDEF accesses to D16-D31 if they don't exist */
 863     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
 864         return false;
 865     }
 866
 867     if (!vfp_access_check(s)) {
 868         return true;
 869     }
 870
 871     if (a->op) {
 872         /* fpreg to gpreg */
 873         tmp = tcg_temp_new_i32();
 874         neon_load_reg32(tmp, a->vm * 2);
 875         store_reg(s, a->rt, tmp);
 876         tmp = tcg_temp_new_i32();
 877         neon_load_reg32(tmp, a->vm * 2 + 1);
 878         store_reg(s, a->rt2, tmp);
 879     } else {
 880         /* gpreg to fpreg */
 881         tmp = load_reg(s, a->rt);
 882         neon_store_reg32(tmp, a->vm * 2);
 883         tcg_temp_free_i32(tmp);
 884         tmp = load_reg(s, a->rt2);
 885         neon_store_reg32(tmp, a->vm * 2 + 1);
 886         tcg_temp_free_i32(tmp);
 887     }
 888
 889     return true;
 890 }
 891
 892 static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
 893 {
 894     uint32_t offset;
 895     TCGv_i32 addr, tmp;
 896
 897     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
 898         return false;
 899     }
 900
 901     if (!vfp_access_check(s)) {
 902         return true;
 903     }
 904
 905     offset = a->imm << 2;
 906     if (!a->u) {
 907         offset = -offset;
 908     }
 909
 910     /* For thumb, use of PC is UNPREDICTABLE.  */
 911     addr = add_reg_for_lit(s, a->rn, offset);
 912     tmp = tcg_temp_new_i32();
 913     if (a->l) {
 914         gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
 915         neon_store_reg32(tmp, a->vd);
 916     } else {
 917         neon_load_reg32(tmp, a->vd);
 918         gen_aa32_st32(s, tmp, addr, get_mem_index(s));
 919     }
 920     tcg_temp_free_i32(tmp);
 921     tcg_temp_free_i32(addr);
 922
 923     return true;
 924 }
 925
 926 static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
 927 {
 928     uint32_t offset;
 929     TCGv_i32 addr;
 930     TCGv_i64 tmp;
 931
 932     /* Note that this does not require support for double arithmetic.  */
 933     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
 934         return false;
 935     }
 936
 937     /* UNDEF accesses to D16-D31 if they don't exist */
 938     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
 939         return false;
 940     }
 941
 942     if (!vfp_access_check(s)) {
 943         return true;
 944     }
 945
 946     offset = a->imm << 2;
 947     if (!a->u) {
 948         offset = -offset;
 949     }
 950
 951     /* For thumb, use of PC is UNPREDICTABLE.  */
 952     addr = add_reg_for_lit(s, a->rn, offset);
 953     tmp = tcg_temp_new_i64();
 954     if (a->l) {
 955         gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
 956         neon_store_reg64(tmp, a->vd);
 957     } else {
 958         neon_load_reg64(tmp, a->vd);
 959         gen_aa32_st64(s, tmp, addr, get_mem_index(s));
 960     }
 961     tcg_temp_free_i64(tmp);
 962     tcg_temp_free_i32(addr);
 963
 964     return true;
 965 }
 966
 967 static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
 968 {
 969     uint32_t offset;
 970     TCGv_i32 addr, tmp;
 971     int i, n;
 972
 973     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
 974         return false;
 975     }
 976
 977     n = a->imm;
 978
 979     if (n == 0 || (a->vd + n) > 32) {
 980         /*
 981          * UNPREDICTABLE cases for bad immediates: we choose to
 982          * UNDEF to avoid generating huge numbers of TCG ops
 983          */
 984         return false;
 985     }
 986     if (a->rn == 15 && a->w) {
 987         /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
 988         return false;
 989     }
 990
 991     if (!vfp_access_check(s)) {
 992         return true;
 993     }
 994
 995     /* For thumb, use of PC is UNPREDICTABLE.  */
 996     addr = add_reg_for_lit(s, a->rn, 0);
 997     if (a->p) {
 998         /* pre-decrement */
 999         tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
1000     }
1001
1002     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1003         /*
1004          * Here 'addr' is the lowest address we will store to,
1005          * and is either the old SP (if post-increment) or
1006          * the new SP (if pre-decrement). For post-increment
1007          * where the old value is below the limit and the new
1008          * value is above, it is UNKNOWN whether the limit check
1009          * triggers; we choose to trigger.
1010          */
1011         gen_helper_v8m_stackcheck(cpu_env, addr);
1012     }
1013
1014     offset = 4;
1015     tmp = tcg_temp_new_i32();
1016     for (i = 0; i < n; i++) {
1017         if (a->l) {
1018             /* load */
1019             gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1020             neon_store_reg32(tmp, a->vd + i);
1021         } else {
1022             /* store */
1023             neon_load_reg32(tmp, a->vd + i);
1024             gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1025         }
1026         tcg_gen_addi_i32(addr, addr, offset);
1027     }
1028     tcg_temp_free_i32(tmp);
1029     if (a->w) {
1030         /* writeback */
1031         if (a->p) {
1032             offset = -offset * n;
1033             tcg_gen_addi_i32(addr, addr, offset);
1034         }
1035         store_reg(s, a->rn, addr);
1036     } else {
1037         tcg_temp_free_i32(addr);
1038     }
1039
1040     return true;
1041 }
1042
1043 static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
1044 {
1045     uint32_t offset;
1046     TCGv_i32 addr;
1047     TCGv_i64 tmp;
1048     int i, n;
1049
1050     /* Note that this does not require support for double arithmetic.  */
1051     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1052         return false;
1053     }
1054
1055     n = a->imm >> 1;
1056
1057     if (n == 0 || (a->vd + n) > 32 || n > 16) {
1058         /*
1059          * UNPREDICTABLE cases for bad immediates: we choose to
1060          * UNDEF to avoid generating huge numbers of TCG ops
1061          */
1062         return false;
1063     }
1064     if (a->rn == 15 && a->w) {
1065         /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
1066         return false;
1067     }
1068
1069     /* UNDEF accesses to D16-D31 if they don't exist */
1070     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) {
1071         return false;
1072     }
1073
1074     if (!vfp_access_check(s)) {
1075         return true;
1076     }
1077
1078     /* For thumb, use of PC is UNPREDICTABLE.  */
1079     addr = add_reg_for_lit(s, a->rn, 0);
1080     if (a->p) {
1081         /* pre-decrement */
1082         tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
1083     }
1084
1085     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1086         /*
1087          * Here 'addr' is the lowest address we will store to,
1088          * and is either the old SP (if post-increment) or
1089          * the new SP (if pre-decrement). For post-increment
1090          * where the old value is below the limit and the new
1091          * value is above, it is UNKNOWN whether the limit check
1092          * triggers; we choose to trigger.
1093          */
1094         gen_helper_v8m_stackcheck(cpu_env, addr);
1095     }
1096
1097     offset = 8;
1098     tmp = tcg_temp_new_i64();
1099     for (i = 0; i < n; i++) {
1100         if (a->l) {
1101             /* load */
1102             gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
1103             neon_store_reg64(tmp, a->vd + i);
1104         } else {
1105             /* store */
1106             neon_load_reg64(tmp, a->vd + i);
1107             gen_aa32_st64(s, tmp, addr, get_mem_index(s));
1108         }
1109         tcg_gen_addi_i32(addr, addr, offset);
1110     }
1111     tcg_temp_free_i64(tmp);
1112     if (a->w) {
1113         /* writeback */
1114         if (a->p) {
1115             offset = -offset * n;
1116         } else if (a->imm & 1) {
1117             offset = 4;
1118         } else {
1119             offset = 0;
1120         }
1121
1122         if (offset != 0) {
1123             tcg_gen_addi_i32(addr, addr, offset);
1124         }
1125         store_reg(s, a->rn, addr);
1126     } else {
1127         tcg_temp_free_i32(addr);
1128     }
1129
1130     return true;
1131 }
1132
1133 /*
1134  * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp().
1135  * The callback should emit code to write a value to vd. If
1136  * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd
1137  * will contain the old value of the relevant VFP register;
1138  * otherwise it must be written to only.
1139  */
1140 typedef void VFPGen3OpSPFn(TCGv_i32 vd,
1141                            TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst);
1142 typedef void VFPGen3OpDPFn(TCGv_i64 vd,
1143                            TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);
1144
1145 /*
1146  * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp().
1147  * The callback should emit code to write a value to vd (which
1148  * should be written to only).
1149  */
1150 typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
1151 typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);
1152
1153 /*
1154  * Return true if the specified S reg is in a scalar bank
1155  * (ie if it is s0..s7)
1156  */
1157 static inline bool vfp_sreg_is_scalar(int reg)
1158 {
1159     return (reg & 0x18) == 0;
1160 }
1161
1162 /*
1163  * Return true if the specified D reg is in a scalar bank
1164  * (ie if it is d0..d3 or d16..d19)
1165  */
1166 static inline bool vfp_dreg_is_scalar(int reg)
1167 {
1168     return (reg & 0xc) == 0;
1169 }
1170
1171 /*
1172  * Advance the S reg number forwards by delta within its bank
1173  * (ie increment the low 3 bits but leave the rest the same)
1174  */
1175 static inline int vfp_advance_sreg(int reg, int delta)
1176 {
1177     return ((reg + delta) & 0x7) | (reg & ~0x7);
1178 }
1179
1180 /*
1181  * Advance the D reg number forwards by delta within its bank
1182  * (ie increment the low 2 bits but leave the rest the same)
1183  */
1184 static inline int vfp_advance_dreg(int reg, int delta)
1185 {
1186     return ((reg + delta) & 0x3) | (reg & ~0x3);
1187 }
1188
1189 /*
1190  * Perform a 3-operand VFP data processing instruction. fn is the
1191  * callback to do the actual operation; this function deals with the
1192  * code to handle looping around for VFP vector processing.
1193  */
1194 static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
1195                           int vd, int vn, int vm, bool reads_vd)
1196 {
1197     uint32_t delta_m = 0;
1198     uint32_t delta_d = 0;
1199     int veclen = s->vec_len;
1200     TCGv_i32 f0, f1, fd;
1201     TCGv_ptr fpst;
1202
1203     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1204         return false;
1205     }
1206
1207     if (!dc_isar_feature(aa32_fpshvec, s) &&
1208         (veclen != 0 || s->vec_stride != 0)) {
1209         return false;
1210     }
1211
1212     if (!vfp_access_check(s)) {
1213         return true;
1214     }
1215
1216     if (veclen > 0) {
1217         /* Figure out what type of vector operation this is.  */
1218         if (vfp_sreg_is_scalar(vd)) {
1219             /* scalar */
1220             veclen = 0;
1221         } else {
1222             delta_d = s->vec_stride + 1;
1223
1224             if (vfp_sreg_is_scalar(vm)) {
1225                 /* mixed scalar/vector */
1226                 delta_m = 0;
1227             } else {
1228                 /* vector */
1229                 delta_m = delta_d;
1230             }
1231         }
1232     }
1233
1234     f0 = tcg_temp_new_i32();
1235     f1 = tcg_temp_new_i32();
1236     fd = tcg_temp_new_i32();
1237     fpst = get_fpstatus_ptr(0);
1238
1239     neon_load_reg32(f0, vn);
1240     neon_load_reg32(f1, vm);
1241
1242     for (;;) {
1243         if (reads_vd) {
1244             neon_load_reg32(fd, vd);
1245         }
1246         fn(fd, f0, f1, fpst);
1247         neon_store_reg32(fd, vd);
1248
1249         if (veclen == 0) {
1250             break;
1251         }
1252
1253         /* Set up the operands for the next iteration */
1254         veclen--;
1255         vd = vfp_advance_sreg(vd, delta_d);
1256         vn = vfp_advance_sreg(vn, delta_d);
1257         neon_load_reg32(f0, vn);
1258         if (delta_m) {
1259             vm = vfp_advance_sreg(vm, delta_m);
1260             neon_load_reg32(f1, vm);
1261         }
1262     }
1263
1264     tcg_temp_free_i32(f0);
1265     tcg_temp_free_i32(f1);
1266     tcg_temp_free_i32(fd);
1267     tcg_temp_free_ptr(fpst);
1268
1269     return true;
1270 }
1271
1272 static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
1273                           int vd, int vn, int vm, bool reads_vd)
1274 {
1275     uint32_t delta_m = 0;
1276     uint32_t delta_d = 0;
1277     int veclen = s->vec_len;
1278     TCGv_i64 f0, f1, fd;
1279     TCGv_ptr fpst;
1280
1281     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
1282         return false;
1283     }
1284
1285     /* UNDEF accesses to D16-D31 if they don't exist */
1286     if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) {
1287         return false;
1288     }
1289
1290     if (!dc_isar_feature(aa32_fpshvec, s) &&
1291         (veclen != 0 || s->vec_stride != 0)) {
1292         return false;
1293     }
1294
1295     if (!vfp_access_check(s)) {
1296         return true;
1297     }
1298
1299     if (veclen > 0) {
1300         /* Figure out what type of vector operation this is.  */
1301         if (vfp_dreg_is_scalar(vd)) {
1302             /* scalar */
1303             veclen = 0;
1304         } else {
1305             delta_d = (s->vec_stride >> 1) + 1;
1306
1307             if (vfp_dreg_is_scalar(vm)) {
1308                 /* mixed scalar/vector */
1309                 delta_m = 0;
1310             } else {
1311                 /* vector */
1312                 delta_m = delta_d;
1313             }
1314         }
1315     }
1316
1317     f0 = tcg_temp_new_i64();
1318     f1 = tcg_temp_new_i64();
1319     fd = tcg_temp_new_i64();
1320     fpst = get_fpstatus_ptr(0);
1321
1322     neon_load_reg64(f0, vn);
1323     neon_load_reg64(f1, vm);
1324
1325     for (;;) {
1326         if (reads_vd) {
1327             neon_load_reg64(fd, vd);
1328         }
1329         fn(fd, f0, f1, fpst);
1330         neon_store_reg64(fd, vd);
1331
1332         if (veclen == 0) {
1333             break;
1334         }
1335         /* Set up the operands for the next iteration */
1336         veclen--;
1337         vd = vfp_advance_dreg(vd, delta_d);
1338         vn = vfp_advance_dreg(vn, delta_d);
1339         neon_load_reg64(f0, vn);
1340         if (delta_m) {
1341             vm = vfp_advance_dreg(vm, delta_m);
1342             neon_load_reg64(f1, vm);
1343         }
1344     }
1345
1346     tcg_temp_free_i64(f0);
1347     tcg_temp_free_i64(f1);
1348     tcg_temp_free_i64(fd);
1349     tcg_temp_free_ptr(fpst);
1350
1351     return true;
1352 }
1353
1354 static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
1355 {
1356     uint32_t delta_m = 0;
1357     uint32_t delta_d = 0;
1358     int veclen = s->vec_len;
1359     TCGv_i32 f0, fd;
1360
1361     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1362         return false;
1363     }
1364
1365     if (!dc_isar_feature(aa32_fpshvec, s) &&
1366         (veclen != 0 || s->vec_stride != 0)) {
1367         return false;
1368     }
1369
1370     if (!vfp_access_check(s)) {
1371         return true;
1372     }
1373
1374     if (veclen > 0) {
1375         /* Figure out what type of vector operation this is.  */
1376         if (vfp_sreg_is_scalar(vd)) {
1377             /* scalar */
1378             veclen = 0;
1379         } else {
1380             delta_d = s->vec_stride + 1;
1381
1382             if (vfp_sreg_is_scalar(vm)) {
1383                 /* mixed scalar/vector */
1384                 delta_m = 0;
1385             } else {
1386                 /* vector */
1387                 delta_m = delta_d;
1388             }
1389         }
1390     }
1391
1392     f0 = tcg_temp_new_i32();
1393     fd = tcg_temp_new_i32();
1394
1395     neon_load_reg32(f0, vm);
1396
1397     for (;;) {
1398         fn(fd, f0);
1399         neon_store_reg32(fd, vd);
1400
1401         if (veclen == 0) {
1402             break;
1403         }
1404
1405         if (delta_m == 0) {
1406             /* single source one-many */
1407             while (veclen--) {
1408                 vd = vfp_advance_sreg(vd, delta_d);
1409                 neon_store_reg32(fd, vd);
1410             }
1411             break;
1412         }
1413
1414         /* Set up the operands for the next iteration */
1415         veclen--;
1416         vd = vfp_advance_sreg(vd, delta_d);
1417         vm = vfp_advance_sreg(vm, delta_m);
1418         neon_load_reg32(f0, vm);
1419     }
1420
1421     tcg_temp_free_i32(f0);
1422     tcg_temp_free_i32(fd);
1423
1424     return true;
1425 }
1426
1427 static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
1428 {
1429     uint32_t delta_m = 0;
1430     uint32_t delta_d = 0;
1431     int veclen = s->vec_len;
1432     TCGv_i64 f0, fd;
1433
1434     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
1435         return false;
1436     }
1437
1438     /* UNDEF accesses to D16-D31 if they don't exist */
1439     if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) {
1440         return false;
1441     }
1442
1443     if (!dc_isar_feature(aa32_fpshvec, s) &&
1444         (veclen != 0 || s->vec_stride != 0)) {
1445         return false;
1446     }
1447
1448     if (!vfp_access_check(s)) {
1449         return true;
1450     }
1451
1452     if (veclen > 0) {
1453         /* Figure out what type of vector operation this is.  */
1454         if (vfp_dreg_is_scalar(vd)) {
1455             /* scalar */
1456             veclen = 0;
1457         } else {
1458             delta_d = (s->vec_stride >> 1) + 1;
1459
1460             if (vfp_dreg_is_scalar(vm)) {
1461                 /* mixed scalar/vector */
1462                 delta_m = 0;
1463             } else {
1464                 /* vector */
1465                 delta_m = delta_d;
1466             }
1467         }
1468     }
1469
1470     f0 = tcg_temp_new_i64();
1471     fd = tcg_temp_new_i64();
1472
1473     neon_load_reg64(f0, vm);
1474
1475     for (;;) {
1476         fn(fd, f0);
1477         neon_store_reg64(fd, vd);
1478
1479         if (veclen == 0) {
1480             break;
1481         }
1482
1483         if (delta_m == 0) {
1484             /* single source one-many */
1485             while (veclen--) {
1486                 vd = vfp_advance_dreg(vd, delta_d);
1487                 neon_store_reg64(fd, vd);
1488             }
1489             break;
1490         }
1491
1492         /* Set up the operands for the next iteration */
1493         veclen--;
1494         vd = vfp_advance_dreg(vd, delta_d);
1495         vd = vfp_advance_dreg(vm, delta_m);
1496         neon_load_reg64(f0, vm);
1497     }
1498
1499     tcg_temp_free_i64(f0);
1500     tcg_temp_free_i64(fd);
1501
1502     return true;
1503 }
1504
1505 static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1506 {
1507     /* Note that order of inputs to the add matters for NaNs */
1508     TCGv_i32 tmp = tcg_temp_new_i32();
1509
1510     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1511     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1512     tcg_temp_free_i32(tmp);
1513 }
1514
1515 static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
1516 {
1517     return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);
1518 }
1519
1520 static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1521 {
1522     /* Note that order of inputs to the add matters for NaNs */
1523     TCGv_i64 tmp = tcg_temp_new_i64();
1524
1525     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1526     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1527     tcg_temp_free_i64(tmp);
1528 }
1529
1530 static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
1531 {
1532     return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
1533 }
1534
1535 static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1536 {
1537     /*
1538      * VMLS: vd = vd + -(vn * vm)
1539      * Note that order of inputs to the add matters for NaNs.
1540      */
1541     TCGv_i32 tmp = tcg_temp_new_i32();
1542
1543     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1544     gen_helper_vfp_negs(tmp, tmp);
1545     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1546     tcg_temp_free_i32(tmp);
1547 }
1548
1549 static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
1550 {
1551     return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);
1552 }
1553
1554 static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1555 {
1556     /*
1557      * VMLS: vd = vd + -(vn * vm)
1558      * Note that order of inputs to the add matters for NaNs.
1559      */
1560     TCGv_i64 tmp = tcg_temp_new_i64();
1561
1562     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1563     gen_helper_vfp_negd(tmp, tmp);
1564     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1565     tcg_temp_free_i64(tmp);
1566 }
1567
1568 static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
1569 {
1570     return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
1571 }
1572
1573 static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1574 {
1575     /*
1576      * VNMLS: -fd + (fn * fm)
1577      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
1578      * plausible looking simplifications because this will give wrong results
1579      * for NaNs.
1580      */
1581     TCGv_i32 tmp = tcg_temp_new_i32();
1582
1583     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1584     gen_helper_vfp_negs(vd, vd);
1585     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1586     tcg_temp_free_i32(tmp);
1587 }
1588
1589 static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
1590 {
1591     return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);
1592 }
1593
1594 static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1595 {
1596     /*
1597      * VNMLS: -fd + (fn * fm)
1598      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
1599      * plausible looking simplifications because this will give wrong results
1600      * for NaNs.
1601      */
1602     TCGv_i64 tmp = tcg_temp_new_i64();
1603
1604     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1605     gen_helper_vfp_negd(vd, vd);
1606     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1607     tcg_temp_free_i64(tmp);
1608 }
1609
1610 static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
1611 {
1612     return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
1613 }
1614
1615 static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1616 {
1617     /* VNMLA: -fd + -(fn * fm) */
1618     TCGv_i32 tmp = tcg_temp_new_i32();
1619
1620     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1621     gen_helper_vfp_negs(tmp, tmp);
1622     gen_helper_vfp_negs(vd, vd);
1623     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1624     tcg_temp_free_i32(tmp);
1625 }
1626
1627 static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
1628 {
1629     return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);
1630 }
1631
1632 static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1633 {
1634     /* VNMLA: -fd + (fn * fm) */
1635     TCGv_i64 tmp = tcg_temp_new_i64();
1636
1637     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1638     gen_helper_vfp_negd(tmp, tmp);
1639     gen_helper_vfp_negd(vd, vd);
1640     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1641     tcg_temp_free_i64(tmp);
1642 }
1643
1644 static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
1645 {
1646     return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
1647 }
1648
1649 static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
1650 {
1651     return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
1652 }
1653
1654 static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
1655 {
1656     return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
1657 }
1658
1659 static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1660 {
1661     /* VNMUL: -(fn * fm) */
1662     gen_helper_vfp_muls(vd, vn, vm, fpst);
1663     gen_helper_vfp_negs(vd, vd);
1664 }
1665
1666 static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
1667 {
1668     return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false);
1669 }
1670
1671 static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1672 {
1673     /* VNMUL: -(fn * fm) */
1674     gen_helper_vfp_muld(vd, vn, vm, fpst);
1675     gen_helper_vfp_negd(vd, vd);
1676 }
1677
1678 static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
1679 {
1680     return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
1681 }
1682
1683 static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
1684 {
1685     return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
1686 }
1687
1688 static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
1689 {
1690     return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
1691 }
1692
1693 static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
1694 {
1695     return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
1696 }
1697
1698 static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
1699 {
1700     return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
1701 }
1702
1703 static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
1704 {
1705     return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
1706 }
1707
1708 static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
1709 {
1710     return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
1711 }
1712
1713 static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a)
1714 {
1715     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
1716         return false;
1717     }
1718     return do_vfp_3op_sp(s, gen_helper_vfp_minnums,
1719                          a->vd, a->vn, a->vm, false);
1720 }
1721
1722 static bool trans_VMAXNM_sp(DisasContext *s, arg_VMAXNM_sp *a)
1723 {
1724     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
1725         return false;
1726     }
1727     return do_vfp_3op_sp(s, gen_helper_vfp_maxnums,
1728                          a->vd, a->vn, a->vm, false);
1729 }
1730
1731 static bool trans_VMINNM_dp(DisasContext *s, arg_VMINNM_dp *a)
1732 {
1733     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
1734         return false;
1735     }
1736     return do_vfp_3op_dp(s, gen_helper_vfp_minnumd,
1737                          a->vd, a->vn, a->vm, false);
1738 }
1739
1740 static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a)
1741 {
1742     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
1743         return false;
1744     }
1745     return do_vfp_3op_dp(s, gen_helper_vfp_maxnumd,
1746                          a->vd, a->vn, a->vm, false);
1747 }
1748
1749 static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
1750 {
1751     /*
1752      * VFNMA : fd = muladd(-fd,  fn, fm)
1753      * VFNMS : fd = muladd(-fd, -fn, fm)
1754      * VFMA  : fd = muladd( fd,  fn, fm)
1755      * VFMS  : fd = muladd( fd, -fn, fm)
1756      *
1757      * These are fused multiply-add, and must be done as one floating
1758      * point operation with no rounding between the multiplication and
1759      * addition steps.  NB that doing the negations here as separate
1760      * steps is correct : an input NaN should come out with its sign
1761      * bit flipped if it is a negated-input.
1762      */
1763     TCGv_ptr fpst;
1764     TCGv_i32 vn, vm, vd;
1765
1766     /*
1767      * Present in VFPv4 only.
1768      * Note that we can't rely on the SIMDFMAC check alone, because
1769      * in a Neon-no-VFP core that ID register field will be non-zero.
1770      */
1771     if (!dc_isar_feature(aa32_simdfmac, s) ||
1772         !dc_isar_feature(aa32_fpsp_v2, s)) {
1773         return false;
1774     }
1775     /*
1776      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
1777      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
1778      */
1779     if (s->vec_len != 0 || s->vec_stride != 0) {
1780         return false;
1781     }
1782
1783     if (!vfp_access_check(s)) {
1784         return true;
1785     }
1786
1787     vn = tcg_temp_new_i32();
1788     vm = tcg_temp_new_i32();
1789     vd = tcg_temp_new_i32();
1790
1791     neon_load_reg32(vn, a->vn);
1792     neon_load_reg32(vm, a->vm);
1793     if (neg_n) {
1794         /* VFNMS, VFMS */
1795         gen_helper_vfp_negs(vn, vn);
1796     }
1797     neon_load_reg32(vd, a->vd);
1798     if (neg_d) {
1799         /* VFNMA, VFNMS */
1800         gen_helper_vfp_negs(vd, vd);
1801     }
1802     fpst = get_fpstatus_ptr(0);
1803     gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
1804     neon_store_reg32(vd, a->vd);
1805
1806     tcg_temp_free_ptr(fpst);
1807     tcg_temp_free_i32(vn);
1808     tcg_temp_free_i32(vm);
1809     tcg_temp_free_i32(vd);
1810
1811     return true;
1812 }
1813
1814 static bool trans_VFMA_sp(DisasContext *s, arg_VFMA_sp *a)
1815 {
1816     return do_vfm_sp(s, a, false, false);
1817 }
1818
1819 static bool trans_VFMS_sp(DisasContext *s, arg_VFMS_sp *a)
1820 {
1821     return do_vfm_sp(s, a, true, false);
1822 }
1823
1824 static bool trans_VFNMA_sp(DisasContext *s, arg_VFNMA_sp *a)
1825 {
1826     return do_vfm_sp(s, a, false, true);
1827 }
1828
1829 static bool trans_VFNMS_sp(DisasContext *s, arg_VFNMS_sp *a)
1830 {
1831     return do_vfm_sp(s, a, true, true);
1832 }
1833
1834 static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
1835 {
1836     /*
1837      * VFNMA : fd = muladd(-fd,  fn, fm)
1838      * VFNMS : fd = muladd(-fd, -fn, fm)
1839      * VFMA  : fd = muladd( fd,  fn, fm)
1840      * VFMS  : fd = muladd( fd, -fn, fm)
1841      *
1842      * These are fused multiply-add, and must be done as one floating
1843      * point operation with no rounding between the multiplication and
1844      * addition steps.  NB that doing the negations here as separate
1845      * steps is correct : an input NaN should come out with its sign
1846      * bit flipped if it is a negated-input.
1847      */
1848     TCGv_ptr fpst;
1849     TCGv_i64 vn, vm, vd;
1850
1851     /*
1852      * Present in VFPv4 only.
1853      * Note that we can't rely on the SIMDFMAC check alone, because
1854      * in a Neon-no-VFP core that ID register field will be non-zero.
1855      */
1856     if (!dc_isar_feature(aa32_simdfmac, s) ||
1857         !dc_isar_feature(aa32_fpdp_v2, s)) {
1858         return false;
1859     }
1860     /*
1861      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
1862      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
1863      */
1864     if (s->vec_len != 0 || s->vec_stride != 0) {
1865         return false;
1866     }
1867
1868     /* UNDEF accesses to D16-D31 if they don't exist. */
1869     if (!dc_isar_feature(aa32_simd_r32, s) &&
1870         ((a->vd | a->vn | a->vm) & 0x10)) {
1871         return false;
1872     }
1873
1874     if (!vfp_access_check(s)) {
1875         return true;
1876     }
1877
1878     vn = tcg_temp_new_i64();
1879     vm = tcg_temp_new_i64();
1880     vd = tcg_temp_new_i64();
1881
1882     neon_load_reg64(vn, a->vn);
1883     neon_load_reg64(vm, a->vm);
1884     if (neg_n) {
1885         /* VFNMS, VFMS */
1886         gen_helper_vfp_negd(vn, vn);
1887     }
1888     neon_load_reg64(vd, a->vd);
1889     if (neg_d) {
1890         /* VFNMA, VFNMS */
1891         gen_helper_vfp_negd(vd, vd);
1892     }
1893     fpst = get_fpstatus_ptr(0);
1894     gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
1895     neon_store_reg64(vd, a->vd);
1896
1897     tcg_temp_free_ptr(fpst);
1898     tcg_temp_free_i64(vn);
1899     tcg_temp_free_i64(vm);
1900     tcg_temp_free_i64(vd);
1901
1902     return true;
1903 }
1904
1905 static bool trans_VFMA_dp(DisasContext *s, arg_VFMA_dp *a)
1906 {
1907     return do_vfm_dp(s, a, false, false);
1908 }
1909
1910 static bool trans_VFMS_dp(DisasContext *s, arg_VFMS_dp *a)
1911 {
1912     return do_vfm_dp(s, a, true, false);
1913 }
1914
1915 static bool trans_VFNMA_dp(DisasContext *s, arg_VFNMA_dp *a)
1916 {
1917     return do_vfm_dp(s, a, false, true);
1918 }
1919
1920 static bool trans_VFNMS_dp(DisasContext *s, arg_VFNMS_dp *a)
1921 {
1922     return do_vfm_dp(s, a, true, true);
1923 }
1924
1925 static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
1926 {
1927     uint32_t delta_d = 0;
1928     int veclen = s->vec_len;
1929     TCGv_i32 fd;
1930     uint32_t vd;
1931
1932     vd = a->vd;
1933
1934     if (!dc_isar_feature(aa32_fpsp_v3, s)) {
1935         return false;
1936     }
1937
1938     if (!dc_isar_feature(aa32_fpshvec, s) &&
1939         (veclen != 0 || s->vec_stride != 0)) {
1940         return false;
1941     }
1942
1943     if (!vfp_access_check(s)) {
1944         return true;
1945     }
1946
1947     if (veclen > 0) {
1948         /* Figure out what type of vector operation this is.  */
1949         if (vfp_sreg_is_scalar(vd)) {
1950             /* scalar */
1951             veclen = 0;
1952         } else {
1953             delta_d = s->vec_stride + 1;
1954         }
1955     }
1956
1957     fd = tcg_const_i32(vfp_expand_imm(MO_32, a->imm));
1958
1959     for (;;) {
1960         neon_store_reg32(fd, vd);
1961
1962         if (veclen == 0) {
1963             break;
1964         }
1965
1966         /* Set up the operands for the next iteration */
1967         veclen--;
1968         vd = vfp_advance_sreg(vd, delta_d);
1969     }
1970
1971     tcg_temp_free_i32(fd);
1972     return true;
1973 }
1974
1975 static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
1976 {
1977     uint32_t delta_d = 0;
1978     int veclen = s->vec_len;
1979     TCGv_i64 fd;
1980     uint32_t vd;
1981
1982     vd = a->vd;
1983
1984     if (!dc_isar_feature(aa32_fpdp_v3, s)) {
1985         return false;
1986     }
1987
1988     /* UNDEF accesses to D16-D31 if they don't exist. */
1989     if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) {
1990         return false;
1991     }
1992
1993     if (!dc_isar_feature(aa32_fpshvec, s) &&
1994         (veclen != 0 || s->vec_stride != 0)) {
1995         return false;
1996     }
1997
1998     if (!vfp_access_check(s)) {
1999         return true;
2000     }
2001
2002     if (veclen > 0) {
2003         /* Figure out what type of vector operation this is.  */
2004         if (vfp_dreg_is_scalar(vd)) {
2005             /* scalar */
2006             veclen = 0;
2007         } else {
2008             delta_d = (s->vec_stride >> 1) + 1;
2009         }
2010     }
2011
2012     fd = tcg_const_i64(vfp_expand_imm(MO_64, a->imm));
2013
2014     for (;;) {
2015         neon_store_reg64(fd, vd);
2016
2017         if (veclen == 0) {
2018             break;
2019         }
2020
2021         /* Set up the operands for the next iteration */
2022         veclen--;
2023         vd = vfp_advance_dreg(vd, delta_d);
2024     }
2025
2026     tcg_temp_free_i64(fd);
2027     return true;
2028 }
2029
2030 static bool trans_VMOV_reg_sp(DisasContext *s, arg_VMOV_reg_sp *a)
2031 {
2032     return do_vfp_2op_sp(s, tcg_gen_mov_i32, a->vd, a->vm);
2033 }
2034
2035 static bool trans_VMOV_reg_dp(DisasContext *s, arg_VMOV_reg_dp *a)
2036 {
2037     return do_vfp_2op_dp(s, tcg_gen_mov_i64, a->vd, a->vm);
2038 }
2039
2040 static bool trans_VABS_sp(DisasContext *s, arg_VABS_sp *a)
2041 {
2042     return do_vfp_2op_sp(s, gen_helper_vfp_abss, a->vd, a->vm);
2043 }
2044
2045 static bool trans_VABS_dp(DisasContext *s, arg_VABS_dp *a)
2046 {
2047     return do_vfp_2op_dp(s, gen_helper_vfp_absd, a->vd, a->vm);
2048 }
2049
2050 static bool trans_VNEG_sp(DisasContext *s, arg_VNEG_sp *a)
2051 {
2052     return do_vfp_2op_sp(s, gen_helper_vfp_negs, a->vd, a->vm);
2053 }
2054
2055 static bool trans_VNEG_dp(DisasContext *s, arg_VNEG_dp *a)
2056 {
2057     return do_vfp_2op_dp(s, gen_helper_vfp_negd, a->vd, a->vm);
2058 }
2059
2060 static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
2061 {
2062     gen_helper_vfp_sqrts(vd, vm, cpu_env);
2063 }
2064
2065 static bool trans_VSQRT_sp(DisasContext *s, arg_VSQRT_sp *a)
2066 {
2067     return do_vfp_2op_sp(s, gen_VSQRT_sp, a->vd, a->vm);
2068 }
2069
2070 static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
2071 {
2072     gen_helper_vfp_sqrtd(vd, vm, cpu_env);
2073 }
2074
2075 static bool trans_VSQRT_dp(DisasContext *s, arg_VSQRT_dp *a)
2076 {
2077     return do_vfp_2op_dp(s, gen_VSQRT_dp, a->vd, a->vm);
2078 }
2079
2080 static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
2081 {
2082     TCGv_i32 vd, vm;
2083
2084     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
2085         return false;
2086     }
2087
2088     /* Vm/M bits must be zero for the Z variant */
2089     if (a->z && a->vm != 0) {
2090         return false;
2091     }
2092
2093     if (!vfp_access_check(s)) {
2094         return true;
2095     }
2096
2097     vd = tcg_temp_new_i32();
2098     vm = tcg_temp_new_i32();
2099
2100     neon_load_reg32(vd, a->vd);
2101     if (a->z) {
2102         tcg_gen_movi_i32(vm, 0);
2103     } else {
2104         neon_load_reg32(vm, a->vm);
2105     }
2106
2107     if (a->e) {
2108         gen_helper_vfp_cmpes(vd, vm, cpu_env);
2109     } else {
2110         gen_helper_vfp_cmps(vd, vm, cpu_env);
2111     }
2112
2113     tcg_temp_free_i32(vd);
2114     tcg_temp_free_i32(vm);
2115
2116     return true;
2117 }
2118
2119 static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
2120 {
2121     TCGv_i64 vd, vm;
2122
2123     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2124         return false;
2125     }
2126
2127     /* Vm/M bits must be zero for the Z variant */
2128     if (a->z && a->vm != 0) {
2129         return false;
2130     }
2131
2132     /* UNDEF accesses to D16-D31 if they don't exist. */
2133     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2134         return false;
2135     }
2136
2137     if (!vfp_access_check(s)) {
2138         return true;
2139     }
2140
2141     vd = tcg_temp_new_i64();
2142     vm = tcg_temp_new_i64();
2143
2144     neon_load_reg64(vd, a->vd);
2145     if (a->z) {
2146         tcg_gen_movi_i64(vm, 0);
2147     } else {
2148         neon_load_reg64(vm, a->vm);
2149     }
2150
2151     if (a->e) {
2152         gen_helper_vfp_cmped(vd, vm, cpu_env);
2153     } else {
2154         gen_helper_vfp_cmpd(vd, vm, cpu_env);
2155     }
2156
2157     tcg_temp_free_i64(vd);
2158     tcg_temp_free_i64(vm);
2159
2160     return true;
2161 }
2162
2163 static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
2164 {
2165     TCGv_ptr fpst;
2166     TCGv_i32 ahp_mode;
2167     TCGv_i32 tmp;
2168
2169     if (!dc_isar_feature(aa32_fp16_spconv, s)) {
2170         return false;
2171     }
2172
2173     if (!vfp_access_check(s)) {
2174         return true;
2175     }
2176
2177     fpst = get_fpstatus_ptr(false);
2178     ahp_mode = get_ahp_flag();
2179     tmp = tcg_temp_new_i32();
2180     /* The T bit tells us if we want the low or high 16 bits of Vm */
2181     tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
2182     gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
2183     neon_store_reg32(tmp, a->vd);
2184     tcg_temp_free_i32(ahp_mode);
2185     tcg_temp_free_ptr(fpst);
2186     tcg_temp_free_i32(tmp);
2187     return true;
2188 }
2189
2190 static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
2191 {
2192     TCGv_ptr fpst;
2193     TCGv_i32 ahp_mode;
2194     TCGv_i32 tmp;
2195     TCGv_i64 vd;
2196
2197     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2198         return false;
2199     }
2200
2201     if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
2202         return false;
2203     }
2204
2205     /* UNDEF accesses to D16-D31 if they don't exist. */
2206     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd  & 0x10)) {
2207         return false;
2208     }
2209
2210     if (!vfp_access_check(s)) {
2211         return true;
2212     }
2213
2214     fpst = get_fpstatus_ptr(false);
2215     ahp_mode = get_ahp_flag();
2216     tmp = tcg_temp_new_i32();
2217     /* The T bit tells us if we want the low or high 16 bits of Vm */
2218     tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
2219     vd = tcg_temp_new_i64();
2220     gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
2221     neon_store_reg64(vd, a->vd);
2222     tcg_temp_free_i32(ahp_mode);
2223     tcg_temp_free_ptr(fpst);
2224     tcg_temp_free_i32(tmp);
2225     tcg_temp_free_i64(vd);
2226     return true;
2227 }
2228
2229 static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
2230 {
2231     TCGv_ptr fpst;
2232     TCGv_i32 ahp_mode;
2233     TCGv_i32 tmp;
2234
2235     if (!dc_isar_feature(aa32_fp16_spconv, s)) {
2236         return false;
2237     }
2238
2239     if (!vfp_access_check(s)) {
2240         return true;
2241     }
2242
2243     fpst = get_fpstatus_ptr(false);
2244     ahp_mode = get_ahp_flag();
2245     tmp = tcg_temp_new_i32();
2246
2247     neon_load_reg32(tmp, a->vm);
2248     gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
2249     tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
2250     tcg_temp_free_i32(ahp_mode);
2251     tcg_temp_free_ptr(fpst);
2252     tcg_temp_free_i32(tmp);
2253     return true;
2254 }
2255
2256 static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
2257 {
2258     TCGv_ptr fpst;
2259     TCGv_i32 ahp_mode;
2260     TCGv_i32 tmp;
2261     TCGv_i64 vm;
2262
2263     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2264         return false;
2265     }
2266
2267     if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
2268         return false;
2269     }
2270
2271     /* UNDEF accesses to D16-D31 if they don't exist. */
2272     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm  & 0x10)) {
2273         return false;
2274     }
2275
2276     if (!vfp_access_check(s)) {
2277         return true;
2278     }
2279
2280     fpst = get_fpstatus_ptr(false);
2281     ahp_mode = get_ahp_flag();
2282     tmp = tcg_temp_new_i32();
2283     vm = tcg_temp_new_i64();
2284
2285     neon_load_reg64(vm, a->vm);
2286     gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
2287     tcg_temp_free_i64(vm);
2288     tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
2289     tcg_temp_free_i32(ahp_mode);
2290     tcg_temp_free_ptr(fpst);
2291     tcg_temp_free_i32(tmp);
2292     return true;
2293 }
2294
2295 static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
2296 {
2297     TCGv_ptr fpst;
2298     TCGv_i32 tmp;
2299
2300     if (!dc_isar_feature(aa32_vrint, s)) {
2301         return false;
2302     }
2303
2304     if (!vfp_access_check(s)) {
2305         return true;
2306     }
2307
2308     tmp = tcg_temp_new_i32();
2309     neon_load_reg32(tmp, a->vm);
2310     fpst = get_fpstatus_ptr(false);
2311     gen_helper_rints(tmp, tmp, fpst);
2312     neon_store_reg32(tmp, a->vd);
2313     tcg_temp_free_ptr(fpst);
2314     tcg_temp_free_i32(tmp);
2315     return true;
2316 }
2317
2318 static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
2319 {
2320     TCGv_ptr fpst;
2321     TCGv_i64 tmp;
2322
2323     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2324         return false;
2325     }
2326
2327     if (!dc_isar_feature(aa32_vrint, s)) {
2328         return false;
2329     }
2330
2331     /* UNDEF accesses to D16-D31 if they don't exist. */
2332     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2333         return false;
2334     }
2335
2336     if (!vfp_access_check(s)) {
2337         return true;
2338     }
2339
2340     tmp = tcg_temp_new_i64();
2341     neon_load_reg64(tmp, a->vm);
2342     fpst = get_fpstatus_ptr(false);
2343     gen_helper_rintd(tmp, tmp, fpst);
2344     neon_store_reg64(tmp, a->vd);
2345     tcg_temp_free_ptr(fpst);
2346     tcg_temp_free_i64(tmp);
2347     return true;
2348 }
2349
2350 static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
2351 {
2352     TCGv_ptr fpst;
2353     TCGv_i32 tmp;
2354     TCGv_i32 tcg_rmode;
2355
2356     if (!dc_isar_feature(aa32_vrint, s)) {
2357         return false;
2358     }
2359
2360     if (!vfp_access_check(s)) {
2361         return true;
2362     }
2363
2364     tmp = tcg_temp_new_i32();
2365     neon_load_reg32(tmp, a->vm);
2366     fpst = get_fpstatus_ptr(false);
2367     tcg_rmode = tcg_const_i32(float_round_to_zero);
2368     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
2369     gen_helper_rints(tmp, tmp, fpst);
2370     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
2371     neon_store_reg32(tmp, a->vd);
2372     tcg_temp_free_ptr(fpst);
2373     tcg_temp_free_i32(tcg_rmode);
2374     tcg_temp_free_i32(tmp);
2375     return true;
2376 }
2377
2378 static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
2379 {
2380     TCGv_ptr fpst;
2381     TCGv_i64 tmp;
2382     TCGv_i32 tcg_rmode;
2383
2384     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2385         return false;
2386     }
2387
2388     if (!dc_isar_feature(aa32_vrint, s)) {
2389         return false;
2390     }
2391
2392     /* UNDEF accesses to D16-D31 if they don't exist. */
2393     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2394         return false;
2395     }
2396
2397     if (!vfp_access_check(s)) {
2398         return true;
2399     }
2400
2401     tmp = tcg_temp_new_i64();
2402     neon_load_reg64(tmp, a->vm);
2403     fpst = get_fpstatus_ptr(false);
2404     tcg_rmode = tcg_const_i32(float_round_to_zero);
2405     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
2406     gen_helper_rintd(tmp, tmp, fpst);
2407     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
2408     neon_store_reg64(tmp, a->vd);
2409     tcg_temp_free_ptr(fpst);
2410     tcg_temp_free_i64(tmp);
2411     tcg_temp_free_i32(tcg_rmode);
2412     return true;
2413 }
2414
2415 static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
2416 {
2417     TCGv_ptr fpst;
2418     TCGv_i32 tmp;
2419
2420     if (!dc_isar_feature(aa32_vrint, s)) {
2421         return false;
2422     }
2423
2424     if (!vfp_access_check(s)) {
2425         return true;
2426     }
2427
2428     tmp = tcg_temp_new_i32();
2429     neon_load_reg32(tmp, a->vm);
2430     fpst = get_fpstatus_ptr(false);
2431     gen_helper_rints_exact(tmp, tmp, fpst);
2432     neon_store_reg32(tmp, a->vd);
2433     tcg_temp_free_ptr(fpst);
2434     tcg_temp_free_i32(tmp);
2435     return true;
2436 }
2437
2438 static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
2439 {
2440     TCGv_ptr fpst;
2441     TCGv_i64 tmp;
2442
2443     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2444         return false;
2445     }
2446
2447     if (!dc_isar_feature(aa32_vrint, s)) {
2448         return false;
2449     }
2450
2451     /* UNDEF accesses to D16-D31 if they don't exist. */
2452     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2453         return false;
2454     }
2455
2456     if (!vfp_access_check(s)) {
2457         return true;
2458     }
2459
2460     tmp = tcg_temp_new_i64();
2461     neon_load_reg64(tmp, a->vm);
2462     fpst = get_fpstatus_ptr(false);
2463     gen_helper_rintd_exact(tmp, tmp, fpst);
2464     neon_store_reg64(tmp, a->vd);
2465     tcg_temp_free_ptr(fpst);
2466     tcg_temp_free_i64(tmp);
2467     return true;
2468 }
2469
2470 static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
2471 {
2472     TCGv_i64 vd;
2473     TCGv_i32 vm;
2474
2475     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2476         return false;
2477     }
2478
2479     /* UNDEF accesses to D16-D31 if they don't exist. */
2480     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
2481         return false;
2482     }
2483
2484     if (!vfp_access_check(s)) {
2485         return true;
2486     }
2487
2488     vm = tcg_temp_new_i32();
2489     vd = tcg_temp_new_i64();
2490     neon_load_reg32(vm, a->vm);
2491     gen_helper_vfp_fcvtds(vd, vm, cpu_env);
2492     neon_store_reg64(vd, a->vd);
2493     tcg_temp_free_i32(vm);
2494     tcg_temp_free_i64(vd);
2495     return true;
2496 }
2497
2498 static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
2499 {
2500     TCGv_i64 vm;
2501     TCGv_i32 vd;
2502
2503     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2504         return false;
2505     }
2506
2507     /* UNDEF accesses to D16-D31 if they don't exist. */
2508     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
2509         return false;
2510     }
2511
2512     if (!vfp_access_check(s)) {
2513         return true;
2514     }
2515
2516     vd = tcg_temp_new_i32();
2517     vm = tcg_temp_new_i64();
2518     neon_load_reg64(vm, a->vm);
2519     gen_helper_vfp_fcvtsd(vd, vm, cpu_env);
2520     neon_store_reg32(vd, a->vd);
2521     tcg_temp_free_i32(vd);
2522     tcg_temp_free_i64(vm);
2523     return true;
2524 }
2525
2526 static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
2527 {
2528     TCGv_i32 vm;
2529     TCGv_ptr fpst;
2530
2531     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
2532         return false;
2533     }
2534
2535     if (!vfp_access_check(s)) {
2536         return true;
2537     }
2538
2539     vm = tcg_temp_new_i32();
2540     neon_load_reg32(vm, a->vm);
2541     fpst = get_fpstatus_ptr(false);
2542     if (a->s) {
2543         /* i32 -> f32 */
2544         gen_helper_vfp_sitos(vm, vm, fpst);
2545     } else {
2546         /* u32 -> f32 */
2547         gen_helper_vfp_uitos(vm, vm, fpst);
2548     }
2549     neon_store_reg32(vm, a->vd);
2550     tcg_temp_free_i32(vm);
2551     tcg_temp_free_ptr(fpst);
2552     return true;
2553 }
2554
2555 static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
2556 {
2557     TCGv_i32 vm;
2558     TCGv_i64 vd;
2559     TCGv_ptr fpst;
2560
2561     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2562         return false;
2563     }
2564
2565     /* UNDEF accesses to D16-D31 if they don't exist. */
2566     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
2567         return false;
2568     }
2569
2570     if (!vfp_access_check(s)) {
2571         return true;
2572     }
2573
2574     vm = tcg_temp_new_i32();
2575     vd = tcg_temp_new_i64();
2576     neon_load_reg32(vm, a->vm);
2577     fpst = get_fpstatus_ptr(false);
2578     if (a->s) {
2579         /* i32 -> f64 */
2580         gen_helper_vfp_sitod(vd, vm, fpst);
2581     } else {
2582         /* u32 -> f64 */
2583         gen_helper_vfp_uitod(vd, vm, fpst);
2584     }
2585     neon_store_reg64(vd, a->vd);
2586     tcg_temp_free_i32(vm);
2587     tcg_temp_free_i64(vd);
2588     tcg_temp_free_ptr(fpst);
2589     return true;
2590 }
2591
2592 static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
2593 {
2594     TCGv_i32 vd;
2595     TCGv_i64 vm;
2596
2597     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2598         return false;
2599     }
2600
2601     if (!dc_isar_feature(aa32_jscvt, s)) {
2602         return false;
2603     }
2604
2605     /* UNDEF accesses to D16-D31 if they don't exist. */
2606     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
2607         return false;
2608     }
2609
2610     if (!vfp_access_check(s)) {
2611         return true;
2612     }
2613
2614     vm = tcg_temp_new_i64();
2615     vd = tcg_temp_new_i32();
2616     neon_load_reg64(vm, a->vm);
2617     gen_helper_vjcvt(vd, vm, cpu_env);
2618     neon_store_reg32(vd, a->vd);
2619     tcg_temp_free_i64(vm);
2620     tcg_temp_free_i32(vd);
2621     return true;
2622 }
2623
2624 static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
2625 {
2626     TCGv_i32 vd, shift;
2627     TCGv_ptr fpst;
2628     int frac_bits;
2629
2630     if (!dc_isar_feature(aa32_fpsp_v3, s)) {
2631         return false;
2632     }
2633
2634     if (!vfp_access_check(s)) {
2635         return true;
2636     }
2637
2638     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
2639
2640     vd = tcg_temp_new_i32();
2641     neon_load_reg32(vd, a->vd);
2642
2643     fpst = get_fpstatus_ptr(false);
2644     shift = tcg_const_i32(frac_bits);
2645
2646     /* Switch on op:U:sx bits */
2647     switch (a->opc) {
2648     case 0:
2649         gen_helper_vfp_shtos(vd, vd, shift, fpst);
2650         break;
2651     case 1:
2652         gen_helper_vfp_sltos(vd, vd, shift, fpst);
2653         break;
2654     case 2:
2655         gen_helper_vfp_uhtos(vd, vd, shift, fpst);
2656         break;
2657     case 3:
2658         gen_helper_vfp_ultos(vd, vd, shift, fpst);
2659         break;
2660     case 4:
2661         gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst);
2662         break;
2663     case 5:
2664         gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst);
2665         break;
2666     case 6:
2667         gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst);
2668         break;
2669     case 7:
2670         gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst);
2671         break;
2672     default:
2673         g_assert_not_reached();
2674     }
2675
2676     neon_store_reg32(vd, a->vd);
2677     tcg_temp_free_i32(vd);
2678     tcg_temp_free_i32(shift);
2679     tcg_temp_free_ptr(fpst);
2680     return true;
2681 }
2682
2683 static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
2684 {
2685     TCGv_i64 vd;
2686     TCGv_i32 shift;
2687     TCGv_ptr fpst;
2688     int frac_bits;
2689
2690     if (!dc_isar_feature(aa32_fpdp_v3, s)) {
2691         return false;
2692     }
2693
2694     /* UNDEF accesses to D16-D31 if they don't exist. */
2695     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
2696         return false;
2697     }
2698
2699     if (!vfp_access_check(s)) {
2700         return true;
2701     }
2702
2703     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
2704
2705     vd = tcg_temp_new_i64();
2706     neon_load_reg64(vd, a->vd);
2707
2708     fpst = get_fpstatus_ptr(false);
2709     shift = tcg_const_i32(frac_bits);
2710
2711     /* Switch on op:U:sx bits */
2712     switch (a->opc) {
2713     case 0:
2714         gen_helper_vfp_shtod(vd, vd, shift, fpst);
2715         break;
2716     case 1:
2717         gen_helper_vfp_sltod(vd, vd, shift, fpst);
2718         break;
2719     case 2:
2720         gen_helper_vfp_uhtod(vd, vd, shift, fpst);
2721         break;
2722     case 3:
2723         gen_helper_vfp_ultod(vd, vd, shift, fpst);
2724         break;
2725     case 4:
2726         gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst);
2727         break;
2728     case 5:
2729         gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst);
2730         break;
2731     case 6:
2732         gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst);
2733         break;
2734     case 7:
2735         gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst);
2736         break;
2737     default:
2738         g_assert_not_reached();
2739     }
2740
2741     neon_store_reg64(vd, a->vd);
2742     tcg_temp_free_i64(vd);
2743     tcg_temp_free_i32(shift);
2744     tcg_temp_free_ptr(fpst);
2745     return true;
2746 }
2747
2748 static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
2749 {
2750     TCGv_i32 vm;
2751     TCGv_ptr fpst;
2752
2753     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
2754         return false;
2755     }
2756
2757     if (!vfp_access_check(s)) {
2758         return true;
2759     }
2760
2761     fpst = get_fpstatus_ptr(false);
2762     vm = tcg_temp_new_i32();
2763     neon_load_reg32(vm, a->vm);
2764
2765     if (a->s) {
2766         if (a->rz) {
2767             gen_helper_vfp_tosizs(vm, vm, fpst);
2768         } else {
2769             gen_helper_vfp_tosis(vm, vm, fpst);
2770         }
2771     } else {
2772         if (a->rz) {
2773             gen_helper_vfp_touizs(vm, vm, fpst);
2774         } else {
2775             gen_helper_vfp_touis(vm, vm, fpst);
2776         }
2777     }
2778     neon_store_reg32(vm, a->vd);
2779     tcg_temp_free_i32(vm);
2780     tcg_temp_free_ptr(fpst);
2781     return true;
2782 }
2783
2784 static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
2785 {
2786     TCGv_i32 vd;
2787     TCGv_i64 vm;
2788     TCGv_ptr fpst;
2789
2790     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2791         return false;
2792     }
2793
2794     /* UNDEF accesses to D16-D31 if they don't exist. */
2795     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
2796         return false;
2797     }
2798
2799     if (!vfp_access_check(s)) {
2800         return true;
2801     }
2802
2803     fpst = get_fpstatus_ptr(false);
2804     vm = tcg_temp_new_i64();
2805     vd = tcg_temp_new_i32();
2806     neon_load_reg64(vm, a->vm);
2807
2808     if (a->s) {
2809         if (a->rz) {
2810             gen_helper_vfp_tosizd(vd, vm, fpst);
2811         } else {
2812             gen_helper_vfp_tosid(vd, vm, fpst);
2813         }
2814     } else {
2815         if (a->rz) {
2816             gen_helper_vfp_touizd(vd, vm, fpst);
2817         } else {
2818             gen_helper_vfp_touid(vd, vm, fpst);
2819         }
2820     }
2821     neon_store_reg32(vd, a->vd);
2822     tcg_temp_free_i32(vd);
2823     tcg_temp_free_i64(vm);
2824     tcg_temp_free_ptr(fpst);
2825     return true;
2826 }
2827
2828 /*
2829  * Decode VLLDM and VLSTM are nonstandard because:
2830  *  * if there is no FPU then these insns must NOP in
2831  *    Secure state and UNDEF in Nonsecure state
2832  *  * if there is an FPU then these insns do not have
2833  *    the usual behaviour that vfp_access_check() provides of
2834  *    being controlled by CPACR/NSACR enable bits or the
2835  *    lazy-stacking logic.
2836  */
2837 static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a)
2838 {
2839     TCGv_i32 fptr;
2840
2841     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
2842         !arm_dc_feature(s, ARM_FEATURE_V8)) {
2843         return false;
2844     }
2845     /* If not secure, UNDEF. */
2846     if (!s->v8m_secure) {
2847         return false;
2848     }
2849     /* If no fpu, NOP. */
2850     if (!dc_isar_feature(aa32_vfp, s)) {
2851         return true;
2852     }
2853
2854     fptr = load_reg(s, a->rn);
2855     if (a->l) {
2856         gen_helper_v7m_vlldm(cpu_env, fptr);
2857     } else {
2858         gen_helper_v7m_vlstm(cpu_env, fptr);
2859     }
2860     tcg_temp_free_i32(fptr);
2861
2862     /* End the TB, because we have updated FP control bits */
2863     s->base.is_jmp = DISAS_UPDATE_EXIT;
2864     return true;
2865 }