target/arm/translate-vfp.c.inc

   1 /*
   2  *  ARM translation: AArch32 VFP instructions
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *  Copyright (c) 2005-2007 CodeSourcery
   6  *  Copyright (c) 2007 OpenedHand, Ltd.
   7  *  Copyright (c) 2019 Linaro, Ltd.
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  21  */
  22
  23 /*
  24  * This file is intended to be included from translate.c; it uses
  25  * some macros and definitions provided by that file.
  26  * It might be possible to convert it to a standalone .c file eventually.
  27  */
  28
  29 /* Include the generated VFP decoder */
  30 #include "decode-vfp.c.inc"
  31 #include "decode-vfp-uncond.c.inc"
  32
  33 /*
  34  * The imm8 encodes the sign bit, enough bits to represent an exponent in
  35  * the range 01....1xx to 10....0xx, and the most significant 4 bits of
  36  * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
  37  */
  38 uint64_t vfp_expand_imm(int size, uint8_t imm8)
  39 {
  40     uint64_t imm;
  41
  42     switch (size) {
  43     case MO_64:
  44         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
  45             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
  46             extract32(imm8, 0, 6);
  47         imm <<= 48;
  48         break;
  49     case MO_32:
  50         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
  51             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
  52             (extract32(imm8, 0, 6) << 3);
  53         imm <<= 16;
  54         break;
  55     case MO_16:
  56         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
  57             (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
  58             (extract32(imm8, 0, 6) << 6);
  59         break;
  60     default:
  61         g_assert_not_reached();
  62     }
  63     return imm;
  64 }
  65
  66 /*
  67  * Return the offset of a 16-bit half of the specified VFP single-precision
  68  * register. If top is true, returns the top 16 bits; otherwise the bottom
  69  * 16 bits.
  70  */
  71 static inline long vfp_f16_offset(unsigned reg, bool top)
  72 {
  73     long offs = vfp_reg_offset(false, reg);
  74 #ifdef HOST_WORDS_BIGENDIAN
  75     if (!top) {
  76         offs += 2;
  77     }
  78 #else
  79     if (top) {
  80         offs += 2;
  81     }
  82 #endif
  83     return offs;
  84 }
  85
  86 /*
  87  * Check that VFP access is enabled. If it is, do the necessary
  88  * M-profile lazy-FP handling and then return true.
  89  * If not, emit code to generate an appropriate exception and
  90  * return false.
  91  * The ignore_vfp_enabled argument specifies that we should ignore
  92  * whether VFP is enabled via FPEXC[EN]: this should be true for FMXR/FMRX
  93  * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
  94  */
  95 static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
  96 {
  97     if (s->fp_excp_el) {
  98         /* M-profile handled this earlier, in disas_m_nocp() */
  99         assert (!arm_dc_feature(s, ARM_FEATURE_M));
 100         gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
 101                            syn_fp_access_trap(1, 0xe, false),
 102                            s->fp_excp_el);
 103         return false;
 104     }
 105
 106     if (!s->vfp_enabled && !ignore_vfp_enabled) {
 107         assert(!arm_dc_feature(s, ARM_FEATURE_M));
 108         unallocated_encoding(s);
 109         return false;
 110     }
 111
 112     if (arm_dc_feature(s, ARM_FEATURE_M)) {
 113         /* Handle M-profile lazy FP state mechanics */
 114
 115         /* Trigger lazy-state preservation if necessary */
 116         if (s->v7m_lspact) {
 117             /*
 118              * Lazy state saving affects external memory and also the NVIC,
 119              * so we must mark it as an IO operation for icount (and cause
 120              * this to be the last insn in the TB).
 121              */
 122             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
 123                 s->base.is_jmp = DISAS_UPDATE_EXIT;
 124                 gen_io_start();
 125             }
 126             gen_helper_v7m_preserve_fp_state(cpu_env);
 127             /*
 128              * If the preserve_fp_state helper doesn't throw an exception
 129              * then it will clear LSPACT; we don't need to repeat this for
 130              * any further FP insns in this TB.
 131              */
 132             s->v7m_lspact = false;
 133         }
 134
 135         /* Update ownership of FP context: set FPCCR.S to match current state */
 136         if (s->v8m_fpccr_s_wrong) {
 137             TCGv_i32 tmp;
 138
 139             tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
 140             if (s->v8m_secure) {
 141                 tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
 142             } else {
 143                 tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
 144             }
 145             store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
 146             /* Don't need to do this for any further FP insns in this TB */
 147             s->v8m_fpccr_s_wrong = false;
 148         }
 149
 150         if (s->v7m_new_fp_ctxt_needed) {
 151             /*
 152              * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA
 153              * and the FPSCR.
 154              */
 155             TCGv_i32 control, fpscr;
 156             uint32_t bits = R_V7M_CONTROL_FPCA_MASK;
 157
 158             fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
 159             gen_helper_vfp_set_fpscr(cpu_env, fpscr);
 160             tcg_temp_free_i32(fpscr);
 161             /*
 162              * We don't need to arrange to end the TB, because the only
 163              * parts of FPSCR which we cache in the TB flags are the VECLEN
 164              * and VECSTRIDE, and those don't exist for M-profile.
 165              */
 166
 167             if (s->v8m_secure) {
 168                 bits |= R_V7M_CONTROL_SFPA_MASK;
 169             }
 170             control = load_cpu_field(v7m.control[M_REG_S]);
 171             tcg_gen_ori_i32(control, control, bits);
 172             store_cpu_field(control, v7m.control[M_REG_S]);
 173             /* Don't need to do this for any further FP insns in this TB */
 174             s->v7m_new_fp_ctxt_needed = false;
 175         }
 176     }
 177
 178     return true;
 179 }
 180
 181 /*
 182  * The most usual kind of VFP access check, for everything except
 183  * FMXR/FMRX to the always-available special registers.
 184  */
 185 static bool vfp_access_check(DisasContext *s)
 186 {
 187     return full_vfp_access_check(s, false);
 188 }
 189
 190 static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
 191 {
 192     uint32_t rd, rn, rm;
 193     int sz = a->sz;
 194
 195     if (!dc_isar_feature(aa32_vsel, s)) {
 196         return false;
 197     }
 198
 199     if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
 200         return false;
 201     }
 202
 203     if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
 204         return false;
 205     }
 206
 207     /* UNDEF accesses to D16-D31 if they don't exist */
 208     if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
 209         ((a->vm | a->vn | a->vd) & 0x10)) {
 210         return false;
 211     }
 212
 213     rd = a->vd;
 214     rn = a->vn;
 215     rm = a->vm;
 216
 217     if (!vfp_access_check(s)) {
 218         return true;
 219     }
 220
 221     if (sz == 3) {
 222         TCGv_i64 frn, frm, dest;
 223         TCGv_i64 tmp, zero, zf, nf, vf;
 224
 225         zero = tcg_const_i64(0);
 226
 227         frn = tcg_temp_new_i64();
 228         frm = tcg_temp_new_i64();
 229         dest = tcg_temp_new_i64();
 230
 231         zf = tcg_temp_new_i64();
 232         nf = tcg_temp_new_i64();
 233         vf = tcg_temp_new_i64();
 234
 235         tcg_gen_extu_i32_i64(zf, cpu_ZF);
 236         tcg_gen_ext_i32_i64(nf, cpu_NF);
 237         tcg_gen_ext_i32_i64(vf, cpu_VF);
 238
 239         neon_load_reg64(frn, rn);
 240         neon_load_reg64(frm, rm);
 241         switch (a->cc) {
 242         case 0: /* eq: Z */
 243             tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
 244                                 frn, frm);
 245             break;
 246         case 1: /* vs: V */
 247             tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
 248                                 frn, frm);
 249             break;
 250         case 2: /* ge: N == V -> N ^ V == 0 */
 251             tmp = tcg_temp_new_i64();
 252             tcg_gen_xor_i64(tmp, vf, nf);
 253             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
 254                                 frn, frm);
 255             tcg_temp_free_i64(tmp);
 256             break;
 257         case 3: /* gt: !Z && N == V */
 258             tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
 259                                 frn, frm);
 260             tmp = tcg_temp_new_i64();
 261             tcg_gen_xor_i64(tmp, vf, nf);
 262             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
 263                                 dest, frm);
 264             tcg_temp_free_i64(tmp);
 265             break;
 266         }
 267         neon_store_reg64(dest, rd);
 268         tcg_temp_free_i64(frn);
 269         tcg_temp_free_i64(frm);
 270         tcg_temp_free_i64(dest);
 271
 272         tcg_temp_free_i64(zf);
 273         tcg_temp_free_i64(nf);
 274         tcg_temp_free_i64(vf);
 275
 276         tcg_temp_free_i64(zero);
 277     } else {
 278         TCGv_i32 frn, frm, dest;
 279         TCGv_i32 tmp, zero;
 280
 281         zero = tcg_const_i32(0);
 282
 283         frn = tcg_temp_new_i32();
 284         frm = tcg_temp_new_i32();
 285         dest = tcg_temp_new_i32();
 286         neon_load_reg32(frn, rn);
 287         neon_load_reg32(frm, rm);
 288         switch (a->cc) {
 289         case 0: /* eq: Z */
 290             tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
 291                                 frn, frm);
 292             break;
 293         case 1: /* vs: V */
 294             tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
 295                                 frn, frm);
 296             break;
 297         case 2: /* ge: N == V -> N ^ V == 0 */
 298             tmp = tcg_temp_new_i32();
 299             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
 300             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
 301                                 frn, frm);
 302             tcg_temp_free_i32(tmp);
 303             break;
 304         case 3: /* gt: !Z && N == V */
 305             tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
 306                                 frn, frm);
 307             tmp = tcg_temp_new_i32();
 308             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
 309             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
 310                                 dest, frm);
 311             tcg_temp_free_i32(tmp);
 312             break;
 313         }
 314         /* For fp16 the top half is always zeroes */
 315         if (sz == 1) {
 316             tcg_gen_andi_i32(dest, dest, 0xffff);
 317         }
 318         neon_store_reg32(dest, rd);
 319         tcg_temp_free_i32(frn);
 320         tcg_temp_free_i32(frm);
 321         tcg_temp_free_i32(dest);
 322
 323         tcg_temp_free_i32(zero);
 324     }
 325
 326     return true;
 327 }
 328
 329 /*
 330  * Table for converting the most common AArch32 encoding of
 331  * rounding mode to arm_fprounding order (which matches the
 332  * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
 333  */
 334 static const uint8_t fp_decode_rm[] = {
 335     FPROUNDING_TIEAWAY,
 336     FPROUNDING_TIEEVEN,
 337     FPROUNDING_POSINF,
 338     FPROUNDING_NEGINF,
 339 };
 340
 341 static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
 342 {
 343     uint32_t rd, rm;
 344     int sz = a->sz;
 345     TCGv_ptr fpst;
 346     TCGv_i32 tcg_rmode;
 347     int rounding = fp_decode_rm[a->rm];
 348
 349     if (!dc_isar_feature(aa32_vrint, s)) {
 350         return false;
 351     }
 352
 353     if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
 354         return false;
 355     }
 356
 357     if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
 358         return false;
 359     }
 360
 361     /* UNDEF accesses to D16-D31 if they don't exist */
 362     if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
 363         ((a->vm | a->vd) & 0x10)) {
 364         return false;
 365     }
 366
 367     rd = a->vd;
 368     rm = a->vm;
 369
 370     if (!vfp_access_check(s)) {
 371         return true;
 372     }
 373
 374     if (sz == 1) {
 375         fpst = fpstatus_ptr(FPST_FPCR_F16);
 376     } else {
 377         fpst = fpstatus_ptr(FPST_FPCR);
 378     }
 379
 380     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
 381     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 382
 383     if (sz == 3) {
 384         TCGv_i64 tcg_op;
 385         TCGv_i64 tcg_res;
 386         tcg_op = tcg_temp_new_i64();
 387         tcg_res = tcg_temp_new_i64();
 388         neon_load_reg64(tcg_op, rm);
 389         gen_helper_rintd(tcg_res, tcg_op, fpst);
 390         neon_store_reg64(tcg_res, rd);
 391         tcg_temp_free_i64(tcg_op);
 392         tcg_temp_free_i64(tcg_res);
 393     } else {
 394         TCGv_i32 tcg_op;
 395         TCGv_i32 tcg_res;
 396         tcg_op = tcg_temp_new_i32();
 397         tcg_res = tcg_temp_new_i32();
 398         neon_load_reg32(tcg_op, rm);
 399         if (sz == 1) {
 400             gen_helper_rinth(tcg_res, tcg_op, fpst);
 401         } else {
 402             gen_helper_rints(tcg_res, tcg_op, fpst);
 403         }
 404         neon_store_reg32(tcg_res, rd);
 405         tcg_temp_free_i32(tcg_op);
 406         tcg_temp_free_i32(tcg_res);
 407     }
 408
 409     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 410     tcg_temp_free_i32(tcg_rmode);
 411
 412     tcg_temp_free_ptr(fpst);
 413     return true;
 414 }
 415
 416 static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
 417 {
 418     uint32_t rd, rm;
 419     int sz = a->sz;
 420     TCGv_ptr fpst;
 421     TCGv_i32 tcg_rmode, tcg_shift;
 422     int rounding = fp_decode_rm[a->rm];
 423     bool is_signed = a->op;
 424
 425     if (!dc_isar_feature(aa32_vcvt_dr, s)) {
 426         return false;
 427     }
 428
 429     if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
 430         return false;
 431     }
 432
 433     if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
 434         return false;
 435     }
 436
 437     /* UNDEF accesses to D16-D31 if they don't exist */
 438     if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
 439         return false;
 440     }
 441
 442     rd = a->vd;
 443     rm = a->vm;
 444
 445     if (!vfp_access_check(s)) {
 446         return true;
 447     }
 448
 449     if (sz == 1) {
 450         fpst = fpstatus_ptr(FPST_FPCR_F16);
 451     } else {
 452         fpst = fpstatus_ptr(FPST_FPCR);
 453     }
 454
 455     tcg_shift = tcg_const_i32(0);
 456
 457     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
 458     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 459
 460     if (sz == 3) {
 461         TCGv_i64 tcg_double, tcg_res;
 462         TCGv_i32 tcg_tmp;
 463         tcg_double = tcg_temp_new_i64();
 464         tcg_res = tcg_temp_new_i64();
 465         tcg_tmp = tcg_temp_new_i32();
 466         neon_load_reg64(tcg_double, rm);
 467         if (is_signed) {
 468             gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
 469         } else {
 470             gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
 471         }
 472         tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
 473         neon_store_reg32(tcg_tmp, rd);
 474         tcg_temp_free_i32(tcg_tmp);
 475         tcg_temp_free_i64(tcg_res);
 476         tcg_temp_free_i64(tcg_double);
 477     } else {
 478         TCGv_i32 tcg_single, tcg_res;
 479         tcg_single = tcg_temp_new_i32();
 480         tcg_res = tcg_temp_new_i32();
 481         neon_load_reg32(tcg_single, rm);
 482         if (sz == 1) {
 483             if (is_signed) {
 484                 gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst);
 485             } else {
 486                 gen_helper_vfp_toulh(tcg_res, tcg_single, tcg_shift, fpst);
 487             }
 488         } else {
 489             if (is_signed) {
 490                 gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
 491             } else {
 492                 gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
 493             }
 494         }
 495         neon_store_reg32(tcg_res, rd);
 496         tcg_temp_free_i32(tcg_res);
 497         tcg_temp_free_i32(tcg_single);
 498     }
 499
 500     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 501     tcg_temp_free_i32(tcg_rmode);
 502
 503     tcg_temp_free_i32(tcg_shift);
 504
 505     tcg_temp_free_ptr(fpst);
 506
 507     return true;
 508 }
 509
 510 static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
 511 {
 512     /* VMOV scalar to general purpose register */
 513     TCGv_i32 tmp;
 514     int pass;
 515     uint32_t offset;
 516
 517     /* SIZE == 2 is a VFP instruction; otherwise NEON.  */
 518     if (a->size == 2
 519         ? !dc_isar_feature(aa32_fpsp_v2, s)
 520         : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
 521         return false;
 522     }
 523
 524     /* UNDEF accesses to D16-D31 if they don't exist */
 525     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
 526         return false;
 527     }
 528
 529     offset = a->index << a->size;
 530     pass = extract32(offset, 2, 1);
 531     offset = extract32(offset, 0, 2) * 8;
 532
 533     if (!vfp_access_check(s)) {
 534         return true;
 535     }
 536
 537     tmp = neon_load_reg(a->vn, pass);
 538     switch (a->size) {
 539     case 0:
 540         if (offset) {
 541             tcg_gen_shri_i32(tmp, tmp, offset);
 542         }
 543         if (a->u) {
 544             gen_uxtb(tmp);
 545         } else {
 546             gen_sxtb(tmp);
 547         }
 548         break;
 549     case 1:
 550         if (a->u) {
 551             if (offset) {
 552                 tcg_gen_shri_i32(tmp, tmp, 16);
 553             } else {
 554                 gen_uxth(tmp);
 555             }
 556         } else {
 557             if (offset) {
 558                 tcg_gen_sari_i32(tmp, tmp, 16);
 559             } else {
 560                 gen_sxth(tmp);
 561             }
 562         }
 563         break;
 564     case 2:
 565         break;
 566     }
 567     store_reg(s, a->rt, tmp);
 568
 569     return true;
 570 }
 571
 572 static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
 573 {
 574     /* VMOV general purpose register to scalar */
 575     TCGv_i32 tmp, tmp2;
 576     int pass;
 577     uint32_t offset;
 578
 579     /* SIZE == 2 is a VFP instruction; otherwise NEON.  */
 580     if (a->size == 2
 581         ? !dc_isar_feature(aa32_fpsp_v2, s)
 582         : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
 583         return false;
 584     }
 585
 586     /* UNDEF accesses to D16-D31 if they don't exist */
 587     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
 588         return false;
 589     }
 590
 591     offset = a->index << a->size;
 592     pass = extract32(offset, 2, 1);
 593     offset = extract32(offset, 0, 2) * 8;
 594
 595     if (!vfp_access_check(s)) {
 596         return true;
 597     }
 598
 599     tmp = load_reg(s, a->rt);
 600     switch (a->size) {
 601     case 0:
 602         tmp2 = neon_load_reg(a->vn, pass);
 603         tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8);
 604         tcg_temp_free_i32(tmp2);
 605         break;
 606     case 1:
 607         tmp2 = neon_load_reg(a->vn, pass);
 608         tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16);
 609         tcg_temp_free_i32(tmp2);
 610         break;
 611     case 2:
 612         break;
 613     }
 614     neon_store_reg(a->vn, pass, tmp);
 615
 616     return true;
 617 }
 618
 619 static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
 620 {
 621     /* VDUP (general purpose register) */
 622     TCGv_i32 tmp;
 623     int size, vec_size;
 624
 625     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
 626         return false;
 627     }
 628
 629     /* UNDEF accesses to D16-D31 if they don't exist */
 630     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
 631         return false;
 632     }
 633
 634     if (a->b && a->e) {
 635         return false;
 636     }
 637
 638     if (a->q && (a->vn & 1)) {
 639         return false;
 640     }
 641
 642     vec_size = a->q ? 16 : 8;
 643     if (a->b) {
 644         size = 0;
 645     } else if (a->e) {
 646         size = 1;
 647     } else {
 648         size = 2;
 649     }
 650
 651     if (!vfp_access_check(s)) {
 652         return true;
 653     }
 654
 655     tmp = load_reg(s, a->rt);
 656     tcg_gen_gvec_dup_i32(size, neon_reg_offset(a->vn, 0),
 657                          vec_size, vec_size, tmp);
 658     tcg_temp_free_i32(tmp);
 659
 660     return true;
 661 }
 662
 663 static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
 664 {
 665     TCGv_i32 tmp;
 666     bool ignore_vfp_enabled = false;
 667
 668     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
 669         return false;
 670     }
 671
 672     if (arm_dc_feature(s, ARM_FEATURE_M)) {
 673         /*
 674          * The only M-profile VFP vmrs/vmsr sysreg is FPSCR.
 675          * Accesses to R15 are UNPREDICTABLE; we choose to undef.
 676          * (FPSCR -> r15 is a special case which writes to the PSR flags.)
 677          */
 678         if (a->rt == 15 && (!a->l || a->reg != ARM_VFP_FPSCR)) {
 679             return false;
 680         }
 681     }
 682
 683     switch (a->reg) {
 684     case ARM_VFP_FPSID:
 685         /*
 686          * VFPv2 allows access to FPSID from userspace; VFPv3 restricts
 687          * all ID registers to privileged access only.
 688          */
 689         if (IS_USER(s) && dc_isar_feature(aa32_fpsp_v3, s)) {
 690             return false;
 691         }
 692         ignore_vfp_enabled = true;
 693         break;
 694     case ARM_VFP_MVFR0:
 695     case ARM_VFP_MVFR1:
 696         if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
 697             return false;
 698         }
 699         ignore_vfp_enabled = true;
 700         break;
 701     case ARM_VFP_MVFR2:
 702         if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) {
 703             return false;
 704         }
 705         ignore_vfp_enabled = true;
 706         break;
 707     case ARM_VFP_FPSCR:
 708         break;
 709     case ARM_VFP_FPEXC:
 710         if (IS_USER(s)) {
 711             return false;
 712         }
 713         ignore_vfp_enabled = true;
 714         break;
 715     case ARM_VFP_FPINST:
 716     case ARM_VFP_FPINST2:
 717         /* Not present in VFPv3 */
 718         if (IS_USER(s) || dc_isar_feature(aa32_fpsp_v3, s)) {
 719             return false;
 720         }
 721         break;
 722     default:
 723         return false;
 724     }
 725
 726     if (!full_vfp_access_check(s, ignore_vfp_enabled)) {
 727         return true;
 728     }
 729
 730     if (a->l) {
 731         /* VMRS, move VFP special register to gp register */
 732         switch (a->reg) {
 733         case ARM_VFP_MVFR0:
 734         case ARM_VFP_MVFR1:
 735         case ARM_VFP_MVFR2:
 736         case ARM_VFP_FPSID:
 737             if (s->current_el == 1) {
 738                 TCGv_i32 tcg_reg, tcg_rt;
 739
 740                 gen_set_condexec(s);
 741                 gen_set_pc_im(s, s->pc_curr);
 742                 tcg_reg = tcg_const_i32(a->reg);
 743                 tcg_rt = tcg_const_i32(a->rt);
 744                 gen_helper_check_hcr_el2_trap(cpu_env, tcg_rt, tcg_reg);
 745                 tcg_temp_free_i32(tcg_reg);
 746                 tcg_temp_free_i32(tcg_rt);
 747             }
 748             /* fall through */
 749         case ARM_VFP_FPEXC:
 750         case ARM_VFP_FPINST:
 751         case ARM_VFP_FPINST2:
 752             tmp = load_cpu_field(vfp.xregs[a->reg]);
 753             break;
 754         case ARM_VFP_FPSCR:
 755             if (a->rt == 15) {
 756                 tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
 757                 tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
 758             } else {
 759                 tmp = tcg_temp_new_i32();
 760                 gen_helper_vfp_get_fpscr(tmp, cpu_env);
 761             }
 762             break;
 763         default:
 764             g_assert_not_reached();
 765         }
 766
 767         if (a->rt == 15) {
 768             /* Set the 4 flag bits in the CPSR.  */
 769             gen_set_nzcv(tmp);
 770             tcg_temp_free_i32(tmp);
 771         } else {
 772             store_reg(s, a->rt, tmp);
 773         }
 774     } else {
 775         /* VMSR, move gp register to VFP special register */
 776         switch (a->reg) {
 777         case ARM_VFP_FPSID:
 778         case ARM_VFP_MVFR0:
 779         case ARM_VFP_MVFR1:
 780         case ARM_VFP_MVFR2:
 781             /* Writes are ignored.  */
 782             break;
 783         case ARM_VFP_FPSCR:
 784             tmp = load_reg(s, a->rt);
 785             gen_helper_vfp_set_fpscr(cpu_env, tmp);
 786             tcg_temp_free_i32(tmp);
 787             gen_lookup_tb(s);
 788             break;
 789         case ARM_VFP_FPEXC:
 790             /*
 791              * TODO: VFP subarchitecture support.
 792              * For now, keep the EN bit only
 793              */
 794             tmp = load_reg(s, a->rt);
 795             tcg_gen_andi_i32(tmp, tmp, 1 << 30);
 796             store_cpu_field(tmp, vfp.xregs[a->reg]);
 797             gen_lookup_tb(s);
 798             break;
 799         case ARM_VFP_FPINST:
 800         case ARM_VFP_FPINST2:
 801             tmp = load_reg(s, a->rt);
 802             store_cpu_field(tmp, vfp.xregs[a->reg]);
 803             break;
 804         default:
 805             g_assert_not_reached();
 806         }
 807     }
 808
 809     return true;
 810 }
 811
 812 static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
 813 {
 814     TCGv_i32 tmp;
 815
 816     if (!dc_isar_feature(aa32_fp16_arith, s)) {
 817         return false;
 818     }
 819
 820     if (a->rt == 15) {
 821         /* UNPREDICTABLE; we choose to UNDEF */
 822         return false;
 823     }
 824
 825     if (!vfp_access_check(s)) {
 826         return true;
 827     }
 828
 829     if (a->l) {
 830         /* VFP to general purpose register */
 831         tmp = tcg_temp_new_i32();
 832         neon_load_reg32(tmp, a->vn);
 833         tcg_gen_andi_i32(tmp, tmp, 0xffff);
 834         store_reg(s, a->rt, tmp);
 835     } else {
 836         /* general purpose register to VFP */
 837         tmp = load_reg(s, a->rt);
 838         tcg_gen_andi_i32(tmp, tmp, 0xffff);
 839         neon_store_reg32(tmp, a->vn);
 840         tcg_temp_free_i32(tmp);
 841     }
 842
 843     return true;
 844 }
 845
 846 static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
 847 {
 848     TCGv_i32 tmp;
 849
 850     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
 851         return false;
 852     }
 853
 854     if (!vfp_access_check(s)) {
 855         return true;
 856     }
 857
 858     if (a->l) {
 859         /* VFP to general purpose register */
 860         tmp = tcg_temp_new_i32();
 861         neon_load_reg32(tmp, a->vn);
 862         if (a->rt == 15) {
 863             /* Set the 4 flag bits in the CPSR.  */
 864             gen_set_nzcv(tmp);
 865             tcg_temp_free_i32(tmp);
 866         } else {
 867             store_reg(s, a->rt, tmp);
 868         }
 869     } else {
 870         /* general purpose register to VFP */
 871         tmp = load_reg(s, a->rt);
 872         neon_store_reg32(tmp, a->vn);
 873         tcg_temp_free_i32(tmp);
 874     }
 875
 876     return true;
 877 }
 878
 879 static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
 880 {
 881     TCGv_i32 tmp;
 882
 883     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
 884         return false;
 885     }
 886
 887     /*
 888      * VMOV between two general-purpose registers and two single precision
 889      * floating point registers
 890      */
 891     if (!vfp_access_check(s)) {
 892         return true;
 893     }
 894
 895     if (a->op) {
 896         /* fpreg to gpreg */
 897         tmp = tcg_temp_new_i32();
 898         neon_load_reg32(tmp, a->vm);
 899         store_reg(s, a->rt, tmp);
 900         tmp = tcg_temp_new_i32();
 901         neon_load_reg32(tmp, a->vm + 1);
 902         store_reg(s, a->rt2, tmp);
 903     } else {
 904         /* gpreg to fpreg */
 905         tmp = load_reg(s, a->rt);
 906         neon_store_reg32(tmp, a->vm);
 907         tcg_temp_free_i32(tmp);
 908         tmp = load_reg(s, a->rt2);
 909         neon_store_reg32(tmp, a->vm + 1);
 910         tcg_temp_free_i32(tmp);
 911     }
 912
 913     return true;
 914 }
 915
 916 static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
 917 {
 918     TCGv_i32 tmp;
 919
 920     /*
 921      * VMOV between two general-purpose registers and one double precision
 922      * floating point register.  Note that this does not require support
 923      * for double precision arithmetic.
 924      */
 925     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
 926         return false;
 927     }
 928
 929     /* UNDEF accesses to D16-D31 if they don't exist */
 930     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
 931         return false;
 932     }
 933
 934     if (!vfp_access_check(s)) {
 935         return true;
 936     }
 937
 938     if (a->op) {
 939         /* fpreg to gpreg */
 940         tmp = tcg_temp_new_i32();
 941         neon_load_reg32(tmp, a->vm * 2);
 942         store_reg(s, a->rt, tmp);
 943         tmp = tcg_temp_new_i32();
 944         neon_load_reg32(tmp, a->vm * 2 + 1);
 945         store_reg(s, a->rt2, tmp);
 946     } else {
 947         /* gpreg to fpreg */
 948         tmp = load_reg(s, a->rt);
 949         neon_store_reg32(tmp, a->vm * 2);
 950         tcg_temp_free_i32(tmp);
 951         tmp = load_reg(s, a->rt2);
 952         neon_store_reg32(tmp, a->vm * 2 + 1);
 953         tcg_temp_free_i32(tmp);
 954     }
 955
 956     return true;
 957 }
 958
 959 static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a)
 960 {
 961     uint32_t offset;
 962     TCGv_i32 addr, tmp;
 963
 964     if (!dc_isar_feature(aa32_fp16_arith, s)) {
 965         return false;
 966     }
 967
 968     if (!vfp_access_check(s)) {
 969         return true;
 970     }
 971
 972     /* imm8 field is offset/2 for fp16, unlike fp32 and fp64 */
 973     offset = a->imm << 1;
 974     if (!a->u) {
 975         offset = -offset;
 976     }
 977
 978     /* For thumb, use of PC is UNPREDICTABLE.  */
 979     addr = add_reg_for_lit(s, a->rn, offset);
 980     tmp = tcg_temp_new_i32();
 981     if (a->l) {
 982         gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
 983         neon_store_reg32(tmp, a->vd);
 984     } else {
 985         neon_load_reg32(tmp, a->vd);
 986         gen_aa32_st16(s, tmp, addr, get_mem_index(s));
 987     }
 988     tcg_temp_free_i32(tmp);
 989     tcg_temp_free_i32(addr);
 990
 991     return true;
 992 }
 993
 994 static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
 995 {
 996     uint32_t offset;
 997     TCGv_i32 addr, tmp;
 998
 999     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1000         return false;
1001     }
1002
1003     if (!vfp_access_check(s)) {
1004         return true;
1005     }
1006
1007     offset = a->imm << 2;
1008     if (!a->u) {
1009         offset = -offset;
1010     }
1011
1012     /* For thumb, use of PC is UNPREDICTABLE.  */
1013     addr = add_reg_for_lit(s, a->rn, offset);
1014     tmp = tcg_temp_new_i32();
1015     if (a->l) {
1016         gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1017         neon_store_reg32(tmp, a->vd);
1018     } else {
1019         neon_load_reg32(tmp, a->vd);
1020         gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1021     }
1022     tcg_temp_free_i32(tmp);
1023     tcg_temp_free_i32(addr);
1024
1025     return true;
1026 }
1027
1028 static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
1029 {
1030     uint32_t offset;
1031     TCGv_i32 addr;
1032     TCGv_i64 tmp;
1033
1034     /* Note that this does not require support for double arithmetic.  */
1035     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1036         return false;
1037     }
1038
1039     /* UNDEF accesses to D16-D31 if they don't exist */
1040     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
1041         return false;
1042     }
1043
1044     if (!vfp_access_check(s)) {
1045         return true;
1046     }
1047
1048     offset = a->imm << 2;
1049     if (!a->u) {
1050         offset = -offset;
1051     }
1052
1053     /* For thumb, use of PC is UNPREDICTABLE.  */
1054     addr = add_reg_for_lit(s, a->rn, offset);
1055     tmp = tcg_temp_new_i64();
1056     if (a->l) {
1057         gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
1058         neon_store_reg64(tmp, a->vd);
1059     } else {
1060         neon_load_reg64(tmp, a->vd);
1061         gen_aa32_st64(s, tmp, addr, get_mem_index(s));
1062     }
1063     tcg_temp_free_i64(tmp);
1064     tcg_temp_free_i32(addr);
1065
1066     return true;
1067 }
1068
1069 static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
1070 {
1071     uint32_t offset;
1072     TCGv_i32 addr, tmp;
1073     int i, n;
1074
1075     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1076         return false;
1077     }
1078
1079     n = a->imm;
1080
1081     if (n == 0 || (a->vd + n) > 32) {
1082         /*
1083          * UNPREDICTABLE cases for bad immediates: we choose to
1084          * UNDEF to avoid generating huge numbers of TCG ops
1085          */
1086         return false;
1087     }
1088     if (a->rn == 15 && a->w) {
1089         /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
1090         return false;
1091     }
1092
1093     if (!vfp_access_check(s)) {
1094         return true;
1095     }
1096
1097     /* For thumb, use of PC is UNPREDICTABLE.  */
1098     addr = add_reg_for_lit(s, a->rn, 0);
1099     if (a->p) {
1100         /* pre-decrement */
1101         tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
1102     }
1103
1104     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1105         /*
1106          * Here 'addr' is the lowest address we will store to,
1107          * and is either the old SP (if post-increment) or
1108          * the new SP (if pre-decrement). For post-increment
1109          * where the old value is below the limit and the new
1110          * value is above, it is UNKNOWN whether the limit check
1111          * triggers; we choose to trigger.
1112          */
1113         gen_helper_v8m_stackcheck(cpu_env, addr);
1114     }
1115
1116     offset = 4;
1117     tmp = tcg_temp_new_i32();
1118     for (i = 0; i < n; i++) {
1119         if (a->l) {
1120             /* load */
1121             gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1122             neon_store_reg32(tmp, a->vd + i);
1123         } else {
1124             /* store */
1125             neon_load_reg32(tmp, a->vd + i);
1126             gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1127         }
1128         tcg_gen_addi_i32(addr, addr, offset);
1129     }
1130     tcg_temp_free_i32(tmp);
1131     if (a->w) {
1132         /* writeback */
1133         if (a->p) {
1134             offset = -offset * n;
1135             tcg_gen_addi_i32(addr, addr, offset);
1136         }
1137         store_reg(s, a->rn, addr);
1138     } else {
1139         tcg_temp_free_i32(addr);
1140     }
1141
1142     return true;
1143 }
1144
1145 static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
1146 {
1147     uint32_t offset;
1148     TCGv_i32 addr;
1149     TCGv_i64 tmp;
1150     int i, n;
1151
1152     /* Note that this does not require support for double arithmetic.  */
1153     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1154         return false;
1155     }
1156
1157     n = a->imm >> 1;
1158
1159     if (n == 0 || (a->vd + n) > 32 || n > 16) {
1160         /*
1161          * UNPREDICTABLE cases for bad immediates: we choose to
1162          * UNDEF to avoid generating huge numbers of TCG ops
1163          */
1164         return false;
1165     }
1166     if (a->rn == 15 && a->w) {
1167         /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
1168         return false;
1169     }
1170
1171     /* UNDEF accesses to D16-D31 if they don't exist */
1172     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) {
1173         return false;
1174     }
1175
1176     if (!vfp_access_check(s)) {
1177         return true;
1178     }
1179
1180     /* For thumb, use of PC is UNPREDICTABLE.  */
1181     addr = add_reg_for_lit(s, a->rn, 0);
1182     if (a->p) {
1183         /* pre-decrement */
1184         tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
1185     }
1186
1187     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1188         /*
1189          * Here 'addr' is the lowest address we will store to,
1190          * and is either the old SP (if post-increment) or
1191          * the new SP (if pre-decrement). For post-increment
1192          * where the old value is below the limit and the new
1193          * value is above, it is UNKNOWN whether the limit check
1194          * triggers; we choose to trigger.
1195          */
1196         gen_helper_v8m_stackcheck(cpu_env, addr);
1197     }
1198
1199     offset = 8;
1200     tmp = tcg_temp_new_i64();
1201     for (i = 0; i < n; i++) {
1202         if (a->l) {
1203             /* load */
1204             gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
1205             neon_store_reg64(tmp, a->vd + i);
1206         } else {
1207             /* store */
1208             neon_load_reg64(tmp, a->vd + i);
1209             gen_aa32_st64(s, tmp, addr, get_mem_index(s));
1210         }
1211         tcg_gen_addi_i32(addr, addr, offset);
1212     }
1213     tcg_temp_free_i64(tmp);
1214     if (a->w) {
1215         /* writeback */
1216         if (a->p) {
1217             offset = -offset * n;
1218         } else if (a->imm & 1) {
1219             offset = 4;
1220         } else {
1221             offset = 0;
1222         }
1223
1224         if (offset != 0) {
1225             tcg_gen_addi_i32(addr, addr, offset);
1226         }
1227         store_reg(s, a->rn, addr);
1228     } else {
1229         tcg_temp_free_i32(addr);
1230     }
1231
1232     return true;
1233 }
1234
1235 /*
1236  * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp().
1237  * The callback should emit code to write a value to vd. If
1238  * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd
1239  * will contain the old value of the relevant VFP register;
1240  * otherwise it must be written to only.
1241  */
1242 typedef void VFPGen3OpSPFn(TCGv_i32 vd,
1243                            TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst);
1244 typedef void VFPGen3OpDPFn(TCGv_i64 vd,
1245                            TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);
1246
1247 /*
1248  * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp().
1249  * The callback should emit code to write a value to vd (which
1250  * should be written to only).
1251  */
1252 typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
1253 typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);
1254
1255 /*
1256  * Return true if the specified S reg is in a scalar bank
1257  * (ie if it is s0..s7)
1258  */
1259 static inline bool vfp_sreg_is_scalar(int reg)
1260 {
1261     return (reg & 0x18) == 0;
1262 }
1263
1264 /*
1265  * Return true if the specified D reg is in a scalar bank
1266  * (ie if it is d0..d3 or d16..d19)
1267  */
1268 static inline bool vfp_dreg_is_scalar(int reg)
1269 {
1270     return (reg & 0xc) == 0;
1271 }
1272
1273 /*
1274  * Advance the S reg number forwards by delta within its bank
1275  * (ie increment the low 3 bits but leave the rest the same)
1276  */
1277 static inline int vfp_advance_sreg(int reg, int delta)
1278 {
1279     return ((reg + delta) & 0x7) | (reg & ~0x7);
1280 }
1281
1282 /*
1283  * Advance the D reg number forwards by delta within its bank
1284  * (ie increment the low 2 bits but leave the rest the same)
1285  */
1286 static inline int vfp_advance_dreg(int reg, int delta)
1287 {
1288     return ((reg + delta) & 0x3) | (reg & ~0x3);
1289 }
1290
1291 /*
1292  * Perform a 3-operand VFP data processing instruction. fn is the
1293  * callback to do the actual operation; this function deals with the
1294  * code to handle looping around for VFP vector processing.
1295  */
1296 static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
1297                           int vd, int vn, int vm, bool reads_vd)
1298 {
1299     uint32_t delta_m = 0;
1300     uint32_t delta_d = 0;
1301     int veclen = s->vec_len;
1302     TCGv_i32 f0, f1, fd;
1303     TCGv_ptr fpst;
1304
1305     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1306         return false;
1307     }
1308
1309     if (!dc_isar_feature(aa32_fpshvec, s) &&
1310         (veclen != 0 || s->vec_stride != 0)) {
1311         return false;
1312     }
1313
1314     if (!vfp_access_check(s)) {
1315         return true;
1316     }
1317
1318     if (veclen > 0) {
1319         /* Figure out what type of vector operation this is.  */
1320         if (vfp_sreg_is_scalar(vd)) {
1321             /* scalar */
1322             veclen = 0;
1323         } else {
1324             delta_d = s->vec_stride + 1;
1325
1326             if (vfp_sreg_is_scalar(vm)) {
1327                 /* mixed scalar/vector */
1328                 delta_m = 0;
1329             } else {
1330                 /* vector */
1331                 delta_m = delta_d;
1332             }
1333         }
1334     }
1335
1336     f0 = tcg_temp_new_i32();
1337     f1 = tcg_temp_new_i32();
1338     fd = tcg_temp_new_i32();
1339     fpst = fpstatus_ptr(FPST_FPCR);
1340
1341     neon_load_reg32(f0, vn);
1342     neon_load_reg32(f1, vm);
1343
1344     for (;;) {
1345         if (reads_vd) {
1346             neon_load_reg32(fd, vd);
1347         }
1348         fn(fd, f0, f1, fpst);
1349         neon_store_reg32(fd, vd);
1350
1351         if (veclen == 0) {
1352             break;
1353         }
1354
1355         /* Set up the operands for the next iteration */
1356         veclen--;
1357         vd = vfp_advance_sreg(vd, delta_d);
1358         vn = vfp_advance_sreg(vn, delta_d);
1359         neon_load_reg32(f0, vn);
1360         if (delta_m) {
1361             vm = vfp_advance_sreg(vm, delta_m);
1362             neon_load_reg32(f1, vm);
1363         }
1364     }
1365
1366     tcg_temp_free_i32(f0);
1367     tcg_temp_free_i32(f1);
1368     tcg_temp_free_i32(fd);
1369     tcg_temp_free_ptr(fpst);
1370
1371     return true;
1372 }
1373
1374 static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
1375                           int vd, int vn, int vm, bool reads_vd)
1376 {
1377     /*
1378      * Do a half-precision operation. Functionally this is
1379      * the same as do_vfp_3op_sp(), except:
1380      *  - it uses the FPST_FPCR_F16
1381      *  - it doesn't need the VFP vector handling (fp16 is a
1382      *    v8 feature, and in v8 VFP vectors don't exist)
1383      *  - it does the aa32_fp16_arith feature test
1384      */
1385     TCGv_i32 f0, f1, fd;
1386     TCGv_ptr fpst;
1387
1388     if (!dc_isar_feature(aa32_fp16_arith, s)) {
1389         return false;
1390     }
1391
1392     if (s->vec_len != 0 || s->vec_stride != 0) {
1393         return false;
1394     }
1395
1396     if (!vfp_access_check(s)) {
1397         return true;
1398     }
1399
1400     f0 = tcg_temp_new_i32();
1401     f1 = tcg_temp_new_i32();
1402     fd = tcg_temp_new_i32();
1403     fpst = fpstatus_ptr(FPST_FPCR_F16);
1404
1405     neon_load_reg32(f0, vn);
1406     neon_load_reg32(f1, vm);
1407
1408     if (reads_vd) {
1409         neon_load_reg32(fd, vd);
1410     }
1411     fn(fd, f0, f1, fpst);
1412     neon_store_reg32(fd, vd);
1413
1414     tcg_temp_free_i32(f0);
1415     tcg_temp_free_i32(f1);
1416     tcg_temp_free_i32(fd);
1417     tcg_temp_free_ptr(fpst);
1418
1419     return true;
1420 }
1421
1422 static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
1423                           int vd, int vn, int vm, bool reads_vd)
1424 {
1425     uint32_t delta_m = 0;
1426     uint32_t delta_d = 0;
1427     int veclen = s->vec_len;
1428     TCGv_i64 f0, f1, fd;
1429     TCGv_ptr fpst;
1430
1431     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
1432         return false;
1433     }
1434
1435     /* UNDEF accesses to D16-D31 if they don't exist */
1436     if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) {
1437         return false;
1438     }
1439
1440     if (!dc_isar_feature(aa32_fpshvec, s) &&
1441         (veclen != 0 || s->vec_stride != 0)) {
1442         return false;
1443     }
1444
1445     if (!vfp_access_check(s)) {
1446         return true;
1447     }
1448
1449     if (veclen > 0) {
1450         /* Figure out what type of vector operation this is.  */
1451         if (vfp_dreg_is_scalar(vd)) {
1452             /* scalar */
1453             veclen = 0;
1454         } else {
1455             delta_d = (s->vec_stride >> 1) + 1;
1456
1457             if (vfp_dreg_is_scalar(vm)) {
1458                 /* mixed scalar/vector */
1459                 delta_m = 0;
1460             } else {
1461                 /* vector */
1462                 delta_m = delta_d;
1463             }
1464         }
1465     }
1466
1467     f0 = tcg_temp_new_i64();
1468     f1 = tcg_temp_new_i64();
1469     fd = tcg_temp_new_i64();
1470     fpst = fpstatus_ptr(FPST_FPCR);
1471
1472     neon_load_reg64(f0, vn);
1473     neon_load_reg64(f1, vm);
1474
1475     for (;;) {
1476         if (reads_vd) {
1477             neon_load_reg64(fd, vd);
1478         }
1479         fn(fd, f0, f1, fpst);
1480         neon_store_reg64(fd, vd);
1481
1482         if (veclen == 0) {
1483             break;
1484         }
1485         /* Set up the operands for the next iteration */
1486         veclen--;
1487         vd = vfp_advance_dreg(vd, delta_d);
1488         vn = vfp_advance_dreg(vn, delta_d);
1489         neon_load_reg64(f0, vn);
1490         if (delta_m) {
1491             vm = vfp_advance_dreg(vm, delta_m);
1492             neon_load_reg64(f1, vm);
1493         }
1494     }
1495
1496     tcg_temp_free_i64(f0);
1497     tcg_temp_free_i64(f1);
1498     tcg_temp_free_i64(fd);
1499     tcg_temp_free_ptr(fpst);
1500
1501     return true;
1502 }
1503
1504 static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
1505 {
1506     uint32_t delta_m = 0;
1507     uint32_t delta_d = 0;
1508     int veclen = s->vec_len;
1509     TCGv_i32 f0, fd;
1510
1511     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
1512         return false;
1513     }
1514
1515     if (!dc_isar_feature(aa32_fpshvec, s) &&
1516         (veclen != 0 || s->vec_stride != 0)) {
1517         return false;
1518     }
1519
1520     if (!vfp_access_check(s)) {
1521         return true;
1522     }
1523
1524     if (veclen > 0) {
1525         /* Figure out what type of vector operation this is.  */
1526         if (vfp_sreg_is_scalar(vd)) {
1527             /* scalar */
1528             veclen = 0;
1529         } else {
1530             delta_d = s->vec_stride + 1;
1531
1532             if (vfp_sreg_is_scalar(vm)) {
1533                 /* mixed scalar/vector */
1534                 delta_m = 0;
1535             } else {
1536                 /* vector */
1537                 delta_m = delta_d;
1538             }
1539         }
1540     }
1541
1542     f0 = tcg_temp_new_i32();
1543     fd = tcg_temp_new_i32();
1544
1545     neon_load_reg32(f0, vm);
1546
1547     for (;;) {
1548         fn(fd, f0);
1549         neon_store_reg32(fd, vd);
1550
1551         if (veclen == 0) {
1552             break;
1553         }
1554
1555         if (delta_m == 0) {
1556             /* single source one-many */
1557             while (veclen--) {
1558                 vd = vfp_advance_sreg(vd, delta_d);
1559                 neon_store_reg32(fd, vd);
1560             }
1561             break;
1562         }
1563
1564         /* Set up the operands for the next iteration */
1565         veclen--;
1566         vd = vfp_advance_sreg(vd, delta_d);
1567         vm = vfp_advance_sreg(vm, delta_m);
1568         neon_load_reg32(f0, vm);
1569     }
1570
1571     tcg_temp_free_i32(f0);
1572     tcg_temp_free_i32(fd);
1573
1574     return true;
1575 }
1576
1577 static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
1578 {
1579     /*
1580      * Do a half-precision operation. Functionally this is
1581      * the same as do_vfp_2op_sp(), except:
1582      *  - it doesn't need the VFP vector handling (fp16 is a
1583      *    v8 feature, and in v8 VFP vectors don't exist)
1584      *  - it does the aa32_fp16_arith feature test
1585      */
1586     TCGv_i32 f0;
1587
1588     if (!dc_isar_feature(aa32_fp16_arith, s)) {
1589         return false;
1590     }
1591
1592     if (s->vec_len != 0 || s->vec_stride != 0) {
1593         return false;
1594     }
1595
1596     if (!vfp_access_check(s)) {
1597         return true;
1598     }
1599
1600     f0 = tcg_temp_new_i32();
1601     neon_load_reg32(f0, vm);
1602     fn(f0, f0);
1603     neon_store_reg32(f0, vd);
1604     tcg_temp_free_i32(f0);
1605
1606     return true;
1607 }
1608
1609 static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
1610 {
1611     uint32_t delta_m = 0;
1612     uint32_t delta_d = 0;
1613     int veclen = s->vec_len;
1614     TCGv_i64 f0, fd;
1615
1616     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
1617         return false;
1618     }
1619
1620     /* UNDEF accesses to D16-D31 if they don't exist */
1621     if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) {
1622         return false;
1623     }
1624
1625     if (!dc_isar_feature(aa32_fpshvec, s) &&
1626         (veclen != 0 || s->vec_stride != 0)) {
1627         return false;
1628     }
1629
1630     if (!vfp_access_check(s)) {
1631         return true;
1632     }
1633
1634     if (veclen > 0) {
1635         /* Figure out what type of vector operation this is.  */
1636         if (vfp_dreg_is_scalar(vd)) {
1637             /* scalar */
1638             veclen = 0;
1639         } else {
1640             delta_d = (s->vec_stride >> 1) + 1;
1641
1642             if (vfp_dreg_is_scalar(vm)) {
1643                 /* mixed scalar/vector */
1644                 delta_m = 0;
1645             } else {
1646                 /* vector */
1647                 delta_m = delta_d;
1648             }
1649         }
1650     }
1651
1652     f0 = tcg_temp_new_i64();
1653     fd = tcg_temp_new_i64();
1654
1655     neon_load_reg64(f0, vm);
1656
1657     for (;;) {
1658         fn(fd, f0);
1659         neon_store_reg64(fd, vd);
1660
1661         if (veclen == 0) {
1662             break;
1663         }
1664
1665         if (delta_m == 0) {
1666             /* single source one-many */
1667             while (veclen--) {
1668                 vd = vfp_advance_dreg(vd, delta_d);
1669                 neon_store_reg64(fd, vd);
1670             }
1671             break;
1672         }
1673
1674         /* Set up the operands for the next iteration */
1675         veclen--;
1676         vd = vfp_advance_dreg(vd, delta_d);
1677         vd = vfp_advance_dreg(vm, delta_m);
1678         neon_load_reg64(f0, vm);
1679     }
1680
1681     tcg_temp_free_i64(f0);
1682     tcg_temp_free_i64(fd);
1683
1684     return true;
1685 }
1686
1687 static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1688 {
1689     /* Note that order of inputs to the add matters for NaNs */
1690     TCGv_i32 tmp = tcg_temp_new_i32();
1691
1692     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
1693     gen_helper_vfp_addh(vd, vd, tmp, fpst);
1694     tcg_temp_free_i32(tmp);
1695 }
1696
1697 static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
1698 {
1699     return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);
1700 }
1701
1702 static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1703 {
1704     /* Note that order of inputs to the add matters for NaNs */
1705     TCGv_i32 tmp = tcg_temp_new_i32();
1706
1707     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1708     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1709     tcg_temp_free_i32(tmp);
1710 }
1711
1712 static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
1713 {
1714     return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);
1715 }
1716
1717 static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1718 {
1719     /* Note that order of inputs to the add matters for NaNs */
1720     TCGv_i64 tmp = tcg_temp_new_i64();
1721
1722     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1723     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1724     tcg_temp_free_i64(tmp);
1725 }
1726
1727 static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
1728 {
1729     return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
1730 }
1731
1732 static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1733 {
1734     /*
1735      * VMLS: vd = vd + -(vn * vm)
1736      * Note that order of inputs to the add matters for NaNs.
1737      */
1738     TCGv_i32 tmp = tcg_temp_new_i32();
1739
1740     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
1741     gen_helper_vfp_negh(tmp, tmp);
1742     gen_helper_vfp_addh(vd, vd, tmp, fpst);
1743     tcg_temp_free_i32(tmp);
1744 }
1745
1746 static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
1747 {
1748     return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);
1749 }
1750
1751 static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1752 {
1753     /*
1754      * VMLS: vd = vd + -(vn * vm)
1755      * Note that order of inputs to the add matters for NaNs.
1756      */
1757     TCGv_i32 tmp = tcg_temp_new_i32();
1758
1759     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1760     gen_helper_vfp_negs(tmp, tmp);
1761     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1762     tcg_temp_free_i32(tmp);
1763 }
1764
1765 static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
1766 {
1767     return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);
1768 }
1769
1770 static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1771 {
1772     /*
1773      * VMLS: vd = vd + -(vn * vm)
1774      * Note that order of inputs to the add matters for NaNs.
1775      */
1776     TCGv_i64 tmp = tcg_temp_new_i64();
1777
1778     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1779     gen_helper_vfp_negd(tmp, tmp);
1780     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1781     tcg_temp_free_i64(tmp);
1782 }
1783
1784 static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
1785 {
1786     return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
1787 }
1788
1789 static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1790 {
1791     /*
1792      * VNMLS: -fd + (fn * fm)
1793      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
1794      * plausible looking simplifications because this will give wrong results
1795      * for NaNs.
1796      */
1797     TCGv_i32 tmp = tcg_temp_new_i32();
1798
1799     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
1800     gen_helper_vfp_negh(vd, vd);
1801     gen_helper_vfp_addh(vd, vd, tmp, fpst);
1802     tcg_temp_free_i32(tmp);
1803 }
1804
1805 static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
1806 {
1807     return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);
1808 }
1809
1810 static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1811 {
1812     /*
1813      * VNMLS: -fd + (fn * fm)
1814      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
1815      * plausible looking simplifications because this will give wrong results
1816      * for NaNs.
1817      */
1818     TCGv_i32 tmp = tcg_temp_new_i32();
1819
1820     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1821     gen_helper_vfp_negs(vd, vd);
1822     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1823     tcg_temp_free_i32(tmp);
1824 }
1825
1826 static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
1827 {
1828     return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);
1829 }
1830
1831 static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1832 {
1833     /*
1834      * VNMLS: -fd + (fn * fm)
1835      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
1836      * plausible looking simplifications because this will give wrong results
1837      * for NaNs.
1838      */
1839     TCGv_i64 tmp = tcg_temp_new_i64();
1840
1841     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1842     gen_helper_vfp_negd(vd, vd);
1843     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1844     tcg_temp_free_i64(tmp);
1845 }
1846
1847 static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
1848 {
1849     return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
1850 }
1851
1852 static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1853 {
1854     /* VNMLA: -fd + -(fn * fm) */
1855     TCGv_i32 tmp = tcg_temp_new_i32();
1856
1857     gen_helper_vfp_mulh(tmp, vn, vm, fpst);
1858     gen_helper_vfp_negh(tmp, tmp);
1859     gen_helper_vfp_negh(vd, vd);
1860     gen_helper_vfp_addh(vd, vd, tmp, fpst);
1861     tcg_temp_free_i32(tmp);
1862 }
1863
1864 static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
1865 {
1866     return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);
1867 }
1868
1869 static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1870 {
1871     /* VNMLA: -fd + -(fn * fm) */
1872     TCGv_i32 tmp = tcg_temp_new_i32();
1873
1874     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1875     gen_helper_vfp_negs(tmp, tmp);
1876     gen_helper_vfp_negs(vd, vd);
1877     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1878     tcg_temp_free_i32(tmp);
1879 }
1880
1881 static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
1882 {
1883     return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);
1884 }
1885
1886 static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1887 {
1888     /* VNMLA: -fd + (fn * fm) */
1889     TCGv_i64 tmp = tcg_temp_new_i64();
1890
1891     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1892     gen_helper_vfp_negd(tmp, tmp);
1893     gen_helper_vfp_negd(vd, vd);
1894     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1895     tcg_temp_free_i64(tmp);
1896 }
1897
1898 static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
1899 {
1900     return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
1901 }
1902
1903 static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a)
1904 {
1905     return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false);
1906 }
1907
1908 static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
1909 {
1910     return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
1911 }
1912
1913 static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
1914 {
1915     return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
1916 }
1917
1918 static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1919 {
1920     /* VNMUL: -(fn * fm) */
1921     gen_helper_vfp_mulh(vd, vn, vm, fpst);
1922     gen_helper_vfp_negh(vd, vd);
1923 }
1924
1925 static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
1926 {
1927     return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);
1928 }
1929
1930 static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1931 {
1932     /* VNMUL: -(fn * fm) */
1933     gen_helper_vfp_muls(vd, vn, vm, fpst);
1934     gen_helper_vfp_negs(vd, vd);
1935 }
1936
1937 static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
1938 {
1939     return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false);
1940 }
1941
1942 static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1943 {
1944     /* VNMUL: -(fn * fm) */
1945     gen_helper_vfp_muld(vd, vn, vm, fpst);
1946     gen_helper_vfp_negd(vd, vd);
1947 }
1948
1949 static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
1950 {
1951     return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
1952 }
1953
1954 static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a)
1955 {
1956     return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false);
1957 }
1958
1959 static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
1960 {
1961     return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
1962 }
1963
1964 static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
1965 {
1966     return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
1967 }
1968
1969 static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a)
1970 {
1971     return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false);
1972 }
1973
1974 static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
1975 {
1976     return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
1977 }
1978
1979 static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
1980 {
1981     return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
1982 }
1983
1984 static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a)
1985 {
1986     return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false);
1987 }
1988
1989 static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
1990 {
1991     return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
1992 }
1993
1994 static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
1995 {
1996     return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
1997 }
1998
1999 static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a)
2000 {
2001     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2002         return false;
2003     }
2004     return do_vfp_3op_hp(s, gen_helper_vfp_minnumh,
2005                          a->vd, a->vn, a->vm, false);
2006 }
2007
2008 static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a)
2009 {
2010     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2011         return false;
2012     }
2013     return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh,
2014                          a->vd, a->vn, a->vm, false);
2015 }
2016
2017 static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a)
2018 {
2019     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2020         return false;
2021     }
2022     return do_vfp_3op_sp(s, gen_helper_vfp_minnums,
2023                          a->vd, a->vn, a->vm, false);
2024 }
2025
2026 static bool trans_VMAXNM_sp(DisasContext *s, arg_VMAXNM_sp *a)
2027 {
2028     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2029         return false;
2030     }
2031     return do_vfp_3op_sp(s, gen_helper_vfp_maxnums,
2032                          a->vd, a->vn, a->vm, false);
2033 }
2034
2035 static bool trans_VMINNM_dp(DisasContext *s, arg_VMINNM_dp *a)
2036 {
2037     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2038         return false;
2039     }
2040     return do_vfp_3op_dp(s, gen_helper_vfp_minnumd,
2041                          a->vd, a->vn, a->vm, false);
2042 }
2043
2044 static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a)
2045 {
2046     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
2047         return false;
2048     }
2049     return do_vfp_3op_dp(s, gen_helper_vfp_maxnumd,
2050                          a->vd, a->vn, a->vm, false);
2051 }
2052
2053 static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
2054 {
2055     /*
2056      * VFNMA : fd = muladd(-fd,  fn, fm)
2057      * VFNMS : fd = muladd(-fd, -fn, fm)
2058      * VFMA  : fd = muladd( fd,  fn, fm)
2059      * VFMS  : fd = muladd( fd, -fn, fm)
2060      *
2061      * These are fused multiply-add, and must be done as one floating
2062      * point operation with no rounding between the multiplication and
2063      * addition steps.  NB that doing the negations here as separate
2064      * steps is correct : an input NaN should come out with its sign
2065      * bit flipped if it is a negated-input.
2066      */
2067     TCGv_ptr fpst;
2068     TCGv_i32 vn, vm, vd;
2069
2070     /*
2071      * Present in VFPv4 only, and only with the FP16 extension.
2072      * Note that we can't rely on the SIMDFMAC check alone, because
2073      * in a Neon-no-VFP core that ID register field will be non-zero.
2074      */
2075     if (!dc_isar_feature(aa32_fp16_arith, s) ||
2076         !dc_isar_feature(aa32_simdfmac, s) ||
2077         !dc_isar_feature(aa32_fpsp_v2, s)) {
2078         return false;
2079     }
2080
2081     if (s->vec_len != 0 || s->vec_stride != 0) {
2082         return false;
2083     }
2084
2085     if (!vfp_access_check(s)) {
2086         return true;
2087     }
2088
2089     vn = tcg_temp_new_i32();
2090     vm = tcg_temp_new_i32();
2091     vd = tcg_temp_new_i32();
2092
2093     neon_load_reg32(vn, a->vn);
2094     neon_load_reg32(vm, a->vm);
2095     if (neg_n) {
2096         /* VFNMS, VFMS */
2097         gen_helper_vfp_negh(vn, vn);
2098     }
2099     neon_load_reg32(vd, a->vd);
2100     if (neg_d) {
2101         /* VFNMA, VFNMS */
2102         gen_helper_vfp_negh(vd, vd);
2103     }
2104     fpst = fpstatus_ptr(FPST_FPCR_F16);
2105     gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
2106     neon_store_reg32(vd, a->vd);
2107
2108     tcg_temp_free_ptr(fpst);
2109     tcg_temp_free_i32(vn);
2110     tcg_temp_free_i32(vm);
2111     tcg_temp_free_i32(vd);
2112
2113     return true;
2114 }
2115
2116 static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
2117 {
2118     /*
2119      * VFNMA : fd = muladd(-fd,  fn, fm)
2120      * VFNMS : fd = muladd(-fd, -fn, fm)
2121      * VFMA  : fd = muladd( fd,  fn, fm)
2122      * VFMS  : fd = muladd( fd, -fn, fm)
2123      *
2124      * These are fused multiply-add, and must be done as one floating
2125      * point operation with no rounding between the multiplication and
2126      * addition steps.  NB that doing the negations here as separate
2127      * steps is correct : an input NaN should come out with its sign
2128      * bit flipped if it is a negated-input.
2129      */
2130     TCGv_ptr fpst;
2131     TCGv_i32 vn, vm, vd;
2132
2133     /*
2134      * Present in VFPv4 only.
2135      * Note that we can't rely on the SIMDFMAC check alone, because
2136      * in a Neon-no-VFP core that ID register field will be non-zero.
2137      */
2138     if (!dc_isar_feature(aa32_simdfmac, s) ||
2139         !dc_isar_feature(aa32_fpsp_v2, s)) {
2140         return false;
2141     }
2142     /*
2143      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
2144      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
2145      */
2146     if (s->vec_len != 0 || s->vec_stride != 0) {
2147         return false;
2148     }
2149
2150     if (!vfp_access_check(s)) {
2151         return true;
2152     }
2153
2154     vn = tcg_temp_new_i32();
2155     vm = tcg_temp_new_i32();
2156     vd = tcg_temp_new_i32();
2157
2158     neon_load_reg32(vn, a->vn);
2159     neon_load_reg32(vm, a->vm);
2160     if (neg_n) {
2161         /* VFNMS, VFMS */
2162         gen_helper_vfp_negs(vn, vn);
2163     }
2164     neon_load_reg32(vd, a->vd);
2165     if (neg_d) {
2166         /* VFNMA, VFNMS */
2167         gen_helper_vfp_negs(vd, vd);
2168     }
2169     fpst = fpstatus_ptr(FPST_FPCR);
2170     gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
2171     neon_store_reg32(vd, a->vd);
2172
2173     tcg_temp_free_ptr(fpst);
2174     tcg_temp_free_i32(vn);
2175     tcg_temp_free_i32(vm);
2176     tcg_temp_free_i32(vd);
2177
2178     return true;
2179 }
2180
2181 static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
2182 {
2183     /*
2184      * VFNMA : fd = muladd(-fd,  fn, fm)
2185      * VFNMS : fd = muladd(-fd, -fn, fm)
2186      * VFMA  : fd = muladd( fd,  fn, fm)
2187      * VFMS  : fd = muladd( fd, -fn, fm)
2188      *
2189      * These are fused multiply-add, and must be done as one floating
2190      * point operation with no rounding between the multiplication and
2191      * addition steps.  NB that doing the negations here as separate
2192      * steps is correct : an input NaN should come out with its sign
2193      * bit flipped if it is a negated-input.
2194      */
2195     TCGv_ptr fpst;
2196     TCGv_i64 vn, vm, vd;
2197
2198     /*
2199      * Present in VFPv4 only.
2200      * Note that we can't rely on the SIMDFMAC check alone, because
2201      * in a Neon-no-VFP core that ID register field will be non-zero.
2202      */
2203     if (!dc_isar_feature(aa32_simdfmac, s) ||
2204         !dc_isar_feature(aa32_fpdp_v2, s)) {
2205         return false;
2206     }
2207     /*
2208      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
2209      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
2210      */
2211     if (s->vec_len != 0 || s->vec_stride != 0) {
2212         return false;
2213     }
2214
2215     /* UNDEF accesses to D16-D31 if they don't exist. */
2216     if (!dc_isar_feature(aa32_simd_r32, s) &&
2217         ((a->vd | a->vn | a->vm) & 0x10)) {
2218         return false;
2219     }
2220
2221     if (!vfp_access_check(s)) {
2222         return true;
2223     }
2224
2225     vn = tcg_temp_new_i64();
2226     vm = tcg_temp_new_i64();
2227     vd = tcg_temp_new_i64();
2228
2229     neon_load_reg64(vn, a->vn);
2230     neon_load_reg64(vm, a->vm);
2231     if (neg_n) {
2232         /* VFNMS, VFMS */
2233         gen_helper_vfp_negd(vn, vn);
2234     }
2235     neon_load_reg64(vd, a->vd);
2236     if (neg_d) {
2237         /* VFNMA, VFNMS */
2238         gen_helper_vfp_negd(vd, vd);
2239     }
2240     fpst = fpstatus_ptr(FPST_FPCR);
2241     gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
2242     neon_store_reg64(vd, a->vd);
2243
2244     tcg_temp_free_ptr(fpst);
2245     tcg_temp_free_i64(vn);
2246     tcg_temp_free_i64(vm);
2247     tcg_temp_free_i64(vd);
2248
2249     return true;
2250 }
2251
2252 #define MAKE_ONE_VFM_TRANS_FN(INSN, PREC, NEGN, NEGD)                   \
2253     static bool trans_##INSN##_##PREC(DisasContext *s,                  \
2254                                       arg_##INSN##_##PREC *a)           \
2255     {                                                                   \
2256         return do_vfm_##PREC(s, a, NEGN, NEGD);                         \
2257     }
2258
2259 #define MAKE_VFM_TRANS_FNS(PREC) \
2260     MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \
2261     MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \
2262     MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \
2263     MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true)
2264
2265 MAKE_VFM_TRANS_FNS(hp)
2266 MAKE_VFM_TRANS_FNS(sp)
2267 MAKE_VFM_TRANS_FNS(dp)
2268
2269 static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a)
2270 {
2271     TCGv_i32 fd;
2272
2273     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2274         return false;
2275     }
2276
2277     if (s->vec_len != 0 || s->vec_stride != 0) {
2278         return false;
2279     }
2280
2281     if (!vfp_access_check(s)) {
2282         return true;
2283     }
2284
2285     fd = tcg_const_i32(vfp_expand_imm(MO_16, a->imm));
2286     neon_store_reg32(fd, a->vd);
2287     tcg_temp_free_i32(fd);
2288     return true;
2289 }
2290
2291 static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
2292 {
2293     uint32_t delta_d = 0;
2294     int veclen = s->vec_len;
2295     TCGv_i32 fd;
2296     uint32_t vd;
2297
2298     vd = a->vd;
2299
2300     if (!dc_isar_feature(aa32_fpsp_v3, s)) {
2301         return false;
2302     }
2303
2304     if (!dc_isar_feature(aa32_fpshvec, s) &&
2305         (veclen != 0 || s->vec_stride != 0)) {
2306         return false;
2307     }
2308
2309     if (!vfp_access_check(s)) {
2310         return true;
2311     }
2312
2313     if (veclen > 0) {
2314         /* Figure out what type of vector operation this is.  */
2315         if (vfp_sreg_is_scalar(vd)) {
2316             /* scalar */
2317             veclen = 0;
2318         } else {
2319             delta_d = s->vec_stride + 1;
2320         }
2321     }
2322
2323     fd = tcg_const_i32(vfp_expand_imm(MO_32, a->imm));
2324
2325     for (;;) {
2326         neon_store_reg32(fd, vd);
2327
2328         if (veclen == 0) {
2329             break;
2330         }
2331
2332         /* Set up the operands for the next iteration */
2333         veclen--;
2334         vd = vfp_advance_sreg(vd, delta_d);
2335     }
2336
2337     tcg_temp_free_i32(fd);
2338     return true;
2339 }
2340
2341 static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
2342 {
2343     uint32_t delta_d = 0;
2344     int veclen = s->vec_len;
2345     TCGv_i64 fd;
2346     uint32_t vd;
2347
2348     vd = a->vd;
2349
2350     if (!dc_isar_feature(aa32_fpdp_v3, s)) {
2351         return false;
2352     }
2353
2354     /* UNDEF accesses to D16-D31 if they don't exist. */
2355     if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) {
2356         return false;
2357     }
2358
2359     if (!dc_isar_feature(aa32_fpshvec, s) &&
2360         (veclen != 0 || s->vec_stride != 0)) {
2361         return false;
2362     }
2363
2364     if (!vfp_access_check(s)) {
2365         return true;
2366     }
2367
2368     if (veclen > 0) {
2369         /* Figure out what type of vector operation this is.  */
2370         if (vfp_dreg_is_scalar(vd)) {
2371             /* scalar */
2372             veclen = 0;
2373         } else {
2374             delta_d = (s->vec_stride >> 1) + 1;
2375         }
2376     }
2377
2378     fd = tcg_const_i64(vfp_expand_imm(MO_64, a->imm));
2379
2380     for (;;) {
2381         neon_store_reg64(fd, vd);
2382
2383         if (veclen == 0) {
2384             break;
2385         }
2386
2387         /* Set up the operands for the next iteration */
2388         veclen--;
2389         vd = vfp_advance_dreg(vd, delta_d);
2390     }
2391
2392     tcg_temp_free_i64(fd);
2393     return true;
2394 }
2395
2396 #define DO_VFP_2OP(INSN, PREC, FN)                              \
2397     static bool trans_##INSN##_##PREC(DisasContext *s,          \
2398                                       arg_##INSN##_##PREC *a)   \
2399     {                                                           \
2400         return do_vfp_2op_##PREC(s, FN, a->vd, a->vm);          \
2401     }
2402
2403 DO_VFP_2OP(VMOV_reg, sp, tcg_gen_mov_i32)
2404 DO_VFP_2OP(VMOV_reg, dp, tcg_gen_mov_i64)
2405
2406 DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh)
2407 DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss)
2408 DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd)
2409
2410 DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh)
2411 DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs)
2412 DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd)
2413
2414 static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
2415 {
2416     gen_helper_vfp_sqrth(vd, vm, cpu_env);
2417 }
2418
2419 static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
2420 {
2421     gen_helper_vfp_sqrts(vd, vm, cpu_env);
2422 }
2423
2424 static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
2425 {
2426     gen_helper_vfp_sqrtd(vd, vm, cpu_env);
2427 }
2428
2429 DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp)
2430 DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp)
2431 DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp)
2432
2433 static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
2434 {
2435     TCGv_i32 vd, vm;
2436
2437     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2438         return false;
2439     }
2440
2441     /* Vm/M bits must be zero for the Z variant */
2442     if (a->z && a->vm != 0) {
2443         return false;
2444     }
2445
2446     if (!vfp_access_check(s)) {
2447         return true;
2448     }
2449
2450     vd = tcg_temp_new_i32();
2451     vm = tcg_temp_new_i32();
2452
2453     neon_load_reg32(vd, a->vd);
2454     if (a->z) {
2455         tcg_gen_movi_i32(vm, 0);
2456     } else {
2457         neon_load_reg32(vm, a->vm);
2458     }
2459
2460     if (a->e) {
2461         gen_helper_vfp_cmpeh(vd, vm, cpu_env);
2462     } else {
2463         gen_helper_vfp_cmph(vd, vm, cpu_env);
2464     }
2465
2466     tcg_temp_free_i32(vd);
2467     tcg_temp_free_i32(vm);
2468
2469     return true;
2470 }
2471
2472 static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
2473 {
2474     TCGv_i32 vd, vm;
2475
2476     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
2477         return false;
2478     }
2479
2480     /* Vm/M bits must be zero for the Z variant */
2481     if (a->z && a->vm != 0) {
2482         return false;
2483     }
2484
2485     if (!vfp_access_check(s)) {
2486         return true;
2487     }
2488
2489     vd = tcg_temp_new_i32();
2490     vm = tcg_temp_new_i32();
2491
2492     neon_load_reg32(vd, a->vd);
2493     if (a->z) {
2494         tcg_gen_movi_i32(vm, 0);
2495     } else {
2496         neon_load_reg32(vm, a->vm);
2497     }
2498
2499     if (a->e) {
2500         gen_helper_vfp_cmpes(vd, vm, cpu_env);
2501     } else {
2502         gen_helper_vfp_cmps(vd, vm, cpu_env);
2503     }
2504
2505     tcg_temp_free_i32(vd);
2506     tcg_temp_free_i32(vm);
2507
2508     return true;
2509 }
2510
2511 static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
2512 {
2513     TCGv_i64 vd, vm;
2514
2515     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2516         return false;
2517     }
2518
2519     /* Vm/M bits must be zero for the Z variant */
2520     if (a->z && a->vm != 0) {
2521         return false;
2522     }
2523
2524     /* UNDEF accesses to D16-D31 if they don't exist. */
2525     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2526         return false;
2527     }
2528
2529     if (!vfp_access_check(s)) {
2530         return true;
2531     }
2532
2533     vd = tcg_temp_new_i64();
2534     vm = tcg_temp_new_i64();
2535
2536     neon_load_reg64(vd, a->vd);
2537     if (a->z) {
2538         tcg_gen_movi_i64(vm, 0);
2539     } else {
2540         neon_load_reg64(vm, a->vm);
2541     }
2542
2543     if (a->e) {
2544         gen_helper_vfp_cmped(vd, vm, cpu_env);
2545     } else {
2546         gen_helper_vfp_cmpd(vd, vm, cpu_env);
2547     }
2548
2549     tcg_temp_free_i64(vd);
2550     tcg_temp_free_i64(vm);
2551
2552     return true;
2553 }
2554
2555 static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
2556 {
2557     TCGv_ptr fpst;
2558     TCGv_i32 ahp_mode;
2559     TCGv_i32 tmp;
2560
2561     if (!dc_isar_feature(aa32_fp16_spconv, s)) {
2562         return false;
2563     }
2564
2565     if (!vfp_access_check(s)) {
2566         return true;
2567     }
2568
2569     fpst = fpstatus_ptr(FPST_FPCR);
2570     ahp_mode = get_ahp_flag();
2571     tmp = tcg_temp_new_i32();
2572     /* The T bit tells us if we want the low or high 16 bits of Vm */
2573     tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
2574     gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
2575     neon_store_reg32(tmp, a->vd);
2576     tcg_temp_free_i32(ahp_mode);
2577     tcg_temp_free_ptr(fpst);
2578     tcg_temp_free_i32(tmp);
2579     return true;
2580 }
2581
2582 static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
2583 {
2584     TCGv_ptr fpst;
2585     TCGv_i32 ahp_mode;
2586     TCGv_i32 tmp;
2587     TCGv_i64 vd;
2588
2589     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2590         return false;
2591     }
2592
2593     if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
2594         return false;
2595     }
2596
2597     /* UNDEF accesses to D16-D31 if they don't exist. */
2598     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd  & 0x10)) {
2599         return false;
2600     }
2601
2602     if (!vfp_access_check(s)) {
2603         return true;
2604     }
2605
2606     fpst = fpstatus_ptr(FPST_FPCR);
2607     ahp_mode = get_ahp_flag();
2608     tmp = tcg_temp_new_i32();
2609     /* The T bit tells us if we want the low or high 16 bits of Vm */
2610     tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
2611     vd = tcg_temp_new_i64();
2612     gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
2613     neon_store_reg64(vd, a->vd);
2614     tcg_temp_free_i32(ahp_mode);
2615     tcg_temp_free_ptr(fpst);
2616     tcg_temp_free_i32(tmp);
2617     tcg_temp_free_i64(vd);
2618     return true;
2619 }
2620
2621 static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
2622 {
2623     TCGv_ptr fpst;
2624     TCGv_i32 ahp_mode;
2625     TCGv_i32 tmp;
2626
2627     if (!dc_isar_feature(aa32_fp16_spconv, s)) {
2628         return false;
2629     }
2630
2631     if (!vfp_access_check(s)) {
2632         return true;
2633     }
2634
2635     fpst = fpstatus_ptr(FPST_FPCR);
2636     ahp_mode = get_ahp_flag();
2637     tmp = tcg_temp_new_i32();
2638
2639     neon_load_reg32(tmp, a->vm);
2640     gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
2641     tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
2642     tcg_temp_free_i32(ahp_mode);
2643     tcg_temp_free_ptr(fpst);
2644     tcg_temp_free_i32(tmp);
2645     return true;
2646 }
2647
2648 static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
2649 {
2650     TCGv_ptr fpst;
2651     TCGv_i32 ahp_mode;
2652     TCGv_i32 tmp;
2653     TCGv_i64 vm;
2654
2655     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2656         return false;
2657     }
2658
2659     if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
2660         return false;
2661     }
2662
2663     /* UNDEF accesses to D16-D31 if they don't exist. */
2664     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm  & 0x10)) {
2665         return false;
2666     }
2667
2668     if (!vfp_access_check(s)) {
2669         return true;
2670     }
2671
2672     fpst = fpstatus_ptr(FPST_FPCR);
2673     ahp_mode = get_ahp_flag();
2674     tmp = tcg_temp_new_i32();
2675     vm = tcg_temp_new_i64();
2676
2677     neon_load_reg64(vm, a->vm);
2678     gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
2679     tcg_temp_free_i64(vm);
2680     tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
2681     tcg_temp_free_i32(ahp_mode);
2682     tcg_temp_free_ptr(fpst);
2683     tcg_temp_free_i32(tmp);
2684     return true;
2685 }
2686
2687 static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
2688 {
2689     TCGv_ptr fpst;
2690     TCGv_i32 tmp;
2691
2692     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2693         return false;
2694     }
2695
2696     if (!vfp_access_check(s)) {
2697         return true;
2698     }
2699
2700     tmp = tcg_temp_new_i32();
2701     neon_load_reg32(tmp, a->vm);
2702     fpst = fpstatus_ptr(FPST_FPCR_F16);
2703     gen_helper_rinth(tmp, tmp, fpst);
2704     neon_store_reg32(tmp, a->vd);
2705     tcg_temp_free_ptr(fpst);
2706     tcg_temp_free_i32(tmp);
2707     return true;
2708 }
2709
2710 static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
2711 {
2712     TCGv_ptr fpst;
2713     TCGv_i32 tmp;
2714
2715     if (!dc_isar_feature(aa32_vrint, s)) {
2716         return false;
2717     }
2718
2719     if (!vfp_access_check(s)) {
2720         return true;
2721     }
2722
2723     tmp = tcg_temp_new_i32();
2724     neon_load_reg32(tmp, a->vm);
2725     fpst = fpstatus_ptr(FPST_FPCR);
2726     gen_helper_rints(tmp, tmp, fpst);
2727     neon_store_reg32(tmp, a->vd);
2728     tcg_temp_free_ptr(fpst);
2729     tcg_temp_free_i32(tmp);
2730     return true;
2731 }
2732
2733 static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
2734 {
2735     TCGv_ptr fpst;
2736     TCGv_i64 tmp;
2737
2738     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2739         return false;
2740     }
2741
2742     if (!dc_isar_feature(aa32_vrint, s)) {
2743         return false;
2744     }
2745
2746     /* UNDEF accesses to D16-D31 if they don't exist. */
2747     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2748         return false;
2749     }
2750
2751     if (!vfp_access_check(s)) {
2752         return true;
2753     }
2754
2755     tmp = tcg_temp_new_i64();
2756     neon_load_reg64(tmp, a->vm);
2757     fpst = fpstatus_ptr(FPST_FPCR);
2758     gen_helper_rintd(tmp, tmp, fpst);
2759     neon_store_reg64(tmp, a->vd);
2760     tcg_temp_free_ptr(fpst);
2761     tcg_temp_free_i64(tmp);
2762     return true;
2763 }
2764
2765 static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
2766 {
2767     TCGv_ptr fpst;
2768     TCGv_i32 tmp;
2769     TCGv_i32 tcg_rmode;
2770
2771     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2772         return false;
2773     }
2774
2775     if (!vfp_access_check(s)) {
2776         return true;
2777     }
2778
2779     tmp = tcg_temp_new_i32();
2780     neon_load_reg32(tmp, a->vm);
2781     fpst = fpstatus_ptr(FPST_FPCR_F16);
2782     tcg_rmode = tcg_const_i32(float_round_to_zero);
2783     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
2784     gen_helper_rinth(tmp, tmp, fpst);
2785     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
2786     neon_store_reg32(tmp, a->vd);
2787     tcg_temp_free_ptr(fpst);
2788     tcg_temp_free_i32(tcg_rmode);
2789     tcg_temp_free_i32(tmp);
2790     return true;
2791 }
2792
2793 static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
2794 {
2795     TCGv_ptr fpst;
2796     TCGv_i32 tmp;
2797     TCGv_i32 tcg_rmode;
2798
2799     if (!dc_isar_feature(aa32_vrint, s)) {
2800         return false;
2801     }
2802
2803     if (!vfp_access_check(s)) {
2804         return true;
2805     }
2806
2807     tmp = tcg_temp_new_i32();
2808     neon_load_reg32(tmp, a->vm);
2809     fpst = fpstatus_ptr(FPST_FPCR);
2810     tcg_rmode = tcg_const_i32(float_round_to_zero);
2811     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
2812     gen_helper_rints(tmp, tmp, fpst);
2813     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
2814     neon_store_reg32(tmp, a->vd);
2815     tcg_temp_free_ptr(fpst);
2816     tcg_temp_free_i32(tcg_rmode);
2817     tcg_temp_free_i32(tmp);
2818     return true;
2819 }
2820
2821 static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
2822 {
2823     TCGv_ptr fpst;
2824     TCGv_i64 tmp;
2825     TCGv_i32 tcg_rmode;
2826
2827     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2828         return false;
2829     }
2830
2831     if (!dc_isar_feature(aa32_vrint, s)) {
2832         return false;
2833     }
2834
2835     /* UNDEF accesses to D16-D31 if they don't exist. */
2836     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2837         return false;
2838     }
2839
2840     if (!vfp_access_check(s)) {
2841         return true;
2842     }
2843
2844     tmp = tcg_temp_new_i64();
2845     neon_load_reg64(tmp, a->vm);
2846     fpst = fpstatus_ptr(FPST_FPCR);
2847     tcg_rmode = tcg_const_i32(float_round_to_zero);
2848     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
2849     gen_helper_rintd(tmp, tmp, fpst);
2850     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
2851     neon_store_reg64(tmp, a->vd);
2852     tcg_temp_free_ptr(fpst);
2853     tcg_temp_free_i64(tmp);
2854     tcg_temp_free_i32(tcg_rmode);
2855     return true;
2856 }
2857
2858 static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
2859 {
2860     TCGv_ptr fpst;
2861     TCGv_i32 tmp;
2862
2863     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2864         return false;
2865     }
2866
2867     if (!vfp_access_check(s)) {
2868         return true;
2869     }
2870
2871     tmp = tcg_temp_new_i32();
2872     neon_load_reg32(tmp, a->vm);
2873     fpst = fpstatus_ptr(FPST_FPCR_F16);
2874     gen_helper_rinth_exact(tmp, tmp, fpst);
2875     neon_store_reg32(tmp, a->vd);
2876     tcg_temp_free_ptr(fpst);
2877     tcg_temp_free_i32(tmp);
2878     return true;
2879 }
2880
2881 static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
2882 {
2883     TCGv_ptr fpst;
2884     TCGv_i32 tmp;
2885
2886     if (!dc_isar_feature(aa32_vrint, s)) {
2887         return false;
2888     }
2889
2890     if (!vfp_access_check(s)) {
2891         return true;
2892     }
2893
2894     tmp = tcg_temp_new_i32();
2895     neon_load_reg32(tmp, a->vm);
2896     fpst = fpstatus_ptr(FPST_FPCR);
2897     gen_helper_rints_exact(tmp, tmp, fpst);
2898     neon_store_reg32(tmp, a->vd);
2899     tcg_temp_free_ptr(fpst);
2900     tcg_temp_free_i32(tmp);
2901     return true;
2902 }
2903
2904 static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
2905 {
2906     TCGv_ptr fpst;
2907     TCGv_i64 tmp;
2908
2909     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2910         return false;
2911     }
2912
2913     if (!dc_isar_feature(aa32_vrint, s)) {
2914         return false;
2915     }
2916
2917     /* UNDEF accesses to D16-D31 if they don't exist. */
2918     if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
2919         return false;
2920     }
2921
2922     if (!vfp_access_check(s)) {
2923         return true;
2924     }
2925
2926     tmp = tcg_temp_new_i64();
2927     neon_load_reg64(tmp, a->vm);
2928     fpst = fpstatus_ptr(FPST_FPCR);
2929     gen_helper_rintd_exact(tmp, tmp, fpst);
2930     neon_store_reg64(tmp, a->vd);
2931     tcg_temp_free_ptr(fpst);
2932     tcg_temp_free_i64(tmp);
2933     return true;
2934 }
2935
2936 static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
2937 {
2938     TCGv_i64 vd;
2939     TCGv_i32 vm;
2940
2941     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2942         return false;
2943     }
2944
2945     /* UNDEF accesses to D16-D31 if they don't exist. */
2946     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
2947         return false;
2948     }
2949
2950     if (!vfp_access_check(s)) {
2951         return true;
2952     }
2953
2954     vm = tcg_temp_new_i32();
2955     vd = tcg_temp_new_i64();
2956     neon_load_reg32(vm, a->vm);
2957     gen_helper_vfp_fcvtds(vd, vm, cpu_env);
2958     neon_store_reg64(vd, a->vd);
2959     tcg_temp_free_i32(vm);
2960     tcg_temp_free_i64(vd);
2961     return true;
2962 }
2963
2964 static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
2965 {
2966     TCGv_i64 vm;
2967     TCGv_i32 vd;
2968
2969     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
2970         return false;
2971     }
2972
2973     /* UNDEF accesses to D16-D31 if they don't exist. */
2974     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
2975         return false;
2976     }
2977
2978     if (!vfp_access_check(s)) {
2979         return true;
2980     }
2981
2982     vd = tcg_temp_new_i32();
2983     vm = tcg_temp_new_i64();
2984     neon_load_reg64(vm, a->vm);
2985     gen_helper_vfp_fcvtsd(vd, vm, cpu_env);
2986     neon_store_reg32(vd, a->vd);
2987     tcg_temp_free_i32(vd);
2988     tcg_temp_free_i64(vm);
2989     return true;
2990 }
2991
2992 static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
2993 {
2994     TCGv_i32 vm;
2995     TCGv_ptr fpst;
2996
2997     if (!dc_isar_feature(aa32_fp16_arith, s)) {
2998         return false;
2999     }
3000
3001     if (!vfp_access_check(s)) {
3002         return true;
3003     }
3004
3005     vm = tcg_temp_new_i32();
3006     neon_load_reg32(vm, a->vm);
3007     fpst = fpstatus_ptr(FPST_FPCR_F16);
3008     if (a->s) {
3009         /* i32 -> f16 */
3010         gen_helper_vfp_sitoh(vm, vm, fpst);
3011     } else {
3012         /* u32 -> f16 */
3013         gen_helper_vfp_uitoh(vm, vm, fpst);
3014     }
3015     neon_store_reg32(vm, a->vd);
3016     tcg_temp_free_i32(vm);
3017     tcg_temp_free_ptr(fpst);
3018     return true;
3019 }
3020
3021 static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
3022 {
3023     TCGv_i32 vm;
3024     TCGv_ptr fpst;
3025
3026     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
3027         return false;
3028     }
3029
3030     if (!vfp_access_check(s)) {
3031         return true;
3032     }
3033
3034     vm = tcg_temp_new_i32();
3035     neon_load_reg32(vm, a->vm);
3036     fpst = fpstatus_ptr(FPST_FPCR);
3037     if (a->s) {
3038         /* i32 -> f32 */
3039         gen_helper_vfp_sitos(vm, vm, fpst);
3040     } else {
3041         /* u32 -> f32 */
3042         gen_helper_vfp_uitos(vm, vm, fpst);
3043     }
3044     neon_store_reg32(vm, a->vd);
3045     tcg_temp_free_i32(vm);
3046     tcg_temp_free_ptr(fpst);
3047     return true;
3048 }
3049
3050 static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
3051 {
3052     TCGv_i32 vm;
3053     TCGv_i64 vd;
3054     TCGv_ptr fpst;
3055
3056     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3057         return false;
3058     }
3059
3060     /* UNDEF accesses to D16-D31 if they don't exist. */
3061     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
3062         return false;
3063     }
3064
3065     if (!vfp_access_check(s)) {
3066         return true;
3067     }
3068
3069     vm = tcg_temp_new_i32();
3070     vd = tcg_temp_new_i64();
3071     neon_load_reg32(vm, a->vm);
3072     fpst = fpstatus_ptr(FPST_FPCR);
3073     if (a->s) {
3074         /* i32 -> f64 */
3075         gen_helper_vfp_sitod(vd, vm, fpst);
3076     } else {
3077         /* u32 -> f64 */
3078         gen_helper_vfp_uitod(vd, vm, fpst);
3079     }
3080     neon_store_reg64(vd, a->vd);
3081     tcg_temp_free_i32(vm);
3082     tcg_temp_free_i64(vd);
3083     tcg_temp_free_ptr(fpst);
3084     return true;
3085 }
3086
3087 static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
3088 {
3089     TCGv_i32 vd;
3090     TCGv_i64 vm;
3091
3092     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3093         return false;
3094     }
3095
3096     if (!dc_isar_feature(aa32_jscvt, s)) {
3097         return false;
3098     }
3099
3100     /* UNDEF accesses to D16-D31 if they don't exist. */
3101     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
3102         return false;
3103     }
3104
3105     if (!vfp_access_check(s)) {
3106         return true;
3107     }
3108
3109     vm = tcg_temp_new_i64();
3110     vd = tcg_temp_new_i32();
3111     neon_load_reg64(vm, a->vm);
3112     gen_helper_vjcvt(vd, vm, cpu_env);
3113     neon_store_reg32(vd, a->vd);
3114     tcg_temp_free_i64(vm);
3115     tcg_temp_free_i32(vd);
3116     return true;
3117 }
3118
3119 static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
3120 {
3121     TCGv_i32 vd, shift;
3122     TCGv_ptr fpst;
3123     int frac_bits;
3124
3125     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3126         return false;
3127     }
3128
3129     if (!vfp_access_check(s)) {
3130         return true;
3131     }
3132
3133     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
3134
3135     vd = tcg_temp_new_i32();
3136     neon_load_reg32(vd, a->vd);
3137
3138     fpst = fpstatus_ptr(FPST_FPCR_F16);
3139     shift = tcg_const_i32(frac_bits);
3140
3141     /* Switch on op:U:sx bits */
3142     switch (a->opc) {
3143     case 0:
3144         gen_helper_vfp_shtoh(vd, vd, shift, fpst);
3145         break;
3146     case 1:
3147         gen_helper_vfp_sltoh(vd, vd, shift, fpst);
3148         break;
3149     case 2:
3150         gen_helper_vfp_uhtoh(vd, vd, shift, fpst);
3151         break;
3152     case 3:
3153         gen_helper_vfp_ultoh(vd, vd, shift, fpst);
3154         break;
3155     case 4:
3156         gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst);
3157         break;
3158     case 5:
3159         gen_helper_vfp_toslh_round_to_zero(vd, vd, shift, fpst);
3160         break;
3161     case 6:
3162         gen_helper_vfp_touhh_round_to_zero(vd, vd, shift, fpst);
3163         break;
3164     case 7:
3165         gen_helper_vfp_toulh_round_to_zero(vd, vd, shift, fpst);
3166         break;
3167     default:
3168         g_assert_not_reached();
3169     }
3170
3171     neon_store_reg32(vd, a->vd);
3172     tcg_temp_free_i32(vd);
3173     tcg_temp_free_i32(shift);
3174     tcg_temp_free_ptr(fpst);
3175     return true;
3176 }
3177
3178 static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
3179 {
3180     TCGv_i32 vd, shift;
3181     TCGv_ptr fpst;
3182     int frac_bits;
3183
3184     if (!dc_isar_feature(aa32_fpsp_v3, s)) {
3185         return false;
3186     }
3187
3188     if (!vfp_access_check(s)) {
3189         return true;
3190     }
3191
3192     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
3193
3194     vd = tcg_temp_new_i32();
3195     neon_load_reg32(vd, a->vd);
3196
3197     fpst = fpstatus_ptr(FPST_FPCR);
3198     shift = tcg_const_i32(frac_bits);
3199
3200     /* Switch on op:U:sx bits */
3201     switch (a->opc) {
3202     case 0:
3203         gen_helper_vfp_shtos(vd, vd, shift, fpst);
3204         break;
3205     case 1:
3206         gen_helper_vfp_sltos(vd, vd, shift, fpst);
3207         break;
3208     case 2:
3209         gen_helper_vfp_uhtos(vd, vd, shift, fpst);
3210         break;
3211     case 3:
3212         gen_helper_vfp_ultos(vd, vd, shift, fpst);
3213         break;
3214     case 4:
3215         gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst);
3216         break;
3217     case 5:
3218         gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst);
3219         break;
3220     case 6:
3221         gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst);
3222         break;
3223     case 7:
3224         gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst);
3225         break;
3226     default:
3227         g_assert_not_reached();
3228     }
3229
3230     neon_store_reg32(vd, a->vd);
3231     tcg_temp_free_i32(vd);
3232     tcg_temp_free_i32(shift);
3233     tcg_temp_free_ptr(fpst);
3234     return true;
3235 }
3236
3237 static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
3238 {
3239     TCGv_i64 vd;
3240     TCGv_i32 shift;
3241     TCGv_ptr fpst;
3242     int frac_bits;
3243
3244     if (!dc_isar_feature(aa32_fpdp_v3, s)) {
3245         return false;
3246     }
3247
3248     /* UNDEF accesses to D16-D31 if they don't exist. */
3249     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
3250         return false;
3251     }
3252
3253     if (!vfp_access_check(s)) {
3254         return true;
3255     }
3256
3257     frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);
3258
3259     vd = tcg_temp_new_i64();
3260     neon_load_reg64(vd, a->vd);
3261
3262     fpst = fpstatus_ptr(FPST_FPCR);
3263     shift = tcg_const_i32(frac_bits);
3264
3265     /* Switch on op:U:sx bits */
3266     switch (a->opc) {
3267     case 0:
3268         gen_helper_vfp_shtod(vd, vd, shift, fpst);
3269         break;
3270     case 1:
3271         gen_helper_vfp_sltod(vd, vd, shift, fpst);
3272         break;
3273     case 2:
3274         gen_helper_vfp_uhtod(vd, vd, shift, fpst);
3275         break;
3276     case 3:
3277         gen_helper_vfp_ultod(vd, vd, shift, fpst);
3278         break;
3279     case 4:
3280         gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst);
3281         break;
3282     case 5:
3283         gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst);
3284         break;
3285     case 6:
3286         gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst);
3287         break;
3288     case 7:
3289         gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst);
3290         break;
3291     default:
3292         g_assert_not_reached();
3293     }
3294
3295     neon_store_reg64(vd, a->vd);
3296     tcg_temp_free_i64(vd);
3297     tcg_temp_free_i32(shift);
3298     tcg_temp_free_ptr(fpst);
3299     return true;
3300 }
3301
3302 static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
3303 {
3304     TCGv_i32 vm;
3305     TCGv_ptr fpst;
3306
3307     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3308         return false;
3309     }
3310
3311     if (!vfp_access_check(s)) {
3312         return true;
3313     }
3314
3315     fpst = fpstatus_ptr(FPST_FPCR_F16);
3316     vm = tcg_temp_new_i32();
3317     neon_load_reg32(vm, a->vm);
3318
3319     if (a->s) {
3320         if (a->rz) {
3321             gen_helper_vfp_tosizh(vm, vm, fpst);
3322         } else {
3323             gen_helper_vfp_tosih(vm, vm, fpst);
3324         }
3325     } else {
3326         if (a->rz) {
3327             gen_helper_vfp_touizh(vm, vm, fpst);
3328         } else {
3329             gen_helper_vfp_touih(vm, vm, fpst);
3330         }
3331     }
3332     neon_store_reg32(vm, a->vd);
3333     tcg_temp_free_i32(vm);
3334     tcg_temp_free_ptr(fpst);
3335     return true;
3336 }
3337
3338 static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
3339 {
3340     TCGv_i32 vm;
3341     TCGv_ptr fpst;
3342
3343     if (!dc_isar_feature(aa32_fpsp_v2, s)) {
3344         return false;
3345     }
3346
3347     if (!vfp_access_check(s)) {
3348         return true;
3349     }
3350
3351     fpst = fpstatus_ptr(FPST_FPCR);
3352     vm = tcg_temp_new_i32();
3353     neon_load_reg32(vm, a->vm);
3354
3355     if (a->s) {
3356         if (a->rz) {
3357             gen_helper_vfp_tosizs(vm, vm, fpst);
3358         } else {
3359             gen_helper_vfp_tosis(vm, vm, fpst);
3360         }
3361     } else {
3362         if (a->rz) {
3363             gen_helper_vfp_touizs(vm, vm, fpst);
3364         } else {
3365             gen_helper_vfp_touis(vm, vm, fpst);
3366         }
3367     }
3368     neon_store_reg32(vm, a->vd);
3369     tcg_temp_free_i32(vm);
3370     tcg_temp_free_ptr(fpst);
3371     return true;
3372 }
3373
3374 static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
3375 {
3376     TCGv_i32 vd;
3377     TCGv_i64 vm;
3378     TCGv_ptr fpst;
3379
3380     if (!dc_isar_feature(aa32_fpdp_v2, s)) {
3381         return false;
3382     }
3383
3384     /* UNDEF accesses to D16-D31 if they don't exist. */
3385     if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
3386         return false;
3387     }
3388
3389     if (!vfp_access_check(s)) {
3390         return true;
3391     }
3392
3393     fpst = fpstatus_ptr(FPST_FPCR);
3394     vm = tcg_temp_new_i64();
3395     vd = tcg_temp_new_i32();
3396     neon_load_reg64(vm, a->vm);
3397
3398     if (a->s) {
3399         if (a->rz) {
3400             gen_helper_vfp_tosizd(vd, vm, fpst);
3401         } else {
3402             gen_helper_vfp_tosid(vd, vm, fpst);
3403         }
3404     } else {
3405         if (a->rz) {
3406             gen_helper_vfp_touizd(vd, vm, fpst);
3407         } else {
3408             gen_helper_vfp_touid(vd, vm, fpst);
3409         }
3410     }
3411     neon_store_reg32(vd, a->vd);
3412     tcg_temp_free_i32(vd);
3413     tcg_temp_free_i64(vm);
3414     tcg_temp_free_ptr(fpst);
3415     return true;
3416 }
3417
3418 /*
3419  * Decode VLLDM and VLSTM are nonstandard because:
3420  *  * if there is no FPU then these insns must NOP in
3421  *    Secure state and UNDEF in Nonsecure state
3422  *  * if there is an FPU then these insns do not have
3423  *    the usual behaviour that vfp_access_check() provides of
3424  *    being controlled by CPACR/NSACR enable bits or the
3425  *    lazy-stacking logic.
3426  */
3427 static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a)
3428 {
3429     TCGv_i32 fptr;
3430
3431     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
3432         !arm_dc_feature(s, ARM_FEATURE_V8)) {
3433         return false;
3434     }
3435     /*
3436      * If not secure, UNDEF. We must emit code for this
3437      * rather than returning false so that this takes
3438      * precedence over the m-nocp.decode NOCP fallback.
3439      */
3440     if (!s->v8m_secure) {
3441         unallocated_encoding(s);
3442         return true;
3443     }
3444     /* If no fpu, NOP. */
3445     if (!dc_isar_feature(aa32_vfp, s)) {
3446         return true;
3447     }
3448
3449     fptr = load_reg(s, a->rn);
3450     if (a->l) {
3451         gen_helper_v7m_vlldm(cpu_env, fptr);
3452     } else {
3453         gen_helper_v7m_vlstm(cpu_env, fptr);
3454     }
3455     tcg_temp_free_i32(fptr);
3456
3457     /* End the TB, because we have updated FP control bits */
3458     s->base.is_jmp = DISAS_UPDATE_EXIT;
3459     return true;
3460 }
3461
3462 static bool trans_NOCP(DisasContext *s, arg_NOCP *a)
3463 {
3464     /*
3465      * Handle M-profile early check for disabled coprocessor:
3466      * all we need to do here is emit the NOCP exception if
3467      * the coprocessor is disabled. Otherwise we return false
3468      * and the real VFP/etc decode will handle the insn.
3469      */
3470     assert(arm_dc_feature(s, ARM_FEATURE_M));
3471
3472     if (a->cp == 11) {
3473         a->cp = 10;
3474     }
3475     /* TODO: in v8.1M cp 8, 9, 14, 15 also are governed by the cp10 enable */
3476
3477     if (a->cp != 10) {
3478         gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
3479                            syn_uncategorized(), default_exception_el(s));
3480         return true;
3481     }
3482
3483     if (s->fp_excp_el != 0) {
3484         gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
3485                            syn_uncategorized(), s->fp_excp_el);
3486         return true;
3487     }
3488
3489     return false;
3490 }
3491
3492 static bool trans_VINS(DisasContext *s, arg_VINS *a)
3493 {
3494     TCGv_i32 rd, rm;
3495
3496     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3497         return false;
3498     }
3499
3500     if (s->vec_len != 0 || s->vec_stride != 0) {
3501         return false;
3502     }
3503
3504     if (!vfp_access_check(s)) {
3505         return true;
3506     }
3507
3508     /* Insert low half of Vm into high half of Vd */
3509     rm = tcg_temp_new_i32();
3510     rd = tcg_temp_new_i32();
3511     neon_load_reg32(rm, a->vm);
3512     neon_load_reg32(rd, a->vd);
3513     tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
3514     neon_store_reg32(rd, a->vd);
3515     tcg_temp_free_i32(rm);
3516     tcg_temp_free_i32(rd);
3517     return true;
3518 }
3519
3520 static bool trans_VMOVX(DisasContext *s, arg_VINS *a)
3521 {
3522     TCGv_i32 rm;
3523
3524     if (!dc_isar_feature(aa32_fp16_arith, s)) {
3525         return false;
3526     }
3527
3528     if (s->vec_len != 0 || s->vec_stride != 0) {
3529         return false;
3530     }
3531
3532     if (!vfp_access_check(s)) {
3533         return true;
3534     }
3535
3536     /* Set Vd to high half of Vm */
3537     rm = tcg_temp_new_i32();
3538     neon_load_reg32(rm, a->vm);
3539     tcg_gen_shri_i32(rm, rm, 16);
3540     neon_store_reg32(rm, a->vd);
3541     tcg_temp_free_i32(rm);
3542     return true;
3543 }