target/arm/translate-vfp.inc.c

   1 /*
   2  *  ARM translation: AArch32 VFP instructions
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *  Copyright (c) 2005-2007 CodeSourcery
   6  *  Copyright (c) 2007 OpenedHand, Ltd.
   7  *  Copyright (c) 2019 Linaro, Ltd.
   8  *
   9  * This library is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2 of the License, or (at your option) any later version.
  13  *
  14  * This library is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  21  */
  22
  23 /*
  24  * This file is intended to be included from translate.c; it uses
  25  * some macros and definitions provided by that file.
  26  * It might be possible to convert it to a standalone .c file eventually.
  27  */
  28
  29 /* Include the generated VFP decoder */
  30 #include "decode-vfp.inc.c"
  31 #include "decode-vfp-uncond.inc.c"
  32
  33 /*
  34  * Check that VFP access is enabled. If it is, do the necessary
  35  * M-profile lazy-FP handling and then return true.
  36  * If not, emit code to generate an appropriate exception and
  37  * return false.
  38  * The ignore_vfp_enabled argument specifies that we should ignore
  39  * whether VFP is enabled via FPEXC[EN]: this should be true for FMXR/FMRX
  40  * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
  41  */
  42 static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
  43 {
  44     if (s->fp_excp_el) {
  45         if (arm_dc_feature(s, ARM_FEATURE_M)) {
  46             gen_exception_insn(s, 4, EXCP_NOCP, syn_uncategorized(),
  47                                s->fp_excp_el);
  48         } else {
  49             gen_exception_insn(s, 4, EXCP_UDEF,
  50                                syn_fp_access_trap(1, 0xe, false),
  51                                s->fp_excp_el);
  52         }
  53         return false;
  54     }
  55
  56     if (!s->vfp_enabled && !ignore_vfp_enabled) {
  57         assert(!arm_dc_feature(s, ARM_FEATURE_M));
  58         gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
  59                            default_exception_el(s));
  60         return false;
  61     }
  62
  63     if (arm_dc_feature(s, ARM_FEATURE_M)) {
  64         /* Handle M-profile lazy FP state mechanics */
  65
  66         /* Trigger lazy-state preservation if necessary */
  67         if (s->v7m_lspact) {
  68             /*
  69              * Lazy state saving affects external memory and also the NVIC,
  70              * so we must mark it as an IO operation for icount.
  71              */
  72             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
  73                 gen_io_start();
  74             }
  75             gen_helper_v7m_preserve_fp_state(cpu_env);
  76             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
  77                 gen_io_end();
  78             }
  79             /*
  80              * If the preserve_fp_state helper doesn't throw an exception
  81              * then it will clear LSPACT; we don't need to repeat this for
  82              * any further FP insns in this TB.
  83              */
  84             s->v7m_lspact = false;
  85         }
  86
  87         /* Update ownership of FP context: set FPCCR.S to match current state */
  88         if (s->v8m_fpccr_s_wrong) {
  89             TCGv_i32 tmp;
  90
  91             tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
  92             if (s->v8m_secure) {
  93                 tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
  94             } else {
  95                 tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
  96             }
  97             store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
  98             /* Don't need to do this for any further FP insns in this TB */
  99             s->v8m_fpccr_s_wrong = false;
 100         }
 101
 102         if (s->v7m_new_fp_ctxt_needed) {
 103             /*
 104              * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA
 105              * and the FPSCR.
 106              */
 107             TCGv_i32 control, fpscr;
 108             uint32_t bits = R_V7M_CONTROL_FPCA_MASK;
 109
 110             fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
 111             gen_helper_vfp_set_fpscr(cpu_env, fpscr);
 112             tcg_temp_free_i32(fpscr);
 113             /*
 114              * We don't need to arrange to end the TB, because the only
 115              * parts of FPSCR which we cache in the TB flags are the VECLEN
 116              * and VECSTRIDE, and those don't exist for M-profile.
 117              */
 118
 119             if (s->v8m_secure) {
 120                 bits |= R_V7M_CONTROL_SFPA_MASK;
 121             }
 122             control = load_cpu_field(v7m.control[M_REG_S]);
 123             tcg_gen_ori_i32(control, control, bits);
 124             store_cpu_field(control, v7m.control[M_REG_S]);
 125             /* Don't need to do this for any further FP insns in this TB */
 126             s->v7m_new_fp_ctxt_needed = false;
 127         }
 128     }
 129
 130     return true;
 131 }
 132
 133 /*
 134  * The most usual kind of VFP access check, for everything except
 135  * FMXR/FMRX to the always-available special registers.
 136  */
 137 static bool vfp_access_check(DisasContext *s)
 138 {
 139     return full_vfp_access_check(s, false);
 140 }
 141
 142 static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
 143 {
 144     uint32_t rd, rn, rm;
 145     bool dp = a->dp;
 146
 147     if (!dc_isar_feature(aa32_vsel, s)) {
 148         return false;
 149     }
 150
 151     /* UNDEF accesses to D16-D31 if they don't exist */
 152     if (dp && !dc_isar_feature(aa32_fp_d32, s) &&
 153         ((a->vm | a->vn | a->vd) & 0x10)) {
 154         return false;
 155     }
 156     rd = a->vd;
 157     rn = a->vn;
 158     rm = a->vm;
 159
 160     if (!vfp_access_check(s)) {
 161         return true;
 162     }
 163
 164     if (dp) {
 165         TCGv_i64 frn, frm, dest;
 166         TCGv_i64 tmp, zero, zf, nf, vf;
 167
 168         zero = tcg_const_i64(0);
 169
 170         frn = tcg_temp_new_i64();
 171         frm = tcg_temp_new_i64();
 172         dest = tcg_temp_new_i64();
 173
 174         zf = tcg_temp_new_i64();
 175         nf = tcg_temp_new_i64();
 176         vf = tcg_temp_new_i64();
 177
 178         tcg_gen_extu_i32_i64(zf, cpu_ZF);
 179         tcg_gen_ext_i32_i64(nf, cpu_NF);
 180         tcg_gen_ext_i32_i64(vf, cpu_VF);
 181
 182         neon_load_reg64(frn, rn);
 183         neon_load_reg64(frm, rm);
 184         switch (a->cc) {
 185         case 0: /* eq: Z */
 186             tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
 187                                 frn, frm);
 188             break;
 189         case 1: /* vs: V */
 190             tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
 191                                 frn, frm);
 192             break;
 193         case 2: /* ge: N == V -> N ^ V == 0 */
 194             tmp = tcg_temp_new_i64();
 195             tcg_gen_xor_i64(tmp, vf, nf);
 196             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
 197                                 frn, frm);
 198             tcg_temp_free_i64(tmp);
 199             break;
 200         case 3: /* gt: !Z && N == V */
 201             tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
 202                                 frn, frm);
 203             tmp = tcg_temp_new_i64();
 204             tcg_gen_xor_i64(tmp, vf, nf);
 205             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
 206                                 dest, frm);
 207             tcg_temp_free_i64(tmp);
 208             break;
 209         }
 210         neon_store_reg64(dest, rd);
 211         tcg_temp_free_i64(frn);
 212         tcg_temp_free_i64(frm);
 213         tcg_temp_free_i64(dest);
 214
 215         tcg_temp_free_i64(zf);
 216         tcg_temp_free_i64(nf);
 217         tcg_temp_free_i64(vf);
 218
 219         tcg_temp_free_i64(zero);
 220     } else {
 221         TCGv_i32 frn, frm, dest;
 222         TCGv_i32 tmp, zero;
 223
 224         zero = tcg_const_i32(0);
 225
 226         frn = tcg_temp_new_i32();
 227         frm = tcg_temp_new_i32();
 228         dest = tcg_temp_new_i32();
 229         neon_load_reg32(frn, rn);
 230         neon_load_reg32(frm, rm);
 231         switch (a->cc) {
 232         case 0: /* eq: Z */
 233             tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
 234                                 frn, frm);
 235             break;
 236         case 1: /* vs: V */
 237             tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
 238                                 frn, frm);
 239             break;
 240         case 2: /* ge: N == V -> N ^ V == 0 */
 241             tmp = tcg_temp_new_i32();
 242             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
 243             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
 244                                 frn, frm);
 245             tcg_temp_free_i32(tmp);
 246             break;
 247         case 3: /* gt: !Z && N == V */
 248             tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
 249                                 frn, frm);
 250             tmp = tcg_temp_new_i32();
 251             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
 252             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
 253                                 dest, frm);
 254             tcg_temp_free_i32(tmp);
 255             break;
 256         }
 257         neon_store_reg32(dest, rd);
 258         tcg_temp_free_i32(frn);
 259         tcg_temp_free_i32(frm);
 260         tcg_temp_free_i32(dest);
 261
 262         tcg_temp_free_i32(zero);
 263     }
 264
 265     return true;
 266 }
 267
 268 static bool trans_VMINMAXNM(DisasContext *s, arg_VMINMAXNM *a)
 269 {
 270     uint32_t rd, rn, rm;
 271     bool dp = a->dp;
 272     bool vmin = a->op;
 273     TCGv_ptr fpst;
 274
 275     if (!dc_isar_feature(aa32_vminmaxnm, s)) {
 276         return false;
 277     }
 278
 279     /* UNDEF accesses to D16-D31 if they don't exist */
 280     if (dp && !dc_isar_feature(aa32_fp_d32, s) &&
 281         ((a->vm | a->vn | a->vd) & 0x10)) {
 282         return false;
 283     }
 284     rd = a->vd;
 285     rn = a->vn;
 286     rm = a->vm;
 287
 288     if (!vfp_access_check(s)) {
 289         return true;
 290     }
 291
 292     fpst = get_fpstatus_ptr(0);
 293
 294     if (dp) {
 295         TCGv_i64 frn, frm, dest;
 296
 297         frn = tcg_temp_new_i64();
 298         frm = tcg_temp_new_i64();
 299         dest = tcg_temp_new_i64();
 300
 301         neon_load_reg64(frn, rn);
 302         neon_load_reg64(frm, rm);
 303         if (vmin) {
 304             gen_helper_vfp_minnumd(dest, frn, frm, fpst);
 305         } else {
 306             gen_helper_vfp_maxnumd(dest, frn, frm, fpst);
 307         }
 308         neon_store_reg64(dest, rd);
 309         tcg_temp_free_i64(frn);
 310         tcg_temp_free_i64(frm);
 311         tcg_temp_free_i64(dest);
 312     } else {
 313         TCGv_i32 frn, frm, dest;
 314
 315         frn = tcg_temp_new_i32();
 316         frm = tcg_temp_new_i32();
 317         dest = tcg_temp_new_i32();
 318
 319         neon_load_reg32(frn, rn);
 320         neon_load_reg32(frm, rm);
 321         if (vmin) {
 322             gen_helper_vfp_minnums(dest, frn, frm, fpst);
 323         } else {
 324             gen_helper_vfp_maxnums(dest, frn, frm, fpst);
 325         }
 326         neon_store_reg32(dest, rd);
 327         tcg_temp_free_i32(frn);
 328         tcg_temp_free_i32(frm);
 329         tcg_temp_free_i32(dest);
 330     }
 331
 332     tcg_temp_free_ptr(fpst);
 333     return true;
 334 }
 335
 336 /*
 337  * Table for converting the most common AArch32 encoding of
 338  * rounding mode to arm_fprounding order (which matches the
 339  * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
 340  */
 341 static const uint8_t fp_decode_rm[] = {
 342     FPROUNDING_TIEAWAY,
 343     FPROUNDING_TIEEVEN,
 344     FPROUNDING_POSINF,
 345     FPROUNDING_NEGINF,
 346 };
 347
 348 static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
 349 {
 350     uint32_t rd, rm;
 351     bool dp = a->dp;
 352     TCGv_ptr fpst;
 353     TCGv_i32 tcg_rmode;
 354     int rounding = fp_decode_rm[a->rm];
 355
 356     if (!dc_isar_feature(aa32_vrint, s)) {
 357         return false;
 358     }
 359
 360     /* UNDEF accesses to D16-D31 if they don't exist */
 361     if (dp && !dc_isar_feature(aa32_fp_d32, s) &&
 362         ((a->vm | a->vd) & 0x10)) {
 363         return false;
 364     }
 365     rd = a->vd;
 366     rm = a->vm;
 367
 368     if (!vfp_access_check(s)) {
 369         return true;
 370     }
 371
 372     fpst = get_fpstatus_ptr(0);
 373
 374     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
 375     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 376
 377     if (dp) {
 378         TCGv_i64 tcg_op;
 379         TCGv_i64 tcg_res;
 380         tcg_op = tcg_temp_new_i64();
 381         tcg_res = tcg_temp_new_i64();
 382         neon_load_reg64(tcg_op, rm);
 383         gen_helper_rintd(tcg_res, tcg_op, fpst);
 384         neon_store_reg64(tcg_res, rd);
 385         tcg_temp_free_i64(tcg_op);
 386         tcg_temp_free_i64(tcg_res);
 387     } else {
 388         TCGv_i32 tcg_op;
 389         TCGv_i32 tcg_res;
 390         tcg_op = tcg_temp_new_i32();
 391         tcg_res = tcg_temp_new_i32();
 392         neon_load_reg32(tcg_op, rm);
 393         gen_helper_rints(tcg_res, tcg_op, fpst);
 394         neon_store_reg32(tcg_res, rd);
 395         tcg_temp_free_i32(tcg_op);
 396         tcg_temp_free_i32(tcg_res);
 397     }
 398
 399     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 400     tcg_temp_free_i32(tcg_rmode);
 401
 402     tcg_temp_free_ptr(fpst);
 403     return true;
 404 }
 405
 406 static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
 407 {
 408     uint32_t rd, rm;
 409     bool dp = a->dp;
 410     TCGv_ptr fpst;
 411     TCGv_i32 tcg_rmode, tcg_shift;
 412     int rounding = fp_decode_rm[a->rm];
 413     bool is_signed = a->op;
 414
 415     if (!dc_isar_feature(aa32_vcvt_dr, s)) {
 416         return false;
 417     }
 418
 419     /* UNDEF accesses to D16-D31 if they don't exist */
 420     if (dp && !dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) {
 421         return false;
 422     }
 423     rd = a->vd;
 424     rm = a->vm;
 425
 426     if (!vfp_access_check(s)) {
 427         return true;
 428     }
 429
 430     fpst = get_fpstatus_ptr(0);
 431
 432     tcg_shift = tcg_const_i32(0);
 433
 434     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
 435     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 436
 437     if (dp) {
 438         TCGv_i64 tcg_double, tcg_res;
 439         TCGv_i32 tcg_tmp;
 440         tcg_double = tcg_temp_new_i64();
 441         tcg_res = tcg_temp_new_i64();
 442         tcg_tmp = tcg_temp_new_i32();
 443         neon_load_reg64(tcg_double, rm);
 444         if (is_signed) {
 445             gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
 446         } else {
 447             gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
 448         }
 449         tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
 450         neon_store_reg32(tcg_tmp, rd);
 451         tcg_temp_free_i32(tcg_tmp);
 452         tcg_temp_free_i64(tcg_res);
 453         tcg_temp_free_i64(tcg_double);
 454     } else {
 455         TCGv_i32 tcg_single, tcg_res;
 456         tcg_single = tcg_temp_new_i32();
 457         tcg_res = tcg_temp_new_i32();
 458         neon_load_reg32(tcg_single, rm);
 459         if (is_signed) {
 460             gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
 461         } else {
 462             gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
 463         }
 464         neon_store_reg32(tcg_res, rd);
 465         tcg_temp_free_i32(tcg_res);
 466         tcg_temp_free_i32(tcg_single);
 467     }
 468
 469     gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
 470     tcg_temp_free_i32(tcg_rmode);
 471
 472     tcg_temp_free_i32(tcg_shift);
 473
 474     tcg_temp_free_ptr(fpst);
 475
 476     return true;
 477 }
 478
 479 static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
 480 {
 481     /* VMOV scalar to general purpose register */
 482     TCGv_i32 tmp;
 483     int pass;
 484     uint32_t offset;
 485
 486     /* UNDEF accesses to D16-D31 if they don't exist */
 487     if (!dc_isar_feature(aa32_fp_d32, s) && (a->vn & 0x10)) {
 488         return false;
 489     }
 490
 491     offset = a->index << a->size;
 492     pass = extract32(offset, 2, 1);
 493     offset = extract32(offset, 0, 2) * 8;
 494
 495     if (a->size != 2 && !arm_dc_feature(s, ARM_FEATURE_NEON)) {
 496         return false;
 497     }
 498
 499     if (!vfp_access_check(s)) {
 500         return true;
 501     }
 502
 503     tmp = neon_load_reg(a->vn, pass);
 504     switch (a->size) {
 505     case 0:
 506         if (offset) {
 507             tcg_gen_shri_i32(tmp, tmp, offset);
 508         }
 509         if (a->u) {
 510             gen_uxtb(tmp);
 511         } else {
 512             gen_sxtb(tmp);
 513         }
 514         break;
 515     case 1:
 516         if (a->u) {
 517             if (offset) {
 518                 tcg_gen_shri_i32(tmp, tmp, 16);
 519             } else {
 520                 gen_uxth(tmp);
 521             }
 522         } else {
 523             if (offset) {
 524                 tcg_gen_sari_i32(tmp, tmp, 16);
 525             } else {
 526                 gen_sxth(tmp);
 527             }
 528         }
 529         break;
 530     case 2:
 531         break;
 532     }
 533     store_reg(s, a->rt, tmp);
 534
 535     return true;
 536 }
 537
 538 static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
 539 {
 540     /* VMOV general purpose register to scalar */
 541     TCGv_i32 tmp, tmp2;
 542     int pass;
 543     uint32_t offset;
 544
 545     /* UNDEF accesses to D16-D31 if they don't exist */
 546     if (!dc_isar_feature(aa32_fp_d32, s) && (a->vn & 0x10)) {
 547         return false;
 548     }
 549
 550     offset = a->index << a->size;
 551     pass = extract32(offset, 2, 1);
 552     offset = extract32(offset, 0, 2) * 8;
 553
 554     if (a->size != 2 && !arm_dc_feature(s, ARM_FEATURE_NEON)) {
 555         return false;
 556     }
 557
 558     if (!vfp_access_check(s)) {
 559         return true;
 560     }
 561
 562     tmp = load_reg(s, a->rt);
 563     switch (a->size) {
 564     case 0:
 565         tmp2 = neon_load_reg(a->vn, pass);
 566         tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8);
 567         tcg_temp_free_i32(tmp2);
 568         break;
 569     case 1:
 570         tmp2 = neon_load_reg(a->vn, pass);
 571         tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16);
 572         tcg_temp_free_i32(tmp2);
 573         break;
 574     case 2:
 575         break;
 576     }
 577     neon_store_reg(a->vn, pass, tmp);
 578
 579     return true;
 580 }
 581
 582 static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
 583 {
 584     /* VDUP (general purpose register) */
 585     TCGv_i32 tmp;
 586     int size, vec_size;
 587
 588     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
 589         return false;
 590     }
 591
 592     /* UNDEF accesses to D16-D31 if they don't exist */
 593     if (!dc_isar_feature(aa32_fp_d32, s) && (a->vn & 0x10)) {
 594         return false;
 595     }
 596
 597     if (a->b && a->e) {
 598         return false;
 599     }
 600
 601     if (a->q && (a->vn & 1)) {
 602         return false;
 603     }
 604
 605     vec_size = a->q ? 16 : 8;
 606     if (a->b) {
 607         size = 0;
 608     } else if (a->e) {
 609         size = 1;
 610     } else {
 611         size = 2;
 612     }
 613
 614     if (!vfp_access_check(s)) {
 615         return true;
 616     }
 617
 618     tmp = load_reg(s, a->rt);
 619     tcg_gen_gvec_dup_i32(size, neon_reg_offset(a->vn, 0),
 620                          vec_size, vec_size, tmp);
 621     tcg_temp_free_i32(tmp);
 622
 623     return true;
 624 }
 625
 626 static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
 627 {
 628     TCGv_i32 tmp;
 629     bool ignore_vfp_enabled = false;
 630
 631     if (arm_dc_feature(s, ARM_FEATURE_M)) {
 632         /*
 633          * The only M-profile VFP vmrs/vmsr sysreg is FPSCR.
 634          * Writes to R15 are UNPREDICTABLE; we choose to undef.
 635          */
 636         if (a->rt == 15 || a->reg != ARM_VFP_FPSCR) {
 637             return false;
 638         }
 639     }
 640
 641     switch (a->reg) {
 642     case ARM_VFP_FPSID:
 643         /*
 644          * VFPv2 allows access to FPSID from userspace; VFPv3 restricts
 645          * all ID registers to privileged access only.
 646          */
 647         if (IS_USER(s) && arm_dc_feature(s, ARM_FEATURE_VFP3)) {
 648             return false;
 649         }
 650         ignore_vfp_enabled = true;
 651         break;
 652     case ARM_VFP_MVFR0:
 653     case ARM_VFP_MVFR1:
 654         if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
 655             return false;
 656         }
 657         ignore_vfp_enabled = true;
 658         break;
 659     case ARM_VFP_MVFR2:
 660         if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) {
 661             return false;
 662         }
 663         ignore_vfp_enabled = true;
 664         break;
 665     case ARM_VFP_FPSCR:
 666         break;
 667     case ARM_VFP_FPEXC:
 668         if (IS_USER(s)) {
 669             return false;
 670         }
 671         ignore_vfp_enabled = true;
 672         break;
 673     case ARM_VFP_FPINST:
 674     case ARM_VFP_FPINST2:
 675         /* Not present in VFPv3 */
 676         if (IS_USER(s) || arm_dc_feature(s, ARM_FEATURE_VFP3)) {
 677             return false;
 678         }
 679         break;
 680     default:
 681         return false;
 682     }
 683
 684     if (!full_vfp_access_check(s, ignore_vfp_enabled)) {
 685         return true;
 686     }
 687
 688     if (a->l) {
 689         /* VMRS, move VFP special register to gp register */
 690         switch (a->reg) {
 691         case ARM_VFP_FPSID:
 692         case ARM_VFP_FPEXC:
 693         case ARM_VFP_FPINST:
 694         case ARM_VFP_FPINST2:
 695         case ARM_VFP_MVFR0:
 696         case ARM_VFP_MVFR1:
 697         case ARM_VFP_MVFR2:
 698             tmp = load_cpu_field(vfp.xregs[a->reg]);
 699             break;
 700         case ARM_VFP_FPSCR:
 701             if (a->rt == 15) {
 702                 tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
 703                 tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
 704             } else {
 705                 tmp = tcg_temp_new_i32();
 706                 gen_helper_vfp_get_fpscr(tmp, cpu_env);
 707             }
 708             break;
 709         default:
 710             g_assert_not_reached();
 711         }
 712
 713         if (a->rt == 15) {
 714             /* Set the 4 flag bits in the CPSR.  */
 715             gen_set_nzcv(tmp);
 716             tcg_temp_free_i32(tmp);
 717         } else {
 718             store_reg(s, a->rt, tmp);
 719         }
 720     } else {
 721         /* VMSR, move gp register to VFP special register */
 722         switch (a->reg) {
 723         case ARM_VFP_FPSID:
 724         case ARM_VFP_MVFR0:
 725         case ARM_VFP_MVFR1:
 726         case ARM_VFP_MVFR2:
 727             /* Writes are ignored.  */
 728             break;
 729         case ARM_VFP_FPSCR:
 730             tmp = load_reg(s, a->rt);
 731             gen_helper_vfp_set_fpscr(cpu_env, tmp);
 732             tcg_temp_free_i32(tmp);
 733             gen_lookup_tb(s);
 734             break;
 735         case ARM_VFP_FPEXC:
 736             /*
 737              * TODO: VFP subarchitecture support.
 738              * For now, keep the EN bit only
 739              */
 740             tmp = load_reg(s, a->rt);
 741             tcg_gen_andi_i32(tmp, tmp, 1 << 30);
 742             store_cpu_field(tmp, vfp.xregs[a->reg]);
 743             gen_lookup_tb(s);
 744             break;
 745         case ARM_VFP_FPINST:
 746         case ARM_VFP_FPINST2:
 747             tmp = load_reg(s, a->rt);
 748             store_cpu_field(tmp, vfp.xregs[a->reg]);
 749             break;
 750         default:
 751             g_assert_not_reached();
 752         }
 753     }
 754
 755     return true;
 756 }
 757
 758 static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
 759 {
 760     TCGv_i32 tmp;
 761
 762     if (!vfp_access_check(s)) {
 763         return true;
 764     }
 765
 766     if (a->l) {
 767         /* VFP to general purpose register */
 768         tmp = tcg_temp_new_i32();
 769         neon_load_reg32(tmp, a->vn);
 770         if (a->rt == 15) {
 771             /* Set the 4 flag bits in the CPSR.  */
 772             gen_set_nzcv(tmp);
 773             tcg_temp_free_i32(tmp);
 774         } else {
 775             store_reg(s, a->rt, tmp);
 776         }
 777     } else {
 778         /* general purpose register to VFP */
 779         tmp = load_reg(s, a->rt);
 780         neon_store_reg32(tmp, a->vn);
 781         tcg_temp_free_i32(tmp);
 782     }
 783
 784     return true;
 785 }
 786
 787 static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
 788 {
 789     TCGv_i32 tmp;
 790
 791     /*
 792      * VMOV between two general-purpose registers and two single precision
 793      * floating point registers
 794      */
 795     if (!vfp_access_check(s)) {
 796         return true;
 797     }
 798
 799     if (a->op) {
 800         /* fpreg to gpreg */
 801         tmp = tcg_temp_new_i32();
 802         neon_load_reg32(tmp, a->vm);
 803         store_reg(s, a->rt, tmp);
 804         tmp = tcg_temp_new_i32();
 805         neon_load_reg32(tmp, a->vm + 1);
 806         store_reg(s, a->rt2, tmp);
 807     } else {
 808         /* gpreg to fpreg */
 809         tmp = load_reg(s, a->rt);
 810         neon_store_reg32(tmp, a->vm);
 811         tmp = load_reg(s, a->rt2);
 812         neon_store_reg32(tmp, a->vm + 1);
 813     }
 814
 815     return true;
 816 }
 817
 818 static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_sp *a)
 819 {
 820     TCGv_i32 tmp;
 821
 822     /*
 823      * VMOV between two general-purpose registers and one double precision
 824      * floating point register
 825      */
 826
 827     /* UNDEF accesses to D16-D31 if they don't exist */
 828     if (!dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) {
 829         return false;
 830     }
 831
 832     if (!vfp_access_check(s)) {
 833         return true;
 834     }
 835
 836     if (a->op) {
 837         /* fpreg to gpreg */
 838         tmp = tcg_temp_new_i32();
 839         neon_load_reg32(tmp, a->vm * 2);
 840         store_reg(s, a->rt, tmp);
 841         tmp = tcg_temp_new_i32();
 842         neon_load_reg32(tmp, a->vm * 2 + 1);
 843         store_reg(s, a->rt2, tmp);
 844     } else {
 845         /* gpreg to fpreg */
 846         tmp = load_reg(s, a->rt);
 847         neon_store_reg32(tmp, a->vm * 2);
 848         tcg_temp_free_i32(tmp);
 849         tmp = load_reg(s, a->rt2);
 850         neon_store_reg32(tmp, a->vm * 2 + 1);
 851         tcg_temp_free_i32(tmp);
 852     }
 853
 854     return true;
 855 }
 856
 857 static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
 858 {
 859     uint32_t offset;
 860     TCGv_i32 addr, tmp;
 861
 862     if (!vfp_access_check(s)) {
 863         return true;
 864     }
 865
 866     offset = a->imm << 2;
 867     if (!a->u) {
 868         offset = -offset;
 869     }
 870
 871     if (s->thumb && a->rn == 15) {
 872         /* This is actually UNPREDICTABLE */
 873         addr = tcg_temp_new_i32();
 874         tcg_gen_movi_i32(addr, s->pc & ~2);
 875     } else {
 876         addr = load_reg(s, a->rn);
 877     }
 878     tcg_gen_addi_i32(addr, addr, offset);
 879     tmp = tcg_temp_new_i32();
 880     if (a->l) {
 881         gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
 882         neon_store_reg32(tmp, a->vd);
 883     } else {
 884         neon_load_reg32(tmp, a->vd);
 885         gen_aa32_st32(s, tmp, addr, get_mem_index(s));
 886     }
 887     tcg_temp_free_i32(tmp);
 888     tcg_temp_free_i32(addr);
 889
 890     return true;
 891 }
 892
 893 static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_sp *a)
 894 {
 895     uint32_t offset;
 896     TCGv_i32 addr;
 897     TCGv_i64 tmp;
 898
 899     /* UNDEF accesses to D16-D31 if they don't exist */
 900     if (!dc_isar_feature(aa32_fp_d32, s) && (a->vd & 0x10)) {
 901         return false;
 902     }
 903
 904     if (!vfp_access_check(s)) {
 905         return true;
 906     }
 907
 908     offset = a->imm << 2;
 909     if (!a->u) {
 910         offset = -offset;
 911     }
 912
 913     if (s->thumb && a->rn == 15) {
 914         /* This is actually UNPREDICTABLE */
 915         addr = tcg_temp_new_i32();
 916         tcg_gen_movi_i32(addr, s->pc & ~2);
 917     } else {
 918         addr = load_reg(s, a->rn);
 919     }
 920     tcg_gen_addi_i32(addr, addr, offset);
 921     tmp = tcg_temp_new_i64();
 922     if (a->l) {
 923         gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
 924         neon_store_reg64(tmp, a->vd);
 925     } else {
 926         neon_load_reg64(tmp, a->vd);
 927         gen_aa32_st64(s, tmp, addr, get_mem_index(s));
 928     }
 929     tcg_temp_free_i64(tmp);
 930     tcg_temp_free_i32(addr);
 931
 932     return true;
 933 }
 934
 935 static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
 936 {
 937     uint32_t offset;
 938     TCGv_i32 addr, tmp;
 939     int i, n;
 940
 941     n = a->imm;
 942
 943     if (n == 0 || (a->vd + n) > 32) {
 944         /*
 945          * UNPREDICTABLE cases for bad immediates: we choose to
 946          * UNDEF to avoid generating huge numbers of TCG ops
 947          */
 948         return false;
 949     }
 950     if (a->rn == 15 && a->w) {
 951         /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
 952         return false;
 953     }
 954
 955     if (!vfp_access_check(s)) {
 956         return true;
 957     }
 958
 959     if (s->thumb && a->rn == 15) {
 960         /* This is actually UNPREDICTABLE */
 961         addr = tcg_temp_new_i32();
 962         tcg_gen_movi_i32(addr, s->pc & ~2);
 963     } else {
 964         addr = load_reg(s, a->rn);
 965     }
 966     if (a->p) {
 967         /* pre-decrement */
 968         tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
 969     }
 970
 971     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
 972         /*
 973          * Here 'addr' is the lowest address we will store to,
 974          * and is either the old SP (if post-increment) or
 975          * the new SP (if pre-decrement). For post-increment
 976          * where the old value is below the limit and the new
 977          * value is above, it is UNKNOWN whether the limit check
 978          * triggers; we choose to trigger.
 979          */
 980         gen_helper_v8m_stackcheck(cpu_env, addr);
 981     }
 982
 983     offset = 4;
 984     tmp = tcg_temp_new_i32();
 985     for (i = 0; i < n; i++) {
 986         if (a->l) {
 987             /* load */
 988             gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
 989             neon_store_reg32(tmp, a->vd + i);
 990         } else {
 991             /* store */
 992             neon_load_reg32(tmp, a->vd + i);
 993             gen_aa32_st32(s, tmp, addr, get_mem_index(s));
 994         }
 995         tcg_gen_addi_i32(addr, addr, offset);
 996     }
 997     tcg_temp_free_i32(tmp);
 998     if (a->w) {
 999         /* writeback */
1000         if (a->p) {
1001             offset = -offset * n;
1002             tcg_gen_addi_i32(addr, addr, offset);
1003         }
1004         store_reg(s, a->rn, addr);
1005     } else {
1006         tcg_temp_free_i32(addr);
1007     }
1008
1009     return true;
1010 }
1011
1012 static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
1013 {
1014     uint32_t offset;
1015     TCGv_i32 addr;
1016     TCGv_i64 tmp;
1017     int i, n;
1018
1019     n = a->imm >> 1;
1020
1021     if (n == 0 || (a->vd + n) > 32 || n > 16) {
1022         /*
1023          * UNPREDICTABLE cases for bad immediates: we choose to
1024          * UNDEF to avoid generating huge numbers of TCG ops
1025          */
1026         return false;
1027     }
1028     if (a->rn == 15 && a->w) {
1029         /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
1030         return false;
1031     }
1032
1033     /* UNDEF accesses to D16-D31 if they don't exist */
1034     if (!dc_isar_feature(aa32_fp_d32, s) && (a->vd + n) > 16) {
1035         return false;
1036     }
1037
1038     if (!vfp_access_check(s)) {
1039         return true;
1040     }
1041
1042     if (s->thumb && a->rn == 15) {
1043         /* This is actually UNPREDICTABLE */
1044         addr = tcg_temp_new_i32();
1045         tcg_gen_movi_i32(addr, s->pc & ~2);
1046     } else {
1047         addr = load_reg(s, a->rn);
1048     }
1049     if (a->p) {
1050         /* pre-decrement */
1051         tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
1052     }
1053
1054     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
1055         /*
1056          * Here 'addr' is the lowest address we will store to,
1057          * and is either the old SP (if post-increment) or
1058          * the new SP (if pre-decrement). For post-increment
1059          * where the old value is below the limit and the new
1060          * value is above, it is UNKNOWN whether the limit check
1061          * triggers; we choose to trigger.
1062          */
1063         gen_helper_v8m_stackcheck(cpu_env, addr);
1064     }
1065
1066     offset = 8;
1067     tmp = tcg_temp_new_i64();
1068     for (i = 0; i < n; i++) {
1069         if (a->l) {
1070             /* load */
1071             gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
1072             neon_store_reg64(tmp, a->vd + i);
1073         } else {
1074             /* store */
1075             neon_load_reg64(tmp, a->vd + i);
1076             gen_aa32_st64(s, tmp, addr, get_mem_index(s));
1077         }
1078         tcg_gen_addi_i32(addr, addr, offset);
1079     }
1080     tcg_temp_free_i64(tmp);
1081     if (a->w) {
1082         /* writeback */
1083         if (a->p) {
1084             offset = -offset * n;
1085         } else if (a->imm & 1) {
1086             offset = 4;
1087         } else {
1088             offset = 0;
1089         }
1090
1091         if (offset != 0) {
1092             tcg_gen_addi_i32(addr, addr, offset);
1093         }
1094         store_reg(s, a->rn, addr);
1095     } else {
1096         tcg_temp_free_i32(addr);
1097     }
1098
1099     return true;
1100 }
1101
1102 /*
1103  * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp().
1104  * The callback should emit code to write a value to vd. If
1105  * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd
1106  * will contain the old value of the relevant VFP register;
1107  * otherwise it must be written to only.
1108  */
1109 typedef void VFPGen3OpSPFn(TCGv_i32 vd,
1110                            TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst);
1111 typedef void VFPGen3OpDPFn(TCGv_i64 vd,
1112                            TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);
1113
1114 /*
1115  * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp().
1116  * The callback should emit code to write a value to vd (which
1117  * should be written to only).
1118  */
1119 typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
1120 typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);
1121
1122 /*
1123  * Perform a 3-operand VFP data processing instruction. fn is the
1124  * callback to do the actual operation; this function deals with the
1125  * code to handle looping around for VFP vector processing.
1126  */
1127 static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
1128                           int vd, int vn, int vm, bool reads_vd)
1129 {
1130     uint32_t delta_m = 0;
1131     uint32_t delta_d = 0;
1132     uint32_t bank_mask = 0;
1133     int veclen = s->vec_len;
1134     TCGv_i32 f0, f1, fd;
1135     TCGv_ptr fpst;
1136
1137     if (!dc_isar_feature(aa32_fpshvec, s) &&
1138         (veclen != 0 || s->vec_stride != 0)) {
1139         return false;
1140     }
1141
1142     if (!vfp_access_check(s)) {
1143         return true;
1144     }
1145
1146     if (veclen > 0) {
1147         bank_mask = 0x18;
1148
1149         /* Figure out what type of vector operation this is.  */
1150         if ((vd & bank_mask) == 0) {
1151             /* scalar */
1152             veclen = 0;
1153         } else {
1154             delta_d = s->vec_stride + 1;
1155
1156             if ((vm & bank_mask) == 0) {
1157                 /* mixed scalar/vector */
1158                 delta_m = 0;
1159             } else {
1160                 /* vector */
1161                 delta_m = delta_d;
1162             }
1163         }
1164     }
1165
1166     f0 = tcg_temp_new_i32();
1167     f1 = tcg_temp_new_i32();
1168     fd = tcg_temp_new_i32();
1169     fpst = get_fpstatus_ptr(0);
1170
1171     neon_load_reg32(f0, vn);
1172     neon_load_reg32(f1, vm);
1173
1174     for (;;) {
1175         if (reads_vd) {
1176             neon_load_reg32(fd, vd);
1177         }
1178         fn(fd, f0, f1, fpst);
1179         neon_store_reg32(fd, vd);
1180
1181         if (veclen == 0) {
1182             break;
1183         }
1184
1185         /* Set up the operands for the next iteration */
1186         veclen--;
1187         vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
1188         vn = ((vn + delta_d) & (bank_mask - 1)) | (vn & bank_mask);
1189         neon_load_reg32(f0, vn);
1190         if (delta_m) {
1191             vm = ((vm + delta_m) & (bank_mask - 1)) | (vm & bank_mask);
1192             neon_load_reg32(f1, vm);
1193         }
1194     }
1195
1196     tcg_temp_free_i32(f0);
1197     tcg_temp_free_i32(f1);
1198     tcg_temp_free_i32(fd);
1199     tcg_temp_free_ptr(fpst);
1200
1201     return true;
1202 }
1203
1204 static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
1205                           int vd, int vn, int vm, bool reads_vd)
1206 {
1207     uint32_t delta_m = 0;
1208     uint32_t delta_d = 0;
1209     uint32_t bank_mask = 0;
1210     int veclen = s->vec_len;
1211     TCGv_i64 f0, f1, fd;
1212     TCGv_ptr fpst;
1213
1214     /* UNDEF accesses to D16-D31 if they don't exist */
1215     if (!dc_isar_feature(aa32_fp_d32, s) && ((vd | vn | vm) & 0x10)) {
1216         return false;
1217     }
1218
1219     if (!dc_isar_feature(aa32_fpshvec, s) &&
1220         (veclen != 0 || s->vec_stride != 0)) {
1221         return false;
1222     }
1223
1224     if (!vfp_access_check(s)) {
1225         return true;
1226     }
1227
1228     if (veclen > 0) {
1229         bank_mask = 0xc;
1230
1231         /* Figure out what type of vector operation this is.  */
1232         if ((vd & bank_mask) == 0) {
1233             /* scalar */
1234             veclen = 0;
1235         } else {
1236             delta_d = (s->vec_stride >> 1) + 1;
1237
1238             if ((vm & bank_mask) == 0) {
1239                 /* mixed scalar/vector */
1240                 delta_m = 0;
1241             } else {
1242                 /* vector */
1243                 delta_m = delta_d;
1244             }
1245         }
1246     }
1247
1248     f0 = tcg_temp_new_i64();
1249     f1 = tcg_temp_new_i64();
1250     fd = tcg_temp_new_i64();
1251     fpst = get_fpstatus_ptr(0);
1252
1253     neon_load_reg64(f0, vn);
1254     neon_load_reg64(f1, vm);
1255
1256     for (;;) {
1257         if (reads_vd) {
1258             neon_load_reg64(fd, vd);
1259         }
1260         fn(fd, f0, f1, fpst);
1261         neon_store_reg64(fd, vd);
1262
1263         if (veclen == 0) {
1264             break;
1265         }
1266         /* Set up the operands for the next iteration */
1267         veclen--;
1268         vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
1269         vn = ((vn + delta_d) & (bank_mask - 1)) | (vn & bank_mask);
1270         neon_load_reg64(f0, vn);
1271         if (delta_m) {
1272             vm = ((vm + delta_m) & (bank_mask - 1)) | (vm & bank_mask);
1273             neon_load_reg64(f1, vm);
1274         }
1275     }
1276
1277     tcg_temp_free_i64(f0);
1278     tcg_temp_free_i64(f1);
1279     tcg_temp_free_i64(fd);
1280     tcg_temp_free_ptr(fpst);
1281
1282     return true;
1283 }
1284
1285 static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
1286 {
1287     uint32_t delta_m = 0;
1288     uint32_t delta_d = 0;
1289     uint32_t bank_mask = 0;
1290     int veclen = s->vec_len;
1291     TCGv_i32 f0, fd;
1292
1293     if (!dc_isar_feature(aa32_fpshvec, s) &&
1294         (veclen != 0 || s->vec_stride != 0)) {
1295         return false;
1296     }
1297
1298     if (!vfp_access_check(s)) {
1299         return true;
1300     }
1301
1302     if (veclen > 0) {
1303         bank_mask = 0x18;
1304
1305         /* Figure out what type of vector operation this is.  */
1306         if ((vd & bank_mask) == 0) {
1307             /* scalar */
1308             veclen = 0;
1309         } else {
1310             delta_d = s->vec_stride + 1;
1311
1312             if ((vm & bank_mask) == 0) {
1313                 /* mixed scalar/vector */
1314                 delta_m = 0;
1315             } else {
1316                 /* vector */
1317                 delta_m = delta_d;
1318             }
1319         }
1320     }
1321
1322     f0 = tcg_temp_new_i32();
1323     fd = tcg_temp_new_i32();
1324
1325     neon_load_reg32(f0, vm);
1326
1327     for (;;) {
1328         fn(fd, f0);
1329         neon_store_reg32(fd, vd);
1330
1331         if (veclen == 0) {
1332             break;
1333         }
1334
1335         if (delta_m == 0) {
1336             /* single source one-many */
1337             while (veclen--) {
1338                 vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
1339                 neon_store_reg32(fd, vd);
1340             }
1341             break;
1342         }
1343
1344         /* Set up the operands for the next iteration */
1345         veclen--;
1346         vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
1347         vm = ((vm + delta_m) & (bank_mask - 1)) | (vm & bank_mask);
1348         neon_load_reg32(f0, vm);
1349     }
1350
1351     tcg_temp_free_i32(f0);
1352     tcg_temp_free_i32(fd);
1353
1354     return true;
1355 }
1356
1357 static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
1358 {
1359     uint32_t delta_m = 0;
1360     uint32_t delta_d = 0;
1361     uint32_t bank_mask = 0;
1362     int veclen = s->vec_len;
1363     TCGv_i64 f0, fd;
1364
1365     /* UNDEF accesses to D16-D31 if they don't exist */
1366     if (!dc_isar_feature(aa32_fp_d32, s) && ((vd | vm) & 0x10)) {
1367         return false;
1368     }
1369
1370     if (!dc_isar_feature(aa32_fpshvec, s) &&
1371         (veclen != 0 || s->vec_stride != 0)) {
1372         return false;
1373     }
1374
1375     if (!vfp_access_check(s)) {
1376         return true;
1377     }
1378
1379     if (veclen > 0) {
1380         bank_mask = 0xc;
1381
1382         /* Figure out what type of vector operation this is.  */
1383         if ((vd & bank_mask) == 0) {
1384             /* scalar */
1385             veclen = 0;
1386         } else {
1387             delta_d = (s->vec_stride >> 1) + 1;
1388
1389             if ((vm & bank_mask) == 0) {
1390                 /* mixed scalar/vector */
1391                 delta_m = 0;
1392             } else {
1393                 /* vector */
1394                 delta_m = delta_d;
1395             }
1396         }
1397     }
1398
1399     f0 = tcg_temp_new_i64();
1400     fd = tcg_temp_new_i64();
1401
1402     neon_load_reg64(f0, vm);
1403
1404     for (;;) {
1405         fn(fd, f0);
1406         neon_store_reg64(fd, vd);
1407
1408         if (veclen == 0) {
1409             break;
1410         }
1411
1412         if (delta_m == 0) {
1413             /* single source one-many */
1414             while (veclen--) {
1415                 vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
1416                 neon_store_reg64(fd, vd);
1417             }
1418             break;
1419         }
1420
1421         /* Set up the operands for the next iteration */
1422         veclen--;
1423         vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
1424         vm = ((vm + delta_m) & (bank_mask - 1)) | (vm & bank_mask);
1425         neon_load_reg64(f0, vm);
1426     }
1427
1428     tcg_temp_free_i64(f0);
1429     tcg_temp_free_i64(fd);
1430
1431     return true;
1432 }
1433
1434 static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1435 {
1436     /* Note that order of inputs to the add matters for NaNs */
1437     TCGv_i32 tmp = tcg_temp_new_i32();
1438
1439     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1440     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1441     tcg_temp_free_i32(tmp);
1442 }
1443
1444 static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
1445 {
1446     return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);
1447 }
1448
1449 static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1450 {
1451     /* Note that order of inputs to the add matters for NaNs */
1452     TCGv_i64 tmp = tcg_temp_new_i64();
1453
1454     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1455     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1456     tcg_temp_free_i64(tmp);
1457 }
1458
1459 static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_sp *a)
1460 {
1461     return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
1462 }
1463
1464 static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1465 {
1466     /*
1467      * VMLS: vd = vd + -(vn * vm)
1468      * Note that order of inputs to the add matters for NaNs.
1469      */
1470     TCGv_i32 tmp = tcg_temp_new_i32();
1471
1472     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1473     gen_helper_vfp_negs(tmp, tmp);
1474     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1475     tcg_temp_free_i32(tmp);
1476 }
1477
1478 static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
1479 {
1480     return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);
1481 }
1482
1483 static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1484 {
1485     /*
1486      * VMLS: vd = vd + -(vn * vm)
1487      * Note that order of inputs to the add matters for NaNs.
1488      */
1489     TCGv_i64 tmp = tcg_temp_new_i64();
1490
1491     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1492     gen_helper_vfp_negd(tmp, tmp);
1493     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1494     tcg_temp_free_i64(tmp);
1495 }
1496
1497 static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_sp *a)
1498 {
1499     return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
1500 }
1501
1502 static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1503 {
1504     /*
1505      * VNMLS: -fd + (fn * fm)
1506      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
1507      * plausible looking simplifications because this will give wrong results
1508      * for NaNs.
1509      */
1510     TCGv_i32 tmp = tcg_temp_new_i32();
1511
1512     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1513     gen_helper_vfp_negs(vd, vd);
1514     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1515     tcg_temp_free_i32(tmp);
1516 }
1517
1518 static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
1519 {
1520     return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);
1521 }
1522
1523 static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1524 {
1525     /*
1526      * VNMLS: -fd + (fn * fm)
1527      * Note that it isn't valid to replace (-A + B) with (B - A) or similar
1528      * plausible looking simplifications because this will give wrong results
1529      * for NaNs.
1530      */
1531     TCGv_i64 tmp = tcg_temp_new_i64();
1532
1533     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1534     gen_helper_vfp_negd(vd, vd);
1535     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1536     tcg_temp_free_i64(tmp);
1537 }
1538
1539 static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_sp *a)
1540 {
1541     return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
1542 }
1543
1544 static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1545 {
1546     /* VNMLA: -fd + -(fn * fm) */
1547     TCGv_i32 tmp = tcg_temp_new_i32();
1548
1549     gen_helper_vfp_muls(tmp, vn, vm, fpst);
1550     gen_helper_vfp_negs(tmp, tmp);
1551     gen_helper_vfp_negs(vd, vd);
1552     gen_helper_vfp_adds(vd, vd, tmp, fpst);
1553     tcg_temp_free_i32(tmp);
1554 }
1555
1556 static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
1557 {
1558     return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);
1559 }
1560
1561 static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1562 {
1563     /* VNMLA: -fd + (fn * fm) */
1564     TCGv_i64 tmp = tcg_temp_new_i64();
1565
1566     gen_helper_vfp_muld(tmp, vn, vm, fpst);
1567     gen_helper_vfp_negd(tmp, tmp);
1568     gen_helper_vfp_negd(vd, vd);
1569     gen_helper_vfp_addd(vd, vd, tmp, fpst);
1570     tcg_temp_free_i64(tmp);
1571 }
1572
1573 static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_sp *a)
1574 {
1575     return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
1576 }
1577
1578 static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
1579 {
1580     return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
1581 }
1582
1583 static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_sp *a)
1584 {
1585     return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
1586 }
1587
1588 static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
1589 {
1590     /* VNMUL: -(fn * fm) */
1591     gen_helper_vfp_muls(vd, vn, vm, fpst);
1592     gen_helper_vfp_negs(vd, vd);
1593 }
1594
1595 static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
1596 {
1597     return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false);
1598 }
1599
1600 static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
1601 {
1602     /* VNMUL: -(fn * fm) */
1603     gen_helper_vfp_muld(vd, vn, vm, fpst);
1604     gen_helper_vfp_negd(vd, vd);
1605 }
1606
1607 static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_sp *a)
1608 {
1609     return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
1610 }
1611
1612 static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
1613 {
1614     return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
1615 }
1616
1617 static bool trans_VADD_dp(DisasContext *s, arg_VADD_sp *a)
1618 {
1619     return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
1620 }
1621
1622 static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
1623 {
1624     return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
1625 }
1626
1627 static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_sp *a)
1628 {
1629     return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
1630 }
1631
1632 static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
1633 {
1634     return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
1635 }
1636
1637 static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_sp *a)
1638 {
1639     return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
1640 }
1641
1642 static bool trans_VFM_sp(DisasContext *s, arg_VFM_sp *a)
1643 {
1644     /*
1645      * VFNMA : fd = muladd(-fd,  fn, fm)
1646      * VFNMS : fd = muladd(-fd, -fn, fm)
1647      * VFMA  : fd = muladd( fd,  fn, fm)
1648      * VFMS  : fd = muladd( fd, -fn, fm)
1649      *
1650      * These are fused multiply-add, and must be done as one floating
1651      * point operation with no rounding between the multiplication and
1652      * addition steps.  NB that doing the negations here as separate
1653      * steps is correct : an input NaN should come out with its sign
1654      * bit flipped if it is a negated-input.
1655      */
1656     TCGv_ptr fpst;
1657     TCGv_i32 vn, vm, vd;
1658
1659     /*
1660      * Present in VFPv4 only.
1661      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
1662      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
1663      */
1664     if (!arm_dc_feature(s, ARM_FEATURE_VFP4) ||
1665         (s->vec_len != 0 || s->vec_stride != 0)) {
1666         return false;
1667     }
1668
1669     if (!vfp_access_check(s)) {
1670         return true;
1671     }
1672
1673     vn = tcg_temp_new_i32();
1674     vm = tcg_temp_new_i32();
1675     vd = tcg_temp_new_i32();
1676
1677     neon_load_reg32(vn, a->vn);
1678     neon_load_reg32(vm, a->vm);
1679     if (a->o2) {
1680         /* VFNMS, VFMS */
1681         gen_helper_vfp_negs(vn, vn);
1682     }
1683     neon_load_reg32(vd, a->vd);
1684     if (a->o1 & 1) {
1685         /* VFNMA, VFNMS */
1686         gen_helper_vfp_negs(vd, vd);
1687     }
1688     fpst = get_fpstatus_ptr(0);
1689     gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
1690     neon_store_reg32(vd, a->vd);
1691
1692     tcg_temp_free_ptr(fpst);
1693     tcg_temp_free_i32(vn);
1694     tcg_temp_free_i32(vm);
1695     tcg_temp_free_i32(vd);
1696
1697     return true;
1698 }
1699
1700 static bool trans_VFM_dp(DisasContext *s, arg_VFM_sp *a)
1701 {
1702     /*
1703      * VFNMA : fd = muladd(-fd,  fn, fm)
1704      * VFNMS : fd = muladd(-fd, -fn, fm)
1705      * VFMA  : fd = muladd( fd,  fn, fm)
1706      * VFMS  : fd = muladd( fd, -fn, fm)
1707      *
1708      * These are fused multiply-add, and must be done as one floating
1709      * point operation with no rounding between the multiplication and
1710      * addition steps.  NB that doing the negations here as separate
1711      * steps is correct : an input NaN should come out with its sign
1712      * bit flipped if it is a negated-input.
1713      */
1714     TCGv_ptr fpst;
1715     TCGv_i64 vn, vm, vd;
1716
1717     /*
1718      * Present in VFPv4 only.
1719      * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
1720      * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
1721      */
1722     if (!arm_dc_feature(s, ARM_FEATURE_VFP4) ||
1723         (s->vec_len != 0 || s->vec_stride != 0)) {
1724         return false;
1725     }
1726
1727     /* UNDEF accesses to D16-D31 if they don't exist. */
1728     if (!dc_isar_feature(aa32_fp_d32, s) && ((a->vd | a->vn | a->vm) & 0x10)) {
1729         return false;
1730     }
1731
1732     if (!vfp_access_check(s)) {
1733         return true;
1734     }
1735
1736     vn = tcg_temp_new_i64();
1737     vm = tcg_temp_new_i64();
1738     vd = tcg_temp_new_i64();
1739
1740     neon_load_reg64(vn, a->vn);
1741     neon_load_reg64(vm, a->vm);
1742     if (a->o2) {
1743         /* VFNMS, VFMS */
1744         gen_helper_vfp_negd(vn, vn);
1745     }
1746     neon_load_reg64(vd, a->vd);
1747     if (a->o1 & 1) {
1748         /* VFNMA, VFNMS */
1749         gen_helper_vfp_negd(vd, vd);
1750     }
1751     fpst = get_fpstatus_ptr(0);
1752     gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
1753     neon_store_reg64(vd, a->vd);
1754
1755     tcg_temp_free_ptr(fpst);
1756     tcg_temp_free_i64(vn);
1757     tcg_temp_free_i64(vm);
1758     tcg_temp_free_i64(vd);
1759
1760     return true;
1761 }
1762
1763 static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
1764 {
1765     uint32_t delta_d = 0;
1766     uint32_t bank_mask = 0;
1767     int veclen = s->vec_len;
1768     TCGv_i32 fd;
1769     uint32_t n, i, vd;
1770
1771     vd = a->vd;
1772
1773     if (!dc_isar_feature(aa32_fpshvec, s) &&
1774         (veclen != 0 || s->vec_stride != 0)) {
1775         return false;
1776     }
1777
1778     if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
1779         return false;
1780     }
1781
1782     if (!vfp_access_check(s)) {
1783         return true;
1784     }
1785
1786     if (veclen > 0) {
1787         bank_mask = 0x18;
1788         /* Figure out what type of vector operation this is.  */
1789         if ((vd & bank_mask) == 0) {
1790             /* scalar */
1791             veclen = 0;
1792         } else {
1793             delta_d = s->vec_stride + 1;
1794         }
1795     }
1796
1797     n = (a->imm4h << 28) & 0x80000000;
1798     i = ((a->imm4h << 4) & 0x70) | a->imm4l;
1799     if (i & 0x40) {
1800         i |= 0x780;
1801     } else {
1802         i |= 0x800;
1803     }
1804     n |= i << 19;
1805
1806     fd = tcg_temp_new_i32();
1807     tcg_gen_movi_i32(fd, n);
1808
1809     for (;;) {
1810         neon_store_reg32(fd, vd);
1811
1812         if (veclen == 0) {
1813             break;
1814         }
1815
1816         /* Set up the operands for the next iteration */
1817         veclen--;
1818         vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
1819     }
1820
1821     tcg_temp_free_i32(fd);
1822     return true;
1823 }
1824
1825 static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
1826 {
1827     uint32_t delta_d = 0;
1828     uint32_t bank_mask = 0;
1829     int veclen = s->vec_len;
1830     TCGv_i64 fd;
1831     uint32_t n, i, vd;
1832
1833     vd = a->vd;
1834
1835     /* UNDEF accesses to D16-D31 if they don't exist. */
1836     if (!dc_isar_feature(aa32_fp_d32, s) && (vd & 0x10)) {
1837         return false;
1838     }
1839
1840     if (!dc_isar_feature(aa32_fpshvec, s) &&
1841         (veclen != 0 || s->vec_stride != 0)) {
1842         return false;
1843     }
1844
1845     if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
1846         return false;
1847     }
1848
1849     if (!vfp_access_check(s)) {
1850         return true;
1851     }
1852
1853     if (veclen > 0) {
1854         bank_mask = 0xc;
1855         /* Figure out what type of vector operation this is.  */
1856         if ((vd & bank_mask) == 0) {
1857             /* scalar */
1858             veclen = 0;
1859         } else {
1860             delta_d = (s->vec_stride >> 1) + 1;
1861         }
1862     }
1863
1864     n = (a->imm4h << 28) & 0x80000000;
1865     i = ((a->imm4h << 4) & 0x70) | a->imm4l;
1866     if (i & 0x40) {
1867         i |= 0x3f80;
1868     } else {
1869         i |= 0x4000;
1870     }
1871     n |= i << 16;
1872
1873     fd = tcg_temp_new_i64();
1874     tcg_gen_movi_i64(fd, ((uint64_t)n) << 32);
1875
1876     for (;;) {
1877         neon_store_reg64(fd, vd);
1878
1879         if (veclen == 0) {
1880             break;
1881         }
1882
1883         /* Set up the operands for the next iteration */
1884         veclen--;
1885         vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
1886     }
1887
1888     tcg_temp_free_i64(fd);
1889     return true;
1890 }
1891
1892 static bool trans_VABS_sp(DisasContext *s, arg_VABS_sp *a)
1893 {
1894     return do_vfp_2op_sp(s, gen_helper_vfp_abss, a->vd, a->vm);
1895 }
1896
1897 static bool trans_VABS_dp(DisasContext *s, arg_VABS_dp *a)
1898 {
1899     return do_vfp_2op_dp(s, gen_helper_vfp_absd, a->vd, a->vm);
1900 }
1901
1902 static bool trans_VNEG_sp(DisasContext *s, arg_VNEG_sp *a)
1903 {
1904     return do_vfp_2op_sp(s, gen_helper_vfp_negs, a->vd, a->vm);
1905 }
1906
1907 static bool trans_VNEG_dp(DisasContext *s, arg_VNEG_dp *a)
1908 {
1909     return do_vfp_2op_dp(s, gen_helper_vfp_negd, a->vd, a->vm);
1910 }