/*
 * ARM translation: AArch32 VFP instructions
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 * Copyright (c) 2019 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

/* Include the generated VFP decoder */
#include "decode-vfp.c.inc"
#include "decode-vfp-uncond.c.inc"
static inline void vfp_load_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(true, reg));
}

static inline void vfp_store_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(true, reg));
}

static inline void vfp_load_reg32(TCGv_i32 var, int reg)
{
    tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
}

static inline void vfp_store_reg32(TCGv_i32 var, int reg)
{
    tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
}
/*
 * The imm8 encodes the sign bit, enough bits to represent an exponent in
 * the range 01....1xx to 10....0xx, and the most significant 4 bits of
 * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
 */
uint64_t vfp_expand_imm(int size, uint8_t imm8)
{
    uint64_t imm;

    switch (size) {
    case MO_64:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
            (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
            extract32(imm8, 0, 6);
        imm <<= 48;
        break;
    case MO_32:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
            (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
            (extract32(imm8, 0, 6) << 3);
        imm <<= 16;
        break;
    case MO_16:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
            (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
            (extract32(imm8, 0, 6) << 6);
        break;
    default:
        g_assert_not_reached();
    }
    return imm;
}
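
/*
 * Worked example (illustrative note, not from the original source):
 * imm8 == 0x70 has sign 0, bit 6 set and low bits 110000, so the MO_32
 * case builds 0x3e00 | 0x180 = 0x3f80 and shifts it to 0x3f800000,
 * i.e. 1.0f; the MO_16 case yields 0x3c00 and the MO_64 case yields
 * 0x3ff0000000000000, the half- and double-precision encodings of 1.0.
 */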
/*
 * Return the offset of a 16-bit half of the specified VFP single-precision
 * register. If top is true, returns the top 16 bits; otherwise the bottom
 * 16 bits.
 */
static inline long vfp_f16_offset(unsigned reg, bool top)
{
    long offs = vfp_reg_offset(false, reg);

#ifdef HOST_WORDS_BIGENDIAN
    if (!top) {
        offs += 2;
    }
#else
    if (top) {
        offs += 2;
    }
#endif
    return offs;
}
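
/*
 * Illustrative note (not from the original source): on a little-endian
 * host vfp_f16_offset(reg, true) is vfp_reg_offset(false, reg) + 2,
 * because the top 16 bits of the 32-bit S register sit at the higher
 * address; a big-endian host applies the +2 to the bottom half instead.
 */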
/*
 * Generate code for M-profile lazy FP state preservation if needed;
 * this corresponds to the pseudocode PreserveFPState() function.
 */
static void gen_preserve_fp_state(DisasContext *s)
{
    if (s->v7m_lspact) {
        /*
         * Lazy state saving affects external memory and also the NVIC,
         * so we must mark it as an IO operation for icount (and cause
         * this to be the last insn in the TB).
         */
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            s->base.is_jmp = DISAS_UPDATE_EXIT;
            gen_io_start();
        }
        gen_helper_v7m_preserve_fp_state(cpu_env);
        /*
         * If the preserve_fp_state helper doesn't throw an exception
         * then it will clear LSPACT; we don't need to repeat this for
         * any further FP insns in this TB.
         */
        s->v7m_lspact = false;
    }
}
/*
 * Check that VFP access is enabled. If it is, do the necessary
 * M-profile lazy-FP handling and then return true.
 * If not, emit code to generate an appropriate exception and
 * return false.
 * The ignore_vfp_enabled argument specifies that we should ignore
 * whether VFP is enabled via FPEXC[EN]: this should be true for FMXR/FMRX
 * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
 */
static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
{
    if (s->fp_excp_el) {
        /* M-profile handled this earlier, in disas_m_nocp() */
        assert(!arm_dc_feature(s, ARM_FEATURE_M));
        gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
                           syn_fp_access_trap(1, 0xe, false),
                           s->fp_excp_el);
        return false;
    }

    if (!s->vfp_enabled && !ignore_vfp_enabled) {
        assert(!arm_dc_feature(s, ARM_FEATURE_M));
        unallocated_encoding(s);
        return false;
    }

    if (arm_dc_feature(s, ARM_FEATURE_M)) {
        /* Handle M-profile lazy FP state mechanics */

        /* Trigger lazy-state preservation if necessary */
        gen_preserve_fp_state(s);

        /* Update ownership of FP context: set FPCCR.S to match current state */
        if (s->v8m_fpccr_s_wrong) {
            TCGv_i32 tmp;

            tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
            if (s->v8m_secure) {
                tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
            } else {
                tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
            }
            store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
            /* Don't need to do this for any further FP insns in this TB */
            s->v8m_fpccr_s_wrong = false;
        }

        if (s->v7m_new_fp_ctxt_needed) {
            /*
             * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA
             * and the FPSCR.
             */
            TCGv_i32 control, fpscr;
            uint32_t bits = R_V7M_CONTROL_FPCA_MASK;

            fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
            gen_helper_vfp_set_fpscr(cpu_env, fpscr);
            tcg_temp_free_i32(fpscr);
            /*
             * We don't need to arrange to end the TB, because the only
             * parts of FPSCR which we cache in the TB flags are the VECLEN
             * and VECSTRIDE, and those don't exist for M-profile.
             */

            if (s->v8m_secure) {
                bits |= R_V7M_CONTROL_SFPA_MASK;
            }
            control = load_cpu_field(v7m.control[M_REG_S]);
            tcg_gen_ori_i32(control, control, bits);
            store_cpu_field(control, v7m.control[M_REG_S]);
            /* Don't need to do this for any further FP insns in this TB */
            s->v7m_new_fp_ctxt_needed = false;
        }
    }

    return true;
}

/*
 * The most usual kind of VFP access check, for everything except
 * FMXR/FMRX to the always-available special registers.
 */
bool vfp_access_check(DisasContext *s)
{
    return full_vfp_access_check(s, false);
}
static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
{
    uint32_t rd, rn, rm;
    int sz = a->sz;

    if (!dc_isar_feature(aa32_vsel, s)) {
        return false;
    }

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vm | a->vn | a->vd) & 0x10)) {
        return false;
    }

    rd = a->vd;
    rn = a->vn;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    if (sz == 3) {
        TCGv_i64 frn, frm, dest;
        TCGv_i64 tmp, zero, zf, nf, vf;

        zero = tcg_const_i64(0);

        frn = tcg_temp_new_i64();
        frm = tcg_temp_new_i64();
        dest = tcg_temp_new_i64();

        zf = tcg_temp_new_i64();
        nf = tcg_temp_new_i64();
        vf = tcg_temp_new_i64();

        tcg_gen_extu_i32_i64(zf, cpu_ZF);
        tcg_gen_ext_i32_i64(nf, cpu_NF);
        tcg_gen_ext_i32_i64(vf, cpu_VF);

        vfp_load_reg64(frn, rn);
        vfp_load_reg64(frm, rm);
        switch (a->cc) {
        case 0: /* eq: Z */
            tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
                                frn, frm);
            break;
        case 1: /* vs: V */
            tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
                                frn, frm);
            break;
        case 2: /* ge: N == V -> N ^ V == 0 */
            tmp = tcg_temp_new_i64();
            tcg_gen_xor_i64(tmp, vf, nf);
            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
                                frn, frm);
            tcg_temp_free_i64(tmp);
            break;
        case 3: /* gt: !Z && N == V */
            tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
                                frn, frm);
            tmp = tcg_temp_new_i64();
            tcg_gen_xor_i64(tmp, vf, nf);
            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
                                dest, frm);
            tcg_temp_free_i64(tmp);
            break;
        }
        vfp_store_reg64(dest, rd);
        tcg_temp_free_i64(frn);
        tcg_temp_free_i64(frm);
        tcg_temp_free_i64(dest);

        tcg_temp_free_i64(zf);
        tcg_temp_free_i64(nf);
        tcg_temp_free_i64(vf);

        tcg_temp_free_i64(zero);
    } else {
        TCGv_i32 frn, frm, dest;
        TCGv_i32 tmp, zero;

        zero = tcg_const_i32(0);

        frn = tcg_temp_new_i32();
        frm = tcg_temp_new_i32();
        dest = tcg_temp_new_i32();
        vfp_load_reg32(frn, rn);
        vfp_load_reg32(frm, rm);
        switch (a->cc) {
        case 0: /* eq: Z */
            tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
                                frn, frm);
            break;
        case 1: /* vs: V */
            tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
                                frn, frm);
            break;
        case 2: /* ge: N == V -> N ^ V == 0 */
            tmp = tcg_temp_new_i32();
            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
                                frn, frm);
            tcg_temp_free_i32(tmp);
            break;
        case 3: /* gt: !Z && N == V */
            tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
                                frn, frm);
            tmp = tcg_temp_new_i32();
            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
                                dest, frm);
            tcg_temp_free_i32(tmp);
            break;
        }
        /* For fp16 the top half is always zeroes */
        if (sz == 1) {
            tcg_gen_andi_i32(dest, dest, 0xffff);
        }
        vfp_store_reg32(dest, rd);
        tcg_temp_free_i32(frn);
        tcg_temp_free_i32(frm);
        tcg_temp_free_i32(dest);
        tcg_temp_free_i32(zero);
    }

    return true;
}
/*
 * Table for converting the most common AArch32 encoding of
 * rounding mode to arm_fprounding order (which matches the
 * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
 */
static const uint8_t fp_decode_rm[] = {
    FPROUNDING_TIEAWAY,
    FPROUNDING_TIEEVEN,
    FPROUNDING_POSINF,
    FPROUNDING_NEGINF,
};
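
/*
 * Illustrative note (not from the original source): an AArch32 'rm'
 * field of 0b01 indexes entry 1 above and so selects FPROUNDING_TIEEVEN
 * (round to nearest, ties to even), matching the FPDecodeRM() pseudocode
 * referenced in the comment.
 */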
static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
{
    uint32_t rd, rm;
    int sz = a->sz;
    TCGv_ptr fpst;
    TCGv_i32 tcg_rmode;
    int rounding = fp_decode_rm[a->rm];

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vm | a->vd) & 0x10)) {
        return false;
    }

    rd = a->vd;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    if (sz == 1) {
        fpst = fpstatus_ptr(FPST_FPCR_F16);
    } else {
        fpst = fpstatus_ptr(FPST_FPCR);
    }

    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);

    if (sz == 3) {
        TCGv_i64 tcg_op;
        TCGv_i64 tcg_res;
        tcg_op = tcg_temp_new_i64();
        tcg_res = tcg_temp_new_i64();
        vfp_load_reg64(tcg_op, rm);
        gen_helper_rintd(tcg_res, tcg_op, fpst);
        vfp_store_reg64(tcg_res, rd);
        tcg_temp_free_i64(tcg_op);
        tcg_temp_free_i64(tcg_res);
    } else {
        TCGv_i32 tcg_op;
        TCGv_i32 tcg_res;
        tcg_op = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        vfp_load_reg32(tcg_op, rm);
        if (sz == 1) {
            gen_helper_rinth(tcg_res, tcg_op, fpst);
        } else {
            gen_helper_rints(tcg_res, tcg_op, fpst);
        }
        vfp_store_reg32(tcg_res, rd);
        tcg_temp_free_i32(tcg_op);
        tcg_temp_free_i32(tcg_res);
    }

    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    tcg_temp_free_i32(tcg_rmode);

    tcg_temp_free_ptr(fpst);
    return true;
}
static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
{
    uint32_t rd, rm;
    int sz = a->sz;
    TCGv_ptr fpst;
    TCGv_i32 tcg_rmode, tcg_shift;
    int rounding = fp_decode_rm[a->rm];
    bool is_signed = a->op;

    if (!dc_isar_feature(aa32_vcvt_dr, s)) {
        return false;
    }

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    rd = a->vd;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    if (sz == 1) {
        fpst = fpstatus_ptr(FPST_FPCR_F16);
    } else {
        fpst = fpstatus_ptr(FPST_FPCR);
    }

    tcg_shift = tcg_const_i32(0);

    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);

    if (sz == 3) {
        TCGv_i64 tcg_double, tcg_res;
        TCGv_i32 tcg_tmp;
        tcg_double = tcg_temp_new_i64();
        tcg_res = tcg_temp_new_i64();
        tcg_tmp = tcg_temp_new_i32();
        vfp_load_reg64(tcg_double, rm);
        if (is_signed) {
            gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
        } else {
            gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
        }
        tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
        vfp_store_reg32(tcg_tmp, rd);
        tcg_temp_free_i32(tcg_tmp);
        tcg_temp_free_i64(tcg_res);
        tcg_temp_free_i64(tcg_double);
    } else {
        TCGv_i32 tcg_single, tcg_res;
        tcg_single = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        vfp_load_reg32(tcg_single, rm);
        if (sz == 1) {
            if (is_signed) {
                gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst);
            } else {
                gen_helper_vfp_toulh(tcg_res, tcg_single, tcg_shift, fpst);
            }
        } else {
            if (is_signed) {
                gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
            } else {
                gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
            }
        }
        vfp_store_reg32(tcg_res, rd);
        tcg_temp_free_i32(tcg_res);
        tcg_temp_free_i32(tcg_single);
    }

    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    tcg_temp_free_i32(tcg_rmode);

    tcg_temp_free_i32(tcg_shift);

    tcg_temp_free_ptr(fpst);

    return true;
}
static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
{
    /* VMOV scalar to general purpose register */
    TCGv_i32 tmp;

    /* SIZE == MO_32 is a VFP instruction; otherwise NEON. */
    if (a->size == MO_32
        ? !dc_isar_feature(aa32_fpsp_v2, s)
        : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    read_neon_element32(tmp, a->vn, a->index, a->size | (a->u ? 0 : MO_SIGN));
    store_reg(s, a->rt, tmp);

    return true;
}
static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
{
    /* VMOV general purpose register to scalar */
    TCGv_i32 tmp;

    /* SIZE == MO_32 is a VFP instruction; otherwise NEON. */
    if (a->size == MO_32
        ? !dc_isar_feature(aa32_fpsp_v2, s)
        : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = load_reg(s, a->rt);
    write_neon_element32(tmp, a->vn, a->index, a->size);
    tcg_temp_free_i32(tmp);

    return true;
}
static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    /* VDUP (general purpose register) */
    TCGv_i32 tmp;
    int size, vec_size;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
        return false;
    }

    if (a->b && a->e) {
        return false;
    }

    if (a->q && (a->vn & 1)) {
        return false;
    }

    vec_size = a->q ? 16 : 8;
    if (a->b) {
        size = 0;
    } else if (a->e) {
        size = 1;
    } else {
        size = 2;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = load_reg(s, a->rt);
    tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn),
                         vec_size, vec_size, tmp);
    tcg_temp_free_i32(tmp);

    return true;
}
/*
 * M-profile provides two different sets of instructions that can
 * access floating point system registers: VMSR/VMRS (which move
 * to/from a general purpose register) and VLDR/VSTR sysreg (which
 * move directly to/from memory). In some cases there are also side
 * effects which must happen after any write to memory (which could
 * cause an exception). So we implement the common logic for the
 * sysreg access in gen_M_fp_sysreg_write() and gen_M_fp_sysreg_read(),
 * which take pointers to callback functions which will perform the
 * actual "read/write general purpose register" and "read/write
 * memory" operations.
 */

/*
 * Emit code to store the sysreg to its final destination; frees the
 * TCG temp 'value' it is passed.
 */
typedef void fp_sysreg_storefn(DisasContext *s, void *opaque, TCGv_i32 value);
/*
 * Emit code to load the value to be copied to the sysreg; returns
 * a new TCG temporary
 */
typedef TCGv_i32 fp_sysreg_loadfn(DisasContext *s, void *opaque);

/* Common decode/access checks for fp sysreg read/write */
typedef enum FPSysRegCheckResult {
    FPSysRegCheckFailed, /* caller should return false */
    FPSysRegCheckDone, /* caller should return true */
    FPSysRegCheckContinue, /* caller should continue generating code */
} FPSysRegCheckResult;
static FPSysRegCheckResult fp_sysreg_checks(DisasContext *s, int regno)
{
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return FPSysRegCheckFailed;
    }

    switch (regno) {
    case ARM_VFP_FPSCR:
    case QEMU_VFP_FPSCR_NZCV:
        break;
    case ARM_VFP_FPSCR_NZCVQC:
        if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
            return FPSysRegCheckFailed;
        }
        break;
    case ARM_VFP_FPCXT_S:
    case ARM_VFP_FPCXT_NS:
        if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
            return FPSysRegCheckFailed;
        }
        if (!s->v8m_secure) {
            return FPSysRegCheckFailed;
        }
        break;
    default:
        return FPSysRegCheckFailed;
    }

    /*
     * FPCXT_NS is a special case: it has specific handling for
     * "current FP state is inactive", and must do the PreserveFPState()
     * but not the usual full set of actions done by ExecuteFPCheck().
     * So we don't call vfp_access_check() and the callers must handle this.
     */
    if (regno != ARM_VFP_FPCXT_NS && !vfp_access_check(s)) {
        return FPSysRegCheckDone;
    }

    return FPSysRegCheckContinue;
}
static void gen_branch_fpInactive(DisasContext *s, TCGCond cond,
                                  TCGLabel *label)
{
    /*
     * FPCXT_NS is a special case: it has specific handling for
     * "current FP state is inactive", and must do the PreserveFPState()
     * but not the usual full set of actions done by ExecuteFPCheck().
     * We don't have a TB flag that matches the fpInactive check, so we
     * do it at runtime as we don't expect FPCXT_NS accesses to be frequent.
     *
     * Emit code that checks fpInactive and does a conditional
     * branch to label based on it:
     *  if cond is TCG_COND_NE then branch if fpInactive != 0 (ie if inactive)
     *  if cond is TCG_COND_EQ then branch if fpInactive == 0 (ie if active)
     */
    assert(cond == TCG_COND_EQ || cond == TCG_COND_NE);

    /* fpInactive = FPCCR_NS.ASPEN == 1 && CONTROL.FPCA == 0 */
    TCGv_i32 aspen, fpca;
    aspen = load_cpu_field(v7m.fpccr[M_REG_NS]);
    fpca = load_cpu_field(v7m.control[M_REG_S]);
    tcg_gen_andi_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
    tcg_gen_xori_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
    tcg_gen_andi_i32(fpca, fpca, R_V7M_CONTROL_FPCA_MASK);
    tcg_gen_or_i32(fpca, fpca, aspen);
    tcg_gen_brcondi_i32(tcg_invert_cond(cond), fpca, 0, label);
    tcg_temp_free_i32(aspen);
    tcg_temp_free_i32(fpca);
}
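
/*
 * Illustrative note (not from the original source): after the andi/xori
 * above, 'aspen' is nonzero exactly when FPCCR_NS.ASPEN == 0 and 'fpca'
 * is nonzero when CONTROL.FPCA == 1, so their OR is zero precisely in
 * the fpInactive case; that is why the branch compares against 0 with
 * the inverted condition.
 */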
static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
                                  fp_sysreg_loadfn *loadfn,
                                  void *opaque)
{
    /* Do a write to an M-profile floating point system register */
    TCGv_i32 tmp;
    TCGLabel *lab_end = NULL;

    switch (fp_sysreg_checks(s, regno)) {
    case FPSysRegCheckFailed:
        return false;
    case FPSysRegCheckDone:
        return true;
    case FPSysRegCheckContinue:
        break;
    }

    switch (regno) {
    case ARM_VFP_FPSCR:
        tmp = loadfn(s, opaque);
        gen_helper_vfp_set_fpscr(cpu_env, tmp);
        tcg_temp_free_i32(tmp);
        gen_lookup_tb(s);
        break;
    case ARM_VFP_FPSCR_NZCVQC:
    {
        TCGv_i32 fpscr;
        tmp = loadfn(s, opaque);
        /*
         * TODO: when we implement MVE, write the QC bit.
         * For non-MVE, QC is RES0.
         */
        tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
        fpscr = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
        tcg_gen_andi_i32(fpscr, fpscr, ~FPCR_NZCV_MASK);
        tcg_gen_or_i32(fpscr, fpscr, tmp);
        store_cpu_field(fpscr, vfp.xregs[ARM_VFP_FPSCR]);
        tcg_temp_free_i32(tmp);
        break;
    }
    case ARM_VFP_FPCXT_NS:
        lab_end = gen_new_label();
        /* fpInactive case: write is a NOP, so branch to end */
        gen_branch_fpInactive(s, TCG_COND_NE, lab_end);
        /* !fpInactive: PreserveFPState(), and reads same as FPCXT_S */
        gen_preserve_fp_state(s);
        /* fall through */
    case ARM_VFP_FPCXT_S:
    {
        TCGv_i32 sfpa, control;
        /*
         * Set FPSCR and CONTROL.SFPA from value; the new FPSCR takes
         * bits [27:0] from value and zeroes bits [31:28].
         */
        tmp = loadfn(s, opaque);
        sfpa = tcg_temp_new_i32();
        tcg_gen_shri_i32(sfpa, tmp, 31);
        control = load_cpu_field(v7m.control[M_REG_S]);
        tcg_gen_deposit_i32(control, control, sfpa,
                            R_V7M_CONTROL_SFPA_SHIFT, 1);
        store_cpu_field(control, v7m.control[M_REG_S]);
        tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
        gen_helper_vfp_set_fpscr(cpu_env, tmp);
        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(sfpa);
        break;
    }
    default:
        g_assert_not_reached();
    }
    if (lab_end) {
        gen_set_label(lab_end);
    }
    return true;
}
static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
                                 fp_sysreg_storefn *storefn,
                                 void *opaque)
{
    /* Do a read from an M-profile floating point system register */
    TCGv_i32 tmp;
    TCGLabel *lab_end = NULL;
    bool lookup_tb = false;

    switch (fp_sysreg_checks(s, regno)) {
    case FPSysRegCheckFailed:
        return false;
    case FPSysRegCheckDone:
        return true;
    case FPSysRegCheckContinue:
        break;
    }

    switch (regno) {
    case ARM_VFP_FPSCR:
        tmp = tcg_temp_new_i32();
        gen_helper_vfp_get_fpscr(tmp, cpu_env);
        storefn(s, opaque, tmp);
        break;
    case ARM_VFP_FPSCR_NZCVQC:
        /*
         * TODO: MVE has a QC bit, which we probably won't store
         * in the xregs[] field. For non-MVE, where QC is RES0,
         * we can just fall through to the FPSCR_NZCV case.
         */
    case QEMU_VFP_FPSCR_NZCV:
        /*
         * Read just NZCV; this is a special case to avoid the
         * helper call for the "VMRS to CPSR.NZCV" insn.
         */
        tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
        tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
        storefn(s, opaque, tmp);
        break;
    case ARM_VFP_FPCXT_S:
    {
        TCGv_i32 control, sfpa, fpscr;
        /* Bits [27:0] from FPSCR, bit [31] from CONTROL.SFPA */
        tmp = tcg_temp_new_i32();
        sfpa = tcg_temp_new_i32();
        gen_helper_vfp_get_fpscr(tmp, cpu_env);
        tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
        control = load_cpu_field(v7m.control[M_REG_S]);
        tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
        tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
        tcg_gen_or_i32(tmp, tmp, sfpa);
        tcg_temp_free_i32(sfpa);
        /*
         * Store result before updating FPSCR etc, in case
         * it is a memory write which causes an exception.
         */
        storefn(s, opaque, tmp);
        /*
         * Now we must reset FPSCR from FPDSCR_NS, and clear
         * CONTROL.SFPA; so we'll end the TB here.
         */
        tcg_gen_andi_i32(control, control, ~R_V7M_CONTROL_SFPA_MASK);
        store_cpu_field(control, v7m.control[M_REG_S]);
        fpscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
        gen_helper_vfp_set_fpscr(cpu_env, fpscr);
        tcg_temp_free_i32(fpscr);
        lookup_tb = true;
        break;
    }
    case ARM_VFP_FPCXT_NS:
    {
        TCGv_i32 control, sfpa, fpscr, fpdscr, zero;
        TCGLabel *lab_active = gen_new_label();

        lookup_tb = true;

        gen_branch_fpInactive(s, TCG_COND_EQ, lab_active);
        /* fpInactive case: reads as FPDSCR_NS */
        TCGv_i32 tmp = load_cpu_field(v7m.fpdscr[M_REG_NS]);
        storefn(s, opaque, tmp);
        lab_end = gen_new_label();
        tcg_gen_br(lab_end);

        gen_set_label(lab_active);
        /* !fpInactive: Reads the same as FPCXT_S, but side effects differ */
        gen_preserve_fp_state(s);
        tmp = tcg_temp_new_i32();
        sfpa = tcg_temp_new_i32();
        fpscr = tcg_temp_new_i32();
        gen_helper_vfp_get_fpscr(fpscr, cpu_env);
        tcg_gen_andi_i32(tmp, fpscr, ~FPCR_NZCV_MASK);
        control = load_cpu_field(v7m.control[M_REG_S]);
        tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
        tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
        tcg_gen_or_i32(tmp, tmp, sfpa);
        tcg_temp_free_i32(control);
        /* Store result before updating FPSCR, in case it faults */
        storefn(s, opaque, tmp);
        /* If SFPA is zero then set FPSCR from FPDSCR_NS */
        fpdscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
        zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_EQ, fpscr, sfpa, zero, fpdscr, fpscr);
        gen_helper_vfp_set_fpscr(cpu_env, fpscr);
        tcg_temp_free_i32(zero);
        tcg_temp_free_i32(sfpa);
        tcg_temp_free_i32(fpdscr);
        tcg_temp_free_i32(fpscr);
        break;
    }
    default:
        g_assert_not_reached();
    }

    if (lab_end) {
        gen_set_label(lab_end);
    }
    if (lookup_tb) {
        gen_lookup_tb(s);
    }
    return true;
}
static void fp_sysreg_to_gpr(DisasContext *s, void *opaque, TCGv_i32 value)
{
    arg_VMSR_VMRS *a = opaque;

    if (a->rt == 15) {
        /* Set the 4 flag bits in the CPSR */
        gen_set_nzcv(value);
        tcg_temp_free_i32(value);
    } else {
        store_reg(s, a->rt, value);
    }
}

static TCGv_i32 gpr_to_fp_sysreg(DisasContext *s, void *opaque)
{
    arg_VMSR_VMRS *a = opaque;

    return load_reg(s, a->rt);
}
static bool gen_M_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
{
    /*
     * Accesses to R15 are UNPREDICTABLE; we choose to undef.
     * FPSCR -> r15 is a special case which writes to the PSR flags;
     * set a->reg to a special value to tell gen_M_fp_sysreg_read()
     * we only care about the top 4 bits of FPSCR there.
     */
    if (a->rt == 15) {
        if (a->l && a->reg == ARM_VFP_FPSCR) {
            a->reg = QEMU_VFP_FPSCR_NZCV;
        } else {
            return false;
        }
    }

    if (a->l) {
        /* VMRS, move FP system register to gp register */
        return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_gpr, a);
    } else {
        /* VMSR, move gp register to FP system register */
        return gen_M_fp_sysreg_write(s, a->reg, gpr_to_fp_sysreg, a);
    }
}
static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
{
    TCGv_i32 tmp;
    bool ignore_vfp_enabled = false;

    if (arm_dc_feature(s, ARM_FEATURE_M)) {
        return gen_M_VMSR_VMRS(s, a);
    }

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    switch (a->reg) {
    case ARM_VFP_FPSID:
        /*
         * VFPv2 allows access to FPSID from userspace; VFPv3 restricts
         * all ID registers to privileged access only.
         */
        if (IS_USER(s) && dc_isar_feature(aa32_fpsp_v3, s)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_MVFR0:
    case ARM_VFP_MVFR1:
        if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_MVFR2:
        if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_FPSCR:
        break;
    case ARM_VFP_FPEXC:
        if (IS_USER(s)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_FPINST:
    case ARM_VFP_FPINST2:
        /* Not present in VFPv3 */
        if (IS_USER(s) || dc_isar_feature(aa32_fpsp_v3, s)) {
            return false;
        }
        break;
    default:
        return false;
    }

    if (!full_vfp_access_check(s, ignore_vfp_enabled)) {
        return true;
    }

    if (a->l) {
        /* VMRS, move VFP special register to gp register */
        switch (a->reg) {
        case ARM_VFP_MVFR0:
        case ARM_VFP_MVFR1:
        case ARM_VFP_MVFR2:
        case ARM_VFP_FPSID:
            if (s->current_el == 1) {
                TCGv_i32 tcg_reg, tcg_rt;

                gen_set_condexec(s);
                gen_set_pc_im(s, s->pc_curr);
                tcg_reg = tcg_const_i32(a->reg);
                tcg_rt = tcg_const_i32(a->rt);
                gen_helper_check_hcr_el2_trap(cpu_env, tcg_rt, tcg_reg);
                tcg_temp_free_i32(tcg_reg);
                tcg_temp_free_i32(tcg_rt);
            }
            /* fall through */
        case ARM_VFP_FPEXC:
        case ARM_VFP_FPINST:
        case ARM_VFP_FPINST2:
            tmp = load_cpu_field(vfp.xregs[a->reg]);
            break;
        case ARM_VFP_FPSCR:
            if (a->rt == 15) {
                tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
                tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
            } else {
                tmp = tcg_temp_new_i32();
                gen_helper_vfp_get_fpscr(tmp, cpu_env);
            }
            break;
        default:
            g_assert_not_reached();
        }

        if (a->rt == 15) {
            /* Set the 4 flag bits in the CPSR.  */
            gen_set_nzcv(tmp);
            tcg_temp_free_i32(tmp);
        } else {
            store_reg(s, a->rt, tmp);
        }
    } else {
        /* VMSR, move gp register to VFP special register */
        switch (a->reg) {
        case ARM_VFP_FPSID:
        case ARM_VFP_MVFR0:
        case ARM_VFP_MVFR1:
        case ARM_VFP_MVFR2:
            /* Writes are ignored.  */
            break;
        case ARM_VFP_FPSCR:
            tmp = load_reg(s, a->rt);
            gen_helper_vfp_set_fpscr(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            gen_lookup_tb(s);
            break;
        case ARM_VFP_FPEXC:
            /*
             * TODO: VFP subarchitecture support.
             * For now, keep the EN bit only
             */
            tmp = load_reg(s, a->rt);
            tcg_gen_andi_i32(tmp, tmp, 1 << 30);
            store_cpu_field(tmp, vfp.xregs[a->reg]);
            gen_lookup_tb(s);
            break;
        case ARM_VFP_FPINST:
        case ARM_VFP_FPINST2:
            tmp = load_reg(s, a->rt);
            store_cpu_field(tmp, vfp.xregs[a->reg]);
            break;
        default:
            g_assert_not_reached();
        }
    }

    return true;
}
static void fp_sysreg_to_memory(DisasContext *s, void *opaque, TCGv_i32 value)
{
    arg_vldr_sysreg *a = opaque;
    uint32_t offset = a->imm;
    TCGv_i32 addr;

    if (!a->a) {
        offset = -offset;
    }

    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        gen_helper_v8m_stackcheck(cpu_env, addr);
    }

    gen_aa32_st_i32(s, value, addr, get_mem_index(s),
                    MO_UL | MO_ALIGN | s->be_data);
    tcg_temp_free_i32(value);

    if (a->w) {
        /* writeback */
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
}
static TCGv_i32 memory_to_fp_sysreg(DisasContext *s, void *opaque)
{
    arg_vldr_sysreg *a = opaque;
    uint32_t offset = a->imm;
    TCGv_i32 addr;
    TCGv_i32 value = tcg_temp_new_i32();

    if (!a->a) {
        offset = -offset;
    }

    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        gen_helper_v8m_stackcheck(cpu_env, addr);
    }

    gen_aa32_ld_i32(s, value, addr, get_mem_index(s),
                    MO_UL | MO_ALIGN | s->be_data);

    if (a->w) {
        /* writeback */
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    return value;
}
static bool trans_VLDR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
        return false;
    }
    if (a->rn == 15) {
        return false;
    }
    return gen_M_fp_sysreg_write(s, a->reg, memory_to_fp_sysreg, a);
}

static bool trans_VSTR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
        return false;
    }
    if (a->rn == 15) {
        return false;
    }
    return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_memory, a);
}
static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
{
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->l) {
        /* VFP to general purpose register */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vn);
        tcg_gen_andi_i32(tmp, tmp, 0xffff);
        store_reg(s, a->rt, tmp);
    } else {
        /* general purpose register to VFP */
        tmp = load_reg(s, a->rt);
        tcg_gen_andi_i32(tmp, tmp, 0xffff);
        vfp_store_reg32(tmp, a->vn);
        tcg_temp_free_i32(tmp);
    }

    return true;
}
static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
{
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->l) {
        /* VFP to general purpose register */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vn);
        if (a->rt == 15) {
            /* Set the 4 flag bits in the CPSR.  */
            gen_set_nzcv(tmp);
            tcg_temp_free_i32(tmp);
        } else {
            store_reg(s, a->rt, tmp);
        }
    } else {
        /* general purpose register to VFP */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vn);
        tcg_temp_free_i32(tmp);
    }

    return true;
}
static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
{
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /*
     * VMOV between two general-purpose registers and two single precision
     * floating point registers
     */
    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->op) {
        /* fpreg to gpreg */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm);
        store_reg(s, a->rt, tmp);
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm + 1);
        store_reg(s, a->rt2, tmp);
    } else {
        /* gpreg to fpreg */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vm);
        tcg_temp_free_i32(tmp);
        tmp = load_reg(s, a->rt2);
        vfp_store_reg32(tmp, a->vm + 1);
        tcg_temp_free_i32(tmp);
    }

    return true;
}
static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
{
    TCGv_i32 tmp;

    /*
     * VMOV between two general-purpose registers and one double precision
     * floating point register.  Note that this does not require support
     * for double precision arithmetic.
     */
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->op) {
        /* fpreg to gpreg */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm * 2);
        store_reg(s, a->rt, tmp);
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm * 2 + 1);
        store_reg(s, a->rt2, tmp);
    } else {
        /* gpreg to fpreg */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vm * 2);
        tcg_temp_free_i32(tmp);
        tmp = load_reg(s, a->rt2);
        vfp_store_reg32(tmp, a->vm * 2 + 1);
        tcg_temp_free_i32(tmp);
    }

    return true;
}
static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a)
{
    uint32_t offset;
    TCGv_i32 addr, tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* imm8 field is offset/2 for fp16, unlike fp32 and fp64 */
    offset = a->imm << 1;
    if (!a->u) {
        offset = -offset;
    }

    /* For thumb, use of PC is UNPREDICTABLE.  */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i32();
    if (a->l) {
        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN);
        vfp_store_reg32(tmp, a->vd);
    } else {
        vfp_load_reg32(tmp, a->vd);
        gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN);
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(addr);

    return true;
}
static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
{
    uint32_t offset;
    TCGv_i32 addr, tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << 2;
    if (!a->u) {
        offset = -offset;
    }

    /* For thumb, use of PC is UNPREDICTABLE.  */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i32();
    if (a->l) {
        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
        vfp_store_reg32(tmp, a->vd);
    } else {
        vfp_load_reg32(tmp, a->vd);
        gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(addr);

    return true;
}
static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
{
    uint32_t offset;
    TCGv_i32 addr;
    TCGv_i64 tmp;

    /* Note that this does not require support for double arithmetic.  */
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << 2;
    if (!a->u) {
        offset = -offset;
    }

    /* For thumb, use of PC is UNPREDICTABLE.  */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i64();
    if (a->l) {
        gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4);
        vfp_store_reg64(tmp, a->vd);
    } else {
        vfp_load_reg64(tmp, a->vd);
        gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4);
    }
    tcg_temp_free_i64(tmp);
    tcg_temp_free_i32(addr);

    return true;
}
static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
{
    uint32_t offset;
    TCGv_i32 addr, tmp;
    int i, n;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    n = a->imm;

    if (n == 0 || (a->vd + n) > 32) {
        /*
         * UNPREDICTABLE cases for bad immediates: we choose to
         * UNDEF to avoid generating huge numbers of TCG ops
         */
        return false;
    }

    if (a->rn == 15 && a->w) {
        /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For thumb, use of PC is UNPREDICTABLE.  */
    addr = add_reg_for_lit(s, a->rn, 0);
    if (a->p) {
        /* pre-decrement */
        tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
    }

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        /*
         * Here 'addr' is the lowest address we will store to,
         * and is either the old SP (if post-increment) or
         * the new SP (if pre-decrement). For post-increment
         * where the old value is below the limit and the new
         * value is above, it is UNKNOWN whether the limit check
         * triggers; we choose to trigger.
         */
        gen_helper_v8m_stackcheck(cpu_env, addr);
    }

    offset = 4;
    tmp = tcg_temp_new_i32();
    for (i = 0; i < n; i++) {
        if (a->l) {
            /* load */
            gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
            vfp_store_reg32(tmp, a->vd + i);
        } else {
            /* store */
            vfp_load_reg32(tmp, a->vd + i);
            gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
        }
        tcg_gen_addi_i32(addr, addr, offset);
    }
    tcg_temp_free_i32(tmp);
    if (a->w) {
        /* writeback */
        if (a->p) {
            offset = -offset * n;
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }

    return true;
}
static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
{
    uint32_t offset;
    TCGv_i32 addr;
    TCGv_i64 tmp;
    int i, n;

    /* Note that this does not require support for double arithmetic.  */
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    n = a->imm >> 1;

    if (n == 0 || (a->vd + n) > 32 || n > 16) {
        /*
         * UNPREDICTABLE cases for bad immediates: we choose to
         * UNDEF to avoid generating huge numbers of TCG ops
         */
        return false;
    }

    if (a->rn == 15 && a->w) {
        /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For thumb, use of PC is UNPREDICTABLE.  */
    addr = add_reg_for_lit(s, a->rn, 0);
    if (a->p) {
        /* pre-decrement */
        tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
    }

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        /*
         * Here 'addr' is the lowest address we will store to,
         * and is either the old SP (if post-increment) or
         * the new SP (if pre-decrement). For post-increment
         * where the old value is below the limit and the new
         * value is above, it is UNKNOWN whether the limit check
         * triggers; we choose to trigger.
         */
        gen_helper_v8m_stackcheck(cpu_env, addr);
    }

    offset = 8;
    tmp = tcg_temp_new_i64();
    for (i = 0; i < n; i++) {
        if (a->l) {
            /* load */
            gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4);
            vfp_store_reg64(tmp, a->vd + i);
        } else {
            /* store */
            vfp_load_reg64(tmp, a->vd + i);
            gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4);
        }
        tcg_gen_addi_i32(addr, addr, offset);
    }
    tcg_temp_free_i64(tmp);
    if (a->w) {
        /* writeback */
        if (a->p) {
            offset = -offset * n;
        } else if (a->imm & 1) {
            offset = 4;
        } else {
            offset = 0;
        }

        if (offset != 0) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }

    return true;
}
/*
 * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp().
 * The callback should emit code to write a value to vd. If
 * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd
 * will contain the old value of the relevant VFP register;
 * otherwise it must be written to only.
 */
typedef void VFPGen3OpSPFn(TCGv_i32 vd,
                           TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst);
typedef void VFPGen3OpDPFn(TCGv_i64 vd,
                           TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);

/*
 * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp().
 * The callback should emit code to write a value to vd (which
 * should be written to only).
 */
typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);
/*
 * Return true if the specified S reg is in a scalar bank
 * (ie if it is s0..s7)
 */
static inline bool vfp_sreg_is_scalar(int reg)
{
    return (reg & 0x18) == 0;
}

/*
 * Return true if the specified D reg is in a scalar bank
 * (ie if it is d0..d3 or d16..d19)
 */
static inline bool vfp_dreg_is_scalar(int reg)
{
    return (reg & 0xc) == 0;
}
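
/*
 * Illustrative examples (not from the original source): s5 is scalar
 * (5 & 0x18 == 0) while s9 is not (9 & 0x18 == 0x08); d2 and d16 are
 * scalar (2 & 0xc == 0, 16 & 0xc == 0) while d5 is not.
 */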
/*
 * Advance the S reg number forwards by delta within its bank
 * (ie increment the low 3 bits but leave the rest the same)
 */
static inline int vfp_advance_sreg(int reg, int delta)
{
    return ((reg + delta) & 0x7) | (reg & ~0x7);
}

/*
 * Advance the D reg number forwards by delta within its bank
 * (ie increment the low 2 bits but leave the rest the same)
 */
static inline int vfp_advance_dreg(int reg, int delta)
{
    return ((reg + delta) & 0x3) | (reg & ~0x3);
}
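
/*
 * Illustrative examples (not from the original source):
 * vfp_advance_sreg(15, 1) wraps to s8 rather than moving on to s16,
 * because only the low 3 bits advance; likewise vfp_advance_dreg(19, 1)
 * wraps to d16 within its bank.
 */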
/*
 * Perform a 3-operand VFP data processing instruction. fn is the
 * callback to do the actual operation; this function deals with the
 * code to handle looping around for VFP vector processing.
 */
static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 f0, f1, fd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is.  */
        if (vfp_sreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = s->vec_stride + 1;

            if (vfp_sreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i32();
    f1 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();
    fpst = fpstatus_ptr(FPST_FPCR);

    vfp_load_reg32(f0, vn);
    vfp_load_reg32(f1, vm);

    for (;;) {
        if (reads_vd) {
            vfp_load_reg32(fd, vd);
        }
        fn(fd, f0, f1, fpst);
        vfp_store_reg32(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_sreg(vd, delta_d);
        vn = vfp_advance_sreg(vn, delta_d);
        vfp_load_reg32(f0, vn);
        if (delta_m) {
            vm = vfp_advance_sreg(vm, delta_m);
            vfp_load_reg32(f1, vm);
        }
    }

    tcg_temp_free_i32(f0);
    tcg_temp_free_i32(f1);
    tcg_temp_free_i32(fd);
    tcg_temp_free_ptr(fpst);

    return true;
}
static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
{
    /*
     * Do a half-precision operation. Functionally this is
     * the same as do_vfp_3op_sp(), except:
     *  - it uses the FPST_FPCR_F16
     *  - it doesn't need the VFP vector handling (fp16 is a
     *    v8 feature, and in v8 VFP vectors don't exist)
     *  - it does the aa32_fp16_arith feature test
     */
    TCGv_i32 f0, f1, fd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    f0 = tcg_temp_new_i32();
    f1 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();
    fpst = fpstatus_ptr(FPST_FPCR_F16);

    vfp_load_reg32(f0, vn);
    vfp_load_reg32(f1, vm);

    if (reads_vd) {
        vfp_load_reg32(fd, vd);
    }
    fn(fd, f0, f1, fpst);
    vfp_store_reg32(fd, vd);

    tcg_temp_free_i32(f0);
    tcg_temp_free_i32(f1);
    tcg_temp_free_i32(fd);
    tcg_temp_free_ptr(fpst);

    return true;
}
static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 f0, f1, fd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is.  */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = (s->vec_stride >> 1) + 1;

            if (vfp_dreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i64();
    f1 = tcg_temp_new_i64();
    fd = tcg_temp_new_i64();
    fpst = fpstatus_ptr(FPST_FPCR);

    vfp_load_reg64(f0, vn);
    vfp_load_reg64(f1, vm);

    for (;;) {
        if (reads_vd) {
            vfp_load_reg64(fd, vd);
        }
        fn(fd, f0, f1, fpst);
        vfp_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }
        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
        vn = vfp_advance_dreg(vn, delta_d);
        vfp_load_reg64(f0, vn);
        if (delta_m) {
            vm = vfp_advance_dreg(vm, delta_m);
            vfp_load_reg64(f1, vm);
        }
    }

    tcg_temp_free_i64(f0);
    tcg_temp_free_i64(f1);
    tcg_temp_free_i64(fd);
    tcg_temp_free_ptr(fpst);

    return true;
}
static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 f0, fd;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is.  */
        if (vfp_sreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = s->vec_stride + 1;

            if (vfp_sreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();

    vfp_load_reg32(f0, vm);

    for (;;) {
        fn(fd, f0);
        vfp_store_reg32(fd, vd);

        if (veclen == 0) {
            break;
        }

        if (delta_m == 0) {
            /* single source one-many */
            while (veclen--) {
                vd = vfp_advance_sreg(vd, delta_d);
                vfp_store_reg32(fd, vd);
            }
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_sreg(vd, delta_d);
        vm = vfp_advance_sreg(vm, delta_m);
        vfp_load_reg32(f0, vm);
    }

    tcg_temp_free_i32(f0);
    tcg_temp_free_i32(fd);

    return true;
}
static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
{
    /*
     * Do a half-precision operation. Functionally this is
     * the same as do_vfp_2op_sp(), except:
     *  - it doesn't need the VFP vector handling (fp16 is a
     *    v8 feature, and in v8 VFP vectors don't exist)
     *  - it does the aa32_fp16_arith feature test
     */
    TCGv_i32 f0;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    f0 = tcg_temp_new_i32();
    vfp_load_reg32(f0, vm);
    fn(f0, f0);
    vfp_store_reg32(f0, vd);
    tcg_temp_free_i32(f0);

    return true;
}
static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 f0, fd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is.  */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = (s->vec_stride >> 1) + 1;

            if (vfp_dreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i64();
    fd = tcg_temp_new_i64();

    vfp_load_reg64(f0, vm);

    for (;;) {
        fn(fd, f0);
        vfp_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }

        if (delta_m == 0) {
            /* single source one-many */
            while (veclen--) {
                vd = vfp_advance_dreg(vd, delta_d);
                vfp_store_reg64(fd, vd);
            }
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
        vm = vfp_advance_dreg(vm, delta_m);
        vfp_load_reg64(f0, vm);
    }

    tcg_temp_free_i64(f0);
    tcg_temp_free_i64(fd);

    return true;
}
static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
{
    return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
{
    return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
{
    return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
}
static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(tmp, tmp);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
{
    return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(tmp, tmp);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
{
    return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(tmp, tmp);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
{
    return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
}
static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(vd, vd);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(vd, vd);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(vd, vd);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
}
static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(tmp, tmp);
    gen_helper_vfp_negh(vd, vd);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(tmp, tmp);
    gen_helper_vfp_negs(vd, vd);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(tmp, tmp);
    gen_helper_vfp_negd(vd, vd);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
}
static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false);
}

static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
}

static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
}
static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_mulh(vd, vn, vm, fpst);
    gen_helper_vfp_negh(vd, vd);
}

static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_muls(vd, vn, vm, fpst);
    gen_helper_vfp_negs(vd, vd);
}

static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_muld(vd, vn, vm, fpst);
    gen_helper_vfp_negd(vd, vd);
}

static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
}
static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false);
}

static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
}

static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
}

static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false);
}

static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
}

static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
}
static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false);
}

static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
}

static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
}
static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_hp(s, gen_helper_vfp_minnumh,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_sp(s, gen_helper_vfp_minnums,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMAXNM_sp(DisasContext *s, arg_VMAXNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_sp(s, gen_helper_vfp_maxnums,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMINNM_dp(DisasContext *s, arg_VMINNM_dp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_dp(s, gen_helper_vfp_minnumd,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_dp(s, gen_helper_vfp_maxnumd,
                         a->vd, a->vn, a->vm, false);
}
static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
{
    /*
     * VFNMA : fd = muladd(-fd,  fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA  : fd = muladd( fd,  fn, fm)
     * VFMS  : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps.  NB that doing the negations here as separate
     * steps is correct : an input NaN should come out with its sign
     * bit flipped if it is a negated-input.
     */
    TCGv_ptr fpst;
    TCGv_i32 vn, vm, vd;

    /*
     * Present in VFPv4 only, and only with the FP16 extension.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_fp16_arith, s) ||
        !dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vn = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i32();

    vfp_load_reg32(vn, a->vn);
    vfp_load_reg32(vm, a->vm);
    if (neg_n) {
        /* VFNMS, VFMS */
        gen_helper_vfp_negh(vn, vn);
    }
    vfp_load_reg32(vd, a->vd);
    if (neg_d) {
        /* VFNMA, VFNMS */
        gen_helper_vfp_negh(vd, vd);
    }
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
    vfp_store_reg32(vd, a->vd);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(vn);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i32(vd);

    return true;
}
static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
{
    /*
     * VFNMA : fd = muladd(-fd,  fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA  : fd = muladd( fd,  fn, fm)
     * VFMS  : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps.  NB that doing the negations here as separate
     * steps is correct : an input NaN should come out with its sign
     * bit flipped if it is a negated-input.
     */
    TCGv_ptr fpst;
    TCGv_i32 vn, vm, vd;

    /*
     * Present in VFPv4 only.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }
    /*
     * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
     * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
     */
    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vn = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i32();

    vfp_load_reg32(vn, a->vn);
    vfp_load_reg32(vm, a->vm);
    if (neg_n) {
        /* VFNMS, VFMS */
        gen_helper_vfp_negs(vn, vn);
    }
    vfp_load_reg32(vd, a->vd);
    if (neg_d) {
        /* VFNMA, VFNMS */
        gen_helper_vfp_negs(vd, vd);
    }
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
    vfp_store_reg32(vd, a->vd);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(vn);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i32(vd);

    return true;
}
static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
{
    /*
     * VFNMA : fd = muladd(-fd,  fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA  : fd = muladd( fd,  fn, fm)
     * VFMS  : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps. NB that doing the negations here as separate
     * steps is correct: an input NaN should come out with its sign
     * bit flipped if it is a negated input.
     */
    TCGv_ptr fpst;
    TCGv_i64 vn, vm, vd;

    /*
     * Present in VFPv4 only.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }
    /*
     * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
     * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
     */
    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vn = tcg_temp_new_i64();
    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i64();

    vfp_load_reg64(vn, a->vn);
    vfp_load_reg64(vm, a->vm);
    if (neg_n) {
        /* VFNMS, VFMS */
        gen_helper_vfp_negd(vn, vn);
    }
    vfp_load_reg64(vd, a->vd);
    if (neg_d) {
        /* VFNMA, VFNMS */
        gen_helper_vfp_negd(vd, vd);
    }
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
    vfp_store_reg64(vd, a->vd);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(vn);
    tcg_temp_free_i64(vm);
    tcg_temp_free_i64(vd);

    return true;
}
#define MAKE_ONE_VFM_TRANS_FN(INSN, PREC, NEGN, NEGD)           \
    static bool trans_##INSN##_##PREC(DisasContext *s,          \
                                      arg_##INSN##_##PREC *a)   \
    {                                                           \
        return do_vfm_##PREC(s, a, NEGN, NEGD);                 \
    }

#define MAKE_VFM_TRANS_FNS(PREC) \
    MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \
    MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \
    MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \
    MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true)

MAKE_VFM_TRANS_FNS(hp)
MAKE_VFM_TRANS_FNS(sp)
MAKE_VFM_TRANS_FNS(dp)
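/*
 * For illustration, MAKE_VFM_TRANS_FNS(sp) expands (among others) to:
 *
 *   static bool trans_VFMS_sp(DisasContext *s, arg_VFMS_sp *a)
 *   {
 *       return do_vfm_sp(s, a, true, false);
 *   }
 *
 * i.e. VFMS is the variant that negates fn but not fd.
 */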
static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a)
{
    TCGv_i32 fd;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fd = tcg_const_i32(vfp_expand_imm(MO_16, a->imm));
    vfp_store_reg32(fd, a->vd);
    tcg_temp_free_i32(fd);
    return true;
}
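/*
 * Worked example for the immediate expansion used here and below:
 * imm8 == 0x70 (sign 0, bit 6 set, low bits 110000) expands for MO_32
 * to 0x3f800000, i.e. 1.0f, which is what "vmov.f32 s0, #1.0" encodes.
 */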
static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
{
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 fd;
    uint32_t vd;

    vd = a->vd;

    if (!dc_isar_feature(aa32_fpsp_v3, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_sreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = s->vec_stride + 1;
        }
    }

    fd = tcg_const_i32(vfp_expand_imm(MO_32, a->imm));

    for (;;) {
        vfp_store_reg32(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_sreg(vd, delta_d);
    }

    tcg_temp_free_i32(fd);
    return true;
}
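/*
 * Note on the loop above: this is the legacy VFP short-vector mode.
 * If the destination is one of the scalar-bank registers the operation
 * stays scalar; otherwise the store is repeated vec_len + 1 times in
 * total, advancing the destination by the configured stride each time.
 */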
static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
{
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 fd;
    uint32_t vd;

    vd = a->vd;

    if (!dc_isar_feature(aa32_fpdp_v3, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = (s->vec_stride >> 1) + 1;
        }
    }

    fd = tcg_const_i64(vfp_expand_imm(MO_64, a->imm));

    for (;;) {
        vfp_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
    }

    tcg_temp_free_i64(fd);
    return true;
}
#define DO_VFP_2OP(INSN, PREC, FN)                              \
    static bool trans_##INSN##_##PREC(DisasContext *s,          \
                                      arg_##INSN##_##PREC *a)   \
    {                                                           \
        return do_vfp_2op_##PREC(s, FN, a->vd, a->vm);          \
    }

DO_VFP_2OP(VMOV_reg, sp, tcg_gen_mov_i32)
DO_VFP_2OP(VMOV_reg, dp, tcg_gen_mov_i64)

DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh)
DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss)
DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd)

DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh)
DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs)
DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd)
static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
{
    gen_helper_vfp_sqrth(vd, vm, cpu_env);
}

static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
{
    gen_helper_vfp_sqrts(vd, vm, cpu_env);
}

static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
{
    gen_helper_vfp_sqrtd(vd, vm, cpu_env);
}

DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp)
DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp)
DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp)
static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
{
    TCGv_i32 vd, vm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();

    vfp_load_reg32(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i32(vm, 0);
    } else {
        vfp_load_reg32(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmpeh(vd, vm, cpu_env);
    } else {
        gen_helper_vfp_cmph(vd, vm, cpu_env);
    }

    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(vm);

    return true;
}
static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
{
    TCGv_i32 vd, vm;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();

    vfp_load_reg32(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i32(vm, 0);
    } else {
        vfp_load_reg32(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmpes(vd, vm, cpu_env);
    } else {
        gen_helper_vfp_cmps(vd, vm, cpu_env);
    }

    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(vm);

    return true;
}
static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
{
    TCGv_i64 vd, vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i64();
    vm = tcg_temp_new_i64();

    vfp_load_reg64(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i64(vm, 0);
    } else {
        vfp_load_reg64(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmped(vd, vm, cpu_env);
    } else {
        gen_helper_vfp_cmpd(vd, vm, cpu_env);
    }

    tcg_temp_free_i64(vd);
    tcg_temp_free_i64(vm);

    return true;
}
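/*
 * Note: the cmpe* helpers implement the "E" variant of VCMP, which
 * raises Invalid Operation for any NaN input; the plain cmp* helpers
 * signal only on signalling NaNs. Both report their result via the
 * FPSCR NZCV flags, which is why they take cpu_env.
 */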
static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    /* The T bit tells us if we want the low or high 16 bits of Vm */
    tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}
static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;
    TCGv_i64 vd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    /* The T bit tells us if we want the low or high 16 bits of Vm */
    tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
    vd = tcg_temp_new_i64();
    gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i64(vd);
    return true;
}
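/*
 * Note for these half-precision conversions: get_ahp_flag() reads
 * FPSCR.AHP, which selects between IEEE half-precision and the ARM
 * alternative format (no infinities or NaNs) in the fcvt helpers.
 */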
static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();

    vfp_load_reg32(tmp, a->vm);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
    tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}
static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;
    TCGv_i64 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    vm = tcg_temp_new_i64();

    vfp_load_reg64(vm, a->vm);
    gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
    tcg_temp_free_i64(vm);
    tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}
static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_rinth(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}
static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rints(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}
static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rintd(tmp, tmp, fpst);
    vfp_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    return true;
}
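/*
 * VRINTR ("round to integral") uses whatever rounding mode is currently
 * selected in the FPSCR, which is why the rint* helpers above take only
 * the fp status pointer and no explicit rounding mode.
 */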
static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rinth(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_rmode);
    tcg_temp_free_i32(tmp);
    return true;
}
static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rints(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_rmode);
    tcg_temp_free_i32(tmp);
    return true;
}
static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rintd(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    vfp_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    tcg_temp_free_i32(tcg_rmode);
    return true;
}
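/*
 * Note the set_rmode bracketing above: gen_helper_set_rmode() installs
 * the new rounding mode and returns the previous one in its destination,
 * so calling it twice with the same TCG temp forces round-to-zero for
 * the rint and then restores the original FPSCR rounding mode.
 */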
static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_rinth_exact(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}
static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rints_exact(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}
static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rintd_exact(tmp, tmp, fpst);
    vfp_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    return true;
}
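/*
 * VRINTX differs from VRINTR only in that it raises the Inexact
 * exception when the result is not numerically equal to the input,
 * hence the *_exact variants of the rint helpers.
 */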
static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
{
    TCGv_i64 vd;
    TCGv_i32 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i64();
    vfp_load_reg32(vm, a->vm);
    gen_helper_vfp_fcvtds(vd, vm, cpu_env);
    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i64(vd);
    return true;
}
static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
{
    TCGv_i64 vm;
    TCGv_i32 vd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i64();
    vfp_load_reg64(vm, a->vm);
    gen_helper_vfp_fcvtsd(vd, vm, cpu_env);
    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i64(vm);
    return true;
}
static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    if (a->s) {
        /* i32 -> f16 */
        gen_helper_vfp_sitoh(vm, vm, fpst);
    } else {
        /* u32 -> f16 */
        gen_helper_vfp_uitoh(vm, vm, fpst);
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}
static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    if (a->s) {
        /* i32 -> f32 */
        gen_helper_vfp_sitos(vm, vm, fpst);
    } else {
        /* u32 -> f32 */
        gen_helper_vfp_uitos(vm, vm, fpst);
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}
static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
{
    TCGv_i32 vm;
    TCGv_i64 vd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i64();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    if (a->s) {
        /* i32 -> f64 */
        gen_helper_vfp_sitod(vd, vm, fpst);
    } else {
        /* u32 -> f64 */
        gen_helper_vfp_uitod(vd, vm, fpst);
    }
    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i64(vd);
    tcg_temp_free_ptr(fpst);
    return true;
}
static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
{
    TCGv_i32 vd;
    TCGv_i64 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_jscvt, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i32();
    vfp_load_reg64(vm, a->vm);
    gen_helper_vjcvt(vd, vm, cpu_env);
    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i64(vm);
    tcg_temp_free_i32(vd);
    return true;
}
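/*
 * VJCVT is the "JavaScript" conversion: double to signed 32-bit integer
 * with round-toward-zero and out-of-range results wrapped modulo 2^32,
 * matching ECMAScript's ToInt32. The helper takes cpu_env because the
 * architected behaviour includes flag updates beyond ordinary fp status
 * handling.
 */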
static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
{
    TCGv_i32 vd, shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i32();
    vfp_load_reg32(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR_F16);
    shift = tcg_const_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_toslh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_toulh_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(shift);
    tcg_temp_free_ptr(fpst);
    return true;
}
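/*
 * For these fixed-point conversions the immediate encodes the position
 * of the binary point: frac_bits = size - imm, where opc bit 0 (sx)
 * selects a 32-bit (sx=1) or 16-bit (sx=0) fixed-point value. Per the
 * helper names, the fixed-to-float cases (0-3) round to nearest and the
 * float-to-fixed cases (4-7) round to zero.
 */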
static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
{
    TCGv_i32 vd, shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fpsp_v3, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i32();
    vfp_load_reg32(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR);
    shift = tcg_const_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(shift);
    tcg_temp_free_ptr(fpst);
    return true;
}
static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
{
    TCGv_i64 vd;
    TCGv_i32 shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fpdp_v3, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i64();
    vfp_load_reg64(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR);
    shift = tcg_const_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i64(vd);
    tcg_temp_free_i32(shift);
    tcg_temp_free_ptr(fpst);
    return true;
}
static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR_F16);
    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizh(vm, vm, fpst);
        } else {
            gen_helper_vfp_tosih(vm, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizh(vm, vm, fpst);
        } else {
            gen_helper_vfp_touih(vm, vm, fpst);
        }
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}
static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizs(vm, vm, fpst);
        } else {
            gen_helper_vfp_tosis(vm, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizs(vm, vm, fpst);
        } else {
            gen_helper_vfp_touis(vm, vm, fpst);
        }
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}
static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
{
    TCGv_i32 vd;
    TCGv_i64 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i32();
    vfp_load_reg64(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizd(vd, vm, fpst);
        } else {
            gen_helper_vfp_tosid(vd, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizd(vd, vm, fpst);
        } else {
            gen_helper_vfp_touid(vd, vm, fpst);
        }
    }
    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i64(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}
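/*
 * In the float-to-int conversions above, the rz flag distinguishes VCVT
 * (always round toward zero, the *iz* helpers) from VCVTR, which rounds
 * using the current FPSCR rounding mode.
 */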
static bool trans_VINS(DisasContext *s, arg_VINS *a)
{
    TCGv_i32 rd, rm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Insert low half of Vm into high half of Vd */
    rm = tcg_temp_new_i32();
    rd = tcg_temp_new_i32();
    vfp_load_reg32(rm, a->vm);
    vfp_load_reg32(rd, a->vd);
    tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
    vfp_store_reg32(rd, a->vd);
    tcg_temp_free_i32(rm);
    tcg_temp_free_i32(rd);
    return true;
}
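/*
 * The deposit above writes bits [15:0] of rm into bits [31:16] of rd,
 * leaving rd's low half intact: VINS moves a half-precision value into
 * the top half of a single-precision register without touching the rest.
 */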
static bool trans_VMOVX(DisasContext *s, arg_VINS *a)
{
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Set Vd to high half of Vm */
    rm = tcg_temp_new_i32();
    vfp_load_reg32(rm, a->vm);
    tcg_gen_shri_i32(rm, rm, 16);
    vfp_store_reg32(rm, a->vd);
    tcg_temp_free_i32(rm);
    return true;
}