/*
 * ARM translation: AArch32 VFP instructions
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 * Copyright (c) 2019 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

/*
 * This file is intended to be included from translate.c; it uses
 * some macros and definitions provided by that file.
 * It might be possible to convert it to a standalone .c file eventually.
 */
/* Include the generated VFP decoder */
#include "decode-vfp.c.inc"
#include "decode-vfp-uncond.c.inc"
/*
 * The imm8 encodes the sign bit, enough bits to represent an exponent in
 * the range 01....1xx to 10....0xx, and the most significant 4 bits of
 * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
 */
uint64_t vfp_expand_imm(int size, uint8_t imm8)
{
    uint64_t imm;

    switch (size) {
    case MO_64:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
              (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
              extract32(imm8, 0, 6);
        imm <<= 48;
        break;
    case MO_32:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
              (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
              (extract32(imm8, 0, 6) << 3);
        imm <<= 16;
        break;
    case MO_16:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
              (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
              (extract32(imm8, 0, 6) << 6);
        break;
    default:
        g_assert_not_reached();
    }
    return imm;
}
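/*
 * Worked example (added; not in the original source): for the common
 * encoding of 1.0, imm8 == 0x70, vfp_expand_imm(MO_32, 0x70) computes
 * (0x3e00 | (0x30 << 3)) << 16 == 0x3f800000, which is the IEEE
 * single-precision bit pattern of 1.0.
 */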
/*
 * Return the offset of a 16-bit half of the specified VFP single-precision
 * register. If top is true, returns the top 16 bits; otherwise the bottom
 * 16 bits.
 */
static inline long vfp_f16_offset(unsigned reg, bool top)
{
    long offs = vfp_reg_offset(false, reg);
#ifdef HOST_WORDS_BIGENDIAN
    if (!top) {
        offs += 2;
    }
#else
    if (top) {
        offs += 2;
    }
#endif
    return offs;
}
/*
 * Generate code for M-profile lazy FP state preservation if needed;
 * this corresponds to the pseudocode PreserveFPState() function.
 */
static void gen_preserve_fp_state(DisasContext *s)
{
    if (s->v7m_lspact) {
        /*
         * Lazy state saving affects external memory and also the NVIC,
         * so we must mark it as an IO operation for icount (and cause
         * this to be the last insn in the TB).
         */
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            s->base.is_jmp = DISAS_UPDATE_EXIT;
            gen_io_start();
        }
        gen_helper_v7m_preserve_fp_state(cpu_env);
        /*
         * If the preserve_fp_state helper doesn't throw an exception
         * then it will clear LSPACT; we don't need to repeat this for
         * any further FP insns in this TB.
         */
        s->v7m_lspact = false;
    }
}
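/*
 * Note (added): s->v7m_lspact in gen_preserve_fp_state() above is the
 * translation-time copy of FPCCR.LSPACT; when it is already clear there
 * is no lazy FP state pending and the function emits no code at all.
 */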
/*
 * Check that VFP access is enabled. If it is, do the necessary
 * M-profile lazy-FP handling and then return true.
 * If not, emit code to generate an appropriate exception and
 * return false.
 * The ignore_vfp_enabled argument specifies that we should ignore
 * whether VFP is enabled via FPEXC[EN]: this should be true for FMXR/FMRX
 * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
 */
static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
{
    if (s->fp_excp_el) {
        /* M-profile handled this earlier, in disas_m_nocp() */
        assert(!arm_dc_feature(s, ARM_FEATURE_M));
        gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
                           syn_fp_access_trap(1, 0xe, false),
                           s->fp_excp_el);
        return false;
    }

    if (!s->vfp_enabled && !ignore_vfp_enabled) {
        assert(!arm_dc_feature(s, ARM_FEATURE_M));
        unallocated_encoding(s);
        return false;
    }

    if (arm_dc_feature(s, ARM_FEATURE_M)) {
        /* Handle M-profile lazy FP state mechanics */

        /* Trigger lazy-state preservation if necessary */
        gen_preserve_fp_state(s);

        /* Update ownership of FP context: set FPCCR.S to match current state */
        if (s->v8m_fpccr_s_wrong) {
            TCGv_i32 tmp;

            tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
            if (s->v8m_secure) {
                tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
            } else {
                tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
            }
            store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
            /* Don't need to do this for any further FP insns in this TB */
            s->v8m_fpccr_s_wrong = false;
        }

        if (s->v7m_new_fp_ctxt_needed) {
            /*
             * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA
             * and the FPSCR.
             */
            TCGv_i32 control, fpscr;
            uint32_t bits = R_V7M_CONTROL_FPCA_MASK;

            fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
            gen_helper_vfp_set_fpscr(cpu_env, fpscr);
            tcg_temp_free_i32(fpscr);
            /*
             * We don't need to arrange to end the TB, because the only
             * parts of FPSCR which we cache in the TB flags are the VECLEN
             * and VECSTRIDE, and those don't exist for M-profile.
             */

            if (s->v8m_secure) {
                bits |= R_V7M_CONTROL_SFPA_MASK;
            }
            control = load_cpu_field(v7m.control[M_REG_S]);
            tcg_gen_ori_i32(control, control, bits);
            store_cpu_field(control, v7m.control[M_REG_S]);
            /* Don't need to do this for any further FP insns in this TB */
            s->v7m_new_fp_ctxt_needed = false;
        }
    }

    return true;
}
/*
 * The most usual kind of VFP access check, for everything except
 * FMXR/FMRX to the always-available special registers.
 */
static bool vfp_access_check(DisasContext *s)
{
    return full_vfp_access_check(s, false);
}
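/*
 * Added note: VSEL below writes Vn to Vd when the encoded condition
 * holds for the current CPSR.NZCV flags and Vm otherwise; for example
 * (assuming standard Arm VSEL semantics) VSELGE.F32 s0, s1, s2 puts
 * s1 into s0 when N == V and s2 into s0 otherwise.
 */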
static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
{
    uint32_t rd, rn, rm;
    int sz = a->sz;

    if (!dc_isar_feature(aa32_vsel, s)) {
        return false;
    }

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vm | a->vn | a->vd) & 0x10)) {
        return false;
    }

    rd = a->vd;
    rn = a->vn;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    if (sz == 3) {
        TCGv_i64 frn, frm, dest;
        TCGv_i64 tmp, zero, zf, nf, vf;

        zero = tcg_const_i64(0);

        frn = tcg_temp_new_i64();
        frm = tcg_temp_new_i64();
        dest = tcg_temp_new_i64();

        zf = tcg_temp_new_i64();
        nf = tcg_temp_new_i64();
        vf = tcg_temp_new_i64();

        tcg_gen_extu_i32_i64(zf, cpu_ZF);
        tcg_gen_ext_i32_i64(nf, cpu_NF);
        tcg_gen_ext_i32_i64(vf, cpu_VF);

        vfp_load_reg64(frn, rn);
        vfp_load_reg64(frm, rm);
        switch (a->cc) {
        case 0: /* eq: Z */
            tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
                                frn, frm);
            break;
        case 1: /* vs: V */
            tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
                                frn, frm);
            break;
        case 2: /* ge: N == V -> N ^ V == 0 */
            tmp = tcg_temp_new_i64();
            tcg_gen_xor_i64(tmp, vf, nf);
            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
                                frn, frm);
            tcg_temp_free_i64(tmp);
            break;
        case 3: /* gt: !Z && N == V */
            tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
                                frn, frm);
            tmp = tcg_temp_new_i64();
            tcg_gen_xor_i64(tmp, vf, nf);
            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
                                dest, frm);
            tcg_temp_free_i64(tmp);
            break;
        }
        vfp_store_reg64(dest, rd);
        tcg_temp_free_i64(frn);
        tcg_temp_free_i64(frm);
        tcg_temp_free_i64(dest);

        tcg_temp_free_i64(zf);
        tcg_temp_free_i64(nf);
        tcg_temp_free_i64(vf);

        tcg_temp_free_i64(zero);
    } else {
        TCGv_i32 frn, frm, dest;
        TCGv_i32 tmp, zero;

        zero = tcg_const_i32(0);

        frn = tcg_temp_new_i32();
        frm = tcg_temp_new_i32();
        dest = tcg_temp_new_i32();
        vfp_load_reg32(frn, rn);
        vfp_load_reg32(frm, rm);
        switch (a->cc) {
        case 0: /* eq: Z */
            tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
                                frn, frm);
            break;
        case 1: /* vs: V */
            tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
                                frn, frm);
            break;
        case 2: /* ge: N == V -> N ^ V == 0 */
            tmp = tcg_temp_new_i32();
            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
                                frn, frm);
            tcg_temp_free_i32(tmp);
            break;
        case 3: /* gt: !Z && N == V */
            tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
                                frn, frm);
            tmp = tcg_temp_new_i32();
            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
                                dest, frm);
            tcg_temp_free_i32(tmp);
            break;
        }
        /* For fp16 the top half is always zeroes */
        if (sz == 1) {
            tcg_gen_andi_i32(dest, dest, 0xffff);
        }
        vfp_store_reg32(dest, rd);
        tcg_temp_free_i32(frn);
        tcg_temp_free_i32(frm);
        tcg_temp_free_i32(dest);
        tcg_temp_free_i32(zero);
    }

    return true;
}
/*
 * Table for converting the most common AArch32 encoding of
 * rounding mode to arm_fprounding order (which matches the
 * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
 */
static const uint8_t fp_decode_rm[] = {
    FPROUNDING_TIEAWAY,
    FPROUNDING_TIEEVEN,
    FPROUNDING_POSINF,
    FPROUNDING_NEGINF,
};
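/*
 * Added example: VRINTA/VRINTN/VRINTP/VRINTM encode rm as 0/1/2/3, so
 * fp_decode_rm maps them to tie-away, tie-even, +Inf and -Inf rounding
 * respectively.
 */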
static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
{
    uint32_t rd, rm;
    int sz = a->sz;
    TCGv_ptr fpst;
    TCGv_i32 tcg_rmode;
    int rounding = fp_decode_rm[a->rm];

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vm | a->vd) & 0x10)) {
        return false;
    }

    rd = a->vd;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    if (sz == 1) {
        fpst = fpstatus_ptr(FPST_FPCR_F16);
    } else {
        fpst = fpstatus_ptr(FPST_FPCR);
    }

    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
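    /*
     * Added note: gen_helper_set_rmode() installs the new rounding mode
     * in fpst and returns the previous one in tcg_rmode, so the second
     * call at the end of the function restores the original mode.
     */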
    if (sz == 3) {
        TCGv_i64 tcg_op;
        TCGv_i64 tcg_res;
        tcg_op = tcg_temp_new_i64();
        tcg_res = tcg_temp_new_i64();
        vfp_load_reg64(tcg_op, rm);
        gen_helper_rintd(tcg_res, tcg_op, fpst);
        vfp_store_reg64(tcg_res, rd);
        tcg_temp_free_i64(tcg_op);
        tcg_temp_free_i64(tcg_res);
    } else {
        TCGv_i32 tcg_op;
        TCGv_i32 tcg_res;
        tcg_op = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        vfp_load_reg32(tcg_op, rm);
        if (sz == 1) {
            gen_helper_rinth(tcg_res, tcg_op, fpst);
        } else {
            gen_helper_rints(tcg_res, tcg_op, fpst);
        }
        vfp_store_reg32(tcg_res, rd);
        tcg_temp_free_i32(tcg_op);
        tcg_temp_free_i32(tcg_res);
    }

    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    tcg_temp_free_i32(tcg_rmode);

    tcg_temp_free_ptr(fpst);
    return true;
}
static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
{
    uint32_t rd, rm;
    int sz = a->sz;
    TCGv_ptr fpst;
    TCGv_i32 tcg_rmode, tcg_shift;
    int rounding = fp_decode_rm[a->rm];
    bool is_signed = a->op;

    if (!dc_isar_feature(aa32_vcvt_dr, s)) {
        return false;
    }

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    rd = a->vd;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    if (sz == 1) {
        fpst = fpstatus_ptr(FPST_FPCR_F16);
    } else {
        fpst = fpstatus_ptr(FPST_FPCR);
    }

    tcg_shift = tcg_const_i32(0);
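    /*
     * Added note: the conversion helpers used below are the fixed-point
     * variants; a shift count of zero turns them into the plain
     * float-to-integer conversions these VCVT encodings need.
     */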
    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);

    if (sz == 3) {
        TCGv_i64 tcg_double, tcg_res;
        TCGv_i32 tcg_tmp;
        tcg_double = tcg_temp_new_i64();
        tcg_res = tcg_temp_new_i64();
        tcg_tmp = tcg_temp_new_i32();
        vfp_load_reg64(tcg_double, rm);
        if (is_signed) {
            gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
        } else {
            gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
        }
        tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
        vfp_store_reg32(tcg_tmp, rd);
        tcg_temp_free_i32(tcg_tmp);
        tcg_temp_free_i64(tcg_res);
        tcg_temp_free_i64(tcg_double);
    } else {
        TCGv_i32 tcg_single, tcg_res;
        tcg_single = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        vfp_load_reg32(tcg_single, rm);
        if (sz == 1) {
            if (is_signed) {
                gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst);
            } else {
                gen_helper_vfp_toulh(tcg_res, tcg_single, tcg_shift, fpst);
            }
        } else {
            if (is_signed) {
                gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
            } else {
                gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
            }
        }
        vfp_store_reg32(tcg_res, rd);
        tcg_temp_free_i32(tcg_res);
        tcg_temp_free_i32(tcg_single);
    }

    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    tcg_temp_free_i32(tcg_rmode);

    tcg_temp_free_i32(tcg_shift);

    tcg_temp_free_ptr(fpst);
    return true;
}
static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
{
    /* VMOV scalar to general purpose register */
    TCGv_i32 tmp;

    /* SIZE == MO_32 is a VFP instruction; otherwise NEON. */
    if (a->size == MO_32
        ? !dc_isar_feature(aa32_fpsp_v2, s)
        : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    read_neon_element32(tmp, a->vn, a->index, a->size | (a->u ? 0 : MO_SIGN));
    store_reg(s, a->rt, tmp);

    return true;
}
static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
{
    /* VMOV general purpose register to scalar */
    TCGv_i32 tmp;

    /* SIZE == MO_32 is a VFP instruction; otherwise NEON. */
    if (a->size == MO_32
        ? !dc_isar_feature(aa32_fpsp_v2, s)
        : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = load_reg(s, a->rt);
    write_neon_element32(tmp, a->vn, a->index, a->size);
    tcg_temp_free_i32(tmp);

    return true;
}
static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    /* VDUP (general purpose register) */
    TCGv_i32 tmp;
    int size, vec_size;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
        return false;
    }

    if (a->b && a->e) {
        return false;
    }

    if (a->q && (a->vn & 1)) {
        return false;
    }

    vec_size = a->q ? 16 : 8;
    if (a->b) {
        size = 0;
    } else if (a->e) {
        size = 1;
    } else {
        size = 2;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = load_reg(s, a->rt);
    tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn),
                         vec_size, vec_size, tmp);
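    /*
     * Added note: tcg_gen_gvec_dup_i32() replicates the low 2^size bytes
     * of tmp across all vec_size bytes of the destination, i.e. across
     * one D register or a whole Q register.
     */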
    tcg_temp_free_i32(tmp);

    return true;
}
/*
 * M-profile provides two different sets of instructions that can
 * access floating point system registers: VMSR/VMRS (which move
 * to/from a general purpose register) and VLDR/VSTR sysreg (which
 * move directly to/from memory). In some cases there are also side
 * effects which must happen after any write to memory (which could
 * cause an exception). So we implement the common logic for the
 * sysreg access in gen_M_fp_sysreg_write() and gen_M_fp_sysreg_read(),
 * which take pointers to callback functions which will perform the
 * actual "read/write general purpose register" and "read/write
 * memory" operations.
 */

/*
 * Emit code to store the sysreg to its final destination; frees the
 * TCG temp 'value' it is passed.
 */
typedef void fp_sysreg_storefn(DisasContext *s, void *opaque, TCGv_i32 value);
/*
 * Emit code to load the value to be copied to the sysreg; returns
 * a new TCG temporary
 */
typedef TCGv_i32 fp_sysreg_loadfn(DisasContext *s, void *opaque);
/* Common decode/access checks for fp sysreg read/write */
typedef enum FPSysRegCheckResult {
    FPSysRegCheckFailed, /* caller should return false */
    FPSysRegCheckDone, /* caller should return true */
    FPSysRegCheckContinue, /* caller should continue generating code */
} FPSysRegCheckResult;
static FPSysRegCheckResult fp_sysreg_checks(DisasContext *s, int regno)
{
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return FPSysRegCheckFailed;
    }

    switch (regno) {
    case ARM_VFP_FPSCR:
    case QEMU_VFP_FPSCR_NZCV:
        break;
    case ARM_VFP_FPSCR_NZCVQC:
        if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
            return FPSysRegCheckFailed;
        }
        break;
    case ARM_VFP_FPCXT_S:
    case ARM_VFP_FPCXT_NS:
        if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
            return FPSysRegCheckFailed;
        }
        if (!s->v8m_secure) {
            return FPSysRegCheckFailed;
        }
        break;
    default:
        return FPSysRegCheckFailed;
    }

    /*
     * FPCXT_NS is a special case: it has specific handling for
     * "current FP state is inactive", and must do the PreserveFPState()
     * but not the usual full set of actions done by ExecuteFPCheck().
     * So we don't call vfp_access_check() and the callers must handle this.
     */
    if (regno != ARM_VFP_FPCXT_NS && !vfp_access_check(s)) {
        return FPSysRegCheckDone;
    }

    return FPSysRegCheckContinue;
}
static void gen_branch_fpInactive(DisasContext *s, TCGCond cond,
                                  TCGLabel *label)
{
    /*
     * FPCXT_NS is a special case: it has specific handling for
     * "current FP state is inactive", and must do the PreserveFPState()
     * but not the usual full set of actions done by ExecuteFPCheck().
     * We don't have a TB flag that matches the fpInactive check, so we
     * do it at runtime as we don't expect FPCXT_NS accesses to be frequent.
     *
     * Emit code that checks fpInactive and does a conditional
     * branch to label based on it:
     *  if cond is TCG_COND_NE then branch if fpInactive != 0 (ie if inactive)
     *  if cond is TCG_COND_EQ then branch if fpInactive == 0 (ie if active)
     */
    assert(cond == TCG_COND_EQ || cond == TCG_COND_NE);

    /* fpInactive = FPCCR_NS.ASPEN == 1 && CONTROL.FPCA == 0 */
    TCGv_i32 aspen, fpca;
    aspen = load_cpu_field(v7m.fpccr[M_REG_NS]);
    fpca = load_cpu_field(v7m.control[M_REG_S]);
    tcg_gen_andi_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
    tcg_gen_xori_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
    tcg_gen_andi_i32(fpca, fpca, R_V7M_CONTROL_FPCA_MASK);
    tcg_gen_or_i32(fpca, fpca, aspen);
    tcg_gen_brcondi_i32(tcg_invert_cond(cond), fpca, 0, label);
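    /*
     * Added note: after the masking above, aspen is zero iff ASPEN == 1
     * and fpca is zero iff FPCA == 0, so their OR is zero exactly when
     * fpInactive is true; inverting the condition for the brcond then
     * gives the TCG_COND_NE/TCG_COND_EQ contract described above.
     */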
    tcg_temp_free_i32(aspen);
    tcg_temp_free_i32(fpca);
}
static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
                                  fp_sysreg_loadfn *loadfn,
                                  void *opaque)
{
    /* Do a write to an M-profile floating point system register */
    TCGv_i32 tmp;
    TCGLabel *lab_end = NULL;

    switch (fp_sysreg_checks(s, regno)) {
    case FPSysRegCheckFailed:
        return false;
    case FPSysRegCheckDone:
        return true;
    case FPSysRegCheckContinue:
        break;
    }

    switch (regno) {
    case ARM_VFP_FPSCR:
        tmp = loadfn(s, opaque);
        gen_helper_vfp_set_fpscr(cpu_env, tmp);
        tcg_temp_free_i32(tmp);
        gen_lookup_tb(s);
        break;
    case ARM_VFP_FPSCR_NZCVQC:
    {
        TCGv_i32 fpscr;
        tmp = loadfn(s, opaque);
        /*
         * TODO: when we implement MVE, write the QC bit.
         * For non-MVE, QC is RES0.
         */
        tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
        fpscr = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
        tcg_gen_andi_i32(fpscr, fpscr, ~FPCR_NZCV_MASK);
        tcg_gen_or_i32(fpscr, fpscr, tmp);
        store_cpu_field(fpscr, vfp.xregs[ARM_VFP_FPSCR]);
        tcg_temp_free_i32(tmp);
        break;
    }
    case ARM_VFP_FPCXT_NS:
        lab_end = gen_new_label();
        /* fpInactive case: write is a NOP, so branch to end */
        gen_branch_fpInactive(s, TCG_COND_NE, lab_end);
        /* !fpInactive: PreserveFPState(), and write behaves the same as FPCXT_S */
        gen_preserve_fp_state(s);
        /* fall through */
    case ARM_VFP_FPCXT_S:
    {
        TCGv_i32 sfpa, control;
        /*
         * Set FPSCR and CONTROL.SFPA from value; the new FPSCR takes
         * bits [27:0] from value and zeroes bits [31:28].
         */
        tmp = loadfn(s, opaque);
        sfpa = tcg_temp_new_i32();
        tcg_gen_shri_i32(sfpa, tmp, 31);
        control = load_cpu_field(v7m.control[M_REG_S]);
        tcg_gen_deposit_i32(control, control, sfpa,
                            R_V7M_CONTROL_SFPA_SHIFT, 1);
        store_cpu_field(control, v7m.control[M_REG_S]);
        tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
        gen_helper_vfp_set_fpscr(cpu_env, tmp);
        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(sfpa);
        break;
    }
    default:
        g_assert_not_reached();
    }
    if (lab_end) {
        gen_set_label(lab_end);
    }
    return true;
}
static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
                                 fp_sysreg_storefn *storefn,
                                 void *opaque)
{
    /* Do a read from an M-profile floating point system register */
    TCGv_i32 tmp;
    TCGLabel *lab_end = NULL;
    bool lookup_tb = false;

    switch (fp_sysreg_checks(s, regno)) {
    case FPSysRegCheckFailed:
        return false;
    case FPSysRegCheckDone:
        return true;
    case FPSysRegCheckContinue:
        break;
    }

    switch (regno) {
    case ARM_VFP_FPSCR:
        tmp = tcg_temp_new_i32();
        gen_helper_vfp_get_fpscr(tmp, cpu_env);
        storefn(s, opaque, tmp);
        break;
    case ARM_VFP_FPSCR_NZCVQC:
        /*
         * TODO: MVE has a QC bit, which we probably won't store
         * in the xregs[] field. For non-MVE, where QC is RES0,
         * we can just fall through to the FPSCR_NZCV case.
         */
    case QEMU_VFP_FPSCR_NZCV:
        /*
         * Read just NZCV; this is a special case to avoid the
         * helper call for the "VMRS to CPSR.NZCV" insn.
         */
        tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
        tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
        storefn(s, opaque, tmp);
        break;
    case ARM_VFP_FPCXT_S:
    {
        TCGv_i32 control, sfpa, fpscr;
        /* Bits [27:0] from FPSCR, bit [31] from CONTROL.SFPA */
        tmp = tcg_temp_new_i32();
        sfpa = tcg_temp_new_i32();
        gen_helper_vfp_get_fpscr(tmp, cpu_env);
        tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
        control = load_cpu_field(v7m.control[M_REG_S]);
        tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
        tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
        tcg_gen_or_i32(tmp, tmp, sfpa);
        tcg_temp_free_i32(sfpa);
        /*
         * Store result before updating FPSCR etc, in case
         * it is a memory write which causes an exception.
         */
        storefn(s, opaque, tmp);
        /*
         * Now we must reset FPSCR from FPDSCR_NS, and clear
         * CONTROL.SFPA; so we'll end the TB here.
         */
        tcg_gen_andi_i32(control, control, ~R_V7M_CONTROL_SFPA_MASK);
        store_cpu_field(control, v7m.control[M_REG_S]);
        fpscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
        gen_helper_vfp_set_fpscr(cpu_env, fpscr);
        tcg_temp_free_i32(fpscr);
        lookup_tb = true;
        break;
    }
    case ARM_VFP_FPCXT_NS:
    {
        TCGv_i32 control, sfpa, fpscr, fpdscr, zero;
        TCGLabel *lab_active = gen_new_label();

        lookup_tb = true;

        gen_branch_fpInactive(s, TCG_COND_EQ, lab_active);
        /* fpInactive case: reads as FPDSCR_NS */
        TCGv_i32 tmp = load_cpu_field(v7m.fpdscr[M_REG_NS]);
        storefn(s, opaque, tmp);
        lab_end = gen_new_label();
        tcg_gen_br(lab_end);

        gen_set_label(lab_active);
        /* !fpInactive: Reads the same as FPCXT_S, but side effects differ */
        gen_preserve_fp_state(s);
        tmp = tcg_temp_new_i32();
        sfpa = tcg_temp_new_i32();
        fpscr = tcg_temp_new_i32();
        gen_helper_vfp_get_fpscr(fpscr, cpu_env);
        tcg_gen_andi_i32(tmp, fpscr, ~FPCR_NZCV_MASK);
        control = load_cpu_field(v7m.control[M_REG_S]);
        tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
        tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
        tcg_gen_or_i32(tmp, tmp, sfpa);
        tcg_temp_free_i32(control);
        /* Store result before updating FPSCR, in case it faults */
        storefn(s, opaque, tmp);
        /* If SFPA is zero then set FPSCR from FPDSCR_NS */
        fpdscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
        zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_EQ, fpscr, sfpa, zero, fpdscr, fpscr);
        gen_helper_vfp_set_fpscr(cpu_env, fpscr);
        tcg_temp_free_i32(zero);
        tcg_temp_free_i32(sfpa);
        tcg_temp_free_i32(fpdscr);
        tcg_temp_free_i32(fpscr);
        break;
    }
    default:
        g_assert_not_reached();
    }

    if (lab_end) {
        gen_set_label(lab_end);
    }
    if (lookup_tb) {
        gen_lookup_tb(s);
    }
    return true;
}
static void fp_sysreg_to_gpr(DisasContext *s, void *opaque, TCGv_i32 value)
{
    arg_VMSR_VMRS *a = opaque;

    if (a->rt == 15) {
        /* Set the 4 flag bits in the CPSR */
        gen_set_nzcv(value);
        tcg_temp_free_i32(value);
    } else {
        store_reg(s, a->rt, value);
    }
}

static TCGv_i32 gpr_to_fp_sysreg(DisasContext *s, void *opaque)
{
    arg_VMSR_VMRS *a = opaque;

    return load_reg(s, a->rt);
}
static bool gen_M_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
{
    /*
     * Accesses to R15 are UNPREDICTABLE; we choose to undef.
     * FPSCR -> r15 is a special case which writes to the PSR flags;
     * set a->reg to a special value to tell gen_M_fp_sysreg_read()
     * we only care about the top 4 bits of FPSCR there.
     */
    if (a->rt == 15) {
        if (a->l && a->reg == ARM_VFP_FPSCR) {
            a->reg = QEMU_VFP_FPSCR_NZCV;
        } else {
            return false;
        }
    }

    if (a->l) {
        /* VMRS, move FP system register to gp register */
        return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_gpr, a);
    } else {
        /* VMSR, move gp register to FP system register */
        return gen_M_fp_sysreg_write(s, a->reg, gpr_to_fp_sysreg, a);
    }
}
static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
{
    TCGv_i32 tmp;
    bool ignore_vfp_enabled = false;

    if (arm_dc_feature(s, ARM_FEATURE_M)) {
        return gen_M_VMSR_VMRS(s, a);
    }

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    switch (a->reg) {
    case ARM_VFP_FPSID:
        /*
         * VFPv2 allows access to FPSID from userspace; VFPv3 restricts
         * all ID registers to privileged access only.
         */
        if (IS_USER(s) && dc_isar_feature(aa32_fpsp_v3, s)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_MVFR0:
    case ARM_VFP_MVFR1:
        if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_MVFR2:
        if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_FPSCR:
        break;
    case ARM_VFP_FPEXC:
        if (IS_USER(s)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_FPINST:
    case ARM_VFP_FPINST2:
        /* Not present in VFPv3 */
        if (IS_USER(s) || dc_isar_feature(aa32_fpsp_v3, s)) {
            return false;
        }
        break;
    default:
        return false;
    }

    if (!full_vfp_access_check(s, ignore_vfp_enabled)) {
        return true;
    }

    if (a->l) {
        /* VMRS, move VFP special register to gp register */
        switch (a->reg) {
        case ARM_VFP_MVFR0:
        case ARM_VFP_MVFR1:
        case ARM_VFP_MVFR2:
        case ARM_VFP_FPSID:
            if (s->current_el == 1) {
                TCGv_i32 tcg_reg, tcg_rt;

                gen_set_condexec(s);
                gen_set_pc_im(s, s->pc_curr);
                tcg_reg = tcg_const_i32(a->reg);
                tcg_rt = tcg_const_i32(a->rt);
                gen_helper_check_hcr_el2_trap(cpu_env, tcg_rt, tcg_reg);
                tcg_temp_free_i32(tcg_reg);
                tcg_temp_free_i32(tcg_rt);
            }
            /* fall through */
        case ARM_VFP_FPEXC:
        case ARM_VFP_FPINST:
        case ARM_VFP_FPINST2:
            tmp = load_cpu_field(vfp.xregs[a->reg]);
            break;
        case ARM_VFP_FPSCR:
            if (a->rt == 15) {
                tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
                tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
            } else {
                tmp = tcg_temp_new_i32();
                gen_helper_vfp_get_fpscr(tmp, cpu_env);
            }
            break;
        default:
            g_assert_not_reached();
        }

        if (a->rt == 15) {
            /* Set the 4 flag bits in the CPSR. */
            gen_set_nzcv(tmp);
            tcg_temp_free_i32(tmp);
        } else {
            store_reg(s, a->rt, tmp);
        }
    } else {
        /* VMSR, move gp register to VFP special register */
        switch (a->reg) {
        case ARM_VFP_FPSID:
        case ARM_VFP_MVFR0:
        case ARM_VFP_MVFR1:
        case ARM_VFP_MVFR2:
            /* Writes are ignored. */
            break;
        case ARM_VFP_FPSCR:
            tmp = load_reg(s, a->rt);
            gen_helper_vfp_set_fpscr(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            gen_lookup_tb(s);
            break;
        case ARM_VFP_FPEXC:
            /*
             * TODO: VFP subarchitecture support.
             * For now, keep the EN bit only
             */
            tmp = load_reg(s, a->rt);
            tcg_gen_andi_i32(tmp, tmp, 1 << 30);
            store_cpu_field(tmp, vfp.xregs[a->reg]);
            gen_lookup_tb(s);
            break;
        case ARM_VFP_FPINST:
        case ARM_VFP_FPINST2:
            tmp = load_reg(s, a->rt);
            store_cpu_field(tmp, vfp.xregs[a->reg]);
            break;
        default:
            g_assert_not_reached();
        }
    }

    return true;
}
static void fp_sysreg_to_memory(DisasContext *s, void *opaque, TCGv_i32 value)
{
    arg_vldr_sysreg *a = opaque;
    uint32_t offset = a->imm;
    TCGv_i32 addr;

    if (!a->a) {
        offset = -offset;
    }

    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        gen_helper_v8m_stackcheck(cpu_env, addr);
    }

    gen_aa32_st_i32(s, value, addr, get_mem_index(s),
                    MO_UL | MO_ALIGN | s->be_data);
    tcg_temp_free_i32(value);

    if (a->w) {
        /* writeback */
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
}
static TCGv_i32 memory_to_fp_sysreg(DisasContext *s, void *opaque)
{
    arg_vldr_sysreg *a = opaque;
    uint32_t offset = a->imm;
    TCGv_i32 addr;
    TCGv_i32 value = tcg_temp_new_i32();

    if (!a->a) {
        offset = -offset;
    }

    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        gen_helper_v8m_stackcheck(cpu_env, addr);
    }

    gen_aa32_ld_i32(s, value, addr, get_mem_index(s),
                    MO_UL | MO_ALIGN | s->be_data);

    if (a->w) {
        /* writeback */
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    return value;
}
static bool trans_VLDR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
        return false;
    }
    if (a->rn == 15) {
        return false;
    }
    return gen_M_fp_sysreg_write(s, a->reg, memory_to_fp_sysreg, a);
}

static bool trans_VSTR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
        return false;
    }
    if (a->rn == 15) {
        return false;
    }
    return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_memory, a);
}
static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
{
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->l) {
        /* VFP to general purpose register */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vn);
        tcg_gen_andi_i32(tmp, tmp, 0xffff);
        store_reg(s, a->rt, tmp);
    } else {
        /* general purpose register to VFP */
        tmp = load_reg(s, a->rt);
        tcg_gen_andi_i32(tmp, tmp, 0xffff);
        vfp_store_reg32(tmp, a->vn);
        tcg_temp_free_i32(tmp);
    }

    return true;
}
static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
{
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->l) {
        /* VFP to general purpose register */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vn);
        if (a->rt == 15) {
            /* Set the 4 flag bits in the CPSR. */
            gen_set_nzcv(tmp);
            tcg_temp_free_i32(tmp);
        } else {
            store_reg(s, a->rt, tmp);
        }
    } else {
        /* general purpose register to VFP */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vn);
        tcg_temp_free_i32(tmp);
    }

    return true;
}
static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
{
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /*
     * VMOV between two general-purpose registers and two single precision
     * floating point registers
     */
    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->op) {
        /* fpreg to gpreg */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm);
        store_reg(s, a->rt, tmp);
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm + 1);
        store_reg(s, a->rt2, tmp);
    } else {
        /* gpreg to fpreg */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vm);
        tcg_temp_free_i32(tmp);
        tmp = load_reg(s, a->rt2);
        vfp_store_reg32(tmp, a->vm + 1);
        tcg_temp_free_i32(tmp);
    }

    return true;
}
static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
{
    TCGv_i32 tmp;

    /*
     * VMOV between two general-purpose registers and one double precision
     * floating point register. Note that this does not require support
     * for double precision arithmetic.
     */
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->op) {
        /* fpreg to gpreg */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm * 2);
        store_reg(s, a->rt, tmp);
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm * 2 + 1);
        store_reg(s, a->rt2, tmp);
    } else {
        /* gpreg to fpreg */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vm * 2);
        tcg_temp_free_i32(tmp);
        tmp = load_reg(s, a->rt2);
        vfp_store_reg32(tmp, a->vm * 2 + 1);
        tcg_temp_free_i32(tmp);
    }

    return true;
}
static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a)
{
    uint32_t offset;
    TCGv_i32 addr, tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* imm8 field is offset/2 for fp16, unlike fp32 and fp64 */
    offset = a->imm << 1;
    if (!a->u) {
        offset = -offset;
    }

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i32();
    if (a->l) {
        gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
        vfp_store_reg32(tmp, a->vd);
    } else {
        vfp_load_reg32(tmp, a->vd);
        gen_aa32_st16(s, tmp, addr, get_mem_index(s));
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(addr);

    return true;
}
static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
{
    uint32_t offset;
    TCGv_i32 addr, tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << 2;
    if (!a->u) {
        offset = -offset;
    }

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i32();
    if (a->l) {
        gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
        vfp_store_reg32(tmp, a->vd);
    } else {
        vfp_load_reg32(tmp, a->vd);
        gen_aa32_st32(s, tmp, addr, get_mem_index(s));
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(addr);

    return true;
}
static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
{
    uint32_t offset;
    TCGv_i32 addr;
    TCGv_i64 tmp;

    /* Note that this does not require support for double arithmetic. */
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << 2;
    if (!a->u) {
        offset = -offset;
    }

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i64();
    if (a->l) {
        gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
        vfp_store_reg64(tmp, a->vd);
    } else {
        vfp_load_reg64(tmp, a->vd);
        gen_aa32_st64(s, tmp, addr, get_mem_index(s));
    }
    tcg_temp_free_i64(tmp);
    tcg_temp_free_i32(addr);

    return true;
}
static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
{
    uint32_t offset;
    TCGv_i32 addr, tmp;
    int i, n;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    n = a->imm;

    if (n == 0 || (a->vd + n) > 32) {
        /*
         * UNPREDICTABLE cases for bad immediates: we choose to
         * UNDEF to avoid generating huge numbers of TCG ops
         */
        return false;
    }
    if (a->rn == 15 && a->w) {
        /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, 0);
    if (a->p) {
        /* pre-decrement */
        tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
    }

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        /*
         * Here 'addr' is the lowest address we will store to,
         * and is either the old SP (if post-increment) or
         * the new SP (if pre-decrement). For post-increment
         * where the old value is below the limit and the new
         * value is above, it is UNKNOWN whether the limit check
         * triggers; we choose to trigger.
         */
        gen_helper_v8m_stackcheck(cpu_env, addr);
    }

    offset = 4;
    tmp = tcg_temp_new_i32();
    for (i = 0; i < n; i++) {
        if (a->l) {
            /* load */
            gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
            vfp_store_reg32(tmp, a->vd + i);
        } else {
            /* store */
            vfp_load_reg32(tmp, a->vd + i);
            gen_aa32_st32(s, tmp, addr, get_mem_index(s));
        }
        tcg_gen_addi_i32(addr, addr, offset);
    }
    tcg_temp_free_i32(tmp);
    if (a->w) {
        /* writeback */
        if (a->p) {
            offset = -offset * n;
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }

    return true;
}
static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
{
    uint32_t offset;
    TCGv_i32 addr;
    TCGv_i64 tmp;
    int i, n;

    /* Note that this does not require support for double arithmetic. */
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    n = a->imm >> 1;

    if (n == 0 || (a->vd + n) > 32 || n > 16) {
        /*
         * UNPREDICTABLE cases for bad immediates: we choose to
         * UNDEF to avoid generating huge numbers of TCG ops
         */
        return false;
    }
    if (a->rn == 15 && a->w) {
        /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, 0);
    if (a->p) {
        /* pre-decrement */
        tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
    }

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        /*
         * Here 'addr' is the lowest address we will store to,
         * and is either the old SP (if post-increment) or
         * the new SP (if pre-decrement). For post-increment
         * where the old value is below the limit and the new
         * value is above, it is UNKNOWN whether the limit check
         * triggers; we choose to trigger.
         */
        gen_helper_v8m_stackcheck(cpu_env, addr);
    }

    offset = 8;
    tmp = tcg_temp_new_i64();
    for (i = 0; i < n; i++) {
        if (a->l) {
            /* load */
            gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
            vfp_store_reg64(tmp, a->vd + i);
        } else {
            /* store */
            vfp_load_reg64(tmp, a->vd + i);
            gen_aa32_st64(s, tmp, addr, get_mem_index(s));
        }
        tcg_gen_addi_i32(addr, addr, offset);
    }
    tcg_temp_free_i64(tmp);
    if (a->w) {
        /* writeback */
        if (a->p) {
            offset = -offset * n;
        } else if (a->imm & 1) {
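            /*
             * Added note: an odd imm is the FLDMX/FSTMX form, which
             * transfers one extra word, so the writeback below adds a
             * final 4 bytes on top of the 8 bytes per register.
             */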
            offset = 4;
        } else {
            offset = 0;
        }

        if (offset != 0) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }

    return true;
}
/*
 * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp().
 * The callback should emit code to write a value to vd. If
 * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd
 * will contain the old value of the relevant VFP register;
 * otherwise it must be written to only.
 */
typedef void VFPGen3OpSPFn(TCGv_i32 vd,
                           TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst);
typedef void VFPGen3OpDPFn(TCGv_i64 vd,
                           TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);

/*
 * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp().
 * The callback should emit code to write a value to vd (which
 * should be written to only).
 */
typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);
/*
 * Return true if the specified S reg is in a scalar bank
 * (ie if it is s0..s7)
 */
static inline bool vfp_sreg_is_scalar(int reg)
{
    return (reg & 0x18) == 0;
}

/*
 * Return true if the specified D reg is in a scalar bank
 * (ie if it is d0..d3 or d16..d19)
 */
static inline bool vfp_dreg_is_scalar(int reg)
{
    return (reg & 0xc) == 0;
}

/*
 * Advance the S reg number forwards by delta within its bank
 * (ie increment the low 3 bits but leave the rest the same)
 */
static inline int vfp_advance_sreg(int reg, int delta)
{
    return ((reg + delta) & 0x7) | (reg & ~0x7);
}

/*
 * Advance the D reg number forwards by delta within its bank
 * (ie increment the low 2 bits but leave the rest the same)
 */
static inline int vfp_advance_dreg(int reg, int delta)
{
    return ((reg + delta) & 0x3) | (reg & ~0x3);
}
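/*
 * Added example: the bank/advance arithmetic above wraps within a bank,
 * e.g. vfp_advance_sreg(14, 3) is ((14 + 3) & 7) | 8 == 9, staying inside
 * the s8..s15 bank rather than advancing into s16.
 */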
/*
 * Perform a 3-operand VFP data processing instruction. fn is the
 * callback to do the actual operation; this function deals with the
 * code to handle looping around for VFP vector processing.
 */
static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 f0, f1, fd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_sreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = s->vec_stride + 1;

            if (vfp_sreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i32();
    f1 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();
    fpst = fpstatus_ptr(FPST_FPCR);

    vfp_load_reg32(f0, vn);
    vfp_load_reg32(f1, vm);

    for (;;) {
        if (reads_vd) {
            vfp_load_reg32(fd, vd);
        }
        fn(fd, f0, f1, fpst);
        vfp_store_reg32(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_sreg(vd, delta_d);
        vn = vfp_advance_sreg(vn, delta_d);
        vfp_load_reg32(f0, vn);
        if (delta_m) {
            vm = vfp_advance_sreg(vm, delta_m);
            vfp_load_reg32(f1, vm);
        }
    }

    tcg_temp_free_i32(f0);
    tcg_temp_free_i32(f1);
    tcg_temp_free_i32(fd);
    tcg_temp_free_ptr(fpst);

    return true;
}
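/*
 * Added example: if the TB flags give veclen == 3 and delta_d == 1
 * (a short-vector length of 4, stride 1), a VADD.F32 with vd == s8
 * loops four times over consecutive registers starting at s8.
 */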
static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
{
    /*
     * Do a half-precision operation. Functionally this is
     * the same as do_vfp_3op_sp(), except:
     *  - it uses the FPST_FPCR_F16
     *  - it doesn't need the VFP vector handling (fp16 is a
     *    v8 feature, and in v8 VFP vectors don't exist)
     *  - it does the aa32_fp16_arith feature test
     */
    TCGv_i32 f0, f1, fd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    f0 = tcg_temp_new_i32();
    f1 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();
    fpst = fpstatus_ptr(FPST_FPCR_F16);

    vfp_load_reg32(f0, vn);
    vfp_load_reg32(f1, vm);

    if (reads_vd) {
        vfp_load_reg32(fd, vd);
    }
    fn(fd, f0, f1, fpst);
    vfp_store_reg32(fd, vd);

    tcg_temp_free_i32(f0);
    tcg_temp_free_i32(f1);
    tcg_temp_free_i32(fd);
    tcg_temp_free_ptr(fpst);

    return true;
}
static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 f0, f1, fd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = (s->vec_stride >> 1) + 1;

            if (vfp_dreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i64();
    f1 = tcg_temp_new_i64();
    fd = tcg_temp_new_i64();
    fpst = fpstatus_ptr(FPST_FPCR);

    vfp_load_reg64(f0, vn);
    vfp_load_reg64(f1, vm);

    for (;;) {
        if (reads_vd) {
            vfp_load_reg64(fd, vd);
        }
        fn(fd, f0, f1, fpst);
        vfp_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
        vn = vfp_advance_dreg(vn, delta_d);
        vfp_load_reg64(f0, vn);
        if (delta_m) {
            vm = vfp_advance_dreg(vm, delta_m);
            vfp_load_reg64(f1, vm);
        }
    }

    tcg_temp_free_i64(f0);
    tcg_temp_free_i64(f1);
    tcg_temp_free_i64(fd);
    tcg_temp_free_ptr(fpst);

    return true;
}
static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 f0, fd;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_sreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = s->vec_stride + 1;

            if (vfp_sreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();

    vfp_load_reg32(f0, vm);

    for (;;) {
        fn(fd, f0);
        vfp_store_reg32(fd, vd);

        if (veclen == 0) {
            break;
        }

        if (delta_m == 0) {
            /* single source one-many */
            while (veclen--) {
                vd = vfp_advance_sreg(vd, delta_d);
                vfp_store_reg32(fd, vd);
            }
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_sreg(vd, delta_d);
        vm = vfp_advance_sreg(vm, delta_m);
        vfp_load_reg32(f0, vm);
    }

    tcg_temp_free_i32(f0);
    tcg_temp_free_i32(fd);

    return true;
}
static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
{
    /*
     * Do a half-precision operation. Functionally this is
     * the same as do_vfp_2op_sp(), except:
     *  - it doesn't need the VFP vector handling (fp16 is a
     *    v8 feature, and in v8 VFP vectors don't exist)
     *  - it does the aa32_fp16_arith feature test
     */
    TCGv_i32 f0;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    f0 = tcg_temp_new_i32();
    vfp_load_reg32(f0, vm);
    fn(f0, f0);
    vfp_store_reg32(f0, vd);
    tcg_temp_free_i32(f0);

    return true;
}
static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 f0, fd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = (s->vec_stride >> 1) + 1;

            if (vfp_dreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i64();
    fd = tcg_temp_new_i64();

    vfp_load_reg64(f0, vm);

    for (;;) {
        fn(fd, f0);
        vfp_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }

        if (delta_m == 0) {
            /* single source one-many */
            while (veclen--) {
                vd = vfp_advance_dreg(vd, delta_d);
                vfp_store_reg64(fd, vd);
            }
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
        vm = vfp_advance_dreg(vm, delta_m);
        vfp_load_reg64(f0, vm);
    }

    tcg_temp_free_i64(f0);
    tcg_temp_free_i64(fd);

    return true;
}
static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
{
    return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
{
    return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
{
    return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(tmp, tmp);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
{
    return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(tmp, tmp);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
{
    return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(tmp, tmp);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
{
    return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
}
static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(vd, vd);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(vd, vd);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(vd, vd);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
}
static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(tmp, tmp);
    gen_helper_vfp_negh(vd, vd);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(tmp, tmp);
    gen_helper_vfp_negs(vd, vd);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(tmp, tmp);
    gen_helper_vfp_negd(vd, vd);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
}
static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false);
}

static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
}

static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_mulh(vd, vn, vm, fpst);
    gen_helper_vfp_negh(vd, vd);
}

static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_muls(vd, vn, vm, fpst);
    gen_helper_vfp_negs(vd, vd);
}

static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_muld(vd, vn, vm, fpst);
    gen_helper_vfp_negd(vd, vd);
}

static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
}

static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false);
}

static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
}

static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
}

static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false);
}

static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
}

static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
}

static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false);
}

static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
}

static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
}
static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_hp(s, gen_helper_vfp_minnumh,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_sp(s, gen_helper_vfp_minnums,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMAXNM_sp(DisasContext *s, arg_VMAXNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_sp(s, gen_helper_vfp_maxnums,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMINNM_dp(DisasContext *s, arg_VMINNM_dp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_dp(s, gen_helper_vfp_minnumd,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_dp(s, gen_helper_vfp_maxnumd,
                         a->vd, a->vn, a->vm, false);
}
static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
{
    /*
     * VFNMA : fd = muladd(-fd,  fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA  : fd = muladd( fd,  fn, fm)
     * VFMS  : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps. NB that doing the negations here as separate
     * steps is correct: an input NaN should come out with its sign
     * bit flipped if it is a negated-input.
     */
    TCGv_ptr fpst;
    TCGv_i32 vn, vm, vd;

    /*
     * Present in VFPv4 only, and only with the FP16 extension.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_fp16_arith, s) ||
        !dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vn = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i32();

    vfp_load_reg32(vn, a->vn);
    vfp_load_reg32(vm, a->vm);
    if (neg_n) {
        /* VFNMS, VFMS */
        gen_helper_vfp_negh(vn, vn);
    }
    vfp_load_reg32(vd, a->vd);
    if (neg_d) {
        /* VFNMA, VFNMS */
        gen_helper_vfp_negh(vd, vd);
    }
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
    vfp_store_reg32(vd, a->vd);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(vn);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i32(vd);

    return true;
}
static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
{
    /*
     * VFNMA : fd = muladd(-fd,  fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA  : fd = muladd( fd,  fn, fm)
     * VFMS  : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps. NB that doing the negations here as separate
     * steps is correct: an input NaN should come out with its sign
     * bit flipped if it is a negated-input.
     */
    TCGv_ptr fpst;
    TCGv_i32 vn, vm, vd;

    /*
     * Present in VFPv4 only.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }
    /*
     * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
     * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
     */
    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vn = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i32();

    vfp_load_reg32(vn, a->vn);
    vfp_load_reg32(vm, a->vm);
    if (neg_n) {
        /* VFNMS, VFMS */
        gen_helper_vfp_negs(vn, vn);
    }
    vfp_load_reg32(vd, a->vd);
    if (neg_d) {
        /* VFNMA, VFNMS */
        gen_helper_vfp_negs(vd, vd);
    }
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
    vfp_store_reg32(vd, a->vd);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(vn);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i32(vd);

    return true;
}
static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
{
    /*
     * VFNMA : fd = muladd(-fd,  fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA  : fd = muladd( fd,  fn, fm)
     * VFMS  : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps. NB that doing the negations here as separate
     * steps is correct: an input NaN should come out with its sign
     * bit flipped if it is a negated-input.
     */
    TCGv_ptr fpst;
    TCGv_i64 vn, vm, vd;

    /*
     * Present in VFPv4 only.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }
    /*
     * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
     * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
     */
    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vn = tcg_temp_new_i64();
    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i64();

    vfp_load_reg64(vn, a->vn);
    vfp_load_reg64(vm, a->vm);
    if (neg_n) {
        /* VFNMS, VFMS */
        gen_helper_vfp_negd(vn, vn);
    }
    vfp_load_reg64(vd, a->vd);
    if (neg_d) {
        /* VFNMA, VFNMS */
        gen_helper_vfp_negd(vd, vd);
    }
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
    vfp_store_reg64(vd, a->vd);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(vn);
    tcg_temp_free_i64(vm);
    tcg_temp_free_i64(vd);

    return true;
}

#define MAKE_ONE_VFM_TRANS_FN(INSN, PREC, NEGN, NEGD)           \
    static bool trans_##INSN##_##PREC(DisasContext *s,          \
                                      arg_##INSN##_##PREC *a)   \
    {                                                           \
        return do_vfm_##PREC(s, a, NEGN, NEGD);                 \
    }

#define MAKE_VFM_TRANS_FNS(PREC) \
    MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \
    MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \
    MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \
    MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true)

MAKE_VFM_TRANS_FNS(hp)
MAKE_VFM_TRANS_FNS(sp)
MAKE_VFM_TRANS_FNS(dp)
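
/*
 * For reference, each instantiation of the macro above is a trivial
 * trampoline; e.g. MAKE_ONE_VFM_TRANS_FN(VFMA, sp, false, false)
 * expands to:
 *
 *   static bool trans_VFMA_sp(DisasContext *s, arg_VFMA_sp *a)
 *   {
 *       return do_vfm_sp(s, a, false, false);
 *   }
 */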

static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a)
{
    TCGv_i32 fd;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fd = tcg_const_i32(vfp_expand_imm(MO_16, a->imm));
    vfp_store_reg32(fd, a->vd);
    tcg_temp_free_i32(fd);
    return true;
}
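
/*
 * Worked example: imm8 = 0x70 has sign 0, bit 6 set and low bits 0x30,
 * so for MO_16 vfp_expand_imm() yields 0x3000 | (0x30 << 6) = 0x3c00,
 * which is 1.0 in half precision; the analogous MO_32 expansion
 * produces the bit pattern for 1.0f (0x3f800000).
 */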

static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
{
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 fd;
    uint32_t vd;

    vd = a->vd;

    if (!dc_isar_feature(aa32_fpsp_v3, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_sreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = s->vec_stride + 1;
        }
    }

    fd = tcg_const_i32(vfp_expand_imm(MO_32, a->imm));

    for (;;) {
        vfp_store_reg32(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_sreg(vd, delta_d);
    }

    tcg_temp_free_i32(fd);
    return true;
}

static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
{
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 fd;
    uint32_t vd;

    vd = a->vd;

    if (!dc_isar_feature(aa32_fpdp_v3, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = (s->vec_stride >> 1) + 1;
        }
    }

    fd = tcg_const_i64(vfp_expand_imm(MO_64, a->imm));

    for (;;) {
        vfp_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
    }

    tcg_temp_free_i64(fd);
    return true;
}

#define DO_VFP_2OP(INSN, PREC, FN)                              \
    static bool trans_##INSN##_##PREC(DisasContext *s,          \
                                      arg_##INSN##_##PREC *a)   \
    {                                                           \
        return do_vfp_2op_##PREC(s, FN, a->vd, a->vm);          \
    }

DO_VFP_2OP(VMOV_reg, sp, tcg_gen_mov_i32)
DO_VFP_2OP(VMOV_reg, dp, tcg_gen_mov_i64)

DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh)
DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss)
DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd)

DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh)
DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs)
DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd)

static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
{
    gen_helper_vfp_sqrth(vd, vm, cpu_env);
}

static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
{
    gen_helper_vfp_sqrts(vd, vm, cpu_env);
}

static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
{
    gen_helper_vfp_sqrtd(vd, vm, cpu_env);
}

DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp)
DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp)
DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp)
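
/*
 * VSQRT needs the small wrappers above because the sqrt helpers take
 * cpu_env (they fetch the live FP status from there), while the
 * do_vfp_2op_* callbacks are invoked with just (vd, vm); the wrappers
 * adapt one signature to the other.
 */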

static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
{
    TCGv_i32 vd, vm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();

    vfp_load_reg32(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i32(vm, 0);
    } else {
        vfp_load_reg32(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmpeh(vd, vm, cpu_env);
    } else {
        gen_helper_vfp_cmph(vd, vm, cpu_env);
    }

    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(vm);

    return true;
}

static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
{
    TCGv_i32 vd, vm;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();

    vfp_load_reg32(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i32(vm, 0);
    } else {
        vfp_load_reg32(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmpes(vd, vm, cpu_env);
    } else {
        gen_helper_vfp_cmps(vd, vm, cpu_env);
    }

    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(vm);

    return true;
}
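
/*
 * The E bit selects the signalling comparison: the cmpe helpers
 * (VCMPE) raise Invalid Operation for any NaN input, while the plain
 * cmp helpers (VCMP) do so only for signalling NaNs. Both take
 * cpu_env because they write the FP condition flags into FPSCR.NZCV.
 */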

static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
{
    TCGv_i64 vd, vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i64();
    vm = tcg_temp_new_i64();

    vfp_load_reg64(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i64(vm, 0);
    } else {
        vfp_load_reg64(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmped(vd, vm, cpu_env);
    } else {
        gen_helper_vfp_cmpd(vd, vm, cpu_env);
    }

    tcg_temp_free_i64(vd);
    tcg_temp_free_i64(vm);

    return true;
}

static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    /* The T bit tells us if we want the low or high 16 bits of Vm */
    tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;
    TCGv_i64 vd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    /* The T bit tells us if we want the low or high 16 bits of Vm */
    tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
    vd = tcg_temp_new_i64();
    gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i64(vd);
    return true;
}

static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();

    vfp_load_reg32(tmp, a->vm);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
    tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;
    TCGv_i64 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    vm = tcg_temp_new_i64();

    vfp_load_reg64(vm, a->vm);
    gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
    tcg_temp_free_i64(vm);
    tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}
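
/*
 * All four f16 conversions above share the same pattern: the
 * half-precision value lives in one 16-bit lane of a single-precision
 * register, so we use a 16-bit load or store at vfp_f16_offset()
 * selected by the T bit, and pass get_ahp_flag() through so the helper
 * honours FPSCR.AHP (alternative half-precision) at runtime.
 */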

static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_rinth(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rints(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rintd(tmp, tmp, fpst);
    vfp_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    return true;
}
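
/*
 * The three VRINT flavours differ only in rounding-mode handling:
 * VRINTR (above) rounds using whatever mode is currently in FPSCR,
 * VRINTZ (below) forces round-to-zero, and VRINTX (further below)
 * also uses the FPSCR mode but additionally raises Inexact when the
 * result differs from the input.
 */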

static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rinth(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_rmode);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rints(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_rmode);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rintd(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    vfp_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    tcg_temp_free_i32(tcg_rmode);
    return true;
}
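
/*
 * The VRINTZ functions use the standard temporary-rounding-mode dance:
 * gen_helper_set_rmode() installs the new mode and hands the old one
 * back in tcg_rmode, so a second call with the same temporary restores
 * the previous FPSCR rounding mode:
 *
 *   tcg_rmode = tcg_const_i32(float_round_to_zero);
 *   gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);   (save + set)
 *   ... rounding operation ...
 *   gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);   (restore)
 */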

static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_rinth_exact(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rints_exact(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rintd_exact(tmp, tmp, fpst);
    vfp_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    return true;
}

static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
{
    TCGv_i64 vd;
    TCGv_i32 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i64();
    vfp_load_reg32(vm, a->vm);
    gen_helper_vfp_fcvtds(vd, vm, cpu_env);
    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i64(vd);
    return true;
}

static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
{
    TCGv_i64 vm;
    TCGv_i32 vd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i64();
    vfp_load_reg64(vm, a->vm);
    gen_helper_vfp_fcvtsd(vd, vm, cpu_env);
    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i64(vm);
    return true;
}

static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    if (a->s) {
        /* i32 -> f16 */
        gen_helper_vfp_sitoh(vm, vm, fpst);
    } else {
        /* u32 -> f16 */
        gen_helper_vfp_uitoh(vm, vm, fpst);
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    if (a->s) {
        /* i32 -> f32 */
        gen_helper_vfp_sitos(vm, vm, fpst);
    } else {
        /* u32 -> f32 */
        gen_helper_vfp_uitos(vm, vm, fpst);
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
{
    TCGv_i32 vm;
    TCGv_i64 vd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i64();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    if (a->s) {
        /* i32 -> f64 */
        gen_helper_vfp_sitod(vd, vm, fpst);
    } else {
        /* u32 -> f64 */
        gen_helper_vfp_uitod(vd, vm, fpst);
    }
    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i64(vd);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
{
    TCGv_i32 vd;
    TCGv_i64 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_jscvt, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i32();
    vfp_load_reg64(vm, a->vm);
    gen_helper_vjcvt(vd, vm, cpu_env);
    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i64(vm);
    tcg_temp_free_i32(vd);
    return true;
}
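
/*
 * gen_helper_vjcvt() implements the architected JavaScript conversion:
 * round the double toward zero and, unlike the saturating conversions
 * elsewhere in this file, produce the low 32 bits of the true integer
 * on overflow (NaNs convert to zero). The helper also updates the
 * condition flags, which is one reason it takes cpu_env rather than
 * an fpst pointer.
 */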

static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
{
    TCGv_i32 vd, shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i32();
    vfp_load_reg32(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR_F16);
    shift = tcg_const_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_toslh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_toulh_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(shift);
    tcg_temp_free_ptr(fpst);
    return true;
}
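
/*
 * The instruction encodes the fixed-point position as (size - fbits),
 * so frac_bits recovers fbits from the immediate field: e.g. a 16-bit
 * conversion (sx = 0) with 8 fraction bits arrives with a->imm == 8,
 * giving frac_bits = 16 - 8 = 8.
 */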

static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
{
    TCGv_i32 vd, shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fpsp_v3, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i32();
    vfp_load_reg32(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR);
    shift = tcg_const_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(shift);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
{
    TCGv_i64 vd;
    TCGv_i32 shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fpdp_v3, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i64();
    vfp_load_reg64(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR);
    shift = tcg_const_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i64(vd);
    tcg_temp_free_i32(shift);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR_F16);
    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizh(vm, vm, fpst);
        } else {
            gen_helper_vfp_tosih(vm, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizh(vm, vm, fpst);
        } else {
            gen_helper_vfp_touih(vm, vm, fpst);
        }
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizs(vm, vm, fpst);
        } else {
            gen_helper_vfp_tosis(vm, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizs(vm, vm, fpst);
        } else {
            gen_helper_vfp_touis(vm, vm, fpst);
        }
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
{
    TCGv_i32 vd;
    TCGv_i64 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i32();
    vfp_load_reg64(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizd(vd, vm, fpst);
        } else {
            gen_helper_vfp_tosid(vd, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizd(vd, vm, fpst);
        } else {
            gen_helper_vfp_touid(vd, vm, fpst);
        }
    }
    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i64(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}
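
/*
 * In the float-to-integer direction the rz flag distinguishes VCVT,
 * which always truncates (the "z" round-to-zero helpers), from VCVTR,
 * which rounds using the current FPSCR rounding mode.
 */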

/*
 * The decode of VLLDM and VLSTM is nonstandard because:
 *  * if there is no FPU then these insns must NOP in
 *    Secure state and UNDEF in Nonsecure state
 *  * if there is an FPU then these insns do not have
 *    the usual behaviour that vfp_access_check() provides of
 *    being controlled by CPACR/NSACR enable bits or the
 *    lazy-stacking logic.
 */
static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a)
{
    TCGv_i32 fptr;

    if (!arm_dc_feature(s, ARM_FEATURE_M) ||
        !arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    if (a->op) {
        /*
         * T2 encoding ({D0-D31} reglist): v8.1M and up. We choose not
         * to take the IMPDEF option to make memory accesses to the stack
         * slots that correspond to the D16-D31 registers (discarding
         * read data and writing UNKNOWN values), so for us the T2
         * encoding behaves identically to the T1 encoding.
         */
        if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
            return false;
        }
    } else {
        /*
         * T1 encoding ({D0-D15} reglist); undef if we have 32 Dregs.
         * This is currently architecturally impossible, but we add the
         * check to stay in line with the pseudocode. Note that we must
         * emit code for the UNDEF so it takes precedence over the NOCP.
         */
        if (dc_isar_feature(aa32_simd_r32, s)) {
            unallocated_encoding(s);
            return true;
        }
    }

    /*
     * If not secure, UNDEF. We must emit code for this
     * rather than returning false so that this takes
     * precedence over the m-nocp.decode NOCP fallback.
     */
    if (!s->v8m_secure) {
        unallocated_encoding(s);
        return true;
    }
    /* If no fpu, NOP. */
    if (!dc_isar_feature(aa32_vfp, s)) {
        return true;
    }

    fptr = load_reg(s, a->rn);
    if (a->l) {
        gen_helper_v7m_vlldm(cpu_env, fptr);
    } else {
        gen_helper_v7m_vlstm(cpu_env, fptr);
    }
    tcg_temp_free_i32(fptr);

    /* End the TB, because we have updated FP control bits */
    s->base.is_jmp = DISAS_UPDATE_EXIT;
    return true;
}

static bool trans_VSCCLRM(DisasContext *s, arg_VSCCLRM *a)
{
    int btmreg, topreg;
    TCGv_i64 zero;
    TCGv_i32 aspen, sfpa;

    if (!dc_isar_feature(aa32_m_sec_state, s)) {
        /* Before v8.1M, fall through in decode to NOCP check */
        return false;
    }

    /* Explicitly UNDEF because this takes precedence over NOCP */
    if (!arm_dc_feature(s, ARM_FEATURE_M_MAIN) || !s->v8m_secure) {
        unallocated_encoding(s);
        return true;
    }

    if (!dc_isar_feature(aa32_vfp_simd, s)) {
        /* NOP if we have neither FP nor MVE */
        return true;
    }

    /*
     * If FPCCR.ASPEN != 0 && CONTROL_S.SFPA == 0 then there is no
     * active floating point context so we must NOP (without doing
     * any lazy state preservation or the NOCP check).
     */
    aspen = load_cpu_field(v7m.fpccr[M_REG_S]);
    sfpa = load_cpu_field(v7m.control[M_REG_S]);
    tcg_gen_andi_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
    tcg_gen_xori_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
    tcg_gen_andi_i32(sfpa, sfpa, R_V7M_CONTROL_SFPA_MASK);
    tcg_gen_or_i32(sfpa, sfpa, aspen);
    arm_gen_condlabel(s);
    tcg_gen_brcondi_i32(TCG_COND_EQ, sfpa, 0, s->condlabel);

    if (s->fp_excp_el != 0) {
        gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
                           syn_uncategorized(), s->fp_excp_el);
        return true;
    }

    topreg = a->vd + a->imm - 1;
    btmreg = a->vd;

    /* Convert to Sreg numbers if the insn was specified in Dregs */
    if (a->size == 3) {
        topreg = topreg * 2 + 1;
        btmreg *= 2;
    }

    if (topreg > 63 || (topreg > 31 && !(topreg & 1))) {
        /* UNPREDICTABLE: we choose to undef */
        unallocated_encoding(s);
        return true;
    }

    /* Silently ignore requests to clear D16-D31 if they don't exist */
    if (topreg > 31 && !dc_isar_feature(aa32_simd_r32, s)) {
        topreg = 31;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Zero the Sregs from btmreg to topreg inclusive. */
    zero = tcg_const_i64(0);
    if (btmreg & 1) {
        write_neon_element64(zero, btmreg >> 1, 1, MO_32);
        btmreg++;
    }
    for (; btmreg + 1 <= topreg; btmreg += 2) {
        write_neon_element64(zero, btmreg >> 1, 0, MO_64);
    }
    if (btmreg == topreg) {
        write_neon_element64(zero, btmreg >> 1, 0, MO_32);
        btmreg++;
    }
    assert(btmreg == topreg + 1);
    /* TODO: when MVE is implemented, zero VPR here */
    return true;
}
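
/*
 * Worked example of the zeroing loop above (register numbers chosen
 * for illustration): with btmreg = 1 and topreg = 6 we first clear the
 * odd low Sreg (s1, the high half of d0), then clear d1 and d2 as
 * whole 64-bit elements (covering s2-s5), and finally clear s6 as the
 * low half of d3, leaving btmreg == topreg + 1 == 7 for the assert.
 */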

static bool trans_NOCP(DisasContext *s, arg_nocp *a)
{
    /*
     * Handle M-profile early check for disabled coprocessor:
     * all we need to do here is emit the NOCP exception if
     * the coprocessor is disabled. Otherwise we return false
     * and the real VFP/etc decode will handle the insn.
     */
    assert(arm_dc_feature(s, ARM_FEATURE_M));

    if (a->cp == 11) {
        a->cp = 10;
    }
    if (arm_dc_feature(s, ARM_FEATURE_V8_1M) &&
        (a->cp == 8 || a->cp == 9 || a->cp == 14 || a->cp == 15)) {
        /* in v8.1M cp 8, 9, 14, 15 also are governed by the cp10 enable */
        a->cp = 10;
    }

    if (a->cp != 10) {
        gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
                           syn_uncategorized(), default_exception_el(s));
        return true;
    }

    if (s->fp_excp_el != 0) {
        gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
                           syn_uncategorized(), s->fp_excp_el);
        return true;
    }

    return false;
}

static bool trans_NOCP_8_1(DisasContext *s, arg_nocp *a)
{
    /* This range needs a coprocessor check for v8.1M and later only */
    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
        return false;
    }
    return trans_NOCP(s, a);
}

static bool trans_VINS(DisasContext *s, arg_VINS *a)
{
    TCGv_i32 rd, rm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Insert low half of Vm into high half of Vd */
    rm = tcg_temp_new_i32();
    rd = tcg_temp_new_i32();
    vfp_load_reg32(rm, a->vm);
    vfp_load_reg32(rd, a->vd);
    tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
    vfp_store_reg32(rd, a->vd);
    tcg_temp_free_i32(rm);
    tcg_temp_free_i32(rd);
    return true;
}
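
/*
 * tcg_gen_deposit_i32(rd, rd, rm, 16, 16) writes the low 16 bits of rm
 * into bits [31:16] of rd, i.e. the result is
 * ((vm & 0xffff) << 16) | (vd & 0xffff), which is exactly the VINS
 * "insert low half of Vm into high half of Vd" operation.
 */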

static bool trans_VMOVX(DisasContext *s, arg_VINS *a)
{
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Set Vd to high half of Vm */
    rm = tcg_temp_new_i32();
    vfp_load_reg32(rm, a->vm);
    tcg_gen_shri_i32(rm, rm, 16);
    vfp_store_reg32(rm, a->vd);
    tcg_temp_free_i32(rm);
    return true;
}