/*
 * ARM translation: AArch32 VFP instructions
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 * Copyright (c) 2019 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

/*
 * This file is intended to be included from translate.c; it uses
 * some macros and definitions provided by that file.
 * It might be possible to convert it to a standalone .c file eventually.
 */

/* Include the generated VFP decoder */
#include "decode-vfp.c.inc"
#include "decode-vfp-uncond.c.inc"
/*
 * The imm8 encodes the sign bit, enough bits to represent an exponent in
 * the range 01....1xx to 10....0xx, and the most significant 4 bits of
 * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
 */
uint64_t vfp_expand_imm(int size, uint8_t imm8)
{
    uint64_t imm;

    switch (size) {
    case MO_64:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
              (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
              extract32(imm8, 0, 6);
        imm <<= 48;
        break;
    case MO_32:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
              (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
              (extract32(imm8, 0, 6) << 3);
        imm <<= 16;
        break;
    case MO_16:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
              (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
              (extract32(imm8, 0, 6) << 6);
        break;
    default:
        g_assert_not_reached();
    }
    return imm;
}
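/*
 * Worked example (illustrative only, not part of the decoder): imm8 = 0x70
 * has sign 0, high exponent bit 1 and mantissa bits 110000, so the MO_32
 * case computes 0x3e00 | (0x30 << 3) = 0x3f80, and the shift into the high
 * half gives 0x3f800000, the IEEE-754 single-precision pattern for 1.0f:
 *
 *     assert(vfp_expand_imm(MO_32, 0x70) == 0x3f800000ULL);
 */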
/*
 * Return the offset of a 16-bit half of the specified VFP single-precision
 * register. If top is true, returns the top 16 bits; otherwise the bottom
 * 16 bits.
 */
static inline long vfp_f16_offset(unsigned reg, bool top)
    long offs = vfp_reg_offset(false, reg);
#ifdef HOST_WORDS_BIGENDIAN
/*
 * Generate code for M-profile lazy FP state preservation if needed;
 * this corresponds to the pseudocode PreserveFPState() function.
 */
static void gen_preserve_fp_state(DisasContext *s)
        /*
         * Lazy state saving affects external memory and also the NVIC,
         * so we must mark it as an IO operation for icount (and cause
         * this to be the last insn in the TB).
         */
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            s->base.is_jmp = DISAS_UPDATE_EXIT;
        gen_helper_v7m_preserve_fp_state(cpu_env);
        /*
         * If the preserve_fp_state helper doesn't throw an exception
         * then it will clear LSPACT; we don't need to repeat this for
         * any further FP insns in this TB.
         */
        s->v7m_lspact = false;
/*
 * Check that VFP access is enabled. If it is, do the necessary
 * M-profile lazy-FP handling and then return true.
 * If not, emit code to generate an appropriate exception and
 * return false.
 * The ignore_vfp_enabled argument specifies that we should ignore
 * whether VFP is enabled via FPEXC[EN]: this should be true for FMXR/FMRX
 * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
 */
static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
        /* M-profile handled this earlier, in disas_m_nocp() */
        assert(!arm_dc_feature(s, ARM_FEATURE_M));
        gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
                           syn_fp_access_trap(1, 0xe, false),

    if (!s->vfp_enabled && !ignore_vfp_enabled) {
        assert(!arm_dc_feature(s, ARM_FEATURE_M));
        unallocated_encoding(s);

    if (arm_dc_feature(s, ARM_FEATURE_M)) {
        /* Handle M-profile lazy FP state mechanics */

        /* Trigger lazy-state preservation if necessary */
        gen_preserve_fp_state(s);

        /* Update ownership of FP context: set FPCCR.S to match current state */
        if (s->v8m_fpccr_s_wrong) {
            tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
                tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
                tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
            store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
            /* Don't need to do this for any further FP insns in this TB */
            s->v8m_fpccr_s_wrong = false;

        if (s->v7m_new_fp_ctxt_needed) {
            /*
             * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA
             */
            TCGv_i32 control, fpscr;
            uint32_t bits = R_V7M_CONTROL_FPCA_MASK;

            fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
            gen_helper_vfp_set_fpscr(cpu_env, fpscr);
            tcg_temp_free_i32(fpscr);
            /*
             * We don't need to arrange to end the TB, because the only
             * parts of FPSCR which we cache in the TB flags are the VECLEN
             * and VECSTRIDE, and those don't exist for M-profile.
             */
                bits |= R_V7M_CONTROL_SFPA_MASK;
            control = load_cpu_field(v7m.control[M_REG_S]);
            tcg_gen_ori_i32(control, control, bits);
            store_cpu_field(control, v7m.control[M_REG_S]);
            /* Don't need to do this for any further FP insns in this TB */
            s->v7m_new_fp_ctxt_needed = false;

/*
 * The most usual kind of VFP access check, for everything except
 * FMXR/FMRX to the always-available special registers.
 */
static bool vfp_access_check(DisasContext *s)
    return full_vfp_access_check(s, false);
static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
    if (!dc_isar_feature(aa32_vsel, s)) {

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vm | a->vn | a->vd) & 0x10)) {

    if (!vfp_access_check(s)) {

        TCGv_i64 frn, frm, dest;
        TCGv_i64 tmp, zero, zf, nf, vf;

        zero = tcg_const_i64(0);

        frn = tcg_temp_new_i64();
        frm = tcg_temp_new_i64();
        dest = tcg_temp_new_i64();

        zf = tcg_temp_new_i64();
        nf = tcg_temp_new_i64();
        vf = tcg_temp_new_i64();

        tcg_gen_extu_i32_i64(zf, cpu_ZF);
        tcg_gen_ext_i32_i64(nf, cpu_NF);
        tcg_gen_ext_i32_i64(vf, cpu_VF);

        vfp_load_reg64(frn, rn);
        vfp_load_reg64(frm, rm);

            tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
            tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
        case 2: /* ge: N == V -> N ^ V == 0 */
            tmp = tcg_temp_new_i64();
            tcg_gen_xor_i64(tmp, vf, nf);
            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
            tcg_temp_free_i64(tmp);
        case 3: /* gt: !Z && N == V */
            tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
            tmp = tcg_temp_new_i64();
            tcg_gen_xor_i64(tmp, vf, nf);
            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
            tcg_temp_free_i64(tmp);

        vfp_store_reg64(dest, rd);
        tcg_temp_free_i64(frn);
        tcg_temp_free_i64(frm);
        tcg_temp_free_i64(dest);

        tcg_temp_free_i64(zf);
        tcg_temp_free_i64(nf);
        tcg_temp_free_i64(vf);

        tcg_temp_free_i64(zero);

        TCGv_i32 frn, frm, dest;

        zero = tcg_const_i32(0);

        frn = tcg_temp_new_i32();
        frm = tcg_temp_new_i32();
        dest = tcg_temp_new_i32();
        vfp_load_reg32(frn, rn);
        vfp_load_reg32(frm, rm);

            tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
            tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
        case 2: /* ge: N == V -> N ^ V == 0 */
            tmp = tcg_temp_new_i32();
            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
            tcg_temp_free_i32(tmp);
        case 3: /* gt: !Z && N == V */
            tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
            tmp = tcg_temp_new_i32();
            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
            tcg_temp_free_i32(tmp);

            /* For fp16 the top half is always zeroes */
            tcg_gen_andi_i32(dest, dest, 0xffff);
        vfp_store_reg32(dest, rd);
        tcg_temp_free_i32(frn);
        tcg_temp_free_i32(frm);
        tcg_temp_free_i32(dest);

        tcg_temp_free_i32(zero);
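/*
 * Reference model of the selection above (illustrative sketch only; the
 * translator emits movcond ops rather than calling anything like this).
 * 'cc' is a->cc, and n/z/v are the CPSR condition flags.
 */
static inline uint32_t vsel_ref(int cc, bool n, bool z, bool v,
                                uint32_t vn, uint32_t vm)
{
    bool take_n;

    switch (cc) {
    case 0: /* eq: Z set */
        take_n = z;
        break;
    case 1: /* vs: V set */
        take_n = v;
        break;
    case 2: /* ge: N == V */
        take_n = (n == v);
        break;
    default: /* 3, gt: !Z && N == V */
        take_n = !z && (n == v);
        break;
    }
    return take_n ? vn : vm;
}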
/*
 * Table for converting the most common AArch32 encoding of
 * rounding mode to arm_fprounding order (which matches the
 * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
 */
static const uint8_t fp_decode_rm[] = {
    FPROUNDING_TIEAWAY,
    FPROUNDING_TIEEVEN,
    FPROUNDING_POSINF,
    FPROUNDING_NEGINF,
};
static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
    int rounding = fp_decode_rm[a->rm];

    if (!dc_isar_feature(aa32_vrint, s)) {

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vm | a->vd) & 0x10)) {

    if (!vfp_access_check(s)) {

        fpst = fpstatus_ptr(FPST_FPCR_F16);
        fpst = fpstatus_ptr(FPST_FPCR);

    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);

        tcg_op = tcg_temp_new_i64();
        tcg_res = tcg_temp_new_i64();
        vfp_load_reg64(tcg_op, rm);
        gen_helper_rintd(tcg_res, tcg_op, fpst);
        vfp_store_reg64(tcg_res, rd);
        tcg_temp_free_i64(tcg_op);
        tcg_temp_free_i64(tcg_res);

        tcg_op = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        vfp_load_reg32(tcg_op, rm);
            gen_helper_rinth(tcg_res, tcg_op, fpst);
            gen_helper_rints(tcg_res, tcg_op, fpst);
        vfp_store_reg32(tcg_res, rd);
        tcg_temp_free_i32(tcg_op);
        tcg_temp_free_i32(tcg_res);

    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    tcg_temp_free_i32(tcg_rmode);

    tcg_temp_free_ptr(fpst);
static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
    TCGv_i32 tcg_rmode, tcg_shift;
    int rounding = fp_decode_rm[a->rm];
    bool is_signed = a->op;

    if (!dc_isar_feature(aa32_vcvt_dr, s)) {

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {

    if (!vfp_access_check(s)) {

        fpst = fpstatus_ptr(FPST_FPCR_F16);
        fpst = fpstatus_ptr(FPST_FPCR);

    tcg_shift = tcg_const_i32(0);

    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);

        TCGv_i64 tcg_double, tcg_res;

        tcg_double = tcg_temp_new_i64();
        tcg_res = tcg_temp_new_i64();
        tcg_tmp = tcg_temp_new_i32();
        vfp_load_reg64(tcg_double, rm);
            gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
            gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
        tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
        vfp_store_reg32(tcg_tmp, rd);
        tcg_temp_free_i32(tcg_tmp);
        tcg_temp_free_i64(tcg_res);
        tcg_temp_free_i64(tcg_double);

        TCGv_i32 tcg_single, tcg_res;
        tcg_single = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        vfp_load_reg32(tcg_single, rm);
                gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst);
                gen_helper_vfp_toulh(tcg_res, tcg_single, tcg_shift, fpst);
                gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
                gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
        vfp_store_reg32(tcg_res, rd);
        tcg_temp_free_i32(tcg_res);
        tcg_temp_free_i32(tcg_single);

    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    tcg_temp_free_i32(tcg_rmode);

    tcg_temp_free_i32(tcg_shift);

    tcg_temp_free_ptr(fpst);
static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
    /* VMOV scalar to general purpose register */

    /* SIZE == MO_32 is a VFP instruction; otherwise NEON. */
    if (a->size == MO_32
        ? !dc_isar_feature(aa32_fpsp_v2, s)
        : !arm_dc_feature(s, ARM_FEATURE_NEON)) {

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {

    if (!vfp_access_check(s)) {

    tmp = tcg_temp_new_i32();
    read_neon_element32(tmp, a->vn, a->index, a->size | (a->u ? 0 : MO_SIGN));
    store_reg(s, a->rt, tmp);

static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
    /* VMOV general purpose register to scalar */

    /* SIZE == MO_32 is a VFP instruction; otherwise NEON. */
    if (a->size == MO_32
        ? !dc_isar_feature(aa32_fpsp_v2, s)
        : !arm_dc_feature(s, ARM_FEATURE_NEON)) {

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {

    if (!vfp_access_check(s)) {

    tmp = load_reg(s, a->rt);
    write_neon_element32(tmp, a->vn, a->index, a->size);
    tcg_temp_free_i32(tmp);

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
    /* VDUP (general purpose register) */

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {

    if (a->q && (a->vn & 1)) {

    vec_size = a->q ? 16 : 8;

    if (!vfp_access_check(s)) {

    tmp = load_reg(s, a->rt);
    tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn),
                         vec_size, vec_size, tmp);
    tcg_temp_free_i32(tmp);
/*
 * M-profile provides two different sets of instructions that can
 * access floating point system registers: VMSR/VMRS (which move
 * to/from a general purpose register) and VLDR/VSTR sysreg (which
 * move directly to/from memory). In some cases there are also side
 * effects which must happen after any write to memory (which could
 * cause an exception). So we implement the common logic for the
 * sysreg access in gen_M_fp_sysreg_write() and gen_M_fp_sysreg_read(),
 * which take pointers to callback functions which will perform the
 * actual "read/write general purpose register" and "read/write
 * memory" operations.
 */

/*
 * Emit code to store the sysreg to its final destination; frees the
 * TCG temp 'value' it is passed.
 */
typedef void fp_sysreg_storefn(DisasContext *s, void *opaque, TCGv_i32 value);

/*
 * Emit code to load the value to be copied to the sysreg; returns
 * a new TCG temporary
 */
typedef TCGv_i32 fp_sysreg_loadfn(DisasContext *s, void *opaque);
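/*
 * Illustrative sketch of the callback contract (a hypothetical helper,
 * not one of the callbacks this file actually registers): a storefn
 * takes ownership of 'value' and must free it, even when it has nowhere
 * to put the result.
 */
static inline void fp_sysreg_discard(DisasContext *s, void *opaque,
                                     TCGv_i32 value)
{
    /* The storefn owns 'value', so it is responsible for freeing it. */
    tcg_temp_free_i32(value);
}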
/* Common decode/access checks for fp sysreg read/write */
typedef enum FPSysRegCheckResult {
    FPSysRegCheckFailed, /* caller should return false */
    FPSysRegCheckDone, /* caller should return true */
    FPSysRegCheckContinue, /* caller should continue generating code */
} FPSysRegCheckResult;

static FPSysRegCheckResult fp_sysreg_checks(DisasContext *s, int regno)
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return FPSysRegCheckFailed;

    case QEMU_VFP_FPSCR_NZCV:
    case ARM_VFP_FPSCR_NZCVQC:
        if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
    case ARM_VFP_FPCXT_S:
        if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
        if (!s->v8m_secure) {
        return FPSysRegCheckFailed;

    if (!vfp_access_check(s)) {
        return FPSysRegCheckDone;

    return FPSysRegCheckContinue;
static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
                                  fp_sysreg_loadfn *loadfn,
    /* Do a write to an M-profile floating point system register */

    switch (fp_sysreg_checks(s, regno)) {
    case FPSysRegCheckFailed:
    case FPSysRegCheckDone:
    case FPSysRegCheckContinue:

        tmp = loadfn(s, opaque);
        gen_helper_vfp_set_fpscr(cpu_env, tmp);
        tcg_temp_free_i32(tmp);
    case ARM_VFP_FPSCR_NZCVQC:
        tmp = loadfn(s, opaque);
        /*
         * TODO: when we implement MVE, write the QC bit.
         * For non-MVE, QC is RES0.
         */
        tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
        fpscr = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
        tcg_gen_andi_i32(fpscr, fpscr, ~FPCR_NZCV_MASK);
        tcg_gen_or_i32(fpscr, fpscr, tmp);
        store_cpu_field(fpscr, vfp.xregs[ARM_VFP_FPSCR]);
        tcg_temp_free_i32(tmp);
    case ARM_VFP_FPCXT_S:
        TCGv_i32 sfpa, control, fpscr;
        /* Set FPSCR[27:0] and CONTROL.SFPA from value */
        tmp = loadfn(s, opaque);
        sfpa = tcg_temp_new_i32();
        tcg_gen_shri_i32(sfpa, tmp, 31);
        control = load_cpu_field(v7m.control[M_REG_S]);
        tcg_gen_deposit_i32(control, control, sfpa,
                            R_V7M_CONTROL_SFPA_SHIFT, 1);
        store_cpu_field(control, v7m.control[M_REG_S]);
        fpscr = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
        tcg_gen_andi_i32(fpscr, fpscr, FPCR_NZCV_MASK);
        tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
        tcg_gen_or_i32(fpscr, fpscr, tmp);
        store_cpu_field(fpscr, vfp.xregs[ARM_VFP_FPSCR]);
        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(sfpa);
        g_assert_not_reached();
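/*
 * FPCXT_S layout recap (informational): bit [31] of the transferred value
 * is CONTROL.SFPA and bits [27:0] are FPSCR. The write above deliberately
 * leaves FPSCR.NZCV (bits [31:28]) untouched, which is why both sides are
 * masked with FPCR_NZCV_MASK before being OR'd together.
 */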
static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
                                 fp_sysreg_storefn *storefn,
    /* Do a read from an M-profile floating point system register */

    switch (fp_sysreg_checks(s, regno)) {
    case FPSysRegCheckFailed:
    case FPSysRegCheckDone:
    case FPSysRegCheckContinue:

        tmp = tcg_temp_new_i32();
        gen_helper_vfp_get_fpscr(tmp, cpu_env);
        storefn(s, opaque, tmp);
    case ARM_VFP_FPSCR_NZCVQC:
        /*
         * TODO: MVE has a QC bit, which we probably won't store
         * in the xregs[] field. For non-MVE, where QC is RES0,
         * we can just fall through to the FPSCR_NZCV case.
         */
    case QEMU_VFP_FPSCR_NZCV:
        /*
         * Read just NZCV; this is a special case to avoid the
         * helper call for the "VMRS to CPSR.NZCV" insn.
         */
        tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
        tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
        storefn(s, opaque, tmp);
    case ARM_VFP_FPCXT_S:
        TCGv_i32 control, sfpa, fpscr;
        /* Bits [27:0] from FPSCR, bit [31] from CONTROL.SFPA */
        tmp = tcg_temp_new_i32();
        sfpa = tcg_temp_new_i32();
        gen_helper_vfp_get_fpscr(tmp, cpu_env);
        tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
        control = load_cpu_field(v7m.control[M_REG_S]);
        tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
        tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
        tcg_gen_or_i32(tmp, tmp, sfpa);
        tcg_temp_free_i32(sfpa);
        /*
         * Store result before updating FPSCR etc, in case
         * it is a memory write which causes an exception.
         */
        storefn(s, opaque, tmp);
        /*
         * Now we must reset FPSCR from FPDSCR_NS, and clear
         * CONTROL.SFPA; so we'll end the TB here.
         */
        tcg_gen_andi_i32(control, control, ~R_V7M_CONTROL_SFPA_MASK);
        store_cpu_field(control, v7m.control[M_REG_S]);
        fpscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
        gen_helper_vfp_set_fpscr(cpu_env, fpscr);
        tcg_temp_free_i32(fpscr);
        g_assert_not_reached();
static void fp_sysreg_to_gpr(DisasContext *s, void *opaque, TCGv_i32 value)
    arg_VMSR_VMRS *a = opaque;

    if (a->rt == 15) {
        /* Set the 4 flag bits in the CPSR */
        gen_set_nzcv(value);
        tcg_temp_free_i32(value);
    } else {
        store_reg(s, a->rt, value);
    }

static TCGv_i32 gpr_to_fp_sysreg(DisasContext *s, void *opaque)
    arg_VMSR_VMRS *a = opaque;

    return load_reg(s, a->rt);

static bool gen_M_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
    /*
     * Accesses to R15 are UNPREDICTABLE; we choose to UNDEF.
     * FPSCR -> r15 is a special case which writes to the PSR flags;
     * set a->reg to a special value to tell gen_M_fp_sysreg_read()
     * we only care about the top 4 bits of FPSCR there.
     */
    if (a->l && a->reg == ARM_VFP_FPSCR) {
        a->reg = QEMU_VFP_FPSCR_NZCV;

        /* VMRS, move FP system register to gp register */
        return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_gpr, a);
        /* VMSR, move gp register to FP system register */
        return gen_M_fp_sysreg_write(s, a->reg, gpr_to_fp_sysreg, a);
static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
    bool ignore_vfp_enabled = false;

    if (arm_dc_feature(s, ARM_FEATURE_M)) {
        return gen_M_VMSR_VMRS(s, a);

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {

        /*
         * VFPv2 allows access to FPSID from userspace; VFPv3 restricts
         * all ID registers to privileged access only.
         */
        if (IS_USER(s) && dc_isar_feature(aa32_fpsp_v3, s)) {
        ignore_vfp_enabled = true;
        if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
        ignore_vfp_enabled = true;
        if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) {
        ignore_vfp_enabled = true;
        ignore_vfp_enabled = true;
    case ARM_VFP_FPINST2:
        /* Not present in VFPv3 */
        if (IS_USER(s) || dc_isar_feature(aa32_fpsp_v3, s)) {

    if (!full_vfp_access_check(s, ignore_vfp_enabled)) {

        /* VMRS, move VFP special register to gp register */
            if (s->current_el == 1) {
                TCGv_i32 tcg_reg, tcg_rt;

                gen_set_pc_im(s, s->pc_curr);
                tcg_reg = tcg_const_i32(a->reg);
                tcg_rt = tcg_const_i32(a->rt);
                gen_helper_check_hcr_el2_trap(cpu_env, tcg_rt, tcg_reg);
                tcg_temp_free_i32(tcg_reg);
                tcg_temp_free_i32(tcg_rt);
        case ARM_VFP_FPINST2:
            tmp = load_cpu_field(vfp.xregs[a->reg]);
                tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
                tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
                tmp = tcg_temp_new_i32();
                gen_helper_vfp_get_fpscr(tmp, cpu_env);
            g_assert_not_reached();

        if (a->rt == 15) {
            /* Set the 4 flag bits in the CPSR. */
            gen_set_nzcv(tmp);
            tcg_temp_free_i32(tmp);
        } else {
            store_reg(s, a->rt, tmp);
        }
        /* VMSR, move gp register to VFP special register */
            /* Writes are ignored. */
            tmp = load_reg(s, a->rt);
            gen_helper_vfp_set_fpscr(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            /*
             * TODO: VFP subarchitecture support.
             * For now, keep the EN bit only.
             */
            tmp = load_reg(s, a->rt);
            tcg_gen_andi_i32(tmp, tmp, 1 << 30);
            store_cpu_field(tmp, vfp.xregs[a->reg]);
        case ARM_VFP_FPINST2:
            tmp = load_reg(s, a->rt);
            store_cpu_field(tmp, vfp.xregs[a->reg]);
            g_assert_not_reached();
static void fp_sysreg_to_memory(DisasContext *s, void *opaque, TCGv_i32 value)
    arg_vldr_sysreg *a = opaque;
    uint32_t offset = a->imm;

    addr = load_reg(s, a->rn);
        tcg_gen_addi_i32(addr, addr, offset);

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        gen_helper_v8m_stackcheck(cpu_env, addr);

    gen_aa32_st_i32(s, value, addr, get_mem_index(s),
                    MO_UL | MO_ALIGN | s->be_data);
    tcg_temp_free_i32(value);

            tcg_gen_addi_i32(addr, addr, offset);
        store_reg(s, a->rn, addr);
        tcg_temp_free_i32(addr);

static TCGv_i32 memory_to_fp_sysreg(DisasContext *s, void *opaque)
    arg_vldr_sysreg *a = opaque;
    uint32_t offset = a->imm;
    TCGv_i32 value = tcg_temp_new_i32();

    addr = load_reg(s, a->rn);
        tcg_gen_addi_i32(addr, addr, offset);

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        gen_helper_v8m_stackcheck(cpu_env, addr);

    gen_aa32_ld_i32(s, value, addr, get_mem_index(s),
                    MO_UL | MO_ALIGN | s->be_data);

            tcg_gen_addi_i32(addr, addr, offset);
        store_reg(s, a->rn, addr);
        tcg_temp_free_i32(addr);

static bool trans_VLDR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
    return gen_M_fp_sysreg_write(s, a->reg, memory_to_fp_sysreg, a);

static bool trans_VSTR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
    return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_memory, a);
static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
    if (!dc_isar_feature(aa32_fp16_arith, s)) {

        /* UNPREDICTABLE; we choose to UNDEF */

    if (!vfp_access_check(s)) {

        /* VFP to general purpose register */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vn);
        tcg_gen_andi_i32(tmp, tmp, 0xffff);
        store_reg(s, a->rt, tmp);
        /* general purpose register to VFP */
        tmp = load_reg(s, a->rt);
        tcg_gen_andi_i32(tmp, tmp, 0xffff);
        vfp_store_reg32(tmp, a->vn);
        tcg_temp_free_i32(tmp);

static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {

    if (!vfp_access_check(s)) {

        /* VFP to general purpose register */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vn);
        if (a->rt == 15) {
            /* Set the 4 flag bits in the CPSR. */
            gen_set_nzcv(tmp);
            tcg_temp_free_i32(tmp);
        } else {
            store_reg(s, a->rt, tmp);
        }
        /* general purpose register to VFP */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vn);
        tcg_temp_free_i32(tmp);
static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {

    /*
     * VMOV between two general-purpose registers and two single precision
     * floating point registers
     */
    if (!vfp_access_check(s)) {

        /* fpreg to gpreg */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm);
        store_reg(s, a->rt, tmp);
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm + 1);
        store_reg(s, a->rt2, tmp);
        /* gpreg to fpreg */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vm);
        tcg_temp_free_i32(tmp);
        tmp = load_reg(s, a->rt2);
        vfp_store_reg32(tmp, a->vm + 1);
        tcg_temp_free_i32(tmp);

static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
    /*
     * VMOV between two general-purpose registers and one double precision
     * floating point register. Note that this does not require support
     * for double precision arithmetic.
     */
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {

    if (!vfp_access_check(s)) {

        /* fpreg to gpreg */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm * 2);
        store_reg(s, a->rt, tmp);
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm * 2 + 1);
        store_reg(s, a->rt2, tmp);
        /* gpreg to fpreg */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vm * 2);
        tcg_temp_free_i32(tmp);
        tmp = load_reg(s, a->rt2);
        vfp_store_reg32(tmp, a->vm * 2 + 1);
        tcg_temp_free_i32(tmp);
static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a)
    if (!dc_isar_feature(aa32_fp16_arith, s)) {

    if (!vfp_access_check(s)) {

    /* imm8 field is offset/2 for fp16, unlike fp32 and fp64 */
    offset = a->imm << 1;

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i32();
        gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
        vfp_store_reg32(tmp, a->vd);
        vfp_load_reg32(tmp, a->vd);
        gen_aa32_st16(s, tmp, addr, get_mem_index(s));
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(addr);

static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {

    if (!vfp_access_check(s)) {

    offset = a->imm << 2;

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i32();
        gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
        vfp_store_reg32(tmp, a->vd);
        vfp_load_reg32(tmp, a->vd);
        gen_aa32_st32(s, tmp, addr, get_mem_index(s));
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(addr);

static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
    /* Note that this does not require support for double arithmetic. */
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {

    if (!vfp_access_check(s)) {

    offset = a->imm << 2;

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i64();
        gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
        vfp_store_reg64(tmp, a->vd);
        vfp_load_reg64(tmp, a->vd);
        gen_aa32_st64(s, tmp, addr, get_mem_index(s));
    tcg_temp_free_i64(tmp);
    tcg_temp_free_i32(addr);
static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {

    if (n == 0 || (a->vd + n) > 32) {
        /*
         * UNPREDICTABLE cases for bad immediates: we choose to
         * UNDEF to avoid generating huge numbers of TCG ops
         */
    if (a->rn == 15 && a->w) {
        /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */

    if (!vfp_access_check(s)) {

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, 0);
        tcg_gen_addi_i32(addr, addr, -(a->imm << 2));

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        /*
         * Here 'addr' is the lowest address we will store to,
         * and is either the old SP (if post-increment) or
         * the new SP (if pre-decrement). For post-increment
         * where the old value is below the limit and the new
         * value is above, it is UNKNOWN whether the limit check
         * triggers; we choose to trigger.
         */
        gen_helper_v8m_stackcheck(cpu_env, addr);

    tmp = tcg_temp_new_i32();
    for (i = 0; i < n; i++) {
            gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
            vfp_store_reg32(tmp, a->vd + i);
            vfp_load_reg32(tmp, a->vd + i);
            gen_aa32_st32(s, tmp, addr, get_mem_index(s));
        tcg_gen_addi_i32(addr, addr, offset);
    tcg_temp_free_i32(tmp);

            offset = -offset * n;
        tcg_gen_addi_i32(addr, addr, offset);
        store_reg(s, a->rn, addr);
        tcg_temp_free_i32(addr);
static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
    /* Note that this does not require support for double arithmetic. */
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {

    if (n == 0 || (a->vd + n) > 32 || n > 16) {
        /*
         * UNPREDICTABLE cases for bad immediates: we choose to
         * UNDEF to avoid generating huge numbers of TCG ops
         */
    if (a->rn == 15 && a->w) {
        /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) {

    if (!vfp_access_check(s)) {

    /* For thumb, use of PC is UNPREDICTABLE. */
    addr = add_reg_for_lit(s, a->rn, 0);
        tcg_gen_addi_i32(addr, addr, -(a->imm << 2));

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        /*
         * Here 'addr' is the lowest address we will store to,
         * and is either the old SP (if post-increment) or
         * the new SP (if pre-decrement). For post-increment
         * where the old value is below the limit and the new
         * value is above, it is UNKNOWN whether the limit check
         * triggers; we choose to trigger.
         */
        gen_helper_v8m_stackcheck(cpu_env, addr);

    tmp = tcg_temp_new_i64();
    for (i = 0; i < n; i++) {
            gen_aa32_ld64(s, tmp, addr, get_mem_index(s));
            vfp_store_reg64(tmp, a->vd + i);
            vfp_load_reg64(tmp, a->vd + i);
            gen_aa32_st64(s, tmp, addr, get_mem_index(s));
        tcg_gen_addi_i32(addr, addr, offset);
    tcg_temp_free_i64(tmp);

            offset = -offset * n;
        } else if (a->imm & 1) {
            tcg_gen_addi_i32(addr, addr, offset);
        store_reg(s, a->rn, addr);
        tcg_temp_free_i32(addr);
/*
 * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp().
 * The callback should emit code to write a value to vd. If
 * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd
 * will contain the old value of the relevant VFP register;
 * otherwise it must be written to only.
 */
typedef void VFPGen3OpSPFn(TCGv_i32 vd,
                           TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst);
typedef void VFPGen3OpDPFn(TCGv_i64 vd,
                           TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);

/*
 * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp().
 * The callback should emit code to write a value to vd (which
 * should be written to only).
 */
typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);

/*
 * Return true if the specified S reg is in a scalar bank
 * (ie if it is s0..s7)
 */
static inline bool vfp_sreg_is_scalar(int reg)
    return (reg & 0x18) == 0;

/*
 * Return true if the specified D reg is in a scalar bank
 * (ie if it is d0..d3 or d16..d19)
 */
static inline bool vfp_dreg_is_scalar(int reg)
    return (reg & 0xc) == 0;

/*
 * Advance the S reg number forwards by delta within its bank
 * (ie increment the low 3 bits but leave the rest the same)
 */
static inline int vfp_advance_sreg(int reg, int delta)
    return ((reg + delta) & 0x7) | (reg & ~0x7);

/*
 * Advance the D reg number forwards by delta within its bank
 * (ie increment the low 2 bits but leave the rest the same)
 */
static inline int vfp_advance_dreg(int reg, int delta)
    return ((reg + delta) & 0x3) | (reg & ~0x3);
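/*
 * Worked example (informational): the short-vector bank containing s14
 * is s8..s15, so advancing by 3 wraps around within that bank:
 *   vfp_advance_sreg(14, 3) == ((14 + 3) & 0x7) | (14 & ~0x7)
 *                           == 1 | 8 == 9
 * i.e. s14 advances to s9 rather than crossing into s16..s23.
 */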
/*
 * Perform a 3-operand VFP data processing instruction. fn is the
 * callback to do the actual operation; this function deals with the
 * code to handle looping around for VFP vector processing.
 */
static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 f0, f1, fd;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {

    if (!vfp_access_check(s)) {

        /* Figure out what type of vector operation this is. */
        if (vfp_sreg_is_scalar(vd)) {
            delta_d = s->vec_stride + 1;
            if (vfp_sreg_is_scalar(vm)) {
                /* mixed scalar/vector */

    f0 = tcg_temp_new_i32();
    f1 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();
    fpst = fpstatus_ptr(FPST_FPCR);

    vfp_load_reg32(f0, vn);
    vfp_load_reg32(f1, vm);

            vfp_load_reg32(fd, vd);
        fn(fd, f0, f1, fpst);
        vfp_store_reg32(fd, vd);

        /* Set up the operands for the next iteration */
        vd = vfp_advance_sreg(vd, delta_d);
        vn = vfp_advance_sreg(vn, delta_d);
        vfp_load_reg32(f0, vn);
            vm = vfp_advance_sreg(vm, delta_m);
            vfp_load_reg32(f1, vm);

    tcg_temp_free_i32(f0);
    tcg_temp_free_i32(f1);
    tcg_temp_free_i32(fd);
    tcg_temp_free_ptr(fpst);
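/*
 * Short-vector example (informational): with a vector length of four and
 * a stride of one (veclen = 3 and delta_d = 1 in the code above), a
 * VADD.F32 with vd = s8, vn = s16, vm = s24 performs four additions,
 * advancing each register within its bank: s8..s11 = s16..s19 + s24..s27.
 * Had vm been in s0..s7 it would have been treated as a scalar and reused
 * for every element (the "mixed scalar/vector" case above).
 */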
static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
    /*
     * Do a half-precision operation. Functionally this is
     * the same as do_vfp_3op_sp(), except:
     * - it uses the FPST_FPCR_F16 float status
     * - it doesn't need the VFP vector handling (fp16 is a
     *   v8 feature, and in v8 VFP vectors don't exist)
     * - it does the aa32_fp16_arith feature test
     */
    TCGv_i32 f0, f1, fd;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {

    if (s->vec_len != 0 || s->vec_stride != 0) {

    if (!vfp_access_check(s)) {

    f0 = tcg_temp_new_i32();
    f1 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();
    fpst = fpstatus_ptr(FPST_FPCR_F16);

    vfp_load_reg32(f0, vn);
    vfp_load_reg32(f1, vm);

        vfp_load_reg32(fd, vd);
    fn(fd, f0, f1, fpst);
    vfp_store_reg32(fd, vd);

    tcg_temp_free_i32(f0);
    tcg_temp_free_i32(f1);
    tcg_temp_free_i32(fd);
    tcg_temp_free_ptr(fpst);
static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 f0, f1, fd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) {

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {

    if (!vfp_access_check(s)) {

        /* Figure out what type of vector operation this is. */
        if (vfp_dreg_is_scalar(vd)) {
            delta_d = (s->vec_stride >> 1) + 1;
            if (vfp_dreg_is_scalar(vm)) {
                /* mixed scalar/vector */

    f0 = tcg_temp_new_i64();
    f1 = tcg_temp_new_i64();
    fd = tcg_temp_new_i64();
    fpst = fpstatus_ptr(FPST_FPCR);

    vfp_load_reg64(f0, vn);
    vfp_load_reg64(f1, vm);

            vfp_load_reg64(fd, vd);
        fn(fd, f0, f1, fpst);
        vfp_store_reg64(fd, vd);

        /* Set up the operands for the next iteration */
        vd = vfp_advance_dreg(vd, delta_d);
        vn = vfp_advance_dreg(vn, delta_d);
        vfp_load_reg64(f0, vn);
            vm = vfp_advance_dreg(vm, delta_m);
            vfp_load_reg64(f1, vm);

    tcg_temp_free_i64(f0);
    tcg_temp_free_i64(f1);
    tcg_temp_free_i64(fd);
    tcg_temp_free_ptr(fpst);
static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {

    if (!vfp_access_check(s)) {

        /* Figure out what type of vector operation this is. */
        if (vfp_sreg_is_scalar(vd)) {
            delta_d = s->vec_stride + 1;
            if (vfp_sreg_is_scalar(vm)) {
                /* mixed scalar/vector */

    f0 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();

    vfp_load_reg32(f0, vm);

        vfp_store_reg32(fd, vd);

            /* single source one-many */
                vd = vfp_advance_sreg(vd, delta_d);
                vfp_store_reg32(fd, vd);

        /* Set up the operands for the next iteration */
        vd = vfp_advance_sreg(vd, delta_d);
        vm = vfp_advance_sreg(vm, delta_m);
        vfp_load_reg32(f0, vm);

    tcg_temp_free_i32(f0);
    tcg_temp_free_i32(fd);
static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
    /*
     * Do a half-precision operation. Functionally this is
     * the same as do_vfp_2op_sp(), except:
     * - it doesn't need the VFP vector handling (fp16 is a
     *   v8 feature, and in v8 VFP vectors don't exist)
     * - it does the aa32_fp16_arith feature test
     */

    if (!dc_isar_feature(aa32_fp16_arith, s)) {

    if (s->vec_len != 0 || s->vec_stride != 0) {

    if (!vfp_access_check(s)) {

    f0 = tcg_temp_new_i32();
    vfp_load_reg32(f0, vm);
    fn(f0, f0);
    vfp_store_reg32(f0, vd);
    tcg_temp_free_i32(f0);
static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) {

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {

    if (!vfp_access_check(s)) {

        /* Figure out what type of vector operation this is. */
        if (vfp_dreg_is_scalar(vd)) {
            delta_d = (s->vec_stride >> 1) + 1;
            if (vfp_dreg_is_scalar(vm)) {
                /* mixed scalar/vector */

    f0 = tcg_temp_new_i64();
    fd = tcg_temp_new_i64();

    vfp_load_reg64(f0, vm);

        vfp_store_reg64(fd, vd);

            /* single source one-many */
                vd = vfp_advance_dreg(vd, delta_d);
                vfp_store_reg64(fd, vd);

        /* Set up the operands for the next iteration */
        vd = vfp_advance_dreg(vd, delta_d);
        vm = vfp_advance_dreg(vm, delta_m);
        vfp_load_reg64(f0, vm);

    tcg_temp_free_i64(f0);
    tcg_temp_free_i64(fd);
static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);

static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
    return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);

static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);

static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
    return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);

static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);

static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
    return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
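/*
 * Why the operand order matters (informational): Arm NaN propagation
 * prefers the first operand, so with two quiet-NaN inputs
 * FPAdd(fd, FPMul(fn, fm)) and FPAdd(FPMul(fn, fm), fd) can return
 * different payloads. Passing vd as the first addend above is what keeps
 * VMLA's result bit-exact with the pseudocode.
 */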
static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(tmp, tmp);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);

static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
    return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);

static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(tmp, tmp);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);

static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
    return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);

static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(tmp, tmp);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);

static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
    return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(vd, vd);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);

static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
    return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);

static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(vd, vd);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);

static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
    return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);

static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(vd, vd);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);

static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
    return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
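/*
 * Concretely (informational): if A is a NaN, FPNeg(A) flips only its sign
 * bit, and (-A) + B then propagates A's payload because A is the first
 * operand of the add; B - A would propagate B's payload instead when both
 * inputs are NaNs, so the "simplified" form is observably different.
 */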
static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(tmp, tmp);
    gen_helper_vfp_negh(vd, vd);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);

static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
    return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);

static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(tmp, tmp);
    gen_helper_vfp_negs(vd, vd);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);

static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
    return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);

static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(tmp, tmp);
    gen_helper_vfp_negd(vd, vd);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);

static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
    return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a)
    return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false);

static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
    return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);

static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
    return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);

static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_mulh(vd, vn, vm, fpst);
    gen_helper_vfp_negh(vd, vd);

static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
    return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);

static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_muls(vd, vn, vm, fpst);
    gen_helper_vfp_negs(vd, vd);

static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
    return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false);

static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_muld(vd, vn, vm, fpst);
    gen_helper_vfp_negd(vd, vd);

static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
    return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);

static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a)
    return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false);

static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
    return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);

static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
    return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);

static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a)
    return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false);

static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
    return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);

static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
    return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);

static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a)
    return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false);

static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
    return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);

static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
    return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a)
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
    return do_vfp_3op_hp(s, gen_helper_vfp_minnumh,
                         a->vd, a->vn, a->vm, false);

static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a)
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
    return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh,
                         a->vd, a->vn, a->vm, false);

static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a)
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
    return do_vfp_3op_sp(s, gen_helper_vfp_minnums,
                         a->vd, a->vn, a->vm, false);

static bool trans_VMAXNM_sp(DisasContext *s, arg_VMAXNM_sp *a)
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
    return do_vfp_3op_sp(s, gen_helper_vfp_maxnums,
                         a->vd, a->vn, a->vm, false);

static bool trans_VMINNM_dp(DisasContext *s, arg_VMINNM_dp *a)
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
    return do_vfp_3op_dp(s, gen_helper_vfp_minnumd,
                         a->vd, a->vn, a->vm, false);

static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a)
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
    return do_vfp_3op_dp(s, gen_helper_vfp_maxnumd,
                         a->vd, a->vn, a->vm, false);
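/*
 * Note (informational): the minnum/maxnum helpers implement the
 * IEEE 754-2008 minNum/maxNum operations, so a single quiet-NaN operand
 * loses: e.g. VMINNM of (3.0, QNaN) returns 3.0 rather than the NaN.
 */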
static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
    /*
     * VFNMA : fd = muladd(-fd, fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA : fd = muladd( fd, fn, fm)
     * VFMS : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps. NB that doing the negations here as separate
     * steps is correct: an input NaN should come out with its sign
     * bit flipped if it is a negated input.
     */
    TCGv_i32 vn, vm, vd;

    /*
     * Present in VFPv4 only, and only with the FP16 extension.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_fp16_arith, s) ||
        !dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpsp_v2, s)) {

    if (s->vec_len != 0 || s->vec_stride != 0) {

    if (!vfp_access_check(s)) {

    vn = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i32();

    vfp_load_reg32(vn, a->vn);
    vfp_load_reg32(vm, a->vm);
        gen_helper_vfp_negh(vn, vn);
    vfp_load_reg32(vd, a->vd);
        gen_helper_vfp_negh(vd, vd);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
    vfp_store_reg32(vd, a->vd);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(vn);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i32(vd);
static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
    /*
     * VFNMA : fd = muladd(-fd, fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA : fd = muladd( fd, fn, fm)
     * VFMS : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps. NB that doing the negations here as separate
     * steps is correct: an input NaN should come out with its sign
     * bit flipped if it is a negated input.
     */
    TCGv_i32 vn, vm, vd;

    /*
     * Present in VFPv4 only.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpsp_v2, s)) {
    /*
     * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
     * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
     */
    if (s->vec_len != 0 || s->vec_stride != 0) {

    if (!vfp_access_check(s)) {

    vn = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i32();

    vfp_load_reg32(vn, a->vn);
    vfp_load_reg32(vm, a->vm);
        gen_helper_vfp_negs(vn, vn);
    vfp_load_reg32(vd, a->vd);
        gen_helper_vfp_negs(vd, vd);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
    vfp_store_reg32(vd, a->vd);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(vn);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i32(vd);
static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
    /*
     * VFNMA : fd = muladd(-fd, fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA : fd = muladd( fd, fn, fm)
     * VFMS : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps. NB that doing the negations here as separate
     * steps is correct: an input NaN should come out with its sign
     * bit flipped if it is a negated input.
     */
    TCGv_i64 vn, vm, vd;

    /*
     * Present in VFPv4 only.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpdp_v2, s)) {
    /*
     * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
     * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
     */
    if (s->vec_len != 0 || s->vec_stride != 0) {

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {

    if (!vfp_access_check(s)) {

    vn = tcg_temp_new_i64();
    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i64();

    vfp_load_reg64(vn, a->vn);
    vfp_load_reg64(vm, a->vm);
        gen_helper_vfp_negd(vn, vn);
    vfp_load_reg64(vd, a->vd);
        gen_helper_vfp_negd(vd, vd);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
    vfp_store_reg64(vd, a->vd);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(vn);
    tcg_temp_free_i64(vm);
    tcg_temp_free_i64(vd);
#define MAKE_ONE_VFM_TRANS_FN(INSN, PREC, NEGN, NEGD) \
    static bool trans_##INSN##_##PREC(DisasContext *s, \
                                      arg_##INSN##_##PREC *a) \
        return do_vfm_##PREC(s, a, NEGN, NEGD); \

#define MAKE_VFM_TRANS_FNS(PREC) \
    MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \
    MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \
    MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \
    MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true)

MAKE_VFM_TRANS_FNS(hp)
MAKE_VFM_TRANS_FNS(sp)
MAKE_VFM_TRANS_FNS(dp)
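/*
 * What "fused" buys you (informational, a host-float illustration rather
 * than translator code): for float a = 1.0f + 0x1p-12f, the product
 * a * a = 1 + 2^-11 + 2^-24 rounds to 1 + 2^-11 when computed as a
 * separate multiply, so (a * a) - 1.0f yields exactly 2^-11, whereas
 * fmaf(a, a, -1.0f) keeps the 2^-24 term and returns 2^-11 + 2^-24.
 * The vfp_muladd* helpers must preserve that single-rounding behaviour.
 */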
static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a)
    if (!dc_isar_feature(aa32_fp16_arith, s)) {

    if (s->vec_len != 0 || s->vec_stride != 0) {

    if (!vfp_access_check(s)) {

    fd = tcg_const_i32(vfp_expand_imm(MO_16, a->imm));
    vfp_store_reg32(fd, a->vd);
    tcg_temp_free_i32(fd);

static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
    uint32_t delta_d = 0;
    int veclen = s->vec_len;

    if (!dc_isar_feature(aa32_fpsp_v3, s)) {

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {

    if (!vfp_access_check(s)) {

        /* Figure out what type of vector operation this is. */
        if (vfp_sreg_is_scalar(vd)) {
            delta_d = s->vec_stride + 1;

    fd = tcg_const_i32(vfp_expand_imm(MO_32, a->imm));

        vfp_store_reg32(fd, vd);

        /* Set up the operands for the next iteration */
        vd = vfp_advance_sreg(vd, delta_d);

    tcg_temp_free_i32(fd);

static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
    uint32_t delta_d = 0;
    int veclen = s->vec_len;

    if (!dc_isar_feature(aa32_fpdp_v3, s)) {

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) {

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {

    if (!vfp_access_check(s)) {

        /* Figure out what type of vector operation this is. */
        if (vfp_dreg_is_scalar(vd)) {
            delta_d = (s->vec_stride >> 1) + 1;

    fd = tcg_const_i64(vfp_expand_imm(MO_64, a->imm));

        vfp_store_reg64(fd, vd);

        /* Set up the operands for the next iteration */
        vd = vfp_advance_dreg(vd, delta_d);

    tcg_temp_free_i64(fd);
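/*
 * Worked example (informational): VMOV.F64 d0, #2.0 encodes imm8 = 0x00,
 * and vfp_expand_imm(MO_64, 0x00) yields 0x4000 shifted up by 48, i.e.
 * 0x4000000000000000, the double-precision bit pattern for 2.0.
 */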
#define DO_VFP_2OP(INSN, PREC, FN) \
    static bool trans_##INSN##_##PREC(DisasContext *s, \
                                      arg_##INSN##_##PREC *a) \
        return do_vfp_2op_##PREC(s, FN, a->vd, a->vm); \

DO_VFP_2OP(VMOV_reg, sp, tcg_gen_mov_i32)
DO_VFP_2OP(VMOV_reg, dp, tcg_gen_mov_i64)

DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh)
DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss)
DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd)

DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh)
DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs)
DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd)

static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
    gen_helper_vfp_sqrth(vd, vm, cpu_env);

static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
    gen_helper_vfp_sqrts(vd, vm, cpu_env);

static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
    gen_helper_vfp_sqrtd(vd, vm, cpu_env);

DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp)
DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp)
DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp)
2722 static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
2726 if (!dc_isar_feature(aa32_fp16_arith, s)) {
2730 /* Vm/M bits must be zero for the Z variant */
2731 if (a->z && a->vm != 0) {
2735 if (!vfp_access_check(s)) {
2739 vd = tcg_temp_new_i32();
2740 vm = tcg_temp_new_i32();
2742 vfp_load_reg32(vd, a->vd);
2744 tcg_gen_movi_i32(vm, 0);
2746 vfp_load_reg32(vm, a->vm);
2750 gen_helper_vfp_cmpeh(vd, vm, cpu_env);
2752 gen_helper_vfp_cmph(vd, vm, cpu_env);
2755 tcg_temp_free_i32(vd);
2756 tcg_temp_free_i32(vm);

static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
{
    TCGv_i32 vd, vm;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();

    vfp_load_reg32(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i32(vm, 0);
    } else {
        vfp_load_reg32(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmpes(vd, vm, cpu_env);
    } else {
        gen_helper_vfp_cmps(vd, vm, cpu_env);
    }

    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(vm);
    return true;
}

static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
{
    TCGv_i64 vd, vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i64();
    vm = tcg_temp_new_i64();

    vfp_load_reg64(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i64(vm, 0);
    } else {
        vfp_load_reg64(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmped(vd, vm, cpu_env);
    } else {
        gen_helper_vfp_cmpd(vd, vm, cpu_env);
    }

    tcg_temp_free_i64(vd);
    tcg_temp_free_i64(vm);
    return true;
}
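
/*
 * f16 <-> f32/f64 conversions: get_ahp_flag() fetches FPSCR.AHP,
 * which selects the Alternative Half-Precision format (no infinities
 * or NaNs) instead of IEEE half precision, and the T bit picks the
 * low or high 16 bits of the single-precision register holding the
 * f16 value.
 */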

static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    /* The T bit tells us if we want the low or high 16 bits of Vm */
    tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;
    TCGv_i64 vd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    /* The T bit tells us if we want the low or high 16 bits of Vm */
    tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
    vd = tcg_temp_new_i64();
    gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i64(vd);
    return true;
}

static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();

    vfp_load_reg32(tmp, a->vm);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
    tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;
    TCGv_i64 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    vm = tcg_temp_new_i64();

    vfp_load_reg64(vm, a->vm);
    gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
    tcg_temp_free_i64(vm);
    tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}
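
/*
 * VRINTR rounds to an integral value in the floating-point format,
 * using the current rounding mode from FPSCR. Note that the
 * half-precision form gates on fp16 arithmetic support while the
 * sp/dp forms gate on the VRINT feature.
 */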

static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_rinth(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rints(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rintd(tmp, tmp, fpst);
    vfp_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    return true;
}
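
/*
 * VRINTZ always rounds towards zero regardless of FPSCR. The idiom
 * below relies on gen_helper_set_rmode() writing the previous
 * rounding mode back into tcg_rmode, so a second call with the same
 * temporary restores the original mode around the rint operation.
 */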

static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rinth(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_rmode);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rints(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_rmode);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rintd(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    vfp_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    tcg_temp_free_i32(tcg_rmode);
    return true;
}
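
/*
 * VRINTX differs from VRINTR only in using the _exact helpers, which
 * also raise the Inexact exception when the result is not numerically
 * equal to the input.
 */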

static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_rinth_exact(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rints_exact(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rintd_exact(tmp, tmp, fpst);
    vfp_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    return true;
}
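
/*
 * Note that the _sp/_dp suffix here names the source precision:
 * trans_VCVT_sp widens an f32 source to f64, while trans_VCVT_dp
 * narrows an f64 source to f32.
 */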

static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
{
    TCGv_i64 vd;
    TCGv_i32 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i64();
    vfp_load_reg32(vm, a->vm);
    gen_helper_vfp_fcvtds(vd, vm, cpu_env);
    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i64(vd);
    return true;
}

static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
{
    TCGv_i64 vm;
    TCGv_i32 vd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i64();
    vfp_load_reg64(vm, a->vm);
    gen_helper_vfp_fcvtsd(vd, vm, cpu_env);
    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i64(vm);
    return true;
}
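
/*
 * Integer-to-float conversions: the source is always a 32-bit value
 * held in an Sreg, and the S bit selects the signed (sito*) versus
 * unsigned (uito*) helper.
 */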

static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    if (a->s) {
        /* i32 -> f16 */
        gen_helper_vfp_sitoh(vm, vm, fpst);
    } else {
        /* u32 -> f16 */
        gen_helper_vfp_uitoh(vm, vm, fpst);
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    if (a->s) {
        /* i32 -> f32 */
        gen_helper_vfp_sitos(vm, vm, fpst);
    } else {
        /* u32 -> f32 */
        gen_helper_vfp_uitos(vm, vm, fpst);
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
{
    TCGv_i32 vm;
    TCGv_i64 vd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i64();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    if (a->s) {
        /* i32 -> f64 */
        gen_helper_vfp_sitod(vd, vm, fpst);
    } else {
        /* u32 -> f64 */
        gen_helper_vfp_uitod(vd, vm, fpst);
    }
    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i64(vd);
    tcg_temp_free_ptr(fpst);
    return true;
}
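
/*
 * VJCVT converts an f64 to a 32-bit signed integer with round-to-zero
 * and, per the v8.3 JSCVT semantics used by JavaScript engines,
 * out-of-range results wrap modulo 2^32 rather than saturating; hence
 * the dedicated helper and the aa32_jscvt feature gate.
 */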

static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
{
    TCGv_i32 vd;
    TCGv_i64 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_jscvt, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i32();
    vfp_load_reg64(vm, a->vm);
    gen_helper_vjcvt(vd, vm, cpu_env);
    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i64(vm);
    tcg_temp_free_i32(vd);
    return true;
}
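
/*
 * Fixed-point conversions. The three opc bits are op:U:sx: op selects
 * float-to-fixed (round to zero) versus fixed-to-float (round to
 * nearest), U selects unsigned, and sx selects a 16-bit versus 32-bit
 * fixed-point value. frac_bits is the implied number of fraction
 * bits: e.g. with sx == 1 an encoded imm of 28 gives frac_bits == 4.
 */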

static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
{
    TCGv_i32 vd, shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i32();
    vfp_load_reg32(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR_F16);
    shift = tcg_const_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_toslh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_toulh_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(shift);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
{
    TCGv_i32 vd, shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fpsp_v3, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i32();
    vfp_load_reg32(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR);
    shift = tcg_const_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(shift);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
{
    TCGv_i64 vd;
    TCGv_i32 shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fpdp_v3, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i64();
    vfp_load_reg64(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR);
    shift = tcg_const_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i64(vd);
    tcg_temp_free_i32(shift);
    tcg_temp_free_ptr(fpst);
    return true;
}
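
/*
 * Float-to-integer conversions: the RZ bit selects round-to-zero (the
 * tosiz/touiz helpers) instead of the rounding mode currently in
 * FPSCR, and the S bit selects a signed versus unsigned result.
 */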

static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR_F16);
    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizh(vm, vm, fpst);
        } else {
            gen_helper_vfp_tosih(vm, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizh(vm, vm, fpst);
        } else {
            gen_helper_vfp_touih(vm, vm, fpst);
        }
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizs(vm, vm, fpst);
        } else {
            gen_helper_vfp_tosis(vm, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizs(vm, vm, fpst);
        } else {
            gen_helper_vfp_touis(vm, vm, fpst);
        }
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
{
    TCGv_i32 vd;
    TCGv_i64 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i32();
    vfp_load_reg64(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizd(vd, vm, fpst);
        } else {
            gen_helper_vfp_tosid(vd, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizd(vd, vm, fpst);
        } else {
            gen_helper_vfp_touid(vd, vm, fpst);
        }
    }
    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i64(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

/*
 * Decoding of VLLDM and VLSTM is nonstandard because:
 *  * if there is no FPU then these insns must NOP in
 *    Secure state and UNDEF in Nonsecure state
 *  * if there is an FPU then these insns do not have
 *    the usual behaviour that vfp_access_check() provides of
 *    being controlled by CPACR/NSACR enable bits or the
 *    lazy-stacking logic.
 */

static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a)
{
    TCGv_i32 fptr;

    if (!arm_dc_feature(s, ARM_FEATURE_M) ||
        !arm_dc_feature(s, ARM_FEATURE_V8)) {
        return false;
    }

    if (a->op) {
        /*
         * T2 encoding ({D0-D31} reglist): v8.1M and up. We choose not
         * to take the IMPDEF option to make memory accesses to the stack
         * slots that correspond to the D16-D31 registers (discarding
         * read data and writing UNKNOWN values), so for us the T2
         * encoding behaves identically to the T1 encoding.
         */
        if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
            return false;
        }
    } else {
        /*
         * T1 encoding ({D0-D15} reglist): undef if we have 32 Dregs.
         * This is currently architecturally impossible, but we add the
         * check to stay in line with the pseudocode. Note that we must
         * emit code for the UNDEF so it takes precedence over the NOCP.
         */
        if (dc_isar_feature(aa32_simd_r32, s)) {
            unallocated_encoding(s);
            return true;
        }
    }

    /*
     * If not secure, UNDEF. We must emit code for this
     * rather than returning false so that this takes
     * precedence over the m-nocp.decode NOCP fallback.
     */
    if (!s->v8m_secure) {
        unallocated_encoding(s);
        return true;
    }

    /* If no fpu, NOP. */
    if (!dc_isar_feature(aa32_vfp, s)) {
        return true;
    }

    fptr = load_reg(s, a->rn);
    if (a->l) {
        gen_helper_v7m_vlldm(cpu_env, fptr);
    } else {
        gen_helper_v7m_vlstm(cpu_env, fptr);
    }
    tcg_temp_free_i32(fptr);

    /* End the TB, because we have updated FP control bits */
    s->base.is_jmp = DISAS_UPDATE_EXIT;
    return true;
}
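
/*
 * Note on the VSCCLRM zeroing loop below: the register list is first
 * normalised to S-register numbers; an odd-numbered bottom Sreg is
 * cleared as the high half of its Dreg, aligned pairs are cleared
 * with 64-bit stores, and a trailing even-numbered Sreg is cleared
 * as a low half.
 */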

static bool trans_VSCCLRM(DisasContext *s, arg_VSCCLRM *a)
{
    int btmreg, topreg;
    TCGv_i64 zero;
    TCGv_i32 aspen, sfpa;

    if (!dc_isar_feature(aa32_m_sec_state, s)) {
        /* Before v8.1M, fall through in decode to NOCP check */
        return false;
    }

    /* Explicitly UNDEF because this takes precedence over NOCP */
    if (!arm_dc_feature(s, ARM_FEATURE_M_MAIN) || !s->v8m_secure) {
        unallocated_encoding(s);
        return true;
    }

    if (!dc_isar_feature(aa32_vfp_simd, s)) {
        /* NOP if we have neither FP nor MVE */
        return true;
    }

    /*
     * If FPCCR.ASPEN != 0 && CONTROL_S.SFPA == 0 then there is no
     * active floating point context so we must NOP (without doing
     * any lazy state preservation or the NOCP check).
     */
    aspen = load_cpu_field(v7m.fpccr[M_REG_S]);
    sfpa = load_cpu_field(v7m.control[M_REG_S]);
    tcg_gen_andi_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
    tcg_gen_xori_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
    tcg_gen_andi_i32(sfpa, sfpa, R_V7M_CONTROL_SFPA_MASK);
    tcg_gen_or_i32(sfpa, sfpa, aspen);
    arm_gen_condlabel(s);
    tcg_gen_brcondi_i32(TCG_COND_EQ, sfpa, 0, s->condlabel);

    if (s->fp_excp_el != 0) {
        gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
                           syn_uncategorized(), s->fp_excp_el);
        return true;
    }

    topreg = a->vd + a->imm - 1;
    btmreg = a->vd;

    /* Convert to Sreg numbers if the insn specified the list in Dregs */
    if (a->size == 3) {
        topreg = topreg * 2 + 1;
        btmreg *= 2;
    }

    if (topreg > 63 || (topreg > 31 && !(topreg & 1))) {
        /* UNPREDICTABLE: we choose to undef */
        unallocated_encoding(s);
        return true;
    }

    /* Silently ignore requests to clear D16-D31 if they don't exist */
    if (topreg > 31 && !dc_isar_feature(aa32_simd_r32, s)) {
        topreg = 31;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Zero the Sregs from btmreg to topreg inclusive. */
    zero = tcg_const_i64(0);
    if (btmreg & 1) {
        write_neon_element64(zero, btmreg >> 1, 1, MO_32);
        btmreg++;
    }
    for (; btmreg + 1 <= topreg; btmreg += 2) {
        write_neon_element64(zero, btmreg >> 1, 0, MO_64);
    }
    if (btmreg == topreg) {
        write_neon_element64(zero, btmreg >> 1, 0, MO_32);
        btmreg++;
    }
    assert(btmreg == topreg + 1);
    /* TODO: when MVE is implemented, zero VPR here */
    return true;
}

static bool trans_NOCP(DisasContext *s, arg_nocp *a)
{
    /*
     * Handle M-profile early check for disabled coprocessor:
     * all we need to do here is emit the NOCP exception if
     * the coprocessor is disabled. Otherwise we return false
     * and the real VFP/etc decode will handle the insn.
     */
    assert(arm_dc_feature(s, ARM_FEATURE_M));

    if (a->cp == 11) {
        a->cp = 10; /* cp11 is governed by the cp10 enable */
    }
    if (arm_dc_feature(s, ARM_FEATURE_V8_1M) &&
        (a->cp == 8 || a->cp == 9 || a->cp == 14 || a->cp == 15)) {
        /* in v8.1M cp 8, 9, 14, 15 also are governed by the cp10 enable */
        a->cp = 10;
    }

    if (a->cp != 10) {
        gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
                           syn_uncategorized(), default_exception_el(s));
        return true;
    }

    if (s->fp_excp_el != 0) {
        gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
                           syn_uncategorized(), s->fp_excp_el);
        return true;
    }

    return false;
}

static bool trans_NOCP_8_1(DisasContext *s, arg_nocp *a)
{
    /* This range needs a coprocessor check for v8.1M and later only */
    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
        return false;
    }
    return trans_NOCP(s, a);
}
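
/*
 * VINS and VMOVX operate on the 16-bit halves of an Sreg: VINS
 * deposits the low half of Vm into the high half of Vd, and VMOVX
 * extracts the high half of Vm into Vd. Neither participates in
 * short vectors, so a nonzero vector length or stride is rejected.
 * VMOVX reuses arg_VINS since the two encodings have identical fields.
 */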

static bool trans_VINS(DisasContext *s, arg_VINS *a)
{
    TCGv_i32 rd, rm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Insert low half of Vm into high half of Vd */
    rm = tcg_temp_new_i32();
    rd = tcg_temp_new_i32();
    vfp_load_reg32(rm, a->vm);
    vfp_load_reg32(rd, a->vd);
    tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
    vfp_store_reg32(rd, a->vd);
    tcg_temp_free_i32(rm);
    tcg_temp_free_i32(rd);
    return true;
}

static bool trans_VMOVX(DisasContext *s, arg_VINS *a)
{
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Set Vd to high half of Vm */
    rm = tcg_temp_new_i32();
    vfp_load_reg32(rm, a->vm);
    tcg_gen_shri_i32(rm, rm, 16);
    vfp_store_reg32(rm, a->vd);
    tcg_temp_free_i32(rm);
    return true;
}