/*
 * ARM translation: AArch32 VFP instructions
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 * Copyright (c) 2019 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

/* Include the generated VFP decoder */
#include "decode-vfp.c.inc"
#include "decode-vfp-uncond.c.inc"
static inline void vfp_load_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(true, reg));
}

static inline void vfp_store_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(true, reg));
}

static inline void vfp_load_reg32(TCGv_i32 var, int reg)
{
    tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
}

static inline void vfp_store_reg32(TCGv_i32 var, int reg)
{
    tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
}
/*
 * The imm8 encodes the sign bit, enough bits to represent an exponent in
 * the range 01....1xx to 10....0xx, and the most significant 4 bits of
 * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
 */
uint64_t vfp_expand_imm(int size, uint8_t imm8)
{
    uint64_t imm;

    switch (size) {
    case MO_64:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
            (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
            extract32(imm8, 0, 6);
        imm <<= 48;
        break;
    case MO_32:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
            (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
            (extract32(imm8, 0, 6) << 3);
        imm <<= 16;
        break;
    case MO_16:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
            (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
            (extract32(imm8, 0, 6) << 6);
        break;
    default:
        g_assert_not_reached();
    }
    return imm;
}
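
/*
 * Worked example (illustrative note, not from the original source):
 * imm8 == 0x70 has sign 0, bit 6 set and low bits 110000, so the MO_32
 * case builds 0x3e00 | 0x180 = 0x3f80 and shifts it to 0x3f800000,
 * i.e. 1.0f; the MO_16 case yields 0x3c00 and the MO_64 case yields
 * 0x3ff0000000000000, the half- and double-precision encodings of 1.0.
 */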
/*
 * Return the offset of a 16-bit half of the specified VFP single-precision
 * register. If top is true, returns the top 16 bits; otherwise the bottom
 * 16 bits.
 */
static inline long vfp_f16_offset(unsigned reg, bool top)
{
    long offs = vfp_reg_offset(false, reg);

#ifdef HOST_WORDS_BIGENDIAN
    if (!top) {
        offs += 2;
    }
#else
    if (top) {
        offs += 2;
    }
#endif
    return offs;
}
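
/*
 * Illustrative note (not from the original source): on a little-endian
 * host vfp_f16_offset(reg, true) is vfp_reg_offset(false, reg) + 2,
 * because the top 16 bits of the 32-bit S register sit at the higher
 * address; a big-endian host applies the +2 to the bottom half instead.
 */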
/*
 * Generate code for M-profile lazy FP state preservation if needed;
 * this corresponds to the pseudocode PreserveFPState() function.
 */
static void gen_preserve_fp_state(DisasContext *s)
{
    if (s->v7m_lspact) {
        /*
         * Lazy state saving affects external memory and also the NVIC,
         * so we must mark it as an IO operation for icount (and cause
         * this to be the last insn in the TB).
         */
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            s->base.is_jmp = DISAS_UPDATE_EXIT;
            gen_io_start();
        }
        gen_helper_v7m_preserve_fp_state(cpu_env);
        /*
         * If the preserve_fp_state helper doesn't throw an exception
         * then it will clear LSPACT; we don't need to repeat this for
         * any further FP insns in this TB.
         */
        s->v7m_lspact = false;
    }
}
/*
 * Check that VFP access is enabled. If it is, do the necessary
 * M-profile lazy-FP handling and then return true.
 * If not, emit code to generate an appropriate exception and
 * return false.
 * The ignore_vfp_enabled argument specifies that we should ignore
 * whether VFP is enabled via FPEXC[EN]: this should be true for FMXR/FMRX
 * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
 */
static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
{
    if (s->fp_excp_el) {
        /* M-profile handled this earlier, in disas_m_nocp() */
        assert(!arm_dc_feature(s, ARM_FEATURE_M));
        gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
                           syn_fp_access_trap(1, 0xe, false),
                           s->fp_excp_el);
        return false;
    }

    if (!s->vfp_enabled && !ignore_vfp_enabled) {
        assert(!arm_dc_feature(s, ARM_FEATURE_M));
        unallocated_encoding(s);
        return false;
    }

    if (arm_dc_feature(s, ARM_FEATURE_M)) {
        /* Handle M-profile lazy FP state mechanics */

        /* Trigger lazy-state preservation if necessary */
        gen_preserve_fp_state(s);

        /* Update ownership of FP context: set FPCCR.S to match current state */
        if (s->v8m_fpccr_s_wrong) {
            TCGv_i32 tmp;

            tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
            if (s->v8m_secure) {
                tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
            } else {
                tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
            }
            store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
            /* Don't need to do this for any further FP insns in this TB */
            s->v8m_fpccr_s_wrong = false;
        }

        if (s->v7m_new_fp_ctxt_needed) {
            /*
             * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA
             * and the FPSCR.
             */
            TCGv_i32 control, fpscr;
            uint32_t bits = R_V7M_CONTROL_FPCA_MASK;

            fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
            gen_helper_vfp_set_fpscr(cpu_env, fpscr);
            tcg_temp_free_i32(fpscr);
            /*
             * We don't need to arrange to end the TB, because the only
             * parts of FPSCR which we cache in the TB flags are the VECLEN
             * and VECSTRIDE, and those don't exist for M-profile.
             */

            if (s->v8m_secure) {
                bits |= R_V7M_CONTROL_SFPA_MASK;
            }
            control = load_cpu_field(v7m.control[M_REG_S]);
            tcg_gen_ori_i32(control, control, bits);
            store_cpu_field(control, v7m.control[M_REG_S]);
            /* Don't need to do this for any further FP insns in this TB */
            s->v7m_new_fp_ctxt_needed = false;
        }
    }

    return true;
}

/*
 * The most usual kind of VFP access check, for everything except
 * FMXR/FMRX to the always-available special registers.
 */
bool vfp_access_check(DisasContext *s)
{
    return full_vfp_access_check(s, false);
}
static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
{
    uint32_t rd, rn, rm;
    int sz = a->sz;

    if (!dc_isar_feature(aa32_vsel, s)) {
        return false;
    }

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vm | a->vn | a->vd) & 0x10)) {
        return false;
    }

    rd = a->vd;
    rn = a->vn;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    if (sz == 3) {
        TCGv_i64 frn, frm, dest;
        TCGv_i64 tmp, zero, zf, nf, vf;

        zero = tcg_const_i64(0);

        frn = tcg_temp_new_i64();
        frm = tcg_temp_new_i64();
        dest = tcg_temp_new_i64();

        zf = tcg_temp_new_i64();
        nf = tcg_temp_new_i64();
        vf = tcg_temp_new_i64();

        tcg_gen_extu_i32_i64(zf, cpu_ZF);
        tcg_gen_ext_i32_i64(nf, cpu_NF);
        tcg_gen_ext_i32_i64(vf, cpu_VF);

        vfp_load_reg64(frn, rn);
        vfp_load_reg64(frm, rm);
        switch (a->cc) {
        case 0: /* eq: Z */
            tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
                                frn, frm);
            break;
        case 1: /* vs: V */
            tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
                                frn, frm);
            break;
        case 2: /* ge: N == V -> N ^ V == 0 */
            tmp = tcg_temp_new_i64();
            tcg_gen_xor_i64(tmp, vf, nf);
            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
                                frn, frm);
            tcg_temp_free_i64(tmp);
            break;
        case 3: /* gt: !Z && N == V */
            tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
                                frn, frm);
            tmp = tcg_temp_new_i64();
            tcg_gen_xor_i64(tmp, vf, nf);
            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
                                dest, frm);
            tcg_temp_free_i64(tmp);
            break;
        }
        vfp_store_reg64(dest, rd);
        tcg_temp_free_i64(frn);
        tcg_temp_free_i64(frm);
        tcg_temp_free_i64(dest);

        tcg_temp_free_i64(zf);
        tcg_temp_free_i64(nf);
        tcg_temp_free_i64(vf);

        tcg_temp_free_i64(zero);
    } else {
        TCGv_i32 frn, frm, dest;
        TCGv_i32 tmp, zero;

        zero = tcg_const_i32(0);

        frn = tcg_temp_new_i32();
        frm = tcg_temp_new_i32();
        dest = tcg_temp_new_i32();
        vfp_load_reg32(frn, rn);
        vfp_load_reg32(frm, rm);
        switch (a->cc) {
        case 0: /* eq: Z */
            tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
                                frn, frm);
            break;
        case 1: /* vs: V */
            tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
                                frn, frm);
            break;
        case 2: /* ge: N == V -> N ^ V == 0 */
            tmp = tcg_temp_new_i32();
            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
                                frn, frm);
            tcg_temp_free_i32(tmp);
            break;
        case 3: /* gt: !Z && N == V */
            tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
                                frn, frm);
            tmp = tcg_temp_new_i32();
            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
                                dest, frm);
            tcg_temp_free_i32(tmp);
            break;
        }
        /* For fp16 the top half is always zeroes */
        if (sz == 1) {
            tcg_gen_andi_i32(dest, dest, 0xffff);
        }
        vfp_store_reg32(dest, rd);
        tcg_temp_free_i32(frn);
        tcg_temp_free_i32(frm);
        tcg_temp_free_i32(dest);
        tcg_temp_free_i32(zero);
    }

    return true;
}
/*
 * Table for converting the most common AArch32 encoding of
 * rounding mode to arm_fprounding order (which matches the
 * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
 */
static const uint8_t fp_decode_rm[] = {
    FPROUNDING_TIEAWAY,
    FPROUNDING_TIEEVEN,
    FPROUNDING_POSINF,
    FPROUNDING_NEGINF,
};
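
/*
 * Illustrative note (not from the original source): an AArch32 'rm'
 * field of 0b01 indexes entry 1 above and so selects FPROUNDING_TIEEVEN
 * (round to nearest, ties to even), matching the FPDecodeRM() pseudocode
 * referenced in the comment.
 */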
static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
{
    uint32_t rd, rm;
    int sz = a->sz;
    TCGv_ptr fpst;
    TCGv_i32 tcg_rmode;
    int rounding = fp_decode_rm[a->rm];

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vm | a->vd) & 0x10)) {
        return false;
    }

    rd = a->vd;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    if (sz == 1) {
        fpst = fpstatus_ptr(FPST_FPCR_F16);
    } else {
        fpst = fpstatus_ptr(FPST_FPCR);
    }

    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);

    if (sz == 3) {
        TCGv_i64 tcg_op;
        TCGv_i64 tcg_res;
        tcg_op = tcg_temp_new_i64();
        tcg_res = tcg_temp_new_i64();
        vfp_load_reg64(tcg_op, rm);
        gen_helper_rintd(tcg_res, tcg_op, fpst);
        vfp_store_reg64(tcg_res, rd);
        tcg_temp_free_i64(tcg_op);
        tcg_temp_free_i64(tcg_res);
    } else {
        TCGv_i32 tcg_op;
        TCGv_i32 tcg_res;
        tcg_op = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        vfp_load_reg32(tcg_op, rm);
        if (sz == 1) {
            gen_helper_rinth(tcg_res, tcg_op, fpst);
        } else {
            gen_helper_rints(tcg_res, tcg_op, fpst);
        }
        vfp_store_reg32(tcg_res, rd);
        tcg_temp_free_i32(tcg_op);
        tcg_temp_free_i32(tcg_res);
    }

    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    tcg_temp_free_i32(tcg_rmode);

    tcg_temp_free_ptr(fpst);
    return true;
}
static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
{
    uint32_t rd, rm;
    int sz = a->sz;
    TCGv_ptr fpst;
    TCGv_i32 tcg_rmode, tcg_shift;
    int rounding = fp_decode_rm[a->rm];
    bool is_signed = a->op;

    if (!dc_isar_feature(aa32_vcvt_dr, s)) {
        return false;
    }

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    rd = a->vd;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    if (sz == 1) {
        fpst = fpstatus_ptr(FPST_FPCR_F16);
    } else {
        fpst = fpstatus_ptr(FPST_FPCR);
    }

    tcg_shift = tcg_const_i32(0);

    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);

    if (sz == 3) {
        TCGv_i64 tcg_double, tcg_res;
        TCGv_i32 tcg_tmp;
        tcg_double = tcg_temp_new_i64();
        tcg_res = tcg_temp_new_i64();
        tcg_tmp = tcg_temp_new_i32();
        vfp_load_reg64(tcg_double, rm);
        if (is_signed) {
            gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
        } else {
            gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
        }
        tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
        vfp_store_reg32(tcg_tmp, rd);
        tcg_temp_free_i32(tcg_tmp);
        tcg_temp_free_i64(tcg_res);
        tcg_temp_free_i64(tcg_double);
    } else {
        TCGv_i32 tcg_single, tcg_res;
        tcg_single = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        vfp_load_reg32(tcg_single, rm);
        if (sz == 1) {
            if (is_signed) {
                gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst);
            } else {
                gen_helper_vfp_toulh(tcg_res, tcg_single, tcg_shift, fpst);
            }
        } else {
            if (is_signed) {
                gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
            } else {
                gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
            }
        }
        vfp_store_reg32(tcg_res, rd);
        tcg_temp_free_i32(tcg_res);
        tcg_temp_free_i32(tcg_single);
    }

    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    tcg_temp_free_i32(tcg_rmode);

    tcg_temp_free_i32(tcg_shift);

    tcg_temp_free_ptr(fpst);

    return true;
}
static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a)
{
    /* VMOV scalar to general purpose register */
    TCGv_i32 tmp;

    /* SIZE == MO_32 is a VFP instruction; otherwise NEON. */
    if (a->size == MO_32
        ? !dc_isar_feature(aa32_fpsp_v2, s)
        : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    read_neon_element32(tmp, a->vn, a->index, a->size | (a->u ? 0 : MO_SIGN));
    store_reg(s, a->rt, tmp);

    return true;
}
static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a)
{
    /* VMOV general purpose register to scalar */
    TCGv_i32 tmp;

    /* SIZE == MO_32 is a VFP instruction; otherwise NEON. */
    if (a->size == MO_32
        ? !dc_isar_feature(aa32_fpsp_v2, s)
        : !arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = load_reg(s, a->rt);
    write_neon_element32(tmp, a->vn, a->index, a->size);
    tcg_temp_free_i32(tmp);

    return true;
}
static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    /* VDUP (general purpose register) */
    TCGv_i32 tmp;
    int size, vec_size;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) {
        return false;
    }

    if (a->b && a->e) {
        return false;
    }

    if (a->q && (a->vn & 1)) {
        return false;
    }

    vec_size = a->q ? 16 : 8;
    if (a->b) {
        size = 0;
    } else if (a->e) {
        size = 1;
    } else {
        size = 2;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = load_reg(s, a->rt);
    tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn),
                         vec_size, vec_size, tmp);
    tcg_temp_free_i32(tmp);

    return true;
}
/*
 * M-profile provides two different sets of instructions that can
 * access floating point system registers: VMSR/VMRS (which move
 * to/from a general purpose register) and VLDR/VSTR sysreg (which
 * move directly to/from memory). In some cases there are also side
 * effects which must happen after any write to memory (which could
 * cause an exception). So we implement the common logic for the
 * sysreg access in gen_M_fp_sysreg_write() and gen_M_fp_sysreg_read(),
 * which take pointers to callback functions which will perform the
 * actual "read/write general purpose register" and "read/write
 * memory" operations.
 */

/*
 * Emit code to store the sysreg to its final destination; frees the
 * TCG temp 'value' it is passed.
 */
typedef void fp_sysreg_storefn(DisasContext *s, void *opaque, TCGv_i32 value);
/*
 * Emit code to load the value to be copied to the sysreg; returns
 * a new TCG temporary
 */
typedef TCGv_i32 fp_sysreg_loadfn(DisasContext *s, void *opaque);

/* Common decode/access checks for fp sysreg read/write */
typedef enum FPSysRegCheckResult {
    FPSysRegCheckFailed, /* caller should return false */
    FPSysRegCheckDone, /* caller should return true */
    FPSysRegCheckContinue, /* caller should continue generating code */
} FPSysRegCheckResult;
static FPSysRegCheckResult fp_sysreg_checks(DisasContext *s, int regno)
{
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return FPSysRegCheckFailed;
    }

    switch (regno) {
    case ARM_VFP_FPSCR:
    case QEMU_VFP_FPSCR_NZCV:
        break;
    case ARM_VFP_FPSCR_NZCVQC:
        if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
            return FPSysRegCheckFailed;
        }
        break;
    case ARM_VFP_FPCXT_S:
    case ARM_VFP_FPCXT_NS:
        if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
            return FPSysRegCheckFailed;
        }
        if (!s->v8m_secure) {
            return FPSysRegCheckFailed;
        }
        break;
    default:
        return FPSysRegCheckFailed;
    }

    /*
     * FPCXT_NS is a special case: it has specific handling for
     * "current FP state is inactive", and must do the PreserveFPState()
     * but not the usual full set of actions done by ExecuteFPCheck().
     * So we don't call vfp_access_check() and the callers must handle this.
     */
    if (regno != ARM_VFP_FPCXT_NS && !vfp_access_check(s)) {
        return FPSysRegCheckDone;
    }

    return FPSysRegCheckContinue;
}
static void gen_branch_fpInactive(DisasContext *s, TCGCond cond,
                                  TCGLabel *label)
{
    /*
     * FPCXT_NS is a special case: it has specific handling for
     * "current FP state is inactive", and must do the PreserveFPState()
     * but not the usual full set of actions done by ExecuteFPCheck().
     * We don't have a TB flag that matches the fpInactive check, so we
     * do it at runtime as we don't expect FPCXT_NS accesses to be frequent.
     *
     * Emit code that checks fpInactive and does a conditional
     * branch to label based on it:
     *  if cond is TCG_COND_NE then branch if fpInactive != 0 (ie if inactive)
     *  if cond is TCG_COND_EQ then branch if fpInactive == 0 (ie if active)
     */
    assert(cond == TCG_COND_EQ || cond == TCG_COND_NE);

    /* fpInactive = FPCCR_NS.ASPEN == 1 && CONTROL.FPCA == 0 */
    TCGv_i32 aspen, fpca;
    aspen = load_cpu_field(v7m.fpccr[M_REG_NS]);
    fpca = load_cpu_field(v7m.control[M_REG_S]);
    tcg_gen_andi_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
    tcg_gen_xori_i32(aspen, aspen, R_V7M_FPCCR_ASPEN_MASK);
    tcg_gen_andi_i32(fpca, fpca, R_V7M_CONTROL_FPCA_MASK);
    tcg_gen_or_i32(fpca, fpca, aspen);
    tcg_gen_brcondi_i32(tcg_invert_cond(cond), fpca, 0, label);
    tcg_temp_free_i32(aspen);
    tcg_temp_free_i32(fpca);
}
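
/*
 * Illustrative note (not from the original source): after the andi/xori
 * above, 'aspen' is nonzero exactly when FPCCR_NS.ASPEN == 0 and 'fpca'
 * is nonzero when CONTROL.FPCA == 1, so their OR is zero precisely in
 * the fpInactive case; that is why the branch compares against 0 with
 * the inverted condition.
 */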
static bool gen_M_fp_sysreg_write(DisasContext *s, int regno,
                                  fp_sysreg_loadfn *loadfn,
                                  void *opaque)
{
    /* Do a write to an M-profile floating point system register */
    TCGv_i32 tmp;
    TCGLabel *lab_end = NULL;

    switch (fp_sysreg_checks(s, regno)) {
    case FPSysRegCheckFailed:
        return false;
    case FPSysRegCheckDone:
        return true;
    case FPSysRegCheckContinue:
        break;
    }

    switch (regno) {
    case ARM_VFP_FPSCR:
        tmp = loadfn(s, opaque);
        gen_helper_vfp_set_fpscr(cpu_env, tmp);
        tcg_temp_free_i32(tmp);
        gen_lookup_tb(s);
        break;
    case ARM_VFP_FPSCR_NZCVQC:
    {
        TCGv_i32 fpscr;
        tmp = loadfn(s, opaque);
        /*
         * TODO: when we implement MVE, write the QC bit.
         * For non-MVE, QC is RES0.
         */
        tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
        fpscr = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
        tcg_gen_andi_i32(fpscr, fpscr, ~FPCR_NZCV_MASK);
        tcg_gen_or_i32(fpscr, fpscr, tmp);
        store_cpu_field(fpscr, vfp.xregs[ARM_VFP_FPSCR]);
        tcg_temp_free_i32(tmp);
        break;
    }
    case ARM_VFP_FPCXT_NS:
        lab_end = gen_new_label();
        /* fpInactive case: write is a NOP, so branch to end */
        gen_branch_fpInactive(s, TCG_COND_NE, lab_end);
        /* !fpInactive: PreserveFPState(), and reads same as FPCXT_S */
        gen_preserve_fp_state(s);
        /* fall through */
    case ARM_VFP_FPCXT_S:
    {
        TCGv_i32 sfpa, control;
        /*
         * Set FPSCR and CONTROL.SFPA from value; the new FPSCR takes
         * bits [27:0] from value and zeroes bits [31:28].
         */
        tmp = loadfn(s, opaque);
        sfpa = tcg_temp_new_i32();
        tcg_gen_shri_i32(sfpa, tmp, 31);
        control = load_cpu_field(v7m.control[M_REG_S]);
        tcg_gen_deposit_i32(control, control, sfpa,
                            R_V7M_CONTROL_SFPA_SHIFT, 1);
        store_cpu_field(control, v7m.control[M_REG_S]);
        tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
        gen_helper_vfp_set_fpscr(cpu_env, tmp);
        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(sfpa);
        break;
    }
    default:
        g_assert_not_reached();
    }
    if (lab_end) {
        gen_set_label(lab_end);
    }
    return true;
}
static bool gen_M_fp_sysreg_read(DisasContext *s, int regno,
                                 fp_sysreg_storefn *storefn,
                                 void *opaque)
{
    /* Do a read from an M-profile floating point system register */
    TCGv_i32 tmp;
    TCGLabel *lab_end = NULL;
    bool lookup_tb = false;

    switch (fp_sysreg_checks(s, regno)) {
    case FPSysRegCheckFailed:
        return false;
    case FPSysRegCheckDone:
        return true;
    case FPSysRegCheckContinue:
        break;
    }

    switch (regno) {
    case ARM_VFP_FPSCR:
        tmp = tcg_temp_new_i32();
        gen_helper_vfp_get_fpscr(tmp, cpu_env);
        storefn(s, opaque, tmp);
        break;
    case ARM_VFP_FPSCR_NZCVQC:
        /*
         * TODO: MVE has a QC bit, which we probably won't store
         * in the xregs[] field. For non-MVE, where QC is RES0,
         * we can just fall through to the FPSCR_NZCV case.
         */
    case QEMU_VFP_FPSCR_NZCV:
        /*
         * Read just NZCV; this is a special case to avoid the
         * helper call for the "VMRS to CPSR.NZCV" insn.
         */
        tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
        tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
        storefn(s, opaque, tmp);
        break;
    case ARM_VFP_FPCXT_S:
    {
        TCGv_i32 control, sfpa, fpscr;
        /* Bits [27:0] from FPSCR, bit [31] from CONTROL.SFPA */
        tmp = tcg_temp_new_i32();
        sfpa = tcg_temp_new_i32();
        gen_helper_vfp_get_fpscr(tmp, cpu_env);
        tcg_gen_andi_i32(tmp, tmp, ~FPCR_NZCV_MASK);
        control = load_cpu_field(v7m.control[M_REG_S]);
        tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
        tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
        tcg_gen_or_i32(tmp, tmp, sfpa);
        tcg_temp_free_i32(sfpa);
        /*
         * Store result before updating FPSCR etc, in case
         * it is a memory write which causes an exception.
         */
        storefn(s, opaque, tmp);
        /*
         * Now we must reset FPSCR from FPDSCR_NS, and clear
         * CONTROL.SFPA; so we'll end the TB here.
         */
        tcg_gen_andi_i32(control, control, ~R_V7M_CONTROL_SFPA_MASK);
        store_cpu_field(control, v7m.control[M_REG_S]);
        fpscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
        gen_helper_vfp_set_fpscr(cpu_env, fpscr);
        tcg_temp_free_i32(fpscr);
        lookup_tb = true;
        break;
    }
    case ARM_VFP_FPCXT_NS:
    {
        TCGv_i32 control, sfpa, fpscr, fpdscr, zero;
        TCGLabel *lab_active = gen_new_label();

        lookup_tb = true;

        gen_branch_fpInactive(s, TCG_COND_EQ, lab_active);
        /* fpInactive case: reads as FPDSCR_NS */
        TCGv_i32 tmp = load_cpu_field(v7m.fpdscr[M_REG_NS]);
        storefn(s, opaque, tmp);
        lab_end = gen_new_label();
        tcg_gen_br(lab_end);

        gen_set_label(lab_active);
        /* !fpInactive: Reads the same as FPCXT_S, but side effects differ */
        gen_preserve_fp_state(s);
        tmp = tcg_temp_new_i32();
        sfpa = tcg_temp_new_i32();
        fpscr = tcg_temp_new_i32();
        gen_helper_vfp_get_fpscr(fpscr, cpu_env);
        tcg_gen_andi_i32(tmp, fpscr, ~FPCR_NZCV_MASK);
        control = load_cpu_field(v7m.control[M_REG_S]);
        tcg_gen_andi_i32(sfpa, control, R_V7M_CONTROL_SFPA_MASK);
        tcg_gen_shli_i32(sfpa, sfpa, 31 - R_V7M_CONTROL_SFPA_SHIFT);
        tcg_gen_or_i32(tmp, tmp, sfpa);
        tcg_temp_free_i32(control);
        /* Store result before updating FPSCR, in case it faults */
        storefn(s, opaque, tmp);
        /* If SFPA is zero then set FPSCR from FPDSCR_NS */
        fpdscr = load_cpu_field(v7m.fpdscr[M_REG_NS]);
        zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_EQ, fpscr, sfpa, zero, fpdscr, fpscr);
        gen_helper_vfp_set_fpscr(cpu_env, fpscr);
        tcg_temp_free_i32(zero);
        tcg_temp_free_i32(sfpa);
        tcg_temp_free_i32(fpdscr);
        tcg_temp_free_i32(fpscr);
        break;
    }
    default:
        g_assert_not_reached();
    }

    if (lab_end) {
        gen_set_label(lab_end);
    }
    if (lookup_tb) {
        gen_lookup_tb(s);
    }
    return true;
}
static void fp_sysreg_to_gpr(DisasContext *s, void *opaque, TCGv_i32 value)
{
    arg_VMSR_VMRS *a = opaque;

    if (a->rt == 15) {
        /* Set the 4 flag bits in the CPSR */
        gen_set_nzcv(value);
        tcg_temp_free_i32(value);
    } else {
        store_reg(s, a->rt, value);
    }
}

static TCGv_i32 gpr_to_fp_sysreg(DisasContext *s, void *opaque)
{
    arg_VMSR_VMRS *a = opaque;

    return load_reg(s, a->rt);
}
static bool gen_M_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
{
    /*
     * Accesses to R15 are UNPREDICTABLE; we choose to undef.
     * FPSCR -> r15 is a special case which writes to the PSR flags;
     * set a->reg to a special value to tell gen_M_fp_sysreg_read()
     * we only care about the top 4 bits of FPSCR there.
     */
    if (a->rt == 15) {
        if (a->l && a->reg == ARM_VFP_FPSCR) {
            a->reg = QEMU_VFP_FPSCR_NZCV;
        } else {
            return false;
        }
    }

    if (a->l) {
        /* VMRS, move FP system register to gp register */
        return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_gpr, a);
    } else {
        /* VMSR, move gp register to FP system register */
        return gen_M_fp_sysreg_write(s, a->reg, gpr_to_fp_sysreg, a);
    }
}
static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a)
{
    TCGv_i32 tmp;
    bool ignore_vfp_enabled = false;

    if (arm_dc_feature(s, ARM_FEATURE_M)) {
        return gen_M_VMSR_VMRS(s, a);
    }

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    switch (a->reg) {
    case ARM_VFP_FPSID:
        /*
         * VFPv2 allows access to FPSID from userspace; VFPv3 restricts
         * all ID registers to privileged access only.
         */
        if (IS_USER(s) && dc_isar_feature(aa32_fpsp_v3, s)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_MVFR0:
    case ARM_VFP_MVFR1:
        if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_MVFR2:
        if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_FPSCR:
        break;
    case ARM_VFP_FPEXC:
        if (IS_USER(s)) {
            return false;
        }
        ignore_vfp_enabled = true;
        break;
    case ARM_VFP_FPINST:
    case ARM_VFP_FPINST2:
        /* Not present in VFPv3 */
        if (IS_USER(s) || dc_isar_feature(aa32_fpsp_v3, s)) {
            return false;
        }
        break;
    default:
        return false;
    }

    if (!full_vfp_access_check(s, ignore_vfp_enabled)) {
        return true;
    }

    if (a->l) {
        /* VMRS, move VFP special register to gp register */
        switch (a->reg) {
        case ARM_VFP_MVFR0:
        case ARM_VFP_MVFR1:
        case ARM_VFP_MVFR2:
        case ARM_VFP_FPSID:
            if (s->current_el == 1) {
                TCGv_i32 tcg_reg, tcg_rt;

                gen_set_condexec(s);
                gen_set_pc_im(s, s->pc_curr);
                tcg_reg = tcg_const_i32(a->reg);
                tcg_rt = tcg_const_i32(a->rt);
                gen_helper_check_hcr_el2_trap(cpu_env, tcg_rt, tcg_reg);
                tcg_temp_free_i32(tcg_reg);
                tcg_temp_free_i32(tcg_rt);
            }
            /* fall through */
        case ARM_VFP_FPEXC:
        case ARM_VFP_FPINST:
        case ARM_VFP_FPINST2:
            tmp = load_cpu_field(vfp.xregs[a->reg]);
            break;
        case ARM_VFP_FPSCR:
            if (a->rt == 15) {
                tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
                tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK);
            } else {
                tmp = tcg_temp_new_i32();
                gen_helper_vfp_get_fpscr(tmp, cpu_env);
            }
            break;
        default:
            g_assert_not_reached();
        }

        if (a->rt == 15) {
            /* Set the 4 flag bits in the CPSR.  */
            gen_set_nzcv(tmp);
            tcg_temp_free_i32(tmp);
        } else {
            store_reg(s, a->rt, tmp);
        }
    } else {
        /* VMSR, move gp register to VFP special register */
        switch (a->reg) {
        case ARM_VFP_FPSID:
        case ARM_VFP_MVFR0:
        case ARM_VFP_MVFR1:
        case ARM_VFP_MVFR2:
            /* Writes are ignored.  */
            break;
        case ARM_VFP_FPSCR:
            tmp = load_reg(s, a->rt);
            gen_helper_vfp_set_fpscr(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            gen_lookup_tb(s);
            break;
        case ARM_VFP_FPEXC:
            /*
             * TODO: VFP subarchitecture support.
             * For now, keep the EN bit only
             */
            tmp = load_reg(s, a->rt);
            tcg_gen_andi_i32(tmp, tmp, 1 << 30);
            store_cpu_field(tmp, vfp.xregs[a->reg]);
            gen_lookup_tb(s);
            break;
        case ARM_VFP_FPINST:
        case ARM_VFP_FPINST2:
            tmp = load_reg(s, a->rt);
            store_cpu_field(tmp, vfp.xregs[a->reg]);
            break;
        default:
            g_assert_not_reached();
        }
    }

    return true;
}
static void fp_sysreg_to_memory(DisasContext *s, void *opaque, TCGv_i32 value)
{
    arg_vldr_sysreg *a = opaque;
    uint32_t offset = a->imm;
    TCGv_i32 addr;

    if (!a->a) {
        offset = -offset;
    }

    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        gen_helper_v8m_stackcheck(cpu_env, addr);
    }

    gen_aa32_st_i32(s, value, addr, get_mem_index(s),
                    MO_UL | MO_ALIGN | s->be_data);
    tcg_temp_free_i32(value);

    if (a->w) {
        /* writeback */
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
}
static TCGv_i32 memory_to_fp_sysreg(DisasContext *s, void *opaque)
{
    arg_vldr_sysreg *a = opaque;
    uint32_t offset = a->imm;
    TCGv_i32 addr;
    TCGv_i32 value = tcg_temp_new_i32();

    if (!a->a) {
        offset = -offset;
    }

    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        gen_helper_v8m_stackcheck(cpu_env, addr);
    }

    gen_aa32_ld_i32(s, value, addr, get_mem_index(s),
                    MO_UL | MO_ALIGN | s->be_data);

    if (a->w) {
        /* writeback */
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    return value;
}
static bool trans_VLDR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
        return false;
    }
    if (a->rn == 15) {
        return false;
    }
    return gen_M_fp_sysreg_write(s, a->reg, memory_to_fp_sysreg, a);
}

static bool trans_VSTR_sysreg(DisasContext *s, arg_vldr_sysreg *a)
{
    if (!arm_dc_feature(s, ARM_FEATURE_V8_1M)) {
        return false;
    }
    if (a->rn == 15) {
        return false;
    }
    return gen_M_fp_sysreg_read(s, a->reg, fp_sysreg_to_memory, a);
}
static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a)
{
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->l) {
        /* VFP to general purpose register */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vn);
        tcg_gen_andi_i32(tmp, tmp, 0xffff);
        store_reg(s, a->rt, tmp);
    } else {
        /* general purpose register to VFP */
        tmp = load_reg(s, a->rt);
        tcg_gen_andi_i32(tmp, tmp, 0xffff);
        vfp_store_reg32(tmp, a->vn);
        tcg_temp_free_i32(tmp);
    }

    return true;
}
static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a)
{
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->l) {
        /* VFP to general purpose register */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vn);
        if (a->rt == 15) {
            /* Set the 4 flag bits in the CPSR.  */
            gen_set_nzcv(tmp);
            tcg_temp_free_i32(tmp);
        } else {
            store_reg(s, a->rt, tmp);
        }
    } else {
        /* general purpose register to VFP */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vn);
        tcg_temp_free_i32(tmp);
    }

    return true;
}
static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
{
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /*
     * VMOV between two general-purpose registers and two single precision
     * floating point registers
     */
    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->op) {
        /* fpreg to gpreg */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm);
        store_reg(s, a->rt, tmp);
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm + 1);
        store_reg(s, a->rt2, tmp);
    } else {
        /* gpreg to fpreg */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vm);
        tcg_temp_free_i32(tmp);
        tmp = load_reg(s, a->rt2);
        vfp_store_reg32(tmp, a->vm + 1);
        tcg_temp_free_i32(tmp);
    }

    return true;
}
static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
{
    TCGv_i32 tmp;

    /*
     * VMOV between two general-purpose registers and one double precision
     * floating point register.  Note that this does not require support
     * for double precision arithmetic.
     */
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->op) {
        /* fpreg to gpreg */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm * 2);
        store_reg(s, a->rt, tmp);
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm * 2 + 1);
        store_reg(s, a->rt2, tmp);
    } else {
        /* gpreg to fpreg */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vm * 2);
        tcg_temp_free_i32(tmp);
        tmp = load_reg(s, a->rt2);
        vfp_store_reg32(tmp, a->vm * 2 + 1);
        tcg_temp_free_i32(tmp);
    }

    return true;
}
static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a)
{
    uint32_t offset;
    TCGv_i32 addr, tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* imm8 field is offset/2 for fp16, unlike fp32 and fp64 */
    offset = a->imm << 1;
    if (!a->u) {
        offset = -offset;
    }

    /* For thumb, use of PC is UNPREDICTABLE.  */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i32();
    if (a->l) {
        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN);
        vfp_store_reg32(tmp, a->vd);
    } else {
        vfp_load_reg32(tmp, a->vd);
        gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN);
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(addr);

    return true;
}
static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
{
    uint32_t offset;
    TCGv_i32 addr, tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << 2;
    if (!a->u) {
        offset = -offset;
    }

    /* For thumb, use of PC is UNPREDICTABLE.  */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i32();
    if (a->l) {
        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
        vfp_store_reg32(tmp, a->vd);
    } else {
        vfp_load_reg32(tmp, a->vd);
        gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(addr);

    return true;
}
static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a)
{
    uint32_t offset;
    TCGv_i32 addr;
    TCGv_i64 tmp;

    /* Note that this does not require support for double arithmetic.  */
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << 2;
    if (!a->u) {
        offset = -offset;
    }

    /* For thumb, use of PC is UNPREDICTABLE.  */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i64();
    if (a->l) {
        gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4);
        vfp_store_reg64(tmp, a->vd);
    } else {
        vfp_load_reg64(tmp, a->vd);
        gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4);
    }
    tcg_temp_free_i64(tmp);
    tcg_temp_free_i32(addr);

    return true;
}
static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
{
    uint32_t offset;
    TCGv_i32 addr, tmp;
    int i, n;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    n = a->imm;

    if (n == 0 || (a->vd + n) > 32) {
        /*
         * UNPREDICTABLE cases for bad immediates: we choose to
         * UNDEF to avoid generating huge numbers of TCG ops
         */
        return false;
    }

    if (a->rn == 15 && a->w) {
        /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For thumb, use of PC is UNPREDICTABLE.  */
    addr = add_reg_for_lit(s, a->rn, 0);
    if (a->p) {
        /* pre-decrement */
        tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
    }

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        /*
         * Here 'addr' is the lowest address we will store to,
         * and is either the old SP (if post-increment) or
         * the new SP (if pre-decrement). For post-increment
         * where the old value is below the limit and the new
         * value is above, it is UNKNOWN whether the limit check
         * triggers; we choose to trigger.
         */
        gen_helper_v8m_stackcheck(cpu_env, addr);
    }

    offset = 4;
    tmp = tcg_temp_new_i32();
    for (i = 0; i < n; i++) {
        if (a->l) {
            /* load */
            gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
            vfp_store_reg32(tmp, a->vd + i);
        } else {
            /* store */
            vfp_load_reg32(tmp, a->vd + i);
            gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN);
        }
        tcg_gen_addi_i32(addr, addr, offset);
    }
    tcg_temp_free_i32(tmp);
    if (a->w) {
        /* writeback */
        if (a->p) {
            offset = -offset * n;
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }

    return true;
}
static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
{
    uint32_t offset;
    TCGv_i32 addr;
    TCGv_i64 tmp;
    int i, n;

    /* Note that this does not require support for double arithmetic.  */
    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    n = a->imm >> 1;

    if (n == 0 || (a->vd + n) > 32 || n > 16) {
        /*
         * UNPREDICTABLE cases for bad immediates: we choose to
         * UNDEF to avoid generating huge numbers of TCG ops
         */
        return false;
    }

    if (a->rn == 15 && a->w) {
        /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For thumb, use of PC is UNPREDICTABLE.  */
    addr = add_reg_for_lit(s, a->rn, 0);
    if (a->p) {
        /* pre-decrement */
        tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
    }

    if (s->v8m_stackcheck && a->rn == 13 && a->w) {
        /*
         * Here 'addr' is the lowest address we will store to,
         * and is either the old SP (if post-increment) or
         * the new SP (if pre-decrement). For post-increment
         * where the old value is below the limit and the new
         * value is above, it is UNKNOWN whether the limit check
         * triggers; we choose to trigger.
         */
        gen_helper_v8m_stackcheck(cpu_env, addr);
    }

    offset = 8;
    tmp = tcg_temp_new_i64();
    for (i = 0; i < n; i++) {
        if (a->l) {
            /* load */
            gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4);
            vfp_store_reg64(tmp, a->vd + i);
        } else {
            /* store */
            vfp_load_reg64(tmp, a->vd + i);
            gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_Q | MO_ALIGN_4);
        }
        tcg_gen_addi_i32(addr, addr, offset);
    }
    tcg_temp_free_i64(tmp);
    if (a->w) {
        /* writeback */
        if (a->p) {
            offset = -offset * n;
        } else if (a->imm & 1) {
            offset = 4;
        } else {
            offset = 0;
        }

        if (offset != 0) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }

    return true;
}
/*
 * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp().
 * The callback should emit code to write a value to vd. If
 * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd
 * will contain the old value of the relevant VFP register;
 * otherwise it must be written to only.
 */
typedef void VFPGen3OpSPFn(TCGv_i32 vd,
                           TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst);
typedef void VFPGen3OpDPFn(TCGv_i64 vd,
                           TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);

/*
 * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp().
 * The callback should emit code to write a value to vd (which
 * should be written to only).
 */
typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);
/*
 * Return true if the specified S reg is in a scalar bank
 * (ie if it is s0..s7)
 */
static inline bool vfp_sreg_is_scalar(int reg)
{
    return (reg & 0x18) == 0;
}

/*
 * Return true if the specified D reg is in a scalar bank
 * (ie if it is d0..d3 or d16..d19)
 */
static inline bool vfp_dreg_is_scalar(int reg)
{
    return (reg & 0xc) == 0;
}
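
/*
 * Illustrative examples (not from the original source): s5 is scalar
 * (5 & 0x18 == 0) while s9 is not (9 & 0x18 == 0x08); d2 and d16 are
 * scalar (2 & 0xc == 0, 16 & 0xc == 0) while d5 is not.
 */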
/*
 * Advance the S reg number forwards by delta within its bank
 * (ie increment the low 3 bits but leave the rest the same)
 */
static inline int vfp_advance_sreg(int reg, int delta)
{
    return ((reg + delta) & 0x7) | (reg & ~0x7);
}

/*
 * Advance the D reg number forwards by delta within its bank
 * (ie increment the low 2 bits but leave the rest the same)
 */
static inline int vfp_advance_dreg(int reg, int delta)
{
    return ((reg + delta) & 0x3) | (reg & ~0x3);
}
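
/*
 * Illustrative examples (not from the original source):
 * vfp_advance_sreg(15, 1) wraps to s8 rather than moving on to s16,
 * because only the low 3 bits advance; likewise vfp_advance_dreg(19, 1)
 * wraps to d16 within its bank.
 */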
/*
 * Perform a 3-operand VFP data processing instruction. fn is the
 * callback to do the actual operation; this function deals with the
 * code to handle looping around for VFP vector processing.
 */
static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 f0, f1, fd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is.  */
        if (vfp_sreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = s->vec_stride + 1;

            if (vfp_sreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i32();
    f1 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();
    fpst = fpstatus_ptr(FPST_FPCR);

    vfp_load_reg32(f0, vn);
    vfp_load_reg32(f1, vm);

    for (;;) {
        if (reads_vd) {
            vfp_load_reg32(fd, vd);
        }
        fn(fd, f0, f1, fpst);
        vfp_store_reg32(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_sreg(vd, delta_d);
        vn = vfp_advance_sreg(vn, delta_d);
        vfp_load_reg32(f0, vn);
        if (delta_m) {
            vm = vfp_advance_sreg(vm, delta_m);
            vfp_load_reg32(f1, vm);
        }
    }

    tcg_temp_free_i32(f0);
    tcg_temp_free_i32(f1);
    tcg_temp_free_i32(fd);
    tcg_temp_free_ptr(fpst);

    return true;
}
static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
{
    /*
     * Do a half-precision operation. Functionally this is
     * the same as do_vfp_3op_sp(), except:
     *  - it uses the FPST_FPCR_F16
     *  - it doesn't need the VFP vector handling (fp16 is a
     *    v8 feature, and in v8 VFP vectors don't exist)
     *  - it does the aa32_fp16_arith feature test
     */
    TCGv_i32 f0, f1, fd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    f0 = tcg_temp_new_i32();
    f1 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();
    fpst = fpstatus_ptr(FPST_FPCR_F16);

    vfp_load_reg32(f0, vn);
    vfp_load_reg32(f1, vm);

    if (reads_vd) {
        vfp_load_reg32(fd, vd);
    }
    fn(fd, f0, f1, fpst);
    vfp_store_reg32(fd, vd);

    tcg_temp_free_i32(f0);
    tcg_temp_free_i32(f1);
    tcg_temp_free_i32(fd);
    tcg_temp_free_ptr(fpst);

    return true;
}
static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
                          int vd, int vn, int vm, bool reads_vd)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 f0, f1, fd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is.  */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = (s->vec_stride >> 1) + 1;

            if (vfp_dreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i64();
    f1 = tcg_temp_new_i64();
    fd = tcg_temp_new_i64();
    fpst = fpstatus_ptr(FPST_FPCR);

    vfp_load_reg64(f0, vn);
    vfp_load_reg64(f1, vm);

    for (;;) {
        if (reads_vd) {
            vfp_load_reg64(fd, vd);
        }
        fn(fd, f0, f1, fpst);
        vfp_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }
        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
        vn = vfp_advance_dreg(vn, delta_d);
        vfp_load_reg64(f0, vn);
        if (delta_m) {
            vm = vfp_advance_dreg(vm, delta_m);
            vfp_load_reg64(f1, vm);
        }
    }

    tcg_temp_free_i64(f0);
    tcg_temp_free_i64(f1);
    tcg_temp_free_i64(fd);
    tcg_temp_free_ptr(fpst);

    return true;
}
static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 f0, fd;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is.  */
        if (vfp_sreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = s->vec_stride + 1;

            if (vfp_sreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i32();
    fd = tcg_temp_new_i32();

    vfp_load_reg32(f0, vm);

    for (;;) {
        fn(fd, f0);
        vfp_store_reg32(fd, vd);

        if (veclen == 0) {
            break;
        }

        if (delta_m == 0) {
            /* single source one-many */
            while (veclen--) {
                vd = vfp_advance_sreg(vd, delta_d);
                vfp_store_reg32(fd, vd);
            }
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_sreg(vd, delta_d);
        vm = vfp_advance_sreg(vm, delta_m);
        vfp_load_reg32(f0, vm);
    }

    tcg_temp_free_i32(f0);
    tcg_temp_free_i32(fd);

    return true;
}
static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
{
    /*
     * Do a half-precision operation. Functionally this is
     * the same as do_vfp_2op_sp(), except:
     *  - it doesn't need the VFP vector handling (fp16 is a
     *    v8 feature, and in v8 VFP vectors don't exist)
     *  - it does the aa32_fp16_arith feature test
     */
    TCGv_i32 f0;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    f0 = tcg_temp_new_i32();
    vfp_load_reg32(f0, vm);
    fn(f0, f0);
    vfp_store_reg32(f0, vd);
    tcg_temp_free_i32(f0);

    return true;
}
static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
{
    uint32_t delta_m = 0;
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 f0, fd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is.  */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = (s->vec_stride >> 1) + 1;

            if (vfp_dreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i64();
    fd = tcg_temp_new_i64();

    vfp_load_reg64(f0, vm);

    for (;;) {
        fn(fd, f0);
        vfp_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }

        if (delta_m == 0) {
            /* single source one-many */
            while (veclen--) {
                vd = vfp_advance_dreg(vd, delta_d);
                vfp_store_reg64(fd, vd);
            }
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
        vm = vfp_advance_dreg(vm, delta_m);
        vfp_load_reg64(f0, vm);
    }

    tcg_temp_free_i64(f0);
    tcg_temp_free_i64(fd);

    return true;
}
static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
{
    return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
{
    return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
{
    return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
}
static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(tmp, tmp);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
{
    return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(tmp, tmp);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
{
    return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(tmp, tmp);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
{
    return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
}
static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(vd, vd);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(vd, vd);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(vd, vd);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
}
static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(tmp, tmp);
    gen_helper_vfp_negh(vd, vd);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(tmp, tmp);
    gen_helper_vfp_negs(vd, vd);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
    tcg_temp_free_i32(tmp);
}

static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(tmp, tmp);
    gen_helper_vfp_negd(vd, vd);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
    tcg_temp_free_i64(tmp);
}

static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
}
static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false);
}

static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
}

static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
}
static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_mulh(vd, vn, vm, fpst);
    gen_helper_vfp_negh(vd, vd);
}

static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_muls(vd, vn, vm, fpst);
    gen_helper_vfp_negs(vd, vd);
}

static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_muld(vd, vn, vm, fpst);
    gen_helper_vfp_negd(vd, vd);
}

static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
}
static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false);
}

static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false);
}

static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false);
}

static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false);
}

static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false);
}

static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false);
}
static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false);
}

static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false);
}

static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false);
}
static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_hp(s, gen_helper_vfp_minnumh,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_sp(s, gen_helper_vfp_minnums,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMAXNM_sp(DisasContext *s, arg_VMAXNM_sp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_sp(s, gen_helper_vfp_maxnums,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMINNM_dp(DisasContext *s, arg_VMINNM_dp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_dp(s, gen_helper_vfp_minnumd,
                         a->vd, a->vn, a->vm, false);
}

static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a)
{
    if (!dc_isar_feature(aa32_vminmaxnm, s)) {
        return false;
    }
    return do_vfp_3op_dp(s, gen_helper_vfp_maxnumd,
                         a->vd, a->vn, a->vm, false);
}
static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
{
    /*
     * VFNMA : fd = muladd(-fd,  fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA  : fd = muladd( fd,  fn, fm)
     * VFMS  : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps.  NB that doing the negations here as separate
     * steps is correct : an input NaN should come out with its sign
     * bit flipped if it is a negated-input.
     */
    TCGv_ptr fpst;
    TCGv_i32 vn, vm, vd;

    /*
     * Present in VFPv4 only, and only with the FP16 extension.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_fp16_arith, s) ||
        !dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vn = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i32();

    vfp_load_reg32(vn, a->vn);
    vfp_load_reg32(vm, a->vm);
    if (neg_n) {
        /* VFNMS, VFMS */
        gen_helper_vfp_negh(vn, vn);
    }
    vfp_load_reg32(vd, a->vd);
    if (neg_d) {
        /* VFNMA, VFNMS */
        gen_helper_vfp_negh(vd, vd);
    }
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
    vfp_store_reg32(vd, a->vd);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(vn);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i32(vd);

    return true;
}
static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
{
    /*
     * VFNMA : fd = muladd(-fd,  fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA  : fd = muladd( fd,  fn, fm)
     * VFMS  : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps.  NB that doing the negations here as separate
     * steps is correct : an input NaN should come out with its sign
     * bit flipped if it is a negated-input.
     */
    TCGv_ptr fpst;
    TCGv_i32 vn, vm, vd;

    /*
     * Present in VFPv4 only.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }
    /*
     * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
     * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
     */
    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vn = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i32();

    vfp_load_reg32(vn, a->vn);
    vfp_load_reg32(vm, a->vm);
    if (neg_n) {
        /* VFNMS, VFMS */
        gen_helper_vfp_negs(vn, vn);
    }
    vfp_load_reg32(vd, a->vd);
    if (neg_d) {
        /* VFNMA, VFNMS */
        gen_helper_vfp_negs(vd, vd);
    }
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
    vfp_store_reg32(vd, a->vd);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(vn);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i32(vd);

    return true;
}
static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
{
    /*
     * VFNMA : fd = muladd(-fd,  fn, fm)
     * VFNMS : fd = muladd(-fd, -fn, fm)
     * VFMA  : fd = muladd( fd,  fn, fm)
     * VFMS  : fd = muladd( fd, -fn, fm)
     *
     * These are fused multiply-add, and must be done as one floating
     * point operation with no rounding between the multiplication and
     * addition steps. NB that doing the negations here as separate
     * steps is correct: an input NaN should come out with its sign
     * bit flipped if it is a negated input.
     */
    TCGv_ptr fpst;
    TCGv_i64 vn, vm, vd;

    /*
     * Present in VFPv4 only.
     * Note that we can't rely on the SIMDFMAC check alone, because
     * in a Neon-no-VFP core that ID register field will be non-zero.
     */
    if (!dc_isar_feature(aa32_simdfmac, s) ||
        !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }
    /*
     * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
     * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
     */
    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vn = tcg_temp_new_i64();
    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i64();

    vfp_load_reg64(vn, a->vn);
    vfp_load_reg64(vm, a->vm);
    if (neg_n) {
        /* VFNMS, VFMS */
        gen_helper_vfp_negd(vn, vn);
    }
    vfp_load_reg64(vd, a->vd);
    if (neg_d) {
        /* VFNMA, VFNMS */
        gen_helper_vfp_negd(vd, vd);
    }
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
    vfp_store_reg64(vd, a->vd);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(vn);
    tcg_temp_free_i64(vm);
    tcg_temp_free_i64(vd);

    return true;
}
#define MAKE_ONE_VFM_TRANS_FN(INSN, PREC, NEGN, NEGD)           \
    static bool trans_##INSN##_##PREC(DisasContext *s,          \
                                      arg_##INSN##_##PREC *a)   \
    {                                                           \
        return do_vfm_##PREC(s, a, NEGN, NEGD);                 \
    }

#define MAKE_VFM_TRANS_FNS(PREC) \
    MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \
    MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \
    MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \
    MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true)

MAKE_VFM_TRANS_FNS(hp)
MAKE_VFM_TRANS_FNS(sp)
MAKE_VFM_TRANS_FNS(dp)
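/*
 * For illustration, MAKE_VFM_TRANS_FNS(sp) expands (among others) to:
 *
 *   static bool trans_VFMS_sp(DisasContext *s, arg_VFMS_sp *a)
 *   {
 *       return do_vfm_sp(s, a, true, false);
 *   }
 *
 * i.e. VFMS is the variant that negates fn but not fd.
 */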
static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a)
{
    TCGv_i32 fd;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fd = tcg_const_i32(vfp_expand_imm(MO_16, a->imm));
    vfp_store_reg32(fd, a->vd);
    tcg_temp_free_i32(fd);
    return true;
}
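/*
 * Worked example for the immediate expansion used here and below:
 * imm8 == 0x70 (sign 0, bit 6 set, low bits 110000) expands for MO_32
 * to 0x3f800000, i.e. 1.0f, which is what "vmov.f32 s0, #1.0" encodes.
 */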
static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
{
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 fd;
    uint32_t vd;

    vd = a->vd;

    if (!dc_isar_feature(aa32_fpsp_v3, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_sreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = s->vec_stride + 1;
        }
    }

    fd = tcg_const_i32(vfp_expand_imm(MO_32, a->imm));

    for (;;) {
        vfp_store_reg32(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_sreg(vd, delta_d);
    }

    tcg_temp_free_i32(fd);
    return true;
}
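/*
 * Note on the loop above: this is the legacy VFP short-vector mode.
 * If the destination is one of the scalar-bank registers the operation
 * stays scalar; otherwise the store is repeated vec_len + 1 times in
 * total, advancing the destination by the configured stride each time.
 */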
static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
{
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 fd;
    uint32_t vd;

    vd = a->vd;

    if (!dc_isar_feature(aa32_fpdp_v3, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = (s->vec_stride >> 1) + 1;
        }
    }

    fd = tcg_const_i64(vfp_expand_imm(MO_64, a->imm));

    for (;;) {
        vfp_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
    }

    tcg_temp_free_i64(fd);
    return true;
}
#define DO_VFP_2OP(INSN, PREC, FN)                              \
    static bool trans_##INSN##_##PREC(DisasContext *s,          \
                                      arg_##INSN##_##PREC *a)   \
    {                                                           \
        return do_vfp_2op_##PREC(s, FN, a->vd, a->vm);          \
    }

DO_VFP_2OP(VMOV_reg, sp, tcg_gen_mov_i32)
DO_VFP_2OP(VMOV_reg, dp, tcg_gen_mov_i64)

DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh)
DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss)
DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd)

DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh)
DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs)
DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd)
static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
{
    gen_helper_vfp_sqrth(vd, vm, cpu_env);
}

static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
{
    gen_helper_vfp_sqrts(vd, vm, cpu_env);
}

static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
{
    gen_helper_vfp_sqrtd(vd, vm, cpu_env);
}

DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp)
DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp)
DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp)
static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
{
    TCGv_i32 vd, vm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();

    vfp_load_reg32(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i32(vm, 0);
    } else {
        vfp_load_reg32(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmpeh(vd, vm, cpu_env);
    } else {
        gen_helper_vfp_cmph(vd, vm, cpu_env);
    }

    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(vm);

    return true;
}
static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
{
    TCGv_i32 vd, vm;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();

    vfp_load_reg32(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i32(vm, 0);
    } else {
        vfp_load_reg32(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmpes(vd, vm, cpu_env);
    } else {
        gen_helper_vfp_cmps(vd, vm, cpu_env);
    }

    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(vm);

    return true;
}
static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
{
    TCGv_i64 vd, vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i64();
    vm = tcg_temp_new_i64();

    vfp_load_reg64(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i64(vm, 0);
    } else {
        vfp_load_reg64(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmped(vd, vm, cpu_env);
    } else {
        gen_helper_vfp_cmpd(vd, vm, cpu_env);
    }

    tcg_temp_free_i64(vd);
    tcg_temp_free_i64(vm);

    return true;
}
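/*
 * Note: the cmpe* helpers implement the "E" variant of VCMP, which
 * raises Invalid Operation for any NaN input; the plain cmp* helpers
 * signal only on signalling NaNs. Both report their result via the
 * FPSCR NZCV flags, which is why they take cpu_env.
 */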
static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    /* The T bit tells us if we want the low or high 16 bits of Vm */
    tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}
static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;
    TCGv_i64 vd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    /* The T bit tells us if we want the low or high 16 bits of Vm */
    tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
    vd = tcg_temp_new_i64();
    gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i64(vd);
    return true;
}
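/*
 * Note for these half-precision conversions: get_ahp_flag() reads
 * FPSCR.AHP, which selects between IEEE half-precision and the ARM
 * alternative format (no infinities or NaNs) in the fcvt helpers.
 */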
static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();

    vfp_load_reg32(tmp, a->vm);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
    tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}
static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;
    TCGv_i64 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    vm = tcg_temp_new_i64();

    vfp_load_reg64(vm, a->vm);
    gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
    tcg_temp_free_i64(vm);
    tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}
static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_rinth(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}
static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rints(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}
static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rintd(tmp, tmp, fpst);
    vfp_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    return true;
}
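/*
 * VRINTR ("round to integral") uses whatever rounding mode is currently
 * selected in the FPSCR, which is why the rint* helpers above take only
 * the fp status pointer and no explicit rounding mode.
 */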
static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rinth(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_rmode);
    tcg_temp_free_i32(tmp);
    return true;
}
static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rints(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_rmode);
    tcg_temp_free_i32(tmp);
    return true;
}
static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rintd(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    vfp_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    tcg_temp_free_i32(tcg_rmode);
    return true;
}
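/*
 * Note the set_rmode bracketing above: gen_helper_set_rmode() installs
 * the new rounding mode and returns the previous one in its destination,
 * so calling it twice with the same TCG temp forces round-to-zero for
 * the rint and then restores the original FPSCR rounding mode.
 */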
static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_rinth_exact(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}
static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rints_exact(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}
static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rintd_exact(tmp, tmp, fpst);
    vfp_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    return true;
}
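/*
 * VRINTX differs from VRINTR only in that it raises the Inexact
 * exception when the result is not numerically equal to the input,
 * hence the *_exact variants of the rint helpers.
 */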
static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
{
    TCGv_i64 vd;
    TCGv_i32 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i64();
    vfp_load_reg32(vm, a->vm);
    gen_helper_vfp_fcvtds(vd, vm, cpu_env);
    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i64(vd);
    return true;
}
static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
{
    TCGv_i64 vm;
    TCGv_i32 vd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i64();
    vfp_load_reg64(vm, a->vm);
    gen_helper_vfp_fcvtsd(vd, vm, cpu_env);
    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i64(vm);
    return true;
}
static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    if (a->s) {
        /* i32 -> f16 */
        gen_helper_vfp_sitoh(vm, vm, fpst);
    } else {
        /* u32 -> f16 */
        gen_helper_vfp_uitoh(vm, vm, fpst);
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}
static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    if (a->s) {
        /* i32 -> f32 */
        gen_helper_vfp_sitos(vm, vm, fpst);
    } else {
        /* u32 -> f32 */
        gen_helper_vfp_uitos(vm, vm, fpst);
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}
static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
{
    TCGv_i32 vm;
    TCGv_i64 vd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i64();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    if (a->s) {
        /* i32 -> f64 */
        gen_helper_vfp_sitod(vd, vm, fpst);
    } else {
        /* u32 -> f64 */
        gen_helper_vfp_uitod(vd, vm, fpst);
    }
    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i64(vd);
    tcg_temp_free_ptr(fpst);
    return true;
}
static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
{
    TCGv_i32 vd;
    TCGv_i64 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_jscvt, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i32();
    vfp_load_reg64(vm, a->vm);
    gen_helper_vjcvt(vd, vm, cpu_env);
    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i64(vm);
    tcg_temp_free_i32(vd);
    return true;
}
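/*
 * VJCVT is the "JavaScript" conversion: double to signed 32-bit integer
 * with round-toward-zero and out-of-range results wrapped modulo 2^32,
 * matching ECMAScript's ToInt32. The helper takes cpu_env because the
 * architected behaviour includes flag updates beyond ordinary fp status
 * handling.
 */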
static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
{
    TCGv_i32 vd, shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i32();
    vfp_load_reg32(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR_F16);
    shift = tcg_const_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_toslh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_toulh_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(shift);
    tcg_temp_free_ptr(fpst);
    return true;
}
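/*
 * For these fixed-point conversions the immediate encodes the position
 * of the binary point: frac_bits = size - imm, where opc bit 0 (sx)
 * selects a 32-bit (sx=1) or 16-bit (sx=0) fixed-point value. Per the
 * helper names, the fixed-to-float cases (0-3) round to nearest and the
 * float-to-fixed cases (4-7) round to zero.
 */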
static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
{
    TCGv_i32 vd, shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fpsp_v3, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i32();
    vfp_load_reg32(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR);
    shift = tcg_const_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(shift);
    tcg_temp_free_ptr(fpst);
    return true;
}
static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
{
    TCGv_i64 vd;
    TCGv_i32 shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fpdp_v3, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i64();
    vfp_load_reg64(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR);
    shift = tcg_const_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i64(vd);
    tcg_temp_free_i32(shift);
    tcg_temp_free_ptr(fpst);
    return true;
}
static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR_F16);
    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizh(vm, vm, fpst);
        } else {
            gen_helper_vfp_tosih(vm, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizh(vm, vm, fpst);
        } else {
            gen_helper_vfp_touih(vm, vm, fpst);
        }
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}
static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizs(vm, vm, fpst);
        } else {
            gen_helper_vfp_tosis(vm, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizs(vm, vm, fpst);
        } else {
            gen_helper_vfp_touis(vm, vm, fpst);
        }
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}
static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
{
    TCGv_i32 vd;
    TCGv_i64 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i32();
    vfp_load_reg64(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizd(vd, vm, fpst);
        } else {
            gen_helper_vfp_tosid(vd, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizd(vd, vm, fpst);
        } else {
            gen_helper_vfp_touid(vd, vm, fpst);
        }
    }
    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i64(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}
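/*
 * In the float-to-int conversions above, the rz flag distinguishes VCVT
 * (always round toward zero, the *iz* helpers) from VCVTR, which rounds
 * using the current FPSCR rounding mode.
 */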
static bool trans_VINS(DisasContext *s, arg_VINS *a)
{
    TCGv_i32 rd, rm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Insert low half of Vm into high half of Vd */
    rm = tcg_temp_new_i32();
    rd = tcg_temp_new_i32();
    vfp_load_reg32(rm, a->vm);
    vfp_load_reg32(rd, a->vd);
    tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
    vfp_store_reg32(rd, a->vd);
    tcg_temp_free_i32(rm);
    tcg_temp_free_i32(rd);
    return true;
}
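/*
 * The deposit above writes bits [15:0] of rm into bits [31:16] of rd,
 * leaving rd's low half intact: VINS moves a half-precision value into
 * the top half of a single-precision register without touching the rest.
 */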
static bool trans_VMOVX(DisasContext *s, arg_VINS *a)
{
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Set Vd to high half of Vm */
    rm = tcg_temp_new_i32();
    vfp_load_reg32(rm, a->vm);
    tcg_gen_shri_i32(rm, rm, 16);
    vfp_store_reg32(rm, a->vd);
    tcg_temp_free_i32(rm);
    return true;
}