/*
 * ARM translation: AArch32 VFP instructions
 *
 * Copyright (c) 2003 Fabrice Bellard
 * Copyright (c) 2005-2007 CodeSourcery
 * Copyright (c) 2007 OpenedHand, Ltd.
 * Copyright (c) 2019 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

/* Include the generated VFP decoder */
#include "decode-vfp.c.inc"
#include "decode-vfp-uncond.c.inc"
static inline void vfp_load_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(true, reg));
}

static inline void vfp_store_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(true, reg));
}

static inline void vfp_load_reg32(TCGv_i32 var, int reg)
{
    tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
}

static inline void vfp_store_reg32(TCGv_i32 var, int reg)
{
    tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
}
/*
 * The imm8 encodes the sign bit, enough bits to represent an exponent in
 * the range 01....1xx to 10....0xx, and the most significant 4 bits of
 * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
 */
uint64_t vfp_expand_imm(int size, uint8_t imm8)
{
    uint64_t imm;

    switch (size) {
    case MO_64:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
            (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
            extract32(imm8, 0, 6);
        imm <<= 48;
        break;
    case MO_32:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
            (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
            (extract32(imm8, 0, 6) << 3);
        imm <<= 16;
        break;
    case MO_16:
        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
            (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
            (extract32(imm8, 0, 6) << 6);
        break;
    default:
        g_assert_not_reached();
    }
    return imm;
}
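/*
 * Worked example (illustrative, not in the original source): with
 * size == MO_32 and imm8 == 0x70 we have sign == 0, imm8<6> == 1 and
 * imm8<5:0> == 0x30, so the code above computes
 *   imm = 0x3e00 | (0x30 << 3) = 0x3f80, then imm <<= 16 gives 0x3f800000,
 * which is the IEEE single-precision encoding of 1.0, matching
 * VFPExpandImm(0x70) in the ARM ARM pseudocode.
 */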
/*
 * Return the offset of a 16-bit half of the specified VFP single-precision
 * register. If top is true, returns the top 16 bits; otherwise the bottom
 * 16 bits.
 */
static inline long vfp_f16_offset(unsigned reg, bool top)
{
    long offs = vfp_reg_offset(false, reg);
#if HOST_BIG_ENDIAN
    if (!top) {
        offs += 2;
    }
#else
    if (top) {
        offs += 2;
    }
#endif
    return offs;
}
/*
 * Generate code for M-profile lazy FP state preservation if needed;
 * this corresponds to the pseudocode PreserveFPState() function.
 */
static void gen_preserve_fp_state(DisasContext *s, bool skip_context_update)
{
    if (s->v7m_lspact) {
        /*
         * Lazy state saving affects external memory and also the NVIC,
         * so we must mark it as an IO operation for icount (and cause
         * this to be the last insn in the TB).
         */
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            s->base.is_jmp = DISAS_UPDATE_EXIT;
            gen_io_start();
        }
        gen_helper_v7m_preserve_fp_state(cpu_env);
        /*
         * If the preserve_fp_state helper doesn't throw an exception
         * then it will clear LSPACT; we don't need to repeat this for
         * any further FP insns in this TB.
         */
        s->v7m_lspact = false;
        /*
         * The helper might have zeroed VPR, so we do not know the
         * correct value for the MVE_NO_PRED TB flag any more.
         * If we're about to create a new fp context then that
         * will precisely determine the MVE_NO_PRED value (see
         * gen_update_fp_context()). Otherwise, we must:
         *  - set s->mve_no_pred to false, so this instruction
         *    is generated to use helper functions
         *  - end the TB now, without chaining to the next TB
         */
        if (skip_context_update || !s->v7m_new_fp_ctxt_needed) {
            s->mve_no_pred = false;
            s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
        }
    }
}
/*
 * Generate code for M-profile FP context handling: update the
 * ownership of the FP context, and create a new context if
 * necessary. This corresponds to the parts of the pseudocode
 * ExecuteFPCheck() after the initial PreserveFPState() call.
 */
static void gen_update_fp_context(DisasContext *s)
{
    /* Update ownership of FP context: set FPCCR.S to match current state */
    if (s->v8m_fpccr_s_wrong) {
        TCGv_i32 tmp;

        tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
        if (s->v8m_secure) {
            tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
        } else {
            tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
        }
        store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
        /* Don't need to do this for any further FP insns in this TB */
        s->v8m_fpccr_s_wrong = false;
    }

    if (s->v7m_new_fp_ctxt_needed) {
        /*
         * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA,
         * the FPSCR, and VPR.
         */
        TCGv_i32 control, fpscr;
        uint32_t bits = R_V7M_CONTROL_FPCA_MASK;

        fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
        gen_helper_vfp_set_fpscr(cpu_env, fpscr);
        tcg_temp_free_i32(fpscr);
        if (dc_isar_feature(aa32_mve, s)) {
            store_cpu_field(tcg_constant_i32(0), v7m.vpr);
        }
        /*
         * We just updated the FPSCR and VPR. Some of this state is cached
         * in the MVE_NO_PRED TB flag. We want to avoid having to end the
         * TB here, which means we need the new value of the MVE_NO_PRED
         * flag to be exactly known here and the same for all executions.
         * Luckily FPDSCR.LTPSIZE is always constant 4 and the VPR is
         * always set to 0, so the new MVE_NO_PRED flag is always 1
         * if and only if we have MVE.
         *
         * (The other FPSCR state cached in TB flags is VECLEN and VECSTRIDE,
         * but those do not exist for M-profile, so are not relevant here.)
         */
        s->mve_no_pred = dc_isar_feature(aa32_mve, s);

        if (s->v8m_secure) {
            bits |= R_V7M_CONTROL_SFPA_MASK;
        }
        control = load_cpu_field(v7m.control[M_REG_S]);
        tcg_gen_ori_i32(control, control, bits);
        store_cpu_field(control, v7m.control[M_REG_S]);
        /* Don't need to do this for any further FP insns in this TB */
        s->v7m_new_fp_ctxt_needed = false;
    }
}
/*
 * Check that VFP access is enabled, A-profile specific version.
 *
 * If VFP is enabled, return true. If not, emit code to generate an
 * appropriate exception and return false.
 * The ignore_vfp_enabled argument specifies that we should ignore
 * whether VFP is enabled via FPEXC.EN: this should be true for FMXR/FMRX
 * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns.
 */
static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled)
{
    if (s->fp_excp_el) {
        /*
         * The full syndrome is only used for HSR when HCPTR traps:
         * For v8, when TA==0, coproc is RES0.
         * For v7, any use of a Floating-point instruction or access
         * to a Floating-point Extension register that is trapped to
         * Hyp mode because of a trap configured in the HCPTR sets
         * this field to 0xA.
         */
        int coproc = arm_dc_feature(s, ARM_FEATURE_V8) ? 0 : 0xa;
        uint32_t syn = syn_fp_access_trap(1, 0xe, false, coproc);

        gen_exception_insn_el(s, 0, EXCP_UDEF, syn, s->fp_excp_el);
        return false;
    }

    /*
     * Note that rebuild_hflags_a32 has already accounted for being in EL0
     * and the higher EL in A64 mode, etc. Unlike A64 mode, there do not
     * appear to be any insns which touch VFP which are allowed.
     */
    if (s->sme_trap_nonstreaming) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_Streaming,
                                       curr_insn_len(s) == 2));
        return false;
    }

    if (!s->vfp_enabled && !ignore_vfp_enabled) {
        assert(!arm_dc_feature(s, ARM_FEATURE_M));
        unallocated_encoding(s);
        return false;
    }
    return true;
}
/*
 * Check that VFP access is enabled, M-profile specific version.
 *
 * If VFP is enabled, do the necessary M-profile lazy-FP handling and then
 * return true. If not, emit code to generate an appropriate exception and
 * return false.
 * skip_context_update is true to skip the "update FP context" part of this.
 */
bool vfp_access_check_m(DisasContext *s, bool skip_context_update)
{
    if (s->fp_excp_el) {
        /*
         * M-profile mostly catches the "FPU disabled" case early, in
         * disas_m_nocp(), but a few insns (eg LCTP, WLSTP, DLSTP)
         * which do coprocessor-checks are outside the large ranges of
         * the encoding space handled by the patterns in m-nocp.decode,
         * and for them we may need to raise NOCP here.
         */
        gen_exception_insn_el(s, 0, EXCP_NOCP,
                              syn_uncategorized(), s->fp_excp_el);
        return false;
    }

    /* Handle M-profile lazy FP state mechanics */

    /* Trigger lazy-state preservation if necessary */
    gen_preserve_fp_state(s, skip_context_update);

    if (!skip_context_update) {
        /* Update ownership of FP context and create new FP context if needed */
        gen_update_fp_context(s);
    }

    return true;
}

/*
 * The most usual kind of VFP access check, for everything except
 * FMXR/FMRX to the always-available special registers.
 */
bool vfp_access_check(DisasContext *s)
{
    if (arm_dc_feature(s, ARM_FEATURE_M)) {
        return vfp_access_check_m(s, false);
    } else {
        return vfp_access_check_a(s, false);
    }
}
static bool trans_VSEL(DisasContext *s, arg_VSEL *a)
{
    uint32_t rd, rn, rm;
    int sz = a->sz;

    if (!dc_isar_feature(aa32_vsel, s)) {
        return false;
    }

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vm | a->vn | a->vd) & 0x10)) {
        return false;
    }

    rd = a->vd;
    rn = a->vn;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    if (sz == 3) {
        TCGv_i64 frn, frm, dest;
        TCGv_i64 tmp, zero, zf, nf, vf;

        zero = tcg_constant_i64(0);

        frn = tcg_temp_new_i64();
        frm = tcg_temp_new_i64();
        dest = tcg_temp_new_i64();

        zf = tcg_temp_new_i64();
        nf = tcg_temp_new_i64();
        vf = tcg_temp_new_i64();

        tcg_gen_extu_i32_i64(zf, cpu_ZF);
        tcg_gen_ext_i32_i64(nf, cpu_NF);
        tcg_gen_ext_i32_i64(vf, cpu_VF);

        vfp_load_reg64(frn, rn);
        vfp_load_reg64(frm, rm);
        switch (a->cc) {
        case 0: /* eq: Z */
            tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero, frn, frm);
            break;
        case 1: /* vs: V */
            tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero, frn, frm);
            break;
        case 2: /* ge: N == V -> N ^ V == 0 */
            tmp = tcg_temp_new_i64();
            tcg_gen_xor_i64(tmp, vf, nf);
            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, frn, frm);
            tcg_temp_free_i64(tmp);
            break;
        case 3: /* gt: !Z && N == V */
            tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero, frn, frm);
            tmp = tcg_temp_new_i64();
            tcg_gen_xor_i64(tmp, vf, nf);
            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, dest, frm);
            tcg_temp_free_i64(tmp);
            break;
        }
        vfp_store_reg64(dest, rd);
        tcg_temp_free_i64(frn);
        tcg_temp_free_i64(frm);
        tcg_temp_free_i64(dest);

        tcg_temp_free_i64(zf);
        tcg_temp_free_i64(nf);
        tcg_temp_free_i64(vf);
    } else {
        TCGv_i32 frn, frm, dest;
        TCGv_i32 tmp, zero;

        zero = tcg_constant_i32(0);

        frn = tcg_temp_new_i32();
        frm = tcg_temp_new_i32();
        dest = tcg_temp_new_i32();
        vfp_load_reg32(frn, rn);
        vfp_load_reg32(frm, rm);
        switch (a->cc) {
        case 0: /* eq: Z */
            tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero, frn, frm);
            break;
        case 1: /* vs: V */
            tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero, frn, frm);
            break;
        case 2: /* ge: N == V -> N ^ V == 0 */
            tmp = tcg_temp_new_i32();
            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, frn, frm);
            tcg_temp_free_i32(tmp);
            break;
        case 3: /* gt: !Z && N == V */
            tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero, frn, frm);
            tmp = tcg_temp_new_i32();
            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, dest, frm);
            tcg_temp_free_i32(tmp);
            break;
        }
        /* For fp16 the top half is always zeroes */
        if (sz == 1) {
            tcg_gen_andi_i32(dest, dest, 0xffff);
        }
        vfp_store_reg32(dest, rd);
        tcg_temp_free_i32(frn);
        tcg_temp_free_i32(frm);
        tcg_temp_free_i32(dest);
    }

    return true;
}
/*
 * Table for converting the most common AArch32 encoding of
 * rounding mode to arm_fprounding order (which matches the
 * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
 */
static const uint8_t fp_decode_rm[] = {
    FPROUNDING_TIEAWAY,
    FPROUNDING_TIEEVEN,
    FPROUNDING_POSINF,
    FPROUNDING_NEGINF,
};
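/*
 * Example of the mapping (for illustration): the common encoding uses
 * RM == 0b00 for the "round to nearest, ties away" forms such as VRINTA,
 * which this table maps to FPROUNDING_TIEAWAY, while RM == 0b11
 * (e.g. VRINTM) maps to FPROUNDING_NEGINF.
 */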
static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
{
    uint32_t rd, rm;
    int sz = a->sz;
    TCGv_ptr fpst;
    TCGv_i32 tcg_rmode;
    int rounding = fp_decode_rm[a->rm];

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vm | a->vd) & 0x10)) {
        return false;
    }

    rd = a->vd;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    if (sz == 1) {
        fpst = fpstatus_ptr(FPST_FPCR_F16);
    } else {
        fpst = fpstatus_ptr(FPST_FPCR);
    }

    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);

    if (sz == 3) {
        TCGv_i64 tcg_op;
        TCGv_i64 tcg_res;
        tcg_op = tcg_temp_new_i64();
        tcg_res = tcg_temp_new_i64();
        vfp_load_reg64(tcg_op, rm);
        gen_helper_rintd(tcg_res, tcg_op, fpst);
        vfp_store_reg64(tcg_res, rd);
        tcg_temp_free_i64(tcg_op);
        tcg_temp_free_i64(tcg_res);
    } else {
        TCGv_i32 tcg_op;
        TCGv_i32 tcg_res;
        tcg_op = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        vfp_load_reg32(tcg_op, rm);
        if (sz == 1) {
            gen_helper_rinth(tcg_res, tcg_op, fpst);
        } else {
            gen_helper_rints(tcg_res, tcg_op, fpst);
        }
        vfp_store_reg32(tcg_res, rd);
        tcg_temp_free_i32(tcg_op);
        tcg_temp_free_i32(tcg_res);
    }

    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    tcg_temp_free_i32(tcg_rmode);

    tcg_temp_free_ptr(fpst);
    return true;
}
static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
{
    uint32_t rd, rm;
    int sz = a->sz;
    TCGv_ptr fpst;
    TCGv_i32 tcg_rmode, tcg_shift;
    int rounding = fp_decode_rm[a->rm];
    bool is_signed = a->op;

    if (!dc_isar_feature(aa32_vcvt_dr, s)) {
        return false;
    }

    if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    rd = a->vd;
    rm = a->vm;

    if (!vfp_access_check(s)) {
        return true;
    }

    if (sz == 1) {
        fpst = fpstatus_ptr(FPST_FPCR_F16);
    } else {
        fpst = fpstatus_ptr(FPST_FPCR);
    }

    tcg_shift = tcg_constant_i32(0);

    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);

    if (sz == 3) {
        TCGv_i64 tcg_double, tcg_res;
        TCGv_i32 tcg_tmp;
        tcg_double = tcg_temp_new_i64();
        tcg_res = tcg_temp_new_i64();
        tcg_tmp = tcg_temp_new_i32();
        vfp_load_reg64(tcg_double, rm);
        if (is_signed) {
            gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
        } else {
            gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
        }
        tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
        vfp_store_reg32(tcg_tmp, rd);
        tcg_temp_free_i32(tcg_tmp);
        tcg_temp_free_i64(tcg_res);
        tcg_temp_free_i64(tcg_double);
    } else {
        TCGv_i32 tcg_single, tcg_res;
        tcg_single = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        vfp_load_reg32(tcg_single, rm);
        if (sz == 1) {
            if (is_signed) {
                gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst);
            } else {
                gen_helper_vfp_toulh(tcg_res, tcg_single, tcg_shift, fpst);
            }
        } else {
            if (is_signed) {
                gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
            } else {
                gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
            }
        }
        vfp_store_reg32(tcg_res, rd);
        tcg_temp_free_i32(tcg_res);
        tcg_temp_free_i32(tcg_single);
    }

    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    tcg_temp_free_i32(tcg_rmode);

    tcg_temp_free_ptr(fpst);
    return true;
}
bool mve_skip_vmov(DisasContext *s, int vn, int index, int size)
{
    /*
     * In a CPU with MVE, the VMOV (vector lane to general-purpose register)
     * and VMOV (general-purpose register to vector lane) insns are not
     * predicated, but they are subject to beatwise execution if they are
     * not in an IT block.
     *
     * Since our implementation always executes all 4 beats in one tick,
     * this means only that if PSR.ECI says we should not be executing
     * the beat corresponding to the lane of the vector register being
     * accessed then we should skip performing the move, and that we need
     * to do the usual check for bad ECI state and advance of ECI state.
     *
     * Note that if PSR.ECI is non-zero then we cannot be in an IT block.
     *
     * Return true if this VMOV scalar <-> gpreg should be skipped because
     * the MVE PSR.ECI state says we skip the beat where the store happens.
     */

    /* Calculate the byte offset into Qn which we're going to access */
    int ofs = (index << size) + ((vn & 1) * 8);

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }

    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
        return ofs < 4;
    case ECI_A0A1:
        return ofs < 8;
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return ofs < 12;
    default:
        g_assert_not_reached();
    }
}
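/*
 * Illustrative example (assuming the ECI cases above): a 32-bit lane at
 * index 1 of an even-numbered Dreg gives ofs == (1 << MO_32) == 4, i.e.
 * bytes 4..7 of the Qreg. With ECI_A0 (only beat 0 already executed),
 * "ofs < 4" is false, so the VMOV is still performed; only an access to
 * bytes 0..3 would be skipped.
 */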
652 static bool trans_VMOV_to_gp(DisasContext
*s
, arg_VMOV_to_gp
*a
)
654 /* VMOV scalar to general purpose register */
658 * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has
659 * all sizes, whether the CPU has fp or not.
661 if (!dc_isar_feature(aa32_mve
, s
)) {
663 ? !dc_isar_feature(aa32_fpsp_v2
, s
)
664 : !arm_dc_feature(s
, ARM_FEATURE_NEON
)) {
669 /* UNDEF accesses to D16-D31 if they don't exist */
670 if (!dc_isar_feature(aa32_simd_r32
, s
) && (a
->vn
& 0x10)) {
674 if (dc_isar_feature(aa32_mve
, s
)) {
675 if (!mve_eci_check(s
)) {
680 if (!vfp_access_check(s
)) {
684 if (!mve_skip_vmov(s
, a
->vn
, a
->index
, a
->size
)) {
685 tmp
= tcg_temp_new_i32();
686 read_neon_element32(tmp
, a
->vn
, a
->index
,
687 a
->size
| (a
->u
? 0 : MO_SIGN
));
688 store_reg(s
, a
->rt
, tmp
);
691 if (dc_isar_feature(aa32_mve
, s
)) {
692 mve_update_and_store_eci(s
);
697 static bool trans_VMOV_from_gp(DisasContext
*s
, arg_VMOV_from_gp
*a
)
699 /* VMOV general purpose register to scalar */
703 * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has
704 * all sizes, whether the CPU has fp or not.
706 if (!dc_isar_feature(aa32_mve
, s
)) {
708 ? !dc_isar_feature(aa32_fpsp_v2
, s
)
709 : !arm_dc_feature(s
, ARM_FEATURE_NEON
)) {
714 /* UNDEF accesses to D16-D31 if they don't exist */
715 if (!dc_isar_feature(aa32_simd_r32
, s
) && (a
->vn
& 0x10)) {
719 if (dc_isar_feature(aa32_mve
, s
)) {
720 if (!mve_eci_check(s
)) {
725 if (!vfp_access_check(s
)) {
729 if (!mve_skip_vmov(s
, a
->vn
, a
->index
, a
->size
)) {
730 tmp
= load_reg(s
, a
->rt
);
731 write_neon_element32(tmp
, a
->vn
, a
->index
, a
->size
);
732 tcg_temp_free_i32(tmp
);
735 if (dc_isar_feature(aa32_mve
, s
)) {
736 mve_update_and_store_eci(s
);
741 static bool trans_VDUP(DisasContext
*s
, arg_VDUP
*a
)
743 /* VDUP (general purpose register) */
747 if (!arm_dc_feature(s
, ARM_FEATURE_NEON
)) {
751 /* UNDEF accesses to D16-D31 if they don't exist */
752 if (!dc_isar_feature(aa32_simd_r32
, s
) && (a
->vn
& 0x10)) {
760 if (a
->q
&& (a
->vn
& 1)) {
764 vec_size
= a
->q
? 16 : 8;
773 if (!vfp_access_check(s
)) {
777 tmp
= load_reg(s
, a
->rt
);
778 tcg_gen_gvec_dup_i32(size
, neon_full_reg_offset(a
->vn
),
779 vec_size
, vec_size
, tmp
);
780 tcg_temp_free_i32(tmp
);
785 static bool trans_VMSR_VMRS(DisasContext
*s
, arg_VMSR_VMRS
*a
)
788 bool ignore_vfp_enabled
= false;
790 if (arm_dc_feature(s
, ARM_FEATURE_M
)) {
791 /* M profile version was already handled in m-nocp.decode */
795 if (!dc_isar_feature(aa32_fpsp_v2
, s
)) {
802 * VFPv2 allows access to FPSID from userspace; VFPv3 restricts
803 * all ID registers to privileged access only.
805 if (IS_USER(s
) && dc_isar_feature(aa32_fpsp_v3
, s
)) {
808 ignore_vfp_enabled
= true;
812 if (IS_USER(s
) || !arm_dc_feature(s
, ARM_FEATURE_MVFR
)) {
815 ignore_vfp_enabled
= true;
818 if (IS_USER(s
) || !arm_dc_feature(s
, ARM_FEATURE_V8
)) {
821 ignore_vfp_enabled
= true;
829 ignore_vfp_enabled
= true;
832 case ARM_VFP_FPINST2
:
833 /* Not present in VFPv3 */
834 if (IS_USER(s
) || dc_isar_feature(aa32_fpsp_v3
, s
)) {
843 * Call vfp_access_check_a() directly, because we need to tell
844 * it to ignore FPEXC.EN for some register accesses.
846 if (!vfp_access_check_a(s
, ignore_vfp_enabled
)) {
851 /* VMRS, move VFP special register to gp register */
857 if (s
->current_el
== 1) {
860 gen_helper_check_hcr_el2_trap(cpu_env
,
861 tcg_constant_i32(a
->rt
),
862 tcg_constant_i32(a
->reg
));
867 case ARM_VFP_FPINST2
:
868 tmp
= load_cpu_field(vfp
.xregs
[a
->reg
]);
872 tmp
= load_cpu_field(vfp
.xregs
[ARM_VFP_FPSCR
]);
873 tcg_gen_andi_i32(tmp
, tmp
, FPCR_NZCV_MASK
);
875 tmp
= tcg_temp_new_i32();
876 gen_helper_vfp_get_fpscr(tmp
, cpu_env
);
880 g_assert_not_reached();
884 /* Set the 4 flag bits in the CPSR. */
886 tcg_temp_free_i32(tmp
);
888 store_reg(s
, a
->rt
, tmp
);
891 /* VMSR, move gp register to VFP special register */
897 /* Writes are ignored. */
900 tmp
= load_reg(s
, a
->rt
);
901 gen_helper_vfp_set_fpscr(cpu_env
, tmp
);
902 tcg_temp_free_i32(tmp
);
907 * TODO: VFP subarchitecture support.
908 * For now, keep the EN bit only
910 tmp
= load_reg(s
, a
->rt
);
911 tcg_gen_andi_i32(tmp
, tmp
, 1 << 30);
912 store_cpu_field(tmp
, vfp
.xregs
[a
->reg
]);
916 case ARM_VFP_FPINST2
:
917 tmp
= load_reg(s
, a
->rt
);
918 store_cpu_field(tmp
, vfp
.xregs
[a
->reg
]);
921 g_assert_not_reached();
929 static bool trans_VMOV_half(DisasContext
*s
, arg_VMOV_single
*a
)
933 if (!dc_isar_feature(aa32_fp16_arith
, s
)) {
938 /* UNPREDICTABLE; we choose to UNDEF */
942 if (!vfp_access_check(s
)) {
947 /* VFP to general purpose register */
948 tmp
= tcg_temp_new_i32();
949 vfp_load_reg32(tmp
, a
->vn
);
950 tcg_gen_andi_i32(tmp
, tmp
, 0xffff);
951 store_reg(s
, a
->rt
, tmp
);
953 /* general purpose register to VFP */
954 tmp
= load_reg(s
, a
->rt
);
955 tcg_gen_andi_i32(tmp
, tmp
, 0xffff);
956 vfp_store_reg32(tmp
, a
->vn
);
957 tcg_temp_free_i32(tmp
);
963 static bool trans_VMOV_single(DisasContext
*s
, arg_VMOV_single
*a
)
967 if (!dc_isar_feature(aa32_fpsp_v2
, s
) && !dc_isar_feature(aa32_mve
, s
)) {
971 if (!vfp_access_check(s
)) {
976 /* VFP to general purpose register */
977 tmp
= tcg_temp_new_i32();
978 vfp_load_reg32(tmp
, a
->vn
);
980 /* Set the 4 flag bits in the CPSR. */
982 tcg_temp_free_i32(tmp
);
984 store_reg(s
, a
->rt
, tmp
);
987 /* general purpose register to VFP */
988 tmp
= load_reg(s
, a
->rt
);
989 vfp_store_reg32(tmp
, a
->vn
);
990 tcg_temp_free_i32(tmp
);
996 static bool trans_VMOV_64_sp(DisasContext
*s
, arg_VMOV_64_sp
*a
)
1000 if (!dc_isar_feature(aa32_fpsp_v2
, s
) && !dc_isar_feature(aa32_mve
, s
)) {
1005 * VMOV between two general-purpose registers and two single precision
1006 * floating point registers
1008 if (!vfp_access_check(s
)) {
1013 /* fpreg to gpreg */
1014 tmp
= tcg_temp_new_i32();
1015 vfp_load_reg32(tmp
, a
->vm
);
1016 store_reg(s
, a
->rt
, tmp
);
1017 tmp
= tcg_temp_new_i32();
1018 vfp_load_reg32(tmp
, a
->vm
+ 1);
1019 store_reg(s
, a
->rt2
, tmp
);
1021 /* gpreg to fpreg */
1022 tmp
= load_reg(s
, a
->rt
);
1023 vfp_store_reg32(tmp
, a
->vm
);
1024 tcg_temp_free_i32(tmp
);
1025 tmp
= load_reg(s
, a
->rt2
);
1026 vfp_store_reg32(tmp
, a
->vm
+ 1);
1027 tcg_temp_free_i32(tmp
);
1033 static bool trans_VMOV_64_dp(DisasContext
*s
, arg_VMOV_64_dp
*a
)
1038 * VMOV between two general-purpose registers and one double precision
1039 * floating point register. Note that this does not require support
1040 * for double precision arithmetic.
1042 if (!dc_isar_feature(aa32_fpsp_v2
, s
) && !dc_isar_feature(aa32_mve
, s
)) {
1046 /* UNDEF accesses to D16-D31 if they don't exist */
1047 if (!dc_isar_feature(aa32_simd_r32
, s
) && (a
->vm
& 0x10)) {
1051 if (!vfp_access_check(s
)) {
1056 /* fpreg to gpreg */
1057 tmp
= tcg_temp_new_i32();
1058 vfp_load_reg32(tmp
, a
->vm
* 2);
1059 store_reg(s
, a
->rt
, tmp
);
1060 tmp
= tcg_temp_new_i32();
1061 vfp_load_reg32(tmp
, a
->vm
* 2 + 1);
1062 store_reg(s
, a
->rt2
, tmp
);
1064 /* gpreg to fpreg */
1065 tmp
= load_reg(s
, a
->rt
);
1066 vfp_store_reg32(tmp
, a
->vm
* 2);
1067 tcg_temp_free_i32(tmp
);
1068 tmp
= load_reg(s
, a
->rt2
);
1069 vfp_store_reg32(tmp
, a
->vm
* 2 + 1);
1070 tcg_temp_free_i32(tmp
);
1076 static bool trans_VLDR_VSTR_hp(DisasContext
*s
, arg_VLDR_VSTR_sp
*a
)
1081 if (!dc_isar_feature(aa32_fpsp_v2
, s
) && !dc_isar_feature(aa32_mve
, s
)) {
1085 if (!vfp_access_check(s
)) {
1089 /* imm8 field is offset/2 for fp16, unlike fp32 and fp64 */
1090 offset
= a
->imm
<< 1;
1095 /* For thumb, use of PC is UNPREDICTABLE. */
1096 addr
= add_reg_for_lit(s
, a
->rn
, offset
);
1097 tmp
= tcg_temp_new_i32();
1099 gen_aa32_ld_i32(s
, tmp
, addr
, get_mem_index(s
), MO_UW
| MO_ALIGN
);
1100 vfp_store_reg32(tmp
, a
->vd
);
1102 vfp_load_reg32(tmp
, a
->vd
);
1103 gen_aa32_st_i32(s
, tmp
, addr
, get_mem_index(s
), MO_UW
| MO_ALIGN
);
1105 tcg_temp_free_i32(tmp
);
1106 tcg_temp_free_i32(addr
);
1111 static bool trans_VLDR_VSTR_sp(DisasContext
*s
, arg_VLDR_VSTR_sp
*a
)
1116 if (!dc_isar_feature(aa32_fpsp_v2
, s
) && !dc_isar_feature(aa32_mve
, s
)) {
1120 if (!vfp_access_check(s
)) {
1124 offset
= a
->imm
<< 2;
1129 /* For thumb, use of PC is UNPREDICTABLE. */
1130 addr
= add_reg_for_lit(s
, a
->rn
, offset
);
1131 tmp
= tcg_temp_new_i32();
1133 gen_aa32_ld_i32(s
, tmp
, addr
, get_mem_index(s
), MO_UL
| MO_ALIGN
);
1134 vfp_store_reg32(tmp
, a
->vd
);
1136 vfp_load_reg32(tmp
, a
->vd
);
1137 gen_aa32_st_i32(s
, tmp
, addr
, get_mem_index(s
), MO_UL
| MO_ALIGN
);
1139 tcg_temp_free_i32(tmp
);
1140 tcg_temp_free_i32(addr
);
1145 static bool trans_VLDR_VSTR_dp(DisasContext
*s
, arg_VLDR_VSTR_dp
*a
)
1151 /* Note that this does not require support for double arithmetic. */
1152 if (!dc_isar_feature(aa32_fpsp_v2
, s
) && !dc_isar_feature(aa32_mve
, s
)) {
1156 /* UNDEF accesses to D16-D31 if they don't exist */
1157 if (!dc_isar_feature(aa32_simd_r32
, s
) && (a
->vd
& 0x10)) {
1161 if (!vfp_access_check(s
)) {
1165 offset
= a
->imm
<< 2;
1170 /* For thumb, use of PC is UNPREDICTABLE. */
1171 addr
= add_reg_for_lit(s
, a
->rn
, offset
);
1172 tmp
= tcg_temp_new_i64();
1174 gen_aa32_ld_i64(s
, tmp
, addr
, get_mem_index(s
), MO_UQ
| MO_ALIGN_4
);
1175 vfp_store_reg64(tmp
, a
->vd
);
1177 vfp_load_reg64(tmp
, a
->vd
);
1178 gen_aa32_st_i64(s
, tmp
, addr
, get_mem_index(s
), MO_UQ
| MO_ALIGN_4
);
1180 tcg_temp_free_i64(tmp
);
1181 tcg_temp_free_i32(addr
);
1186 static bool trans_VLDM_VSTM_sp(DisasContext
*s
, arg_VLDM_VSTM_sp
*a
)
1192 if (!dc_isar_feature(aa32_fpsp_v2
, s
) && !dc_isar_feature(aa32_mve
, s
)) {
1198 if (n
== 0 || (a
->vd
+ n
) > 32) {
1200 * UNPREDICTABLE cases for bad immediates: we choose to
1201 * UNDEF to avoid generating huge numbers of TCG ops
1205 if (a
->rn
== 15 && a
->w
) {
1206 /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
1210 s
->eci_handled
= true;
1212 if (!vfp_access_check(s
)) {
1216 /* For thumb, use of PC is UNPREDICTABLE. */
1217 addr
= add_reg_for_lit(s
, a
->rn
, 0);
1220 tcg_gen_addi_i32(addr
, addr
, -(a
->imm
<< 2));
1223 if (s
->v8m_stackcheck
&& a
->rn
== 13 && a
->w
) {
1225 * Here 'addr' is the lowest address we will store to,
1226 * and is either the old SP (if post-increment) or
1227 * the new SP (if pre-decrement). For post-increment
1228 * where the old value is below the limit and the new
1229 * value is above, it is UNKNOWN whether the limit check
1230 * triggers; we choose to trigger.
1232 gen_helper_v8m_stackcheck(cpu_env
, addr
);
1236 tmp
= tcg_temp_new_i32();
1237 for (i
= 0; i
< n
; i
++) {
1240 gen_aa32_ld_i32(s
, tmp
, addr
, get_mem_index(s
), MO_UL
| MO_ALIGN
);
1241 vfp_store_reg32(tmp
, a
->vd
+ i
);
1244 vfp_load_reg32(tmp
, a
->vd
+ i
);
1245 gen_aa32_st_i32(s
, tmp
, addr
, get_mem_index(s
), MO_UL
| MO_ALIGN
);
1247 tcg_gen_addi_i32(addr
, addr
, offset
);
1249 tcg_temp_free_i32(tmp
);
1253 offset
= -offset
* n
;
1254 tcg_gen_addi_i32(addr
, addr
, offset
);
1256 store_reg(s
, a
->rn
, addr
);
1258 tcg_temp_free_i32(addr
);
1265 static bool trans_VLDM_VSTM_dp(DisasContext
*s
, arg_VLDM_VSTM_dp
*a
)
1272 /* Note that this does not require support for double arithmetic. */
1273 if (!dc_isar_feature(aa32_fpsp_v2
, s
) && !dc_isar_feature(aa32_mve
, s
)) {
1279 if (n
== 0 || (a
->vd
+ n
) > 32 || n
> 16) {
1281 * UNPREDICTABLE cases for bad immediates: we choose to
1282 * UNDEF to avoid generating huge numbers of TCG ops
1286 if (a
->rn
== 15 && a
->w
) {
1287 /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
1291 /* UNDEF accesses to D16-D31 if they don't exist */
1292 if (!dc_isar_feature(aa32_simd_r32
, s
) && (a
->vd
+ n
) > 16) {
1296 s
->eci_handled
= true;
1298 if (!vfp_access_check(s
)) {
1302 /* For thumb, use of PC is UNPREDICTABLE. */
1303 addr
= add_reg_for_lit(s
, a
->rn
, 0);
1306 tcg_gen_addi_i32(addr
, addr
, -(a
->imm
<< 2));
1309 if (s
->v8m_stackcheck
&& a
->rn
== 13 && a
->w
) {
1311 * Here 'addr' is the lowest address we will store to,
1312 * and is either the old SP (if post-increment) or
1313 * the new SP (if pre-decrement). For post-increment
1314 * where the old value is below the limit and the new
1315 * value is above, it is UNKNOWN whether the limit check
1316 * triggers; we choose to trigger.
1318 gen_helper_v8m_stackcheck(cpu_env
, addr
);
1322 tmp
= tcg_temp_new_i64();
1323 for (i
= 0; i
< n
; i
++) {
1326 gen_aa32_ld_i64(s
, tmp
, addr
, get_mem_index(s
), MO_UQ
| MO_ALIGN_4
);
1327 vfp_store_reg64(tmp
, a
->vd
+ i
);
1330 vfp_load_reg64(tmp
, a
->vd
+ i
);
1331 gen_aa32_st_i64(s
, tmp
, addr
, get_mem_index(s
), MO_UQ
| MO_ALIGN_4
);
1333 tcg_gen_addi_i32(addr
, addr
, offset
);
1335 tcg_temp_free_i64(tmp
);
1339 offset
= -offset
* n
;
1340 } else if (a
->imm
& 1) {
1347 tcg_gen_addi_i32(addr
, addr
, offset
);
1349 store_reg(s
, a
->rn
, addr
);
1351 tcg_temp_free_i32(addr
);
/*
 * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp().
 * The callback should emit code to write a value to vd. If
 * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd
 * will contain the old value of the relevant VFP register;
 * otherwise it must be written to only.
 */
typedef void VFPGen3OpSPFn(TCGv_i32 vd,
                           TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst);
typedef void VFPGen3OpDPFn(TCGv_i64 vd,
                           TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);

/*
 * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp().
 * The callback should emit code to write a value to vd (which
 * should be written to only).
 */
typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);

/*
 * Return true if the specified S reg is in a scalar bank
 * (ie if it is s0..s7)
 */
static inline bool vfp_sreg_is_scalar(int reg)
{
    return (reg & 0x18) == 0;
}

/*
 * Return true if the specified D reg is in a scalar bank
 * (ie if it is d0..d3 or d16..d19)
 */
static inline bool vfp_dreg_is_scalar(int reg)
{
    return (reg & 0xc) == 0;
}

/*
 * Advance the S reg number forwards by delta within its bank
 * (ie increment the low 3 bits but leave the rest the same)
 */
static inline int vfp_advance_sreg(int reg, int delta)
{
    return ((reg + delta) & 0x7) | (reg & ~0x7);
}

/*
 * Advance the D reg number forwards by delta within its bank
 * (ie increment the low 2 bits but leave the rest the same)
 */
static inline int vfp_advance_dreg(int reg, int delta)
{
    return ((reg + delta) & 0x3) | (reg & ~0x3);
}
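/*
 * For example, vfp_advance_sreg(6, 3) == ((6 + 3) & 0x7) | (6 & ~0x7) == 1:
 * the register number wraps around within its 8-register bank (s6 -> s1)
 * rather than advancing into the next bank, which is what the VFP
 * short-vector semantics require.
 */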
1415 * Perform a 3-operand VFP data processing instruction. fn is the
1416 * callback to do the actual operation; this function deals with the
1417 * code to handle looping around for VFP vector processing.
1419 static bool do_vfp_3op_sp(DisasContext
*s
, VFPGen3OpSPFn
*fn
,
1420 int vd
, int vn
, int vm
, bool reads_vd
)
1422 uint32_t delta_m
= 0;
1423 uint32_t delta_d
= 0;
1424 int veclen
= s
->vec_len
;
1425 TCGv_i32 f0
, f1
, fd
;
1428 if (!dc_isar_feature(aa32_fpsp_v2
, s
)) {
1432 if (!dc_isar_feature(aa32_fpshvec
, s
) &&
1433 (veclen
!= 0 || s
->vec_stride
!= 0)) {
1437 if (!vfp_access_check(s
)) {
1442 /* Figure out what type of vector operation this is. */
1443 if (vfp_sreg_is_scalar(vd
)) {
1447 delta_d
= s
->vec_stride
+ 1;
1449 if (vfp_sreg_is_scalar(vm
)) {
1450 /* mixed scalar/vector */
1459 f0
= tcg_temp_new_i32();
1460 f1
= tcg_temp_new_i32();
1461 fd
= tcg_temp_new_i32();
1462 fpst
= fpstatus_ptr(FPST_FPCR
);
1464 vfp_load_reg32(f0
, vn
);
1465 vfp_load_reg32(f1
, vm
);
1469 vfp_load_reg32(fd
, vd
);
1471 fn(fd
, f0
, f1
, fpst
);
1472 vfp_store_reg32(fd
, vd
);
1478 /* Set up the operands for the next iteration */
1480 vd
= vfp_advance_sreg(vd
, delta_d
);
1481 vn
= vfp_advance_sreg(vn
, delta_d
);
1482 vfp_load_reg32(f0
, vn
);
1484 vm
= vfp_advance_sreg(vm
, delta_m
);
1485 vfp_load_reg32(f1
, vm
);
1489 tcg_temp_free_i32(f0
);
1490 tcg_temp_free_i32(f1
);
1491 tcg_temp_free_i32(fd
);
1492 tcg_temp_free_ptr(fpst
);
1497 static bool do_vfp_3op_hp(DisasContext
*s
, VFPGen3OpSPFn
*fn
,
1498 int vd
, int vn
, int vm
, bool reads_vd
)
1501 * Do a half-precision operation. Functionally this is
1502 * the same as do_vfp_3op_sp(), except:
1503 * - it uses the FPST_FPCR_F16
1504 * - it doesn't need the VFP vector handling (fp16 is a
1505 * v8 feature, and in v8 VFP vectors don't exist)
1506 * - it does the aa32_fp16_arith feature test
1508 TCGv_i32 f0
, f1
, fd
;
1511 if (!dc_isar_feature(aa32_fp16_arith
, s
)) {
1515 if (s
->vec_len
!= 0 || s
->vec_stride
!= 0) {
1519 if (!vfp_access_check(s
)) {
1523 f0
= tcg_temp_new_i32();
1524 f1
= tcg_temp_new_i32();
1525 fd
= tcg_temp_new_i32();
1526 fpst
= fpstatus_ptr(FPST_FPCR_F16
);
1528 vfp_load_reg32(f0
, vn
);
1529 vfp_load_reg32(f1
, vm
);
1532 vfp_load_reg32(fd
, vd
);
1534 fn(fd
, f0
, f1
, fpst
);
1535 vfp_store_reg32(fd
, vd
);
1537 tcg_temp_free_i32(f0
);
1538 tcg_temp_free_i32(f1
);
1539 tcg_temp_free_i32(fd
);
1540 tcg_temp_free_ptr(fpst
);
1545 static bool do_vfp_3op_dp(DisasContext
*s
, VFPGen3OpDPFn
*fn
,
1546 int vd
, int vn
, int vm
, bool reads_vd
)
1548 uint32_t delta_m
= 0;
1549 uint32_t delta_d
= 0;
1550 int veclen
= s
->vec_len
;
1551 TCGv_i64 f0
, f1
, fd
;
1554 if (!dc_isar_feature(aa32_fpdp_v2
, s
)) {
1558 /* UNDEF accesses to D16-D31 if they don't exist */
1559 if (!dc_isar_feature(aa32_simd_r32
, s
) && ((vd
| vn
| vm
) & 0x10)) {
1563 if (!dc_isar_feature(aa32_fpshvec
, s
) &&
1564 (veclen
!= 0 || s
->vec_stride
!= 0)) {
1568 if (!vfp_access_check(s
)) {
1573 /* Figure out what type of vector operation this is. */
1574 if (vfp_dreg_is_scalar(vd
)) {
1578 delta_d
= (s
->vec_stride
>> 1) + 1;
1580 if (vfp_dreg_is_scalar(vm
)) {
1581 /* mixed scalar/vector */
1590 f0
= tcg_temp_new_i64();
1591 f1
= tcg_temp_new_i64();
1592 fd
= tcg_temp_new_i64();
1593 fpst
= fpstatus_ptr(FPST_FPCR
);
1595 vfp_load_reg64(f0
, vn
);
1596 vfp_load_reg64(f1
, vm
);
1600 vfp_load_reg64(fd
, vd
);
1602 fn(fd
, f0
, f1
, fpst
);
1603 vfp_store_reg64(fd
, vd
);
1608 /* Set up the operands for the next iteration */
1610 vd
= vfp_advance_dreg(vd
, delta_d
);
1611 vn
= vfp_advance_dreg(vn
, delta_d
);
1612 vfp_load_reg64(f0
, vn
);
1614 vm
= vfp_advance_dreg(vm
, delta_m
);
1615 vfp_load_reg64(f1
, vm
);
1619 tcg_temp_free_i64(f0
);
1620 tcg_temp_free_i64(f1
);
1621 tcg_temp_free_i64(fd
);
1622 tcg_temp_free_ptr(fpst
);
1627 static bool do_vfp_2op_sp(DisasContext
*s
, VFPGen2OpSPFn
*fn
, int vd
, int vm
)
1629 uint32_t delta_m
= 0;
1630 uint32_t delta_d
= 0;
1631 int veclen
= s
->vec_len
;
1634 /* Note that the caller must check the aa32_fpsp_v2 feature. */
1636 if (!dc_isar_feature(aa32_fpshvec
, s
) &&
1637 (veclen
!= 0 || s
->vec_stride
!= 0)) {
1641 if (!vfp_access_check(s
)) {
1646 /* Figure out what type of vector operation this is. */
1647 if (vfp_sreg_is_scalar(vd
)) {
1651 delta_d
= s
->vec_stride
+ 1;
1653 if (vfp_sreg_is_scalar(vm
)) {
1654 /* mixed scalar/vector */
1663 f0
= tcg_temp_new_i32();
1664 fd
= tcg_temp_new_i32();
1666 vfp_load_reg32(f0
, vm
);
1670 vfp_store_reg32(fd
, vd
);
1677 /* single source one-many */
1679 vd
= vfp_advance_sreg(vd
, delta_d
);
1680 vfp_store_reg32(fd
, vd
);
1685 /* Set up the operands for the next iteration */
1687 vd
= vfp_advance_sreg(vd
, delta_d
);
1688 vm
= vfp_advance_sreg(vm
, delta_m
);
1689 vfp_load_reg32(f0
, vm
);
1692 tcg_temp_free_i32(f0
);
1693 tcg_temp_free_i32(fd
);
1698 static bool do_vfp_2op_hp(DisasContext
*s
, VFPGen2OpSPFn
*fn
, int vd
, int vm
)
1701 * Do a half-precision operation. Functionally this is
1702 * the same as do_vfp_2op_sp(), except:
1703 * - it doesn't need the VFP vector handling (fp16 is a
1704 * v8 feature, and in v8 VFP vectors don't exist)
1705 * - it does the aa32_fp16_arith feature test
1709 /* Note that the caller must check the aa32_fp16_arith feature */
1711 if (!dc_isar_feature(aa32_fp16_arith
, s
)) {
1715 if (s
->vec_len
!= 0 || s
->vec_stride
!= 0) {
1719 if (!vfp_access_check(s
)) {
1723 f0
= tcg_temp_new_i32();
1724 vfp_load_reg32(f0
, vm
);
1726 vfp_store_reg32(f0
, vd
);
1727 tcg_temp_free_i32(f0
);
1732 static bool do_vfp_2op_dp(DisasContext
*s
, VFPGen2OpDPFn
*fn
, int vd
, int vm
)
1734 uint32_t delta_m
= 0;
1735 uint32_t delta_d
= 0;
1736 int veclen
= s
->vec_len
;
1739 /* Note that the caller must check the aa32_fpdp_v2 feature. */
1741 /* UNDEF accesses to D16-D31 if they don't exist */
1742 if (!dc_isar_feature(aa32_simd_r32
, s
) && ((vd
| vm
) & 0x10)) {
1746 if (!dc_isar_feature(aa32_fpshvec
, s
) &&
1747 (veclen
!= 0 || s
->vec_stride
!= 0)) {
1751 if (!vfp_access_check(s
)) {
1756 /* Figure out what type of vector operation this is. */
1757 if (vfp_dreg_is_scalar(vd
)) {
1761 delta_d
= (s
->vec_stride
>> 1) + 1;
1763 if (vfp_dreg_is_scalar(vm
)) {
1764 /* mixed scalar/vector */
1773 f0
= tcg_temp_new_i64();
1774 fd
= tcg_temp_new_i64();
1776 vfp_load_reg64(f0
, vm
);
1780 vfp_store_reg64(fd
, vd
);
1787 /* single source one-many */
1789 vd
= vfp_advance_dreg(vd
, delta_d
);
1790 vfp_store_reg64(fd
, vd
);
        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
        vm = vfp_advance_dreg(vm, delta_m);
        vfp_load_reg64(f0, vm);
    }
1802 tcg_temp_free_i64(f0
);
1803 tcg_temp_free_i64(fd
);
1808 static void gen_VMLA_hp(TCGv_i32 vd
, TCGv_i32 vn
, TCGv_i32 vm
, TCGv_ptr fpst
)
1810 /* Note that order of inputs to the add matters for NaNs */
1811 TCGv_i32 tmp
= tcg_temp_new_i32();
1813 gen_helper_vfp_mulh(tmp
, vn
, vm
, fpst
);
1814 gen_helper_vfp_addh(vd
, vd
, tmp
, fpst
);
1815 tcg_temp_free_i32(tmp
);
1818 static bool trans_VMLA_hp(DisasContext
*s
, arg_VMLA_sp
*a
)
1820 return do_vfp_3op_hp(s
, gen_VMLA_hp
, a
->vd
, a
->vn
, a
->vm
, true);
1823 static void gen_VMLA_sp(TCGv_i32 vd
, TCGv_i32 vn
, TCGv_i32 vm
, TCGv_ptr fpst
)
1825 /* Note that order of inputs to the add matters for NaNs */
1826 TCGv_i32 tmp
= tcg_temp_new_i32();
1828 gen_helper_vfp_muls(tmp
, vn
, vm
, fpst
);
1829 gen_helper_vfp_adds(vd
, vd
, tmp
, fpst
);
1830 tcg_temp_free_i32(tmp
);
1833 static bool trans_VMLA_sp(DisasContext
*s
, arg_VMLA_sp
*a
)
1835 return do_vfp_3op_sp(s
, gen_VMLA_sp
, a
->vd
, a
->vn
, a
->vm
, true);
1838 static void gen_VMLA_dp(TCGv_i64 vd
, TCGv_i64 vn
, TCGv_i64 vm
, TCGv_ptr fpst
)
1840 /* Note that order of inputs to the add matters for NaNs */
1841 TCGv_i64 tmp
= tcg_temp_new_i64();
1843 gen_helper_vfp_muld(tmp
, vn
, vm
, fpst
);
1844 gen_helper_vfp_addd(vd
, vd
, tmp
, fpst
);
1845 tcg_temp_free_i64(tmp
);
1848 static bool trans_VMLA_dp(DisasContext
*s
, arg_VMLA_dp
*a
)
1850 return do_vfp_3op_dp(s
, gen_VMLA_dp
, a
->vd
, a
->vn
, a
->vm
, true);
1853 static void gen_VMLS_hp(TCGv_i32 vd
, TCGv_i32 vn
, TCGv_i32 vm
, TCGv_ptr fpst
)
1856 * VMLS: vd = vd + -(vn * vm)
1857 * Note that order of inputs to the add matters for NaNs.
1859 TCGv_i32 tmp
= tcg_temp_new_i32();
1861 gen_helper_vfp_mulh(tmp
, vn
, vm
, fpst
);
1862 gen_helper_vfp_negh(tmp
, tmp
);
1863 gen_helper_vfp_addh(vd
, vd
, tmp
, fpst
);
1864 tcg_temp_free_i32(tmp
);
1867 static bool trans_VMLS_hp(DisasContext
*s
, arg_VMLS_sp
*a
)
1869 return do_vfp_3op_hp(s
, gen_VMLS_hp
, a
->vd
, a
->vn
, a
->vm
, true);
1872 static void gen_VMLS_sp(TCGv_i32 vd
, TCGv_i32 vn
, TCGv_i32 vm
, TCGv_ptr fpst
)
1875 * VMLS: vd = vd + -(vn * vm)
1876 * Note that order of inputs to the add matters for NaNs.
1878 TCGv_i32 tmp
= tcg_temp_new_i32();
1880 gen_helper_vfp_muls(tmp
, vn
, vm
, fpst
);
1881 gen_helper_vfp_negs(tmp
, tmp
);
1882 gen_helper_vfp_adds(vd
, vd
, tmp
, fpst
);
1883 tcg_temp_free_i32(tmp
);
1886 static bool trans_VMLS_sp(DisasContext
*s
, arg_VMLS_sp
*a
)
1888 return do_vfp_3op_sp(s
, gen_VMLS_sp
, a
->vd
, a
->vn
, a
->vm
, true);
1891 static void gen_VMLS_dp(TCGv_i64 vd
, TCGv_i64 vn
, TCGv_i64 vm
, TCGv_ptr fpst
)
1894 * VMLS: vd = vd + -(vn * vm)
1895 * Note that order of inputs to the add matters for NaNs.
1897 TCGv_i64 tmp
= tcg_temp_new_i64();
1899 gen_helper_vfp_muld(tmp
, vn
, vm
, fpst
);
1900 gen_helper_vfp_negd(tmp
, tmp
);
1901 gen_helper_vfp_addd(vd
, vd
, tmp
, fpst
);
1902 tcg_temp_free_i64(tmp
);
1905 static bool trans_VMLS_dp(DisasContext
*s
, arg_VMLS_dp
*a
)
1907 return do_vfp_3op_dp(s
, gen_VMLS_dp
, a
->vd
, a
->vn
, a
->vm
, true);
1910 static void gen_VNMLS_hp(TCGv_i32 vd
, TCGv_i32 vn
, TCGv_i32 vm
, TCGv_ptr fpst
)
1913 * VNMLS: -fd + (fn * fm)
1914 * Note that it isn't valid to replace (-A + B) with (B - A) or similar
1915 * plausible looking simplifications because this will give wrong results
1918 TCGv_i32 tmp
= tcg_temp_new_i32();
1920 gen_helper_vfp_mulh(tmp
, vn
, vm
, fpst
);
1921 gen_helper_vfp_negh(vd
, vd
);
1922 gen_helper_vfp_addh(vd
, vd
, tmp
, fpst
);
1923 tcg_temp_free_i32(tmp
);
1926 static bool trans_VNMLS_hp(DisasContext
*s
, arg_VNMLS_sp
*a
)
1928 return do_vfp_3op_hp(s
, gen_VNMLS_hp
, a
->vd
, a
->vn
, a
->vm
, true);
1931 static void gen_VNMLS_sp(TCGv_i32 vd
, TCGv_i32 vn
, TCGv_i32 vm
, TCGv_ptr fpst
)
1934 * VNMLS: -fd + (fn * fm)
1935 * Note that it isn't valid to replace (-A + B) with (B - A) or similar
1936 * plausible looking simplifications because this will give wrong results
1939 TCGv_i32 tmp
= tcg_temp_new_i32();
1941 gen_helper_vfp_muls(tmp
, vn
, vm
, fpst
);
1942 gen_helper_vfp_negs(vd
, vd
);
1943 gen_helper_vfp_adds(vd
, vd
, tmp
, fpst
);
1944 tcg_temp_free_i32(tmp
);
1947 static bool trans_VNMLS_sp(DisasContext
*s
, arg_VNMLS_sp
*a
)
1949 return do_vfp_3op_sp(s
, gen_VNMLS_sp
, a
->vd
, a
->vn
, a
->vm
, true);
1952 static void gen_VNMLS_dp(TCGv_i64 vd
, TCGv_i64 vn
, TCGv_i64 vm
, TCGv_ptr fpst
)
1955 * VNMLS: -fd + (fn * fm)
1956 * Note that it isn't valid to replace (-A + B) with (B - A) or similar
1957 * plausible looking simplifications because this will give wrong results
1960 TCGv_i64 tmp
= tcg_temp_new_i64();
1962 gen_helper_vfp_muld(tmp
, vn
, vm
, fpst
);
1963 gen_helper_vfp_negd(vd
, vd
);
1964 gen_helper_vfp_addd(vd
, vd
, tmp
, fpst
);
1965 tcg_temp_free_i64(tmp
);
1968 static bool trans_VNMLS_dp(DisasContext
*s
, arg_VNMLS_dp
*a
)
1970 return do_vfp_3op_dp(s
, gen_VNMLS_dp
, a
->vd
, a
->vn
, a
->vm
, true);
1973 static void gen_VNMLA_hp(TCGv_i32 vd
, TCGv_i32 vn
, TCGv_i32 vm
, TCGv_ptr fpst
)
1975 /* VNMLA: -fd + -(fn * fm) */
1976 TCGv_i32 tmp
= tcg_temp_new_i32();
1978 gen_helper_vfp_mulh(tmp
, vn
, vm
, fpst
);
1979 gen_helper_vfp_negh(tmp
, tmp
);
1980 gen_helper_vfp_negh(vd
, vd
);
1981 gen_helper_vfp_addh(vd
, vd
, tmp
, fpst
);
1982 tcg_temp_free_i32(tmp
);
1985 static bool trans_VNMLA_hp(DisasContext
*s
, arg_VNMLA_sp
*a
)
1987 return do_vfp_3op_hp(s
, gen_VNMLA_hp
, a
->vd
, a
->vn
, a
->vm
, true);
1990 static void gen_VNMLA_sp(TCGv_i32 vd
, TCGv_i32 vn
, TCGv_i32 vm
, TCGv_ptr fpst
)
1992 /* VNMLA: -fd + -(fn * fm) */
1993 TCGv_i32 tmp
= tcg_temp_new_i32();
1995 gen_helper_vfp_muls(tmp
, vn
, vm
, fpst
);
1996 gen_helper_vfp_negs(tmp
, tmp
);
1997 gen_helper_vfp_negs(vd
, vd
);
1998 gen_helper_vfp_adds(vd
, vd
, tmp
, fpst
);
1999 tcg_temp_free_i32(tmp
);
2002 static bool trans_VNMLA_sp(DisasContext
*s
, arg_VNMLA_sp
*a
)
2004 return do_vfp_3op_sp(s
, gen_VNMLA_sp
, a
->vd
, a
->vn
, a
->vm
, true);
2007 static void gen_VNMLA_dp(TCGv_i64 vd
, TCGv_i64 vn
, TCGv_i64 vm
, TCGv_ptr fpst
)
    /* VNMLA: -fd + -(fn * fm) */
2010 TCGv_i64 tmp
= tcg_temp_new_i64();
2012 gen_helper_vfp_muld(tmp
, vn
, vm
, fpst
);
2013 gen_helper_vfp_negd(tmp
, tmp
);
2014 gen_helper_vfp_negd(vd
, vd
);
2015 gen_helper_vfp_addd(vd
, vd
, tmp
, fpst
);
2016 tcg_temp_free_i64(tmp
);
2019 static bool trans_VNMLA_dp(DisasContext
*s
, arg_VNMLA_dp
*a
)
2021 return do_vfp_3op_dp(s
, gen_VNMLA_dp
, a
->vd
, a
->vn
, a
->vm
, true);
2024 static bool trans_VMUL_hp(DisasContext
*s
, arg_VMUL_sp
*a
)
2026 return do_vfp_3op_hp(s
, gen_helper_vfp_mulh
, a
->vd
, a
->vn
, a
->vm
, false);
2029 static bool trans_VMUL_sp(DisasContext
*s
, arg_VMUL_sp
*a
)
2031 return do_vfp_3op_sp(s
, gen_helper_vfp_muls
, a
->vd
, a
->vn
, a
->vm
, false);
2034 static bool trans_VMUL_dp(DisasContext
*s
, arg_VMUL_dp
*a
)
2036 return do_vfp_3op_dp(s
, gen_helper_vfp_muld
, a
->vd
, a
->vn
, a
->vm
, false);
2039 static void gen_VNMUL_hp(TCGv_i32 vd
, TCGv_i32 vn
, TCGv_i32 vm
, TCGv_ptr fpst
)
2041 /* VNMUL: -(fn * fm) */
2042 gen_helper_vfp_mulh(vd
, vn
, vm
, fpst
);
2043 gen_helper_vfp_negh(vd
, vd
);
2046 static bool trans_VNMUL_hp(DisasContext
*s
, arg_VNMUL_sp
*a
)
2048 return do_vfp_3op_hp(s
, gen_VNMUL_hp
, a
->vd
, a
->vn
, a
->vm
, false);
2051 static void gen_VNMUL_sp(TCGv_i32 vd
, TCGv_i32 vn
, TCGv_i32 vm
, TCGv_ptr fpst
)
2053 /* VNMUL: -(fn * fm) */
2054 gen_helper_vfp_muls(vd
, vn
, vm
, fpst
);
2055 gen_helper_vfp_negs(vd
, vd
);
2058 static bool trans_VNMUL_sp(DisasContext
*s
, arg_VNMUL_sp
*a
)
2060 return do_vfp_3op_sp(s
, gen_VNMUL_sp
, a
->vd
, a
->vn
, a
->vm
, false);
2063 static void gen_VNMUL_dp(TCGv_i64 vd
, TCGv_i64 vn
, TCGv_i64 vm
, TCGv_ptr fpst
)
2065 /* VNMUL: -(fn * fm) */
2066 gen_helper_vfp_muld(vd
, vn
, vm
, fpst
);
2067 gen_helper_vfp_negd(vd
, vd
);
2070 static bool trans_VNMUL_dp(DisasContext
*s
, arg_VNMUL_dp
*a
)
2072 return do_vfp_3op_dp(s
, gen_VNMUL_dp
, a
->vd
, a
->vn
, a
->vm
, false);
2075 static bool trans_VADD_hp(DisasContext
*s
, arg_VADD_sp
*a
)
2077 return do_vfp_3op_hp(s
, gen_helper_vfp_addh
, a
->vd
, a
->vn
, a
->vm
, false);
2080 static bool trans_VADD_sp(DisasContext
*s
, arg_VADD_sp
*a
)
2082 return do_vfp_3op_sp(s
, gen_helper_vfp_adds
, a
->vd
, a
->vn
, a
->vm
, false);
2085 static bool trans_VADD_dp(DisasContext
*s
, arg_VADD_dp
*a
)
2087 return do_vfp_3op_dp(s
, gen_helper_vfp_addd
, a
->vd
, a
->vn
, a
->vm
, false);
2090 static bool trans_VSUB_hp(DisasContext
*s
, arg_VSUB_sp
*a
)
2092 return do_vfp_3op_hp(s
, gen_helper_vfp_subh
, a
->vd
, a
->vn
, a
->vm
, false);
2095 static bool trans_VSUB_sp(DisasContext
*s
, arg_VSUB_sp
*a
)
2097 return do_vfp_3op_sp(s
, gen_helper_vfp_subs
, a
->vd
, a
->vn
, a
->vm
, false);
2100 static bool trans_VSUB_dp(DisasContext
*s
, arg_VSUB_dp
*a
)
2102 return do_vfp_3op_dp(s
, gen_helper_vfp_subd
, a
->vd
, a
->vn
, a
->vm
, false);
2105 static bool trans_VDIV_hp(DisasContext
*s
, arg_VDIV_sp
*a
)
2107 return do_vfp_3op_hp(s
, gen_helper_vfp_divh
, a
->vd
, a
->vn
, a
->vm
, false);
2110 static bool trans_VDIV_sp(DisasContext
*s
, arg_VDIV_sp
*a
)
2112 return do_vfp_3op_sp(s
, gen_helper_vfp_divs
, a
->vd
, a
->vn
, a
->vm
, false);
2115 static bool trans_VDIV_dp(DisasContext
*s
, arg_VDIV_dp
*a
)
2117 return do_vfp_3op_dp(s
, gen_helper_vfp_divd
, a
->vd
, a
->vn
, a
->vm
, false);
2120 static bool trans_VMINNM_hp(DisasContext
*s
, arg_VMINNM_sp
*a
)
2122 if (!dc_isar_feature(aa32_vminmaxnm
, s
)) {
2125 return do_vfp_3op_hp(s
, gen_helper_vfp_minnumh
,
2126 a
->vd
, a
->vn
, a
->vm
, false);
2129 static bool trans_VMAXNM_hp(DisasContext
*s
, arg_VMAXNM_sp
*a
)
2131 if (!dc_isar_feature(aa32_vminmaxnm
, s
)) {
2134 return do_vfp_3op_hp(s
, gen_helper_vfp_maxnumh
,
2135 a
->vd
, a
->vn
, a
->vm
, false);
2138 static bool trans_VMINNM_sp(DisasContext
*s
, arg_VMINNM_sp
*a
)
2140 if (!dc_isar_feature(aa32_vminmaxnm
, s
)) {
2143 return do_vfp_3op_sp(s
, gen_helper_vfp_minnums
,
2144 a
->vd
, a
->vn
, a
->vm
, false);
2147 static bool trans_VMAXNM_sp(DisasContext
*s
, arg_VMAXNM_sp
*a
)
2149 if (!dc_isar_feature(aa32_vminmaxnm
, s
)) {
2152 return do_vfp_3op_sp(s
, gen_helper_vfp_maxnums
,
2153 a
->vd
, a
->vn
, a
->vm
, false);
2156 static bool trans_VMINNM_dp(DisasContext
*s
, arg_VMINNM_dp
*a
)
2158 if (!dc_isar_feature(aa32_vminmaxnm
, s
)) {
2161 return do_vfp_3op_dp(s
, gen_helper_vfp_minnumd
,
2162 a
->vd
, a
->vn
, a
->vm
, false);
2165 static bool trans_VMAXNM_dp(DisasContext
*s
, arg_VMAXNM_dp
*a
)
2167 if (!dc_isar_feature(aa32_vminmaxnm
, s
)) {
2170 return do_vfp_3op_dp(s
, gen_helper_vfp_maxnumd
,
2171 a
->vd
, a
->vn
, a
->vm
, false);
2174 static bool do_vfm_hp(DisasContext
*s
, arg_VFMA_sp
*a
, bool neg_n
, bool neg_d
)
2177 * VFNMA : fd = muladd(-fd, fn, fm)
2178 * VFNMS : fd = muladd(-fd, -fn, fm)
2179 * VFMA : fd = muladd( fd, fn, fm)
2180 * VFMS : fd = muladd( fd, -fn, fm)
2182 * These are fused multiply-add, and must be done as one floating
2183 * point operation with no rounding between the multiplication and
2184 * addition steps. NB that doing the negations here as separate
2185 * steps is correct : an input NaN should come out with its sign
2186 * bit flipped if it is a negated-input.
2189 TCGv_i32 vn
, vm
, vd
;
2192 * Present in VFPv4 only, and only with the FP16 extension.
2193 * Note that we can't rely on the SIMDFMAC check alone, because
2194 * in a Neon-no-VFP core that ID register field will be non-zero.
2196 if (!dc_isar_feature(aa32_fp16_arith
, s
) ||
2197 !dc_isar_feature(aa32_simdfmac
, s
) ||
2198 !dc_isar_feature(aa32_fpsp_v2
, s
)) {
2202 if (s
->vec_len
!= 0 || s
->vec_stride
!= 0) {
2206 if (!vfp_access_check(s
)) {
2210 vn
= tcg_temp_new_i32();
2211 vm
= tcg_temp_new_i32();
2212 vd
= tcg_temp_new_i32();
2214 vfp_load_reg32(vn
, a
->vn
);
2215 vfp_load_reg32(vm
, a
->vm
);
2218 gen_helper_vfp_negh(vn
, vn
);
2220 vfp_load_reg32(vd
, a
->vd
);
2223 gen_helper_vfp_negh(vd
, vd
);
2225 fpst
= fpstatus_ptr(FPST_FPCR_F16
);
2226 gen_helper_vfp_muladdh(vd
, vn
, vm
, vd
, fpst
);
2227 vfp_store_reg32(vd
, a
->vd
);
2229 tcg_temp_free_ptr(fpst
);
2230 tcg_temp_free_i32(vn
);
2231 tcg_temp_free_i32(vm
);
2232 tcg_temp_free_i32(vd
);
2237 static bool do_vfm_sp(DisasContext
*s
, arg_VFMA_sp
*a
, bool neg_n
, bool neg_d
)
2240 * VFNMA : fd = muladd(-fd, fn, fm)
2241 * VFNMS : fd = muladd(-fd, -fn, fm)
2242 * VFMA : fd = muladd( fd, fn, fm)
2243 * VFMS : fd = muladd( fd, -fn, fm)
2245 * These are fused multiply-add, and must be done as one floating
2246 * point operation with no rounding between the multiplication and
2247 * addition steps. NB that doing the negations here as separate
2248 * steps is correct : an input NaN should come out with its sign
2249 * bit flipped if it is a negated-input.
2252 TCGv_i32 vn
, vm
, vd
;
2255 * Present in VFPv4 only.
2256 * Note that we can't rely on the SIMDFMAC check alone, because
2257 * in a Neon-no-VFP core that ID register field will be non-zero.
2259 if (!dc_isar_feature(aa32_simdfmac
, s
) ||
2260 !dc_isar_feature(aa32_fpsp_v2
, s
)) {
2264 * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
2265 * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
2267 if (s
->vec_len
!= 0 || s
->vec_stride
!= 0) {
2271 if (!vfp_access_check(s
)) {
2275 vn
= tcg_temp_new_i32();
2276 vm
= tcg_temp_new_i32();
2277 vd
= tcg_temp_new_i32();
2279 vfp_load_reg32(vn
, a
->vn
);
2280 vfp_load_reg32(vm
, a
->vm
);
2283 gen_helper_vfp_negs(vn
, vn
);
2285 vfp_load_reg32(vd
, a
->vd
);
2288 gen_helper_vfp_negs(vd
, vd
);
2290 fpst
= fpstatus_ptr(FPST_FPCR
);
2291 gen_helper_vfp_muladds(vd
, vn
, vm
, vd
, fpst
);
2292 vfp_store_reg32(vd
, a
->vd
);
2294 tcg_temp_free_ptr(fpst
);
2295 tcg_temp_free_i32(vn
);
2296 tcg_temp_free_i32(vm
);
2297 tcg_temp_free_i32(vd
);
2302 static bool do_vfm_dp(DisasContext
*s
, arg_VFMA_dp
*a
, bool neg_n
, bool neg_d
)
2305 * VFNMA : fd = muladd(-fd, fn, fm)
2306 * VFNMS : fd = muladd(-fd, -fn, fm)
2307 * VFMA : fd = muladd( fd, fn, fm)
2308 * VFMS : fd = muladd( fd, -fn, fm)
2310 * These are fused multiply-add, and must be done as one floating
2311 * point operation with no rounding between the multiplication and
2312 * addition steps. NB that doing the negations here as separate
2313 * steps is correct : an input NaN should come out with its sign
2314 * bit flipped if it is a negated-input.
2317 TCGv_i64 vn
, vm
, vd
;
2320 * Present in VFPv4 only.
2321 * Note that we can't rely on the SIMDFMAC check alone, because
2322 * in a Neon-no-VFP core that ID register field will be non-zero.
2324 if (!dc_isar_feature(aa32_simdfmac
, s
) ||
2325 !dc_isar_feature(aa32_fpdp_v2
, s
)) {
2329 * In v7A, UNPREDICTABLE with non-zero vector length/stride; from
2330 * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A.
2332 if (s
->vec_len
!= 0 || s
->vec_stride
!= 0) {
2336 /* UNDEF accesses to D16-D31 if they don't exist. */
2337 if (!dc_isar_feature(aa32_simd_r32
, s
) &&
2338 ((a
->vd
| a
->vn
| a
->vm
) & 0x10)) {
2342 if (!vfp_access_check(s
)) {
2346 vn
= tcg_temp_new_i64();
2347 vm
= tcg_temp_new_i64();
2348 vd
= tcg_temp_new_i64();
2350 vfp_load_reg64(vn
, a
->vn
);
2351 vfp_load_reg64(vm
, a
->vm
);
2354 gen_helper_vfp_negd(vn
, vn
);
2356 vfp_load_reg64(vd
, a
->vd
);
2359 gen_helper_vfp_negd(vd
, vd
);
2361 fpst
= fpstatus_ptr(FPST_FPCR
);
2362 gen_helper_vfp_muladdd(vd
, vn
, vm
, vd
, fpst
);
2363 vfp_store_reg64(vd
, a
->vd
);
2365 tcg_temp_free_ptr(fpst
);
2366 tcg_temp_free_i64(vn
);
2367 tcg_temp_free_i64(vm
);
2368 tcg_temp_free_i64(vd
);
#define MAKE_ONE_VFM_TRANS_FN(INSN, PREC, NEGN, NEGD)                   \
    static bool trans_##INSN##_##PREC(DisasContext *s,                  \
                                      arg_##INSN##_##PREC *a)           \
    {                                                                   \
        return do_vfm_##PREC(s, a, NEGN, NEGD);                         \
    }

#define MAKE_VFM_TRANS_FNS(PREC) \
    MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \
    MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \
    MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \
    MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true)

MAKE_VFM_TRANS_FNS(hp)
MAKE_VFM_TRANS_FNS(sp)
MAKE_VFM_TRANS_FNS(dp)
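/*
 * For example, MAKE_VFM_TRANS_FNS(sp) expands (among others) to
 *
 *     static bool trans_VFMS_sp(DisasContext *s, arg_VFMS_sp *a)
 *     {
 *         return do_vfm_sp(s, a, true, false);
 *     }
 *
 * i.e. VFMS is the variant that negates the multiplicand (neg_n) and
 * leaves the accumulator alone.
 */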
2390 static bool trans_VMOV_imm_hp(DisasContext
*s
, arg_VMOV_imm_sp
*a
)
2392 if (!dc_isar_feature(aa32_fp16_arith
, s
)) {
2396 if (s
->vec_len
!= 0 || s
->vec_stride
!= 0) {
2400 if (!vfp_access_check(s
)) {
2404 vfp_store_reg32(tcg_constant_i32(vfp_expand_imm(MO_16
, a
->imm
)), a
->vd
);
2408 static bool trans_VMOV_imm_sp(DisasContext
*s
, arg_VMOV_imm_sp
*a
)
2410 uint32_t delta_d
= 0;
2411 int veclen
= s
->vec_len
;
2417 if (!dc_isar_feature(aa32_fpsp_v3
, s
)) {
2421 if (!dc_isar_feature(aa32_fpshvec
, s
) &&
2422 (veclen
!= 0 || s
->vec_stride
!= 0)) {
2426 if (!vfp_access_check(s
)) {
2431 /* Figure out what type of vector operation this is. */
2432 if (vfp_sreg_is_scalar(vd
)) {
2436 delta_d
= s
->vec_stride
+ 1;
2440 fd
= tcg_constant_i32(vfp_expand_imm(MO_32
, a
->imm
));
2443 vfp_store_reg32(fd
, vd
);
2449 /* Set up the operands for the next iteration */
2451 vd
= vfp_advance_sreg(vd
, delta_d
);
2457 static bool trans_VMOV_imm_dp(DisasContext
*s
, arg_VMOV_imm_dp
*a
)
2459 uint32_t delta_d
= 0;
2460 int veclen
= s
->vec_len
;
2466 if (!dc_isar_feature(aa32_fpdp_v3
, s
)) {
2470 /* UNDEF accesses to D16-D31 if they don't exist. */
2471 if (!dc_isar_feature(aa32_simd_r32
, s
) && (vd
& 0x10)) {
2475 if (!dc_isar_feature(aa32_fpshvec
, s
) &&
2476 (veclen
!= 0 || s
->vec_stride
!= 0)) {
2480 if (!vfp_access_check(s
)) {
2485 /* Figure out what type of vector operation this is. */
2486 if (vfp_dreg_is_scalar(vd
)) {
2490 delta_d
= (s
->vec_stride
>> 1) + 1;
2494 fd
= tcg_constant_i64(vfp_expand_imm(MO_64
, a
->imm
));
2497 vfp_store_reg64(fd
, vd
);
2503 /* Set up the operands for the next iteration */
2505 vd
= vfp_advance_dreg(vd
, delta_d
);
#define DO_VFP_2OP(INSN, PREC, FN, CHECK)                       \
    static bool trans_##INSN##_##PREC(DisasContext *s,          \
                                      arg_##INSN##_##PREC *a)   \
    {                                                           \
        if (!dc_isar_feature(CHECK, s)) {                       \
            return false;                                       \
        }                                                       \
        return do_vfp_2op_##PREC(s, FN, a->vd, a->vm);          \
    }

#define DO_VFP_VMOV(INSN, PREC, FN)                             \
    static bool trans_##INSN##_##PREC(DisasContext *s,          \
                                      arg_##INSN##_##PREC *a)   \
    {                                                           \
        if (!dc_isar_feature(aa32_fp##PREC##_v2, s) &&          \
            !dc_isar_feature(aa32_mve, s)) {                    \
            return false;                                       \
        }                                                       \
        return do_vfp_2op_##PREC(s, FN, a->vd, a->vm);          \
    }

DO_VFP_VMOV(VMOV_reg, sp, tcg_gen_mov_i32)
DO_VFP_VMOV(VMOV_reg, dp, tcg_gen_mov_i64)

DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh, aa32_fp16_arith)
DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss, aa32_fpsp_v2)
DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd, aa32_fpdp_v2)

DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh, aa32_fp16_arith)
DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs, aa32_fpsp_v2)
DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd, aa32_fpdp_v2)

static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
{
    gen_helper_vfp_sqrth(vd, vm, cpu_env);
}

static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
{
    gen_helper_vfp_sqrts(vd, vm, cpu_env);
}

static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
{
    gen_helper_vfp_sqrtd(vd, vm, cpu_env);
}

DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp, aa32_fp16_arith)
DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp, aa32_fpsp_v2)
DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp, aa32_fpdp_v2)
static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
{
    TCGv_i32 vd, vm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();

    vfp_load_reg32(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i32(vm, 0);
    } else {
        vfp_load_reg32(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmpeh(vd, vm, cpu_env);
    } else {
        gen_helper_vfp_cmph(vd, vm, cpu_env);
    }

    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(vm);

    return true;
}

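/*
 * The 'E' variants of VCMP (gen_helper_vfp_cmpe*) signal Invalid
 * Operation for any NaN operand, including quiet NaNs, whereas the
 * plain compares only signal on signaling NaNs. The helpers deposit
 * their result in FPSCR.NZCV rather than in a destination register.
 */
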
static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
{
    TCGv_i32 vd, vm;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();

    vfp_load_reg32(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i32(vm, 0);
    } else {
        vfp_load_reg32(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmpes(vd, vm, cpu_env);
    } else {
        gen_helper_vfp_cmps(vd, vm, cpu_env);
    }

    tcg_temp_free_i32(vd);
    tcg_temp_free_i32(vm);

    return true;
}

static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
{
    TCGv_i64 vd, vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i64();
    vm = tcg_temp_new_i64();

    vfp_load_reg64(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i64(vm, 0);
    } else {
        vfp_load_reg64(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmped(vd, vm, cpu_env);
    } else {
        gen_helper_vfp_cmpd(vd, vm, cpu_env);
    }

    tcg_temp_free_i64(vd);
    tcg_temp_free_i64(vm);

    return true;
}

static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    /* The T bit tells us if we want the low or high 16 bits of Vm */
    tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

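/*
 * get_ahp_flag() reads FPSCR.AHP: when set, half-precision values use
 * the Alternative Half-Precision format (no infinities or NaNs), so
 * the conversion helpers need it alongside the float_status pointer.
 */
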
static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;
    TCGv_i64 vd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    /* The T bit tells us if we want the low or high 16 bits of Vm */
    tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t));
    vd = tcg_temp_new_i64();
    gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i64(vd);
    return true;
}

static bool trans_VCVT_b16_f32(DisasContext *s, arg_VCVT_b16_f32 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_bf16, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    tmp = tcg_temp_new_i32();

    vfp_load_reg32(tmp, a->vm);
    gen_helper_bfcvt(tmp, tmp, fpst);
    tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

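/*
 * VCVT_b16_f32 above narrows a single-precision value to bfloat16;
 * like the fp16 conversions, the T bit selects which 16-bit half of
 * the destination single-precision register receives the result.
 */
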
static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();

    vfp_load_reg32(tmp, a->vm);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
    tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;
    TCGv_i64 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    vm = tcg_temp_new_i64();

    vfp_load_reg64(vm, a->vm);
    gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
    tcg_temp_free_i64(vm);
    tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
    tcg_temp_free_i32(ahp_mode);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

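/*
 * The three VRINT groups below differ only in rounding behaviour:
 * VRINTR rounds using the current FPSCR rounding mode, VRINTZ always
 * rounds toward zero, and VRINTX rounds using the current mode but
 * also raises Inexact when the result differs from the input.
 */
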
static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_rinth(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rints(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rintd(tmp, tmp, fpst);
    vfp_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    return true;
}

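/*
 * For the VRINTZ functions below the rounding mode is forced to
 * round-to-zero around the actual rounding op: gen_helper_set_rmode()
 * writes the previous mode back into its destination, so calling it a
 * second time with that value restores the guest's configured mode.
 */
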
static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rinth(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_rmode);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rints(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_rmode);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    tcg_rmode = tcg_const_i32(float_round_to_zero);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    gen_helper_rintd(tmp, tmp, fpst);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
    vfp_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    tcg_temp_free_i32(tcg_rmode);
    return true;
}

static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_rinth_exact(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rints_exact(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tmp);
    return true;
}

static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rintd_exact(tmp, tmp, fpst);
    vfp_store_reg64(tmp, a->vd);
    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tmp);
    return true;
}

static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
{
    TCGv_i64 vd;
    TCGv_i32 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i64();
    vfp_load_reg32(vm, a->vm);
    gen_helper_vfp_fcvtds(vd, vm, cpu_env);
    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i64(vd);
    return true;
}

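/*
 * Note the cross-over in operand sizes here: trans_VCVT_sp above widens
 * a single-precision source into a double-precision destination
 * (vfp_fcvtds), while trans_VCVT_dp below narrows a double to a single
 * (vfp_fcvtsd), so each reads one register size and writes the other.
 */
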
static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
{
    TCGv_i64 vm;
    TCGv_i32 vd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i64();
    vfp_load_reg64(vm, a->vm);
    gen_helper_vfp_fcvtsd(vd, vm, cpu_env);
    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i64(vm);
    return true;
}

static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    if (a->s) {
        /* i32 -> f16 */
        gen_helper_vfp_sitoh(vm, vm, fpst);
    } else {
        /* u32 -> f16 */
        gen_helper_vfp_uitoh(vm, vm, fpst);
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    if (a->s) {
        /* i32 -> f32 */
        gen_helper_vfp_sitos(vm, vm, fpst);
    } else {
        /* u32 -> f32 */
        gen_helper_vfp_uitos(vm, vm, fpst);
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
{
    TCGv_i32 vm;
    TCGv_i64 vd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i64();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    if (a->s) {
        /* i32 -> f64 */
        gen_helper_vfp_sitod(vd, vm, fpst);
    } else {
        /* u32 -> f64 */
        gen_helper_vfp_uitod(vd, vm, fpst);
    }
    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_i64(vd);
    tcg_temp_free_ptr(fpst);
    return true;
}

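/*
 * VJCVT converts a double to a 32-bit signed integer with JavaScript
 * semantics: it always rounds toward zero and, roughly speaking, an
 * out-of-range value wraps modulo 2^32 instead of saturating, with
 * FPSCR.Z indicating whether the conversion was exact. The details
 * live in the vjcvt helper.
 */
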
static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
{
    TCGv_i32 vd;
    TCGv_i64 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_jscvt, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i32();
    vfp_load_reg64(vm, a->vm);
    gen_helper_vjcvt(vd, vm, cpu_env);
    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i64(vm);
    tcg_temp_free_i32(vd);
    return true;
}

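/*
 * For the fixed-point conversions below, a->opc packs the op:U:sx
 * fields: bit 2 chooses float-to-fixed (round to zero) versus
 * fixed-to-float (round to nearest), bit 1 chooses unsigned, and bit 0
 * chooses a 32-bit rather than 16-bit fixed-point value, which is also
 * why frac_bits is (32 - imm) or (16 - imm) respectively.
 */
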
static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
{
    TCGv_i32 vd, shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i32();
    vfp_load_reg32(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR_F16);
    shift = tcg_constant_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_toslh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_toulh_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
{
    TCGv_i32 vd, shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fpsp_v3, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i32();
    vfp_load_reg32(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR);
    shift = tcg_constant_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
{
    TCGv_i64 vd;
    TCGv_i32 shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fpdp_v3, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i64();
    vfp_load_reg64(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR);
    shift = tcg_constant_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg64(vd, a->vd);
    tcg_temp_free_i64(vd);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR_F16);
    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizh(vm, vm, fpst);
        } else {
            gen_helper_vfp_tosih(vm, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizh(vm, vm, fpst);
        } else {
            gen_helper_vfp_touih(vm, vm, fpst);
        }
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizs(vm, vm, fpst);
        } else {
            gen_helper_vfp_tosis(vm, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizs(vm, vm, fpst);
        } else {
            gen_helper_vfp_touis(vm, vm, fpst);
        }
    }
    vfp_store_reg32(vm, a->vd);
    tcg_temp_free_i32(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
{
    TCGv_i32 vd;
    TCGv_i64 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i32();
    vfp_load_reg64(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizd(vd, vm, fpst);
        } else {
            gen_helper_vfp_tosid(vd, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizd(vd, vm, fpst);
        } else {
            gen_helper_vfp_touid(vd, vm, fpst);
        }
    }
    vfp_store_reg32(vd, a->vd);
    tcg_temp_free_i32(vd);
    tcg_temp_free_i64(vm);
    tcg_temp_free_ptr(fpst);
    return true;
}

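/*
 * VINS and VMOVX operate on the two half-precision values held in a
 * single-precision register: VINS copies the low half of Vm into the
 * high half of Vd, and VMOVX copies the high half of Vm into the low
 * half of Vd (zeroing the top half).
 */
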
static bool trans_VINS(DisasContext *s, arg_VINS *a)
{
    TCGv_i32 rd, rm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Insert low half of Vm into high half of Vd */
    rm = tcg_temp_new_i32();
    rd = tcg_temp_new_i32();
    vfp_load_reg32(rm, a->vm);
    vfp_load_reg32(rd, a->vd);
    tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
    vfp_store_reg32(rd, a->vd);
    tcg_temp_free_i32(rm);
    tcg_temp_free_i32(rd);
    return true;
}

static bool trans_VMOVX(DisasContext *s, arg_VINS *a)
{
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Set Vd to high half of Vm */
    rm = tcg_temp_new_i32();
    vfp_load_reg32(rm, a->vm);
    tcg_gen_shri_i32(rm, rm, 16);
    vfp_store_reg32(rm, a->vd);
    tcg_temp_free_i32(rm);
    return true;
}