/*
 * AArch64 specific helpers
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "qemu/units.h"
#include "cpu.h"
#include "exec/gdbstub.h"
#include "exec/helper-proto.h"
#include "qemu/host-utils.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/bitops.h"
#include "internals.h"
#include "qemu/crc32c.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "qemu/int128.h"
#include "qemu/atomic128.h"
#include "fpu/softfloat.h"
#include <zlib.h> /* For crc32 */
/* C2.4.7 Multiply and divide */
/* special cases for 0 and LLONG_MIN are mandated by the standard */
uint64_t HELPER(udiv64)(uint64_t num, uint64_t den)
{
    if (den == 0) {
        return 0;
    }
    return num / den;
}

int64_t HELPER(sdiv64)(int64_t num, int64_t den)
{
    if (den == 0) {
        return 0;
    }
    if (num == LLONG_MIN && den == -1) {
        return LLONG_MIN;
    }
    return num / den;
}
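/*
 * Note: plain C division is undefined for den == 0 and for
 * LLONG_MIN / -1 (signed overflow), so the guards above return the
 * architected AArch64 results (0 and LLONG_MIN respectively) before
 * the host division is ever executed.
 */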
uint64_t HELPER(rbit64)(uint64_t x)
{
    return revbit64(x);
}
void HELPER(msr_i_spsel)(CPUARMState *env, uint32_t imm)
{
    update_spsel(env, imm);
}
static void daif_check(CPUARMState *env, uint32_t op,
                       uint32_t imm, uintptr_t ra)
{
    /* DAIF update to PSTATE. This is OK from EL0 only if UMA is set. */
    if (arm_current_el(env) == 0 && !(arm_sctlr(env, 0) & SCTLR_UMA)) {
        raise_exception_ra(env, EXCP_UDEF,
                           syn_aa64_sysregtrap(0, extract32(op, 0, 3),
                                               extract32(op, 3, 3), 4,
                                               imm, 0x1f, 0),
                           exception_target_el(env), ra);
    }
}
void HELPER(msr_i_daifset)(CPUARMState *env, uint32_t imm)
{
    daif_check(env, 0x1e, imm, GETPC());
    env->daif |= (imm << 6) & PSTATE_DAIF;
    arm_rebuild_hflags(env);
}
void HELPER(msr_i_daifclear)(CPUARMState *env, uint32_t imm)
{
    daif_check(env, 0x1f, imm, GETPC());
    env->daif &= ~((imm << 6) & PSTATE_DAIF);
    arm_rebuild_hflags(env);
}
/* Convert a softfloat float_relation_ (as returned by
 * the float*_compare functions) to the correct ARM
 * NZCV flag state.
 */
static inline uint32_t float_rel_to_flags(int res)
{
    uint64_t flags;

    switch (res) {
    case float_relation_equal:
        flags = PSTATE_Z | PSTATE_C;
        break;
    case float_relation_less:
        flags = PSTATE_N;
        break;
    case float_relation_greater:
        flags = PSTATE_C;
        break;
    case float_relation_unordered:
    default:
        flags = PSTATE_C | PSTATE_V;
        break;
    }
    return flags;
}
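/*
 * For example, float_rel_to_flags(float64_compare_quiet(one, NaN, fpst))
 * returns PSTATE_C | PSTATE_V, the "unordered" NZCV result that
 * AArch64 FCMP defines for a NaN operand.
 */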
uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, void *fp_status)
{
    return float_rel_to_flags(float16_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, void *fp_status)
{
    return float_rel_to_flags(float16_compare(x, y, fp_status));
}

uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, void *fp_status)
{
    return float_rel_to_flags(float32_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, void *fp_status)
{
    return float_rel_to_flags(float32_compare(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, void *fp_status)
{
    return float_rel_to_flags(float64_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status)
{
    return float_rel_to_flags(float64_compare(x, y, fp_status));
}
float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    if ((float32_is_zero(a) && float32_is_infinity(b)) ||
        (float32_is_infinity(a) && float32_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float32((1U << 30) |
                            ((float32_val(a) ^ float32_val(b)) & (1U << 31)));
    }
    return float32_mul(a, b, fpst);
}
float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    if ((float64_is_zero(a) && float64_is_infinity(b)) ||
        (float64_is_infinity(a) && float64_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float64((1ULL << 62) |
                            ((float64_val(a) ^ float64_val(b)) & (1ULL << 63)));
    }
    return float64_mul(a, b, fpst);
}
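/*
 * FMULX differs from FMUL only in the 0 * infinity case, where the
 * result is +/-2.0 rather than the default NaN.  The constants above
 * are the raw encodings of 2.0: bit 30 set for float32 and bit 62 set
 * for float64, with the sign taken from sign(A) XOR sign(B).
 */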
/* 64bit/double versions of the neon float compare functions */
uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float64_eq_quiet(a, b, fpst);
}

uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float64_le(b, a, fpst);
}

uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float64_lt(b, a, fpst);
}
/* Reciprocal step and sqrt step. Note that unlike the A32/T32
 * versions, these do a fully fused multiply-add or
 * multiply-add-and-halve.
 */
uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    a = float16_chs(a);
    if ((float16_is_infinity(a) && float16_is_zero(b)) ||
        (float16_is_infinity(b) && float16_is_zero(a))) {
        return float16_two;
    }
    return float16_muladd(a, b, float16_two, 0, fpst);
}

float32 HELPER(recpsf_f32)(float32 a, float32 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    a = float32_chs(a);
    if ((float32_is_infinity(a) && float32_is_zero(b)) ||
        (float32_is_infinity(b) && float32_is_zero(a))) {
        return float32_two;
    }
    return float32_muladd(a, b, float32_two, 0, fpst);
}

float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    a = float64_chs(a);
    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
        (float64_is_infinity(b) && float64_is_zero(a))) {
        return float64_two;
    }
    return float64_muladd(a, b, float64_two, 0, fpst);
}

uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    a = float16_chs(a);
    if ((float16_is_infinity(a) && float16_is_zero(b)) ||
        (float16_is_infinity(b) && float16_is_zero(a))) {
        return float16_one_point_five;
    }
    return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst);
}

float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    a = float32_chs(a);
    if ((float32_is_infinity(a) && float32_is_zero(b)) ||
        (float32_is_infinity(b) && float32_is_zero(a))) {
        return float32_one_point_five;
    }
    return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
}

float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    a = float64_chs(a);
    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
        (float64_is_infinity(b) && float64_is_zero(a))) {
        return float64_one_point_five;
    }
    return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
}
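/*
 * FRECPS computes 2 - a * b and FRSQRTS computes (3 - a * b) / 2,
 * which is why these helpers negate 'a' and then use a fused muladd
 * (with float_muladd_halve_result for the sqrt step).  The special
 * infinity * 0 cases return exactly 2.0 and 1.5 as the architecture
 * requires.
 */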
/* Pairwise long add: add pairs of adjacent elements into
 * double-width elements in the result (eg _s8 is an 8x8->16 op)
 */
uint64_t HELPER(neon_addlp_s8)(uint64_t a)
{
    uint64_t nsignmask = 0x0080008000800080ULL;
    uint64_t wsignmask = 0x8000800080008000ULL;
    uint64_t elementmask = 0x00ff00ff00ff00ffULL;
    uint64_t tmp1, tmp2;
    uint64_t res, signres;

    /* Extract odd elements, sign extend each to a 16 bit field */
    tmp1 = a & elementmask;
    tmp1 ^= nsignmask;
    tmp1 |= wsignmask;
    tmp1 = (tmp1 - nsignmask) ^ wsignmask;
    /* Ditto for the even elements */
    tmp2 = (a >> 8) & elementmask;
    tmp2 ^= nsignmask;
    tmp2 |= wsignmask;
    tmp2 = (tmp2 - nsignmask) ^ wsignmask;

    /* calculate the result by summing bits 0..14, 16..22, etc,
     * and then adjusting the sign bits 15, 23, etc manually.
     * This ensures the addition can't overflow the 16 bit field.
     */
    signres = (tmp1 ^ tmp2) & wsignmask;
    res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
    res ^= signres;

    return res;
}
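/*
 * Worked example of the sign-extension trick for one 8-bit lane x held
 * in a 16-bit field: take x ^ 0x80, set bit 15, subtract 0x80, then
 * XOR 0x8000.  For x = 0xff (-1): 0x7f -> 0x807f -> 0x7fff -> 0xffff,
 * the correct 16-bit sign extension.  Setting bit 15 before the
 * subtraction stops any borrow escaping into the neighbouring lane.
 */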
uint64_t HELPER(neon_addlp_u8)(uint64_t a)
{
    uint64_t tmp;

    tmp = a & 0x00ff00ff00ff00ffULL;
    tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
    return tmp;
}
uint64_t HELPER(neon_addlp_s16)(uint64_t a)
{
    int32_t reslo, reshi;

    reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
    reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);

    return (uint32_t)reslo | (((uint64_t)reshi) << 32);
}
uint64_t HELPER(neon_addlp_u16)(uint64_t a)
{
    uint64_t tmp;

    tmp = a & 0x0000ffff0000ffffULL;
    tmp += (a >> 16) & 0x0000ffff0000ffffULL;
    return tmp;
}
/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
{
    float_status *fpst = fpstp;
    uint16_t val16, sbit;
    int16_t exp;

    if (float16_is_any_nan(a)) {
        float16 nan = a;
        if (float16_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float16_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float16_default_nan(fpst);
        }
        return nan;
    }

    a = float16_squash_input_denormal(a, fpst);

    val16 = float16_val(a);
    sbit = 0x8000 & val16;
    exp = extract32(val16, 10, 5);

    if (exp == 0) {
        return make_float16(deposit32(sbit, 10, 5, 0x1e));
    } else {
        return make_float16(deposit32(sbit, 10, 5, ~exp));
    }
}
float32 HELPER(frecpx_f32)(float32 a, void *fpstp)
{
    float_status *fpst = fpstp;
    uint32_t val32, sbit;
    int32_t exp;

    if (float32_is_any_nan(a)) {
        float32 nan = a;
        if (float32_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float32_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float32_default_nan(fpst);
        }
        return nan;
    }

    a = float32_squash_input_denormal(a, fpst);

    val32 = float32_val(a);
    sbit = 0x80000000ULL & val32;
    exp = extract32(val32, 23, 8);

    if (exp == 0) {
        return make_float32(sbit | (0xfe << 23));
    } else {
        return make_float32(sbit | (~exp & 0xff) << 23);
    }
}
float64 HELPER(frecpx_f64)(float64 a, void *fpstp)
{
    float_status *fpst = fpstp;
    uint64_t val64, sbit;
    int64_t exp;

    if (float64_is_any_nan(a)) {
        float64 nan = a;
        if (float64_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float64_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float64_default_nan(fpst);
        }
        return nan;
    }

    a = float64_squash_input_denormal(a, fpst);

    val64 = float64_val(a);
    sbit = 0x8000000000000000ULL & val64;
    exp = extract64(float64_val(a), 52, 11);

    if (exp == 0) {
        return make_float64(sbit | (0x7feULL << 52));
    } else {
        return make_float64(sbit | (~exp & 0x7ffULL) << 52);
    }
}
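/*
 * Example: frecpx_f64(2.0) sees val64 = 0x4000000000000000, so
 * exp = 0x400 and the result is (~0x400 & 0x7ff) << 52 =
 * 0x3ff0000000000000, i.e. 1.0: the fraction is cleared and the
 * exponent inverted, giving the cheap reciprocal scaling estimate
 * that FPRecpX specifies.
 */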
float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env)
{
    /* Von Neumann rounding is implemented by using round-to-zero
     * and then setting the LSB of the result if Inexact was raised.
     */
    float32 r;
    float_status *fpst = &env->vfp.fp_status;
    float_status tstat = *fpst;
    int exflags;

    set_float_rounding_mode(float_round_to_zero, &tstat);
    set_float_exception_flags(0, &tstat);
    r = float64_to_float32(a, &tstat);
    exflags = get_float_exception_flags(&tstat);
    if (exflags & float_flag_inexact) {
        r = make_float32(float32_val(r) | 1);
    }
    exflags |= get_float_exception_flags(fpst);
    set_float_exception_flags(exflags, fpst);
    return r;
}
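/*
 * This "round to odd" forces the result LSB to 1 whenever the
 * narrowing was inexact, so a subsequent rounding of the float32
 * result to a narrower type cannot suffer a double-rounding error.
 * That is the behaviour FCVTXN requires.
 */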
/* 64-bit versions of the CRC helpers. Note that although the operation
 * (and the prototypes of crc32c() and crc32()) means that only the bottom
 * 32 bits of the accumulator and result are used, we pass and return
 * uint64_t for convenience of the generated code. Unlike the 32-bit
 * instruction set versions, val may genuinely have 64 bits of data in it.
 * The upper bytes of val (above the number specified by 'bytes') must have
 * been zeroed out by the caller.
 */
uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* zlib crc32 converts the accumulator and output to one's complement. */
    return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
}

uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* Linux crc32c converts the output to one's complement. */
    return crc32c(acc, buf, bytes) ^ 0xffffffff;
}
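/*
 * The translator is expected to call these with 'bytes' equal to the
 * instruction's data size, e.g. 8 for CRC32X/CRC32CX and 1 for
 * CRC32B/CRC32CB, after zero-extending narrower source values.
 */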
uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
                                     uint64_t new_lo, uint64_t new_hi)
{
    Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
    Int128 newv = int128_make128(new_lo, new_hi);
    Int128 oldv;
    uintptr_t ra = GETPC();
    uint64_t o0, o1;
    bool success;
    int mem_idx = cpu_mmu_index(env, false);
    MemOpIdx oi0 = make_memop_idx(MO_LEUQ | MO_ALIGN_16, mem_idx);
    MemOpIdx oi1 = make_memop_idx(MO_LEUQ, mem_idx);

    o0 = cpu_ldq_le_mmu(env, addr + 0, oi0, ra);
    o1 = cpu_ldq_le_mmu(env, addr + 8, oi1, ra);
    oldv = int128_make128(o0, o1);

    success = int128_eq(oldv, cmpv);
    if (success) {
        cpu_stq_le_mmu(env, addr + 0, int128_getlo(newv), oi1, ra);
        cpu_stq_le_mmu(env, addr + 8, int128_gethi(newv), oi1, ra);
    }

    return !success;
}
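/*
 * These helpers back the exclusive-pair store path (STXP/STLXP): the
 * return value is 0 on success and 1 on failure, which is exactly the
 * status value the instruction writes to its Ws result register.
 */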
uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr,
                                              uint64_t new_lo, uint64_t new_hi)
{
    Int128 oldv, cmpv, newv;
    uintptr_t ra = GETPC();
    bool success;
    int mem_idx;
    MemOpIdx oi;

    assert(HAVE_CMPXCHG128);

    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_LE | MO_128 | MO_ALIGN, mem_idx);

    cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
    newv = int128_make128(new_lo, new_hi);
    oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);

    success = int128_eq(oldv, cmpv);
    return !success;
}
uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
                                     uint64_t new_lo, uint64_t new_hi)
{
    /*
     * High and low need to be switched here because this is not actually a
     * 128bit store but two doublewords stored consecutively
     */
    Int128 cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
    Int128 newv = int128_make128(new_hi, new_lo);
    Int128 oldv;
    uintptr_t ra = GETPC();
    uint64_t o0, o1;
    bool success;
    int mem_idx = cpu_mmu_index(env, false);
    MemOpIdx oi0 = make_memop_idx(MO_BEUQ | MO_ALIGN_16, mem_idx);
    MemOpIdx oi1 = make_memop_idx(MO_BEUQ, mem_idx);

    o1 = cpu_ldq_be_mmu(env, addr + 0, oi0, ra);
    o0 = cpu_ldq_be_mmu(env, addr + 8, oi1, ra);
    oldv = int128_make128(o0, o1);

    success = int128_eq(oldv, cmpv);
    if (success) {
        cpu_stq_be_mmu(env, addr + 0, int128_gethi(newv), oi1, ra);
        cpu_stq_be_mmu(env, addr + 8, int128_getlo(newv), oi1, ra);
    }

    return !success;
}
uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
                                              uint64_t new_lo, uint64_t new_hi)
{
    Int128 oldv, cmpv, newv;
    uintptr_t ra = GETPC();
    bool success;
    int mem_idx;
    MemOpIdx oi;

    assert(HAVE_CMPXCHG128);

    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_BE | MO_128 | MO_ALIGN, mem_idx);

    /*
     * High and low need to be switched here because this is not actually a
     * 128bit store but two doublewords stored consecutively
     */
    cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
    newv = int128_make128(new_hi, new_lo);
    oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);

    success = int128_eq(oldv, cmpv);
    return !success;
}
/* Writes back the old data into Rs. */
void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
                              uint64_t new_lo, uint64_t new_hi)
{
    Int128 oldv, cmpv, newv;
    uintptr_t ra = GETPC();
    int mem_idx;
    MemOpIdx oi;

    assert(HAVE_CMPXCHG128);

    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_LE | MO_128 | MO_ALIGN, mem_idx);

    cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]);
    newv = int128_make128(new_lo, new_hi);
    oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);

    env->xregs[rs] = int128_getlo(oldv);
    env->xregs[rs + 1] = int128_gethi(oldv);
}
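/*
 * Unlike the exclusive-pair helpers above, CASP has no status result:
 * whether or not the compare succeeded, the old memory contents are
 * always written back into the register pair Xs, Xs+1.
 */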
void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
                              uint64_t new_hi, uint64_t new_lo)
{
    Int128 oldv, cmpv, newv;
    uintptr_t ra = GETPC();
    int mem_idx;
    MemOpIdx oi;

    assert(HAVE_CMPXCHG128);

    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_LE | MO_128 | MO_ALIGN, mem_idx);

    cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]);
    newv = int128_make128(new_lo, new_hi);
    oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);

    env->xregs[rs + 1] = int128_getlo(oldv);
    env->xregs[rs] = int128_gethi(oldv);
}
/*
 * AdvSIMD half-precision
 */

#define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix))

#define ADVSIMD_HALFOP(name) \
uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, void *fpstp) \
{ \
    float_status *fpst = fpstp; \
    return float16_ ## name(a, b, fpst); \
}

ADVSIMD_HALFOP(add)
ADVSIMD_HALFOP(sub)
ADVSIMD_HALFOP(mul)
ADVSIMD_HALFOP(div)
ADVSIMD_HALFOP(min)
ADVSIMD_HALFOP(max)
ADVSIMD_HALFOP(minnum)
ADVSIMD_HALFOP(maxnum)

#define ADVSIMD_TWOHALFOP(name) \
uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, void *fpstp) \
{ \
    float16 a1, a2, b1, b2; \
    uint32_t r1, r2; \
    float_status *fpst = fpstp; \
    a1 = extract32(two_a, 0, 16); \
    a2 = extract32(two_a, 16, 16); \
    b1 = extract32(two_b, 0, 16); \
    b2 = extract32(two_b, 16, 16); \
    r1 = float16_ ## name(a1, b1, fpst); \
    r2 = float16_ ## name(a2, b2, fpst); \
    return deposit32(r1, 16, 16, r2); \
}

ADVSIMD_TWOHALFOP(add)
ADVSIMD_TWOHALFOP(sub)
ADVSIMD_TWOHALFOP(mul)
ADVSIMD_TWOHALFOP(div)
ADVSIMD_TWOHALFOP(min)
ADVSIMD_TWOHALFOP(max)
ADVSIMD_TWOHALFOP(minnum)
ADVSIMD_TWOHALFOP(maxnum)
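/*
 * For example, ADVSIMD_HALFOP(add) expands to helper_advsimd_addh()
 * and ADVSIMD_TWOHALFOP(add) to helper_advsimd_add2h(), each operating
 * on float16 values packed into 32-bit containers.
 */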
/* Data processing - scalar floating-point and advanced SIMD */
static float16 float16_mulx(float16 a, float16 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    if ((float16_is_zero(a) && float16_is_infinity(b)) ||
        (float16_is_infinity(a) && float16_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float16((1U << 14) |
                            ((float16_val(a) ^ float16_val(b)) & (1U << 15)));
    }
    return float16_mul(a, b, fpst);
}

ADVSIMD_HALFOP(mulx)
ADVSIMD_TWOHALFOP(mulx)
/* fused multiply-accumulate */
uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c,
                                 void *fpstp)
{
    float_status *fpst = fpstp;
    return float16_muladd(a, b, c, 0, fpst);
}

uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
                                  uint32_t two_c, void *fpstp)
{
    float_status *fpst = fpstp;
    float16 a1, a2, b1, b2, c1, c2;
    uint32_t r1, r2;

    a1 = extract32(two_a, 0, 16);
    a2 = extract32(two_a, 16, 16);
    b1 = extract32(two_b, 0, 16);
    b2 = extract32(two_b, 16, 16);
    c1 = extract32(two_c, 0, 16);
    c2 = extract32(two_c, 16, 16);
    r1 = float16_muladd(a1, b1, c1, 0, fpst);
    r2 = float16_muladd(a2, b2, c2, 0, fpst);
    return deposit32(r1, 16, 16, r2);
}
/*
 * Floating point comparisons produce an integer result. Softfloat
 * routines return float_relation types which we convert to the 0/-1
 * Neon requires.
 */
#define ADVSIMD_CMPRES(test) (test) ? 0xffff : 0
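/* A true comparison produces an all-ones 16-bit lane (0xffff); false produces 0. */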
uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    int compare = float16_compare_quiet(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}
/* round to integral */
uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, void *fp_status)
{
    return float16_round_to_int(x, fp_status);
}

uint32_t HELPER(advsimd_rinth)(uint32_t x, void *fp_status)
{
    int old_flags = get_float_exception_flags(fp_status), new_flags;
    float16 ret;

    ret = float16_round_to_int(x, fp_status);

    /* Suppress any inexact exceptions the conversion produced */
    if (!(old_flags & float_flag_inexact)) {
        new_flags = get_float_exception_flags(fp_status);
        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
    }

    return ret;
}
/*
 * Half-precision floating point conversion functions
 *
 * There are a multitude of conversion functions with various
 * different rounding modes. This is dealt with by the calling code
 * setting the mode appropriately before calling the helper.
 */

uint32_t HELPER(advsimd_f16tosinth)(uint32_t a, void *fpstp)
{
    float_status *fpst = fpstp;

    /* Invalid if we are passed a NaN */
    if (float16_is_any_nan(a)) {
        float_raise(float_flag_invalid, fpst);
        return 0;
    }
    return float16_to_int16(a, fpst);
}

uint32_t HELPER(advsimd_f16touinth)(uint32_t a, void *fpstp)
{
    float_status *fpst = fpstp;

    /* Invalid if we are passed a NaN */
    if (float16_is_any_nan(a)) {
        float_raise(float_flag_invalid, fpst);
        return 0;
    }
    return float16_to_uint16(a, fpst);
}
static int el_from_spsr(uint32_t spsr)
{
    /* Return the exception level that this SPSR is requesting a return to,
     * or -1 if it is invalid (an illegal return)
     */
    if (spsr & PSTATE_nRW) {
        switch (spsr & CPSR_M) {
        case ARM_CPU_MODE_USR:
            return 0;
        case ARM_CPU_MODE_HYP:
            return 2;
        case ARM_CPU_MODE_FIQ:
        case ARM_CPU_MODE_IRQ:
        case ARM_CPU_MODE_SVC:
        case ARM_CPU_MODE_ABT:
        case ARM_CPU_MODE_UND:
        case ARM_CPU_MODE_SYS:
            return 1;
        case ARM_CPU_MODE_MON:
            /* Returning to Mon from AArch64 is never possible,
             * so this is an illegal return.
             */
        default:
            return -1;
        }
    } else {
        if (extract32(spsr, 1, 1)) {
            /* Return with reserved M[1] bit set */
            return -1;
        }
        if (extract32(spsr, 0, 4) == 1) {
            /* return to EL0 with M[0] bit set */
            return -1;
        }
        return extract32(spsr, 2, 2);
    }
}
static void cpsr_write_from_spsr_elx(CPUARMState *env,
                                     uint32_t val)
{
    uint32_t mask;

    /* Save SPSR_ELx.SS into PSTATE. */
    env->pstate = (env->pstate & ~PSTATE_SS) | (val & PSTATE_SS);
    val &= ~PSTATE_SS;

    /* Move DIT to the correct location for CPSR */
    if (val & PSTATE_DIT) {
        val &= ~PSTATE_DIT;
        val |= CPSR_DIT;
    }

    mask = aarch32_cpsr_valid_mask(env->features,
                                   &env_archcpu(env)->isar);
    cpsr_write(env, val, mask, CPSRWriteRaw);
}
void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
{
    int cur_el = arm_current_el(env);
    unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el);
    uint32_t spsr = env->banked_spsr[spsr_idx];
    int new_el;
    bool return_to_aa64 = (spsr & PSTATE_nRW) == 0;

    aarch64_save_sp(env, cur_el);

    arm_clear_exclusive(env);

    /* We must squash the PSTATE.SS bit to zero unless both of the
     * following hold:
     *  1. debug exceptions are currently disabled
     *  2. singlestep will be active in the EL we return to
     * We check 1 here and 2 after we've done the pstate/cpsr write() to
     * transition to the EL we're going to.
     */
    if (arm_generate_debug_exceptions(env)) {
        spsr &= ~PSTATE_SS;
    }

    new_el = el_from_spsr(spsr);
    if (new_el == -1) {
        goto illegal_return;
    }
    if (new_el > cur_el || (new_el == 2 && !arm_is_el2_enabled(env))) {
        /* Disallow return to an EL which is unimplemented or higher
         * than the current one.
         */
        goto illegal_return;
    }

    if (new_el != 0 && arm_el_is_aa64(env, new_el) != return_to_aa64) {
        /* Return to an EL which is configured for a different register width */
        goto illegal_return;
    }

    if (new_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
        goto illegal_return;
    }

    qemu_mutex_lock_iothread();
    arm_call_pre_el_change_hook(env_archcpu(env));
    qemu_mutex_unlock_iothread();

    if (!return_to_aa64) {
        env->aarch64 = false;
        /* We do a raw CPSR write because aarch64_sync_64_to_32()
         * will sort the register banks out for us, and we've already
         * caught all the bad-mode cases in el_from_spsr().
         */
        cpsr_write_from_spsr_elx(env, spsr);
        if (!arm_singlestep_active(env)) {
            env->pstate &= ~PSTATE_SS;
        }
        aarch64_sync_64_to_32(env);

        if (spsr & CPSR_T) {
            env->regs[15] = new_pc & ~0x1;
        } else {
            env->regs[15] = new_pc & ~0x3;
        }
        helper_rebuild_hflags_a32(env, new_el);
        qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                      "AArch32 EL%d PC 0x%" PRIx32 "\n",
                      cur_el, new_el, env->regs[15]);
    } else {
        int tbii;

        env->aarch64 = true;
        spsr &= aarch64_pstate_valid_mask(&env_archcpu(env)->isar);
        pstate_write(env, spsr);
        if (!arm_singlestep_active(env)) {
            env->pstate &= ~PSTATE_SS;
        }
        aarch64_restore_sp(env, new_el);
        helper_rebuild_hflags_a64(env, new_el);

        /*
         * Apply TBI to the exception return address. We had to delay this
         * until after we selected the new EL, so that we could select the
         * correct TBI+TBID bits. This is made easier by waiting until after
         * the hflags rebuild, since we can pull the composite TBII field
         * from there.
         */
        tbii = EX_TBFLAG_A64(env->hflags, TBII);
        if ((tbii >> extract64(new_pc, 55, 1)) & 1) {
            /* TBI is enabled. */
            int core_mmu_idx = cpu_mmu_index(env, false);
            if (regime_has_2_ranges(core_to_aa64_mmu_idx(core_mmu_idx))) {
                new_pc = sextract64(new_pc, 0, 56);
            } else {
                new_pc = extract64(new_pc, 0, 56);
            }
        }
        env->pc = new_pc;

        qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                      "AArch64 EL%d PC 0x%" PRIx64 "\n",
                      cur_el, new_el, env->pc);
    }

    /*
     * Note that cur_el can never be 0. If new_el is 0, then
     * el0_a64 is return_to_aa64, else el0_a64 is ignored.
     */
    aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64);

    qemu_mutex_lock_iothread();
    arm_call_el_change_hook(env_archcpu(env));
    qemu_mutex_unlock_iothread();

    return;

illegal_return:
    /* Illegal return events of various kinds have architecturally
     * mandated behaviour:
     * restore NZCV and DAIF from SPSR_ELx
     * set PSTATE.IL
     * restore PC from ELR_ELx
     * no change to exception level, execution state or stack pointer
     */
    env->pstate |= PSTATE_IL;
    env->pc = new_pc;
    spsr &= PSTATE_NZCV | PSTATE_DAIF;
    spsr |= pstate_read(env) & ~(PSTATE_NZCV | PSTATE_DAIF);
    pstate_write(env, spsr);
    if (!arm_singlestep_active(env)) {
        env->pstate &= ~PSTATE_SS;
    }
    helper_rebuild_hflags_a64(env, cur_el);
    qemu_log_mask(LOG_GUEST_ERROR, "Illegal exception return at EL%d: "
                  "resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc);
}
/*
 * Square Root and Reciprocal square root
 */

uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp)
{
    float_status *s = fpstp;

    return float16_sqrt(a, s);
}
void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
{
    /*
     * Implement DC ZVA, which zeroes a fixed-length block of memory.
     * Note that we do not implement the (architecturally mandated)
     * alignment fault for attempts to use this on Device memory
     * (which matches the usual QEMU behaviour of not implementing either
     * alignment faults or any memory attribute handling).
     */
    int blocklen = 4 << env_archcpu(env)->dcz_blocksize;
    uint64_t vaddr = vaddr_in & ~(blocklen - 1);
    int mmu_idx = cpu_mmu_index(env, false);
    void *mem;

    /*
     * Trapless lookup. In addition to actual invalid page, may
     * return NULL for I/O, watchpoints, clean pages, etc.
     */
    mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);

#ifndef CONFIG_USER_ONLY
    if (unlikely(!mem)) {
        uintptr_t ra = GETPC();

        /*
         * Trap if accessing an invalid page. DC_ZVA requires that we supply
         * the original pointer for an invalid page. But watchpoints require
         * that we probe the actual space. So do both.
         */
        (void) probe_write(env, vaddr_in, 1, mmu_idx, ra);
        mem = probe_write(env, vaddr, blocklen, mmu_idx, ra);

        if (unlikely(!mem)) {
            /*
             * The only remaining reason for mem == NULL is I/O.
             * Just do a series of byte writes as the architecture demands.
             */
            for (int i = 0; i < blocklen; i++) {
                cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra);
            }
            return;
        }
    }
#endif

    memset(mem, 0, blocklen);
}