2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
23 #include "exec/helper-proto.h"
24 #include "qemu/host-utils.h"
25 #include "exec/exec-all.h"
26 #include "exec/cpu_ldst.h"
27 #include "fpu/softfloat.h"
28 #include "fpu/softfloat-macros.h"
29 #include "helper-tcg.h"
36 #define FT0 (env->ft0)
37 #define ST0 (env->fpregs[env->fpstt].d)
38 #define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d)
41 #define FPU_RC_MASK 0xc00
42 #define FPU_RC_NEAR 0x000
43 #define FPU_RC_DOWN 0x400
44 #define FPU_RC_UP 0x800
45 #define FPU_RC_CHOP 0xc00
47 #define MAXTAN 9223372036854775808.0
/*
 * Accessors for x86 long double-precision numbers.  The argument must be
 * an object exposing the 64-bit significand as .l.lower and the 16-bit
 * sign/exponent word as .l.upper.  The argument is parenthesized so that
 * expressions such as *ptr or arr[i] expand correctly.
 */
#define MAXEXPD 0x7fff
/* Exponent field: low 15 bits of the upper word. */
#define EXPD(fp)        ((fp).l.upper & 0x7fff)
/* Sign bit: bit 15 of the upper word (non-zero when set). */
#define SIGND(fp)       ((fp).l.upper & 0x8000)
/* Significand: the lower 64-bit word. */
#define MANTD(fp)       ((fp).l.lower)
/* Replace the exponent field with EXPBIAS, keeping the sign bit. */
#define BIASEXPONENT(fp) \
    ((fp).l.upper = ((fp).l.upper & ~(0x7fff)) | EXPBIAS)
57 #define FPUS_IE (1 << 0)
58 #define FPUS_DE (1 << 1)
59 #define FPUS_ZE (1 << 2)
60 #define FPUS_OE (1 << 3)
61 #define FPUS_UE (1 << 4)
62 #define FPUS_PE (1 << 5)
63 #define FPUS_SF (1 << 6)
64 #define FPUS_SE (1 << 7)
65 #define FPUS_B (1 << 15)
69 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
70 #define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
71 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
72 #define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
73 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
74 #define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
75 #define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
76 #define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)
78 #if !defined(CONFIG_USER_ONLY)
79 static qemu_irq ferr_irq
;
81 void x86_register_ferr_irq(qemu_irq irq
)
86 static void cpu_clear_ignne(void)
88 CPUX86State
*env
= &X86_CPU(first_cpu
)->env
;
89 env
->hflags2
&= ~HF2_IGNNE_MASK
;
92 void cpu_set_ignne(void)
94 CPUX86State
*env
= &X86_CPU(first_cpu
)->env
;
95 env
->hflags2
|= HF2_IGNNE_MASK
;
97 * We get here in response to a write to port F0h. The chipset should
98 * deassert FP_IRQ and FERR# instead should stay signaled until FPSW_SE is
99 * cleared, because FERR# and FP_IRQ are two separate pins on real
100 * hardware. However, we don't model FERR# as a qemu_irq, so we just
101 * do directly what the chipset would do, i.e. deassert FP_IRQ.
103 qemu_irq_lower(ferr_irq
);
108 static inline void fpush(CPUX86State
*env
)
110 env
->fpstt
= (env
->fpstt
- 1) & 7;
111 env
->fptags
[env
->fpstt
] = 0; /* validate stack entry */
114 static inline void fpop(CPUX86State
*env
)
116 env
->fptags
[env
->fpstt
] = 1; /* invalidate stack entry */
117 env
->fpstt
= (env
->fpstt
+ 1) & 7;
120 static inline floatx80
helper_fldt(CPUX86State
*env
, target_ulong ptr
,
125 temp
.l
.lower
= cpu_ldq_data_ra(env
, ptr
, retaddr
);
126 temp
.l
.upper
= cpu_lduw_data_ra(env
, ptr
+ 8, retaddr
);
130 static inline void helper_fstt(CPUX86State
*env
, floatx80 f
, target_ulong ptr
,
136 cpu_stq_data_ra(env
, ptr
, temp
.l
.lower
, retaddr
);
137 cpu_stw_data_ra(env
, ptr
+ 8, temp
.l
.upper
, retaddr
);
140 /* x87 FPU helpers */
142 static inline double floatx80_to_double(CPUX86State
*env
, floatx80 a
)
149 u
.f64
= floatx80_to_float64(a
, &env
->fp_status
);
153 static inline floatx80
double_to_floatx80(CPUX86State
*env
, double a
)
161 return float64_to_floatx80(u
.f64
, &env
->fp_status
);
164 static void fpu_set_exception(CPUX86State
*env
, int mask
)
167 if (env
->fpus
& (~env
->fpuc
& FPUC_EM
)) {
168 env
->fpus
|= FPUS_SE
| FPUS_B
;
172 static inline uint8_t save_exception_flags(CPUX86State
*env
)
174 uint8_t old_flags
= get_float_exception_flags(&env
->fp_status
);
175 set_float_exception_flags(0, &env
->fp_status
);
179 static void merge_exception_flags(CPUX86State
*env
, uint8_t old_flags
)
181 uint8_t new_flags
= get_float_exception_flags(&env
->fp_status
);
182 float_raise(old_flags
, &env
->fp_status
);
183 fpu_set_exception(env
,
184 ((new_flags
& float_flag_invalid
? FPUS_IE
: 0) |
185 (new_flags
& float_flag_divbyzero
? FPUS_ZE
: 0) |
186 (new_flags
& float_flag_overflow
? FPUS_OE
: 0) |
187 (new_flags
& float_flag_underflow
? FPUS_UE
: 0) |
188 (new_flags
& float_flag_inexact
? FPUS_PE
: 0) |
189 (new_flags
& float_flag_input_denormal
? FPUS_DE
: 0)));
192 static inline floatx80
helper_fdiv(CPUX86State
*env
, floatx80 a
, floatx80 b
)
194 uint8_t old_flags
= save_exception_flags(env
);
195 floatx80 ret
= floatx80_div(a
, b
, &env
->fp_status
);
196 merge_exception_flags(env
, old_flags
);
200 static void fpu_raise_exception(CPUX86State
*env
, uintptr_t retaddr
)
202 if (env
->cr
[0] & CR0_NE_MASK
) {
203 raise_exception_ra(env
, EXCP10_COPR
, retaddr
);
205 #if !defined(CONFIG_USER_ONLY)
206 else if (ferr_irq
&& !(env
->hflags2
& HF2_IGNNE_MASK
)) {
207 qemu_irq_raise(ferr_irq
);
212 void helper_flds_FT0(CPUX86State
*env
, uint32_t val
)
214 uint8_t old_flags
= save_exception_flags(env
);
221 FT0
= float32_to_floatx80(u
.f
, &env
->fp_status
);
222 merge_exception_flags(env
, old_flags
);
225 void helper_fldl_FT0(CPUX86State
*env
, uint64_t val
)
227 uint8_t old_flags
= save_exception_flags(env
);
234 FT0
= float64_to_floatx80(u
.f
, &env
->fp_status
);
235 merge_exception_flags(env
, old_flags
);
238 void helper_fildl_FT0(CPUX86State
*env
, int32_t val
)
240 FT0
= int32_to_floatx80(val
, &env
->fp_status
);
243 void helper_flds_ST0(CPUX86State
*env
, uint32_t val
)
245 uint8_t old_flags
= save_exception_flags(env
);
252 new_fpstt
= (env
->fpstt
- 1) & 7;
254 env
->fpregs
[new_fpstt
].d
= float32_to_floatx80(u
.f
, &env
->fp_status
);
255 env
->fpstt
= new_fpstt
;
256 env
->fptags
[new_fpstt
] = 0; /* validate stack entry */
257 merge_exception_flags(env
, old_flags
);
260 void helper_fldl_ST0(CPUX86State
*env
, uint64_t val
)
262 uint8_t old_flags
= save_exception_flags(env
);
269 new_fpstt
= (env
->fpstt
- 1) & 7;
271 env
->fpregs
[new_fpstt
].d
= float64_to_floatx80(u
.f
, &env
->fp_status
);
272 env
->fpstt
= new_fpstt
;
273 env
->fptags
[new_fpstt
] = 0; /* validate stack entry */
274 merge_exception_flags(env
, old_flags
);
277 void helper_fildl_ST0(CPUX86State
*env
, int32_t val
)
281 new_fpstt
= (env
->fpstt
- 1) & 7;
282 env
->fpregs
[new_fpstt
].d
= int32_to_floatx80(val
, &env
->fp_status
);
283 env
->fpstt
= new_fpstt
;
284 env
->fptags
[new_fpstt
] = 0; /* validate stack entry */
287 void helper_fildll_ST0(CPUX86State
*env
, int64_t val
)
291 new_fpstt
= (env
->fpstt
- 1) & 7;
292 env
->fpregs
[new_fpstt
].d
= int64_to_floatx80(val
, &env
->fp_status
);
293 env
->fpstt
= new_fpstt
;
294 env
->fptags
[new_fpstt
] = 0; /* validate stack entry */
297 uint32_t helper_fsts_ST0(CPUX86State
*env
)
299 uint8_t old_flags
= save_exception_flags(env
);
305 u
.f
= floatx80_to_float32(ST0
, &env
->fp_status
);
306 merge_exception_flags(env
, old_flags
);
310 uint64_t helper_fstl_ST0(CPUX86State
*env
)
312 uint8_t old_flags
= save_exception_flags(env
);
318 u
.f
= floatx80_to_float64(ST0
, &env
->fp_status
);
319 merge_exception_flags(env
, old_flags
);
323 int32_t helper_fist_ST0(CPUX86State
*env
)
325 uint8_t old_flags
= save_exception_flags(env
);
328 val
= floatx80_to_int32(ST0
, &env
->fp_status
);
329 if (val
!= (int16_t)val
) {
330 set_float_exception_flags(float_flag_invalid
, &env
->fp_status
);
333 merge_exception_flags(env
, old_flags
);
337 int32_t helper_fistl_ST0(CPUX86State
*env
)
339 uint8_t old_flags
= save_exception_flags(env
);
342 val
= floatx80_to_int32(ST0
, &env
->fp_status
);
343 if (get_float_exception_flags(&env
->fp_status
) & float_flag_invalid
) {
346 merge_exception_flags(env
, old_flags
);
350 int64_t helper_fistll_ST0(CPUX86State
*env
)
352 uint8_t old_flags
= save_exception_flags(env
);
355 val
= floatx80_to_int64(ST0
, &env
->fp_status
);
356 if (get_float_exception_flags(&env
->fp_status
) & float_flag_invalid
) {
357 val
= 0x8000000000000000ULL
;
359 merge_exception_flags(env
, old_flags
);
363 int32_t helper_fistt_ST0(CPUX86State
*env
)
365 uint8_t old_flags
= save_exception_flags(env
);
368 val
= floatx80_to_int32_round_to_zero(ST0
, &env
->fp_status
);
369 if (val
!= (int16_t)val
) {
370 set_float_exception_flags(float_flag_invalid
, &env
->fp_status
);
373 merge_exception_flags(env
, old_flags
);
377 int32_t helper_fisttl_ST0(CPUX86State
*env
)
379 uint8_t old_flags
= save_exception_flags(env
);
382 val
= floatx80_to_int32_round_to_zero(ST0
, &env
->fp_status
);
383 if (get_float_exception_flags(&env
->fp_status
) & float_flag_invalid
) {
386 merge_exception_flags(env
, old_flags
);
390 int64_t helper_fisttll_ST0(CPUX86State
*env
)
392 uint8_t old_flags
= save_exception_flags(env
);
395 val
= floatx80_to_int64_round_to_zero(ST0
, &env
->fp_status
);
396 if (get_float_exception_flags(&env
->fp_status
) & float_flag_invalid
) {
397 val
= 0x8000000000000000ULL
;
399 merge_exception_flags(env
, old_flags
);
403 void helper_fldt_ST0(CPUX86State
*env
, target_ulong ptr
)
407 new_fpstt
= (env
->fpstt
- 1) & 7;
408 env
->fpregs
[new_fpstt
].d
= helper_fldt(env
, ptr
, GETPC());
409 env
->fpstt
= new_fpstt
;
410 env
->fptags
[new_fpstt
] = 0; /* validate stack entry */
413 void helper_fstt_ST0(CPUX86State
*env
, target_ulong ptr
)
415 helper_fstt(env
, ST0
, ptr
, GETPC());
418 void helper_fpush(CPUX86State
*env
)
423 void helper_fpop(CPUX86State
*env
)
428 void helper_fdecstp(CPUX86State
*env
)
430 env
->fpstt
= (env
->fpstt
- 1) & 7;
431 env
->fpus
&= ~0x4700;
434 void helper_fincstp(CPUX86State
*env
)
436 env
->fpstt
= (env
->fpstt
+ 1) & 7;
437 env
->fpus
&= ~0x4700;
442 void helper_ffree_STN(CPUX86State
*env
, int st_index
)
444 env
->fptags
[(env
->fpstt
+ st_index
) & 7] = 1;
447 void helper_fmov_ST0_FT0(CPUX86State
*env
)
452 void helper_fmov_FT0_STN(CPUX86State
*env
, int st_index
)
457 void helper_fmov_ST0_STN(CPUX86State
*env
, int st_index
)
462 void helper_fmov_STN_ST0(CPUX86State
*env
, int st_index
)
467 void helper_fxchg_ST0_STN(CPUX86State
*env
, int st_index
)
/*
 * Status-word condition bits for a comparison result; the helpers index
 * this table with (compare result + 1).
 */
static const int fcom_ccval[4] = {
    0x0100,
    0x4000,
    0x0000,
    0x4500,
};
480 void helper_fcom_ST0_FT0(CPUX86State
*env
)
482 uint8_t old_flags
= save_exception_flags(env
);
485 ret
= floatx80_compare(ST0
, FT0
, &env
->fp_status
);
486 env
->fpus
= (env
->fpus
& ~0x4500) | fcom_ccval
[ret
+ 1];
487 merge_exception_flags(env
, old_flags
);
490 void helper_fucom_ST0_FT0(CPUX86State
*env
)
492 uint8_t old_flags
= save_exception_flags(env
);
495 ret
= floatx80_compare_quiet(ST0
, FT0
, &env
->fp_status
);
496 env
->fpus
= (env
->fpus
& ~0x4500) | fcom_ccval
[ret
+ 1];
497 merge_exception_flags(env
, old_flags
);
500 static const int fcomi_ccval
[4] = {CC_C
, CC_Z
, 0, CC_Z
| CC_P
| CC_C
};
502 void helper_fcomi_ST0_FT0(CPUX86State
*env
)
504 uint8_t old_flags
= save_exception_flags(env
);
508 ret
= floatx80_compare(ST0
, FT0
, &env
->fp_status
);
509 eflags
= cpu_cc_compute_all(env
, CC_OP
);
510 eflags
= (eflags
& ~(CC_Z
| CC_P
| CC_C
)) | fcomi_ccval
[ret
+ 1];
512 merge_exception_flags(env
, old_flags
);
515 void helper_fucomi_ST0_FT0(CPUX86State
*env
)
517 uint8_t old_flags
= save_exception_flags(env
);
521 ret
= floatx80_compare_quiet(ST0
, FT0
, &env
->fp_status
);
522 eflags
= cpu_cc_compute_all(env
, CC_OP
);
523 eflags
= (eflags
& ~(CC_Z
| CC_P
| CC_C
)) | fcomi_ccval
[ret
+ 1];
525 merge_exception_flags(env
, old_flags
);
528 void helper_fadd_ST0_FT0(CPUX86State
*env
)
530 uint8_t old_flags
= save_exception_flags(env
);
531 ST0
= floatx80_add(ST0
, FT0
, &env
->fp_status
);
532 merge_exception_flags(env
, old_flags
);
535 void helper_fmul_ST0_FT0(CPUX86State
*env
)
537 uint8_t old_flags
= save_exception_flags(env
);
538 ST0
= floatx80_mul(ST0
, FT0
, &env
->fp_status
);
539 merge_exception_flags(env
, old_flags
);
542 void helper_fsub_ST0_FT0(CPUX86State
*env
)
544 uint8_t old_flags
= save_exception_flags(env
);
545 ST0
= floatx80_sub(ST0
, FT0
, &env
->fp_status
);
546 merge_exception_flags(env
, old_flags
);
549 void helper_fsubr_ST0_FT0(CPUX86State
*env
)
551 uint8_t old_flags
= save_exception_flags(env
);
552 ST0
= floatx80_sub(FT0
, ST0
, &env
->fp_status
);
553 merge_exception_flags(env
, old_flags
);
556 void helper_fdiv_ST0_FT0(CPUX86State
*env
)
558 ST0
= helper_fdiv(env
, ST0
, FT0
);
561 void helper_fdivr_ST0_FT0(CPUX86State
*env
)
563 ST0
= helper_fdiv(env
, FT0
, ST0
);
566 /* fp operations between STN and ST0 */
568 void helper_fadd_STN_ST0(CPUX86State
*env
, int st_index
)
570 uint8_t old_flags
= save_exception_flags(env
);
571 ST(st_index
) = floatx80_add(ST(st_index
), ST0
, &env
->fp_status
);
572 merge_exception_flags(env
, old_flags
);
575 void helper_fmul_STN_ST0(CPUX86State
*env
, int st_index
)
577 uint8_t old_flags
= save_exception_flags(env
);
578 ST(st_index
) = floatx80_mul(ST(st_index
), ST0
, &env
->fp_status
);
579 merge_exception_flags(env
, old_flags
);
582 void helper_fsub_STN_ST0(CPUX86State
*env
, int st_index
)
584 uint8_t old_flags
= save_exception_flags(env
);
585 ST(st_index
) = floatx80_sub(ST(st_index
), ST0
, &env
->fp_status
);
586 merge_exception_flags(env
, old_flags
);
589 void helper_fsubr_STN_ST0(CPUX86State
*env
, int st_index
)
591 uint8_t old_flags
= save_exception_flags(env
);
592 ST(st_index
) = floatx80_sub(ST0
, ST(st_index
), &env
->fp_status
);
593 merge_exception_flags(env
, old_flags
);
596 void helper_fdiv_STN_ST0(CPUX86State
*env
, int st_index
)
601 *p
= helper_fdiv(env
, *p
, ST0
);
604 void helper_fdivr_STN_ST0(CPUX86State
*env
, int st_index
)
609 *p
= helper_fdiv(env
, ST0
, *p
);
612 /* misc FPU operations */
613 void helper_fchs_ST0(CPUX86State
*env
)
615 ST0
= floatx80_chs(ST0
);
618 void helper_fabs_ST0(CPUX86State
*env
)
620 ST0
= floatx80_abs(ST0
);
623 void helper_fld1_ST0(CPUX86State
*env
)
628 void helper_fldl2t_ST0(CPUX86State
*env
)
630 switch (env
->fpuc
& FPU_RC_MASK
) {
632 ST0
= floatx80_l2t_u
;
640 void helper_fldl2e_ST0(CPUX86State
*env
)
642 switch (env
->fpuc
& FPU_RC_MASK
) {
645 ST0
= floatx80_l2e_d
;
653 void helper_fldpi_ST0(CPUX86State
*env
)
655 switch (env
->fpuc
& FPU_RC_MASK
) {
666 void helper_fldlg2_ST0(CPUX86State
*env
)
668 switch (env
->fpuc
& FPU_RC_MASK
) {
671 ST0
= floatx80_lg2_d
;
679 void helper_fldln2_ST0(CPUX86State
*env
)
681 switch (env
->fpuc
& FPU_RC_MASK
) {
684 ST0
= floatx80_ln2_d
;
692 void helper_fldz_ST0(CPUX86State
*env
)
697 void helper_fldz_FT0(CPUX86State
*env
)
702 uint32_t helper_fnstsw(CPUX86State
*env
)
704 return (env
->fpus
& ~0x3800) | (env
->fpstt
& 0x7) << 11;
707 uint32_t helper_fnstcw(CPUX86State
*env
)
712 void update_fp_status(CPUX86State
*env
)
716 /* set rounding mode */
717 switch (env
->fpuc
& FPU_RC_MASK
) {
720 rnd_type
= float_round_nearest_even
;
723 rnd_type
= float_round_down
;
726 rnd_type
= float_round_up
;
729 rnd_type
= float_round_to_zero
;
732 set_float_rounding_mode(rnd_type
, &env
->fp_status
);
733 switch ((env
->fpuc
>> 8) & 3) {
745 set_floatx80_rounding_precision(rnd_type
, &env
->fp_status
);
748 void helper_fldcw(CPUX86State
*env
, uint32_t val
)
750 cpu_set_fpuc(env
, val
);
753 void helper_fclex(CPUX86State
*env
)
758 void helper_fwait(CPUX86State
*env
)
760 if (env
->fpus
& FPUS_SE
) {
761 fpu_raise_exception(env
, GETPC());
765 void helper_fninit(CPUX86State
*env
)
769 cpu_set_fpuc(env
, 0x37f);
782 void helper_fbld_ST0(CPUX86State
*env
, target_ulong ptr
)
790 for (i
= 8; i
>= 0; i
--) {
791 v
= cpu_ldub_data_ra(env
, ptr
+ i
, GETPC());
792 val
= (val
* 100) + ((v
>> 4) * 10) + (v
& 0xf);
794 tmp
= int64_to_floatx80(val
, &env
->fp_status
);
795 if (cpu_ldub_data_ra(env
, ptr
+ 9, GETPC()) & 0x80) {
796 tmp
= floatx80_chs(tmp
);
802 void helper_fbst_ST0(CPUX86State
*env
, target_ulong ptr
)
804 uint8_t old_flags
= save_exception_flags(env
);
806 target_ulong mem_ref
, mem_end
;
812 val
= floatx80_to_int64(ST0
, &env
->fp_status
);
814 if (val
>= 1000000000000000000LL || val
<= -1000000000000000000LL) {
815 set_float_exception_flags(float_flag_invalid
, &env
->fp_status
);
816 while (mem_ref
< ptr
+ 7) {
817 cpu_stb_data_ra(env
, mem_ref
++, 0, GETPC());
819 cpu_stb_data_ra(env
, mem_ref
++, 0xc0, GETPC());
820 cpu_stb_data_ra(env
, mem_ref
++, 0xff, GETPC());
821 cpu_stb_data_ra(env
, mem_ref
++, 0xff, GETPC());
822 merge_exception_flags(env
, old_flags
);
825 mem_end
= mem_ref
+ 9;
827 cpu_stb_data_ra(env
, mem_end
, 0x80, GETPC());
830 cpu_stb_data_ra(env
, mem_end
, 0x00, GETPC());
832 while (mem_ref
< mem_end
) {
838 v
= ((v
/ 10) << 4) | (v
% 10);
839 cpu_stb_data_ra(env
, mem_ref
++, v
, GETPC());
841 while (mem_ref
< mem_end
) {
842 cpu_stb_data_ra(env
, mem_ref
++, 0, GETPC());
844 merge_exception_flags(env
, old_flags
);
847 /* 128-bit significand of log(2). */
848 #define ln2_sig_high 0xb17217f7d1cf79abULL
849 #define ln2_sig_low 0xc9e3b39803f2f6afULL
852 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
853 * the interval [-1/64, 1/64].
855 #define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
856 #define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
857 #define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
858 #define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
859 #define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
860 #define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
861 #define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
862 #define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
863 #define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL)
867 * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
868 * are very close to exact floatx80 values.
871 /* The value of 2^t. */
873 /* The value of 2^t - 1. */
877 static const struct f2xm1_data f2xm1_table
[65] = {
878 { make_floatx80_init(0xbfff, 0x8000000000000000ULL
),
879 make_floatx80_init(0x3ffe, 0x8000000000000000ULL
),
880 make_floatx80_init(0xbffe, 0x8000000000000000ULL
) },
881 { make_floatx80_init(0xbffe, 0xf800000000002e7eULL
),
882 make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL
),
883 make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL
) },
884 { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL
),
885 make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL
),
886 make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL
) },
887 { make_floatx80_init(0xbffe, 0xe800000000006f10ULL
),
888 make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL
),
889 make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL
) },
890 { make_floatx80_init(0xbffe, 0xe000000000008a45ULL
),
891 make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL
),
892 make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL
) },
893 { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL
),
894 make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL
),
895 make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL
) },
896 { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL
),
897 make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL
),
898 make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL
) },
899 { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL
),
900 make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL
),
901 make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL
) },
902 { make_floatx80_init(0xbffe, 0xc000000000006530ULL
),
903 make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL
),
904 make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL
) },
905 { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL
),
906 make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL
),
907 make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL
) },
908 { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL
),
909 make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL
),
910 make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL
) },
911 { make_floatx80_init(0xbffe, 0xa800000000006f8aULL
),
912 make_floatx80_init(0x3ffe, 0xa27043030c49370aULL
),
913 make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL
) },
914 { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL
),
915 make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL
),
916 make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL
) },
917 { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL
),
918 make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL
),
919 make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL
) },
920 { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL
),
921 make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL
),
922 make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL
) },
923 { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL
),
924 make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL
),
925 make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL
) },
926 { make_floatx80_init(0xbffe, 0x800000000000227dULL
),
927 make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL
),
928 make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL
) },
929 { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL
),
930 make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL
),
931 make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL
) },
932 { make_floatx80_init(0xbffd, 0xe00000000000df81ULL
),
933 make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL
),
934 make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL
) },
935 { make_floatx80_init(0xbffd, 0xd00000000000bccfULL
),
936 make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL
),
937 make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL
) },
938 { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL
),
939 make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL
),
940 make_floatx80_init(0xbffc, 0xea6357baabe59508ULL
) },
941 { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL
),
942 make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL
),
943 make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL
) },
944 { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL
),
945 make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL
),
946 make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL
) },
947 { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL
),
948 make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL
),
949 make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL
) },
950 { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL
),
951 make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL
),
952 make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL
) },
953 { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL
),
954 make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL
),
955 make_floatx80_init(0xbffc, 0x901121a0943696f0ULL
) },
956 { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL
),
957 make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL
),
958 make_floatx80_init(0xbffb, 0xf999089eab583f78ULL
) },
959 { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL
),
960 make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL
),
961 make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL
) },
962 { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL
),
963 make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL
),
964 make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL
) },
965 { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL
),
966 make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL
),
967 make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL
) },
968 { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL
),
969 make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL
),
970 make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL
) },
971 { make_floatx80_init(0xbff9, 0xffffffffffff11feULL
),
972 make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL
),
973 make_floatx80_init(0xbff9, 0xaf89a491babef740ULL
) },
974 { floatx80_zero_init
,
975 make_floatx80_init(0x3fff, 0x8000000000000000ULL
),
976 floatx80_zero_init
},
977 { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL
),
978 make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL
),
979 make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL
) },
980 { make_floatx80_init(0x3ffb, 0x800000000000b500ULL
),
981 make_floatx80_init(0x3fff, 0x85aac367cc488345ULL
),
982 make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL
) },
983 { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL
),
984 make_floatx80_init(0x3fff, 0x88980e8092da7cceULL
),
985 make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL
) },
986 { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL
),
987 make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL
),
988 make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL
) },
989 { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL
),
990 make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL
),
991 make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL
) },
992 { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL
),
993 make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL
),
994 make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL
) },
995 { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL
),
996 make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL
),
997 make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL
) },
998 { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL
),
999 make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL
),
1000 make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL
) },
1001 { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL
),
1002 make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL
),
1003 make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL
) },
1004 { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL
),
1005 make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL
),
1006 make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL
) },
1007 { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL
),
1008 make_floatx80_init(0x3fff, 0xa27043030c49370aULL
),
1009 make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL
) },
1010 { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL
),
1011 make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL
),
1012 make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL
) },
1013 { make_floatx80_init(0x3ffd, 0xd0000000000093beULL
),
1014 make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL
),
1015 make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL
) },
1016 { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL
),
1017 make_floatx80_init(0x3fff, 0xad583eea42a17876ULL
),
1018 make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL
) },
1019 { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL
),
1020 make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL
),
1021 make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL
) },
1022 { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL
),
1023 make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL
),
1024 make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL
) },
1025 { make_floatx80_init(0x3ffe, 0x8800000000006344ULL
),
1026 make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL
),
1027 make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL
) },
1028 { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL
),
1029 make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL
),
1030 make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL
) },
1031 { make_floatx80_init(0x3ffe, 0x9800000000009127ULL
),
1032 make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL
),
1033 make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL
) },
1034 { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL
),
1035 make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL
),
1036 make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL
) },
1037 { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL
),
1038 make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL
),
1039 make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL
) },
1040 { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL
),
1041 make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL
),
1042 make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL
) },
1043 { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL
),
1044 make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL
),
1045 make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL
) },
1046 { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL
),
1047 make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL
),
1048 make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL
) },
1049 { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL
),
1050 make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL
),
1051 make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL
) },
1052 { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL
),
1053 make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL
),
1054 make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL
) },
1055 { make_floatx80_init(0x3ffe, 0xd800000000004165ULL
),
1056 make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL
),
1057 make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL
) },
1058 { make_floatx80_init(0x3ffe, 0xe00000000000582cULL
),
1059 make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL
),
1060 make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL
) },
1061 { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL
),
1062 make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL
),
1063 make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL
) },
1064 { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL
),
1065 make_floatx80_init(0x3fff, 0xf5257d152486a2faULL
),
1066 make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL
) },
1067 { make_floatx80_init(0x3ffe, 0xf800000000001069ULL
),
1068 make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL
),
1069 make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL
) },
1070 { make_floatx80_init(0x3fff, 0x8000000000000000ULL
),
1071 make_floatx80_init(0x4000, 0x8000000000000000ULL
),
1072 make_floatx80_init(0x3fff, 0x8000000000000000ULL
) },
1075 void helper_f2xm1(CPUX86State
*env
)
1077 uint8_t old_flags
= save_exception_flags(env
);
1078 uint64_t sig
= extractFloatx80Frac(ST0
);
1079 int32_t exp
= extractFloatx80Exp(ST0
);
1080 bool sign
= extractFloatx80Sign(ST0
);
1082 if (floatx80_invalid_encoding(ST0
)) {
1083 float_raise(float_flag_invalid
, &env
->fp_status
);
1084 ST0
= floatx80_default_nan(&env
->fp_status
);
1085 } else if (floatx80_is_any_nan(ST0
)) {
1086 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
1087 float_raise(float_flag_invalid
, &env
->fp_status
);
1088 ST0
= floatx80_silence_nan(ST0
, &env
->fp_status
);
1090 } else if (exp
> 0x3fff ||
1091 (exp
== 0x3fff && sig
!= (0x8000000000000000ULL
))) {
1092 /* Out of range for the instruction, treat as invalid. */
1093 float_raise(float_flag_invalid
, &env
->fp_status
);
1094 ST0
= floatx80_default_nan(&env
->fp_status
);
1095 } else if (exp
== 0x3fff) {
1096 /* Argument 1 or -1, exact result 1 or -0.5. */
1098 ST0
= make_floatx80(0xbffe, 0x8000000000000000ULL
);
1100 } else if (exp
< 0x3fb0) {
1101 if (!floatx80_is_zero(ST0
)) {
1103 * Multiplying the argument by an extra-precision version
1104 * of log(2) is sufficiently precise. Zero arguments are
1105 * returned unchanged.
1107 uint64_t sig0
, sig1
, sig2
;
1109 normalizeFloatx80Subnormal(sig
, &exp
, &sig
);
1111 mul128By64To192(ln2_sig_high
, ln2_sig_low
, sig
, &sig0
, &sig1
,
1113 /* This result is inexact. */
1115 ST0
= normalizeRoundAndPackFloatx80(80, sign
, exp
, sig0
, sig1
,
1119 floatx80 tmp
, y
, accum
;
1121 int32_t n
, aexp
, bexp
;
1122 uint64_t asig0
, asig1
, asig2
, bsig0
, bsig1
;
1123 FloatRoundMode save_mode
= env
->fp_status
.float_rounding_mode
;
1124 signed char save_prec
= env
->fp_status
.floatx80_rounding_precision
;
1125 env
->fp_status
.float_rounding_mode
= float_round_nearest_even
;
1126 env
->fp_status
.floatx80_rounding_precision
= 80;
1128 /* Find the nearest multiple of 1/32 to the argument. */
1129 tmp
= floatx80_scalbn(ST0
, 5, &env
->fp_status
);
1130 n
= 32 + floatx80_to_int32(tmp
, &env
->fp_status
);
1131 y
= floatx80_sub(ST0
, f2xm1_table
[n
].t
, &env
->fp_status
);
1133 if (floatx80_is_zero(y
)) {
1135 * Use the value of 2^t - 1 from the table, to avoid
1136 * needing to special-case zero as a result of
1137 * multiplication below.
1139 ST0
= f2xm1_table
[n
].t
;
1140 set_float_exception_flags(float_flag_inexact
, &env
->fp_status
);
1141 env
->fp_status
.float_rounding_mode
= save_mode
;
1144 * Compute the lower parts of a polynomial expansion for
1147 accum
= floatx80_mul(f2xm1_coeff_7
, y
, &env
->fp_status
);
1148 accum
= floatx80_add(f2xm1_coeff_6
, accum
, &env
->fp_status
);
1149 accum
= floatx80_mul(accum
, y
, &env
->fp_status
);
1150 accum
= floatx80_add(f2xm1_coeff_5
, accum
, &env
->fp_status
);
1151 accum
= floatx80_mul(accum
, y
, &env
->fp_status
);
1152 accum
= floatx80_add(f2xm1_coeff_4
, accum
, &env
->fp_status
);
1153 accum
= floatx80_mul(accum
, y
, &env
->fp_status
);
1154 accum
= floatx80_add(f2xm1_coeff_3
, accum
, &env
->fp_status
);
1155 accum
= floatx80_mul(accum
, y
, &env
->fp_status
);
1156 accum
= floatx80_add(f2xm1_coeff_2
, accum
, &env
->fp_status
);
1157 accum
= floatx80_mul(accum
, y
, &env
->fp_status
);
1158 accum
= floatx80_add(f2xm1_coeff_1
, accum
, &env
->fp_status
);
1159 accum
= floatx80_mul(accum
, y
, &env
->fp_status
);
1160 accum
= floatx80_add(f2xm1_coeff_0_low
, accum
, &env
->fp_status
);
1163 * The full polynomial expansion is f2xm1_coeff_0 + accum
1164 * (where accum has much lower magnitude, and so, in
1165 * particular, carry out of the addition is not possible).
1166 * (This expansion is only accurate to about 70 bits, not
1169 aexp
= extractFloatx80Exp(f2xm1_coeff_0
);
1170 asign
= extractFloatx80Sign(f2xm1_coeff_0
);
1171 shift128RightJamming(extractFloatx80Frac(accum
), 0,
1172 aexp
- extractFloatx80Exp(accum
),
1174 bsig0
= extractFloatx80Frac(f2xm1_coeff_0
);
1176 if (asign
== extractFloatx80Sign(accum
)) {
1177 add128(bsig0
, bsig1
, asig0
, asig1
, &asig0
, &asig1
);
1179 sub128(bsig0
, bsig1
, asig0
, asig1
, &asig0
, &asig1
);
1181 /* And thus compute an approximation to 2^y - 1. */
1182 mul128By64To192(asig0
, asig1
, extractFloatx80Frac(y
),
1183 &asig0
, &asig1
, &asig2
);
1184 aexp
+= extractFloatx80Exp(y
) - 0x3ffe;
1185 asign
^= extractFloatx80Sign(y
);
1188 * Multiply this by the precomputed value of 2^t and
1189 * add that of 2^t - 1.
1191 mul128By64To192(asig0
, asig1
,
1192 extractFloatx80Frac(f2xm1_table
[n
].exp2
),
1193 &asig0
, &asig1
, &asig2
);
1194 aexp
+= extractFloatx80Exp(f2xm1_table
[n
].exp2
) - 0x3ffe;
1195 bexp
= extractFloatx80Exp(f2xm1_table
[n
].exp2m1
);
1196 bsig0
= extractFloatx80Frac(f2xm1_table
[n
].exp2m1
);
1199 shift128RightJamming(bsig0
, bsig1
, aexp
- bexp
,
1201 } else if (aexp
< bexp
) {
1202 shift128RightJamming(asig0
, asig1
, bexp
- aexp
,
1206 /* The sign of 2^t - 1 is always that of the result. */
1207 bsign
= extractFloatx80Sign(f2xm1_table
[n
].exp2m1
);
1208 if (asign
== bsign
) {
1209 /* Avoid possible carry out of the addition. */
1210 shift128RightJamming(asig0
, asig1
, 1,
1212 shift128RightJamming(bsig0
, bsig1
, 1,
1215 add128(asig0
, asig1
, bsig0
, bsig1
, &asig0
, &asig1
);
1217 sub128(bsig0
, bsig1
, asig0
, asig1
, &asig0
, &asig1
);
1221 env
->fp_status
.float_rounding_mode
= save_mode
;
1222 /* This result is inexact. */
1224 ST0
= normalizeRoundAndPackFloatx80(80, asign
, aexp
, asig0
, asig1
,
1228 env
->fp_status
.floatx80_rounding_precision
= save_prec
;
1230 merge_exception_flags(env
, old_flags
);
1233 void helper_fptan(CPUX86State
*env
)
1235 double fptemp
= floatx80_to_double(env
, ST0
);
1237 if ((fptemp
> MAXTAN
) || (fptemp
< -MAXTAN
)) {
1240 fptemp
= tan(fptemp
);
1241 ST0
= double_to_floatx80(env
, fptemp
);
1244 env
->fpus
&= ~0x400; /* C2 <-- 0 */
1245 /* the above code is for |arg| < 2**52 only */
/*
 * Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision.
 *
 * Each value is given as a floatx80-style biased exponent (0x3fff is the
 * exponent of 1.0) plus a 128-bit significand split into a high and a
 * low 64-bit word.  pi/4, pi/2 and pi are the same significand scaled by
 * a power of two, so only their exponents differ; 3pi/4 needs its own
 * significand.
 */
#define pi_4_exp        0x3ffe
#define pi_4_sig_high   0xc90fdaa22168c234ULL
#define pi_4_sig_low    0xc4c6628b80dc1cd1ULL

#define pi_2_exp        0x3fff
#define pi_2_sig_high   pi_4_sig_high
#define pi_2_sig_low    pi_4_sig_low

#define pi_34_exp       0x4000
#define pi_34_sig_high  0x96cbe3f9990e91a7ULL
#define pi_34_sig_low   0x9394c9e8a0a5159dULL

#define pi_exp          0x4000
#define pi_sig_high     pi_4_sig_high
#define pi_sig_low      pi_4_sig_low
/*
 * Polynomial coefficients for an approximation to atan(x), with only
 * odd powers of x used, for x in the interval [-1/16, 1/16];
 * fpatan_coeff_k is the coefficient of x^(2k+1).  (Unlike for some
 * other approximations, no low part is needed for the first coefficient
 * here to achieve a sufficiently accurate result, because the
 * coefficient in this minimax approximation is very close to 1; it is
 * stored below as exactly 1.0.)
 */
#define fpatan_coeff_0 \
    make_floatx80(0x3fff, 0x8000000000000000ULL)    /* x^1:   1.0     */
#define fpatan_coeff_1 \
    make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)    /* x^3:  ~-0.3333 */
#define fpatan_coeff_2 \
    make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)    /* x^5:  ~ 0.2    */
#define fpatan_coeff_3 \
    make_floatx80(0xbffc, 0x92492491fbab2e66ULL)    /* x^7:  ~-0.1429 */
#define fpatan_coeff_4 \
    make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)    /* x^9:  ~ 0.1111 */
#define fpatan_coeff_5 \
    make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)    /* x^11: ~-0.0909 */
#define fpatan_coeff_6 \
    make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)    /* x^13: ~ 0.0760 */
1279 struct fpatan_data
{
1280 /* High and low parts of atan(x). */
1281 floatx80 atan_high
, atan_low
;
1284 static const struct fpatan_data fpatan_table
[9] = {
1285 { floatx80_zero_init
,
1286 floatx80_zero_init
},
1287 { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL
),
1288 make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL
) },
1289 { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL
),
1290 make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL
) },
1291 { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL
),
1292 make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL
) },
1293 { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL
),
1294 make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL
) },
1295 { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL
),
1296 make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL
) },
1297 { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL
),
1298 make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL
) },
1299 { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL
),
1300 make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL
) },
1301 { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL
),
1302 make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL
) },
1305 void helper_fpatan(CPUX86State
*env
)
1307 uint8_t old_flags
= save_exception_flags(env
);
1308 uint64_t arg0_sig
= extractFloatx80Frac(ST0
);
1309 int32_t arg0_exp
= extractFloatx80Exp(ST0
);
1310 bool arg0_sign
= extractFloatx80Sign(ST0
);
1311 uint64_t arg1_sig
= extractFloatx80Frac(ST1
);
1312 int32_t arg1_exp
= extractFloatx80Exp(ST1
);
1313 bool arg1_sign
= extractFloatx80Sign(ST1
);
1315 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
1316 float_raise(float_flag_invalid
, &env
->fp_status
);
1317 ST1
= floatx80_silence_nan(ST0
, &env
->fp_status
);
1318 } else if (floatx80_is_signaling_nan(ST1
, &env
->fp_status
)) {
1319 float_raise(float_flag_invalid
, &env
->fp_status
);
1320 ST1
= floatx80_silence_nan(ST1
, &env
->fp_status
);
1321 } else if (floatx80_invalid_encoding(ST0
) ||
1322 floatx80_invalid_encoding(ST1
)) {
1323 float_raise(float_flag_invalid
, &env
->fp_status
);
1324 ST1
= floatx80_default_nan(&env
->fp_status
);
1325 } else if (floatx80_is_any_nan(ST0
)) {
1327 } else if (floatx80_is_any_nan(ST1
)) {
1328 /* Pass this NaN through. */
1329 } else if (floatx80_is_zero(ST1
) && !arg0_sign
) {
1330 /* Pass this zero through. */
1331 } else if (((floatx80_is_infinity(ST0
) && !floatx80_is_infinity(ST1
)) ||
1332 arg0_exp
- arg1_exp
>= 80) &&
1335 * Dividing ST1 by ST0 gives the correct result up to
1336 * rounding, and avoids spurious underflow exceptions that
1337 * might result from passing some small values through the
1338 * polynomial approximation, but if a finite nonzero result of
1339 * division is exact, the result of fpatan is still inexact
1340 * (and underflowing where appropriate).
1342 signed char save_prec
= env
->fp_status
.floatx80_rounding_precision
;
1343 env
->fp_status
.floatx80_rounding_precision
= 80;
1344 ST1
= floatx80_div(ST1
, ST0
, &env
->fp_status
);
1345 env
->fp_status
.floatx80_rounding_precision
= save_prec
;
1346 if (!floatx80_is_zero(ST1
) &&
1347 !(get_float_exception_flags(&env
->fp_status
) &
1348 float_flag_inexact
)) {
1350 * The mathematical result is very slightly closer to zero
1351 * than this exact result. Round a value with the
1352 * significand adjusted accordingly to get the correct
1353 * exceptions, and possibly an adjusted result depending
1354 * on the rounding mode.
1356 uint64_t sig
= extractFloatx80Frac(ST1
);
1357 int32_t exp
= extractFloatx80Exp(ST1
);
1358 bool sign
= extractFloatx80Sign(ST1
);
1360 normalizeFloatx80Subnormal(sig
, &exp
, &sig
);
1362 ST1
= normalizeRoundAndPackFloatx80(80, sign
, exp
, sig
- 1,
1363 -1, &env
->fp_status
);
1366 /* The result is inexact. */
1367 bool rsign
= arg1_sign
;
1369 uint64_t rsig0
, rsig1
;
1370 if (floatx80_is_zero(ST1
)) {
1372 * ST0 is negative. The result is pi with the sign of
1376 rsig0
= pi_sig_high
;
1378 } else if (floatx80_is_infinity(ST1
)) {
1379 if (floatx80_is_infinity(ST0
)) {
1382 rsig0
= pi_34_sig_high
;
1383 rsig1
= pi_34_sig_low
;
1386 rsig0
= pi_4_sig_high
;
1387 rsig1
= pi_4_sig_low
;
1391 rsig0
= pi_2_sig_high
;
1392 rsig1
= pi_2_sig_low
;
1394 } else if (floatx80_is_zero(ST0
) || arg1_exp
- arg0_exp
>= 80) {
1396 rsig0
= pi_2_sig_high
;
1397 rsig1
= pi_2_sig_low
;
1398 } else if (floatx80_is_infinity(ST0
) || arg0_exp
- arg1_exp
>= 80) {
1399 /* ST0 is negative. */
1401 rsig0
= pi_sig_high
;
1405 * ST0 and ST1 are finite, nonzero and with exponents not
1408 int32_t adj_exp
, num_exp
, den_exp
, xexp
, yexp
, n
, texp
, zexp
, aexp
;
1409 int32_t azexp
, axexp
;
1410 bool adj_sub
, ysign
, zsign
;
1411 uint64_t adj_sig0
, adj_sig1
, num_sig
, den_sig
, xsig0
, xsig1
;
1412 uint64_t msig0
, msig1
, msig2
, remsig0
, remsig1
, remsig2
;
1413 uint64_t ysig0
, ysig1
, tsig
, zsig0
, zsig1
, asig0
, asig1
;
1414 uint64_t azsig0
, azsig1
;
1415 uint64_t azsig2
, azsig3
, axsig0
, axsig1
;
1417 FloatRoundMode save_mode
= env
->fp_status
.float_rounding_mode
;
1418 signed char save_prec
= env
->fp_status
.floatx80_rounding_precision
;
1419 env
->fp_status
.float_rounding_mode
= float_round_nearest_even
;
1420 env
->fp_status
.floatx80_rounding_precision
= 80;
1422 if (arg0_exp
== 0) {
1423 normalizeFloatx80Subnormal(arg0_sig
, &arg0_exp
, &arg0_sig
);
1425 if (arg1_exp
== 0) {
1426 normalizeFloatx80Subnormal(arg1_sig
, &arg1_exp
, &arg1_sig
);
1428 if (arg0_exp
> arg1_exp
||
1429 (arg0_exp
== arg1_exp
&& arg0_sig
>= arg1_sig
)) {
1430 /* Work with abs(ST1) / abs(ST0). */
1436 /* The result is subtracted from pi. */
1438 adj_sig0
= pi_sig_high
;
1439 adj_sig1
= pi_sig_low
;
1442 /* The result is used as-is. */
1449 /* Work with abs(ST0) / abs(ST1). */
1454 /* The result is added to or subtracted from pi/2. */
1456 adj_sig0
= pi_2_sig_high
;
1457 adj_sig1
= pi_2_sig_low
;
1458 adj_sub
= !arg0_sign
;
1462 * Compute x = num/den, where 0 < x <= 1 and x is not too
1465 xexp
= num_exp
- den_exp
+ 0x3ffe;
1468 if (den_sig
<= remsig0
) {
1469 shift128Right(remsig0
, remsig1
, 1, &remsig0
, &remsig1
);
1472 xsig0
= estimateDiv128To64(remsig0
, remsig1
, den_sig
);
1473 mul64To128(den_sig
, xsig0
, &msig0
, &msig1
);
1474 sub128(remsig0
, remsig1
, msig0
, msig1
, &remsig0
, &remsig1
);
1475 while ((int64_t) remsig0
< 0) {
1477 add128(remsig0
, remsig1
, 0, den_sig
, &remsig0
, &remsig1
);
1479 xsig1
= estimateDiv128To64(remsig1
, 0, den_sig
);
1481 * No need to correct any estimation error in xsig1; even
1482 * with such error, it is accurate enough.
1486 * Split x as x = t + y, where t = n/8 is the nearest
1487 * multiple of 1/8 to x.
1489 x8
= normalizeRoundAndPackFloatx80(80, false, xexp
+ 3, xsig0
,
1490 xsig1
, &env
->fp_status
);
1491 n
= floatx80_to_int32(x8
, &env
->fp_status
);
1500 int shift
= clz32(n
) + 32;
1501 texp
= 0x403b - shift
;
1505 sub128(xsig0
, xsig1
, tsig
, 0, &ysig0
, &ysig1
);
1506 if ((int64_t) ysig0
>= 0) {
1512 shift
= clz64(ysig1
) + 64;
1513 yexp
= xexp
- shift
;
1514 shift128Left(ysig0
, ysig1
, shift
,
1518 shift
= clz64(ysig0
);
1519 yexp
= xexp
- shift
;
1520 shift128Left(ysig0
, ysig1
, shift
, &ysig0
, &ysig1
);
1524 sub128(0, 0, ysig0
, ysig1
, &ysig0
, &ysig1
);
1526 shift
= clz64(ysig1
) + 64;
1528 shift
= clz64(ysig0
);
1530 yexp
= xexp
- shift
;
1531 shift128Left(ysig0
, ysig1
, shift
, &ysig0
, &ysig1
);
1535 * t's exponent must be greater than x's because t
1536 * is positive and the nearest multiple of 1/8 to
1537 * x, and if x has a greater exponent, the power
1538 * of 2 with that exponent is also a multiple of
1541 uint64_t usig0
, usig1
;
1542 shift128RightJamming(xsig0
, xsig1
, texp
- xexp
,
1545 sub128(tsig
, 0, usig0
, usig1
, &ysig0
, &ysig1
);
1547 shift
= clz64(ysig1
) + 64;
1549 shift
= clz64(ysig0
);
1551 yexp
= texp
- shift
;
1552 shift128Left(ysig0
, ysig1
, shift
, &ysig0
, &ysig1
);
1557 * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
1561 if (texp
== 0 || yexp
== 0) {
1567 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
1569 int32_t dexp
= texp
+ xexp
- 0x3ffe;
1570 uint64_t dsig0
, dsig1
, dsig2
;
1571 mul128By64To192(xsig0
, xsig1
, tsig
, &dsig0
, &dsig1
, &dsig2
);
1573 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
1574 * bit). Add 1 to produce the denominator 1+tx.
1576 shift128RightJamming(dsig0
, dsig1
, 0x3fff - dexp
,
1578 dsig0
|= 0x8000000000000000ULL
;
1583 if (dsig0
<= remsig0
) {
1584 shift128Right(remsig0
, remsig1
, 1, &remsig0
, &remsig1
);
1587 zsig0
= estimateDiv128To64(remsig0
, remsig1
, dsig0
);
1588 mul128By64To192(dsig0
, dsig1
, zsig0
, &msig0
, &msig1
, &msig2
);
1589 sub192(remsig0
, remsig1
, remsig2
, msig0
, msig1
, msig2
,
1590 &remsig0
, &remsig1
, &remsig2
);
1591 while ((int64_t) remsig0
< 0) {
1593 add192(remsig0
, remsig1
, remsig2
, 0, dsig0
, dsig1
,
1594 &remsig0
, &remsig1
, &remsig2
);
1596 zsig1
= estimateDiv128To64(remsig1
, remsig2
, dsig0
);
1597 /* No need to correct any estimation error in zsig1. */
1606 uint64_t z2sig0
, z2sig1
, z2sig2
, z2sig3
;
1608 mul128To256(zsig0
, zsig1
, zsig0
, zsig1
,
1609 &z2sig0
, &z2sig1
, &z2sig2
, &z2sig3
);
1610 z2
= normalizeRoundAndPackFloatx80(80, false,
1611 zexp
+ zexp
- 0x3ffe,
1615 /* Compute the lower parts of the polynomial expansion. */
1616 accum
= floatx80_mul(fpatan_coeff_6
, z2
, &env
->fp_status
);
1617 accum
= floatx80_add(fpatan_coeff_5
, accum
, &env
->fp_status
);
1618 accum
= floatx80_mul(accum
, z2
, &env
->fp_status
);
1619 accum
= floatx80_add(fpatan_coeff_4
, accum
, &env
->fp_status
);
1620 accum
= floatx80_mul(accum
, z2
, &env
->fp_status
);
1621 accum
= floatx80_add(fpatan_coeff_3
, accum
, &env
->fp_status
);
1622 accum
= floatx80_mul(accum
, z2
, &env
->fp_status
);
1623 accum
= floatx80_add(fpatan_coeff_2
, accum
, &env
->fp_status
);
1624 accum
= floatx80_mul(accum
, z2
, &env
->fp_status
);
1625 accum
= floatx80_add(fpatan_coeff_1
, accum
, &env
->fp_status
);
1626 accum
= floatx80_mul(accum
, z2
, &env
->fp_status
);
1629 * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
1630 * fpatan_coeff_0 is 1, and accum is negative and much smaller.
1632 aexp
= extractFloatx80Exp(fpatan_coeff_0
);
1633 shift128RightJamming(extractFloatx80Frac(accum
), 0,
1634 aexp
- extractFloatx80Exp(accum
),
1636 sub128(extractFloatx80Frac(fpatan_coeff_0
), 0, asig0
, asig1
,
1638 /* Multiply by z to compute arctan(z). */
1639 azexp
= aexp
+ zexp
- 0x3ffe;
1640 mul128To256(asig0
, asig1
, zsig0
, zsig1
, &azsig0
, &azsig1
,
1644 /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign). */
1646 /* z is positive. */
1651 bool low_sign
= extractFloatx80Sign(fpatan_table
[n
].atan_low
);
1652 int32_t low_exp
= extractFloatx80Exp(fpatan_table
[n
].atan_low
);
1654 extractFloatx80Frac(fpatan_table
[n
].atan_low
);
1655 uint64_t low_sig1
= 0;
1656 axexp
= extractFloatx80Exp(fpatan_table
[n
].atan_high
);
1657 axsig0
= extractFloatx80Frac(fpatan_table
[n
].atan_high
);
1659 shift128RightJamming(low_sig0
, low_sig1
, axexp
- low_exp
,
1660 &low_sig0
, &low_sig1
);
1662 sub128(axsig0
, axsig1
, low_sig0
, low_sig1
,
1665 add128(axsig0
, axsig1
, low_sig0
, low_sig1
,
1668 if (azexp
>= axexp
) {
1669 shift128RightJamming(axsig0
, axsig1
, azexp
- axexp
+ 1,
1672 shift128RightJamming(azsig0
, azsig1
, 1,
1675 shift128RightJamming(axsig0
, axsig1
, 1,
1677 shift128RightJamming(azsig0
, azsig1
, axexp
- azexp
+ 1,
1682 sub128(axsig0
, axsig1
, azsig0
, azsig1
,
1685 add128(axsig0
, axsig1
, azsig0
, azsig1
,
1696 * Add or subtract arctan(x) (exponent axexp,
1697 * significand axsig0 and axsig1, positive, not
1698 * necessarily normalized) to the number given by
1699 * adj_exp, adj_sig0 and adj_sig1, according to
1702 if (adj_exp
>= axexp
) {
1703 shift128RightJamming(axsig0
, axsig1
, adj_exp
- axexp
+ 1,
1706 shift128RightJamming(adj_sig0
, adj_sig1
, 1,
1707 &adj_sig0
, &adj_sig1
);
1709 shift128RightJamming(axsig0
, axsig1
, 1,
1711 shift128RightJamming(adj_sig0
, adj_sig1
,
1712 axexp
- adj_exp
+ 1,
1713 &adj_sig0
, &adj_sig1
);
1717 sub128(adj_sig0
, adj_sig1
, axsig0
, axsig1
,
1720 add128(adj_sig0
, adj_sig1
, axsig0
, axsig1
,
1725 env
->fp_status
.float_rounding_mode
= save_mode
;
1726 env
->fp_status
.floatx80_rounding_precision
= save_prec
;
1728 /* This result is inexact. */
1730 ST1
= normalizeRoundAndPackFloatx80(80, rsign
, rexp
,
1731 rsig0
, rsig1
, &env
->fp_status
);
1735 merge_exception_flags(env
, old_flags
);
1738 void helper_fxtract(CPUX86State
*env
)
1740 uint8_t old_flags
= save_exception_flags(env
);
1745 if (floatx80_is_zero(ST0
)) {
1746 /* Easy way to generate -inf and raising division by 0 exception */
1747 ST0
= floatx80_div(floatx80_chs(floatx80_one
), floatx80_zero
,
1751 } else if (floatx80_invalid_encoding(ST0
)) {
1752 float_raise(float_flag_invalid
, &env
->fp_status
);
1753 ST0
= floatx80_default_nan(&env
->fp_status
);
1756 } else if (floatx80_is_any_nan(ST0
)) {
1757 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
1758 float_raise(float_flag_invalid
, &env
->fp_status
);
1759 ST0
= floatx80_silence_nan(ST0
, &env
->fp_status
);
1763 } else if (floatx80_is_infinity(ST0
)) {
1766 ST1
= floatx80_infinity
;
1770 if (EXPD(temp
) == 0) {
1771 int shift
= clz64(temp
.l
.lower
);
1772 temp
.l
.lower
<<= shift
;
1773 expdif
= 1 - EXPBIAS
- shift
;
1774 float_raise(float_flag_input_denormal
, &env
->fp_status
);
1776 expdif
= EXPD(temp
) - EXPBIAS
;
1778 /* DP exponent bias */
1779 ST0
= int32_to_floatx80(expdif
, &env
->fp_status
);
1784 merge_exception_flags(env
, old_flags
);
1787 static void helper_fprem_common(CPUX86State
*env
, bool mod
)
1789 uint8_t old_flags
= save_exception_flags(env
);
1791 CPU_LDoubleU temp0
, temp1
;
1792 int exp0
, exp1
, expdiff
;
1799 env
->fpus
&= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1800 if (floatx80_is_zero(ST0
) || floatx80_is_zero(ST1
) ||
1801 exp0
== 0x7fff || exp1
== 0x7fff ||
1802 floatx80_invalid_encoding(ST0
) || floatx80_invalid_encoding(ST1
)) {
1803 ST0
= floatx80_modrem(ST0
, ST1
, mod
, "ient
, &env
->fp_status
);
1806 exp0
= 1 - clz64(temp0
.l
.lower
);
1809 exp1
= 1 - clz64(temp1
.l
.lower
);
1811 expdiff
= exp0
- exp1
;
1813 ST0
= floatx80_modrem(ST0
, ST1
, mod
, "ient
, &env
->fp_status
);
1814 env
->fpus
|= (quotient
& 0x4) << (8 - 2); /* (C0) <-- q2 */
1815 env
->fpus
|= (quotient
& 0x2) << (14 - 1); /* (C3) <-- q1 */
1816 env
->fpus
|= (quotient
& 0x1) << (9 - 0); /* (C1) <-- q0 */
1819 * Partial remainder. This choice of how many bits to
1820 * process at once is specified in AMD instruction set
1821 * manuals, and empirically is followed by Intel
1822 * processors as well; it ensures that the final remainder
1823 * operation in a loop does produce the correct low three
1824 * bits of the quotient. AMD manuals specify that the
1825 * flags other than C2 are cleared, and empirically Intel
1826 * processors clear them as well.
1828 int n
= 32 + (expdiff
% 32);
1829 temp1
.d
= floatx80_scalbn(temp1
.d
, expdiff
- n
, &env
->fp_status
);
1830 ST0
= floatx80_mod(ST0
, temp1
.d
, &env
->fp_status
);
1831 env
->fpus
|= 0x400; /* C2 <-- 1 */
1834 merge_exception_flags(env
, old_flags
);
1837 void helper_fprem1(CPUX86State
*env
)
1839 helper_fprem_common(env
, false);
1842 void helper_fprem(CPUX86State
*env
)
1844 helper_fprem_common(env
, true);
/* 128-bit significand of log2(e), as high and low 64-bit words. */
#define log2_e_sig_high 0xb8aa3b295c17f0bbULL
#define log2_e_sig_low  0xbe87fed0691d3e89ULL

/*
 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)),
 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3,
 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the
 * interval [sqrt(2)/2, sqrt(2)].
 *
 * fyl2x_coeff_k is the coefficient of x^(2k+1).  The leading
 * coefficient is kept in two pieces: fyl2x_coeff_0 plus the tiny
 * correction fyl2x_coeff_0_low.
 */
#define fyl2x_coeff_0 \
    make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)    /* x^1, high part */
#define fyl2x_coeff_0_low \
    make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)    /* x^1, low part */
#define fyl2x_coeff_1 \
    make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)    /* x^3 */
#define fyl2x_coeff_2 \
    make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)    /* x^5 */
#define fyl2x_coeff_3 \
    make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)    /* x^7 */
#define fyl2x_coeff_4 \
    make_floatx80(0x3ffd, 0xa42589eaf451499eULL)    /* x^9 */
#define fyl2x_coeff_5 \
    make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)    /* x^11 */
#define fyl2x_coeff_6 \
    make_floatx80(0x3ffc, 0xe3476578adf26272ULL)    /* x^13 */
#define fyl2x_coeff_7 \
    make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)    /* x^15 */
#define fyl2x_coeff_8 \
    make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)    /* x^17 */
#define fyl2x_coeff_9 \
    make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)    /* x^19 */
/*
 * Compute an approximation of log2(1+arg), where 1+arg is in the
 * interval [sqrt(2)/2, sqrt(2)].  It is assumed that when this
 * function is called, rounding precision is set to 80 and the
 * round-to-nearest mode is in effect.  arg must not be exactly zero,
 * and must not be so close to zero that underflow might occur.
 */
1876 static void helper_fyl2x_common(CPUX86State
*env
, floatx80 arg
, int32_t *exp
,
1877 uint64_t *sig0
, uint64_t *sig1
)
1879 uint64_t arg0_sig
= extractFloatx80Frac(arg
);
1880 int32_t arg0_exp
= extractFloatx80Exp(arg
);
1881 bool arg0_sign
= extractFloatx80Sign(arg
);
1883 int32_t dexp
, texp
, aexp
;
1884 uint64_t dsig0
, dsig1
, tsig0
, tsig1
, rsig0
, rsig1
, rsig2
;
1885 uint64_t msig0
, msig1
, msig2
, t2sig0
, t2sig1
, t2sig2
, t2sig3
;
1886 uint64_t asig0
, asig1
, asig2
, asig3
, bsig0
, bsig1
;
1890 * Compute an approximation of arg/(2+arg), with extra precision,
1891 * as the argument to a polynomial approximation. The extra
1892 * precision is only needed for the first term of the
1893 * approximation, with subsequent terms being significantly
1894 * smaller; the approximation only uses odd exponents, and the
1895 * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
1899 shift128RightJamming(arg0_sig
, 0, dexp
- arg0_exp
, &dsig0
, &dsig1
);
1900 sub128(0, 0, dsig0
, dsig1
, &dsig0
, &dsig1
);
1903 shift128RightJamming(arg0_sig
, 0, dexp
- arg0_exp
, &dsig0
, &dsig1
);
1904 dsig0
|= 0x8000000000000000ULL
;
1906 texp
= arg0_exp
- dexp
+ 0x3ffe;
1910 if (dsig0
<= rsig0
) {
1911 shift128Right(rsig0
, rsig1
, 1, &rsig0
, &rsig1
);
1914 tsig0
= estimateDiv128To64(rsig0
, rsig1
, dsig0
);
1915 mul128By64To192(dsig0
, dsig1
, tsig0
, &msig0
, &msig1
, &msig2
);
1916 sub192(rsig0
, rsig1
, rsig2
, msig0
, msig1
, msig2
,
1917 &rsig0
, &rsig1
, &rsig2
);
1918 while ((int64_t) rsig0
< 0) {
1920 add192(rsig0
, rsig1
, rsig2
, 0, dsig0
, dsig1
,
1921 &rsig0
, &rsig1
, &rsig2
);
1923 tsig1
= estimateDiv128To64(rsig1
, rsig2
, dsig0
);
1925 * No need to correct any estimation error in tsig1; even with
1926 * such error, it is accurate enough. Now compute the square of
1927 * that approximation.
1929 mul128To256(tsig0
, tsig1
, tsig0
, tsig1
,
1930 &t2sig0
, &t2sig1
, &t2sig2
, &t2sig3
);
1931 t2
= normalizeRoundAndPackFloatx80(80, false, texp
+ texp
- 0x3ffe,
1932 t2sig0
, t2sig1
, &env
->fp_status
);
1934 /* Compute the lower parts of the polynomial expansion. */
1935 accum
= floatx80_mul(fyl2x_coeff_9
, t2
, &env
->fp_status
);
1936 accum
= floatx80_add(fyl2x_coeff_8
, accum
, &env
->fp_status
);
1937 accum
= floatx80_mul(accum
, t2
, &env
->fp_status
);
1938 accum
= floatx80_add(fyl2x_coeff_7
, accum
, &env
->fp_status
);
1939 accum
= floatx80_mul(accum
, t2
, &env
->fp_status
);
1940 accum
= floatx80_add(fyl2x_coeff_6
, accum
, &env
->fp_status
);
1941 accum
= floatx80_mul(accum
, t2
, &env
->fp_status
);
1942 accum
= floatx80_add(fyl2x_coeff_5
, accum
, &env
->fp_status
);
1943 accum
= floatx80_mul(accum
, t2
, &env
->fp_status
);
1944 accum
= floatx80_add(fyl2x_coeff_4
, accum
, &env
->fp_status
);
1945 accum
= floatx80_mul(accum
, t2
, &env
->fp_status
);
1946 accum
= floatx80_add(fyl2x_coeff_3
, accum
, &env
->fp_status
);
1947 accum
= floatx80_mul(accum
, t2
, &env
->fp_status
);
1948 accum
= floatx80_add(fyl2x_coeff_2
, accum
, &env
->fp_status
);
1949 accum
= floatx80_mul(accum
, t2
, &env
->fp_status
);
1950 accum
= floatx80_add(fyl2x_coeff_1
, accum
, &env
->fp_status
);
1951 accum
= floatx80_mul(accum
, t2
, &env
->fp_status
);
1952 accum
= floatx80_add(fyl2x_coeff_0_low
, accum
, &env
->fp_status
);
1955 * The full polynomial expansion is fyl2x_coeff_0 + accum (where
1956 * accum has much lower magnitude, and so, in particular, carry
1957 * out of the addition is not possible), multiplied by t. (This
1958 * expansion is only accurate to about 70 bits, not 128 bits.)
1960 aexp
= extractFloatx80Exp(fyl2x_coeff_0
);
1961 asign
= extractFloatx80Sign(fyl2x_coeff_0
);
1962 shift128RightJamming(extractFloatx80Frac(accum
), 0,
1963 aexp
- extractFloatx80Exp(accum
),
1965 bsig0
= extractFloatx80Frac(fyl2x_coeff_0
);
1967 if (asign
== extractFloatx80Sign(accum
)) {
1968 add128(bsig0
, bsig1
, asig0
, asig1
, &asig0
, &asig1
);
1970 sub128(bsig0
, bsig1
, asig0
, asig1
, &asig0
, &asig1
);
1972 /* Multiply by t to compute the required result. */
1973 mul128To256(asig0
, asig1
, tsig0
, tsig1
,
1974 &asig0
, &asig1
, &asig2
, &asig3
);
1975 aexp
+= texp
- 0x3ffe;
1981 void helper_fyl2xp1(CPUX86State
*env
)
1983 uint8_t old_flags
= save_exception_flags(env
);
1984 uint64_t arg0_sig
= extractFloatx80Frac(ST0
);
1985 int32_t arg0_exp
= extractFloatx80Exp(ST0
);
1986 bool arg0_sign
= extractFloatx80Sign(ST0
);
1987 uint64_t arg1_sig
= extractFloatx80Frac(ST1
);
1988 int32_t arg1_exp
= extractFloatx80Exp(ST1
);
1989 bool arg1_sign
= extractFloatx80Sign(ST1
);
1991 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
1992 float_raise(float_flag_invalid
, &env
->fp_status
);
1993 ST1
= floatx80_silence_nan(ST0
, &env
->fp_status
);
1994 } else if (floatx80_is_signaling_nan(ST1
, &env
->fp_status
)) {
1995 float_raise(float_flag_invalid
, &env
->fp_status
);
1996 ST1
= floatx80_silence_nan(ST1
, &env
->fp_status
);
1997 } else if (floatx80_invalid_encoding(ST0
) ||
1998 floatx80_invalid_encoding(ST1
)) {
1999 float_raise(float_flag_invalid
, &env
->fp_status
);
2000 ST1
= floatx80_default_nan(&env
->fp_status
);
2001 } else if (floatx80_is_any_nan(ST0
)) {
2003 } else if (floatx80_is_any_nan(ST1
)) {
2004 /* Pass this NaN through. */
2005 } else if (arg0_exp
> 0x3ffd ||
2006 (arg0_exp
== 0x3ffd && arg0_sig
> (arg0_sign
?
2007 0x95f619980c4336f7ULL
:
2008 0xd413cccfe7799211ULL
))) {
2010 * Out of range for the instruction (ST0 must have absolute
2011 * value less than 1 - sqrt(2)/2 = 0.292..., according to
2012 * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
2013 * to sqrt(2) - 1, which we allow here), treat as invalid.
2015 float_raise(float_flag_invalid
, &env
->fp_status
);
2016 ST1
= floatx80_default_nan(&env
->fp_status
);
2017 } else if (floatx80_is_zero(ST0
) || floatx80_is_zero(ST1
) ||
2018 arg1_exp
== 0x7fff) {
2020 * One argument is zero, or multiplying by infinity; correct
2021 * result is exact and can be obtained by multiplying the
2024 ST1
= floatx80_mul(ST0
, ST1
, &env
->fp_status
);
2025 } else if (arg0_exp
< 0x3fb0) {
2027 * Multiplying both arguments and an extra-precision version
2028 * of log2(e) is sufficiently precise.
2030 uint64_t sig0
, sig1
, sig2
;
2032 if (arg0_exp
== 0) {
2033 normalizeFloatx80Subnormal(arg0_sig
, &arg0_exp
, &arg0_sig
);
2035 if (arg1_exp
== 0) {
2036 normalizeFloatx80Subnormal(arg1_sig
, &arg1_exp
, &arg1_sig
);
2038 mul128By64To192(log2_e_sig_high
, log2_e_sig_low
, arg0_sig
,
2039 &sig0
, &sig1
, &sig2
);
2041 mul128By64To192(sig0
, sig1
, arg1_sig
, &sig0
, &sig1
, &sig2
);
2042 exp
+= arg1_exp
- 0x3ffe;
2043 /* This result is inexact. */
2045 ST1
= normalizeRoundAndPackFloatx80(80, arg0_sign
^ arg1_sign
, exp
,
2046 sig0
, sig1
, &env
->fp_status
);
2049 uint64_t asig0
, asig1
, asig2
;
2050 FloatRoundMode save_mode
= env
->fp_status
.float_rounding_mode
;
2051 signed char save_prec
= env
->fp_status
.floatx80_rounding_precision
;
2052 env
->fp_status
.float_rounding_mode
= float_round_nearest_even
;
2053 env
->fp_status
.floatx80_rounding_precision
= 80;
2055 helper_fyl2x_common(env
, ST0
, &aexp
, &asig0
, &asig1
);
2057 * Multiply by the second argument to compute the required
2060 if (arg1_exp
== 0) {
2061 normalizeFloatx80Subnormal(arg1_sig
, &arg1_exp
, &arg1_sig
);
2063 mul128By64To192(asig0
, asig1
, arg1_sig
, &asig0
, &asig1
, &asig2
);
2064 aexp
+= arg1_exp
- 0x3ffe;
2065 /* This result is inexact. */
2067 env
->fp_status
.float_rounding_mode
= save_mode
;
2068 ST1
= normalizeRoundAndPackFloatx80(80, arg0_sign
^ arg1_sign
, aexp
,
2069 asig0
, asig1
, &env
->fp_status
);
2070 env
->fp_status
.floatx80_rounding_precision
= save_prec
;
2073 merge_exception_flags(env
, old_flags
);
2076 void helper_fyl2x(CPUX86State
*env
)
2078 uint8_t old_flags
= save_exception_flags(env
);
2079 uint64_t arg0_sig
= extractFloatx80Frac(ST0
);
2080 int32_t arg0_exp
= extractFloatx80Exp(ST0
);
2081 bool arg0_sign
= extractFloatx80Sign(ST0
);
2082 uint64_t arg1_sig
= extractFloatx80Frac(ST1
);
2083 int32_t arg1_exp
= extractFloatx80Exp(ST1
);
2084 bool arg1_sign
= extractFloatx80Sign(ST1
);
2086 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
2087 float_raise(float_flag_invalid
, &env
->fp_status
);
2088 ST1
= floatx80_silence_nan(ST0
, &env
->fp_status
);
2089 } else if (floatx80_is_signaling_nan(ST1
, &env
->fp_status
)) {
2090 float_raise(float_flag_invalid
, &env
->fp_status
);
2091 ST1
= floatx80_silence_nan(ST1
, &env
->fp_status
);
2092 } else if (floatx80_invalid_encoding(ST0
) ||
2093 floatx80_invalid_encoding(ST1
)) {
2094 float_raise(float_flag_invalid
, &env
->fp_status
);
2095 ST1
= floatx80_default_nan(&env
->fp_status
);
2096 } else if (floatx80_is_any_nan(ST0
)) {
2098 } else if (floatx80_is_any_nan(ST1
)) {
2099 /* Pass this NaN through. */
2100 } else if (arg0_sign
&& !floatx80_is_zero(ST0
)) {
2101 float_raise(float_flag_invalid
, &env
->fp_status
);
2102 ST1
= floatx80_default_nan(&env
->fp_status
);
2103 } else if (floatx80_is_infinity(ST1
)) {
2104 FloatRelation cmp
= floatx80_compare(ST0
, floatx80_one
,
2107 case float_relation_less
:
2108 ST1
= floatx80_chs(ST1
);
2110 case float_relation_greater
:
2111 /* Result is infinity of the same sign as ST1. */
2114 float_raise(float_flag_invalid
, &env
->fp_status
);
2115 ST1
= floatx80_default_nan(&env
->fp_status
);
2118 } else if (floatx80_is_infinity(ST0
)) {
2119 if (floatx80_is_zero(ST1
)) {
2120 float_raise(float_flag_invalid
, &env
->fp_status
);
2121 ST1
= floatx80_default_nan(&env
->fp_status
);
2122 } else if (arg1_sign
) {
2123 ST1
= floatx80_chs(ST0
);
2127 } else if (floatx80_is_zero(ST0
)) {
2128 if (floatx80_is_zero(ST1
)) {
2129 float_raise(float_flag_invalid
, &env
->fp_status
);
2130 ST1
= floatx80_default_nan(&env
->fp_status
);
2132 /* Result is infinity with opposite sign to ST1. */
2133 float_raise(float_flag_divbyzero
, &env
->fp_status
);
2134 ST1
= make_floatx80(arg1_sign
? 0x7fff : 0xffff,
2135 0x8000000000000000ULL
);
2137 } else if (floatx80_is_zero(ST1
)) {
2138 if (floatx80_lt(ST0
, floatx80_one
, &env
->fp_status
)) {
2139 ST1
= floatx80_chs(ST1
);
2141 /* Otherwise, ST1 is already the correct result. */
2142 } else if (floatx80_eq(ST0
, floatx80_one
, &env
->fp_status
)) {
2144 ST1
= floatx80_chs(floatx80_zero
);
2146 ST1
= floatx80_zero
;
2151 FloatRoundMode save_mode
= env
->fp_status
.float_rounding_mode
;
2152 signed char save_prec
= env
->fp_status
.floatx80_rounding_precision
;
2153 env
->fp_status
.float_rounding_mode
= float_round_nearest_even
;
2154 env
->fp_status
.floatx80_rounding_precision
= 80;
2156 if (arg0_exp
== 0) {
2157 normalizeFloatx80Subnormal(arg0_sig
, &arg0_exp
, &arg0_sig
);
2159 if (arg1_exp
== 0) {
2160 normalizeFloatx80Subnormal(arg1_sig
, &arg1_exp
, &arg1_sig
);
2162 int_exp
= arg0_exp
- 0x3fff;
2163 if (arg0_sig
> 0xb504f333f9de6484ULL
) {
2166 arg0_m1
= floatx80_sub(floatx80_scalbn(ST0
, -int_exp
,
2168 floatx80_one
, &env
->fp_status
);
2169 if (floatx80_is_zero(arg0_m1
)) {
2170 /* Exact power of 2; multiply by ST1. */
2171 env
->fp_status
.float_rounding_mode
= save_mode
;
2172 ST1
= floatx80_mul(int32_to_floatx80(int_exp
, &env
->fp_status
),
2173 ST1
, &env
->fp_status
);
2175 bool asign
= extractFloatx80Sign(arg0_m1
);
2177 uint64_t asig0
, asig1
, asig2
;
2178 helper_fyl2x_common(env
, arg0_m1
, &aexp
, &asig0
, &asig1
);
2180 bool isign
= (int_exp
< 0);
2184 int_exp
= isign
? -int_exp
: int_exp
;
2185 shift
= clz32(int_exp
) + 32;
2188 iexp
= 0x403e - shift
;
2189 shift128RightJamming(asig0
, asig1
, iexp
- aexp
,
2191 if (asign
== isign
) {
2192 add128(isig
, 0, asig0
, asig1
, &asig0
, &asig1
);
2194 sub128(isig
, 0, asig0
, asig1
, &asig0
, &asig1
);
2200 * Multiply by the second argument to compute the required
2203 if (arg1_exp
== 0) {
2204 normalizeFloatx80Subnormal(arg1_sig
, &arg1_exp
, &arg1_sig
);
2206 mul128By64To192(asig0
, asig1
, arg1_sig
, &asig0
, &asig1
, &asig2
);
2207 aexp
+= arg1_exp
- 0x3ffe;
2208 /* This result is inexact. */
2210 env
->fp_status
.float_rounding_mode
= save_mode
;
2211 ST1
= normalizeRoundAndPackFloatx80(80, asign
^ arg1_sign
, aexp
,
2212 asig0
, asig1
, &env
->fp_status
);
2215 env
->fp_status
.floatx80_rounding_precision
= save_prec
;
2218 merge_exception_flags(env
, old_flags
);
2221 void helper_fsqrt(CPUX86State
*env
)
2223 uint8_t old_flags
= save_exception_flags(env
);
2224 if (floatx80_is_neg(ST0
)) {
2225 env
->fpus
&= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
2228 ST0
= floatx80_sqrt(ST0
, &env
->fp_status
);
2229 merge_exception_flags(env
, old_flags
);
2232 void helper_fsincos(CPUX86State
*env
)
2234 double fptemp
= floatx80_to_double(env
, ST0
);
2236 if ((fptemp
> MAXTAN
) || (fptemp
< -MAXTAN
)) {
2239 ST0
= double_to_floatx80(env
, sin(fptemp
));
2241 ST0
= double_to_floatx80(env
, cos(fptemp
));
2242 env
->fpus
&= ~0x400; /* C2 <-- 0 */
2243 /* the above code is for |arg| < 2**63 only */
2247 void helper_frndint(CPUX86State
*env
)
2249 uint8_t old_flags
= save_exception_flags(env
);
2250 ST0
= floatx80_round_to_int(ST0
, &env
->fp_status
);
2251 merge_exception_flags(env
, old_flags
);
2254 void helper_fscale(CPUX86State
*env
)
2256 uint8_t old_flags
= save_exception_flags(env
);
2257 if (floatx80_invalid_encoding(ST1
) || floatx80_invalid_encoding(ST0
)) {
2258 float_raise(float_flag_invalid
, &env
->fp_status
);
2259 ST0
= floatx80_default_nan(&env
->fp_status
);
2260 } else if (floatx80_is_any_nan(ST1
)) {
2261 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
2262 float_raise(float_flag_invalid
, &env
->fp_status
);
2265 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
2266 float_raise(float_flag_invalid
, &env
->fp_status
);
2267 ST0
= floatx80_silence_nan(ST0
, &env
->fp_status
);
2269 } else if (floatx80_is_infinity(ST1
) &&
2270 !floatx80_invalid_encoding(ST0
) &&
2271 !floatx80_is_any_nan(ST0
)) {
2272 if (floatx80_is_neg(ST1
)) {
2273 if (floatx80_is_infinity(ST0
)) {
2274 float_raise(float_flag_invalid
, &env
->fp_status
);
2275 ST0
= floatx80_default_nan(&env
->fp_status
);
2277 ST0
= (floatx80_is_neg(ST0
) ?
2278 floatx80_chs(floatx80_zero
) :
2282 if (floatx80_is_zero(ST0
)) {
2283 float_raise(float_flag_invalid
, &env
->fp_status
);
2284 ST0
= floatx80_default_nan(&env
->fp_status
);
2286 ST0
= (floatx80_is_neg(ST0
) ?
2287 floatx80_chs(floatx80_infinity
) :
2293 signed char save
= env
->fp_status
.floatx80_rounding_precision
;
2294 uint8_t save_flags
= get_float_exception_flags(&env
->fp_status
);
2295 set_float_exception_flags(0, &env
->fp_status
);
2296 n
= floatx80_to_int32_round_to_zero(ST1
, &env
->fp_status
);
2297 set_float_exception_flags(save_flags
, &env
->fp_status
);
2298 env
->fp_status
.floatx80_rounding_precision
= 80;
2299 ST0
= floatx80_scalbn(ST0
, n
, &env
->fp_status
);
2300 env
->fp_status
.floatx80_rounding_precision
= save
;
2302 merge_exception_flags(env
, old_flags
);
2305 void helper_fsin(CPUX86State
*env
)
2307 double fptemp
= floatx80_to_double(env
, ST0
);
2309 if ((fptemp
> MAXTAN
) || (fptemp
< -MAXTAN
)) {
2312 ST0
= double_to_floatx80(env
, sin(fptemp
));
2313 env
->fpus
&= ~0x400; /* C2 <-- 0 */
2314 /* the above code is for |arg| < 2**53 only */
2318 void helper_fcos(CPUX86State
*env
)
2320 double fptemp
= floatx80_to_double(env
, ST0
);
2322 if ((fptemp
> MAXTAN
) || (fptemp
< -MAXTAN
)) {
2325 ST0
= double_to_floatx80(env
, cos(fptemp
));
2326 env
->fpus
&= ~0x400; /* C2 <-- 0 */
2327 /* the above code is for |arg| < 2**63 only */
2331 void helper_fxam_ST0(CPUX86State
*env
)
2338 env
->fpus
&= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
2340 env
->fpus
|= 0x200; /* C1 <-- 1 */
2343 if (env
->fptags
[env
->fpstt
]) {
2344 env
->fpus
|= 0x4100; /* Empty */
2348 expdif
= EXPD(temp
);
2349 if (expdif
== MAXEXPD
) {
2350 if (MANTD(temp
) == 0x8000000000000000ULL
) {
2351 env
->fpus
|= 0x500; /* Infinity */
2352 } else if (MANTD(temp
) & 0x8000000000000000ULL
) {
2353 env
->fpus
|= 0x100; /* NaN */
2355 } else if (expdif
== 0) {
2356 if (MANTD(temp
) == 0) {
2357 env
->fpus
|= 0x4000; /* Zero */
2359 env
->fpus
|= 0x4400; /* Denormal */
2361 } else if (MANTD(temp
) & 0x8000000000000000ULL
) {
2366 static void do_fstenv(CPUX86State
*env
, target_ulong ptr
, int data32
,
2369 int fpus
, fptag
, exp
, i
;
2373 fpus
= (env
->fpus
& ~0x3800) | (env
->fpstt
& 0x7) << 11;
2375 for (i
= 7; i
>= 0; i
--) {
2377 if (env
->fptags
[i
]) {
2380 tmp
.d
= env
->fpregs
[i
].d
;
2383 if (exp
== 0 && mant
== 0) {
2386 } else if (exp
== 0 || exp
== MAXEXPD
2387 || (mant
& (1LL << 63)) == 0) {
2388 /* NaNs, infinity, denormal */
2395 cpu_stl_data_ra(env
, ptr
, env
->fpuc
, retaddr
);
2396 cpu_stl_data_ra(env
, ptr
+ 4, fpus
, retaddr
);
2397 cpu_stl_data_ra(env
, ptr
+ 8, fptag
, retaddr
);
2398 cpu_stl_data_ra(env
, ptr
+ 12, 0, retaddr
); /* fpip */
2399 cpu_stl_data_ra(env
, ptr
+ 16, 0, retaddr
); /* fpcs */
2400 cpu_stl_data_ra(env
, ptr
+ 20, 0, retaddr
); /* fpoo */
2401 cpu_stl_data_ra(env
, ptr
+ 24, 0, retaddr
); /* fpos */
2404 cpu_stw_data_ra(env
, ptr
, env
->fpuc
, retaddr
);
2405 cpu_stw_data_ra(env
, ptr
+ 2, fpus
, retaddr
);
2406 cpu_stw_data_ra(env
, ptr
+ 4, fptag
, retaddr
);
2407 cpu_stw_data_ra(env
, ptr
+ 6, 0, retaddr
);
2408 cpu_stw_data_ra(env
, ptr
+ 8, 0, retaddr
);
2409 cpu_stw_data_ra(env
, ptr
+ 10, 0, retaddr
);
2410 cpu_stw_data_ra(env
, ptr
+ 12, 0, retaddr
);
2414 void helper_fstenv(CPUX86State
*env
, target_ulong ptr
, int data32
)
2416 do_fstenv(env
, ptr
, data32
, GETPC());
2419 static void cpu_set_fpus(CPUX86State
*env
, uint16_t fpus
)
2421 env
->fpstt
= (fpus
>> 11) & 7;
2422 env
->fpus
= fpus
& ~0x3800 & ~FPUS_B
;
2423 env
->fpus
|= env
->fpus
& FPUS_SE
? FPUS_B
: 0;
2424 #if !defined(CONFIG_USER_ONLY)
2425 if (!(env
->fpus
& FPUS_SE
)) {
2427 * Here the processor deasserts FERR#; in response, the chipset deasserts
2435 static void do_fldenv(CPUX86State
*env
, target_ulong ptr
, int data32
,
2441 cpu_set_fpuc(env
, cpu_lduw_data_ra(env
, ptr
, retaddr
));
2442 fpus
= cpu_lduw_data_ra(env
, ptr
+ 4, retaddr
);
2443 fptag
= cpu_lduw_data_ra(env
, ptr
+ 8, retaddr
);
2445 cpu_set_fpuc(env
, cpu_lduw_data_ra(env
, ptr
, retaddr
));
2446 fpus
= cpu_lduw_data_ra(env
, ptr
+ 2, retaddr
);
2447 fptag
= cpu_lduw_data_ra(env
, ptr
+ 4, retaddr
);
2449 cpu_set_fpus(env
, fpus
);
2450 for (i
= 0; i
< 8; i
++) {
2451 env
->fptags
[i
] = ((fptag
& 3) == 3);
2456 void helper_fldenv(CPUX86State
*env
, target_ulong ptr
, int data32
)
2458 do_fldenv(env
, ptr
, data32
, GETPC());
2461 void helper_fsave(CPUX86State
*env
, target_ulong ptr
, int data32
)
2466 do_fstenv(env
, ptr
, data32
, GETPC());
2468 ptr
+= (14 << data32
);
2469 for (i
= 0; i
< 8; i
++) {
2471 helper_fstt(env
, tmp
, ptr
, GETPC());
2478 cpu_set_fpuc(env
, 0x37f);
2489 void helper_frstor(CPUX86State
*env
, target_ulong ptr
, int data32
)
2494 do_fldenv(env
, ptr
, data32
, GETPC());
2495 ptr
+= (14 << data32
);
2497 for (i
= 0; i
< 8; i
++) {
2498 tmp
= helper_fldt(env
, ptr
, GETPC());
2504 #if defined(CONFIG_USER_ONLY)
2505 void cpu_x86_fsave(CPUX86State
*env
, target_ulong ptr
, int data32
)
2507 helper_fsave(env
, ptr
, data32
);
2510 void cpu_x86_frstor(CPUX86State
*env
, target_ulong ptr
, int data32
)
2512 helper_frstor(env
, ptr
, data32
);
2516 #define XO(X) offsetof(X86XSaveArea, X)
2518 static void do_xsave_fpu(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2523 fpus
= (env
->fpus
& ~0x3800) | (env
->fpstt
& 0x7) << 11;
2525 for (i
= 0; i
< 8; i
++) {
2526 fptag
|= (env
->fptags
[i
] << i
);
2529 cpu_stw_data_ra(env
, ptr
+ XO(legacy
.fcw
), env
->fpuc
, ra
);
2530 cpu_stw_data_ra(env
, ptr
+ XO(legacy
.fsw
), fpus
, ra
);
2531 cpu_stw_data_ra(env
, ptr
+ XO(legacy
.ftw
), fptag
^ 0xff, ra
);
2533 /* In 32-bit mode this is eip, sel, dp, sel.
2534 In 64-bit mode this is rip, rdp.
2535 But in either case we don't write actual data, just zeros. */
2536 cpu_stq_data_ra(env
, ptr
+ XO(legacy
.fpip
), 0, ra
); /* eip+sel; rip */
2537 cpu_stq_data_ra(env
, ptr
+ XO(legacy
.fpdp
), 0, ra
); /* edp+sel; rdp */
2539 addr
= ptr
+ XO(legacy
.fpregs
);
2540 for (i
= 0; i
< 8; i
++) {
2541 floatx80 tmp
= ST(i
);
2542 helper_fstt(env
, tmp
, addr
, ra
);
2547 static void do_xsave_mxcsr(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2549 update_mxcsr_from_sse_status(env
);
2550 cpu_stl_data_ra(env
, ptr
+ XO(legacy
.mxcsr
), env
->mxcsr
, ra
);
2551 cpu_stl_data_ra(env
, ptr
+ XO(legacy
.mxcsr_mask
), 0x0000ffff, ra
);
2554 static void do_xsave_sse(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2559 if (env
->hflags
& HF_CS64_MASK
) {
2565 addr
= ptr
+ XO(legacy
.xmm_regs
);
2566 for (i
= 0; i
< nb_xmm_regs
; i
++) {
2567 cpu_stq_data_ra(env
, addr
, env
->xmm_regs
[i
].ZMM_Q(0), ra
);
2568 cpu_stq_data_ra(env
, addr
+ 8, env
->xmm_regs
[i
].ZMM_Q(1), ra
);
2573 static void do_xsave_bndregs(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2575 target_ulong addr
= ptr
+ offsetof(XSaveBNDREG
, bnd_regs
);
2578 for (i
= 0; i
< 4; i
++, addr
+= 16) {
2579 cpu_stq_data_ra(env
, addr
, env
->bnd_regs
[i
].lb
, ra
);
2580 cpu_stq_data_ra(env
, addr
+ 8, env
->bnd_regs
[i
].ub
, ra
);
2584 static void do_xsave_bndcsr(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2586 cpu_stq_data_ra(env
, ptr
+ offsetof(XSaveBNDCSR
, bndcsr
.cfgu
),
2587 env
->bndcs_regs
.cfgu
, ra
);
2588 cpu_stq_data_ra(env
, ptr
+ offsetof(XSaveBNDCSR
, bndcsr
.sts
),
2589 env
->bndcs_regs
.sts
, ra
);
2592 static void do_xsave_pkru(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2594 cpu_stq_data_ra(env
, ptr
, env
->pkru
, ra
);
2597 void helper_fxsave(CPUX86State
*env
, target_ulong ptr
)
2599 uintptr_t ra
= GETPC();
2601 /* The operand must be 16 byte aligned */
2603 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
2606 do_xsave_fpu(env
, ptr
, ra
);
2608 if (env
->cr
[4] & CR4_OSFXSR_MASK
) {
2609 do_xsave_mxcsr(env
, ptr
, ra
);
2610 /* Fast FXSAVE leaves out the XMM registers */
2611 if (!(env
->efer
& MSR_EFER_FFXSR
)
2612 || (env
->hflags
& HF_CPL_MASK
)
2613 || !(env
->hflags
& HF_LMA_MASK
)) {
2614 do_xsave_sse(env
, ptr
, ra
);
2619 static uint64_t get_xinuse(CPUX86State
*env
)
2621 uint64_t inuse
= -1;
2623 /* For the most part, we don't track XINUSE. We could calculate it
2624 here for all components, but it's probably less work to simply
2625 indicate in use. That said, the state of BNDREGS is important
2626 enough to track in HFLAGS, so we might as well use that here. */
2627 if ((env
->hflags
& HF_MPX_IU_MASK
) == 0) {
2628 inuse
&= ~XSTATE_BNDREGS_MASK
;
2633 static void do_xsave(CPUX86State
*env
, target_ulong ptr
, uint64_t rfbm
,
2634 uint64_t inuse
, uint64_t opt
, uintptr_t ra
)
2636 uint64_t old_bv
, new_bv
;
2638 /* The OS must have enabled XSAVE. */
2639 if (!(env
->cr
[4] & CR4_OSXSAVE_MASK
)) {
2640 raise_exception_ra(env
, EXCP06_ILLOP
, ra
);
2643 /* The operand must be 64 byte aligned. */
2645 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
2648 /* Never save anything not enabled by XCR0. */
2652 if (opt
& XSTATE_FP_MASK
) {
2653 do_xsave_fpu(env
, ptr
, ra
);
2655 if (rfbm
& XSTATE_SSE_MASK
) {
2656 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
2657 do_xsave_mxcsr(env
, ptr
, ra
);
2659 if (opt
& XSTATE_SSE_MASK
) {
2660 do_xsave_sse(env
, ptr
, ra
);
2662 if (opt
& XSTATE_BNDREGS_MASK
) {
2663 do_xsave_bndregs(env
, ptr
+ XO(bndreg_state
), ra
);
2665 if (opt
& XSTATE_BNDCSR_MASK
) {
2666 do_xsave_bndcsr(env
, ptr
+ XO(bndcsr_state
), ra
);
2668 if (opt
& XSTATE_PKRU_MASK
) {
2669 do_xsave_pkru(env
, ptr
+ XO(pkru_state
), ra
);
2672 /* Update the XSTATE_BV field. */
2673 old_bv
= cpu_ldq_data_ra(env
, ptr
+ XO(header
.xstate_bv
), ra
);
2674 new_bv
= (old_bv
& ~rfbm
) | (inuse
& rfbm
);
2675 cpu_stq_data_ra(env
, ptr
+ XO(header
.xstate_bv
), new_bv
, ra
);
2678 void helper_xsave(CPUX86State
*env
, target_ulong ptr
, uint64_t rfbm
)
2680 do_xsave(env
, ptr
, rfbm
, get_xinuse(env
), -1, GETPC());
2683 void helper_xsaveopt(CPUX86State
*env
, target_ulong ptr
, uint64_t rfbm
)
2685 uint64_t inuse
= get_xinuse(env
);
2686 do_xsave(env
, ptr
, rfbm
, inuse
, inuse
, GETPC());
2689 static void do_xrstor_fpu(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2691 int i
, fpuc
, fpus
, fptag
;
2694 fpuc
= cpu_lduw_data_ra(env
, ptr
+ XO(legacy
.fcw
), ra
);
2695 fpus
= cpu_lduw_data_ra(env
, ptr
+ XO(legacy
.fsw
), ra
);
2696 fptag
= cpu_lduw_data_ra(env
, ptr
+ XO(legacy
.ftw
), ra
);
2697 cpu_set_fpuc(env
, fpuc
);
2698 cpu_set_fpus(env
, fpus
);
2700 for (i
= 0; i
< 8; i
++) {
2701 env
->fptags
[i
] = ((fptag
>> i
) & 1);
2704 addr
= ptr
+ XO(legacy
.fpregs
);
2705 for (i
= 0; i
< 8; i
++) {
2706 floatx80 tmp
= helper_fldt(env
, addr
, ra
);
2712 static void do_xrstor_mxcsr(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2714 cpu_set_mxcsr(env
, cpu_ldl_data_ra(env
, ptr
+ XO(legacy
.mxcsr
), ra
));
2717 static void do_xrstor_sse(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2722 if (env
->hflags
& HF_CS64_MASK
) {
2728 addr
= ptr
+ XO(legacy
.xmm_regs
);
2729 for (i
= 0; i
< nb_xmm_regs
; i
++) {
2730 env
->xmm_regs
[i
].ZMM_Q(0) = cpu_ldq_data_ra(env
, addr
, ra
);
2731 env
->xmm_regs
[i
].ZMM_Q(1) = cpu_ldq_data_ra(env
, addr
+ 8, ra
);
2736 static void do_xrstor_bndregs(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2738 target_ulong addr
= ptr
+ offsetof(XSaveBNDREG
, bnd_regs
);
2741 for (i
= 0; i
< 4; i
++, addr
+= 16) {
2742 env
->bnd_regs
[i
].lb
= cpu_ldq_data_ra(env
, addr
, ra
);
2743 env
->bnd_regs
[i
].ub
= cpu_ldq_data_ra(env
, addr
+ 8, ra
);
2747 static void do_xrstor_bndcsr(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2749 /* FIXME: Extend highest implemented bit of linear address. */
2750 env
->bndcs_regs
.cfgu
2751 = cpu_ldq_data_ra(env
, ptr
+ offsetof(XSaveBNDCSR
, bndcsr
.cfgu
), ra
);
2753 = cpu_ldq_data_ra(env
, ptr
+ offsetof(XSaveBNDCSR
, bndcsr
.sts
), ra
);
2756 static void do_xrstor_pkru(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2758 env
->pkru
= cpu_ldq_data_ra(env
, ptr
, ra
);
2761 void helper_fxrstor(CPUX86State
*env
, target_ulong ptr
)
2763 uintptr_t ra
= GETPC();
2765 /* The operand must be 16 byte aligned */
2767 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
2770 do_xrstor_fpu(env
, ptr
, ra
);
2772 if (env
->cr
[4] & CR4_OSFXSR_MASK
) {
2773 do_xrstor_mxcsr(env
, ptr
, ra
);
2774 /* Fast FXRSTOR leaves out the XMM registers */
2775 if (!(env
->efer
& MSR_EFER_FFXSR
)
2776 || (env
->hflags
& HF_CPL_MASK
)
2777 || !(env
->hflags
& HF_LMA_MASK
)) {
2778 do_xrstor_sse(env
, ptr
, ra
);
2783 #if defined(CONFIG_USER_ONLY)
2784 void cpu_x86_fxsave(CPUX86State
*env
, target_ulong ptr
)
2786 helper_fxsave(env
, ptr
);
2789 void cpu_x86_fxrstor(CPUX86State
*env
, target_ulong ptr
)
2791 helper_fxrstor(env
, ptr
);
2795 void helper_xrstor(CPUX86State
*env
, target_ulong ptr
, uint64_t rfbm
)
2797 uintptr_t ra
= GETPC();
2798 uint64_t xstate_bv
, xcomp_bv
, reserve0
;
2802 /* The OS must have enabled XSAVE. */
2803 if (!(env
->cr
[4] & CR4_OSXSAVE_MASK
)) {
2804 raise_exception_ra(env
, EXCP06_ILLOP
, ra
);
2807 /* The operand must be 64 byte aligned. */
2809 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
2812 xstate_bv
= cpu_ldq_data_ra(env
, ptr
+ XO(header
.xstate_bv
), ra
);
2814 if ((int64_t)xstate_bv
< 0) {
2815 /* FIXME: Compact form. */
2816 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
2819 /* Standard form. */
2821 /* The XSTATE_BV field must not set bits not present in XCR0. */
2822 if (xstate_bv
& ~env
->xcr0
) {
2823 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
2826 /* The XCOMP_BV field must be zero. Note that, as of the April 2016
2827 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
2828 describes only XCOMP_BV, but the description of the standard form
2829 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
2830 includes the next 64-bit field. */
2831 xcomp_bv
= cpu_ldq_data_ra(env
, ptr
+ XO(header
.xcomp_bv
), ra
);
2832 reserve0
= cpu_ldq_data_ra(env
, ptr
+ XO(header
.reserve0
), ra
);
2833 if (xcomp_bv
|| reserve0
) {
2834 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
2837 if (rfbm
& XSTATE_FP_MASK
) {
2838 if (xstate_bv
& XSTATE_FP_MASK
) {
2839 do_xrstor_fpu(env
, ptr
, ra
);
2842 memset(env
->fpregs
, 0, sizeof(env
->fpregs
));
2845 if (rfbm
& XSTATE_SSE_MASK
) {
2846 /* Note that the standard form of XRSTOR loads MXCSR from memory
2847 whether or not the XSTATE_BV bit is set. */
2848 do_xrstor_mxcsr(env
, ptr
, ra
);
2849 if (xstate_bv
& XSTATE_SSE_MASK
) {
2850 do_xrstor_sse(env
, ptr
, ra
);
2852 /* ??? When AVX is implemented, we may have to be more
2853 selective in the clearing. */
2854 memset(env
->xmm_regs
, 0, sizeof(env
->xmm_regs
));
2857 if (rfbm
& XSTATE_BNDREGS_MASK
) {
2858 if (xstate_bv
& XSTATE_BNDREGS_MASK
) {
2859 do_xrstor_bndregs(env
, ptr
+ XO(bndreg_state
), ra
);
2860 env
->hflags
|= HF_MPX_IU_MASK
;
2862 memset(env
->bnd_regs
, 0, sizeof(env
->bnd_regs
));
2863 env
->hflags
&= ~HF_MPX_IU_MASK
;
2866 if (rfbm
& XSTATE_BNDCSR_MASK
) {
2867 if (xstate_bv
& XSTATE_BNDCSR_MASK
) {
2868 do_xrstor_bndcsr(env
, ptr
+ XO(bndcsr_state
), ra
);
2870 memset(&env
->bndcs_regs
, 0, sizeof(env
->bndcs_regs
));
2872 cpu_sync_bndcs_hflags(env
);
2874 if (rfbm
& XSTATE_PKRU_MASK
) {
2875 uint64_t old_pkru
= env
->pkru
;
2876 if (xstate_bv
& XSTATE_PKRU_MASK
) {
2877 do_xrstor_pkru(env
, ptr
+ XO(pkru_state
), ra
);
2881 if (env
->pkru
!= old_pkru
) {
2882 CPUState
*cs
= env_cpu(env
);
2890 uint64_t helper_xgetbv(CPUX86State
*env
, uint32_t ecx
)
2892 /* The OS must have enabled XSAVE. */
2893 if (!(env
->cr
[4] & CR4_OSXSAVE_MASK
)) {
2894 raise_exception_ra(env
, EXCP06_ILLOP
, GETPC());
2901 if (env
->features
[FEAT_XSAVE
] & CPUID_XSAVE_XGETBV1
) {
2902 return env
->xcr0
& get_xinuse(env
);
2906 raise_exception_ra(env
, EXCP0D_GPF
, GETPC());
2909 void helper_xsetbv(CPUX86State
*env
, uint32_t ecx
, uint64_t mask
)
2911 uint32_t dummy
, ena_lo
, ena_hi
;
2914 /* The OS must have enabled XSAVE. */
2915 if (!(env
->cr
[4] & CR4_OSXSAVE_MASK
)) {
2916 raise_exception_ra(env
, EXCP06_ILLOP
, GETPC());
2919 /* Only XCR0 is defined at present; the FPU may not be disabled. */
2920 if (ecx
!= 0 || (mask
& XSTATE_FP_MASK
) == 0) {
2924 /* Disallow enabling unimplemented features. */
2925 cpu_x86_cpuid(env
, 0x0d, 0, &ena_lo
, &dummy
, &dummy
, &ena_hi
);
2926 ena
= ((uint64_t)ena_hi
<< 32) | ena_lo
;
2931 /* Disallow enabling only half of MPX. */
2932 if ((mask
^ (mask
* (XSTATE_BNDCSR_MASK
/ XSTATE_BNDREGS_MASK
)))
2933 & XSTATE_BNDCSR_MASK
) {
2938 cpu_sync_bndcs_hflags(env
);
2942 raise_exception_ra(env
, EXCP0D_GPF
, GETPC());
2946 /* XXX: optimize by storing fptt and fptags in the static cpu state */
2948 #define SSE_DAZ 0x0040
2949 #define SSE_RC_MASK 0x6000
2950 #define SSE_RC_NEAR 0x0000
2951 #define SSE_RC_DOWN 0x2000
2952 #define SSE_RC_UP 0x4000
2953 #define SSE_RC_CHOP 0x6000
2954 #define SSE_FZ 0x8000
2956 void update_mxcsr_status(CPUX86State
*env
)
2958 uint32_t mxcsr
= env
->mxcsr
;
2961 /* set rounding mode */
2962 switch (mxcsr
& SSE_RC_MASK
) {
2965 rnd_type
= float_round_nearest_even
;
2968 rnd_type
= float_round_down
;
2971 rnd_type
= float_round_up
;
2974 rnd_type
= float_round_to_zero
;
2977 set_float_rounding_mode(rnd_type
, &env
->sse_status
);
2979 /* Set exception flags. */
2980 set_float_exception_flags((mxcsr
& FPUS_IE
? float_flag_invalid
: 0) |
2981 (mxcsr
& FPUS_ZE
? float_flag_divbyzero
: 0) |
2982 (mxcsr
& FPUS_OE
? float_flag_overflow
: 0) |
2983 (mxcsr
& FPUS_UE
? float_flag_underflow
: 0) |
2984 (mxcsr
& FPUS_PE
? float_flag_inexact
: 0),
2987 /* set denormals are zero */
2988 set_flush_inputs_to_zero((mxcsr
& SSE_DAZ
) ? 1 : 0, &env
->sse_status
);
2990 /* set flush to zero */
2991 set_flush_to_zero((mxcsr
& SSE_FZ
) ? 1 : 0, &env
->sse_status
);
2994 void update_mxcsr_from_sse_status(CPUX86State
*env
)
2996 uint8_t flags
= get_float_exception_flags(&env
->sse_status
);
2998 * The MXCSR denormal flag has opposite semantics to
2999 * float_flag_input_denormal (the softfloat code sets that flag
3000 * only when flushing input denormals to zero, but SSE sets it
3001 * only when not flushing them to zero), so is not converted
3004 env
->mxcsr
|= ((flags
& float_flag_invalid
? FPUS_IE
: 0) |
3005 (flags
& float_flag_divbyzero
? FPUS_ZE
: 0) |
3006 (flags
& float_flag_overflow
? FPUS_OE
: 0) |
3007 (flags
& float_flag_underflow
? FPUS_UE
: 0) |
3008 (flags
& float_flag_inexact
? FPUS_PE
: 0) |
3009 (flags
& float_flag_output_denormal
? FPUS_UE
| FPUS_PE
:
3013 void helper_update_mxcsr(CPUX86State
*env
)
3015 update_mxcsr_from_sse_status(env
);
3018 void helper_ldmxcsr(CPUX86State
*env
, uint32_t val
)
3020 cpu_set_mxcsr(env
, val
);
3023 void helper_enter_mmx(CPUX86State
*env
)
3026 *(uint32_t *)(env
->fptags
) = 0;
3027 *(uint32_t *)(env
->fptags
+ 4) = 0;
3030 void helper_emms(CPUX86State
*env
)
3032 /* set to empty state */
3033 *(uint32_t *)(env
->fptags
) = 0x01010101;
3034 *(uint32_t *)(env
->fptags
+ 4) = 0x01010101;
3038 void helper_movq(CPUX86State
*env
, void *d
, void *s
)
3040 *(uint64_t *)d
= *(uint64_t *)s
;
3044 #include "ops_sse.h"
3047 #include "ops_sse.h"