2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
23 #include "exec/helper-proto.h"
24 #include "qemu/host-utils.h"
25 #include "exec/exec-all.h"
26 #include "exec/cpu_ldst.h"
27 #include "fpu/softfloat.h"
28 #include "fpu/softfloat-macros.h"
/*
 * Rounding-control field of the x87 control word (bits 10-11) and the
 * four values it can take.
 */
#define FPU_RC_MASK         0xc00
#define FPU_RC_NEAR         0x000
#define FPU_RC_DOWN         0x400
#define FPU_RC_UP           0x800
#define FPU_RC_CHOP         0xc00

/* 2^63 as a double constant. */
#define MAXTAN 9223372036854775808.0

/* the following deal with x86 long double-precision numbers */
#define MAXEXPD 0x7fff

/*
 * Field accessors for an object exposing `.l.upper` (sign + 15-bit
 * exponent) and `.l.lower` (significand).  The argument is parenthesized
 * so that expressions such as `*ptr` can be passed safely.
 * BIASEXPONENT relies on EXPBIAS, which is not defined in this group of
 * macros and must be provided elsewhere in the file.
 */
#define EXPD(fp)        ((fp).l.upper & 0x7fff)
#define SIGND(fp)       (((fp).l.upper) & 0x8000)
#define MANTD(fp)       ((fp).l.lower)
#define BIASEXPONENT(fp) \
    ((fp).l.upper = ((fp).l.upper & ~(0x7fff)) | EXPBIAS)

/* Bits of the FPU status word (env->fpus). */
#define FPUS_IE (1 << 0)
#define FPUS_DE (1 << 1)
#define FPUS_ZE (1 << 2)
#define FPUS_OE (1 << 3)
#define FPUS_UE (1 << 4)
#define FPUS_PE (1 << 5)
#define FPUS_SF (1 << 6)
#define FPUS_SE (1 << 7)
#define FPUS_B  (1 << 15)

/*
 * Constants loaded by the fld<const> helpers.  A `_d` / `_u` variant
 * differs from its base value by one unit in the last significand bit
 * (one lower for `_d`, one higher for `_u`).
 */
#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
#define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
#define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
#define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)
71 #if !defined(CONFIG_USER_ONLY)
72 static qemu_irq ferr_irq
;
74 void x86_register_ferr_irq(qemu_irq irq
)
79 static void cpu_clear_ignne(void)
81 CPUX86State
*env
= &X86_CPU(first_cpu
)->env
;
82 env
->hflags2
&= ~HF2_IGNNE_MASK
;
85 void cpu_set_ignne(void)
87 CPUX86State
*env
= &X86_CPU(first_cpu
)->env
;
88 env
->hflags2
|= HF2_IGNNE_MASK
;
90 * We get here in response to a write to port F0h. The chipset should
91 * deassert FP_IRQ and FERR# instead should stay signaled until FPSW_SE is
92 * cleared, because FERR# and FP_IRQ are two separate pins on real
93 * hardware. However, we don't model FERR# as a qemu_irq, so we just
94 * do directly what the chipset would do, i.e. deassert FP_IRQ.
96 qemu_irq_lower(ferr_irq
);
101 static inline void fpush(CPUX86State
*env
)
103 env
->fpstt
= (env
->fpstt
- 1) & 7;
104 env
->fptags
[env
->fpstt
] = 0; /* validate stack entry */
107 static inline void fpop(CPUX86State
*env
)
109 env
->fptags
[env
->fpstt
] = 1; /* invalidate stack entry */
110 env
->fpstt
= (env
->fpstt
+ 1) & 7;
113 static inline floatx80
helper_fldt(CPUX86State
*env
, target_ulong ptr
,
118 temp
.l
.lower
= cpu_ldq_data_ra(env
, ptr
, retaddr
);
119 temp
.l
.upper
= cpu_lduw_data_ra(env
, ptr
+ 8, retaddr
);
123 static inline void helper_fstt(CPUX86State
*env
, floatx80 f
, target_ulong ptr
,
129 cpu_stq_data_ra(env
, ptr
, temp
.l
.lower
, retaddr
);
130 cpu_stw_data_ra(env
, ptr
+ 8, temp
.l
.upper
, retaddr
);
133 /* x87 FPU helpers */
135 static inline double floatx80_to_double(CPUX86State
*env
, floatx80 a
)
142 u
.f64
= floatx80_to_float64(a
, &env
->fp_status
);
146 static inline floatx80
double_to_floatx80(CPUX86State
*env
, double a
)
154 return float64_to_floatx80(u
.f64
, &env
->fp_status
);
157 static void fpu_set_exception(CPUX86State
*env
, int mask
)
160 if (env
->fpus
& (~env
->fpuc
& FPUC_EM
)) {
161 env
->fpus
|= FPUS_SE
| FPUS_B
;
165 static inline uint8_t save_exception_flags(CPUX86State
*env
)
167 uint8_t old_flags
= get_float_exception_flags(&env
->fp_status
);
168 set_float_exception_flags(0, &env
->fp_status
);
172 static void merge_exception_flags(CPUX86State
*env
, uint8_t old_flags
)
174 uint8_t new_flags
= get_float_exception_flags(&env
->fp_status
);
175 float_raise(old_flags
, &env
->fp_status
);
176 fpu_set_exception(env
,
177 ((new_flags
& float_flag_invalid
? FPUS_IE
: 0) |
178 (new_flags
& float_flag_divbyzero
? FPUS_ZE
: 0) |
179 (new_flags
& float_flag_overflow
? FPUS_OE
: 0) |
180 (new_flags
& float_flag_underflow
? FPUS_UE
: 0) |
181 (new_flags
& float_flag_inexact
? FPUS_PE
: 0) |
182 (new_flags
& float_flag_input_denormal
? FPUS_DE
: 0)));
185 static inline floatx80
helper_fdiv(CPUX86State
*env
, floatx80 a
, floatx80 b
)
187 uint8_t old_flags
= save_exception_flags(env
);
188 floatx80 ret
= floatx80_div(a
, b
, &env
->fp_status
);
189 merge_exception_flags(env
, old_flags
);
193 static void fpu_raise_exception(CPUX86State
*env
, uintptr_t retaddr
)
195 if (env
->cr
[0] & CR0_NE_MASK
) {
196 raise_exception_ra(env
, EXCP10_COPR
, retaddr
);
198 #if !defined(CONFIG_USER_ONLY)
199 else if (ferr_irq
&& !(env
->hflags2
& HF2_IGNNE_MASK
)) {
200 qemu_irq_raise(ferr_irq
);
205 void helper_flds_FT0(CPUX86State
*env
, uint32_t val
)
207 uint8_t old_flags
= save_exception_flags(env
);
214 FT0
= float32_to_floatx80(u
.f
, &env
->fp_status
);
215 merge_exception_flags(env
, old_flags
);
218 void helper_fldl_FT0(CPUX86State
*env
, uint64_t val
)
220 uint8_t old_flags
= save_exception_flags(env
);
227 FT0
= float64_to_floatx80(u
.f
, &env
->fp_status
);
228 merge_exception_flags(env
, old_flags
);
231 void helper_fildl_FT0(CPUX86State
*env
, int32_t val
)
233 FT0
= int32_to_floatx80(val
, &env
->fp_status
);
236 void helper_flds_ST0(CPUX86State
*env
, uint32_t val
)
238 uint8_t old_flags
= save_exception_flags(env
);
245 new_fpstt
= (env
->fpstt
- 1) & 7;
247 env
->fpregs
[new_fpstt
].d
= float32_to_floatx80(u
.f
, &env
->fp_status
);
248 env
->fpstt
= new_fpstt
;
249 env
->fptags
[new_fpstt
] = 0; /* validate stack entry */
250 merge_exception_flags(env
, old_flags
);
253 void helper_fldl_ST0(CPUX86State
*env
, uint64_t val
)
255 uint8_t old_flags
= save_exception_flags(env
);
262 new_fpstt
= (env
->fpstt
- 1) & 7;
264 env
->fpregs
[new_fpstt
].d
= float64_to_floatx80(u
.f
, &env
->fp_status
);
265 env
->fpstt
= new_fpstt
;
266 env
->fptags
[new_fpstt
] = 0; /* validate stack entry */
267 merge_exception_flags(env
, old_flags
);
270 void helper_fildl_ST0(CPUX86State
*env
, int32_t val
)
274 new_fpstt
= (env
->fpstt
- 1) & 7;
275 env
->fpregs
[new_fpstt
].d
= int32_to_floatx80(val
, &env
->fp_status
);
276 env
->fpstt
= new_fpstt
;
277 env
->fptags
[new_fpstt
] = 0; /* validate stack entry */
280 void helper_fildll_ST0(CPUX86State
*env
, int64_t val
)
284 new_fpstt
= (env
->fpstt
- 1) & 7;
285 env
->fpregs
[new_fpstt
].d
= int64_to_floatx80(val
, &env
->fp_status
);
286 env
->fpstt
= new_fpstt
;
287 env
->fptags
[new_fpstt
] = 0; /* validate stack entry */
290 uint32_t helper_fsts_ST0(CPUX86State
*env
)
292 uint8_t old_flags
= save_exception_flags(env
);
298 u
.f
= floatx80_to_float32(ST0
, &env
->fp_status
);
299 merge_exception_flags(env
, old_flags
);
303 uint64_t helper_fstl_ST0(CPUX86State
*env
)
305 uint8_t old_flags
= save_exception_flags(env
);
311 u
.f
= floatx80_to_float64(ST0
, &env
->fp_status
);
312 merge_exception_flags(env
, old_flags
);
316 int32_t helper_fist_ST0(CPUX86State
*env
)
318 uint8_t old_flags
= save_exception_flags(env
);
321 val
= floatx80_to_int32(ST0
, &env
->fp_status
);
322 if (val
!= (int16_t)val
) {
323 set_float_exception_flags(float_flag_invalid
, &env
->fp_status
);
326 merge_exception_flags(env
, old_flags
);
330 int32_t helper_fistl_ST0(CPUX86State
*env
)
332 uint8_t old_flags
= save_exception_flags(env
);
335 val
= floatx80_to_int32(ST0
, &env
->fp_status
);
336 if (get_float_exception_flags(&env
->fp_status
) & float_flag_invalid
) {
339 merge_exception_flags(env
, old_flags
);
343 int64_t helper_fistll_ST0(CPUX86State
*env
)
345 uint8_t old_flags
= save_exception_flags(env
);
348 val
= floatx80_to_int64(ST0
, &env
->fp_status
);
349 if (get_float_exception_flags(&env
->fp_status
) & float_flag_invalid
) {
350 val
= 0x8000000000000000ULL
;
352 merge_exception_flags(env
, old_flags
);
356 int32_t helper_fistt_ST0(CPUX86State
*env
)
358 uint8_t old_flags
= save_exception_flags(env
);
361 val
= floatx80_to_int32_round_to_zero(ST0
, &env
->fp_status
);
362 if (val
!= (int16_t)val
) {
363 set_float_exception_flags(float_flag_invalid
, &env
->fp_status
);
366 merge_exception_flags(env
, old_flags
);
370 int32_t helper_fisttl_ST0(CPUX86State
*env
)
372 uint8_t old_flags
= save_exception_flags(env
);
375 val
= floatx80_to_int32_round_to_zero(ST0
, &env
->fp_status
);
376 if (get_float_exception_flags(&env
->fp_status
) & float_flag_invalid
) {
379 merge_exception_flags(env
, old_flags
);
383 int64_t helper_fisttll_ST0(CPUX86State
*env
)
385 uint8_t old_flags
= save_exception_flags(env
);
388 val
= floatx80_to_int64_round_to_zero(ST0
, &env
->fp_status
);
389 if (get_float_exception_flags(&env
->fp_status
) & float_flag_invalid
) {
390 val
= 0x8000000000000000ULL
;
392 merge_exception_flags(env
, old_flags
);
396 void helper_fldt_ST0(CPUX86State
*env
, target_ulong ptr
)
400 new_fpstt
= (env
->fpstt
- 1) & 7;
401 env
->fpregs
[new_fpstt
].d
= helper_fldt(env
, ptr
, GETPC());
402 env
->fpstt
= new_fpstt
;
403 env
->fptags
[new_fpstt
] = 0; /* validate stack entry */
406 void helper_fstt_ST0(CPUX86State
*env
, target_ulong ptr
)
408 helper_fstt(env
, ST0
, ptr
, GETPC());
411 void helper_fpush(CPUX86State
*env
)
416 void helper_fpop(CPUX86State
*env
)
421 void helper_fdecstp(CPUX86State
*env
)
423 env
->fpstt
= (env
->fpstt
- 1) & 7;
424 env
->fpus
&= ~0x4700;
427 void helper_fincstp(CPUX86State
*env
)
429 env
->fpstt
= (env
->fpstt
+ 1) & 7;
430 env
->fpus
&= ~0x4700;
435 void helper_ffree_STN(CPUX86State
*env
, int st_index
)
437 env
->fptags
[(env
->fpstt
+ st_index
) & 7] = 1;
440 void helper_fmov_ST0_FT0(CPUX86State
*env
)
445 void helper_fmov_FT0_STN(CPUX86State
*env
, int st_index
)
450 void helper_fmov_ST0_STN(CPUX86State
*env
, int st_index
)
455 void helper_fmov_STN_ST0(CPUX86State
*env
, int st_index
)
460 void helper_fxchg_ST0_STN(CPUX86State
*env
, int st_index
)
/*
 * Status-word bit patterns for the fcom/fucom helpers, indexed by the
 * comparison result plus one (presumably less, equal, greater,
 * unordered -- confirm against the softfloat relation encoding).
 */
static const int fcom_ccval[4] = {
    [0] = 0x0100,
    [1] = 0x4000,
    [2] = 0x0000,
    [3] = 0x4500,
};
473 void helper_fcom_ST0_FT0(CPUX86State
*env
)
475 uint8_t old_flags
= save_exception_flags(env
);
478 ret
= floatx80_compare(ST0
, FT0
, &env
->fp_status
);
479 env
->fpus
= (env
->fpus
& ~0x4500) | fcom_ccval
[ret
+ 1];
480 merge_exception_flags(env
, old_flags
);
483 void helper_fucom_ST0_FT0(CPUX86State
*env
)
485 uint8_t old_flags
= save_exception_flags(env
);
488 ret
= floatx80_compare_quiet(ST0
, FT0
, &env
->fp_status
);
489 env
->fpus
= (env
->fpus
& ~0x4500) | fcom_ccval
[ret
+ 1];
490 merge_exception_flags(env
, old_flags
);
493 static const int fcomi_ccval
[4] = {CC_C
, CC_Z
, 0, CC_Z
| CC_P
| CC_C
};
495 void helper_fcomi_ST0_FT0(CPUX86State
*env
)
497 uint8_t old_flags
= save_exception_flags(env
);
501 ret
= floatx80_compare(ST0
, FT0
, &env
->fp_status
);
502 eflags
= cpu_cc_compute_all(env
, CC_OP
);
503 eflags
= (eflags
& ~(CC_Z
| CC_P
| CC_C
)) | fcomi_ccval
[ret
+ 1];
505 merge_exception_flags(env
, old_flags
);
508 void helper_fucomi_ST0_FT0(CPUX86State
*env
)
510 uint8_t old_flags
= save_exception_flags(env
);
514 ret
= floatx80_compare_quiet(ST0
, FT0
, &env
->fp_status
);
515 eflags
= cpu_cc_compute_all(env
, CC_OP
);
516 eflags
= (eflags
& ~(CC_Z
| CC_P
| CC_C
)) | fcomi_ccval
[ret
+ 1];
518 merge_exception_flags(env
, old_flags
);
521 void helper_fadd_ST0_FT0(CPUX86State
*env
)
523 uint8_t old_flags
= save_exception_flags(env
);
524 ST0
= floatx80_add(ST0
, FT0
, &env
->fp_status
);
525 merge_exception_flags(env
, old_flags
);
528 void helper_fmul_ST0_FT0(CPUX86State
*env
)
530 uint8_t old_flags
= save_exception_flags(env
);
531 ST0
= floatx80_mul(ST0
, FT0
, &env
->fp_status
);
532 merge_exception_flags(env
, old_flags
);
535 void helper_fsub_ST0_FT0(CPUX86State
*env
)
537 uint8_t old_flags
= save_exception_flags(env
);
538 ST0
= floatx80_sub(ST0
, FT0
, &env
->fp_status
);
539 merge_exception_flags(env
, old_flags
);
542 void helper_fsubr_ST0_FT0(CPUX86State
*env
)
544 uint8_t old_flags
= save_exception_flags(env
);
545 ST0
= floatx80_sub(FT0
, ST0
, &env
->fp_status
);
546 merge_exception_flags(env
, old_flags
);
549 void helper_fdiv_ST0_FT0(CPUX86State
*env
)
551 ST0
= helper_fdiv(env
, ST0
, FT0
);
554 void helper_fdivr_ST0_FT0(CPUX86State
*env
)
556 ST0
= helper_fdiv(env
, FT0
, ST0
);
559 /* fp operations between STN and ST0 */
561 void helper_fadd_STN_ST0(CPUX86State
*env
, int st_index
)
563 uint8_t old_flags
= save_exception_flags(env
);
564 ST(st_index
) = floatx80_add(ST(st_index
), ST0
, &env
->fp_status
);
565 merge_exception_flags(env
, old_flags
);
568 void helper_fmul_STN_ST0(CPUX86State
*env
, int st_index
)
570 uint8_t old_flags
= save_exception_flags(env
);
571 ST(st_index
) = floatx80_mul(ST(st_index
), ST0
, &env
->fp_status
);
572 merge_exception_flags(env
, old_flags
);
575 void helper_fsub_STN_ST0(CPUX86State
*env
, int st_index
)
577 uint8_t old_flags
= save_exception_flags(env
);
578 ST(st_index
) = floatx80_sub(ST(st_index
), ST0
, &env
->fp_status
);
579 merge_exception_flags(env
, old_flags
);
582 void helper_fsubr_STN_ST0(CPUX86State
*env
, int st_index
)
584 uint8_t old_flags
= save_exception_flags(env
);
585 ST(st_index
) = floatx80_sub(ST0
, ST(st_index
), &env
->fp_status
);
586 merge_exception_flags(env
, old_flags
);
589 void helper_fdiv_STN_ST0(CPUX86State
*env
, int st_index
)
594 *p
= helper_fdiv(env
, *p
, ST0
);
597 void helper_fdivr_STN_ST0(CPUX86State
*env
, int st_index
)
602 *p
= helper_fdiv(env
, ST0
, *p
);
605 /* misc FPU operations */
606 void helper_fchs_ST0(CPUX86State
*env
)
608 ST0
= floatx80_chs(ST0
);
611 void helper_fabs_ST0(CPUX86State
*env
)
613 ST0
= floatx80_abs(ST0
);
616 void helper_fld1_ST0(CPUX86State
*env
)
621 void helper_fldl2t_ST0(CPUX86State
*env
)
623 switch (env
->fpuc
& FPU_RC_MASK
) {
625 ST0
= floatx80_l2t_u
;
633 void helper_fldl2e_ST0(CPUX86State
*env
)
635 switch (env
->fpuc
& FPU_RC_MASK
) {
638 ST0
= floatx80_l2e_d
;
646 void helper_fldpi_ST0(CPUX86State
*env
)
648 switch (env
->fpuc
& FPU_RC_MASK
) {
659 void helper_fldlg2_ST0(CPUX86State
*env
)
661 switch (env
->fpuc
& FPU_RC_MASK
) {
664 ST0
= floatx80_lg2_d
;
672 void helper_fldln2_ST0(CPUX86State
*env
)
674 switch (env
->fpuc
& FPU_RC_MASK
) {
677 ST0
= floatx80_ln2_d
;
685 void helper_fldz_ST0(CPUX86State
*env
)
690 void helper_fldz_FT0(CPUX86State
*env
)
695 uint32_t helper_fnstsw(CPUX86State
*env
)
697 return (env
->fpus
& ~0x3800) | (env
->fpstt
& 0x7) << 11;
700 uint32_t helper_fnstcw(CPUX86State
*env
)
705 void update_fp_status(CPUX86State
*env
)
709 /* set rounding mode */
710 switch (env
->fpuc
& FPU_RC_MASK
) {
713 rnd_type
= float_round_nearest_even
;
716 rnd_type
= float_round_down
;
719 rnd_type
= float_round_up
;
722 rnd_type
= float_round_to_zero
;
725 set_float_rounding_mode(rnd_type
, &env
->fp_status
);
726 switch ((env
->fpuc
>> 8) & 3) {
738 set_floatx80_rounding_precision(rnd_type
, &env
->fp_status
);
741 void helper_fldcw(CPUX86State
*env
, uint32_t val
)
743 cpu_set_fpuc(env
, val
);
746 void helper_fclex(CPUX86State
*env
)
751 void helper_fwait(CPUX86State
*env
)
753 if (env
->fpus
& FPUS_SE
) {
754 fpu_raise_exception(env
, GETPC());
758 void helper_fninit(CPUX86State
*env
)
762 cpu_set_fpuc(env
, 0x37f);
775 void helper_fbld_ST0(CPUX86State
*env
, target_ulong ptr
)
783 for (i
= 8; i
>= 0; i
--) {
784 v
= cpu_ldub_data_ra(env
, ptr
+ i
, GETPC());
785 val
= (val
* 100) + ((v
>> 4) * 10) + (v
& 0xf);
787 tmp
= int64_to_floatx80(val
, &env
->fp_status
);
788 if (cpu_ldub_data_ra(env
, ptr
+ 9, GETPC()) & 0x80) {
789 tmp
= floatx80_chs(tmp
);
795 void helper_fbst_ST0(CPUX86State
*env
, target_ulong ptr
)
797 uint8_t old_flags
= save_exception_flags(env
);
799 target_ulong mem_ref
, mem_end
;
805 val
= floatx80_to_int64(ST0
, &env
->fp_status
);
807 if (val
>= 1000000000000000000LL || val
<= -1000000000000000000LL) {
808 set_float_exception_flags(float_flag_invalid
, &env
->fp_status
);
809 while (mem_ref
< ptr
+ 7) {
810 cpu_stb_data_ra(env
, mem_ref
++, 0, GETPC());
812 cpu_stb_data_ra(env
, mem_ref
++, 0xc0, GETPC());
813 cpu_stb_data_ra(env
, mem_ref
++, 0xff, GETPC());
814 cpu_stb_data_ra(env
, mem_ref
++, 0xff, GETPC());
815 merge_exception_flags(env
, old_flags
);
818 mem_end
= mem_ref
+ 9;
820 cpu_stb_data_ra(env
, mem_end
, 0x80, GETPC());
823 cpu_stb_data_ra(env
, mem_end
, 0x00, GETPC());
825 while (mem_ref
< mem_end
) {
831 v
= ((v
/ 10) << 4) | (v
% 10);
832 cpu_stb_data_ra(env
, mem_ref
++, v
, GETPC());
834 while (mem_ref
< mem_end
) {
835 cpu_stb_data_ra(env
, mem_ref
++, 0, GETPC());
837 merge_exception_flags(env
, old_flags
);
/* 128-bit significand of log(2), split into high and low 64-bit halves. */
#define ln2_sig_high 0xb17217f7d1cf79abULL
#define ln2_sig_low 0xc9e3b39803f2f6afULL

/*
 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
 * the interval [-1/64, 1/64].  Coefficient 0 is given as a main value
 * plus a low-order correction term (f2xm1_coeff_0_low).
 */
#define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
#define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
#define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
#define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
#define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
#define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
#define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
#define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
#define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL)
860 * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
861 * are very close to exact floatx80 values.
864 /* The value of 2^t. */
866 /* The value of 2^t - 1. */
870 static const struct f2xm1_data f2xm1_table
[65] = {
871 { make_floatx80_init(0xbfff, 0x8000000000000000ULL
),
872 make_floatx80_init(0x3ffe, 0x8000000000000000ULL
),
873 make_floatx80_init(0xbffe, 0x8000000000000000ULL
) },
874 { make_floatx80_init(0xbffe, 0xf800000000002e7eULL
),
875 make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL
),
876 make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL
) },
877 { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL
),
878 make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL
),
879 make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL
) },
880 { make_floatx80_init(0xbffe, 0xe800000000006f10ULL
),
881 make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL
),
882 make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL
) },
883 { make_floatx80_init(0xbffe, 0xe000000000008a45ULL
),
884 make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL
),
885 make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL
) },
886 { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL
),
887 make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL
),
888 make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL
) },
889 { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL
),
890 make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL
),
891 make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL
) },
892 { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL
),
893 make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL
),
894 make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL
) },
895 { make_floatx80_init(0xbffe, 0xc000000000006530ULL
),
896 make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL
),
897 make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL
) },
898 { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL
),
899 make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL
),
900 make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL
) },
901 { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL
),
902 make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL
),
903 make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL
) },
904 { make_floatx80_init(0xbffe, 0xa800000000006f8aULL
),
905 make_floatx80_init(0x3ffe, 0xa27043030c49370aULL
),
906 make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL
) },
907 { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL
),
908 make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL
),
909 make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL
) },
910 { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL
),
911 make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL
),
912 make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL
) },
913 { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL
),
914 make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL
),
915 make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL
) },
916 { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL
),
917 make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL
),
918 make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL
) },
919 { make_floatx80_init(0xbffe, 0x800000000000227dULL
),
920 make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL
),
921 make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL
) },
922 { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL
),
923 make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL
),
924 make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL
) },
925 { make_floatx80_init(0xbffd, 0xe00000000000df81ULL
),
926 make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL
),
927 make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL
) },
928 { make_floatx80_init(0xbffd, 0xd00000000000bccfULL
),
929 make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL
),
930 make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL
) },
931 { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL
),
932 make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL
),
933 make_floatx80_init(0xbffc, 0xea6357baabe59508ULL
) },
934 { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL
),
935 make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL
),
936 make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL
) },
937 { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL
),
938 make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL
),
939 make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL
) },
940 { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL
),
941 make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL
),
942 make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL
) },
943 { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL
),
944 make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL
),
945 make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL
) },
946 { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL
),
947 make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL
),
948 make_floatx80_init(0xbffc, 0x901121a0943696f0ULL
) },
949 { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL
),
950 make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL
),
951 make_floatx80_init(0xbffb, 0xf999089eab583f78ULL
) },
952 { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL
),
953 make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL
),
954 make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL
) },
955 { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL
),
956 make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL
),
957 make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL
) },
958 { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL
),
959 make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL
),
960 make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL
) },
961 { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL
),
962 make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL
),
963 make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL
) },
964 { make_floatx80_init(0xbff9, 0xffffffffffff11feULL
),
965 make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL
),
966 make_floatx80_init(0xbff9, 0xaf89a491babef740ULL
) },
967 { floatx80_zero_init
,
968 make_floatx80_init(0x3fff, 0x8000000000000000ULL
),
969 floatx80_zero_init
},
970 { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL
),
971 make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL
),
972 make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL
) },
973 { make_floatx80_init(0x3ffb, 0x800000000000b500ULL
),
974 make_floatx80_init(0x3fff, 0x85aac367cc488345ULL
),
975 make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL
) },
976 { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL
),
977 make_floatx80_init(0x3fff, 0x88980e8092da7cceULL
),
978 make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL
) },
979 { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL
),
980 make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL
),
981 make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL
) },
982 { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL
),
983 make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL
),
984 make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL
) },
985 { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL
),
986 make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL
),
987 make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL
) },
988 { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL
),
989 make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL
),
990 make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL
) },
991 { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL
),
992 make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL
),
993 make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL
) },
994 { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL
),
995 make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL
),
996 make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL
) },
997 { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL
),
998 make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL
),
999 make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL
) },
1000 { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL
),
1001 make_floatx80_init(0x3fff, 0xa27043030c49370aULL
),
1002 make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL
) },
1003 { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL
),
1004 make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL
),
1005 make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL
) },
1006 { make_floatx80_init(0x3ffd, 0xd0000000000093beULL
),
1007 make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL
),
1008 make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL
) },
1009 { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL
),
1010 make_floatx80_init(0x3fff, 0xad583eea42a17876ULL
),
1011 make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL
) },
1012 { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL
),
1013 make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL
),
1014 make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL
) },
1015 { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL
),
1016 make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL
),
1017 make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL
) },
1018 { make_floatx80_init(0x3ffe, 0x8800000000006344ULL
),
1019 make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL
),
1020 make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL
) },
1021 { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL
),
1022 make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL
),
1023 make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL
) },
1024 { make_floatx80_init(0x3ffe, 0x9800000000009127ULL
),
1025 make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL
),
1026 make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL
) },
1027 { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL
),
1028 make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL
),
1029 make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL
) },
1030 { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL
),
1031 make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL
),
1032 make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL
) },
1033 { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL
),
1034 make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL
),
1035 make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL
) },
1036 { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL
),
1037 make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL
),
1038 make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL
) },
1039 { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL
),
1040 make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL
),
1041 make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL
) },
1042 { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL
),
1043 make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL
),
1044 make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL
) },
1045 { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL
),
1046 make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL
),
1047 make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL
) },
1048 { make_floatx80_init(0x3ffe, 0xd800000000004165ULL
),
1049 make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL
),
1050 make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL
) },
1051 { make_floatx80_init(0x3ffe, 0xe00000000000582cULL
),
1052 make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL
),
1053 make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL
) },
1054 { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL
),
1055 make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL
),
1056 make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL
) },
1057 { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL
),
1058 make_floatx80_init(0x3fff, 0xf5257d152486a2faULL
),
1059 make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL
) },
1060 { make_floatx80_init(0x3ffe, 0xf800000000001069ULL
),
1061 make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL
),
1062 make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL
) },
1063 { make_floatx80_init(0x3fff, 0x8000000000000000ULL
),
1064 make_floatx80_init(0x4000, 0x8000000000000000ULL
),
1065 make_floatx80_init(0x3fff, 0x8000000000000000ULL
) },
1068 void helper_f2xm1(CPUX86State
*env
)
1070 uint8_t old_flags
= save_exception_flags(env
);
1071 uint64_t sig
= extractFloatx80Frac(ST0
);
1072 int32_t exp
= extractFloatx80Exp(ST0
);
1073 bool sign
= extractFloatx80Sign(ST0
);
1075 if (floatx80_invalid_encoding(ST0
)) {
1076 float_raise(float_flag_invalid
, &env
->fp_status
);
1077 ST0
= floatx80_default_nan(&env
->fp_status
);
1078 } else if (floatx80_is_any_nan(ST0
)) {
1079 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
1080 float_raise(float_flag_invalid
, &env
->fp_status
);
1081 ST0
= floatx80_silence_nan(ST0
, &env
->fp_status
);
1083 } else if (exp
> 0x3fff ||
1084 (exp
== 0x3fff && sig
!= (0x8000000000000000ULL
))) {
1085 /* Out of range for the instruction, treat as invalid. */
1086 float_raise(float_flag_invalid
, &env
->fp_status
);
1087 ST0
= floatx80_default_nan(&env
->fp_status
);
1088 } else if (exp
== 0x3fff) {
1089 /* Argument 1 or -1, exact result 1 or -0.5. */
1091 ST0
= make_floatx80(0xbffe, 0x8000000000000000ULL
);
1093 } else if (exp
< 0x3fb0) {
1094 if (!floatx80_is_zero(ST0
)) {
1096 * Multiplying the argument by an extra-precision version
1097 * of log(2) is sufficiently precise. Zero arguments are
1098 * returned unchanged.
1100 uint64_t sig0
, sig1
, sig2
;
1102 normalizeFloatx80Subnormal(sig
, &exp
, &sig
);
1104 mul128By64To192(ln2_sig_high
, ln2_sig_low
, sig
, &sig0
, &sig1
,
1106 /* This result is inexact. */
1108 ST0
= normalizeRoundAndPackFloatx80(80, sign
, exp
, sig0
, sig1
,
1112 floatx80 tmp
, y
, accum
;
1114 int32_t n
, aexp
, bexp
;
1115 uint64_t asig0
, asig1
, asig2
, bsig0
, bsig1
;
1116 FloatRoundMode save_mode
= env
->fp_status
.float_rounding_mode
;
1117 signed char save_prec
= env
->fp_status
.floatx80_rounding_precision
;
1118 env
->fp_status
.float_rounding_mode
= float_round_nearest_even
;
1119 env
->fp_status
.floatx80_rounding_precision
= 80;
1121 /* Find the nearest multiple of 1/32 to the argument. */
1122 tmp
= floatx80_scalbn(ST0
, 5, &env
->fp_status
);
1123 n
= 32 + floatx80_to_int32(tmp
, &env
->fp_status
);
1124 y
= floatx80_sub(ST0
, f2xm1_table
[n
].t
, &env
->fp_status
);
1126 if (floatx80_is_zero(y
)) {
1128 * Use the value of 2^t - 1 from the table, to avoid
1129 * needing to special-case zero as a result of
1130 * multiplication below.
1132 ST0
= f2xm1_table
[n
].t
;
1133 set_float_exception_flags(float_flag_inexact
, &env
->fp_status
);
1134 env
->fp_status
.float_rounding_mode
= save_mode
;
1137 * Compute the lower parts of a polynomial expansion for
1140 accum
= floatx80_mul(f2xm1_coeff_7
, y
, &env
->fp_status
);
1141 accum
= floatx80_add(f2xm1_coeff_6
, accum
, &env
->fp_status
);
1142 accum
= floatx80_mul(accum
, y
, &env
->fp_status
);
1143 accum
= floatx80_add(f2xm1_coeff_5
, accum
, &env
->fp_status
);
1144 accum
= floatx80_mul(accum
, y
, &env
->fp_status
);
1145 accum
= floatx80_add(f2xm1_coeff_4
, accum
, &env
->fp_status
);
1146 accum
= floatx80_mul(accum
, y
, &env
->fp_status
);
1147 accum
= floatx80_add(f2xm1_coeff_3
, accum
, &env
->fp_status
);
1148 accum
= floatx80_mul(accum
, y
, &env
->fp_status
);
1149 accum
= floatx80_add(f2xm1_coeff_2
, accum
, &env
->fp_status
);
1150 accum
= floatx80_mul(accum
, y
, &env
->fp_status
);
1151 accum
= floatx80_add(f2xm1_coeff_1
, accum
, &env
->fp_status
);
1152 accum
= floatx80_mul(accum
, y
, &env
->fp_status
);
1153 accum
= floatx80_add(f2xm1_coeff_0_low
, accum
, &env
->fp_status
);
1156 * The full polynomial expansion is f2xm1_coeff_0 + accum
1157 * (where accum has much lower magnitude, and so, in
1158 * particular, carry out of the addition is not possible).
1159 * (This expansion is only accurate to about 70 bits, not
1162 aexp
= extractFloatx80Exp(f2xm1_coeff_0
);
1163 asign
= extractFloatx80Sign(f2xm1_coeff_0
);
1164 shift128RightJamming(extractFloatx80Frac(accum
), 0,
1165 aexp
- extractFloatx80Exp(accum
),
1167 bsig0
= extractFloatx80Frac(f2xm1_coeff_0
);
1169 if (asign
== extractFloatx80Sign(accum
)) {
1170 add128(bsig0
, bsig1
, asig0
, asig1
, &asig0
, &asig1
);
1172 sub128(bsig0
, bsig1
, asig0
, asig1
, &asig0
, &asig1
);
1174 /* And thus compute an approximation to 2^y - 1. */
1175 mul128By64To192(asig0
, asig1
, extractFloatx80Frac(y
),
1176 &asig0
, &asig1
, &asig2
);
1177 aexp
+= extractFloatx80Exp(y
) - 0x3ffe;
1178 asign
^= extractFloatx80Sign(y
);
1181 * Multiply this by the precomputed value of 2^t and
1182 * add that of 2^t - 1.
1184 mul128By64To192(asig0
, asig1
,
1185 extractFloatx80Frac(f2xm1_table
[n
].exp2
),
1186 &asig0
, &asig1
, &asig2
);
1187 aexp
+= extractFloatx80Exp(f2xm1_table
[n
].exp2
) - 0x3ffe;
1188 bexp
= extractFloatx80Exp(f2xm1_table
[n
].exp2m1
);
1189 bsig0
= extractFloatx80Frac(f2xm1_table
[n
].exp2m1
);
1192 shift128RightJamming(bsig0
, bsig1
, aexp
- bexp
,
1194 } else if (aexp
< bexp
) {
1195 shift128RightJamming(asig0
, asig1
, bexp
- aexp
,
1199 /* The sign of 2^t - 1 is always that of the result. */
1200 bsign
= extractFloatx80Sign(f2xm1_table
[n
].exp2m1
);
1201 if (asign
== bsign
) {
1202 /* Avoid possible carry out of the addition. */
1203 shift128RightJamming(asig0
, asig1
, 1,
1205 shift128RightJamming(bsig0
, bsig1
, 1,
1208 add128(asig0
, asig1
, bsig0
, bsig1
, &asig0
, &asig1
);
1210 sub128(bsig0
, bsig1
, asig0
, asig1
, &asig0
, &asig1
);
1214 env
->fp_status
.float_rounding_mode
= save_mode
;
1215 /* This result is inexact. */
1217 ST0
= normalizeRoundAndPackFloatx80(80, asign
, aexp
, asig0
, asig1
,
1221 env
->fp_status
.floatx80_rounding_precision
= save_prec
;
1223 merge_exception_flags(env
, old_flags
);
1226 void helper_fptan(CPUX86State
*env
)
1228 double fptemp
= floatx80_to_double(env
, ST0
);
1230 if ((fptemp
> MAXTAN
) || (fptemp
< -MAXTAN
)) {
1233 fptemp
= tan(fptemp
);
1234 ST0
= double_to_floatx80(env
, fptemp
);
1237 env
->fpus
&= ~0x400; /* C2 <-- 0 */
1238 /* the above code is for |arg| < 2**52 only */
/* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision.  */
#define pi_4_exp 0x3ffe
#define pi_4_sig_high 0xc90fdaa22168c234ULL
#define pi_4_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_2_exp 0x3fff
#define pi_2_sig_high 0xc90fdaa22168c234ULL
#define pi_2_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_34_exp 0x4000
#define pi_34_sig_high 0x96cbe3f9990e91a7ULL
#define pi_34_sig_low 0x9394c9e8a0a5159dULL
#define pi_exp 0x4000
#define pi_sig_high 0xc90fdaa22168c234ULL
#define pi_sig_low 0xc4c6628b80dc1cd1ULL

/*
 * Polynomial coefficients for an approximation to atan(x), with only
 * odd powers of x used, for x in the interval [-1/16, 1/16].  (Unlike
 * for some other approximations, no low part is needed for the first
 * coefficient here to achieve a sufficiently accurate result, because
 * the coefficient in this minimax approximation is very close to
 * exactly 1.)
 */
#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL)
#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)
1272 struct fpatan_data
{
1273 /* High and low parts of atan(x). */
1274 floatx80 atan_high
, atan_low
;
1277 static const struct fpatan_data fpatan_table
[9] = {
1278 { floatx80_zero_init
,
1279 floatx80_zero_init
},
1280 { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL
),
1281 make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL
) },
1282 { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL
),
1283 make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL
) },
1284 { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL
),
1285 make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL
) },
1286 { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL
),
1287 make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL
) },
1288 { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL
),
1289 make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL
) },
1290 { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL
),
1291 make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL
) },
1292 { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL
),
1293 make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL
) },
1294 { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL
),
1295 make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL
) },
1298 void helper_fpatan(CPUX86State
*env
)
1300 uint8_t old_flags
= save_exception_flags(env
);
1301 uint64_t arg0_sig
= extractFloatx80Frac(ST0
);
1302 int32_t arg0_exp
= extractFloatx80Exp(ST0
);
1303 bool arg0_sign
= extractFloatx80Sign(ST0
);
1304 uint64_t arg1_sig
= extractFloatx80Frac(ST1
);
1305 int32_t arg1_exp
= extractFloatx80Exp(ST1
);
1306 bool arg1_sign
= extractFloatx80Sign(ST1
);
1308 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
1309 float_raise(float_flag_invalid
, &env
->fp_status
);
1310 ST1
= floatx80_silence_nan(ST0
, &env
->fp_status
);
1311 } else if (floatx80_is_signaling_nan(ST1
, &env
->fp_status
)) {
1312 float_raise(float_flag_invalid
, &env
->fp_status
);
1313 ST1
= floatx80_silence_nan(ST1
, &env
->fp_status
);
1314 } else if (floatx80_invalid_encoding(ST0
) ||
1315 floatx80_invalid_encoding(ST1
)) {
1316 float_raise(float_flag_invalid
, &env
->fp_status
);
1317 ST1
= floatx80_default_nan(&env
->fp_status
);
1318 } else if (floatx80_is_any_nan(ST0
)) {
1320 } else if (floatx80_is_any_nan(ST1
)) {
1321 /* Pass this NaN through. */
1322 } else if (floatx80_is_zero(ST1
) && !arg0_sign
) {
1323 /* Pass this zero through. */
1324 } else if (((floatx80_is_infinity(ST0
) && !floatx80_is_infinity(ST1
)) ||
1325 arg0_exp
- arg1_exp
>= 80) &&
1328 * Dividing ST1 by ST0 gives the correct result up to
1329 * rounding, and avoids spurious underflow exceptions that
1330 * might result from passing some small values through the
1331 * polynomial approximation, but if a finite nonzero result of
1332 * division is exact, the result of fpatan is still inexact
1333 * (and underflowing where appropriate).
1335 signed char save_prec
= env
->fp_status
.floatx80_rounding_precision
;
1336 env
->fp_status
.floatx80_rounding_precision
= 80;
1337 ST1
= floatx80_div(ST1
, ST0
, &env
->fp_status
);
1338 env
->fp_status
.floatx80_rounding_precision
= save_prec
;
1339 if (!floatx80_is_zero(ST1
) &&
1340 !(get_float_exception_flags(&env
->fp_status
) &
1341 float_flag_inexact
)) {
1343 * The mathematical result is very slightly closer to zero
1344 * than this exact result. Round a value with the
1345 * significand adjusted accordingly to get the correct
1346 * exceptions, and possibly an adjusted result depending
1347 * on the rounding mode.
1349 uint64_t sig
= extractFloatx80Frac(ST1
);
1350 int32_t exp
= extractFloatx80Exp(ST1
);
1351 bool sign
= extractFloatx80Sign(ST1
);
1353 normalizeFloatx80Subnormal(sig
, &exp
, &sig
);
1355 ST1
= normalizeRoundAndPackFloatx80(80, sign
, exp
, sig
- 1,
1356 -1, &env
->fp_status
);
1359 /* The result is inexact. */
1360 bool rsign
= arg1_sign
;
1362 uint64_t rsig0
, rsig1
;
1363 if (floatx80_is_zero(ST1
)) {
1365 * ST0 is negative. The result is pi with the sign of
1369 rsig0
= pi_sig_high
;
1371 } else if (floatx80_is_infinity(ST1
)) {
1372 if (floatx80_is_infinity(ST0
)) {
1375 rsig0
= pi_34_sig_high
;
1376 rsig1
= pi_34_sig_low
;
1379 rsig0
= pi_4_sig_high
;
1380 rsig1
= pi_4_sig_low
;
1384 rsig0
= pi_2_sig_high
;
1385 rsig1
= pi_2_sig_low
;
1387 } else if (floatx80_is_zero(ST0
) || arg1_exp
- arg0_exp
>= 80) {
1389 rsig0
= pi_2_sig_high
;
1390 rsig1
= pi_2_sig_low
;
1391 } else if (floatx80_is_infinity(ST0
) || arg0_exp
- arg1_exp
>= 80) {
1392 /* ST0 is negative. */
1394 rsig0
= pi_sig_high
;
1398 * ST0 and ST1 are finite, nonzero and with exponents not
1401 int32_t adj_exp
, num_exp
, den_exp
, xexp
, yexp
, n
, texp
, zexp
, aexp
;
1402 int32_t azexp
, axexp
;
1403 bool adj_sub
, ysign
, zsign
;
1404 uint64_t adj_sig0
, adj_sig1
, num_sig
, den_sig
, xsig0
, xsig1
;
1405 uint64_t msig0
, msig1
, msig2
, remsig0
, remsig1
, remsig2
;
1406 uint64_t ysig0
, ysig1
, tsig
, zsig0
, zsig1
, asig0
, asig1
;
1407 uint64_t azsig0
, azsig1
;
1408 uint64_t azsig2
, azsig3
, axsig0
, axsig1
;
1410 FloatRoundMode save_mode
= env
->fp_status
.float_rounding_mode
;
1411 signed char save_prec
= env
->fp_status
.floatx80_rounding_precision
;
1412 env
->fp_status
.float_rounding_mode
= float_round_nearest_even
;
1413 env
->fp_status
.floatx80_rounding_precision
= 80;
1415 if (arg0_exp
== 0) {
1416 normalizeFloatx80Subnormal(arg0_sig
, &arg0_exp
, &arg0_sig
);
1418 if (arg1_exp
== 0) {
1419 normalizeFloatx80Subnormal(arg1_sig
, &arg1_exp
, &arg1_sig
);
1421 if (arg0_exp
> arg1_exp
||
1422 (arg0_exp
== arg1_exp
&& arg0_sig
>= arg1_sig
)) {
1423 /* Work with abs(ST1) / abs(ST0). */
1429 /* The result is subtracted from pi. */
1431 adj_sig0
= pi_sig_high
;
1432 adj_sig1
= pi_sig_low
;
1435 /* The result is used as-is. */
1442 /* Work with abs(ST0) / abs(ST1). */
1447 /* The result is added to or subtracted from pi/2. */
1449 adj_sig0
= pi_2_sig_high
;
1450 adj_sig1
= pi_2_sig_low
;
1451 adj_sub
= !arg0_sign
;
1455 * Compute x = num/den, where 0 < x <= 1 and x is not too
1458 xexp
= num_exp
- den_exp
+ 0x3ffe;
1461 if (den_sig
<= remsig0
) {
1462 shift128Right(remsig0
, remsig1
, 1, &remsig0
, &remsig1
);
1465 xsig0
= estimateDiv128To64(remsig0
, remsig1
, den_sig
);
1466 mul64To128(den_sig
, xsig0
, &msig0
, &msig1
);
1467 sub128(remsig0
, remsig1
, msig0
, msig1
, &remsig0
, &remsig1
);
1468 while ((int64_t) remsig0
< 0) {
1470 add128(remsig0
, remsig1
, 0, den_sig
, &remsig0
, &remsig1
);
1472 xsig1
= estimateDiv128To64(remsig1
, 0, den_sig
);
1474 * No need to correct any estimation error in xsig1; even
1475 * with such error, it is accurate enough.
1479 * Split x as x = t + y, where t = n/8 is the nearest
1480 * multiple of 1/8 to x.
1482 x8
= normalizeRoundAndPackFloatx80(80, false, xexp
+ 3, xsig0
,
1483 xsig1
, &env
->fp_status
);
1484 n
= floatx80_to_int32(x8
, &env
->fp_status
);
1493 int shift
= clz32(n
) + 32;
1494 texp
= 0x403b - shift
;
1498 sub128(xsig0
, xsig1
, tsig
, 0, &ysig0
, &ysig1
);
1499 if ((int64_t) ysig0
>= 0) {
1505 shift
= clz64(ysig1
) + 64;
1506 yexp
= xexp
- shift
;
1507 shift128Left(ysig0
, ysig1
, shift
,
1511 shift
= clz64(ysig0
);
1512 yexp
= xexp
- shift
;
1513 shift128Left(ysig0
, ysig1
, shift
, &ysig0
, &ysig1
);
1517 sub128(0, 0, ysig0
, ysig1
, &ysig0
, &ysig1
);
1519 shift
= clz64(ysig1
) + 64;
1521 shift
= clz64(ysig0
);
1523 yexp
= xexp
- shift
;
1524 shift128Left(ysig0
, ysig1
, shift
, &ysig0
, &ysig1
);
1528 * t's exponent must be greater than x's because t
1529 * is positive and the nearest multiple of 1/8 to
1530 * x, and if x has a greater exponent, the power
1531 * of 2 with that exponent is also a multiple of
1534 uint64_t usig0
, usig1
;
1535 shift128RightJamming(xsig0
, xsig1
, texp
- xexp
,
1538 sub128(tsig
, 0, usig0
, usig1
, &ysig0
, &ysig1
);
1540 shift
= clz64(ysig1
) + 64;
1542 shift
= clz64(ysig0
);
1544 yexp
= texp
- shift
;
1545 shift128Left(ysig0
, ysig1
, shift
, &ysig0
, &ysig1
);
1550 * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
1554 if (texp
== 0 || yexp
== 0) {
1560 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
1562 int32_t dexp
= texp
+ xexp
- 0x3ffe;
1563 uint64_t dsig0
, dsig1
, dsig2
;
1564 mul128By64To192(xsig0
, xsig1
, tsig
, &dsig0
, &dsig1
, &dsig2
);
1566 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
1567 * bit). Add 1 to produce the denominator 1+tx.
1569 shift128RightJamming(dsig0
, dsig1
, 0x3fff - dexp
,
1571 dsig0
|= 0x8000000000000000ULL
;
1576 if (dsig0
<= remsig0
) {
1577 shift128Right(remsig0
, remsig1
, 1, &remsig0
, &remsig1
);
1580 zsig0
= estimateDiv128To64(remsig0
, remsig1
, dsig0
);
1581 mul128By64To192(dsig0
, dsig1
, zsig0
, &msig0
, &msig1
, &msig2
);
1582 sub192(remsig0
, remsig1
, remsig2
, msig0
, msig1
, msig2
,
1583 &remsig0
, &remsig1
, &remsig2
);
1584 while ((int64_t) remsig0
< 0) {
1586 add192(remsig0
, remsig1
, remsig2
, 0, dsig0
, dsig1
,
1587 &remsig0
, &remsig1
, &remsig2
);
1589 zsig1
= estimateDiv128To64(remsig1
, remsig2
, dsig0
);
1590 /* No need to correct any estimation error in zsig1. */
1599 uint64_t z2sig0
, z2sig1
, z2sig2
, z2sig3
;
1601 mul128To256(zsig0
, zsig1
, zsig0
, zsig1
,
1602 &z2sig0
, &z2sig1
, &z2sig2
, &z2sig3
);
1603 z2
= normalizeRoundAndPackFloatx80(80, false,
1604 zexp
+ zexp
- 0x3ffe,
1608 /* Compute the lower parts of the polynomial expansion. */
1609 accum
= floatx80_mul(fpatan_coeff_6
, z2
, &env
->fp_status
);
1610 accum
= floatx80_add(fpatan_coeff_5
, accum
, &env
->fp_status
);
1611 accum
= floatx80_mul(accum
, z2
, &env
->fp_status
);
1612 accum
= floatx80_add(fpatan_coeff_4
, accum
, &env
->fp_status
);
1613 accum
= floatx80_mul(accum
, z2
, &env
->fp_status
);
1614 accum
= floatx80_add(fpatan_coeff_3
, accum
, &env
->fp_status
);
1615 accum
= floatx80_mul(accum
, z2
, &env
->fp_status
);
1616 accum
= floatx80_add(fpatan_coeff_2
, accum
, &env
->fp_status
);
1617 accum
= floatx80_mul(accum
, z2
, &env
->fp_status
);
1618 accum
= floatx80_add(fpatan_coeff_1
, accum
, &env
->fp_status
);
1619 accum
= floatx80_mul(accum
, z2
, &env
->fp_status
);
1622 * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
1623 * fpatan_coeff_0 is 1, and accum is negative and much smaller.
1625 aexp
= extractFloatx80Exp(fpatan_coeff_0
);
1626 shift128RightJamming(extractFloatx80Frac(accum
), 0,
1627 aexp
- extractFloatx80Exp(accum
),
1629 sub128(extractFloatx80Frac(fpatan_coeff_0
), 0, asig0
, asig1
,
1631 /* Multiply by z to compute arctan(z). */
1632 azexp
= aexp
+ zexp
- 0x3ffe;
1633 mul128To256(asig0
, asig1
, zsig0
, zsig1
, &azsig0
, &azsig1
,
1637 /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign). */
1639 /* z is positive. */
1644 bool low_sign
= extractFloatx80Sign(fpatan_table
[n
].atan_low
);
1645 int32_t low_exp
= extractFloatx80Exp(fpatan_table
[n
].atan_low
);
1647 extractFloatx80Frac(fpatan_table
[n
].atan_low
);
1648 uint64_t low_sig1
= 0;
1649 axexp
= extractFloatx80Exp(fpatan_table
[n
].atan_high
);
1650 axsig0
= extractFloatx80Frac(fpatan_table
[n
].atan_high
);
1652 shift128RightJamming(low_sig0
, low_sig1
, axexp
- low_exp
,
1653 &low_sig0
, &low_sig1
);
1655 sub128(axsig0
, axsig1
, low_sig0
, low_sig1
,
1658 add128(axsig0
, axsig1
, low_sig0
, low_sig1
,
1661 if (azexp
>= axexp
) {
1662 shift128RightJamming(axsig0
, axsig1
, azexp
- axexp
+ 1,
1665 shift128RightJamming(azsig0
, azsig1
, 1,
1668 shift128RightJamming(axsig0
, axsig1
, 1,
1670 shift128RightJamming(azsig0
, azsig1
, axexp
- azexp
+ 1,
1675 sub128(axsig0
, axsig1
, azsig0
, azsig1
,
1678 add128(axsig0
, axsig1
, azsig0
, azsig1
,
1689 * Add or subtract arctan(x) (exponent axexp,
1690 * significand axsig0 and axsig1, positive, not
1691 * necessarily normalized) to the number given by
1692 * adj_exp, adj_sig0 and adj_sig1, according to
1695 if (adj_exp
>= axexp
) {
1696 shift128RightJamming(axsig0
, axsig1
, adj_exp
- axexp
+ 1,
1699 shift128RightJamming(adj_sig0
, adj_sig1
, 1,
1700 &adj_sig0
, &adj_sig1
);
1702 shift128RightJamming(axsig0
, axsig1
, 1,
1704 shift128RightJamming(adj_sig0
, adj_sig1
,
1705 axexp
- adj_exp
+ 1,
1706 &adj_sig0
, &adj_sig1
);
1710 sub128(adj_sig0
, adj_sig1
, axsig0
, axsig1
,
1713 add128(adj_sig0
, adj_sig1
, axsig0
, axsig1
,
1718 env
->fp_status
.float_rounding_mode
= save_mode
;
1719 env
->fp_status
.floatx80_rounding_precision
= save_prec
;
1721 /* This result is inexact. */
1723 ST1
= normalizeRoundAndPackFloatx80(80, rsign
, rexp
,
1724 rsig0
, rsig1
, &env
->fp_status
);
1728 merge_exception_flags(env
, old_flags
);
1731 void helper_fxtract(CPUX86State
*env
)
1733 uint8_t old_flags
= save_exception_flags(env
);
1738 if (floatx80_is_zero(ST0
)) {
1739 /* Easy way to generate -inf and raising division by 0 exception */
1740 ST0
= floatx80_div(floatx80_chs(floatx80_one
), floatx80_zero
,
1744 } else if (floatx80_invalid_encoding(ST0
)) {
1745 float_raise(float_flag_invalid
, &env
->fp_status
);
1746 ST0
= floatx80_default_nan(&env
->fp_status
);
1749 } else if (floatx80_is_any_nan(ST0
)) {
1750 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
1751 float_raise(float_flag_invalid
, &env
->fp_status
);
1752 ST0
= floatx80_silence_nan(ST0
, &env
->fp_status
);
1756 } else if (floatx80_is_infinity(ST0
)) {
1759 ST1
= floatx80_infinity
;
1763 if (EXPD(temp
) == 0) {
1764 int shift
= clz64(temp
.l
.lower
);
1765 temp
.l
.lower
<<= shift
;
1766 expdif
= 1 - EXPBIAS
- shift
;
1767 float_raise(float_flag_input_denormal
, &env
->fp_status
);
1769 expdif
= EXPD(temp
) - EXPBIAS
;
1771 /* DP exponent bias */
1772 ST0
= int32_to_floatx80(expdif
, &env
->fp_status
);
1777 merge_exception_flags(env
, old_flags
);
1780 static void helper_fprem_common(CPUX86State
*env
, bool mod
)
1782 uint8_t old_flags
= save_exception_flags(env
);
1784 CPU_LDoubleU temp0
, temp1
;
1785 int exp0
, exp1
, expdiff
;
1792 env
->fpus
&= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1793 if (floatx80_is_zero(ST0
) || floatx80_is_zero(ST1
) ||
1794 exp0
== 0x7fff || exp1
== 0x7fff ||
1795 floatx80_invalid_encoding(ST0
) || floatx80_invalid_encoding(ST1
)) {
1796 ST0
= floatx80_modrem(ST0
, ST1
, mod
, "ient
, &env
->fp_status
);
1799 exp0
= 1 - clz64(temp0
.l
.lower
);
1802 exp1
= 1 - clz64(temp1
.l
.lower
);
1804 expdiff
= exp0
- exp1
;
1806 ST0
= floatx80_modrem(ST0
, ST1
, mod
, "ient
, &env
->fp_status
);
1807 env
->fpus
|= (quotient
& 0x4) << (8 - 2); /* (C0) <-- q2 */
1808 env
->fpus
|= (quotient
& 0x2) << (14 - 1); /* (C3) <-- q1 */
1809 env
->fpus
|= (quotient
& 0x1) << (9 - 0); /* (C1) <-- q0 */
1812 * Partial remainder. This choice of how many bits to
1813 * process at once is specified in AMD instruction set
1814 * manuals, and empirically is followed by Intel
1815 * processors as well; it ensures that the final remainder
1816 * operation in a loop does produce the correct low three
1817 * bits of the quotient. AMD manuals specify that the
1818 * flags other than C2 are cleared, and empirically Intel
1819 * processors clear them as well.
1821 int n
= 32 + (expdiff
% 32);
1822 temp1
.d
= floatx80_scalbn(temp1
.d
, expdiff
- n
, &env
->fp_status
);
1823 ST0
= floatx80_mod(ST0
, temp1
.d
, &env
->fp_status
);
1824 env
->fpus
|= 0x400; /* C2 <-- 1 */
1827 merge_exception_flags(env
, old_flags
);
1830 void helper_fprem1(CPUX86State
*env
)
1832 helper_fprem_common(env
, false);
1835 void helper_fprem(CPUX86State
*env
)
1837 helper_fprem_common(env
, true);
/* 128-bit significand of log2(e).  */
#define log2_e_sig_high 0xb8aa3b295c17f0bbULL
#define log2_e_sig_low 0xbe87fed0691d3e89ULL

/*
 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)),
 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3,
 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the
 * interval [sqrt(2)/2, sqrt(2)].
 */
#define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
#define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
#define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
#define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
#define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
#define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
#define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
#define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
#define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
#define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
#define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)
1863 * Compute an approximation of log2(1+arg), where 1+arg is in the
1864 * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this
1865 * function is called, rounding precision is set to 80 and the
1866 * round-to-nearest mode is in effect. arg must not be exactly zero,
1867 * and must not be so close to zero that underflow might occur.
1869 static void helper_fyl2x_common(CPUX86State
*env
, floatx80 arg
, int32_t *exp
,
1870 uint64_t *sig0
, uint64_t *sig1
)
1872 uint64_t arg0_sig
= extractFloatx80Frac(arg
);
1873 int32_t arg0_exp
= extractFloatx80Exp(arg
);
1874 bool arg0_sign
= extractFloatx80Sign(arg
);
1876 int32_t dexp
, texp
, aexp
;
1877 uint64_t dsig0
, dsig1
, tsig0
, tsig1
, rsig0
, rsig1
, rsig2
;
1878 uint64_t msig0
, msig1
, msig2
, t2sig0
, t2sig1
, t2sig2
, t2sig3
;
1879 uint64_t asig0
, asig1
, asig2
, asig3
, bsig0
, bsig1
;
1883 * Compute an approximation of arg/(2+arg), with extra precision,
1884 * as the argument to a polynomial approximation. The extra
1885 * precision is only needed for the first term of the
1886 * approximation, with subsequent terms being significantly
1887 * smaller; the approximation only uses odd exponents, and the
1888 * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
1892 shift128RightJamming(arg0_sig
, 0, dexp
- arg0_exp
, &dsig0
, &dsig1
);
1893 sub128(0, 0, dsig0
, dsig1
, &dsig0
, &dsig1
);
1896 shift128RightJamming(arg0_sig
, 0, dexp
- arg0_exp
, &dsig0
, &dsig1
);
1897 dsig0
|= 0x8000000000000000ULL
;
1899 texp
= arg0_exp
- dexp
+ 0x3ffe;
1903 if (dsig0
<= rsig0
) {
1904 shift128Right(rsig0
, rsig1
, 1, &rsig0
, &rsig1
);
1907 tsig0
= estimateDiv128To64(rsig0
, rsig1
, dsig0
);
1908 mul128By64To192(dsig0
, dsig1
, tsig0
, &msig0
, &msig1
, &msig2
);
1909 sub192(rsig0
, rsig1
, rsig2
, msig0
, msig1
, msig2
,
1910 &rsig0
, &rsig1
, &rsig2
);
1911 while ((int64_t) rsig0
< 0) {
1913 add192(rsig0
, rsig1
, rsig2
, 0, dsig0
, dsig1
,
1914 &rsig0
, &rsig1
, &rsig2
);
1916 tsig1
= estimateDiv128To64(rsig1
, rsig2
, dsig0
);
1918 * No need to correct any estimation error in tsig1; even with
1919 * such error, it is accurate enough. Now compute the square of
1920 * that approximation.
1922 mul128To256(tsig0
, tsig1
, tsig0
, tsig1
,
1923 &t2sig0
, &t2sig1
, &t2sig2
, &t2sig3
);
1924 t2
= normalizeRoundAndPackFloatx80(80, false, texp
+ texp
- 0x3ffe,
1925 t2sig0
, t2sig1
, &env
->fp_status
);
1927 /* Compute the lower parts of the polynomial expansion. */
1928 accum
= floatx80_mul(fyl2x_coeff_9
, t2
, &env
->fp_status
);
1929 accum
= floatx80_add(fyl2x_coeff_8
, accum
, &env
->fp_status
);
1930 accum
= floatx80_mul(accum
, t2
, &env
->fp_status
);
1931 accum
= floatx80_add(fyl2x_coeff_7
, accum
, &env
->fp_status
);
1932 accum
= floatx80_mul(accum
, t2
, &env
->fp_status
);
1933 accum
= floatx80_add(fyl2x_coeff_6
, accum
, &env
->fp_status
);
1934 accum
= floatx80_mul(accum
, t2
, &env
->fp_status
);
1935 accum
= floatx80_add(fyl2x_coeff_5
, accum
, &env
->fp_status
);
1936 accum
= floatx80_mul(accum
, t2
, &env
->fp_status
);
1937 accum
= floatx80_add(fyl2x_coeff_4
, accum
, &env
->fp_status
);
1938 accum
= floatx80_mul(accum
, t2
, &env
->fp_status
);
1939 accum
= floatx80_add(fyl2x_coeff_3
, accum
, &env
->fp_status
);
1940 accum
= floatx80_mul(accum
, t2
, &env
->fp_status
);
1941 accum
= floatx80_add(fyl2x_coeff_2
, accum
, &env
->fp_status
);
1942 accum
= floatx80_mul(accum
, t2
, &env
->fp_status
);
1943 accum
= floatx80_add(fyl2x_coeff_1
, accum
, &env
->fp_status
);
1944 accum
= floatx80_mul(accum
, t2
, &env
->fp_status
);
1945 accum
= floatx80_add(fyl2x_coeff_0_low
, accum
, &env
->fp_status
);
1948 * The full polynomial expansion is fyl2x_coeff_0 + accum (where
1949 * accum has much lower magnitude, and so, in particular, carry
1950 * out of the addition is not possible), multiplied by t. (This
1951 * expansion is only accurate to about 70 bits, not 128 bits.)
1953 aexp
= extractFloatx80Exp(fyl2x_coeff_0
);
1954 asign
= extractFloatx80Sign(fyl2x_coeff_0
);
1955 shift128RightJamming(extractFloatx80Frac(accum
), 0,
1956 aexp
- extractFloatx80Exp(accum
),
1958 bsig0
= extractFloatx80Frac(fyl2x_coeff_0
);
1960 if (asign
== extractFloatx80Sign(accum
)) {
1961 add128(bsig0
, bsig1
, asig0
, asig1
, &asig0
, &asig1
);
1963 sub128(bsig0
, bsig1
, asig0
, asig1
, &asig0
, &asig1
);
1965 /* Multiply by t to compute the required result. */
1966 mul128To256(asig0
, asig1
, tsig0
, tsig1
,
1967 &asig0
, &asig1
, &asig2
, &asig3
);
1968 aexp
+= texp
- 0x3ffe;
1974 void helper_fyl2xp1(CPUX86State
*env
)
1976 uint8_t old_flags
= save_exception_flags(env
);
1977 uint64_t arg0_sig
= extractFloatx80Frac(ST0
);
1978 int32_t arg0_exp
= extractFloatx80Exp(ST0
);
1979 bool arg0_sign
= extractFloatx80Sign(ST0
);
1980 uint64_t arg1_sig
= extractFloatx80Frac(ST1
);
1981 int32_t arg1_exp
= extractFloatx80Exp(ST1
);
1982 bool arg1_sign
= extractFloatx80Sign(ST1
);
1984 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
1985 float_raise(float_flag_invalid
, &env
->fp_status
);
1986 ST1
= floatx80_silence_nan(ST0
, &env
->fp_status
);
1987 } else if (floatx80_is_signaling_nan(ST1
, &env
->fp_status
)) {
1988 float_raise(float_flag_invalid
, &env
->fp_status
);
1989 ST1
= floatx80_silence_nan(ST1
, &env
->fp_status
);
1990 } else if (floatx80_invalid_encoding(ST0
) ||
1991 floatx80_invalid_encoding(ST1
)) {
1992 float_raise(float_flag_invalid
, &env
->fp_status
);
1993 ST1
= floatx80_default_nan(&env
->fp_status
);
1994 } else if (floatx80_is_any_nan(ST0
)) {
1996 } else if (floatx80_is_any_nan(ST1
)) {
1997 /* Pass this NaN through. */
1998 } else if (arg0_exp
> 0x3ffd ||
1999 (arg0_exp
== 0x3ffd && arg0_sig
> (arg0_sign
?
2000 0x95f619980c4336f7ULL
:
2001 0xd413cccfe7799211ULL
))) {
2003 * Out of range for the instruction (ST0 must have absolute
2004 * value less than 1 - sqrt(2)/2 = 0.292..., according to
2005 * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
2006 * to sqrt(2) - 1, which we allow here), treat as invalid.
2008 float_raise(float_flag_invalid
, &env
->fp_status
);
2009 ST1
= floatx80_default_nan(&env
->fp_status
);
2010 } else if (floatx80_is_zero(ST0
) || floatx80_is_zero(ST1
) ||
2011 arg1_exp
== 0x7fff) {
2013 * One argument is zero, or multiplying by infinity; correct
2014 * result is exact and can be obtained by multiplying the
2017 ST1
= floatx80_mul(ST0
, ST1
, &env
->fp_status
);
2018 } else if (arg0_exp
< 0x3fb0) {
2020 * Multiplying both arguments and an extra-precision version
2021 * of log2(e) is sufficiently precise.
2023 uint64_t sig0
, sig1
, sig2
;
2025 if (arg0_exp
== 0) {
2026 normalizeFloatx80Subnormal(arg0_sig
, &arg0_exp
, &arg0_sig
);
2028 if (arg1_exp
== 0) {
2029 normalizeFloatx80Subnormal(arg1_sig
, &arg1_exp
, &arg1_sig
);
2031 mul128By64To192(log2_e_sig_high
, log2_e_sig_low
, arg0_sig
,
2032 &sig0
, &sig1
, &sig2
);
2034 mul128By64To192(sig0
, sig1
, arg1_sig
, &sig0
, &sig1
, &sig2
);
2035 exp
+= arg1_exp
- 0x3ffe;
2036 /* This result is inexact. */
2038 ST1
= normalizeRoundAndPackFloatx80(80, arg0_sign
^ arg1_sign
, exp
,
2039 sig0
, sig1
, &env
->fp_status
);
2042 uint64_t asig0
, asig1
, asig2
;
2043 FloatRoundMode save_mode
= env
->fp_status
.float_rounding_mode
;
2044 signed char save_prec
= env
->fp_status
.floatx80_rounding_precision
;
2045 env
->fp_status
.float_rounding_mode
= float_round_nearest_even
;
2046 env
->fp_status
.floatx80_rounding_precision
= 80;
2048 helper_fyl2x_common(env
, ST0
, &aexp
, &asig0
, &asig1
);
2050 * Multiply by the second argument to compute the required
2053 if (arg1_exp
== 0) {
2054 normalizeFloatx80Subnormal(arg1_sig
, &arg1_exp
, &arg1_sig
);
2056 mul128By64To192(asig0
, asig1
, arg1_sig
, &asig0
, &asig1
, &asig2
);
2057 aexp
+= arg1_exp
- 0x3ffe;
2058 /* This result is inexact. */
2060 env
->fp_status
.float_rounding_mode
= save_mode
;
2061 ST1
= normalizeRoundAndPackFloatx80(80, arg0_sign
^ arg1_sign
, aexp
,
2062 asig0
, asig1
, &env
->fp_status
);
2063 env
->fp_status
.floatx80_rounding_precision
= save_prec
;
2066 merge_exception_flags(env
, old_flags
);
2069 void helper_fyl2x(CPUX86State
*env
)
2071 uint8_t old_flags
= save_exception_flags(env
);
2072 uint64_t arg0_sig
= extractFloatx80Frac(ST0
);
2073 int32_t arg0_exp
= extractFloatx80Exp(ST0
);
2074 bool arg0_sign
= extractFloatx80Sign(ST0
);
2075 uint64_t arg1_sig
= extractFloatx80Frac(ST1
);
2076 int32_t arg1_exp
= extractFloatx80Exp(ST1
);
2077 bool arg1_sign
= extractFloatx80Sign(ST1
);
2079 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
2080 float_raise(float_flag_invalid
, &env
->fp_status
);
2081 ST1
= floatx80_silence_nan(ST0
, &env
->fp_status
);
2082 } else if (floatx80_is_signaling_nan(ST1
, &env
->fp_status
)) {
2083 float_raise(float_flag_invalid
, &env
->fp_status
);
2084 ST1
= floatx80_silence_nan(ST1
, &env
->fp_status
);
2085 } else if (floatx80_invalid_encoding(ST0
) ||
2086 floatx80_invalid_encoding(ST1
)) {
2087 float_raise(float_flag_invalid
, &env
->fp_status
);
2088 ST1
= floatx80_default_nan(&env
->fp_status
);
2089 } else if (floatx80_is_any_nan(ST0
)) {
2091 } else if (floatx80_is_any_nan(ST1
)) {
2092 /* Pass this NaN through. */
2093 } else if (arg0_sign
&& !floatx80_is_zero(ST0
)) {
2094 float_raise(float_flag_invalid
, &env
->fp_status
);
2095 ST1
= floatx80_default_nan(&env
->fp_status
);
2096 } else if (floatx80_is_infinity(ST1
)) {
2097 FloatRelation cmp
= floatx80_compare(ST0
, floatx80_one
,
2100 case float_relation_less
:
2101 ST1
= floatx80_chs(ST1
);
2103 case float_relation_greater
:
2104 /* Result is infinity of the same sign as ST1. */
2107 float_raise(float_flag_invalid
, &env
->fp_status
);
2108 ST1
= floatx80_default_nan(&env
->fp_status
);
2111 } else if (floatx80_is_infinity(ST0
)) {
2112 if (floatx80_is_zero(ST1
)) {
2113 float_raise(float_flag_invalid
, &env
->fp_status
);
2114 ST1
= floatx80_default_nan(&env
->fp_status
);
2115 } else if (arg1_sign
) {
2116 ST1
= floatx80_chs(ST0
);
2120 } else if (floatx80_is_zero(ST0
)) {
2121 if (floatx80_is_zero(ST1
)) {
2122 float_raise(float_flag_invalid
, &env
->fp_status
);
2123 ST1
= floatx80_default_nan(&env
->fp_status
);
2125 /* Result is infinity with opposite sign to ST1. */
2126 float_raise(float_flag_divbyzero
, &env
->fp_status
);
2127 ST1
= make_floatx80(arg1_sign
? 0x7fff : 0xffff,
2128 0x8000000000000000ULL
);
2130 } else if (floatx80_is_zero(ST1
)) {
2131 if (floatx80_lt(ST0
, floatx80_one
, &env
->fp_status
)) {
2132 ST1
= floatx80_chs(ST1
);
2134 /* Otherwise, ST1 is already the correct result. */
2135 } else if (floatx80_eq(ST0
, floatx80_one
, &env
->fp_status
)) {
2137 ST1
= floatx80_chs(floatx80_zero
);
2139 ST1
= floatx80_zero
;
2144 FloatRoundMode save_mode
= env
->fp_status
.float_rounding_mode
;
2145 signed char save_prec
= env
->fp_status
.floatx80_rounding_precision
;
2146 env
->fp_status
.float_rounding_mode
= float_round_nearest_even
;
2147 env
->fp_status
.floatx80_rounding_precision
= 80;
2149 if (arg0_exp
== 0) {
2150 normalizeFloatx80Subnormal(arg0_sig
, &arg0_exp
, &arg0_sig
);
2152 if (arg1_exp
== 0) {
2153 normalizeFloatx80Subnormal(arg1_sig
, &arg1_exp
, &arg1_sig
);
2155 int_exp
= arg0_exp
- 0x3fff;
2156 if (arg0_sig
> 0xb504f333f9de6484ULL
) {
2159 arg0_m1
= floatx80_sub(floatx80_scalbn(ST0
, -int_exp
,
2161 floatx80_one
, &env
->fp_status
);
2162 if (floatx80_is_zero(arg0_m1
)) {
2163 /* Exact power of 2; multiply by ST1. */
2164 env
->fp_status
.float_rounding_mode
= save_mode
;
2165 ST1
= floatx80_mul(int32_to_floatx80(int_exp
, &env
->fp_status
),
2166 ST1
, &env
->fp_status
);
2168 bool asign
= extractFloatx80Sign(arg0_m1
);
2170 uint64_t asig0
, asig1
, asig2
;
2171 helper_fyl2x_common(env
, arg0_m1
, &aexp
, &asig0
, &asig1
);
2173 bool isign
= (int_exp
< 0);
2177 int_exp
= isign
? -int_exp
: int_exp
;
2178 shift
= clz32(int_exp
) + 32;
2181 iexp
= 0x403e - shift
;
2182 shift128RightJamming(asig0
, asig1
, iexp
- aexp
,
2184 if (asign
== isign
) {
2185 add128(isig
, 0, asig0
, asig1
, &asig0
, &asig1
);
2187 sub128(isig
, 0, asig0
, asig1
, &asig0
, &asig1
);
2193 * Multiply by the second argument to compute the required
2196 if (arg1_exp
== 0) {
2197 normalizeFloatx80Subnormal(arg1_sig
, &arg1_exp
, &arg1_sig
);
2199 mul128By64To192(asig0
, asig1
, arg1_sig
, &asig0
, &asig1
, &asig2
);
2200 aexp
+= arg1_exp
- 0x3ffe;
2201 /* This result is inexact. */
2203 env
->fp_status
.float_rounding_mode
= save_mode
;
2204 ST1
= normalizeRoundAndPackFloatx80(80, asign
^ arg1_sign
, aexp
,
2205 asig0
, asig1
, &env
->fp_status
);
2208 env
->fp_status
.floatx80_rounding_precision
= save_prec
;
2211 merge_exception_flags(env
, old_flags
);
2214 void helper_fsqrt(CPUX86State
*env
)
2216 uint8_t old_flags
= save_exception_flags(env
);
2217 if (floatx80_is_neg(ST0
)) {
2218 env
->fpus
&= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
2221 ST0
= floatx80_sqrt(ST0
, &env
->fp_status
);
2222 merge_exception_flags(env
, old_flags
);
2225 void helper_fsincos(CPUX86State
*env
)
2227 double fptemp
= floatx80_to_double(env
, ST0
);
2229 if ((fptemp
> MAXTAN
) || (fptemp
< -MAXTAN
)) {
2232 ST0
= double_to_floatx80(env
, sin(fptemp
));
2234 ST0
= double_to_floatx80(env
, cos(fptemp
));
2235 env
->fpus
&= ~0x400; /* C2 <-- 0 */
2236 /* the above code is for |arg| < 2**63 only */
2240 void helper_frndint(CPUX86State
*env
)
2242 uint8_t old_flags
= save_exception_flags(env
);
2243 ST0
= floatx80_round_to_int(ST0
, &env
->fp_status
);
2244 merge_exception_flags(env
, old_flags
);
2247 void helper_fscale(CPUX86State
*env
)
2249 uint8_t old_flags
= save_exception_flags(env
);
2250 if (floatx80_invalid_encoding(ST1
) || floatx80_invalid_encoding(ST0
)) {
2251 float_raise(float_flag_invalid
, &env
->fp_status
);
2252 ST0
= floatx80_default_nan(&env
->fp_status
);
2253 } else if (floatx80_is_any_nan(ST1
)) {
2254 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
2255 float_raise(float_flag_invalid
, &env
->fp_status
);
2258 if (floatx80_is_signaling_nan(ST0
, &env
->fp_status
)) {
2259 float_raise(float_flag_invalid
, &env
->fp_status
);
2260 ST0
= floatx80_silence_nan(ST0
, &env
->fp_status
);
2262 } else if (floatx80_is_infinity(ST1
) &&
2263 !floatx80_invalid_encoding(ST0
) &&
2264 !floatx80_is_any_nan(ST0
)) {
2265 if (floatx80_is_neg(ST1
)) {
2266 if (floatx80_is_infinity(ST0
)) {
2267 float_raise(float_flag_invalid
, &env
->fp_status
);
2268 ST0
= floatx80_default_nan(&env
->fp_status
);
2270 ST0
= (floatx80_is_neg(ST0
) ?
2271 floatx80_chs(floatx80_zero
) :
2275 if (floatx80_is_zero(ST0
)) {
2276 float_raise(float_flag_invalid
, &env
->fp_status
);
2277 ST0
= floatx80_default_nan(&env
->fp_status
);
2279 ST0
= (floatx80_is_neg(ST0
) ?
2280 floatx80_chs(floatx80_infinity
) :
2286 signed char save
= env
->fp_status
.floatx80_rounding_precision
;
2287 uint8_t save_flags
= get_float_exception_flags(&env
->fp_status
);
2288 set_float_exception_flags(0, &env
->fp_status
);
2289 n
= floatx80_to_int32_round_to_zero(ST1
, &env
->fp_status
);
2290 set_float_exception_flags(save_flags
, &env
->fp_status
);
2291 env
->fp_status
.floatx80_rounding_precision
= 80;
2292 ST0
= floatx80_scalbn(ST0
, n
, &env
->fp_status
);
2293 env
->fp_status
.floatx80_rounding_precision
= save
;
2295 merge_exception_flags(env
, old_flags
);
2298 void helper_fsin(CPUX86State
*env
)
2300 double fptemp
= floatx80_to_double(env
, ST0
);
2302 if ((fptemp
> MAXTAN
) || (fptemp
< -MAXTAN
)) {
2305 ST0
= double_to_floatx80(env
, sin(fptemp
));
2306 env
->fpus
&= ~0x400; /* C2 <-- 0 */
2307 /* the above code is for |arg| < 2**53 only */
2311 void helper_fcos(CPUX86State
*env
)
2313 double fptemp
= floatx80_to_double(env
, ST0
);
2315 if ((fptemp
> MAXTAN
) || (fptemp
< -MAXTAN
)) {
2318 ST0
= double_to_floatx80(env
, cos(fptemp
));
2319 env
->fpus
&= ~0x400; /* C2 <-- 0 */
2320 /* the above code is for |arg| < 2**63 only */
2324 void helper_fxam_ST0(CPUX86State
*env
)
2331 env
->fpus
&= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
2333 env
->fpus
|= 0x200; /* C1 <-- 1 */
2336 if (env
->fptags
[env
->fpstt
]) {
2337 env
->fpus
|= 0x4100; /* Empty */
2341 expdif
= EXPD(temp
);
2342 if (expdif
== MAXEXPD
) {
2343 if (MANTD(temp
) == 0x8000000000000000ULL
) {
2344 env
->fpus
|= 0x500; /* Infinity */
2345 } else if (MANTD(temp
) & 0x8000000000000000ULL
) {
2346 env
->fpus
|= 0x100; /* NaN */
2348 } else if (expdif
== 0) {
2349 if (MANTD(temp
) == 0) {
2350 env
->fpus
|= 0x4000; /* Zero */
2352 env
->fpus
|= 0x4400; /* Denormal */
2354 } else if (MANTD(temp
) & 0x8000000000000000ULL
) {
2359 static void do_fstenv(CPUX86State
*env
, target_ulong ptr
, int data32
,
2362 int fpus
, fptag
, exp
, i
;
2366 fpus
= (env
->fpus
& ~0x3800) | (env
->fpstt
& 0x7) << 11;
2368 for (i
= 7; i
>= 0; i
--) {
2370 if (env
->fptags
[i
]) {
2373 tmp
.d
= env
->fpregs
[i
].d
;
2376 if (exp
== 0 && mant
== 0) {
2379 } else if (exp
== 0 || exp
== MAXEXPD
2380 || (mant
& (1LL << 63)) == 0) {
2381 /* NaNs, infinity, denormal */
2388 cpu_stl_data_ra(env
, ptr
, env
->fpuc
, retaddr
);
2389 cpu_stl_data_ra(env
, ptr
+ 4, fpus
, retaddr
);
2390 cpu_stl_data_ra(env
, ptr
+ 8, fptag
, retaddr
);
2391 cpu_stl_data_ra(env
, ptr
+ 12, 0, retaddr
); /* fpip */
2392 cpu_stl_data_ra(env
, ptr
+ 16, 0, retaddr
); /* fpcs */
2393 cpu_stl_data_ra(env
, ptr
+ 20, 0, retaddr
); /* fpoo */
2394 cpu_stl_data_ra(env
, ptr
+ 24, 0, retaddr
); /* fpos */
2397 cpu_stw_data_ra(env
, ptr
, env
->fpuc
, retaddr
);
2398 cpu_stw_data_ra(env
, ptr
+ 2, fpus
, retaddr
);
2399 cpu_stw_data_ra(env
, ptr
+ 4, fptag
, retaddr
);
2400 cpu_stw_data_ra(env
, ptr
+ 6, 0, retaddr
);
2401 cpu_stw_data_ra(env
, ptr
+ 8, 0, retaddr
);
2402 cpu_stw_data_ra(env
, ptr
+ 10, 0, retaddr
);
2403 cpu_stw_data_ra(env
, ptr
+ 12, 0, retaddr
);
2407 void helper_fstenv(CPUX86State
*env
, target_ulong ptr
, int data32
)
2409 do_fstenv(env
, ptr
, data32
, GETPC());
2412 static void cpu_set_fpus(CPUX86State
*env
, uint16_t fpus
)
2414 env
->fpstt
= (fpus
>> 11) & 7;
2415 env
->fpus
= fpus
& ~0x3800 & ~FPUS_B
;
2416 env
->fpus
|= env
->fpus
& FPUS_SE
? FPUS_B
: 0;
2417 #if !defined(CONFIG_USER_ONLY)
2418 if (!(env
->fpus
& FPUS_SE
)) {
2420 * Here the processor deasserts FERR#; in response, the chipset deasserts
2428 static void do_fldenv(CPUX86State
*env
, target_ulong ptr
, int data32
,
2434 cpu_set_fpuc(env
, cpu_lduw_data_ra(env
, ptr
, retaddr
));
2435 fpus
= cpu_lduw_data_ra(env
, ptr
+ 4, retaddr
);
2436 fptag
= cpu_lduw_data_ra(env
, ptr
+ 8, retaddr
);
2438 cpu_set_fpuc(env
, cpu_lduw_data_ra(env
, ptr
, retaddr
));
2439 fpus
= cpu_lduw_data_ra(env
, ptr
+ 2, retaddr
);
2440 fptag
= cpu_lduw_data_ra(env
, ptr
+ 4, retaddr
);
2442 cpu_set_fpus(env
, fpus
);
2443 for (i
= 0; i
< 8; i
++) {
2444 env
->fptags
[i
] = ((fptag
& 3) == 3);
2449 void helper_fldenv(CPUX86State
*env
, target_ulong ptr
, int data32
)
2451 do_fldenv(env
, ptr
, data32
, GETPC());
2454 void helper_fsave(CPUX86State
*env
, target_ulong ptr
, int data32
)
2459 do_fstenv(env
, ptr
, data32
, GETPC());
2461 ptr
+= (14 << data32
);
2462 for (i
= 0; i
< 8; i
++) {
2464 helper_fstt(env
, tmp
, ptr
, GETPC());
2471 cpu_set_fpuc(env
, 0x37f);
2482 void helper_frstor(CPUX86State
*env
, target_ulong ptr
, int data32
)
2487 do_fldenv(env
, ptr
, data32
, GETPC());
2488 ptr
+= (14 << data32
);
2490 for (i
= 0; i
< 8; i
++) {
2491 tmp
= helper_fldt(env
, ptr
, GETPC());
#if defined(CONFIG_USER_ONLY)
/* User-mode entry point: forwards unchanged to helper_fsave(). */
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_fsave(env, ptr, data32);
}

/* User-mode entry point: forwards unchanged to helper_frstor(). */
void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_frstor(env, ptr, data32);
}
#endif
/* Byte offset of member X within X86XSaveArea. */
#define XO(X) offsetof(X86XSaveArea, X)
2511 static void do_xsave_fpu(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2516 fpus
= (env
->fpus
& ~0x3800) | (env
->fpstt
& 0x7) << 11;
2518 for (i
= 0; i
< 8; i
++) {
2519 fptag
|= (env
->fptags
[i
] << i
);
2522 cpu_stw_data_ra(env
, ptr
+ XO(legacy
.fcw
), env
->fpuc
, ra
);
2523 cpu_stw_data_ra(env
, ptr
+ XO(legacy
.fsw
), fpus
, ra
);
2524 cpu_stw_data_ra(env
, ptr
+ XO(legacy
.ftw
), fptag
^ 0xff, ra
);
2526 /* In 32-bit mode this is eip, sel, dp, sel.
2527 In 64-bit mode this is rip, rdp.
2528 But in either case we don't write actual data, just zeros. */
2529 cpu_stq_data_ra(env
, ptr
+ XO(legacy
.fpip
), 0, ra
); /* eip+sel; rip */
2530 cpu_stq_data_ra(env
, ptr
+ XO(legacy
.fpdp
), 0, ra
); /* edp+sel; rdp */
2532 addr
= ptr
+ XO(legacy
.fpregs
);
2533 for (i
= 0; i
< 8; i
++) {
2534 floatx80 tmp
= ST(i
);
2535 helper_fstt(env
, tmp
, addr
, ra
);
2540 static void do_xsave_mxcsr(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2542 update_mxcsr_from_sse_status(env
);
2543 cpu_stl_data_ra(env
, ptr
+ XO(legacy
.mxcsr
), env
->mxcsr
, ra
);
2544 cpu_stl_data_ra(env
, ptr
+ XO(legacy
.mxcsr_mask
), 0x0000ffff, ra
);
2547 static void do_xsave_sse(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2552 if (env
->hflags
& HF_CS64_MASK
) {
2558 addr
= ptr
+ XO(legacy
.xmm_regs
);
2559 for (i
= 0; i
< nb_xmm_regs
; i
++) {
2560 cpu_stq_data_ra(env
, addr
, env
->xmm_regs
[i
].ZMM_Q(0), ra
);
2561 cpu_stq_data_ra(env
, addr
+ 8, env
->xmm_regs
[i
].ZMM_Q(1), ra
);
2566 static void do_xsave_bndregs(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2568 target_ulong addr
= ptr
+ offsetof(XSaveBNDREG
, bnd_regs
);
2571 for (i
= 0; i
< 4; i
++, addr
+= 16) {
2572 cpu_stq_data_ra(env
, addr
, env
->bnd_regs
[i
].lb
, ra
);
2573 cpu_stq_data_ra(env
, addr
+ 8, env
->bnd_regs
[i
].ub
, ra
);
2577 static void do_xsave_bndcsr(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2579 cpu_stq_data_ra(env
, ptr
+ offsetof(XSaveBNDCSR
, bndcsr
.cfgu
),
2580 env
->bndcs_regs
.cfgu
, ra
);
2581 cpu_stq_data_ra(env
, ptr
+ offsetof(XSaveBNDCSR
, bndcsr
.sts
),
2582 env
->bndcs_regs
.sts
, ra
);
2585 static void do_xsave_pkru(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2587 cpu_stq_data_ra(env
, ptr
, env
->pkru
, ra
);
2590 void helper_fxsave(CPUX86State
*env
, target_ulong ptr
)
2592 uintptr_t ra
= GETPC();
2594 /* The operand must be 16 byte aligned */
2596 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
2599 do_xsave_fpu(env
, ptr
, ra
);
2601 if (env
->cr
[4] & CR4_OSFXSR_MASK
) {
2602 do_xsave_mxcsr(env
, ptr
, ra
);
2603 /* Fast FXSAVE leaves out the XMM registers */
2604 if (!(env
->efer
& MSR_EFER_FFXSR
)
2605 || (env
->hflags
& HF_CPL_MASK
)
2606 || !(env
->hflags
& HF_LMA_MASK
)) {
2607 do_xsave_sse(env
, ptr
, ra
);
2612 static uint64_t get_xinuse(CPUX86State
*env
)
2614 uint64_t inuse
= -1;
2616 /* For the most part, we don't track XINUSE. We could calculate it
2617 here for all components, but it's probably less work to simply
2618 indicate in use. That said, the state of BNDREGS is important
2619 enough to track in HFLAGS, so we might as well use that here. */
2620 if ((env
->hflags
& HF_MPX_IU_MASK
) == 0) {
2621 inuse
&= ~XSTATE_BNDREGS_MASK
;
2626 static void do_xsave(CPUX86State
*env
, target_ulong ptr
, uint64_t rfbm
,
2627 uint64_t inuse
, uint64_t opt
, uintptr_t ra
)
2629 uint64_t old_bv
, new_bv
;
2631 /* The OS must have enabled XSAVE. */
2632 if (!(env
->cr
[4] & CR4_OSXSAVE_MASK
)) {
2633 raise_exception_ra(env
, EXCP06_ILLOP
, ra
);
2636 /* The operand must be 64 byte aligned. */
2638 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
2641 /* Never save anything not enabled by XCR0. */
2645 if (opt
& XSTATE_FP_MASK
) {
2646 do_xsave_fpu(env
, ptr
, ra
);
2648 if (rfbm
& XSTATE_SSE_MASK
) {
2649 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
2650 do_xsave_mxcsr(env
, ptr
, ra
);
2652 if (opt
& XSTATE_SSE_MASK
) {
2653 do_xsave_sse(env
, ptr
, ra
);
2655 if (opt
& XSTATE_BNDREGS_MASK
) {
2656 do_xsave_bndregs(env
, ptr
+ XO(bndreg_state
), ra
);
2658 if (opt
& XSTATE_BNDCSR_MASK
) {
2659 do_xsave_bndcsr(env
, ptr
+ XO(bndcsr_state
), ra
);
2661 if (opt
& XSTATE_PKRU_MASK
) {
2662 do_xsave_pkru(env
, ptr
+ XO(pkru_state
), ra
);
2665 /* Update the XSTATE_BV field. */
2666 old_bv
= cpu_ldq_data_ra(env
, ptr
+ XO(header
.xstate_bv
), ra
);
2667 new_bv
= (old_bv
& ~rfbm
) | (inuse
& rfbm
);
2668 cpu_stq_data_ra(env
, ptr
+ XO(header
.xstate_bv
), new_bv
, ra
);
2671 void helper_xsave(CPUX86State
*env
, target_ulong ptr
, uint64_t rfbm
)
2673 do_xsave(env
, ptr
, rfbm
, get_xinuse(env
), -1, GETPC());
2676 void helper_xsaveopt(CPUX86State
*env
, target_ulong ptr
, uint64_t rfbm
)
2678 uint64_t inuse
= get_xinuse(env
);
2679 do_xsave(env
, ptr
, rfbm
, inuse
, inuse
, GETPC());
2682 static void do_xrstor_fpu(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2684 int i
, fpuc
, fpus
, fptag
;
2687 fpuc
= cpu_lduw_data_ra(env
, ptr
+ XO(legacy
.fcw
), ra
);
2688 fpus
= cpu_lduw_data_ra(env
, ptr
+ XO(legacy
.fsw
), ra
);
2689 fptag
= cpu_lduw_data_ra(env
, ptr
+ XO(legacy
.ftw
), ra
);
2690 cpu_set_fpuc(env
, fpuc
);
2691 cpu_set_fpus(env
, fpus
);
2693 for (i
= 0; i
< 8; i
++) {
2694 env
->fptags
[i
] = ((fptag
>> i
) & 1);
2697 addr
= ptr
+ XO(legacy
.fpregs
);
2698 for (i
= 0; i
< 8; i
++) {
2699 floatx80 tmp
= helper_fldt(env
, addr
, ra
);
2705 static void do_xrstor_mxcsr(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2707 cpu_set_mxcsr(env
, cpu_ldl_data_ra(env
, ptr
+ XO(legacy
.mxcsr
), ra
));
2710 static void do_xrstor_sse(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2715 if (env
->hflags
& HF_CS64_MASK
) {
2721 addr
= ptr
+ XO(legacy
.xmm_regs
);
2722 for (i
= 0; i
< nb_xmm_regs
; i
++) {
2723 env
->xmm_regs
[i
].ZMM_Q(0) = cpu_ldq_data_ra(env
, addr
, ra
);
2724 env
->xmm_regs
[i
].ZMM_Q(1) = cpu_ldq_data_ra(env
, addr
+ 8, ra
);
2729 static void do_xrstor_bndregs(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2731 target_ulong addr
= ptr
+ offsetof(XSaveBNDREG
, bnd_regs
);
2734 for (i
= 0; i
< 4; i
++, addr
+= 16) {
2735 env
->bnd_regs
[i
].lb
= cpu_ldq_data_ra(env
, addr
, ra
);
2736 env
->bnd_regs
[i
].ub
= cpu_ldq_data_ra(env
, addr
+ 8, ra
);
2740 static void do_xrstor_bndcsr(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2742 /* FIXME: Extend highest implemented bit of linear address. */
2743 env
->bndcs_regs
.cfgu
2744 = cpu_ldq_data_ra(env
, ptr
+ offsetof(XSaveBNDCSR
, bndcsr
.cfgu
), ra
);
2746 = cpu_ldq_data_ra(env
, ptr
+ offsetof(XSaveBNDCSR
, bndcsr
.sts
), ra
);
2749 static void do_xrstor_pkru(CPUX86State
*env
, target_ulong ptr
, uintptr_t ra
)
2751 env
->pkru
= cpu_ldq_data_ra(env
, ptr
, ra
);
2754 void helper_fxrstor(CPUX86State
*env
, target_ulong ptr
)
2756 uintptr_t ra
= GETPC();
2758 /* The operand must be 16 byte aligned */
2760 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
2763 do_xrstor_fpu(env
, ptr
, ra
);
2765 if (env
->cr
[4] & CR4_OSFXSR_MASK
) {
2766 do_xrstor_mxcsr(env
, ptr
, ra
);
2767 /* Fast FXRSTOR leaves out the XMM registers */
2768 if (!(env
->efer
& MSR_EFER_FFXSR
)
2769 || (env
->hflags
& HF_CPL_MASK
)
2770 || !(env
->hflags
& HF_LMA_MASK
)) {
2771 do_xrstor_sse(env
, ptr
, ra
);
#if defined(CONFIG_USER_ONLY)
/* User-mode entry point: forwards unchanged to helper_fxsave(). */
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
{
    helper_fxsave(env, ptr);
}

/* User-mode entry point: forwards unchanged to helper_fxrstor(). */
void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
{
    helper_fxrstor(env, ptr);
}
#endif
2788 void helper_xrstor(CPUX86State
*env
, target_ulong ptr
, uint64_t rfbm
)
2790 uintptr_t ra
= GETPC();
2791 uint64_t xstate_bv
, xcomp_bv
, reserve0
;
2795 /* The OS must have enabled XSAVE. */
2796 if (!(env
->cr
[4] & CR4_OSXSAVE_MASK
)) {
2797 raise_exception_ra(env
, EXCP06_ILLOP
, ra
);
2800 /* The operand must be 64 byte aligned. */
2802 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
2805 xstate_bv
= cpu_ldq_data_ra(env
, ptr
+ XO(header
.xstate_bv
), ra
);
2807 if ((int64_t)xstate_bv
< 0) {
2808 /* FIXME: Compact form. */
2809 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
2812 /* Standard form. */
2814 /* The XSTATE_BV field must not set bits not present in XCR0. */
2815 if (xstate_bv
& ~env
->xcr0
) {
2816 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
2819 /* The XCOMP_BV field must be zero. Note that, as of the April 2016
2820 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
2821 describes only XCOMP_BV, but the description of the standard form
2822 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
2823 includes the next 64-bit field. */
2824 xcomp_bv
= cpu_ldq_data_ra(env
, ptr
+ XO(header
.xcomp_bv
), ra
);
2825 reserve0
= cpu_ldq_data_ra(env
, ptr
+ XO(header
.reserve0
), ra
);
2826 if (xcomp_bv
|| reserve0
) {
2827 raise_exception_ra(env
, EXCP0D_GPF
, ra
);
2830 if (rfbm
& XSTATE_FP_MASK
) {
2831 if (xstate_bv
& XSTATE_FP_MASK
) {
2832 do_xrstor_fpu(env
, ptr
, ra
);
2835 memset(env
->fpregs
, 0, sizeof(env
->fpregs
));
2838 if (rfbm
& XSTATE_SSE_MASK
) {
2839 /* Note that the standard form of XRSTOR loads MXCSR from memory
2840 whether or not the XSTATE_BV bit is set. */
2841 do_xrstor_mxcsr(env
, ptr
, ra
);
2842 if (xstate_bv
& XSTATE_SSE_MASK
) {
2843 do_xrstor_sse(env
, ptr
, ra
);
2845 /* ??? When AVX is implemented, we may have to be more
2846 selective in the clearing. */
2847 memset(env
->xmm_regs
, 0, sizeof(env
->xmm_regs
));
2850 if (rfbm
& XSTATE_BNDREGS_MASK
) {
2851 if (xstate_bv
& XSTATE_BNDREGS_MASK
) {
2852 do_xrstor_bndregs(env
, ptr
+ XO(bndreg_state
), ra
);
2853 env
->hflags
|= HF_MPX_IU_MASK
;
2855 memset(env
->bnd_regs
, 0, sizeof(env
->bnd_regs
));
2856 env
->hflags
&= ~HF_MPX_IU_MASK
;
2859 if (rfbm
& XSTATE_BNDCSR_MASK
) {
2860 if (xstate_bv
& XSTATE_BNDCSR_MASK
) {
2861 do_xrstor_bndcsr(env
, ptr
+ XO(bndcsr_state
), ra
);
2863 memset(&env
->bndcs_regs
, 0, sizeof(env
->bndcs_regs
));
2865 cpu_sync_bndcs_hflags(env
);
2867 if (rfbm
& XSTATE_PKRU_MASK
) {
2868 uint64_t old_pkru
= env
->pkru
;
2869 if (xstate_bv
& XSTATE_PKRU_MASK
) {
2870 do_xrstor_pkru(env
, ptr
+ XO(pkru_state
), ra
);
2874 if (env
->pkru
!= old_pkru
) {
2875 CPUState
*cs
= env_cpu(env
);
2883 uint64_t helper_xgetbv(CPUX86State
*env
, uint32_t ecx
)
2885 /* The OS must have enabled XSAVE. */
2886 if (!(env
->cr
[4] & CR4_OSXSAVE_MASK
)) {
2887 raise_exception_ra(env
, EXCP06_ILLOP
, GETPC());
2894 if (env
->features
[FEAT_XSAVE
] & CPUID_XSAVE_XGETBV1
) {
2895 return env
->xcr0
& get_xinuse(env
);
2899 raise_exception_ra(env
, EXCP0D_GPF
, GETPC());
2902 void helper_xsetbv(CPUX86State
*env
, uint32_t ecx
, uint64_t mask
)
2904 uint32_t dummy
, ena_lo
, ena_hi
;
2907 /* The OS must have enabled XSAVE. */
2908 if (!(env
->cr
[4] & CR4_OSXSAVE_MASK
)) {
2909 raise_exception_ra(env
, EXCP06_ILLOP
, GETPC());
2912 /* Only XCR0 is defined at present; the FPU may not be disabled. */
2913 if (ecx
!= 0 || (mask
& XSTATE_FP_MASK
) == 0) {
2917 /* Disallow enabling unimplemented features. */
2918 cpu_x86_cpuid(env
, 0x0d, 0, &ena_lo
, &dummy
, &dummy
, &ena_hi
);
2919 ena
= ((uint64_t)ena_hi
<< 32) | ena_lo
;
2924 /* Disallow enabling only half of MPX. */
2925 if ((mask
^ (mask
* (XSTATE_BNDCSR_MASK
/ XSTATE_BNDREGS_MASK
)))
2926 & XSTATE_BNDCSR_MASK
) {
2931 cpu_sync_bndcs_hflags(env
);
2935 raise_exception_ra(env
, EXCP0D_GPF
, GETPC());
/* XXX: optimize by storing fpstt and fptags in the static cpu state */
2941 #define SSE_DAZ 0x0040
2942 #define SSE_RC_MASK 0x6000
2943 #define SSE_RC_NEAR 0x0000
2944 #define SSE_RC_DOWN 0x2000
2945 #define SSE_RC_UP 0x4000
2946 #define SSE_RC_CHOP 0x6000
2947 #define SSE_FZ 0x8000
2949 void update_mxcsr_status(CPUX86State
*env
)
2951 uint32_t mxcsr
= env
->mxcsr
;
2954 /* set rounding mode */
2955 switch (mxcsr
& SSE_RC_MASK
) {
2958 rnd_type
= float_round_nearest_even
;
2961 rnd_type
= float_round_down
;
2964 rnd_type
= float_round_up
;
2967 rnd_type
= float_round_to_zero
;
2970 set_float_rounding_mode(rnd_type
, &env
->sse_status
);
2972 /* Set exception flags. */
2973 set_float_exception_flags((mxcsr
& FPUS_IE
? float_flag_invalid
: 0) |
2974 (mxcsr
& FPUS_ZE
? float_flag_divbyzero
: 0) |
2975 (mxcsr
& FPUS_OE
? float_flag_overflow
: 0) |
2976 (mxcsr
& FPUS_UE
? float_flag_underflow
: 0) |
2977 (mxcsr
& FPUS_PE
? float_flag_inexact
: 0),
2980 /* set denormals are zero */
2981 set_flush_inputs_to_zero((mxcsr
& SSE_DAZ
) ? 1 : 0, &env
->sse_status
);
2983 /* set flush to zero */
2984 set_flush_to_zero((mxcsr
& SSE_FZ
) ? 1 : 0, &env
->sse_status
);
2987 void update_mxcsr_from_sse_status(CPUX86State
*env
)
2989 if (tcg_enabled()) {
2990 uint8_t flags
= get_float_exception_flags(&env
->sse_status
);
2992 * The MXCSR denormal flag has opposite semantics to
2993 * float_flag_input_denormal (the softfloat code sets that flag
2994 * only when flushing input denormals to zero, but SSE sets it
2995 * only when not flushing them to zero), so is not converted
2998 env
->mxcsr
|= ((flags
& float_flag_invalid
? FPUS_IE
: 0) |
2999 (flags
& float_flag_divbyzero
? FPUS_ZE
: 0) |
3000 (flags
& float_flag_overflow
? FPUS_OE
: 0) |
3001 (flags
& float_flag_underflow
? FPUS_UE
: 0) |
3002 (flags
& float_flag_inexact
? FPUS_PE
: 0) |
3003 (flags
& float_flag_output_denormal
? FPUS_UE
| FPUS_PE
:
3008 void helper_update_mxcsr(CPUX86State
*env
)
3010 update_mxcsr_from_sse_status(env
);
3013 void helper_ldmxcsr(CPUX86State
*env
, uint32_t val
)
3015 cpu_set_mxcsr(env
, val
);
3018 void helper_enter_mmx(CPUX86State
*env
)
3021 *(uint32_t *)(env
->fptags
) = 0;
3022 *(uint32_t *)(env
->fptags
+ 4) = 0;
3025 void helper_emms(CPUX86State
*env
)
3027 /* set to empty state */
3028 *(uint32_t *)(env
->fptags
) = 0x01010101;
3029 *(uint32_t *)(env
->fptags
+ 4) = 0x01010101;
3033 void helper_movq(CPUX86State
*env
, void *d
, void *s
)
3035 *(uint64_t *)d
= *(uint64_t *)s
;
3039 #include "ops_sse.h"
3042 #include "ops_sse.h"