target/i386: fix fbstp handling of out-of-range values
[qemu/ar7.git] / target/i386/fpu_helper.c
blob 41f6f391cab3667f11a38362d1a51579c598b290
1 /*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
20 #include "qemu/osdep.h"
21 #include <math.h>
22 #include "cpu.h"
23 #include "exec/helper-proto.h"
24 #include "qemu/host-utils.h"
25 #include "exec/exec-all.h"
26 #include "exec/cpu_ldst.h"
27 #include "fpu/softfloat.h"
29 #ifdef CONFIG_SOFTMMU
30 #include "hw/irq.h"
31 #endif
33 #define FPU_RC_MASK 0xc00
34 #define FPU_RC_NEAR 0x000
35 #define FPU_RC_DOWN 0x400
36 #define FPU_RC_UP 0x800
37 #define FPU_RC_CHOP 0xc00
39 #define MAXTAN 9223372036854775808.0
41 /* the following deal with x86 long double-precision numbers */
42 #define MAXEXPD 0x7fff
43 #define EXPBIAS 16383
44 #define EXPD(fp) (fp.l.upper & 0x7fff)
45 #define SIGND(fp) ((fp.l.upper) & 0x8000)
46 #define MANTD(fp) (fp.l.lower)
47 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
49 #define FPUS_IE (1 << 0)
50 #define FPUS_DE (1 << 1)
51 #define FPUS_ZE (1 << 2)
52 #define FPUS_OE (1 << 3)
53 #define FPUS_UE (1 << 4)
54 #define FPUS_PE (1 << 5)
55 #define FPUS_SF (1 << 6)
56 #define FPUS_SE (1 << 7)
57 #define FPUS_B (1 << 15)
59 #define FPUC_EM 0x3f
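/*
 * Rounding-dependent variants of the x87 load-constant values: the _u/_d
 * suffixes are the constants rounded up/down to 64-bit precision.  The
 * FLDL2T/FLDL2E/FLDPI/FLDLG2/FLDLN2 helpers below pick the variant that
 * matches the current rounding-control setting, as real hardware does.
 */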
61 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
62 #define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
63 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
64 #define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
65 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
66 #define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
67 #define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
68 #define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)
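/*
 * With CR0.NE clear, x87 exceptions are reported through the external
 * FERR# pin, which the board routes to an interrupt (historically IRQ13).
 * ferr_irq models that path; HF2_IGNNE tracks the IGNNE# handshake that a
 * write to I/O port F0h triggers on the chipset.
 */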
70 #if !defined(CONFIG_USER_ONLY)
71 static qemu_irq ferr_irq;
73 void x86_register_ferr_irq(qemu_irq irq)
75 ferr_irq = irq;
78 static void cpu_clear_ignne(void)
80 CPUX86State *env = &X86_CPU(first_cpu)->env;
81 env->hflags2 &= ~HF2_IGNNE_MASK;
84 void cpu_set_ignne(void)
86 CPUX86State *env = &X86_CPU(first_cpu)->env;
87 env->hflags2 |= HF2_IGNNE_MASK;
89 * We get here in response to a write to port F0h. The chipset should
90 * deassert FP_IRQ; FERR# instead should stay asserted until FPSW_SE is
91 * cleared, because FERR# and FP_IRQ are two separate pins on real
92 * hardware. However, we don't model FERR# as a qemu_irq, so we just
93 * do directly what the chipset would do, i.e. deassert FP_IRQ.
95 qemu_irq_lower(ferr_irq);
97 #endif
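/*
 * The register stack is described by fpstt (the index of ST0 within
 * fpregs[]) and fptags[] (nonzero means the slot is empty).  fpush makes
 * room by decrementing TOP and validating the new ST0; fpop empties ST0
 * and increments TOP.
 */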
100 static inline void fpush(CPUX86State *env)
102 env->fpstt = (env->fpstt - 1) & 7;
103 env->fptags[env->fpstt] = 0; /* validate stack entry */
106 static inline void fpop(CPUX86State *env)
108 env->fptags[env->fpstt] = 1; /* invalidate stack entry */
109 env->fpstt = (env->fpstt + 1) & 7;
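/*
 * Load/store of the 80-bit memory format: the 64-bit significand is at
 * ptr and the 16-bit sign+exponent word at ptr + 8.
 */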
112 static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
113 uintptr_t retaddr)
115 CPU_LDoubleU temp;
117 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
118 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
119 return temp.d;
122 static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
123 uintptr_t retaddr)
125 CPU_LDoubleU temp;
127 temp.d = f;
128 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
129 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
132 /* x87 FPU helpers */
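/*
 * Several helpers below operate on host doubles; these conversions go
 * through the IEEE binary64 bit pattern, so the extra precision of the
 * 80-bit format is lost on the way.
 */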
134 static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
136 union {
137 float64 f64;
138 double d;
139 } u;
141 u.f64 = floatx80_to_float64(a, &env->fp_status);
142 return u.d;
145 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
147 union {
148 float64 f64;
149 double d;
150 } u;
152 u.d = a;
153 return float64_to_floatx80(u.f64, &env->fp_status);
156 static void fpu_set_exception(CPUX86State *env, int mask)
158 env->fpus |= mask;
159 if (env->fpus & (~env->fpuc & FPUC_EM)) {
160 env->fpus |= FPUS_SE | FPUS_B;
164 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
166 if (floatx80_is_zero(b)) {
167 fpu_set_exception(env, FPUS_ZE);
169 return floatx80_div(a, b, &env->fp_status);
172 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
174 if (env->cr[0] & CR0_NE_MASK) {
175 raise_exception_ra(env, EXCP10_COPR, retaddr);
177 #if !defined(CONFIG_USER_ONLY)
178 else if (ferr_irq && !(env->hflags2 & HF2_IGNNE_MASK)) {
179 qemu_irq_raise(ferr_irq);
181 #endif
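/*
 * FT0 is a scratch register holding the second (memory) operand of
 * two-operand instructions; the *_FT0 load helpers fill it and the
 * ST0-versus-FT0 arithmetic and compare helpers consume it.
 */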
184 void helper_flds_FT0(CPUX86State *env, uint32_t val)
186 union {
187 float32 f;
188 uint32_t i;
189 } u;
191 u.i = val;
192 FT0 = float32_to_floatx80(u.f, &env->fp_status);
195 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
197 union {
198 float64 f;
199 uint64_t i;
200 } u;
202 u.i = val;
203 FT0 = float64_to_floatx80(u.f, &env->fp_status);
206 void helper_fildl_FT0(CPUX86State *env, int32_t val)
208 FT0 = int32_to_floatx80(val, &env->fp_status);
211 void helper_flds_ST0(CPUX86State *env, uint32_t val)
213 int new_fpstt;
214 union {
215 float32 f;
216 uint32_t i;
217 } u;
219 new_fpstt = (env->fpstt - 1) & 7;
220 u.i = val;
221 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
222 env->fpstt = new_fpstt;
223 env->fptags[new_fpstt] = 0; /* validate stack entry */
226 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
228 int new_fpstt;
229 union {
230 float64 f;
231 uint64_t i;
232 } u;
234 new_fpstt = (env->fpstt - 1) & 7;
235 u.i = val;
236 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
237 env->fpstt = new_fpstt;
238 env->fptags[new_fpstt] = 0; /* validate stack entry */
241 void helper_fildl_ST0(CPUX86State *env, int32_t val)
243 int new_fpstt;
245 new_fpstt = (env->fpstt - 1) & 7;
246 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
247 env->fpstt = new_fpstt;
248 env->fptags[new_fpstt] = 0; /* validate stack entry */
251 void helper_fildll_ST0(CPUX86State *env, int64_t val)
253 int new_fpstt;
255 new_fpstt = (env->fpstt - 1) & 7;
256 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
257 env->fpstt = new_fpstt;
258 env->fptags[new_fpstt] = 0; /* validate stack entry */
261 uint32_t helper_fsts_ST0(CPUX86State *env)
263 union {
264 float32 f;
265 uint32_t i;
266 } u;
268 u.f = floatx80_to_float32(ST0, &env->fp_status);
269 return u.i;
272 uint64_t helper_fstl_ST0(CPUX86State *env)
274 union {
275 float64 f;
276 uint64_t i;
277 } u;
279 u.f = floatx80_to_float64(ST0, &env->fp_status);
280 return u.i;
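/*
 * Integer stores.  The 16-bit forms saturate out-of-range results to the
 * integer indefinite value -32768; the 32- and 64-bit forms detect
 * overflow through the softfloat invalid flag and return 0x80000000 or
 * 0x8000000000000000, preserving any previously accumulated flags.
 */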
283 int32_t helper_fist_ST0(CPUX86State *env)
285 int32_t val;
287 val = floatx80_to_int32(ST0, &env->fp_status);
288 if (val != (int16_t)val) {
289 val = -32768;
291 return val;
294 int32_t helper_fistl_ST0(CPUX86State *env)
296 int32_t val;
297 signed char old_exp_flags;
299 old_exp_flags = get_float_exception_flags(&env->fp_status);
300 set_float_exception_flags(0, &env->fp_status);
302 val = floatx80_to_int32(ST0, &env->fp_status);
303 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
304 val = 0x80000000;
306 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
307 | old_exp_flags, &env->fp_status);
308 return val;
311 int64_t helper_fistll_ST0(CPUX86State *env)
313 int64_t val;
314 signed char old_exp_flags;
316 old_exp_flags = get_float_exception_flags(&env->fp_status);
317 set_float_exception_flags(0, &env->fp_status);
319 val = floatx80_to_int64(ST0, &env->fp_status);
320 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
321 val = 0x8000000000000000ULL;
323 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
324 | old_exp_flags, &env->fp_status);
325 return val;
328 int32_t helper_fistt_ST0(CPUX86State *env)
330 int32_t val;
332 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
333 if (val != (int16_t)val) {
334 val = -32768;
336 return val;
339 int32_t helper_fisttl_ST0(CPUX86State *env)
341 return floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
344 int64_t helper_fisttll_ST0(CPUX86State *env)
346 return floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
349 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
351 int new_fpstt;
353 new_fpstt = (env->fpstt - 1) & 7;
354 env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
355 env->fpstt = new_fpstt;
356 env->fptags[new_fpstt] = 0; /* validate stack entry */
359 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
361 helper_fstt(env, ST0, ptr, GETPC());
364 void helper_fpush(CPUX86State *env)
366 fpush(env);
369 void helper_fpop(CPUX86State *env)
371 fpop(env);
374 void helper_fdecstp(CPUX86State *env)
376 env->fpstt = (env->fpstt - 1) & 7;
377 env->fpus &= ~0x4700;
380 void helper_fincstp(CPUX86State *env)
382 env->fpstt = (env->fpstt + 1) & 7;
383 env->fpus &= ~0x4700;
386 /* FPU move */
388 void helper_ffree_STN(CPUX86State *env, int st_index)
390 env->fptags[(env->fpstt + st_index) & 7] = 1;
393 void helper_fmov_ST0_FT0(CPUX86State *env)
395 ST0 = FT0;
398 void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
400 FT0 = ST(st_index);
403 void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
405 ST0 = ST(st_index);
408 void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
410 ST(st_index) = ST0;
413 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
415 floatx80 tmp;
417 tmp = ST(st_index);
418 ST(st_index) = ST0;
419 ST0 = tmp;
422 /* FPU operations */
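/*
 * floatx80_compare returns a FloatRelation (-1 less, 0 equal, 1 greater,
 * 2 unordered); indexed with ret + 1 this table yields the x87 condition
 * codes C0, C3, none, or C0|C2|C3 respectively.
 */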
424 static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
426 void helper_fcom_ST0_FT0(CPUX86State *env)
428 FloatRelation ret;
430 ret = floatx80_compare(ST0, FT0, &env->fp_status);
431 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
434 void helper_fucom_ST0_FT0(CPUX86State *env)
436 FloatRelation ret;
438 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
439 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
442 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
444 void helper_fcomi_ST0_FT0(CPUX86State *env)
446 int eflags;
447 FloatRelation ret;
449 ret = floatx80_compare(ST0, FT0, &env->fp_status);
450 eflags = cpu_cc_compute_all(env, CC_OP);
451 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
452 CC_SRC = eflags;
455 void helper_fucomi_ST0_FT0(CPUX86State *env)
457 int eflags;
458 FloatRelation ret;
460 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
461 eflags = cpu_cc_compute_all(env, CC_OP);
462 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
463 CC_SRC = eflags;
466 void helper_fadd_ST0_FT0(CPUX86State *env)
468 ST0 = floatx80_add(ST0, FT0, &env->fp_status);
471 void helper_fmul_ST0_FT0(CPUX86State *env)
473 ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
476 void helper_fsub_ST0_FT0(CPUX86State *env)
478 ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
481 void helper_fsubr_ST0_FT0(CPUX86State *env)
483 ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
486 void helper_fdiv_ST0_FT0(CPUX86State *env)
488 ST0 = helper_fdiv(env, ST0, FT0);
491 void helper_fdivr_ST0_FT0(CPUX86State *env)
493 ST0 = helper_fdiv(env, FT0, ST0);
496 /* fp operations between STN and ST0 */
498 void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
500 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
503 void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
505 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
508 void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
510 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
513 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
515 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
518 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
520 floatx80 *p;
522 p = &ST(st_index);
523 *p = helper_fdiv(env, *p, ST0);
526 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
528 floatx80 *p;
530 p = &ST(st_index);
531 *p = helper_fdiv(env, ST0, *p);
534 /* misc FPU operations */
535 void helper_fchs_ST0(CPUX86State *env)
537 ST0 = floatx80_chs(ST0);
540 void helper_fabs_ST0(CPUX86State *env)
542 ST0 = floatx80_abs(ST0);
545 void helper_fld1_ST0(CPUX86State *env)
547 ST0 = floatx80_one;
550 void helper_fldl2t_ST0(CPUX86State *env)
552 switch (env->fpuc & FPU_RC_MASK) {
553 case FPU_RC_UP:
554 ST0 = floatx80_l2t_u;
555 break;
556 default:
557 ST0 = floatx80_l2t;
558 break;
562 void helper_fldl2e_ST0(CPUX86State *env)
564 switch (env->fpuc & FPU_RC_MASK) {
565 case FPU_RC_DOWN:
566 case FPU_RC_CHOP:
567 ST0 = floatx80_l2e_d;
568 break;
569 default:
570 ST0 = floatx80_l2e;
571 break;
575 void helper_fldpi_ST0(CPUX86State *env)
577 switch (env->fpuc & FPU_RC_MASK) {
578 case FPU_RC_DOWN:
579 case FPU_RC_CHOP:
580 ST0 = floatx80_pi_d;
581 break;
582 default:
583 ST0 = floatx80_pi;
584 break;
588 void helper_fldlg2_ST0(CPUX86State *env)
590 switch (env->fpuc & FPU_RC_MASK) {
591 case FPU_RC_DOWN:
592 case FPU_RC_CHOP:
593 ST0 = floatx80_lg2_d;
594 break;
595 default:
596 ST0 = floatx80_lg2;
597 break;
601 void helper_fldln2_ST0(CPUX86State *env)
603 switch (env->fpuc & FPU_RC_MASK) {
604 case FPU_RC_DOWN:
605 case FPU_RC_CHOP:
606 ST0 = floatx80_ln2_d;
607 break;
608 default:
609 ST0 = floatx80_ln2;
610 break;
614 void helper_fldz_ST0(CPUX86State *env)
616 ST0 = floatx80_zero;
619 void helper_fldz_FT0(CPUX86State *env)
621 FT0 = floatx80_zero;
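/* FNSTSW: the status word with the current TOP inserted into bits 13:11. */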
624 uint32_t helper_fnstsw(CPUX86State *env)
626 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
629 uint32_t helper_fnstcw(CPUX86State *env)
631 return env->fpuc;
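/*
 * Propagate the FPU control word into softfloat: the rounding-control
 * field selects the rounding mode and the precision-control field (bits
 * 9:8) selects 32-, 64- or 80-bit rounding precision for x87 results
 * (the reserved encoding 01 is treated as 80-bit here).
 */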
634 void update_fp_status(CPUX86State *env)
636 int rnd_type;
638 /* set rounding mode */
639 switch (env->fpuc & FPU_RC_MASK) {
640 default:
641 case FPU_RC_NEAR:
642 rnd_type = float_round_nearest_even;
643 break;
644 case FPU_RC_DOWN:
645 rnd_type = float_round_down;
646 break;
647 case FPU_RC_UP:
648 rnd_type = float_round_up;
649 break;
650 case FPU_RC_CHOP:
651 rnd_type = float_round_to_zero;
652 break;
654 set_float_rounding_mode(rnd_type, &env->fp_status);
655 switch ((env->fpuc >> 8) & 3) {
656 case 0:
657 rnd_type = 32;
658 break;
659 case 2:
660 rnd_type = 64;
661 break;
662 case 3:
663 default:
664 rnd_type = 80;
665 break;
667 set_floatx80_rounding_precision(rnd_type, &env->fp_status);
670 void helper_fldcw(CPUX86State *env, uint32_t val)
672 cpu_set_fpuc(env, val);
675 void helper_fclex(CPUX86State *env)
677 env->fpus &= 0x7f00;
680 void helper_fwait(CPUX86State *env)
682 if (env->fpus & FPUS_SE) {
683 fpu_raise_exception(env, GETPC());
687 void helper_fninit(CPUX86State *env)
689 env->fpus = 0;
690 env->fpstt = 0;
691 cpu_set_fpuc(env, 0x37f);
692 env->fptags[0] = 1;
693 env->fptags[1] = 1;
694 env->fptags[2] = 1;
695 env->fptags[3] = 1;
696 env->fptags[4] = 1;
697 env->fptags[5] = 1;
698 env->fptags[6] = 1;
699 env->fptags[7] = 1;
702 /* BCD ops */
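/*
 * Packed BCD (m80bcd) operand format: bytes 0-8 hold 18 decimal digits,
 * two per byte with the least-significant pair first; bit 7 of byte 9 is
 * the sign.
 */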
704 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
706 floatx80 tmp;
707 uint64_t val;
708 unsigned int v;
709 int i;
711 val = 0;
712 for (i = 8; i >= 0; i--) {
713 v = cpu_ldub_data_ra(env, ptr + i, GETPC());
714 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
716 tmp = int64_to_floatx80(val, &env->fp_status);
717 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
718 tmp = floatx80_chs(tmp);
720 fpush(env);
721 ST0 = tmp;
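/*
 * FBSTP: values that cannot be represented in 18 decimal digits
 * (including NaNs and infinities, which saturate the int64 conversion)
 * raise the invalid-operation exception and are stored as the packed-BCD
 * indefinite encoding: bytes 0-6 zero, byte 7 = 0xc0, bytes 8-9 = 0xff.
 */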
724 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
726 int v;
727 target_ulong mem_ref, mem_end;
728 int64_t val;
729 CPU_LDoubleU temp;
731 temp.d = ST0;
733 val = floatx80_to_int64(ST0, &env->fp_status);
734 mem_ref = ptr;
735 if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
736 float_raise(float_flag_invalid, &env->fp_status);
737 while (mem_ref < ptr + 7) {
738 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
740 cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC());
741 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
742 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
743 return;
745 mem_end = mem_ref + 9;
746 if (SIGND(temp)) {
747 cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
748 val = -val;
749 } else {
750 cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
752 while (mem_ref < mem_end) {
753 if (val == 0) {
754 break;
756 v = val % 100;
757 val = val / 100;
758 v = ((v / 10) << 4) | (v % 10);
759 cpu_stb_data_ra(env, mem_ref++, v, GETPC());
761 while (mem_ref < mem_end) {
762 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
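/*
 * The transcendental helpers below (f2xm1, fyl2x, fptan, fpatan, fsin,
 * fcos, fsincos, fprem, fprem1, fyl2xp1) convert to host double and use
 * libm, so results are only double precision rather than bit-exact with
 * real hardware.
 */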
766 void helper_f2xm1(CPUX86State *env)
768 double val = floatx80_to_double(env, ST0);
770 val = pow(2.0, val) - 1.0;
771 ST0 = double_to_floatx80(env, val);
774 void helper_fyl2x(CPUX86State *env)
776 double fptemp = floatx80_to_double(env, ST0);
778 if (fptemp > 0.0) {
779 fptemp = log(fptemp) / log(2.0); /* log2(ST) */
780 fptemp *= floatx80_to_double(env, ST1);
781 ST1 = double_to_floatx80(env, fptemp);
782 fpop(env);
783 } else {
784 env->fpus &= ~0x4700;
785 env->fpus |= 0x400;
789 void helper_fptan(CPUX86State *env)
791 double fptemp = floatx80_to_double(env, ST0);
793 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
794 env->fpus |= 0x400;
795 } else {
796 fptemp = tan(fptemp);
797 ST0 = double_to_floatx80(env, fptemp);
798 fpush(env);
799 ST0 = floatx80_one;
800 env->fpus &= ~0x400; /* C2 <-- 0 */
801 /* the above code is for |arg| < 2**52 only */
805 void helper_fpatan(CPUX86State *env)
807 double fptemp, fpsrcop;
809 fpsrcop = floatx80_to_double(env, ST1);
810 fptemp = floatx80_to_double(env, ST0);
811 ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
812 fpop(env);
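/*
 * FXTRACT: on return ST1 holds the unbiased exponent of the original ST0
 * and ST0 holds its significand scaled into [1, 2), with special cases
 * for zero (exponent becomes -inf and #Z is raised), invalid encodings,
 * NaNs and infinities; denormal inputs are normalized first.
 */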
815 void helper_fxtract(CPUX86State *env)
817 CPU_LDoubleU temp;
819 temp.d = ST0;
821 if (floatx80_is_zero(ST0)) {
822 /* Easy way to generate -inf and raise a division-by-0 exception */
823 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
824 &env->fp_status);
825 fpush(env);
826 ST0 = temp.d;
827 } else if (floatx80_invalid_encoding(ST0)) {
828 float_raise(float_flag_invalid, &env->fp_status);
829 ST0 = floatx80_default_nan(&env->fp_status);
830 fpush(env);
831 ST0 = ST1;
832 } else if (floatx80_is_any_nan(ST0)) {
833 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
834 float_raise(float_flag_invalid, &env->fp_status);
835 ST0 = floatx80_silence_nan(ST0, &env->fp_status);
837 fpush(env);
838 ST0 = ST1;
839 } else if (floatx80_is_infinity(ST0)) {
840 fpush(env);
841 ST0 = ST1;
842 ST1 = floatx80_infinity;
843 } else {
844 int expdif;
846 if (EXPD(temp) == 0) {
847 int shift = clz64(temp.l.lower);
848 temp.l.lower <<= shift;
849 expdif = 1 - EXPBIAS - shift;
850 float_raise(float_flag_input_denormal, &env->fp_status);
851 } else {
852 expdif = EXPD(temp) - EXPBIAS;
854 /* DP exponent bias */
855 ST0 = int32_to_floatx80(expdif, &env->fp_status);
856 fpush(env);
857 BIASEXPONENT(temp);
858 ST0 = temp.d;
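/*
 * fprem1/fprem compute the partial remainder with host doubles.  When the
 * exponent difference is below 53 the reduction completes: C2 is cleared
 * and the low three quotient bits are placed in C0, C3 and C1.  Otherwise
 * only a partial reduction is performed and C2 is set so software can
 * iterate.  fprem1 rounds the quotient to nearest (IEEE remainder) while
 * fprem truncates it toward zero.
 */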
862 void helper_fprem1(CPUX86State *env)
864 double st0, st1, dblq, fpsrcop, fptemp;
865 CPU_LDoubleU fpsrcop1, fptemp1;
866 int expdif;
867 signed long long int q;
869 st0 = floatx80_to_double(env, ST0);
870 st1 = floatx80_to_double(env, ST1);
872 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
873 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
874 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
875 return;
878 fpsrcop = st0;
879 fptemp = st1;
880 fpsrcop1.d = ST0;
881 fptemp1.d = ST1;
882 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
884 if (expdif < 0) {
885 /* optimisation? taken from the AMD docs */
886 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
887 /* ST0 is unchanged */
888 return;
891 if (expdif < 53) {
892 dblq = fpsrcop / fptemp;
893 /* round dblq towards nearest integer */
894 dblq = rint(dblq);
895 st0 = fpsrcop - fptemp * dblq;
897 /* convert dblq to q by truncating towards zero */
898 if (dblq < 0.0) {
899 q = (signed long long int)(-dblq);
900 } else {
901 q = (signed long long int)dblq;
904 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
905 /* (C0,C3,C1) <-- (q2,q1,q0) */
906 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
907 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
908 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
909 } else {
910 env->fpus |= 0x400; /* C2 <-- 1 */
911 fptemp = pow(2.0, expdif - 50);
912 fpsrcop = (st0 / st1) / fptemp;
913 /* fpsrcop = integer obtained by chopping */
914 fpsrcop = (fpsrcop < 0.0) ?
915 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
916 st0 -= (st1 * fpsrcop * fptemp);
918 ST0 = double_to_floatx80(env, st0);
921 void helper_fprem(CPUX86State *env)
923 double st0, st1, dblq, fpsrcop, fptemp;
924 CPU_LDoubleU fpsrcop1, fptemp1;
925 int expdif;
926 signed long long int q;
928 st0 = floatx80_to_double(env, ST0);
929 st1 = floatx80_to_double(env, ST1);
931 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
932 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
933 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
934 return;
937 fpsrcop = st0;
938 fptemp = st1;
939 fpsrcop1.d = ST0;
940 fptemp1.d = ST1;
941 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
943 if (expdif < 0) {
944 /* optimisation? taken from the AMD docs */
945 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
946 /* ST0 is unchanged */
947 return;
950 if (expdif < 53) {
951 dblq = fpsrcop / fptemp; /* ST0 / ST1 */
952 /* round dblq towards zero */
953 dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
954 st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
956 /* convert dblq to q by truncating towards zero */
957 if (dblq < 0.0) {
958 q = (signed long long int)(-dblq);
959 } else {
960 q = (signed long long int)dblq;
963 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
964 /* (C0,C3,C1) <-- (q2,q1,q0) */
965 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
966 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
967 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
968 } else {
969 int N = 32 + (expdif % 32); /* as per AMD docs */
971 env->fpus |= 0x400; /* C2 <-- 1 */
972 fptemp = pow(2.0, (double)(expdif - N));
973 fpsrcop = (st0 / st1) / fptemp;
974 /* fpsrcop = integer obtained by chopping */
975 fpsrcop = (fpsrcop < 0.0) ?
976 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
977 st0 -= (st1 * fpsrcop * fptemp);
979 ST0 = double_to_floatx80(env, st0);
982 void helper_fyl2xp1(CPUX86State *env)
984 double fptemp = floatx80_to_double(env, ST0);
986 if ((fptemp + 1.0) > 0.0) {
987 fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
988 fptemp *= floatx80_to_double(env, ST1);
989 ST1 = double_to_floatx80(env, fptemp);
990 fpop(env);
991 } else {
992 env->fpus &= ~0x4700;
993 env->fpus |= 0x400;
997 void helper_fsqrt(CPUX86State *env)
999 if (floatx80_is_neg(ST0)) {
1000 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1001 env->fpus |= 0x400;
1003 ST0 = floatx80_sqrt(ST0, &env->fp_status);
1006 void helper_fsincos(CPUX86State *env)
1008 double fptemp = floatx80_to_double(env, ST0);
1010 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1011 env->fpus |= 0x400;
1012 } else {
1013 ST0 = double_to_floatx80(env, sin(fptemp));
1014 fpush(env);
1015 ST0 = double_to_floatx80(env, cos(fptemp));
1016 env->fpus &= ~0x400; /* C2 <-- 0 */
1017 /* the above code is for |arg| < 2**63 only */
1021 void helper_frndint(CPUX86State *env)
1023 ST0 = floatx80_round_to_int(ST0, &env->fp_status);
1026 void helper_fscale(CPUX86State *env)
1028 if (floatx80_invalid_encoding(ST1)) {
1029 float_raise(float_flag_invalid, &env->fp_status);
1030 ST0 = floatx80_default_nan(&env->fp_status);
1031 } else if (floatx80_is_any_nan(ST1)) {
1032 ST0 = ST1;
1033 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1034 float_raise(float_flag_invalid, &env->fp_status);
1035 ST0 = floatx80_silence_nan(ST0, &env->fp_status);
1037 } else if (floatx80_is_infinity(ST1) &&
1038 !floatx80_invalid_encoding(ST0) &&
1039 !floatx80_is_any_nan(ST0)) {
1040 if (floatx80_is_neg(ST1)) {
1041 if (floatx80_is_infinity(ST0)) {
1042 float_raise(float_flag_invalid, &env->fp_status);
1043 ST0 = floatx80_default_nan(&env->fp_status);
1044 } else {
1045 ST0 = (floatx80_is_neg(ST0) ?
1046 floatx80_chs(floatx80_zero) :
1047 floatx80_zero);
1049 } else {
1050 if (floatx80_is_zero(ST0)) {
1051 float_raise(float_flag_invalid, &env->fp_status);
1052 ST0 = floatx80_default_nan(&env->fp_status);
1053 } else {
1054 ST0 = (floatx80_is_neg(ST0) ?
1055 floatx80_chs(floatx80_infinity) :
1056 floatx80_infinity);
1059 } else {
1060 int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
1061 signed char save = env->fp_status.floatx80_rounding_precision;
1062 env->fp_status.floatx80_rounding_precision = 80;
1063 ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
1064 env->fp_status.floatx80_rounding_precision = save;
1068 void helper_fsin(CPUX86State *env)
1070 double fptemp = floatx80_to_double(env, ST0);
1072 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1073 env->fpus |= 0x400;
1074 } else {
1075 ST0 = double_to_floatx80(env, sin(fptemp));
1076 env->fpus &= ~0x400; /* C2 <-- 0 */
1077 /* the above code is for |arg| < 2**53 only */
1081 void helper_fcos(CPUX86State *env)
1083 double fptemp = floatx80_to_double(env, ST0);
1085 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1086 env->fpus |= 0x400;
1087 } else {
1088 ST0 = double_to_floatx80(env, cos(fptemp));
1089 env->fpus &= ~0x400; /* C2 <-- 0 */
1090 /* the above code is for |arg| < 2**63 only */
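/*
 * FXAM: classify ST0 into the C3/C2/C0 condition-code patterns for empty,
 * NaN, normal, infinity, zero, denormal or unsupported encodings, and
 * copy the sign bit into C1.
 */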
1094 void helper_fxam_ST0(CPUX86State *env)
1096 CPU_LDoubleU temp;
1097 int expdif;
1099 temp.d = ST0;
1101 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1102 if (SIGND(temp)) {
1103 env->fpus |= 0x200; /* C1 <-- 1 */
1106 if (env->fptags[env->fpstt]) {
1107 env->fpus |= 0x4100; /* Empty */
1108 return;
1111 expdif = EXPD(temp);
1112 if (expdif == MAXEXPD) {
1113 if (MANTD(temp) == 0x8000000000000000ULL) {
1114 env->fpus |= 0x500; /* Infinity */
1115 } else if (MANTD(temp) & 0x8000000000000000ULL) {
1116 env->fpus |= 0x100; /* NaN */
1118 } else if (expdif == 0) {
1119 if (MANTD(temp) == 0) {
1120 env->fpus |= 0x4000; /* Zero */
1121 } else {
1122 env->fpus |= 0x4400; /* Denormal */
1124 } else if (MANTD(temp) & 0x8000000000000000ULL) {
1125 env->fpus |= 0x400;
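/*
 * FSTENV image: control word, status word with TOP, the 16-bit tag word
 * rebuilt from fptags[] (00 valid, 01 zero, 10 special, 11 empty), and
 * zeroed instruction/operand pointers, in the 14-byte (16-bit) or 28-byte
 * (32-bit) layout.
 */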
1129 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
1130 uintptr_t retaddr)
1132 int fpus, fptag, exp, i;
1133 uint64_t mant;
1134 CPU_LDoubleU tmp;
1136 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1137 fptag = 0;
1138 for (i = 7; i >= 0; i--) {
1139 fptag <<= 2;
1140 if (env->fptags[i]) {
1141 fptag |= 3;
1142 } else {
1143 tmp.d = env->fpregs[i].d;
1144 exp = EXPD(tmp);
1145 mant = MANTD(tmp);
1146 if (exp == 0 && mant == 0) {
1147 /* zero */
1148 fptag |= 1;
1149 } else if (exp == 0 || exp == MAXEXPD
1150 || (mant & (1LL << 63)) == 0) {
1151 /* NaNs, infinity, denormal */
1152 fptag |= 2;
1156 if (data32) {
1157 /* 32 bit */
1158 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1159 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1160 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1161 cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1162 cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1163 cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1164 cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
1165 } else {
1166 /* 16 bit */
1167 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1168 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1169 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1170 cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1171 cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1172 cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1173 cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
1177 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1179 do_fstenv(env, ptr, data32, GETPC());
1182 static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
1184 env->fpstt = (fpus >> 11) & 7;
1185 env->fpus = fpus & ~0x3800 & ~FPUS_B;
1186 env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
1187 #if !defined(CONFIG_USER_ONLY)
1188 if (!(env->fpus & FPUS_SE)) {
1190 * Here the processor deasserts FERR#; in response, the chipset deasserts
1191 * IGNNE#.
1193 cpu_clear_ignne();
1195 #endif
1198 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1199 uintptr_t retaddr)
1201 int i, fpus, fptag;
1203 if (data32) {
1204 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1205 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1206 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1207 } else {
1208 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1209 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1210 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1212 cpu_set_fpus(env, fpus);
1213 for (i = 0; i < 8; i++) {
1214 env->fptags[i] = ((fptag & 3) == 3);
1215 fptag >>= 2;
1219 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1221 do_fldenv(env, ptr, data32, GETPC());
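/*
 * FSAVE is FSTENV followed by the eight data registers stored as 80-bit
 * values (10 bytes each), after which the FPU is reinitialized as FNINIT
 * would do.
 */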
1224 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1226 floatx80 tmp;
1227 int i;
1229 do_fstenv(env, ptr, data32, GETPC());
1231 ptr += (14 << data32);
1232 for (i = 0; i < 8; i++) {
1233 tmp = ST(i);
1234 helper_fstt(env, tmp, ptr, GETPC());
1235 ptr += 10;
1238 /* fninit */
1239 env->fpus = 0;
1240 env->fpstt = 0;
1241 cpu_set_fpuc(env, 0x37f);
1242 env->fptags[0] = 1;
1243 env->fptags[1] = 1;
1244 env->fptags[2] = 1;
1245 env->fptags[3] = 1;
1246 env->fptags[4] = 1;
1247 env->fptags[5] = 1;
1248 env->fptags[6] = 1;
1249 env->fptags[7] = 1;
1252 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
1254 floatx80 tmp;
1255 int i;
1257 do_fldenv(env, ptr, data32, GETPC());
1258 ptr += (14 << data32);
1260 for (i = 0; i < 8; i++) {
1261 tmp = helper_fldt(env, ptr, GETPC());
1262 ST(i) = tmp;
1263 ptr += 10;
1267 #if defined(CONFIG_USER_ONLY)
1268 void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
1270 helper_fsave(env, ptr, data32);
1273 void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
1275 helper_frstor(env, ptr, data32);
1277 #endif
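/*
 * XSAVE/XRSTOR support.  XO(X) is the offset of field X within the
 * X86XSaveArea layout; the do_xsave_ and do_xrstor_ functions each handle
 * one component (legacy FP, MXCSR, SSE registers, MPX BNDREGS/BNDCSR,
 * PKRU).
 */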
1279 #define XO(X) offsetof(X86XSaveArea, X)
1281 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1283 int fpus, fptag, i;
1284 target_ulong addr;
1286 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1287 fptag = 0;
1288 for (i = 0; i < 8; i++) {
1289 fptag |= (env->fptags[i] << i);
1292 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
1293 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
1294 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
1296 /* In 32-bit mode this is eip, sel, dp, sel.
1297 In 64-bit mode this is rip, rdp.
1298 But in either case we don't write actual data, just zeros. */
1299 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
1300 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
1302 addr = ptr + XO(legacy.fpregs);
1303 for (i = 0; i < 8; i++) {
1304 floatx80 tmp = ST(i);
1305 helper_fstt(env, tmp, addr, ra);
1306 addr += 16;
1310 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1312 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
1313 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
1316 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1318 int i, nb_xmm_regs;
1319 target_ulong addr;
1321 if (env->hflags & HF_CS64_MASK) {
1322 nb_xmm_regs = 16;
1323 } else {
1324 nb_xmm_regs = 8;
1327 addr = ptr + XO(legacy.xmm_regs);
1328 for (i = 0; i < nb_xmm_regs; i++) {
1329 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1330 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1331 addr += 16;
1335 static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1337 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1338 int i;
1340 for (i = 0; i < 4; i++, addr += 16) {
1341 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1342 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1346 static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1348 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
1349 env->bndcs_regs.cfgu, ra);
1350 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
1351 env->bndcs_regs.sts, ra);
1354 static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1356 cpu_stq_data_ra(env, ptr, env->pkru, ra);
1359 void helper_fxsave(CPUX86State *env, target_ulong ptr)
1361 uintptr_t ra = GETPC();
1363 /* The operand must be 16 byte aligned */
1364 if (ptr & 0xf) {
1365 raise_exception_ra(env, EXCP0D_GPF, ra);
1368 do_xsave_fpu(env, ptr, ra);
1370 if (env->cr[4] & CR4_OSFXSR_MASK) {
1371 do_xsave_mxcsr(env, ptr, ra);
1372 /* Fast FXSAVE leaves out the XMM registers */
1373 if (!(env->efer & MSR_EFER_FFXSR)
1374 || (env->hflags & HF_CPL_MASK)
1375 || !(env->hflags & HF_LMA_MASK)) {
1376 do_xsave_sse(env, ptr, ra);
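/*
 * XSAVE stores only the components selected by both the instruction mask
 * (rfbm) and XCR0.  XSAVEOPT may additionally skip components that are
 * not in use; XINUSE is only tracked precisely for the MPX bound
 * registers here, everything else is reported as in use.  The XSTATE_BV
 * header field is updated to reflect what was written.
 */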
1381 static uint64_t get_xinuse(CPUX86State *env)
1383 uint64_t inuse = -1;
1385 /* For the most part, we don't track XINUSE. We could calculate it
1386 here for all components, but it's probably less work to simply
1387 indicate in use. That said, the state of BNDREGS is important
1388 enough to track in HFLAGS, so we might as well use that here. */
1389 if ((env->hflags & HF_MPX_IU_MASK) == 0) {
1390 inuse &= ~XSTATE_BNDREGS_MASK;
1392 return inuse;
1395 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1396 uint64_t inuse, uint64_t opt, uintptr_t ra)
1398 uint64_t old_bv, new_bv;
1400 /* The OS must have enabled XSAVE. */
1401 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1402 raise_exception_ra(env, EXCP06_ILLOP, ra);
1405 /* The operand must be 64 byte aligned. */
1406 if (ptr & 63) {
1407 raise_exception_ra(env, EXCP0D_GPF, ra);
1410 /* Never save anything not enabled by XCR0. */
1411 rfbm &= env->xcr0;
1412 opt &= rfbm;
1414 if (opt & XSTATE_FP_MASK) {
1415 do_xsave_fpu(env, ptr, ra);
1417 if (rfbm & XSTATE_SSE_MASK) {
1418 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
1419 do_xsave_mxcsr(env, ptr, ra);
1421 if (opt & XSTATE_SSE_MASK) {
1422 do_xsave_sse(env, ptr, ra);
1424 if (opt & XSTATE_BNDREGS_MASK) {
1425 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
1427 if (opt & XSTATE_BNDCSR_MASK) {
1428 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
1430 if (opt & XSTATE_PKRU_MASK) {
1431 do_xsave_pkru(env, ptr + XO(pkru_state), ra);
1434 /* Update the XSTATE_BV field. */
1435 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1436 new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
1437 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
1440 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1442 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1445 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1447 uint64_t inuse = get_xinuse(env);
1448 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1451 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1453 int i, fpuc, fpus, fptag;
1454 target_ulong addr;
1456 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
1457 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
1458 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
1459 cpu_set_fpuc(env, fpuc);
1460 cpu_set_fpus(env, fpus);
1461 fptag ^= 0xff;
1462 for (i = 0; i < 8; i++) {
1463 env->fptags[i] = ((fptag >> i) & 1);
1466 addr = ptr + XO(legacy.fpregs);
1467 for (i = 0; i < 8; i++) {
1468 floatx80 tmp = helper_fldt(env, addr, ra);
1469 ST(i) = tmp;
1470 addr += 16;
1474 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1476 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
1479 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1481 int i, nb_xmm_regs;
1482 target_ulong addr;
1484 if (env->hflags & HF_CS64_MASK) {
1485 nb_xmm_regs = 16;
1486 } else {
1487 nb_xmm_regs = 8;
1490 addr = ptr + XO(legacy.xmm_regs);
1491 for (i = 0; i < nb_xmm_regs; i++) {
1492 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1493 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1494 addr += 16;
1498 static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1500 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1501 int i;
1503 for (i = 0; i < 4; i++, addr += 16) {
1504 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1505 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1509 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1511 /* FIXME: Extend highest implemented bit of linear address. */
1512 env->bndcs_regs.cfgu
1513 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
1514 env->bndcs_regs.sts
1515 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
1518 static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1520 env->pkru = cpu_ldq_data_ra(env, ptr, ra);
1523 void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1525 uintptr_t ra = GETPC();
1527 /* The operand must be 16 byte aligned */
1528 if (ptr & 0xf) {
1529 raise_exception_ra(env, EXCP0D_GPF, ra);
1532 do_xrstor_fpu(env, ptr, ra);
1534 if (env->cr[4] & CR4_OSFXSR_MASK) {
1535 do_xrstor_mxcsr(env, ptr, ra);
1536 /* Fast FXRSTOR leaves out the XMM registers */
1537 if (!(env->efer & MSR_EFER_FFXSR)
1538 || (env->hflags & HF_CPL_MASK)
1539 || !(env->hflags & HF_LMA_MASK)) {
1540 do_xrstor_sse(env, ptr, ra);
1545 #if defined(CONFIG_USER_ONLY)
1546 void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
1548 helper_fxsave(env, ptr);
1551 void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
1553 helper_fxrstor(env, ptr);
1555 #endif
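/*
 * Standard-form XRSTOR: after the alignment, CR4.OSXSAVE and header
 * checks (XSTATE_BV must be a subset of XCR0, XCOMP_BV and the following
 * reserved field must be zero), each requested component is either loaded
 * from memory or, if its XSTATE_BV bit is clear, reset to its initial
 * configuration.
 */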
1557 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1559 uintptr_t ra = GETPC();
1560 uint64_t xstate_bv, xcomp_bv, reserve0;
1562 rfbm &= env->xcr0;
1564 /* The OS must have enabled XSAVE. */
1565 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1566 raise_exception_ra(env, EXCP06_ILLOP, ra);
1569 /* The operand must be 64 byte aligned. */
1570 if (ptr & 63) {
1571 raise_exception_ra(env, EXCP0D_GPF, ra);
1574 xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1576 if ((int64_t)xstate_bv < 0) {
1577 /* FIXME: Compact form. */
1578 raise_exception_ra(env, EXCP0D_GPF, ra);
1581 /* Standard form. */
1583 /* The XSTATE_BV field must not set bits not present in XCR0. */
1584 if (xstate_bv & ~env->xcr0) {
1585 raise_exception_ra(env, EXCP0D_GPF, ra);
1588 /* The XCOMP_BV field must be zero. Note that, as of the April 2016
1589 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
1590 describes only XCOMP_BV, but the description of the standard form
1591 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
1592 includes the next 64-bit field. */
1593 xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
1594 reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
1595 if (xcomp_bv || reserve0) {
1596 raise_exception_ra(env, EXCP0D_GPF, ra);
1599 if (rfbm & XSTATE_FP_MASK) {
1600 if (xstate_bv & XSTATE_FP_MASK) {
1601 do_xrstor_fpu(env, ptr, ra);
1602 } else {
1603 helper_fninit(env);
1604 memset(env->fpregs, 0, sizeof(env->fpregs));
1607 if (rfbm & XSTATE_SSE_MASK) {
1608 /* Note that the standard form of XRSTOR loads MXCSR from memory
1609 whether or not the XSTATE_BV bit is set. */
1610 do_xrstor_mxcsr(env, ptr, ra);
1611 if (xstate_bv & XSTATE_SSE_MASK) {
1612 do_xrstor_sse(env, ptr, ra);
1613 } else {
1614 /* ??? When AVX is implemented, we may have to be more
1615 selective in the clearing. */
1616 memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
1619 if (rfbm & XSTATE_BNDREGS_MASK) {
1620 if (xstate_bv & XSTATE_BNDREGS_MASK) {
1621 do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
1622 env->hflags |= HF_MPX_IU_MASK;
1623 } else {
1624 memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
1625 env->hflags &= ~HF_MPX_IU_MASK;
1628 if (rfbm & XSTATE_BNDCSR_MASK) {
1629 if (xstate_bv & XSTATE_BNDCSR_MASK) {
1630 do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
1631 } else {
1632 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
1634 cpu_sync_bndcs_hflags(env);
1636 if (rfbm & XSTATE_PKRU_MASK) {
1637 uint64_t old_pkru = env->pkru;
1638 if (xstate_bv & XSTATE_PKRU_MASK) {
1639 do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
1640 } else {
1641 env->pkru = 0;
1643 if (env->pkru != old_pkru) {
1644 CPUState *cs = env_cpu(env);
1645 tlb_flush(cs);
1650 #undef XO
1652 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1654 /* The OS must have enabled XSAVE. */
1655 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1656 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1659 switch (ecx) {
1660 case 0:
1661 return env->xcr0;
1662 case 1:
1663 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1664 return env->xcr0 & get_xinuse(env);
1666 break;
1668 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1671 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1673 uint32_t dummy, ena_lo, ena_hi;
1674 uint64_t ena;
1676 /* The OS must have enabled XSAVE. */
1677 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1678 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1681 /* Only XCR0 is defined at present; the FPU may not be disabled. */
1682 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
1683 goto do_gpf;
1686 /* Disallow enabling unimplemented features. */
1687 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
1688 ena = ((uint64_t)ena_hi << 32) | ena_lo;
1689 if (mask & ~ena) {
1690 goto do_gpf;
1693 /* Disallow enabling only half of MPX. */
1694 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
1695 & XSTATE_BNDCSR_MASK) {
1696 goto do_gpf;
1699 env->xcr0 = mask;
1700 cpu_sync_bndcs_hflags(env);
1701 return;
1703 do_gpf:
1704 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1707 /* MMX/SSE */
1708 /* XXX: optimize by storing fpstt and fptags in the static cpu state */
1710 #define SSE_DAZ 0x0040
1711 #define SSE_RC_MASK 0x6000
1712 #define SSE_RC_NEAR 0x0000
1713 #define SSE_RC_DOWN 0x2000
1714 #define SSE_RC_UP 0x4000
1715 #define SSE_RC_CHOP 0x6000
1716 #define SSE_FZ 0x8000
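/*
 * Propagate MXCSR into the SSE softfloat status: rounding control,
 * denormals-are-zeros (DAZ) and flush-to-zero (FZ).
 */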
1718 void update_mxcsr_status(CPUX86State *env)
1720 uint32_t mxcsr = env->mxcsr;
1721 int rnd_type;
1723 /* set rounding mode */
1724 switch (mxcsr & SSE_RC_MASK) {
1725 default:
1726 case SSE_RC_NEAR:
1727 rnd_type = float_round_nearest_even;
1728 break;
1729 case SSE_RC_DOWN:
1730 rnd_type = float_round_down;
1731 break;
1732 case SSE_RC_UP:
1733 rnd_type = float_round_up;
1734 break;
1735 case SSE_RC_CHOP:
1736 rnd_type = float_round_to_zero;
1737 break;
1739 set_float_rounding_mode(rnd_type, &env->sse_status);
1741 /* set denormals are zero */
1742 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
1744 /* set flush to zero; MXCSR.FZ controls SSE operations, not x87 */
1745 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
1748 void helper_ldmxcsr(CPUX86State *env, uint32_t val)
1750 cpu_set_mxcsr(env, val);
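/*
 * The MMX registers alias the low 64 bits of the x87 data registers.
 * helper_enter_mmx resets TOP and marks every register valid; helper_emms
 * marks them all empty.  The 8-byte fptags array is written as two 32-bit
 * stores.
 */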
1753 void helper_enter_mmx(CPUX86State *env)
1755 env->fpstt = 0;
1756 *(uint32_t *)(env->fptags) = 0;
1757 *(uint32_t *)(env->fptags + 4) = 0;
1760 void helper_emms(CPUX86State *env)
1762 /* set to empty state */
1763 *(uint32_t *)(env->fptags) = 0x01010101;
1764 *(uint32_t *)(env->fptags + 4) = 0x01010101;
1767 /* XXX: suppress */
1768 void helper_movq(CPUX86State *env, void *d, void *s)
1770 *(uint64_t *)d = *(uint64_t *)s;
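/*
 * ops_sse.h is included twice: SHIFT 0 generates the 64-bit MMX variants
 * of the vector helpers and SHIFT 1 the 128-bit SSE variants.
 */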
1773 #define SHIFT 0
1774 #include "ops_sse.h"
1776 #define SHIFT 1
1777 #include "ops_sse.h"