target/i386: fix fxam handling of invalid encodings
[qemu/ar7.git] / target / i386 / fpu_helper.c
blob185493db8eef78c1d8e6fedb9f7fc96553ae76c6
1 /*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include <math.h>
22 #include "cpu.h"
23 #include "exec/helper-proto.h"
24 #include "qemu/host-utils.h"
25 #include "exec/exec-all.h"
26 #include "exec/cpu_ldst.h"
27 #include "fpu/softfloat.h"
29 #ifdef CONFIG_SOFTMMU
30 #include "hw/irq.h"
31 #endif
/* x87 control word rounding-control field (mask 0xc00) and its four values */
#define FPU_RC_MASK         0xc00
#define FPU_RC_NEAR         0x000
#define FPU_RC_DOWN         0x400
#define FPU_RC_UP           0x800
#define FPU_RC_CHOP         0xc00
/* Argument bound (2**63) checked by the fptan/fsin/fcos/fsincos helpers */
#define MAXTAN              9223372036854775808.0
/*
 * The following deal with x86 long double-precision numbers held in a
 * CPU_LDoubleU (anything exposing .l.upper and .l.lower).
 *
 * Every macro argument is parenthesized so that arbitrary lvalue
 * expressions (e.g. *ptr) can be passed without precedence surprises.
 */
#define MAXEXPD 0x7fff      /* largest (all-ones) biased exponent field */
#define EXPBIAS 16383       /* exponent bias */
/* Biased exponent field */
#define EXPD(fp)        ((fp).l.upper & 0x7fff)
/* Sign bit, left in place (0 or 0x8000) */
#define SIGND(fp)       ((fp).l.upper & 0x8000)
/* Low 64 bits of the value */
#define MANTD(fp)       ((fp).l.lower)
/* Replace the exponent field with EXPBIAS while keeping the sign bit */
#define BIASEXPONENT(fp) \
    ((fp).l.upper = ((fp).l.upper & ~(0x7fff)) | EXPBIAS)
/* Bits of the x87 status word (env->fpus) used by the helpers below */
#define FPUS_IE             (1 << 0)
#define FPUS_DE             (1 << 1)
#define FPUS_ZE             (1 << 2)
#define FPUS_OE             (1 << 3)
#define FPUS_UE             (1 << 4)
#define FPUS_PE             (1 << 5)
#define FPUS_SF             (1 << 6)
#define FPUS_SE             (1 << 7)
#define FPUS_B              (1 << 15)

/* Mask of the control-word bits tested against fpus in fpu_set_exception() */
#define FPUC_EM             0x3f
/*
 * floatx80 constants used by the constant-load helpers; the *_d / *_u
 * variants differ from the base value in the last significand bit and are
 * selected according to the rounding-control field.
 */
#define floatx80_lg2        make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_lg2_d      make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
#define floatx80_l2e        make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2e_d      make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
#define floatx80_l2t        make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
#define floatx80_l2t_u      make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
#define floatx80_ln2_d      make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
#define floatx80_pi_d       make_floatx80(0x4000, 0xc90fdaa22168c234LL)
70 #if !defined(CONFIG_USER_ONLY)
71 static qemu_irq ferr_irq;
73 void x86_register_ferr_irq(qemu_irq irq)
75 ferr_irq = irq;
78 static void cpu_clear_ignne(void)
80 CPUX86State *env = &X86_CPU(first_cpu)->env;
81 env->hflags2 &= ~HF2_IGNNE_MASK;
84 void cpu_set_ignne(void)
86 CPUX86State *env = &X86_CPU(first_cpu)->env;
87 env->hflags2 |= HF2_IGNNE_MASK;
89 * We get here in response to a write to port F0h. The chipset should
90 * deassert FP_IRQ and FERR# instead should stay signaled until FPSW_SE is
91 * cleared, because FERR# and FP_IRQ are two separate pins on real
92 * hardware. However, we don't model FERR# as a qemu_irq, so we just
93 * do directly what the chipset would do, i.e. deassert FP_IRQ.
95 qemu_irq_lower(ferr_irq);
97 #endif
100 static inline void fpush(CPUX86State *env)
102 env->fpstt = (env->fpstt - 1) & 7;
103 env->fptags[env->fpstt] = 0; /* validate stack entry */
106 static inline void fpop(CPUX86State *env)
108 env->fptags[env->fpstt] = 1; /* invalidate stack entry */
109 env->fpstt = (env->fpstt + 1) & 7;
112 static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
113 uintptr_t retaddr)
115 CPU_LDoubleU temp;
117 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
118 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
119 return temp.d;
122 static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
123 uintptr_t retaddr)
125 CPU_LDoubleU temp;
127 temp.d = f;
128 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
129 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
132 /* x87 FPU helpers */
134 static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
136 union {
137 float64 f64;
138 double d;
139 } u;
141 u.f64 = floatx80_to_float64(a, &env->fp_status);
142 return u.d;
145 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
147 union {
148 float64 f64;
149 double d;
150 } u;
152 u.d = a;
153 return float64_to_floatx80(u.f64, &env->fp_status);
156 static void fpu_set_exception(CPUX86State *env, int mask)
158 env->fpus |= mask;
159 if (env->fpus & (~env->fpuc & FPUC_EM)) {
160 env->fpus |= FPUS_SE | FPUS_B;
164 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
166 if (floatx80_is_zero(b)) {
167 fpu_set_exception(env, FPUS_ZE);
169 return floatx80_div(a, b, &env->fp_status);
172 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
174 if (env->cr[0] & CR0_NE_MASK) {
175 raise_exception_ra(env, EXCP10_COPR, retaddr);
177 #if !defined(CONFIG_USER_ONLY)
178 else if (ferr_irq && !(env->hflags2 & HF2_IGNNE_MASK)) {
179 qemu_irq_raise(ferr_irq);
181 #endif
184 void helper_flds_FT0(CPUX86State *env, uint32_t val)
186 union {
187 float32 f;
188 uint32_t i;
189 } u;
191 u.i = val;
192 FT0 = float32_to_floatx80(u.f, &env->fp_status);
195 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
197 union {
198 float64 f;
199 uint64_t i;
200 } u;
202 u.i = val;
203 FT0 = float64_to_floatx80(u.f, &env->fp_status);
206 void helper_fildl_FT0(CPUX86State *env, int32_t val)
208 FT0 = int32_to_floatx80(val, &env->fp_status);
211 void helper_flds_ST0(CPUX86State *env, uint32_t val)
213 int new_fpstt;
214 union {
215 float32 f;
216 uint32_t i;
217 } u;
219 new_fpstt = (env->fpstt - 1) & 7;
220 u.i = val;
221 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
222 env->fpstt = new_fpstt;
223 env->fptags[new_fpstt] = 0; /* validate stack entry */
226 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
228 int new_fpstt;
229 union {
230 float64 f;
231 uint64_t i;
232 } u;
234 new_fpstt = (env->fpstt - 1) & 7;
235 u.i = val;
236 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
237 env->fpstt = new_fpstt;
238 env->fptags[new_fpstt] = 0; /* validate stack entry */
241 void helper_fildl_ST0(CPUX86State *env, int32_t val)
243 int new_fpstt;
245 new_fpstt = (env->fpstt - 1) & 7;
246 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
247 env->fpstt = new_fpstt;
248 env->fptags[new_fpstt] = 0; /* validate stack entry */
251 void helper_fildll_ST0(CPUX86State *env, int64_t val)
253 int new_fpstt;
255 new_fpstt = (env->fpstt - 1) & 7;
256 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
257 env->fpstt = new_fpstt;
258 env->fptags[new_fpstt] = 0; /* validate stack entry */
261 uint32_t helper_fsts_ST0(CPUX86State *env)
263 union {
264 float32 f;
265 uint32_t i;
266 } u;
268 u.f = floatx80_to_float32(ST0, &env->fp_status);
269 return u.i;
272 uint64_t helper_fstl_ST0(CPUX86State *env)
274 union {
275 float64 f;
276 uint64_t i;
277 } u;
279 u.f = floatx80_to_float64(ST0, &env->fp_status);
280 return u.i;
283 int32_t helper_fist_ST0(CPUX86State *env)
285 int32_t val;
287 val = floatx80_to_int32(ST0, &env->fp_status);
288 if (val != (int16_t)val) {
289 val = -32768;
291 return val;
294 int32_t helper_fistl_ST0(CPUX86State *env)
296 int32_t val;
297 signed char old_exp_flags;
299 old_exp_flags = get_float_exception_flags(&env->fp_status);
300 set_float_exception_flags(0, &env->fp_status);
302 val = floatx80_to_int32(ST0, &env->fp_status);
303 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
304 val = 0x80000000;
306 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
307 | old_exp_flags, &env->fp_status);
308 return val;
311 int64_t helper_fistll_ST0(CPUX86State *env)
313 int64_t val;
314 signed char old_exp_flags;
316 old_exp_flags = get_float_exception_flags(&env->fp_status);
317 set_float_exception_flags(0, &env->fp_status);
319 val = floatx80_to_int64(ST0, &env->fp_status);
320 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
321 val = 0x8000000000000000ULL;
323 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
324 | old_exp_flags, &env->fp_status);
325 return val;
328 int32_t helper_fistt_ST0(CPUX86State *env)
330 int32_t val;
332 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
333 if (val != (int16_t)val) {
334 val = -32768;
336 return val;
339 int32_t helper_fisttl_ST0(CPUX86State *env)
341 return floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
344 int64_t helper_fisttll_ST0(CPUX86State *env)
346 return floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
349 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
351 int new_fpstt;
353 new_fpstt = (env->fpstt - 1) & 7;
354 env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
355 env->fpstt = new_fpstt;
356 env->fptags[new_fpstt] = 0; /* validate stack entry */
359 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
361 helper_fstt(env, ST0, ptr, GETPC());
364 void helper_fpush(CPUX86State *env)
366 fpush(env);
369 void helper_fpop(CPUX86State *env)
371 fpop(env);
374 void helper_fdecstp(CPUX86State *env)
376 env->fpstt = (env->fpstt - 1) & 7;
377 env->fpus &= ~0x4700;
380 void helper_fincstp(CPUX86State *env)
382 env->fpstt = (env->fpstt + 1) & 7;
383 env->fpus &= ~0x4700;
386 /* FPU move */
388 void helper_ffree_STN(CPUX86State *env, int st_index)
390 env->fptags[(env->fpstt + st_index) & 7] = 1;
393 void helper_fmov_ST0_FT0(CPUX86State *env)
395 ST0 = FT0;
398 void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
400 FT0 = ST(st_index);
403 void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
405 ST0 = ST(st_index);
408 void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
410 ST(st_index) = ST0;
413 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
415 floatx80 tmp;
417 tmp = ST(st_index);
418 ST(st_index) = ST0;
419 ST0 = tmp;
422 /* FPU operations */
/*
 * Status-word condition bits for fcom/fucom, indexed by (compare result + 1).
 * Presumably the order is less, equal, greater, unordered -- confirm against
 * the FloatRelation definition.
 */
static const int fcom_ccval[4] = {
    0x0100,
    0x4000,
    0x0000,
    0x4500,
};
426 void helper_fcom_ST0_FT0(CPUX86State *env)
428 FloatRelation ret;
430 ret = floatx80_compare(ST0, FT0, &env->fp_status);
431 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
434 void helper_fucom_ST0_FT0(CPUX86State *env)
436 FloatRelation ret;
438 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
439 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
442 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
444 void helper_fcomi_ST0_FT0(CPUX86State *env)
446 int eflags;
447 FloatRelation ret;
449 ret = floatx80_compare(ST0, FT0, &env->fp_status);
450 eflags = cpu_cc_compute_all(env, CC_OP);
451 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
452 CC_SRC = eflags;
455 void helper_fucomi_ST0_FT0(CPUX86State *env)
457 int eflags;
458 FloatRelation ret;
460 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
461 eflags = cpu_cc_compute_all(env, CC_OP);
462 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
463 CC_SRC = eflags;
466 void helper_fadd_ST0_FT0(CPUX86State *env)
468 ST0 = floatx80_add(ST0, FT0, &env->fp_status);
471 void helper_fmul_ST0_FT0(CPUX86State *env)
473 ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
476 void helper_fsub_ST0_FT0(CPUX86State *env)
478 ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
481 void helper_fsubr_ST0_FT0(CPUX86State *env)
483 ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
486 void helper_fdiv_ST0_FT0(CPUX86State *env)
488 ST0 = helper_fdiv(env, ST0, FT0);
491 void helper_fdivr_ST0_FT0(CPUX86State *env)
493 ST0 = helper_fdiv(env, FT0, ST0);
496 /* fp operations between STN and ST0 */
498 void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
500 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
503 void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
505 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
508 void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
510 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
513 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
515 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
518 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
520 floatx80 *p;
522 p = &ST(st_index);
523 *p = helper_fdiv(env, *p, ST0);
526 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
528 floatx80 *p;
530 p = &ST(st_index);
531 *p = helper_fdiv(env, ST0, *p);
534 /* misc FPU operations */
535 void helper_fchs_ST0(CPUX86State *env)
537 ST0 = floatx80_chs(ST0);
540 void helper_fabs_ST0(CPUX86State *env)
542 ST0 = floatx80_abs(ST0);
545 void helper_fld1_ST0(CPUX86State *env)
547 ST0 = floatx80_one;
550 void helper_fldl2t_ST0(CPUX86State *env)
552 switch (env->fpuc & FPU_RC_MASK) {
553 case FPU_RC_UP:
554 ST0 = floatx80_l2t_u;
555 break;
556 default:
557 ST0 = floatx80_l2t;
558 break;
562 void helper_fldl2e_ST0(CPUX86State *env)
564 switch (env->fpuc & FPU_RC_MASK) {
565 case FPU_RC_DOWN:
566 case FPU_RC_CHOP:
567 ST0 = floatx80_l2e_d;
568 break;
569 default:
570 ST0 = floatx80_l2e;
571 break;
575 void helper_fldpi_ST0(CPUX86State *env)
577 switch (env->fpuc & FPU_RC_MASK) {
578 case FPU_RC_DOWN:
579 case FPU_RC_CHOP:
580 ST0 = floatx80_pi_d;
581 break;
582 default:
583 ST0 = floatx80_pi;
584 break;
588 void helper_fldlg2_ST0(CPUX86State *env)
590 switch (env->fpuc & FPU_RC_MASK) {
591 case FPU_RC_DOWN:
592 case FPU_RC_CHOP:
593 ST0 = floatx80_lg2_d;
594 break;
595 default:
596 ST0 = floatx80_lg2;
597 break;
601 void helper_fldln2_ST0(CPUX86State *env)
603 switch (env->fpuc & FPU_RC_MASK) {
604 case FPU_RC_DOWN:
605 case FPU_RC_CHOP:
606 ST0 = floatx80_ln2_d;
607 break;
608 default:
609 ST0 = floatx80_ln2;
610 break;
614 void helper_fldz_ST0(CPUX86State *env)
616 ST0 = floatx80_zero;
619 void helper_fldz_FT0(CPUX86State *env)
621 FT0 = floatx80_zero;
624 uint32_t helper_fnstsw(CPUX86State *env)
626 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
629 uint32_t helper_fnstcw(CPUX86State *env)
631 return env->fpuc;
634 void update_fp_status(CPUX86State *env)
636 int rnd_type;
638 /* set rounding mode */
639 switch (env->fpuc & FPU_RC_MASK) {
640 default:
641 case FPU_RC_NEAR:
642 rnd_type = float_round_nearest_even;
643 break;
644 case FPU_RC_DOWN:
645 rnd_type = float_round_down;
646 break;
647 case FPU_RC_UP:
648 rnd_type = float_round_up;
649 break;
650 case FPU_RC_CHOP:
651 rnd_type = float_round_to_zero;
652 break;
654 set_float_rounding_mode(rnd_type, &env->fp_status);
655 switch ((env->fpuc >> 8) & 3) {
656 case 0:
657 rnd_type = 32;
658 break;
659 case 2:
660 rnd_type = 64;
661 break;
662 case 3:
663 default:
664 rnd_type = 80;
665 break;
667 set_floatx80_rounding_precision(rnd_type, &env->fp_status);
670 void helper_fldcw(CPUX86State *env, uint32_t val)
672 cpu_set_fpuc(env, val);
675 void helper_fclex(CPUX86State *env)
677 env->fpus &= 0x7f00;
680 void helper_fwait(CPUX86State *env)
682 if (env->fpus & FPUS_SE) {
683 fpu_raise_exception(env, GETPC());
687 void helper_fninit(CPUX86State *env)
689 env->fpus = 0;
690 env->fpstt = 0;
691 cpu_set_fpuc(env, 0x37f);
692 env->fptags[0] = 1;
693 env->fptags[1] = 1;
694 env->fptags[2] = 1;
695 env->fptags[3] = 1;
696 env->fptags[4] = 1;
697 env->fptags[5] = 1;
698 env->fptags[6] = 1;
699 env->fptags[7] = 1;
702 /* BCD ops */
704 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
706 floatx80 tmp;
707 uint64_t val;
708 unsigned int v;
709 int i;
711 val = 0;
712 for (i = 8; i >= 0; i--) {
713 v = cpu_ldub_data_ra(env, ptr + i, GETPC());
714 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
716 tmp = int64_to_floatx80(val, &env->fp_status);
717 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
718 tmp = floatx80_chs(tmp);
720 fpush(env);
721 ST0 = tmp;
724 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
726 int v;
727 target_ulong mem_ref, mem_end;
728 int64_t val;
730 val = floatx80_to_int64(ST0, &env->fp_status);
731 mem_ref = ptr;
732 mem_end = mem_ref + 9;
733 if (val < 0) {
734 cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
735 val = -val;
736 } else {
737 cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
739 while (mem_ref < mem_end) {
740 if (val == 0) {
741 break;
743 v = val % 100;
744 val = val / 100;
745 v = ((v / 10) << 4) | (v % 10);
746 cpu_stb_data_ra(env, mem_ref++, v, GETPC());
748 while (mem_ref < mem_end) {
749 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
753 void helper_f2xm1(CPUX86State *env)
755 double val = floatx80_to_double(env, ST0);
757 val = pow(2.0, val) - 1.0;
758 ST0 = double_to_floatx80(env, val);
761 void helper_fyl2x(CPUX86State *env)
763 double fptemp = floatx80_to_double(env, ST0);
765 if (fptemp > 0.0) {
766 fptemp = log(fptemp) / log(2.0); /* log2(ST) */
767 fptemp *= floatx80_to_double(env, ST1);
768 ST1 = double_to_floatx80(env, fptemp);
769 fpop(env);
770 } else {
771 env->fpus &= ~0x4700;
772 env->fpus |= 0x400;
776 void helper_fptan(CPUX86State *env)
778 double fptemp = floatx80_to_double(env, ST0);
780 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
781 env->fpus |= 0x400;
782 } else {
783 fptemp = tan(fptemp);
784 ST0 = double_to_floatx80(env, fptemp);
785 fpush(env);
786 ST0 = floatx80_one;
787 env->fpus &= ~0x400; /* C2 <-- 0 */
788 /* the above code is for |arg| < 2**52 only */
792 void helper_fpatan(CPUX86State *env)
794 double fptemp, fpsrcop;
796 fpsrcop = floatx80_to_double(env, ST1);
797 fptemp = floatx80_to_double(env, ST0);
798 ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
799 fpop(env);
802 void helper_fxtract(CPUX86State *env)
804 CPU_LDoubleU temp;
806 temp.d = ST0;
808 if (floatx80_is_zero(ST0)) {
809 /* Easy way to generate -inf and raising division by 0 exception */
810 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
811 &env->fp_status);
812 fpush(env);
813 ST0 = temp.d;
814 } else if (floatx80_invalid_encoding(ST0)) {
815 float_raise(float_flag_invalid, &env->fp_status);
816 ST0 = floatx80_default_nan(&env->fp_status);
817 fpush(env);
818 ST0 = ST1;
819 } else if (floatx80_is_any_nan(ST0)) {
820 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
821 float_raise(float_flag_invalid, &env->fp_status);
822 ST0 = floatx80_silence_nan(ST0, &env->fp_status);
824 fpush(env);
825 ST0 = ST1;
826 } else if (floatx80_is_infinity(ST0)) {
827 fpush(env);
828 ST0 = ST1;
829 ST1 = floatx80_infinity;
830 } else {
831 int expdif;
833 if (EXPD(temp) == 0) {
834 int shift = clz64(temp.l.lower);
835 temp.l.lower <<= shift;
836 expdif = 1 - EXPBIAS - shift;
837 float_raise(float_flag_input_denormal, &env->fp_status);
838 } else {
839 expdif = EXPD(temp) - EXPBIAS;
841 /* DP exponent bias */
842 ST0 = int32_to_floatx80(expdif, &env->fp_status);
843 fpush(env);
844 BIASEXPONENT(temp);
845 ST0 = temp.d;
849 void helper_fprem1(CPUX86State *env)
851 double st0, st1, dblq, fpsrcop, fptemp;
852 CPU_LDoubleU fpsrcop1, fptemp1;
853 int expdif;
854 signed long long int q;
856 st0 = floatx80_to_double(env, ST0);
857 st1 = floatx80_to_double(env, ST1);
859 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
860 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
861 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
862 return;
865 fpsrcop = st0;
866 fptemp = st1;
867 fpsrcop1.d = ST0;
868 fptemp1.d = ST1;
869 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
871 if (expdif < 0) {
872 /* optimisation? taken from the AMD docs */
873 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
874 /* ST0 is unchanged */
875 return;
878 if (expdif < 53) {
879 dblq = fpsrcop / fptemp;
880 /* round dblq towards nearest integer */
881 dblq = rint(dblq);
882 st0 = fpsrcop - fptemp * dblq;
884 /* convert dblq to q by truncating towards zero */
885 if (dblq < 0.0) {
886 q = (signed long long int)(-dblq);
887 } else {
888 q = (signed long long int)dblq;
891 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
892 /* (C0,C3,C1) <-- (q2,q1,q0) */
893 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
894 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
895 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
896 } else {
897 env->fpus |= 0x400; /* C2 <-- 1 */
898 fptemp = pow(2.0, expdif - 50);
899 fpsrcop = (st0 / st1) / fptemp;
900 /* fpsrcop = integer obtained by chopping */
901 fpsrcop = (fpsrcop < 0.0) ?
902 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
903 st0 -= (st1 * fpsrcop * fptemp);
905 ST0 = double_to_floatx80(env, st0);
908 void helper_fprem(CPUX86State *env)
910 double st0, st1, dblq, fpsrcop, fptemp;
911 CPU_LDoubleU fpsrcop1, fptemp1;
912 int expdif;
913 signed long long int q;
915 st0 = floatx80_to_double(env, ST0);
916 st1 = floatx80_to_double(env, ST1);
918 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
919 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
920 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
921 return;
924 fpsrcop = st0;
925 fptemp = st1;
926 fpsrcop1.d = ST0;
927 fptemp1.d = ST1;
928 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
930 if (expdif < 0) {
931 /* optimisation? taken from the AMD docs */
932 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
933 /* ST0 is unchanged */
934 return;
937 if (expdif < 53) {
938 dblq = fpsrcop / fptemp; /* ST0 / ST1 */
939 /* round dblq towards zero */
940 dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
941 st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
943 /* convert dblq to q by truncating towards zero */
944 if (dblq < 0.0) {
945 q = (signed long long int)(-dblq);
946 } else {
947 q = (signed long long int)dblq;
950 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
951 /* (C0,C3,C1) <-- (q2,q1,q0) */
952 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
953 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
954 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
955 } else {
956 int N = 32 + (expdif % 32); /* as per AMD docs */
958 env->fpus |= 0x400; /* C2 <-- 1 */
959 fptemp = pow(2.0, (double)(expdif - N));
960 fpsrcop = (st0 / st1) / fptemp;
961 /* fpsrcop = integer obtained by chopping */
962 fpsrcop = (fpsrcop < 0.0) ?
963 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
964 st0 -= (st1 * fpsrcop * fptemp);
966 ST0 = double_to_floatx80(env, st0);
969 void helper_fyl2xp1(CPUX86State *env)
971 double fptemp = floatx80_to_double(env, ST0);
973 if ((fptemp + 1.0) > 0.0) {
974 fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
975 fptemp *= floatx80_to_double(env, ST1);
976 ST1 = double_to_floatx80(env, fptemp);
977 fpop(env);
978 } else {
979 env->fpus &= ~0x4700;
980 env->fpus |= 0x400;
984 void helper_fsqrt(CPUX86State *env)
986 if (floatx80_is_neg(ST0)) {
987 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
988 env->fpus |= 0x400;
990 ST0 = floatx80_sqrt(ST0, &env->fp_status);
993 void helper_fsincos(CPUX86State *env)
995 double fptemp = floatx80_to_double(env, ST0);
997 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
998 env->fpus |= 0x400;
999 } else {
1000 ST0 = double_to_floatx80(env, sin(fptemp));
1001 fpush(env);
1002 ST0 = double_to_floatx80(env, cos(fptemp));
1003 env->fpus &= ~0x400; /* C2 <-- 0 */
1004 /* the above code is for |arg| < 2**63 only */
1008 void helper_frndint(CPUX86State *env)
1010 ST0 = floatx80_round_to_int(ST0, &env->fp_status);
1013 void helper_fscale(CPUX86State *env)
1015 if (floatx80_invalid_encoding(ST1)) {
1016 float_raise(float_flag_invalid, &env->fp_status);
1017 ST0 = floatx80_default_nan(&env->fp_status);
1018 } else if (floatx80_is_any_nan(ST1)) {
1019 ST0 = ST1;
1020 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1021 float_raise(float_flag_invalid, &env->fp_status);
1022 ST0 = floatx80_silence_nan(ST0, &env->fp_status);
1024 } else if (floatx80_is_infinity(ST1) &&
1025 !floatx80_invalid_encoding(ST0) &&
1026 !floatx80_is_any_nan(ST0)) {
1027 if (floatx80_is_neg(ST1)) {
1028 if (floatx80_is_infinity(ST0)) {
1029 float_raise(float_flag_invalid, &env->fp_status);
1030 ST0 = floatx80_default_nan(&env->fp_status);
1031 } else {
1032 ST0 = (floatx80_is_neg(ST0) ?
1033 floatx80_chs(floatx80_zero) :
1034 floatx80_zero);
1036 } else {
1037 if (floatx80_is_zero(ST0)) {
1038 float_raise(float_flag_invalid, &env->fp_status);
1039 ST0 = floatx80_default_nan(&env->fp_status);
1040 } else {
1041 ST0 = (floatx80_is_neg(ST0) ?
1042 floatx80_chs(floatx80_infinity) :
1043 floatx80_infinity);
1046 } else {
1047 int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
1048 signed char save = env->fp_status.floatx80_rounding_precision;
1049 env->fp_status.floatx80_rounding_precision = 80;
1050 ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
1051 env->fp_status.floatx80_rounding_precision = save;
1055 void helper_fsin(CPUX86State *env)
1057 double fptemp = floatx80_to_double(env, ST0);
1059 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1060 env->fpus |= 0x400;
1061 } else {
1062 ST0 = double_to_floatx80(env, sin(fptemp));
1063 env->fpus &= ~0x400; /* C2 <-- 0 */
1064 /* the above code is for |arg| < 2**53 only */
1068 void helper_fcos(CPUX86State *env)
1070 double fptemp = floatx80_to_double(env, ST0);
1072 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1073 env->fpus |= 0x400;
1074 } else {
1075 ST0 = double_to_floatx80(env, cos(fptemp));
1076 env->fpus &= ~0x400; /* C2 <-- 0 */
1077 /* the above code is for |arg| < 2**63 only */
1081 void helper_fxam_ST0(CPUX86State *env)
1083 CPU_LDoubleU temp;
1084 int expdif;
1086 temp.d = ST0;
1088 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1089 if (SIGND(temp)) {
1090 env->fpus |= 0x200; /* C1 <-- 1 */
1093 if (env->fptags[env->fpstt]) {
1094 env->fpus |= 0x4100; /* Empty */
1095 return;
1098 expdif = EXPD(temp);
1099 if (expdif == MAXEXPD) {
1100 if (MANTD(temp) == 0x8000000000000000ULL) {
1101 env->fpus |= 0x500; /* Infinity */
1102 } else if (MANTD(temp) & 0x8000000000000000ULL) {
1103 env->fpus |= 0x100; /* NaN */
1105 } else if (expdif == 0) {
1106 if (MANTD(temp) == 0) {
1107 env->fpus |= 0x4000; /* Zero */
1108 } else {
1109 env->fpus |= 0x4400; /* Denormal */
1111 } else if (MANTD(temp) & 0x8000000000000000ULL) {
1112 env->fpus |= 0x400;
1116 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
1117 uintptr_t retaddr)
1119 int fpus, fptag, exp, i;
1120 uint64_t mant;
1121 CPU_LDoubleU tmp;
1123 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1124 fptag = 0;
1125 for (i = 7; i >= 0; i--) {
1126 fptag <<= 2;
1127 if (env->fptags[i]) {
1128 fptag |= 3;
1129 } else {
1130 tmp.d = env->fpregs[i].d;
1131 exp = EXPD(tmp);
1132 mant = MANTD(tmp);
1133 if (exp == 0 && mant == 0) {
1134 /* zero */
1135 fptag |= 1;
1136 } else if (exp == 0 || exp == MAXEXPD
1137 || (mant & (1LL << 63)) == 0) {
1138 /* NaNs, infinity, denormal */
1139 fptag |= 2;
1143 if (data32) {
1144 /* 32 bit */
1145 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1146 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1147 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1148 cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1149 cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1150 cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1151 cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
1152 } else {
1153 /* 16 bit */
1154 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1155 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1156 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1157 cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1158 cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1159 cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1160 cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
1164 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1166 do_fstenv(env, ptr, data32, GETPC());
1169 static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
1171 env->fpstt = (fpus >> 11) & 7;
1172 env->fpus = fpus & ~0x3800 & ~FPUS_B;
1173 env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
1174 #if !defined(CONFIG_USER_ONLY)
1175 if (!(env->fpus & FPUS_SE)) {
1177 * Here the processor deasserts FERR#; in response, the chipset deasserts
1178 * IGNNE#.
1180 cpu_clear_ignne();
1182 #endif
1185 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1186 uintptr_t retaddr)
1188 int i, fpus, fptag;
1190 if (data32) {
1191 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1192 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1193 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1194 } else {
1195 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1196 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1197 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1199 cpu_set_fpus(env, fpus);
1200 for (i = 0; i < 8; i++) {
1201 env->fptags[i] = ((fptag & 3) == 3);
1202 fptag >>= 2;
1206 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1208 do_fldenv(env, ptr, data32, GETPC());
1211 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1213 floatx80 tmp;
1214 int i;
1216 do_fstenv(env, ptr, data32, GETPC());
1218 ptr += (14 << data32);
1219 for (i = 0; i < 8; i++) {
1220 tmp = ST(i);
1221 helper_fstt(env, tmp, ptr, GETPC());
1222 ptr += 10;
1225 /* fninit */
1226 env->fpus = 0;
1227 env->fpstt = 0;
1228 cpu_set_fpuc(env, 0x37f);
1229 env->fptags[0] = 1;
1230 env->fptags[1] = 1;
1231 env->fptags[2] = 1;
1232 env->fptags[3] = 1;
1233 env->fptags[4] = 1;
1234 env->fptags[5] = 1;
1235 env->fptags[6] = 1;
1236 env->fptags[7] = 1;
1239 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
1241 floatx80 tmp;
1242 int i;
1244 do_fldenv(env, ptr, data32, GETPC());
1245 ptr += (14 << data32);
1247 for (i = 0; i < 8; i++) {
1248 tmp = helper_fldt(env, ptr, GETPC());
1249 ST(i) = tmp;
1250 ptr += 10;
1254 #if defined(CONFIG_USER_ONLY)
1255 void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
1257 helper_fsave(env, ptr, data32);
1260 void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
1262 helper_frstor(env, ptr, data32);
1264 #endif
1266 #define XO(X) offsetof(X86XSaveArea, X)
1268 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1270 int fpus, fptag, i;
1271 target_ulong addr;
1273 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1274 fptag = 0;
1275 for (i = 0; i < 8; i++) {
1276 fptag |= (env->fptags[i] << i);
1279 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
1280 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
1281 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
1283 /* In 32-bit mode this is eip, sel, dp, sel.
1284 In 64-bit mode this is rip, rdp.
1285 But in either case we don't write actual data, just zeros. */
1286 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
1287 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
1289 addr = ptr + XO(legacy.fpregs);
1290 for (i = 0; i < 8; i++) {
1291 floatx80 tmp = ST(i);
1292 helper_fstt(env, tmp, addr, ra);
1293 addr += 16;
1297 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1299 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
1300 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
1303 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1305 int i, nb_xmm_regs;
1306 target_ulong addr;
1308 if (env->hflags & HF_CS64_MASK) {
1309 nb_xmm_regs = 16;
1310 } else {
1311 nb_xmm_regs = 8;
1314 addr = ptr + XO(legacy.xmm_regs);
1315 for (i = 0; i < nb_xmm_regs; i++) {
1316 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1317 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1318 addr += 16;
1322 static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1324 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1325 int i;
1327 for (i = 0; i < 4; i++, addr += 16) {
1328 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1329 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1333 static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1335 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
1336 env->bndcs_regs.cfgu, ra);
1337 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
1338 env->bndcs_regs.sts, ra);
1341 static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1343 cpu_stq_data_ra(env, ptr, env->pkru, ra);
1346 void helper_fxsave(CPUX86State *env, target_ulong ptr)
1348 uintptr_t ra = GETPC();
1350 /* The operand must be 16 byte aligned */
1351 if (ptr & 0xf) {
1352 raise_exception_ra(env, EXCP0D_GPF, ra);
1355 do_xsave_fpu(env, ptr, ra);
1357 if (env->cr[4] & CR4_OSFXSR_MASK) {
1358 do_xsave_mxcsr(env, ptr, ra);
1359 /* Fast FXSAVE leaves out the XMM registers */
1360 if (!(env->efer & MSR_EFER_FFXSR)
1361 || (env->hflags & HF_CPL_MASK)
1362 || !(env->hflags & HF_LMA_MASK)) {
1363 do_xsave_sse(env, ptr, ra);
1368 static uint64_t get_xinuse(CPUX86State *env)
1370 uint64_t inuse = -1;
1372 /* For the most part, we don't track XINUSE. We could calculate it
1373 here for all components, but it's probably less work to simply
1374 indicate in use. That said, the state of BNDREGS is important
1375 enough to track in HFLAGS, so we might as well use that here. */
1376 if ((env->hflags & HF_MPX_IU_MASK) == 0) {
1377 inuse &= ~XSTATE_BNDREGS_MASK;
1379 return inuse;
1382 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1383 uint64_t inuse, uint64_t opt, uintptr_t ra)
1385 uint64_t old_bv, new_bv;
1387 /* The OS must have enabled XSAVE. */
1388 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1389 raise_exception_ra(env, EXCP06_ILLOP, ra);
1392 /* The operand must be 64 byte aligned. */
1393 if (ptr & 63) {
1394 raise_exception_ra(env, EXCP0D_GPF, ra);
1397 /* Never save anything not enabled by XCR0. */
1398 rfbm &= env->xcr0;
1399 opt &= rfbm;
1401 if (opt & XSTATE_FP_MASK) {
1402 do_xsave_fpu(env, ptr, ra);
1404 if (rfbm & XSTATE_SSE_MASK) {
1405 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
1406 do_xsave_mxcsr(env, ptr, ra);
1408 if (opt & XSTATE_SSE_MASK) {
1409 do_xsave_sse(env, ptr, ra);
1411 if (opt & XSTATE_BNDREGS_MASK) {
1412 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
1414 if (opt & XSTATE_BNDCSR_MASK) {
1415 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
1417 if (opt & XSTATE_PKRU_MASK) {
1418 do_xsave_pkru(env, ptr + XO(pkru_state), ra);
1421 /* Update the XSTATE_BV field. */
1422 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1423 new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
1424 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
1427 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1429 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1432 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1434 uint64_t inuse = get_xinuse(env);
1435 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1438 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1440 int i, fpuc, fpus, fptag;
1441 target_ulong addr;
1443 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
1444 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
1445 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
1446 cpu_set_fpuc(env, fpuc);
1447 cpu_set_fpus(env, fpus);
1448 fptag ^= 0xff;
1449 for (i = 0; i < 8; i++) {
1450 env->fptags[i] = ((fptag >> i) & 1);
1453 addr = ptr + XO(legacy.fpregs);
1454 for (i = 0; i < 8; i++) {
1455 floatx80 tmp = helper_fldt(env, addr, ra);
1456 ST(i) = tmp;
1457 addr += 16;
1461 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1463 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
1466 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1468 int i, nb_xmm_regs;
1469 target_ulong addr;
1471 if (env->hflags & HF_CS64_MASK) {
1472 nb_xmm_regs = 16;
1473 } else {
1474 nb_xmm_regs = 8;
1477 addr = ptr + XO(legacy.xmm_regs);
1478 for (i = 0; i < nb_xmm_regs; i++) {
1479 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1480 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1481 addr += 16;
1485 static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1487 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1488 int i;
1490 for (i = 0; i < 4; i++, addr += 16) {
1491 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1492 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1496 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1498 /* FIXME: Extend highest implemented bit of linear address. */
1499 env->bndcs_regs.cfgu
1500 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
1501 env->bndcs_regs.sts
1502 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
1505 static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1507 env->pkru = cpu_ldq_data_ra(env, ptr, ra);
1510 void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1512 uintptr_t ra = GETPC();
1514 /* The operand must be 16 byte aligned */
1515 if (ptr & 0xf) {
1516 raise_exception_ra(env, EXCP0D_GPF, ra);
1519 do_xrstor_fpu(env, ptr, ra);
1521 if (env->cr[4] & CR4_OSFXSR_MASK) {
1522 do_xrstor_mxcsr(env, ptr, ra);
1523 /* Fast FXRSTOR leaves out the XMM registers */
1524 if (!(env->efer & MSR_EFER_FFXSR)
1525 || (env->hflags & HF_CPL_MASK)
1526 || !(env->hflags & HF_LMA_MASK)) {
1527 do_xrstor_sse(env, ptr, ra);
1532 #if defined(CONFIG_USER_ONLY)
1533 void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
1535 helper_fxsave(env, ptr);
1538 void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
1540 helper_fxrstor(env, ptr);
1542 #endif
1544 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1546 uintptr_t ra = GETPC();
1547 uint64_t xstate_bv, xcomp_bv, reserve0;
1549 rfbm &= env->xcr0;
1551 /* The OS must have enabled XSAVE. */
1552 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1553 raise_exception_ra(env, EXCP06_ILLOP, ra);
1556 /* The operand must be 64 byte aligned. */
1557 if (ptr & 63) {
1558 raise_exception_ra(env, EXCP0D_GPF, ra);
1561 xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1563 if ((int64_t)xstate_bv < 0) {
1564 /* FIXME: Compact form. */
1565 raise_exception_ra(env, EXCP0D_GPF, ra);
1568 /* Standard form. */
1570 /* The XSTATE_BV field must not set bits not present in XCR0. */
1571 if (xstate_bv & ~env->xcr0) {
1572 raise_exception_ra(env, EXCP0D_GPF, ra);
1575 /* The XCOMP_BV field must be zero. Note that, as of the April 2016
1576 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
1577 describes only XCOMP_BV, but the description of the standard form
1578 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
1579 includes the next 64-bit field. */
1580 xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
1581 reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
1582 if (xcomp_bv || reserve0) {
1583 raise_exception_ra(env, EXCP0D_GPF, ra);
1586 if (rfbm & XSTATE_FP_MASK) {
1587 if (xstate_bv & XSTATE_FP_MASK) {
1588 do_xrstor_fpu(env, ptr, ra);
1589 } else {
1590 helper_fninit(env);
1591 memset(env->fpregs, 0, sizeof(env->fpregs));
1594 if (rfbm & XSTATE_SSE_MASK) {
1595 /* Note that the standard form of XRSTOR loads MXCSR from memory
1596 whether or not the XSTATE_BV bit is set. */
1597 do_xrstor_mxcsr(env, ptr, ra);
1598 if (xstate_bv & XSTATE_SSE_MASK) {
1599 do_xrstor_sse(env, ptr, ra);
1600 } else {
1601 /* ??? When AVX is implemented, we may have to be more
1602 selective in the clearing. */
1603 memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
1606 if (rfbm & XSTATE_BNDREGS_MASK) {
1607 if (xstate_bv & XSTATE_BNDREGS_MASK) {
1608 do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
1609 env->hflags |= HF_MPX_IU_MASK;
1610 } else {
1611 memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
1612 env->hflags &= ~HF_MPX_IU_MASK;
1615 if (rfbm & XSTATE_BNDCSR_MASK) {
1616 if (xstate_bv & XSTATE_BNDCSR_MASK) {
1617 do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
1618 } else {
1619 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
1621 cpu_sync_bndcs_hflags(env);
1623 if (rfbm & XSTATE_PKRU_MASK) {
1624 uint64_t old_pkru = env->pkru;
1625 if (xstate_bv & XSTATE_PKRU_MASK) {
1626 do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
1627 } else {
1628 env->pkru = 0;
1630 if (env->pkru != old_pkru) {
1631 CPUState *cs = env_cpu(env);
1632 tlb_flush(cs);
1637 #undef XO
1639 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1641 /* The OS must have enabled XSAVE. */
1642 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1643 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1646 switch (ecx) {
1647 case 0:
1648 return env->xcr0;
1649 case 1:
1650 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1651 return env->xcr0 & get_xinuse(env);
1653 break;
1655 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1658 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1660 uint32_t dummy, ena_lo, ena_hi;
1661 uint64_t ena;
1663 /* The OS must have enabled XSAVE. */
1664 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1665 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1668 /* Only XCR0 is defined at present; the FPU may not be disabled. */
1669 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
1670 goto do_gpf;
1673 /* Disallow enabling unimplemented features. */
1674 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
1675 ena = ((uint64_t)ena_hi << 32) | ena_lo;
1676 if (mask & ~ena) {
1677 goto do_gpf;
1680 /* Disallow enabling only half of MPX. */
1681 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
1682 & XSTATE_BNDCSR_MASK) {
1683 goto do_gpf;
1686 env->xcr0 = mask;
1687 cpu_sync_bndcs_hflags(env);
1688 return;
1690 do_gpf:
1691 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1694 /* MMX/SSE */
/* XXX: optimize by storing fpstt and fptags in the static cpu state */
/* MXCSR bits consumed by update_mxcsr_status(). */
#define SSE_DAZ             0x0040  /* denormals are zero */
#define SSE_RC_MASK         0x6000  /* rounding-control field */
#define SSE_RC_NEAR         0x0000
#define SSE_RC_DOWN         0x2000
#define SSE_RC_UP           0x4000
#define SSE_RC_CHOP         0x6000
#define SSE_FZ              0x8000  /* flush to zero */
1705 void update_mxcsr_status(CPUX86State *env)
1707 uint32_t mxcsr = env->mxcsr;
1708 int rnd_type;
1710 /* set rounding mode */
1711 switch (mxcsr & SSE_RC_MASK) {
1712 default:
1713 case SSE_RC_NEAR:
1714 rnd_type = float_round_nearest_even;
1715 break;
1716 case SSE_RC_DOWN:
1717 rnd_type = float_round_down;
1718 break;
1719 case SSE_RC_UP:
1720 rnd_type = float_round_up;
1721 break;
1722 case SSE_RC_CHOP:
1723 rnd_type = float_round_to_zero;
1724 break;
1726 set_float_rounding_mode(rnd_type, &env->sse_status);
1728 /* set denormals are zero */
1729 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
1731 /* set flush to zero */
1732 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
1735 void helper_ldmxcsr(CPUX86State *env, uint32_t val)
1737 cpu_set_mxcsr(env, val);
1740 void helper_enter_mmx(CPUX86State *env)
1742 env->fpstt = 0;
1743 *(uint32_t *)(env->fptags) = 0;
1744 *(uint32_t *)(env->fptags + 4) = 0;
1747 void helper_emms(CPUX86State *env)
1749 /* set to empty state */
1750 *(uint32_t *)(env->fptags) = 0x01010101;
1751 *(uint32_t *)(env->fptags + 4) = 0x01010101;
1754 /* XXX: suppress */
1755 void helper_movq(CPUX86State *env, void *d, void *s)
1757 *(uint64_t *)d = *(uint64_t *)s;
1760 #define SHIFT 0
1761 #include "ops_sse.h"
1763 #define SHIFT 1
1764 #include "ops_sse.h"