Merge remote-tracking branch 'remotes/maxreitz/tags/pull-block-2020-03-26' into staging
[qemu.git] / target / i386 / fpu_helper.c
blob792a128a6da2d6cd2b6b99634c84fe615ef82972
1 /*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include <math.h>
22 #include "cpu.h"
23 #include "exec/helper-proto.h"
24 #include "qemu/host-utils.h"
25 #include "exec/exec-all.h"
26 #include "exec/cpu_ldst.h"
27 #include "fpu/softfloat.h"
29 #ifdef CONFIG_SOFTMMU
30 #include "hw/irq.h"
31 #endif
33 #define FPU_RC_MASK 0xc00
34 #define FPU_RC_NEAR 0x000
35 #define FPU_RC_DOWN 0x400
36 #define FPU_RC_UP 0x800
37 #define FPU_RC_CHOP 0xc00
39 #define MAXTAN 9223372036854775808.0
41 /* the following deal with x86 long double-precision numbers */
42 #define MAXEXPD 0x7fff
43 #define EXPBIAS 16383
44 #define EXPD(fp) (fp.l.upper & 0x7fff)
45 #define SIGND(fp) ((fp.l.upper) & 0x8000)
46 #define MANTD(fp) (fp.l.lower)
47 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
49 #define FPUS_IE (1 << 0)
50 #define FPUS_DE (1 << 1)
51 #define FPUS_ZE (1 << 2)
52 #define FPUS_OE (1 << 3)
53 #define FPUS_UE (1 << 4)
54 #define FPUS_PE (1 << 5)
55 #define FPUS_SF (1 << 6)
56 #define FPUS_SE (1 << 7)
57 #define FPUS_B (1 << 15)
59 #define FPUC_EM 0x3f
61 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
62 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
63 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
65 #if !defined(CONFIG_USER_ONLY)
66 static qemu_irq ferr_irq;
68 void x86_register_ferr_irq(qemu_irq irq)
70 ferr_irq = irq;
73 static void cpu_clear_ignne(void)
75 CPUX86State *env = &X86_CPU(first_cpu)->env;
76 env->hflags2 &= ~HF2_IGNNE_MASK;
79 void cpu_set_ignne(void)
81 CPUX86State *env = &X86_CPU(first_cpu)->env;
82 env->hflags2 |= HF2_IGNNE_MASK;
84 * We get here in response to a write to port F0h. The chipset should
85 * deassert FP_IRQ and FERR# instead should stay signaled until FPSW_SE is
86 * cleared, because FERR# and FP_IRQ are two separate pins on real
87 * hardware. However, we don't model FERR# as a qemu_irq, so we just
88 * do directly what the chipset would do, i.e. deassert FP_IRQ.
90 qemu_irq_lower(ferr_irq);
92 #endif
95 static inline void fpush(CPUX86State *env)
97 env->fpstt = (env->fpstt - 1) & 7;
98 env->fptags[env->fpstt] = 0; /* validate stack entry */
101 static inline void fpop(CPUX86State *env)
103 env->fptags[env->fpstt] = 1; /* invalidate stack entry */
104 env->fpstt = (env->fpstt + 1) & 7;
107 static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
108 uintptr_t retaddr)
110 CPU_LDoubleU temp;
112 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
113 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
114 return temp.d;
117 static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
118 uintptr_t retaddr)
120 CPU_LDoubleU temp;
122 temp.d = f;
123 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
124 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
127 /* x87 FPU helpers */
129 static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
131 union {
132 float64 f64;
133 double d;
134 } u;
136 u.f64 = floatx80_to_float64(a, &env->fp_status);
137 return u.d;
140 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
142 union {
143 float64 f64;
144 double d;
145 } u;
147 u.d = a;
148 return float64_to_floatx80(u.f64, &env->fp_status);
151 static void fpu_set_exception(CPUX86State *env, int mask)
153 env->fpus |= mask;
154 if (env->fpus & (~env->fpuc & FPUC_EM)) {
155 env->fpus |= FPUS_SE | FPUS_B;
159 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
161 if (floatx80_is_zero(b)) {
162 fpu_set_exception(env, FPUS_ZE);
164 return floatx80_div(a, b, &env->fp_status);
167 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
169 if (env->cr[0] & CR0_NE_MASK) {
170 raise_exception_ra(env, EXCP10_COPR, retaddr);
172 #if !defined(CONFIG_USER_ONLY)
173 else if (ferr_irq && !(env->hflags2 & HF2_IGNNE_MASK)) {
174 qemu_irq_raise(ferr_irq);
176 #endif
179 void helper_flds_FT0(CPUX86State *env, uint32_t val)
181 union {
182 float32 f;
183 uint32_t i;
184 } u;
186 u.i = val;
187 FT0 = float32_to_floatx80(u.f, &env->fp_status);
190 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
192 union {
193 float64 f;
194 uint64_t i;
195 } u;
197 u.i = val;
198 FT0 = float64_to_floatx80(u.f, &env->fp_status);
201 void helper_fildl_FT0(CPUX86State *env, int32_t val)
203 FT0 = int32_to_floatx80(val, &env->fp_status);
206 void helper_flds_ST0(CPUX86State *env, uint32_t val)
208 int new_fpstt;
209 union {
210 float32 f;
211 uint32_t i;
212 } u;
214 new_fpstt = (env->fpstt - 1) & 7;
215 u.i = val;
216 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
217 env->fpstt = new_fpstt;
218 env->fptags[new_fpstt] = 0; /* validate stack entry */
221 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
223 int new_fpstt;
224 union {
225 float64 f;
226 uint64_t i;
227 } u;
229 new_fpstt = (env->fpstt - 1) & 7;
230 u.i = val;
231 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
232 env->fpstt = new_fpstt;
233 env->fptags[new_fpstt] = 0; /* validate stack entry */
236 void helper_fildl_ST0(CPUX86State *env, int32_t val)
238 int new_fpstt;
240 new_fpstt = (env->fpstt - 1) & 7;
241 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
242 env->fpstt = new_fpstt;
243 env->fptags[new_fpstt] = 0; /* validate stack entry */
246 void helper_fildll_ST0(CPUX86State *env, int64_t val)
248 int new_fpstt;
250 new_fpstt = (env->fpstt - 1) & 7;
251 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
252 env->fpstt = new_fpstt;
253 env->fptags[new_fpstt] = 0; /* validate stack entry */
256 uint32_t helper_fsts_ST0(CPUX86State *env)
258 union {
259 float32 f;
260 uint32_t i;
261 } u;
263 u.f = floatx80_to_float32(ST0, &env->fp_status);
264 return u.i;
267 uint64_t helper_fstl_ST0(CPUX86State *env)
269 union {
270 float64 f;
271 uint64_t i;
272 } u;
274 u.f = floatx80_to_float64(ST0, &env->fp_status);
275 return u.i;
278 int32_t helper_fist_ST0(CPUX86State *env)
280 int32_t val;
282 val = floatx80_to_int32(ST0, &env->fp_status);
283 if (val != (int16_t)val) {
284 val = -32768;
286 return val;
289 int32_t helper_fistl_ST0(CPUX86State *env)
291 int32_t val;
292 signed char old_exp_flags;
294 old_exp_flags = get_float_exception_flags(&env->fp_status);
295 set_float_exception_flags(0, &env->fp_status);
297 val = floatx80_to_int32(ST0, &env->fp_status);
298 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
299 val = 0x80000000;
301 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
302 | old_exp_flags, &env->fp_status);
303 return val;
306 int64_t helper_fistll_ST0(CPUX86State *env)
308 int64_t val;
309 signed char old_exp_flags;
311 old_exp_flags = get_float_exception_flags(&env->fp_status);
312 set_float_exception_flags(0, &env->fp_status);
314 val = floatx80_to_int64(ST0, &env->fp_status);
315 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
316 val = 0x8000000000000000ULL;
318 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
319 | old_exp_flags, &env->fp_status);
320 return val;
323 int32_t helper_fistt_ST0(CPUX86State *env)
325 int32_t val;
327 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
328 if (val != (int16_t)val) {
329 val = -32768;
331 return val;
334 int32_t helper_fisttl_ST0(CPUX86State *env)
336 return floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
339 int64_t helper_fisttll_ST0(CPUX86State *env)
341 return floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
344 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
346 int new_fpstt;
348 new_fpstt = (env->fpstt - 1) & 7;
349 env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
350 env->fpstt = new_fpstt;
351 env->fptags[new_fpstt] = 0; /* validate stack entry */
354 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
356 helper_fstt(env, ST0, ptr, GETPC());
359 void helper_fpush(CPUX86State *env)
361 fpush(env);
364 void helper_fpop(CPUX86State *env)
366 fpop(env);
369 void helper_fdecstp(CPUX86State *env)
371 env->fpstt = (env->fpstt - 1) & 7;
372 env->fpus &= ~0x4700;
375 void helper_fincstp(CPUX86State *env)
377 env->fpstt = (env->fpstt + 1) & 7;
378 env->fpus &= ~0x4700;
381 /* FPU move */
383 void helper_ffree_STN(CPUX86State *env, int st_index)
385 env->fptags[(env->fpstt + st_index) & 7] = 1;
388 void helper_fmov_ST0_FT0(CPUX86State *env)
390 ST0 = FT0;
393 void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
395 FT0 = ST(st_index);
398 void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
400 ST0 = ST(st_index);
403 void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
405 ST(st_index) = ST0;
408 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
410 floatx80 tmp;
412 tmp = ST(st_index);
413 ST(st_index) = ST0;
414 ST0 = tmp;
417 /* FPU operations */
419 static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
421 void helper_fcom_ST0_FT0(CPUX86State *env)
423 int ret;
425 ret = floatx80_compare(ST0, FT0, &env->fp_status);
426 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
429 void helper_fucom_ST0_FT0(CPUX86State *env)
431 int ret;
433 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
434 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
437 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
439 void helper_fcomi_ST0_FT0(CPUX86State *env)
441 int eflags;
442 int ret;
444 ret = floatx80_compare(ST0, FT0, &env->fp_status);
445 eflags = cpu_cc_compute_all(env, CC_OP);
446 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
447 CC_SRC = eflags;
450 void helper_fucomi_ST0_FT0(CPUX86State *env)
452 int eflags;
453 int ret;
455 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
456 eflags = cpu_cc_compute_all(env, CC_OP);
457 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
458 CC_SRC = eflags;
461 void helper_fadd_ST0_FT0(CPUX86State *env)
463 ST0 = floatx80_add(ST0, FT0, &env->fp_status);
466 void helper_fmul_ST0_FT0(CPUX86State *env)
468 ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
471 void helper_fsub_ST0_FT0(CPUX86State *env)
473 ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
476 void helper_fsubr_ST0_FT0(CPUX86State *env)
478 ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
481 void helper_fdiv_ST0_FT0(CPUX86State *env)
483 ST0 = helper_fdiv(env, ST0, FT0);
486 void helper_fdivr_ST0_FT0(CPUX86State *env)
488 ST0 = helper_fdiv(env, FT0, ST0);
491 /* fp operations between STN and ST0 */
493 void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
495 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
498 void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
500 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
503 void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
505 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
508 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
510 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
513 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
515 floatx80 *p;
517 p = &ST(st_index);
518 *p = helper_fdiv(env, *p, ST0);
521 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
523 floatx80 *p;
525 p = &ST(st_index);
526 *p = helper_fdiv(env, ST0, *p);
529 /* misc FPU operations */
530 void helper_fchs_ST0(CPUX86State *env)
532 ST0 = floatx80_chs(ST0);
535 void helper_fabs_ST0(CPUX86State *env)
537 ST0 = floatx80_abs(ST0);
540 void helper_fld1_ST0(CPUX86State *env)
542 ST0 = floatx80_one;
545 void helper_fldl2t_ST0(CPUX86State *env)
547 ST0 = floatx80_l2t;
550 void helper_fldl2e_ST0(CPUX86State *env)
552 ST0 = floatx80_l2e;
555 void helper_fldpi_ST0(CPUX86State *env)
557 ST0 = floatx80_pi;
560 void helper_fldlg2_ST0(CPUX86State *env)
562 ST0 = floatx80_lg2;
565 void helper_fldln2_ST0(CPUX86State *env)
567 ST0 = floatx80_ln2;
570 void helper_fldz_ST0(CPUX86State *env)
572 ST0 = floatx80_zero;
575 void helper_fldz_FT0(CPUX86State *env)
577 FT0 = floatx80_zero;
580 uint32_t helper_fnstsw(CPUX86State *env)
582 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
585 uint32_t helper_fnstcw(CPUX86State *env)
587 return env->fpuc;
590 void update_fp_status(CPUX86State *env)
592 int rnd_type;
594 /* set rounding mode */
595 switch (env->fpuc & FPU_RC_MASK) {
596 default:
597 case FPU_RC_NEAR:
598 rnd_type = float_round_nearest_even;
599 break;
600 case FPU_RC_DOWN:
601 rnd_type = float_round_down;
602 break;
603 case FPU_RC_UP:
604 rnd_type = float_round_up;
605 break;
606 case FPU_RC_CHOP:
607 rnd_type = float_round_to_zero;
608 break;
610 set_float_rounding_mode(rnd_type, &env->fp_status);
611 switch ((env->fpuc >> 8) & 3) {
612 case 0:
613 rnd_type = 32;
614 break;
615 case 2:
616 rnd_type = 64;
617 break;
618 case 3:
619 default:
620 rnd_type = 80;
621 break;
623 set_floatx80_rounding_precision(rnd_type, &env->fp_status);
626 void helper_fldcw(CPUX86State *env, uint32_t val)
628 cpu_set_fpuc(env, val);
631 void helper_fclex(CPUX86State *env)
633 env->fpus &= 0x7f00;
636 void helper_fwait(CPUX86State *env)
638 if (env->fpus & FPUS_SE) {
639 fpu_raise_exception(env, GETPC());
643 void helper_fninit(CPUX86State *env)
645 env->fpus = 0;
646 env->fpstt = 0;
647 cpu_set_fpuc(env, 0x37f);
648 env->fptags[0] = 1;
649 env->fptags[1] = 1;
650 env->fptags[2] = 1;
651 env->fptags[3] = 1;
652 env->fptags[4] = 1;
653 env->fptags[5] = 1;
654 env->fptags[6] = 1;
655 env->fptags[7] = 1;
658 /* BCD ops */
660 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
662 floatx80 tmp;
663 uint64_t val;
664 unsigned int v;
665 int i;
667 val = 0;
668 for (i = 8; i >= 0; i--) {
669 v = cpu_ldub_data_ra(env, ptr + i, GETPC());
670 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
672 tmp = int64_to_floatx80(val, &env->fp_status);
673 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
674 tmp = floatx80_chs(tmp);
676 fpush(env);
677 ST0 = tmp;
680 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
682 int v;
683 target_ulong mem_ref, mem_end;
684 int64_t val;
686 val = floatx80_to_int64(ST0, &env->fp_status);
687 mem_ref = ptr;
688 mem_end = mem_ref + 9;
689 if (val < 0) {
690 cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
691 val = -val;
692 } else {
693 cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
695 while (mem_ref < mem_end) {
696 if (val == 0) {
697 break;
699 v = val % 100;
700 val = val / 100;
701 v = ((v / 10) << 4) | (v % 10);
702 cpu_stb_data_ra(env, mem_ref++, v, GETPC());
704 while (mem_ref < mem_end) {
705 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
709 void helper_f2xm1(CPUX86State *env)
711 double val = floatx80_to_double(env, ST0);
713 val = pow(2.0, val) - 1.0;
714 ST0 = double_to_floatx80(env, val);
717 void helper_fyl2x(CPUX86State *env)
719 double fptemp = floatx80_to_double(env, ST0);
721 if (fptemp > 0.0) {
722 fptemp = log(fptemp) / log(2.0); /* log2(ST) */
723 fptemp *= floatx80_to_double(env, ST1);
724 ST1 = double_to_floatx80(env, fptemp);
725 fpop(env);
726 } else {
727 env->fpus &= ~0x4700;
728 env->fpus |= 0x400;
732 void helper_fptan(CPUX86State *env)
734 double fptemp = floatx80_to_double(env, ST0);
736 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
737 env->fpus |= 0x400;
738 } else {
739 fptemp = tan(fptemp);
740 ST0 = double_to_floatx80(env, fptemp);
741 fpush(env);
742 ST0 = floatx80_one;
743 env->fpus &= ~0x400; /* C2 <-- 0 */
744 /* the above code is for |arg| < 2**52 only */
748 void helper_fpatan(CPUX86State *env)
750 double fptemp, fpsrcop;
752 fpsrcop = floatx80_to_double(env, ST1);
753 fptemp = floatx80_to_double(env, ST0);
754 ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
755 fpop(env);
758 void helper_fxtract(CPUX86State *env)
760 CPU_LDoubleU temp;
762 temp.d = ST0;
764 if (floatx80_is_zero(ST0)) {
765 /* Easy way to generate -inf and raising division by 0 exception */
766 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
767 &env->fp_status);
768 fpush(env);
769 ST0 = temp.d;
770 } else {
771 int expdif;
773 expdif = EXPD(temp) - EXPBIAS;
774 /* DP exponent bias */
775 ST0 = int32_to_floatx80(expdif, &env->fp_status);
776 fpush(env);
777 BIASEXPONENT(temp);
778 ST0 = temp.d;
782 void helper_fprem1(CPUX86State *env)
784 double st0, st1, dblq, fpsrcop, fptemp;
785 CPU_LDoubleU fpsrcop1, fptemp1;
786 int expdif;
787 signed long long int q;
789 st0 = floatx80_to_double(env, ST0);
790 st1 = floatx80_to_double(env, ST1);
792 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
793 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
794 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
795 return;
798 fpsrcop = st0;
799 fptemp = st1;
800 fpsrcop1.d = ST0;
801 fptemp1.d = ST1;
802 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
804 if (expdif < 0) {
805 /* optimisation? taken from the AMD docs */
806 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
807 /* ST0 is unchanged */
808 return;
811 if (expdif < 53) {
812 dblq = fpsrcop / fptemp;
813 /* round dblq towards nearest integer */
814 dblq = rint(dblq);
815 st0 = fpsrcop - fptemp * dblq;
817 /* convert dblq to q by truncating towards zero */
818 if (dblq < 0.0) {
819 q = (signed long long int)(-dblq);
820 } else {
821 q = (signed long long int)dblq;
824 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
825 /* (C0,C3,C1) <-- (q2,q1,q0) */
826 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
827 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
828 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
829 } else {
830 env->fpus |= 0x400; /* C2 <-- 1 */
831 fptemp = pow(2.0, expdif - 50);
832 fpsrcop = (st0 / st1) / fptemp;
833 /* fpsrcop = integer obtained by chopping */
834 fpsrcop = (fpsrcop < 0.0) ?
835 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
836 st0 -= (st1 * fpsrcop * fptemp);
838 ST0 = double_to_floatx80(env, st0);
841 void helper_fprem(CPUX86State *env)
843 double st0, st1, dblq, fpsrcop, fptemp;
844 CPU_LDoubleU fpsrcop1, fptemp1;
845 int expdif;
846 signed long long int q;
848 st0 = floatx80_to_double(env, ST0);
849 st1 = floatx80_to_double(env, ST1);
851 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
852 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
853 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
854 return;
857 fpsrcop = st0;
858 fptemp = st1;
859 fpsrcop1.d = ST0;
860 fptemp1.d = ST1;
861 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
863 if (expdif < 0) {
864 /* optimisation? taken from the AMD docs */
865 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
866 /* ST0 is unchanged */
867 return;
870 if (expdif < 53) {
871 dblq = fpsrcop / fptemp; /* ST0 / ST1 */
872 /* round dblq towards zero */
873 dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
874 st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
876 /* convert dblq to q by truncating towards zero */
877 if (dblq < 0.0) {
878 q = (signed long long int)(-dblq);
879 } else {
880 q = (signed long long int)dblq;
883 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
884 /* (C0,C3,C1) <-- (q2,q1,q0) */
885 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
886 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
887 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
888 } else {
889 int N = 32 + (expdif % 32); /* as per AMD docs */
891 env->fpus |= 0x400; /* C2 <-- 1 */
892 fptemp = pow(2.0, (double)(expdif - N));
893 fpsrcop = (st0 / st1) / fptemp;
894 /* fpsrcop = integer obtained by chopping */
895 fpsrcop = (fpsrcop < 0.0) ?
896 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
897 st0 -= (st1 * fpsrcop * fptemp);
899 ST0 = double_to_floatx80(env, st0);
902 void helper_fyl2xp1(CPUX86State *env)
904 double fptemp = floatx80_to_double(env, ST0);
906 if ((fptemp + 1.0) > 0.0) {
907 fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
908 fptemp *= floatx80_to_double(env, ST1);
909 ST1 = double_to_floatx80(env, fptemp);
910 fpop(env);
911 } else {
912 env->fpus &= ~0x4700;
913 env->fpus |= 0x400;
917 void helper_fsqrt(CPUX86State *env)
919 if (floatx80_is_neg(ST0)) {
920 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
921 env->fpus |= 0x400;
923 ST0 = floatx80_sqrt(ST0, &env->fp_status);
926 void helper_fsincos(CPUX86State *env)
928 double fptemp = floatx80_to_double(env, ST0);
930 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
931 env->fpus |= 0x400;
932 } else {
933 ST0 = double_to_floatx80(env, sin(fptemp));
934 fpush(env);
935 ST0 = double_to_floatx80(env, cos(fptemp));
936 env->fpus &= ~0x400; /* C2 <-- 0 */
937 /* the above code is for |arg| < 2**63 only */
941 void helper_frndint(CPUX86State *env)
943 ST0 = floatx80_round_to_int(ST0, &env->fp_status);
946 void helper_fscale(CPUX86State *env)
948 if (floatx80_is_any_nan(ST1)) {
949 ST0 = ST1;
950 } else {
951 int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
952 ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
956 void helper_fsin(CPUX86State *env)
958 double fptemp = floatx80_to_double(env, ST0);
960 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
961 env->fpus |= 0x400;
962 } else {
963 ST0 = double_to_floatx80(env, sin(fptemp));
964 env->fpus &= ~0x400; /* C2 <-- 0 */
965 /* the above code is for |arg| < 2**53 only */
969 void helper_fcos(CPUX86State *env)
971 double fptemp = floatx80_to_double(env, ST0);
973 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
974 env->fpus |= 0x400;
975 } else {
976 ST0 = double_to_floatx80(env, cos(fptemp));
977 env->fpus &= ~0x400; /* C2 <-- 0 */
978 /* the above code is for |arg| < 2**63 only */
982 void helper_fxam_ST0(CPUX86State *env)
984 CPU_LDoubleU temp;
985 int expdif;
987 temp.d = ST0;
989 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
990 if (SIGND(temp)) {
991 env->fpus |= 0x200; /* C1 <-- 1 */
994 if (env->fptags[env->fpstt]) {
995 env->fpus |= 0x4100; /* Empty */
996 return;
999 expdif = EXPD(temp);
1000 if (expdif == MAXEXPD) {
1001 if (MANTD(temp) == 0x8000000000000000ULL) {
1002 env->fpus |= 0x500; /* Infinity */
1003 } else {
1004 env->fpus |= 0x100; /* NaN */
1006 } else if (expdif == 0) {
1007 if (MANTD(temp) == 0) {
1008 env->fpus |= 0x4000; /* Zero */
1009 } else {
1010 env->fpus |= 0x4400; /* Denormal */
1012 } else {
1013 env->fpus |= 0x400;
1017 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
1018 uintptr_t retaddr)
1020 int fpus, fptag, exp, i;
1021 uint64_t mant;
1022 CPU_LDoubleU tmp;
1024 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1025 fptag = 0;
1026 for (i = 7; i >= 0; i--) {
1027 fptag <<= 2;
1028 if (env->fptags[i]) {
1029 fptag |= 3;
1030 } else {
1031 tmp.d = env->fpregs[i].d;
1032 exp = EXPD(tmp);
1033 mant = MANTD(tmp);
1034 if (exp == 0 && mant == 0) {
1035 /* zero */
1036 fptag |= 1;
1037 } else if (exp == 0 || exp == MAXEXPD
1038 || (mant & (1LL << 63)) == 0) {
1039 /* NaNs, infinity, denormal */
1040 fptag |= 2;
1044 if (data32) {
1045 /* 32 bit */
1046 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1047 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1048 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1049 cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1050 cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1051 cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1052 cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
1053 } else {
1054 /* 16 bit */
1055 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1056 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1057 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1058 cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1059 cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1060 cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1061 cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
1065 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1067 do_fstenv(env, ptr, data32, GETPC());
1070 static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
1072 env->fpstt = (fpus >> 11) & 7;
1073 env->fpus = fpus & ~0x3800 & ~FPUS_B;
1074 env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
1075 #if !defined(CONFIG_USER_ONLY)
1076 if (!(env->fpus & FPUS_SE)) {
1078 * Here the processor deasserts FERR#; in response, the chipset deasserts
1079 * IGNNE#.
1081 cpu_clear_ignne();
1083 #endif
1086 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1087 uintptr_t retaddr)
1089 int i, fpus, fptag;
1091 if (data32) {
1092 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1093 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1094 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1095 } else {
1096 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1097 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1098 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1100 cpu_set_fpus(env, fpus);
1101 for (i = 0; i < 8; i++) {
1102 env->fptags[i] = ((fptag & 3) == 3);
1103 fptag >>= 2;
1107 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1109 do_fldenv(env, ptr, data32, GETPC());
1112 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1114 floatx80 tmp;
1115 int i;
1117 do_fstenv(env, ptr, data32, GETPC());
1119 ptr += (14 << data32);
1120 for (i = 0; i < 8; i++) {
1121 tmp = ST(i);
1122 helper_fstt(env, tmp, ptr, GETPC());
1123 ptr += 10;
1126 /* fninit */
1127 env->fpus = 0;
1128 env->fpstt = 0;
1129 cpu_set_fpuc(env, 0x37f);
1130 env->fptags[0] = 1;
1131 env->fptags[1] = 1;
1132 env->fptags[2] = 1;
1133 env->fptags[3] = 1;
1134 env->fptags[4] = 1;
1135 env->fptags[5] = 1;
1136 env->fptags[6] = 1;
1137 env->fptags[7] = 1;
1140 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
1142 floatx80 tmp;
1143 int i;
1145 do_fldenv(env, ptr, data32, GETPC());
1146 ptr += (14 << data32);
1148 for (i = 0; i < 8; i++) {
1149 tmp = helper_fldt(env, ptr, GETPC());
1150 ST(i) = tmp;
1151 ptr += 10;
1155 #if defined(CONFIG_USER_ONLY)
1156 void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
1158 helper_fsave(env, ptr, data32);
1161 void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
1163 helper_frstor(env, ptr, data32);
1165 #endif
1167 #define XO(X) offsetof(X86XSaveArea, X)
1169 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1171 int fpus, fptag, i;
1172 target_ulong addr;
1174 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1175 fptag = 0;
1176 for (i = 0; i < 8; i++) {
1177 fptag |= (env->fptags[i] << i);
1180 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
1181 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
1182 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
1184 /* In 32-bit mode this is eip, sel, dp, sel.
1185 In 64-bit mode this is rip, rdp.
1186 But in either case we don't write actual data, just zeros. */
1187 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
1188 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
1190 addr = ptr + XO(legacy.fpregs);
1191 for (i = 0; i < 8; i++) {
1192 floatx80 tmp = ST(i);
1193 helper_fstt(env, tmp, addr, ra);
1194 addr += 16;
1198 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1200 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
1201 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
1204 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1206 int i, nb_xmm_regs;
1207 target_ulong addr;
1209 if (env->hflags & HF_CS64_MASK) {
1210 nb_xmm_regs = 16;
1211 } else {
1212 nb_xmm_regs = 8;
1215 addr = ptr + XO(legacy.xmm_regs);
1216 for (i = 0; i < nb_xmm_regs; i++) {
1217 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1218 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1219 addr += 16;
1223 static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1225 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1226 int i;
1228 for (i = 0; i < 4; i++, addr += 16) {
1229 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1230 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1234 static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1236 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
1237 env->bndcs_regs.cfgu, ra);
1238 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
1239 env->bndcs_regs.sts, ra);
1242 static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1244 cpu_stq_data_ra(env, ptr, env->pkru, ra);
1247 void helper_fxsave(CPUX86State *env, target_ulong ptr)
1249 uintptr_t ra = GETPC();
1251 /* The operand must be 16 byte aligned */
1252 if (ptr & 0xf) {
1253 raise_exception_ra(env, EXCP0D_GPF, ra);
1256 do_xsave_fpu(env, ptr, ra);
1258 if (env->cr[4] & CR4_OSFXSR_MASK) {
1259 do_xsave_mxcsr(env, ptr, ra);
1260 /* Fast FXSAVE leaves out the XMM registers */
1261 if (!(env->efer & MSR_EFER_FFXSR)
1262 || (env->hflags & HF_CPL_MASK)
1263 || !(env->hflags & HF_LMA_MASK)) {
1264 do_xsave_sse(env, ptr, ra);
1269 static uint64_t get_xinuse(CPUX86State *env)
1271 uint64_t inuse = -1;
1273 /* For the most part, we don't track XINUSE. We could calculate it
1274 here for all components, but it's probably less work to simply
1275 indicate in use. That said, the state of BNDREGS is important
1276 enough to track in HFLAGS, so we might as well use that here. */
1277 if ((env->hflags & HF_MPX_IU_MASK) == 0) {
1278 inuse &= ~XSTATE_BNDREGS_MASK;
1280 return inuse;
1283 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1284 uint64_t inuse, uint64_t opt, uintptr_t ra)
1286 uint64_t old_bv, new_bv;
1288 /* The OS must have enabled XSAVE. */
1289 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1290 raise_exception_ra(env, EXCP06_ILLOP, ra);
1293 /* The operand must be 64 byte aligned. */
1294 if (ptr & 63) {
1295 raise_exception_ra(env, EXCP0D_GPF, ra);
1298 /* Never save anything not enabled by XCR0. */
1299 rfbm &= env->xcr0;
1300 opt &= rfbm;
1302 if (opt & XSTATE_FP_MASK) {
1303 do_xsave_fpu(env, ptr, ra);
1305 if (rfbm & XSTATE_SSE_MASK) {
1306 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
1307 do_xsave_mxcsr(env, ptr, ra);
1309 if (opt & XSTATE_SSE_MASK) {
1310 do_xsave_sse(env, ptr, ra);
1312 if (opt & XSTATE_BNDREGS_MASK) {
1313 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
1315 if (opt & XSTATE_BNDCSR_MASK) {
1316 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
1318 if (opt & XSTATE_PKRU_MASK) {
1319 do_xsave_pkru(env, ptr + XO(pkru_state), ra);
1322 /* Update the XSTATE_BV field. */
1323 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1324 new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
1325 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
1328 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1330 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1333 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1335 uint64_t inuse = get_xinuse(env);
1336 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1339 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1341 int i, fpuc, fpus, fptag;
1342 target_ulong addr;
1344 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
1345 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
1346 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
1347 cpu_set_fpuc(env, fpuc);
1348 cpu_set_fpus(env, fpus);
1349 fptag ^= 0xff;
1350 for (i = 0; i < 8; i++) {
1351 env->fptags[i] = ((fptag >> i) & 1);
1354 addr = ptr + XO(legacy.fpregs);
1355 for (i = 0; i < 8; i++) {
1356 floatx80 tmp = helper_fldt(env, addr, ra);
1357 ST(i) = tmp;
1358 addr += 16;
1362 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1364 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
1367 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1369 int i, nb_xmm_regs;
1370 target_ulong addr;
1372 if (env->hflags & HF_CS64_MASK) {
1373 nb_xmm_regs = 16;
1374 } else {
1375 nb_xmm_regs = 8;
1378 addr = ptr + XO(legacy.xmm_regs);
1379 for (i = 0; i < nb_xmm_regs; i++) {
1380 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1381 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1382 addr += 16;
1386 static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1388 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1389 int i;
1391 for (i = 0; i < 4; i++, addr += 16) {
1392 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1393 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1397 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1399 /* FIXME: Extend highest implemented bit of linear address. */
1400 env->bndcs_regs.cfgu
1401 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
1402 env->bndcs_regs.sts
1403 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
1406 static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1408 env->pkru = cpu_ldq_data_ra(env, ptr, ra);
1411 void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1413 uintptr_t ra = GETPC();
1415 /* The operand must be 16 byte aligned */
1416 if (ptr & 0xf) {
1417 raise_exception_ra(env, EXCP0D_GPF, ra);
1420 do_xrstor_fpu(env, ptr, ra);
1422 if (env->cr[4] & CR4_OSFXSR_MASK) {
1423 do_xrstor_mxcsr(env, ptr, ra);
1424 /* Fast FXRSTOR leaves out the XMM registers */
1425 if (!(env->efer & MSR_EFER_FFXSR)
1426 || (env->hflags & HF_CPL_MASK)
1427 || !(env->hflags & HF_LMA_MASK)) {
1428 do_xrstor_sse(env, ptr, ra);
#if defined(CONFIG_USER_ONLY)
/* User-mode-only entry points; both simply forward to the TCG helpers.  */
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
{
    helper_fxsave(env, ptr);
}

void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
{
    helper_fxrstor(env, ptr);
}
#endif
1445 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1447 uintptr_t ra = GETPC();
1448 uint64_t xstate_bv, xcomp_bv, reserve0;
1450 rfbm &= env->xcr0;
1452 /* The OS must have enabled XSAVE. */
1453 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1454 raise_exception_ra(env, EXCP06_ILLOP, ra);
1457 /* The operand must be 64 byte aligned. */
1458 if (ptr & 63) {
1459 raise_exception_ra(env, EXCP0D_GPF, ra);
1462 xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1464 if ((int64_t)xstate_bv < 0) {
1465 /* FIXME: Compact form. */
1466 raise_exception_ra(env, EXCP0D_GPF, ra);
1469 /* Standard form. */
1471 /* The XSTATE_BV field must not set bits not present in XCR0. */
1472 if (xstate_bv & ~env->xcr0) {
1473 raise_exception_ra(env, EXCP0D_GPF, ra);
1476 /* The XCOMP_BV field must be zero. Note that, as of the April 2016
1477 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
1478 describes only XCOMP_BV, but the description of the standard form
1479 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
1480 includes the next 64-bit field. */
1481 xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
1482 reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
1483 if (xcomp_bv || reserve0) {
1484 raise_exception_ra(env, EXCP0D_GPF, ra);
1487 if (rfbm & XSTATE_FP_MASK) {
1488 if (xstate_bv & XSTATE_FP_MASK) {
1489 do_xrstor_fpu(env, ptr, ra);
1490 } else {
1491 helper_fninit(env);
1492 memset(env->fpregs, 0, sizeof(env->fpregs));
1495 if (rfbm & XSTATE_SSE_MASK) {
1496 /* Note that the standard form of XRSTOR loads MXCSR from memory
1497 whether or not the XSTATE_BV bit is set. */
1498 do_xrstor_mxcsr(env, ptr, ra);
1499 if (xstate_bv & XSTATE_SSE_MASK) {
1500 do_xrstor_sse(env, ptr, ra);
1501 } else {
1502 /* ??? When AVX is implemented, we may have to be more
1503 selective in the clearing. */
1504 memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
1507 if (rfbm & XSTATE_BNDREGS_MASK) {
1508 if (xstate_bv & XSTATE_BNDREGS_MASK) {
1509 do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
1510 env->hflags |= HF_MPX_IU_MASK;
1511 } else {
1512 memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
1513 env->hflags &= ~HF_MPX_IU_MASK;
1516 if (rfbm & XSTATE_BNDCSR_MASK) {
1517 if (xstate_bv & XSTATE_BNDCSR_MASK) {
1518 do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
1519 } else {
1520 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
1522 cpu_sync_bndcs_hflags(env);
1524 if (rfbm & XSTATE_PKRU_MASK) {
1525 uint64_t old_pkru = env->pkru;
1526 if (xstate_bv & XSTATE_PKRU_MASK) {
1527 do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
1528 } else {
1529 env->pkru = 0;
1531 if (env->pkru != old_pkru) {
1532 CPUState *cs = env_cpu(env);
1533 tlb_flush(cs);
1538 #undef XO
1540 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1542 /* The OS must have enabled XSAVE. */
1543 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1544 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1547 switch (ecx) {
1548 case 0:
1549 return env->xcr0;
1550 case 1:
1551 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1552 return env->xcr0 & get_xinuse(env);
1554 break;
1556 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1559 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1561 uint32_t dummy, ena_lo, ena_hi;
1562 uint64_t ena;
1564 /* The OS must have enabled XSAVE. */
1565 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1566 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1569 /* Only XCR0 is defined at present; the FPU may not be disabled. */
1570 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
1571 goto do_gpf;
1574 /* Disallow enabling unimplemented features. */
1575 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
1576 ena = ((uint64_t)ena_hi << 32) | ena_lo;
1577 if (mask & ~ena) {
1578 goto do_gpf;
1581 /* Disallow enabling only half of MPX. */
1582 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
1583 & XSTATE_BNDCSR_MASK) {
1584 goto do_gpf;
1587 env->xcr0 = mask;
1588 cpu_sync_bndcs_hflags(env);
1589 return;
1591 do_gpf:
1592 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1595 /* MMX/SSE */
1596 /* XXX: optimize by storing fptt and fptags in the static cpu state */
1598 #define SSE_DAZ 0x0040
1599 #define SSE_RC_MASK 0x6000
1600 #define SSE_RC_NEAR 0x0000
1601 #define SSE_RC_DOWN 0x2000
1602 #define SSE_RC_UP 0x4000
1603 #define SSE_RC_CHOP 0x6000
1604 #define SSE_FZ 0x8000
1606 void update_mxcsr_status(CPUX86State *env)
1608 uint32_t mxcsr = env->mxcsr;
1609 int rnd_type;
1611 /* set rounding mode */
1612 switch (mxcsr & SSE_RC_MASK) {
1613 default:
1614 case SSE_RC_NEAR:
1615 rnd_type = float_round_nearest_even;
1616 break;
1617 case SSE_RC_DOWN:
1618 rnd_type = float_round_down;
1619 break;
1620 case SSE_RC_UP:
1621 rnd_type = float_round_up;
1622 break;
1623 case SSE_RC_CHOP:
1624 rnd_type = float_round_to_zero;
1625 break;
1627 set_float_rounding_mode(rnd_type, &env->sse_status);
1629 /* set denormals are zero */
1630 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
1632 /* set flush to zero */
1633 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
1636 void helper_ldmxcsr(CPUX86State *env, uint32_t val)
1638 cpu_set_mxcsr(env, val);
1641 void helper_enter_mmx(CPUX86State *env)
1643 env->fpstt = 0;
1644 *(uint32_t *)(env->fptags) = 0;
1645 *(uint32_t *)(env->fptags + 4) = 0;
1648 void helper_emms(CPUX86State *env)
1650 /* set to empty state */
1651 *(uint32_t *)(env->fptags) = 0x01010101;
1652 *(uint32_t *)(env->fptags + 4) = 0x01010101;
1655 /* XXX: suppress */
1656 void helper_movq(CPUX86State *env, void *d, void *s)
1658 *(uint64_t *)d = *(uint64_t *)s;
1661 #define SHIFT 0
1662 #include "ops_sse.h"
1664 #define SHIFT 1
1665 #include "ops_sse.h"