Merge remote-tracking branch 'remotes/vivier2/tags/trivial-branch-pull-request' into...
[qemu/ar7.git] / target / i386 / fpu_helper.c
blob99f28f267f60f7b9b1b79bc5461bbcad71d50650
1 /*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include <math.h>
22 #include "cpu.h"
23 #include "exec/helper-proto.h"
24 #include "qemu/host-utils.h"
25 #include "exec/exec-all.h"
26 #include "exec/cpu_ldst.h"
27 #include "fpu/softfloat.h"
29 #ifdef CONFIG_SOFTMMU
30 #include "hw/irq.h"
31 #endif
33 #define FPU_RC_MASK 0xc00
34 #define FPU_RC_NEAR 0x000
35 #define FPU_RC_DOWN 0x400
36 #define FPU_RC_UP 0x800
37 #define FPU_RC_CHOP 0xc00
39 #define MAXTAN 9223372036854775808.0
41 /* the following deal with x86 long double-precision numbers */
42 #define MAXEXPD 0x7fff
43 #define EXPBIAS 16383
44 #define EXPD(fp) (fp.l.upper & 0x7fff)
45 #define SIGND(fp) ((fp.l.upper) & 0x8000)
46 #define MANTD(fp) (fp.l.lower)
47 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
49 #define FPUS_IE (1 << 0)
50 #define FPUS_DE (1 << 1)
51 #define FPUS_ZE (1 << 2)
52 #define FPUS_OE (1 << 3)
53 #define FPUS_UE (1 << 4)
54 #define FPUS_PE (1 << 5)
55 #define FPUS_SF (1 << 6)
56 #define FPUS_SE (1 << 7)
57 #define FPUS_B (1 << 15)
59 #define FPUC_EM 0x3f
61 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
62 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
63 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
65 #if !defined(CONFIG_USER_ONLY)
66 static qemu_irq ferr_irq;
68 void x86_register_ferr_irq(qemu_irq irq)
70 ferr_irq = irq;
73 static void cpu_clear_ignne(void)
75 CPUX86State *env = &X86_CPU(first_cpu)->env;
76 env->hflags2 &= ~HF2_IGNNE_MASK;
79 void cpu_set_ignne(void)
81 CPUX86State *env = &X86_CPU(first_cpu)->env;
82 env->hflags2 |= HF2_IGNNE_MASK;
84 * We get here in response to a write to port F0h. The chipset should
85 * deassert FP_IRQ and FERR# instead should stay signaled until FPSW_SE is
86 * cleared, because FERR# and FP_IRQ are two separate pins on real
87 * hardware. However, we don't model FERR# as a qemu_irq, so we just
88 * do directly what the chipset would do, i.e. deassert FP_IRQ.
90 qemu_irq_lower(ferr_irq);
92 #endif
95 static inline void fpush(CPUX86State *env)
97 env->fpstt = (env->fpstt - 1) & 7;
98 env->fptags[env->fpstt] = 0; /* validate stack entry */
101 static inline void fpop(CPUX86State *env)
103 env->fptags[env->fpstt] = 1; /* invalidate stack entry */
104 env->fpstt = (env->fpstt + 1) & 7;
107 static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
108 uintptr_t retaddr)
110 CPU_LDoubleU temp;
112 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
113 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
114 return temp.d;
117 static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
118 uintptr_t retaddr)
120 CPU_LDoubleU temp;
122 temp.d = f;
123 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
124 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
127 /* x87 FPU helpers */
129 static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
131 union {
132 float64 f64;
133 double d;
134 } u;
136 u.f64 = floatx80_to_float64(a, &env->fp_status);
137 return u.d;
140 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
142 union {
143 float64 f64;
144 double d;
145 } u;
147 u.d = a;
148 return float64_to_floatx80(u.f64, &env->fp_status);
151 static void fpu_set_exception(CPUX86State *env, int mask)
153 env->fpus |= mask;
154 if (env->fpus & (~env->fpuc & FPUC_EM)) {
155 env->fpus |= FPUS_SE | FPUS_B;
159 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
161 if (floatx80_is_zero(b)) {
162 fpu_set_exception(env, FPUS_ZE);
164 return floatx80_div(a, b, &env->fp_status);
167 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
169 if (env->cr[0] & CR0_NE_MASK) {
170 raise_exception_ra(env, EXCP10_COPR, retaddr);
172 #if !defined(CONFIG_USER_ONLY)
173 else if (ferr_irq && !(env->hflags2 & HF2_IGNNE_MASK)) {
174 qemu_irq_raise(ferr_irq);
176 #endif
179 void helper_flds_FT0(CPUX86State *env, uint32_t val)
181 union {
182 float32 f;
183 uint32_t i;
184 } u;
186 u.i = val;
187 FT0 = float32_to_floatx80(u.f, &env->fp_status);
190 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
192 union {
193 float64 f;
194 uint64_t i;
195 } u;
197 u.i = val;
198 FT0 = float64_to_floatx80(u.f, &env->fp_status);
201 void helper_fildl_FT0(CPUX86State *env, int32_t val)
203 FT0 = int32_to_floatx80(val, &env->fp_status);
206 void helper_flds_ST0(CPUX86State *env, uint32_t val)
208 int new_fpstt;
209 union {
210 float32 f;
211 uint32_t i;
212 } u;
214 new_fpstt = (env->fpstt - 1) & 7;
215 u.i = val;
216 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
217 env->fpstt = new_fpstt;
218 env->fptags[new_fpstt] = 0; /* validate stack entry */
221 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
223 int new_fpstt;
224 union {
225 float64 f;
226 uint64_t i;
227 } u;
229 new_fpstt = (env->fpstt - 1) & 7;
230 u.i = val;
231 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
232 env->fpstt = new_fpstt;
233 env->fptags[new_fpstt] = 0; /* validate stack entry */
236 void helper_fildl_ST0(CPUX86State *env, int32_t val)
238 int new_fpstt;
240 new_fpstt = (env->fpstt - 1) & 7;
241 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
242 env->fpstt = new_fpstt;
243 env->fptags[new_fpstt] = 0; /* validate stack entry */
246 void helper_fildll_ST0(CPUX86State *env, int64_t val)
248 int new_fpstt;
250 new_fpstt = (env->fpstt - 1) & 7;
251 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
252 env->fpstt = new_fpstt;
253 env->fptags[new_fpstt] = 0; /* validate stack entry */
256 uint32_t helper_fsts_ST0(CPUX86State *env)
258 union {
259 float32 f;
260 uint32_t i;
261 } u;
263 u.f = floatx80_to_float32(ST0, &env->fp_status);
264 return u.i;
267 uint64_t helper_fstl_ST0(CPUX86State *env)
269 union {
270 float64 f;
271 uint64_t i;
272 } u;
274 u.f = floatx80_to_float64(ST0, &env->fp_status);
275 return u.i;
278 int32_t helper_fist_ST0(CPUX86State *env)
280 int32_t val;
282 val = floatx80_to_int32(ST0, &env->fp_status);
283 if (val != (int16_t)val) {
284 val = -32768;
286 return val;
289 int32_t helper_fistl_ST0(CPUX86State *env)
291 int32_t val;
292 signed char old_exp_flags;
294 old_exp_flags = get_float_exception_flags(&env->fp_status);
295 set_float_exception_flags(0, &env->fp_status);
297 val = floatx80_to_int32(ST0, &env->fp_status);
298 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
299 val = 0x80000000;
301 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
302 | old_exp_flags, &env->fp_status);
303 return val;
306 int64_t helper_fistll_ST0(CPUX86State *env)
308 int64_t val;
309 signed char old_exp_flags;
311 old_exp_flags = get_float_exception_flags(&env->fp_status);
312 set_float_exception_flags(0, &env->fp_status);
314 val = floatx80_to_int64(ST0, &env->fp_status);
315 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
316 val = 0x8000000000000000ULL;
318 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
319 | old_exp_flags, &env->fp_status);
320 return val;
323 int32_t helper_fistt_ST0(CPUX86State *env)
325 int32_t val;
327 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
328 if (val != (int16_t)val) {
329 val = -32768;
331 return val;
334 int32_t helper_fisttl_ST0(CPUX86State *env)
336 return floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
339 int64_t helper_fisttll_ST0(CPUX86State *env)
341 return floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
344 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
346 int new_fpstt;
348 new_fpstt = (env->fpstt - 1) & 7;
349 env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
350 env->fpstt = new_fpstt;
351 env->fptags[new_fpstt] = 0; /* validate stack entry */
354 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
356 helper_fstt(env, ST0, ptr, GETPC());
359 void helper_fpush(CPUX86State *env)
361 fpush(env);
364 void helper_fpop(CPUX86State *env)
366 fpop(env);
369 void helper_fdecstp(CPUX86State *env)
371 env->fpstt = (env->fpstt - 1) & 7;
372 env->fpus &= ~0x4700;
375 void helper_fincstp(CPUX86State *env)
377 env->fpstt = (env->fpstt + 1) & 7;
378 env->fpus &= ~0x4700;
381 /* FPU move */
383 void helper_ffree_STN(CPUX86State *env, int st_index)
385 env->fptags[(env->fpstt + st_index) & 7] = 1;
388 void helper_fmov_ST0_FT0(CPUX86State *env)
390 ST0 = FT0;
393 void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
395 FT0 = ST(st_index);
398 void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
400 ST0 = ST(st_index);
403 void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
405 ST(st_index) = ST0;
408 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
410 floatx80 tmp;
412 tmp = ST(st_index);
413 ST(st_index) = ST0;
414 ST0 = tmp;
417 /* FPU operations */
/* FPUS C3/C2/C0 patterns indexed by (floatx80_compare result + 1):
   less, equal, greater, unordered. */
static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
421 void helper_fcom_ST0_FT0(CPUX86State *env)
423 int ret;
425 ret = floatx80_compare(ST0, FT0, &env->fp_status);
426 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
429 void helper_fucom_ST0_FT0(CPUX86State *env)
431 int ret;
433 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
434 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
/* EFLAGS ZF/PF/CF patterns indexed by (floatx80_compare result + 1):
   less, equal, greater, unordered. */
static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
439 void helper_fcomi_ST0_FT0(CPUX86State *env)
441 int eflags;
442 int ret;
444 ret = floatx80_compare(ST0, FT0, &env->fp_status);
445 eflags = cpu_cc_compute_all(env, CC_OP);
446 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
447 CC_SRC = eflags;
450 void helper_fucomi_ST0_FT0(CPUX86State *env)
452 int eflags;
453 int ret;
455 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
456 eflags = cpu_cc_compute_all(env, CC_OP);
457 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
458 CC_SRC = eflags;
461 void helper_fadd_ST0_FT0(CPUX86State *env)
463 ST0 = floatx80_add(ST0, FT0, &env->fp_status);
466 void helper_fmul_ST0_FT0(CPUX86State *env)
468 ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
471 void helper_fsub_ST0_FT0(CPUX86State *env)
473 ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
476 void helper_fsubr_ST0_FT0(CPUX86State *env)
478 ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
481 void helper_fdiv_ST0_FT0(CPUX86State *env)
483 ST0 = helper_fdiv(env, ST0, FT0);
486 void helper_fdivr_ST0_FT0(CPUX86State *env)
488 ST0 = helper_fdiv(env, FT0, ST0);
491 /* fp operations between STN and ST0 */
493 void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
495 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
498 void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
500 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
503 void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
505 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
508 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
510 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
513 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
515 floatx80 *p;
517 p = &ST(st_index);
518 *p = helper_fdiv(env, *p, ST0);
521 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
523 floatx80 *p;
525 p = &ST(st_index);
526 *p = helper_fdiv(env, ST0, *p);
529 /* misc FPU operations */
530 void helper_fchs_ST0(CPUX86State *env)
532 ST0 = floatx80_chs(ST0);
535 void helper_fabs_ST0(CPUX86State *env)
537 ST0 = floatx80_abs(ST0);
540 void helper_fld1_ST0(CPUX86State *env)
542 ST0 = floatx80_one;
545 void helper_fldl2t_ST0(CPUX86State *env)
547 ST0 = floatx80_l2t;
550 void helper_fldl2e_ST0(CPUX86State *env)
552 ST0 = floatx80_l2e;
555 void helper_fldpi_ST0(CPUX86State *env)
557 ST0 = floatx80_pi;
560 void helper_fldlg2_ST0(CPUX86State *env)
562 ST0 = floatx80_lg2;
565 void helper_fldln2_ST0(CPUX86State *env)
567 ST0 = floatx80_ln2;
570 void helper_fldz_ST0(CPUX86State *env)
572 ST0 = floatx80_zero;
575 void helper_fldz_FT0(CPUX86State *env)
577 FT0 = floatx80_zero;
580 uint32_t helper_fnstsw(CPUX86State *env)
582 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
585 uint32_t helper_fnstcw(CPUX86State *env)
587 return env->fpuc;
590 void update_fp_status(CPUX86State *env)
592 int rnd_type;
594 /* set rounding mode */
595 switch (env->fpuc & FPU_RC_MASK) {
596 default:
597 case FPU_RC_NEAR:
598 rnd_type = float_round_nearest_even;
599 break;
600 case FPU_RC_DOWN:
601 rnd_type = float_round_down;
602 break;
603 case FPU_RC_UP:
604 rnd_type = float_round_up;
605 break;
606 case FPU_RC_CHOP:
607 rnd_type = float_round_to_zero;
608 break;
610 set_float_rounding_mode(rnd_type, &env->fp_status);
611 switch ((env->fpuc >> 8) & 3) {
612 case 0:
613 rnd_type = 32;
614 break;
615 case 2:
616 rnd_type = 64;
617 break;
618 case 3:
619 default:
620 rnd_type = 80;
621 break;
623 set_floatx80_rounding_precision(rnd_type, &env->fp_status);
626 void helper_fldcw(CPUX86State *env, uint32_t val)
628 cpu_set_fpuc(env, val);
631 void helper_fclex(CPUX86State *env)
633 env->fpus &= 0x7f00;
636 void helper_fwait(CPUX86State *env)
638 if (env->fpus & FPUS_SE) {
639 fpu_raise_exception(env, GETPC());
643 void helper_fninit(CPUX86State *env)
645 env->fpus = 0;
646 env->fpstt = 0;
647 cpu_set_fpuc(env, 0x37f);
648 env->fptags[0] = 1;
649 env->fptags[1] = 1;
650 env->fptags[2] = 1;
651 env->fptags[3] = 1;
652 env->fptags[4] = 1;
653 env->fptags[5] = 1;
654 env->fptags[6] = 1;
655 env->fptags[7] = 1;
658 /* BCD ops */
660 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
662 floatx80 tmp;
663 uint64_t val;
664 unsigned int v;
665 int i;
667 val = 0;
668 for (i = 8; i >= 0; i--) {
669 v = cpu_ldub_data_ra(env, ptr + i, GETPC());
670 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
672 tmp = int64_to_floatx80(val, &env->fp_status);
673 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
674 tmp = floatx80_chs(tmp);
676 fpush(env);
677 ST0 = tmp;
680 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
682 int v;
683 target_ulong mem_ref, mem_end;
684 int64_t val;
686 val = floatx80_to_int64(ST0, &env->fp_status);
687 mem_ref = ptr;
688 mem_end = mem_ref + 9;
689 if (val < 0) {
690 cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
691 val = -val;
692 } else {
693 cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
695 while (mem_ref < mem_end) {
696 if (val == 0) {
697 break;
699 v = val % 100;
700 val = val / 100;
701 v = ((v / 10) << 4) | (v % 10);
702 cpu_stb_data_ra(env, mem_ref++, v, GETPC());
704 while (mem_ref < mem_end) {
705 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
709 void helper_f2xm1(CPUX86State *env)
711 double val = floatx80_to_double(env, ST0);
713 val = pow(2.0, val) - 1.0;
714 ST0 = double_to_floatx80(env, val);
717 void helper_fyl2x(CPUX86State *env)
719 double fptemp = floatx80_to_double(env, ST0);
721 if (fptemp > 0.0) {
722 fptemp = log(fptemp) / log(2.0); /* log2(ST) */
723 fptemp *= floatx80_to_double(env, ST1);
724 ST1 = double_to_floatx80(env, fptemp);
725 fpop(env);
726 } else {
727 env->fpus &= ~0x4700;
728 env->fpus |= 0x400;
732 void helper_fptan(CPUX86State *env)
734 double fptemp = floatx80_to_double(env, ST0);
736 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
737 env->fpus |= 0x400;
738 } else {
739 fptemp = tan(fptemp);
740 ST0 = double_to_floatx80(env, fptemp);
741 fpush(env);
742 ST0 = floatx80_one;
743 env->fpus &= ~0x400; /* C2 <-- 0 */
744 /* the above code is for |arg| < 2**52 only */
748 void helper_fpatan(CPUX86State *env)
750 double fptemp, fpsrcop;
752 fpsrcop = floatx80_to_double(env, ST1);
753 fptemp = floatx80_to_double(env, ST0);
754 ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
755 fpop(env);
758 void helper_fxtract(CPUX86State *env)
760 CPU_LDoubleU temp;
762 temp.d = ST0;
764 if (floatx80_is_zero(ST0)) {
765 /* Easy way to generate -inf and raising division by 0 exception */
766 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
767 &env->fp_status);
768 fpush(env);
769 ST0 = temp.d;
770 } else {
771 int expdif;
773 expdif = EXPD(temp) - EXPBIAS;
774 /* DP exponent bias */
775 ST0 = int32_to_floatx80(expdif, &env->fp_status);
776 fpush(env);
777 BIASEXPONENT(temp);
778 ST0 = temp.d;
/*
 * FPREM1: IEEE partial remainder of ST0 by ST1 (round-to-nearest
 * quotient), computed via host doubles.  When the exponent difference is
 * 53 or more, only a partial reduction is performed and C2 is set so the
 * guest loops; otherwise C0/C3/C1 receive the low three quotient bits.
 */
void helper_fprem1(CPUX86State *env)
{
    double st0, st1, dblq, fpsrcop, fptemp;
    CPU_LDoubleU fpsrcop1, fptemp1;
    int expdif;
    signed long long int q;

    st0 = floatx80_to_double(env, ST0);
    st1 = floatx80_to_double(env, ST1);

    /* Invalid operands: produce a NaN and clear the condition codes. */
    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        return;
    }

    fpsrcop = st0;
    fptemp = st1;
    fpsrcop1.d = ST0;
    fptemp1.d = ST1;
    /* Compare raw 80-bit exponents to choose full vs partial reduction. */
    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);

    if (expdif < 0) {
        /* optimisation? taken from the AMD docs */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* ST0 is unchanged */
        return;
    }

    if (expdif < 53) {
        dblq = fpsrcop / fptemp;
        /* round dblq towards nearest integer */
        dblq = rint(dblq);
        st0 = fpsrcop - fptemp * dblq;

        /* convert dblq to q by truncating towards zero */
        if (dblq < 0.0) {
            q = (signed long long int)(-dblq);
        } else {
            q = (signed long long int)dblq;
        }

        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* (C0,C3,C1) <-- (q2,q1,q0) */
        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
    } else {
        env->fpus |= 0x400; /* C2 <-- 1: reduction incomplete */
        fptemp = pow(2.0, expdif - 50);
        fpsrcop = (st0 / st1) / fptemp;
        /* fpsrcop = integer obtained by chopping */
        fpsrcop = (fpsrcop < 0.0) ?
                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
        st0 -= (st1 * fpsrcop * fptemp);
    }
    ST0 = double_to_floatx80(env, st0);
}
/*
 * FPREM: partial remainder of ST0 by ST1 with truncated (chop) quotient,
 * computed via host doubles.  When the exponent difference is 53 or more
 * only a partial reduction is performed (step size N per the AMD docs)
 * and C2 is set so the guest loops; otherwise C0/C3/C1 receive the low
 * three quotient bits.
 */
void helper_fprem(CPUX86State *env)
{
    double st0, st1, dblq, fpsrcop, fptemp;
    CPU_LDoubleU fpsrcop1, fptemp1;
    int expdif;
    signed long long int q;

    st0 = floatx80_to_double(env, ST0);
    st1 = floatx80_to_double(env, ST1);

    /* Invalid operands: produce a NaN and clear the condition codes. */
    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        return;
    }

    fpsrcop = st0;
    fptemp = st1;
    fpsrcop1.d = ST0;
    fptemp1.d = ST1;
    /* Compare raw 80-bit exponents to choose full vs partial reduction. */
    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);

    if (expdif < 0) {
        /* optimisation? taken from the AMD docs */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* ST0 is unchanged */
        return;
    }

    if (expdif < 53) {
        dblq = fpsrcop / fptemp; /* ST0 / ST1 */
        /* round dblq towards zero */
        dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
        st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */

        /* convert dblq to q by truncating towards zero */
        if (dblq < 0.0) {
            q = (signed long long int)(-dblq);
        } else {
            q = (signed long long int)dblq;
        }

        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* (C0,C3,C1) <-- (q2,q1,q0) */
        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
    } else {
        int N = 32 + (expdif % 32); /* as per AMD docs */

        env->fpus |= 0x400; /* C2 <-- 1: reduction incomplete */
        fptemp = pow(2.0, (double)(expdif - N));
        fpsrcop = (st0 / st1) / fptemp;
        /* fpsrcop = integer obtained by chopping */
        fpsrcop = (fpsrcop < 0.0) ?
                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
        st0 -= (st1 * fpsrcop * fptemp);
    }
    ST0 = double_to_floatx80(env, st0);
}
902 void helper_fyl2xp1(CPUX86State *env)
904 double fptemp = floatx80_to_double(env, ST0);
906 if ((fptemp + 1.0) > 0.0) {
907 fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
908 fptemp *= floatx80_to_double(env, ST1);
909 ST1 = double_to_floatx80(env, fptemp);
910 fpop(env);
911 } else {
912 env->fpus &= ~0x4700;
913 env->fpus |= 0x400;
917 void helper_fsqrt(CPUX86State *env)
919 if (floatx80_is_neg(ST0)) {
920 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
921 env->fpus |= 0x400;
923 ST0 = floatx80_sqrt(ST0, &env->fp_status);
926 void helper_fsincos(CPUX86State *env)
928 double fptemp = floatx80_to_double(env, ST0);
930 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
931 env->fpus |= 0x400;
932 } else {
933 ST0 = double_to_floatx80(env, sin(fptemp));
934 fpush(env);
935 ST0 = double_to_floatx80(env, cos(fptemp));
936 env->fpus &= ~0x400; /* C2 <-- 0 */
937 /* the above code is for |arg| < 2**63 only */
941 void helper_frndint(CPUX86State *env)
943 ST0 = floatx80_round_to_int(ST0, &env->fp_status);
946 void helper_fscale(CPUX86State *env)
948 if (floatx80_is_any_nan(ST1)) {
949 ST0 = ST1;
950 } else {
951 int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
952 ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
956 void helper_fsin(CPUX86State *env)
958 double fptemp = floatx80_to_double(env, ST0);
960 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
961 env->fpus |= 0x400;
962 } else {
963 ST0 = double_to_floatx80(env, sin(fptemp));
964 env->fpus &= ~0x400; /* C2 <-- 0 */
965 /* the above code is for |arg| < 2**53 only */
969 void helper_fcos(CPUX86State *env)
971 double fptemp = floatx80_to_double(env, ST0);
973 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
974 env->fpus |= 0x400;
975 } else {
976 ST0 = double_to_floatx80(env, cos(fptemp));
977 env->fpus &= ~0x400; /* C2 <-- 0 */
978 /* the above code is for |arg| < 2**63 only */
982 void helper_fxam_ST0(CPUX86State *env)
984 CPU_LDoubleU temp;
985 int expdif;
987 temp.d = ST0;
989 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
990 if (SIGND(temp)) {
991 env->fpus |= 0x200; /* C1 <-- 1 */
994 /* XXX: test fptags too */
995 expdif = EXPD(temp);
996 if (expdif == MAXEXPD) {
997 if (MANTD(temp) == 0x8000000000000000ULL) {
998 env->fpus |= 0x500; /* Infinity */
999 } else {
1000 env->fpus |= 0x100; /* NaN */
1002 } else if (expdif == 0) {
1003 if (MANTD(temp) == 0) {
1004 env->fpus |= 0x4000; /* Zero */
1005 } else {
1006 env->fpus |= 0x4400; /* Denormal */
1008 } else {
1009 env->fpus |= 0x400;
/*
 * Store the x87 environment (FSTENV layout) to guest memory at ptr:
 * control word, status word (with live TOP merged in), tag word, and
 * zeroed instruction/operand pointer slots.  data32 selects the 32-bit
 * (28-byte) vs 16-bit (14-byte) layout.
 */
static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
                      uintptr_t retaddr)
{
    int fpus, fptag, exp, i;
    uint64_t mant;
    CPU_LDoubleU tmp;

    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    /* Rebuild the 2-bit-per-register tag word from fptags and the
       register contents (00 valid, 01 zero, 10 special, 11 empty). */
    for (i = 7; i >= 0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3;
        } else {
            tmp.d = env->fpregs[i].d;
            exp = EXPD(tmp);
            mant = MANTD(tmp);
            if (exp == 0 && mant == 0) {
                /* zero */
                fptag |= 1;
            } else if (exp == 0 || exp == MAXEXPD
                       || (mant & (1LL << 63)) == 0) {
                /* NaNs, infinity, denormal */
                fptag |= 2;
            }
        }
    }
    if (data32) {
        /* 32 bit */
        cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
        cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
        cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
        cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
        cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
        cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
        cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
    } else {
        /* 16 bit */
        cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
        cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
        cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
        cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
        cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
        cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
        cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
    }
}
1061 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1063 do_fstenv(env, ptr, data32, GETPC());
1066 static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
1068 env->fpstt = (fpus >> 11) & 7;
1069 env->fpus = fpus & ~0x3800 & ~FPUS_B;
1070 env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
1071 #if !defined(CONFIG_USER_ONLY)
1072 if (!(env->fpus & FPUS_SE)) {
1074 * Here the processor deasserts FERR#; in response, the chipset deasserts
1075 * IGNNE#.
1077 cpu_clear_ignne();
1079 #endif
1082 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1083 uintptr_t retaddr)
1085 int i, fpus, fptag;
1087 if (data32) {
1088 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1089 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1090 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1091 } else {
1092 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1093 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1094 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1096 cpu_set_fpus(env, fpus);
1097 for (i = 0; i < 8; i++) {
1098 env->fptags[i] = ((fptag & 3) == 3);
1099 fptag >>= 2;
1103 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1105 do_fldenv(env, ptr, data32, GETPC());
1108 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1110 floatx80 tmp;
1111 int i;
1113 do_fstenv(env, ptr, data32, GETPC());
1115 ptr += (14 << data32);
1116 for (i = 0; i < 8; i++) {
1117 tmp = ST(i);
1118 helper_fstt(env, tmp, ptr, GETPC());
1119 ptr += 10;
1122 /* fninit */
1123 env->fpus = 0;
1124 env->fpstt = 0;
1125 cpu_set_fpuc(env, 0x37f);
1126 env->fptags[0] = 1;
1127 env->fptags[1] = 1;
1128 env->fptags[2] = 1;
1129 env->fptags[3] = 1;
1130 env->fptags[4] = 1;
1131 env->fptags[5] = 1;
1132 env->fptags[6] = 1;
1133 env->fptags[7] = 1;
1136 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
1138 floatx80 tmp;
1139 int i;
1141 do_fldenv(env, ptr, data32, GETPC());
1142 ptr += (14 << data32);
1144 for (i = 0; i < 8; i++) {
1145 tmp = helper_fldt(env, ptr, GETPC());
1146 ST(i) = tmp;
1147 ptr += 10;
1151 #if defined(CONFIG_USER_ONLY)
1152 void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
1154 helper_fsave(env, ptr, data32);
1157 void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
1159 helper_frstor(env, ptr, data32);
1161 #endif
1163 #define XO(X) offsetof(X86XSaveArea, X)
1165 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1167 int fpus, fptag, i;
1168 target_ulong addr;
1170 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1171 fptag = 0;
1172 for (i = 0; i < 8; i++) {
1173 fptag |= (env->fptags[i] << i);
1176 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
1177 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
1178 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
1180 /* In 32-bit mode this is eip, sel, dp, sel.
1181 In 64-bit mode this is rip, rdp.
1182 But in either case we don't write actual data, just zeros. */
1183 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
1184 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
1186 addr = ptr + XO(legacy.fpregs);
1187 for (i = 0; i < 8; i++) {
1188 floatx80 tmp = ST(i);
1189 helper_fstt(env, tmp, addr, ra);
1190 addr += 16;
1194 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1196 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
1197 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
1200 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1202 int i, nb_xmm_regs;
1203 target_ulong addr;
1205 if (env->hflags & HF_CS64_MASK) {
1206 nb_xmm_regs = 16;
1207 } else {
1208 nb_xmm_regs = 8;
1211 addr = ptr + XO(legacy.xmm_regs);
1212 for (i = 0; i < nb_xmm_regs; i++) {
1213 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1214 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1215 addr += 16;
1219 static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1221 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1222 int i;
1224 for (i = 0; i < 4; i++, addr += 16) {
1225 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1226 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1230 static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1232 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
1233 env->bndcs_regs.cfgu, ra);
1234 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
1235 env->bndcs_regs.sts, ra);
1238 static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1240 cpu_stq_data_ra(env, ptr, env->pkru, ra);
1243 void helper_fxsave(CPUX86State *env, target_ulong ptr)
1245 uintptr_t ra = GETPC();
1247 /* The operand must be 16 byte aligned */
1248 if (ptr & 0xf) {
1249 raise_exception_ra(env, EXCP0D_GPF, ra);
1252 do_xsave_fpu(env, ptr, ra);
1254 if (env->cr[4] & CR4_OSFXSR_MASK) {
1255 do_xsave_mxcsr(env, ptr, ra);
1256 /* Fast FXSAVE leaves out the XMM registers */
1257 if (!(env->efer & MSR_EFER_FFXSR)
1258 || (env->hflags & HF_CPL_MASK)
1259 || !(env->hflags & HF_LMA_MASK)) {
1260 do_xsave_sse(env, ptr, ra);
1265 static uint64_t get_xinuse(CPUX86State *env)
1267 uint64_t inuse = -1;
1269 /* For the most part, we don't track XINUSE. We could calculate it
1270 here for all components, but it's probably less work to simply
1271 indicate in use. That said, the state of BNDREGS is important
1272 enough to track in HFLAGS, so we might as well use that here. */
1273 if ((env->hflags & HF_MPX_IU_MASK) == 0) {
1274 inuse &= ~XSTATE_BNDREGS_MASK;
1276 return inuse;
1279 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1280 uint64_t inuse, uint64_t opt, uintptr_t ra)
1282 uint64_t old_bv, new_bv;
1284 /* The OS must have enabled XSAVE. */
1285 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1286 raise_exception_ra(env, EXCP06_ILLOP, ra);
1289 /* The operand must be 64 byte aligned. */
1290 if (ptr & 63) {
1291 raise_exception_ra(env, EXCP0D_GPF, ra);
1294 /* Never save anything not enabled by XCR0. */
1295 rfbm &= env->xcr0;
1296 opt &= rfbm;
1298 if (opt & XSTATE_FP_MASK) {
1299 do_xsave_fpu(env, ptr, ra);
1301 if (rfbm & XSTATE_SSE_MASK) {
1302 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
1303 do_xsave_mxcsr(env, ptr, ra);
1305 if (opt & XSTATE_SSE_MASK) {
1306 do_xsave_sse(env, ptr, ra);
1308 if (opt & XSTATE_BNDREGS_MASK) {
1309 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
1311 if (opt & XSTATE_BNDCSR_MASK) {
1312 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
1314 if (opt & XSTATE_PKRU_MASK) {
1315 do_xsave_pkru(env, ptr + XO(pkru_state), ra);
1318 /* Update the XSTATE_BV field. */
1319 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1320 new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
1321 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
1324 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1326 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1329 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1331 uint64_t inuse = get_xinuse(env);
1332 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1335 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1337 int i, fpuc, fpus, fptag;
1338 target_ulong addr;
1340 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
1341 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
1342 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
1343 cpu_set_fpuc(env, fpuc);
1344 cpu_set_fpus(env, fpus);
1345 fptag ^= 0xff;
1346 for (i = 0; i < 8; i++) {
1347 env->fptags[i] = ((fptag >> i) & 1);
1350 addr = ptr + XO(legacy.fpregs);
1351 for (i = 0; i < 8; i++) {
1352 floatx80 tmp = helper_fldt(env, addr, ra);
1353 ST(i) = tmp;
1354 addr += 16;
1358 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1360 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
1363 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1365 int i, nb_xmm_regs;
1366 target_ulong addr;
1368 if (env->hflags & HF_CS64_MASK) {
1369 nb_xmm_regs = 16;
1370 } else {
1371 nb_xmm_regs = 8;
1374 addr = ptr + XO(legacy.xmm_regs);
1375 for (i = 0; i < nb_xmm_regs; i++) {
1376 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1377 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1378 addr += 16;
1382 static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1384 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1385 int i;
1387 for (i = 0; i < 4; i++, addr += 16) {
1388 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1389 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1393 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1395 /* FIXME: Extend highest implemented bit of linear address. */
1396 env->bndcs_regs.cfgu
1397 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
1398 env->bndcs_regs.sts
1399 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
1402 static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1404 env->pkru = cpu_ldq_data_ra(env, ptr, ra);
/*
 * FXRSTOR: restore x87 state, and (if CR4.OSFXSR is set) MXCSR and
 * the XMM registers, from a 512-byte FXSAVE image at @ptr.
 */
void helper_fxrstor(CPUX86State *env, target_ulong ptr)
{
    uintptr_t ra = GETPC();

    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    do_xrstor_fpu(env, ptr, ra);

    if (env->cr[4] & CR4_OSFXSR_MASK) {
        do_xrstor_mxcsr(env, ptr, ra);
        /* Fast FXRSTOR leaves out the XMM registers */
        /* With EFER.FFXSR set, the XMM registers are skipped only when
           running at CPL 0 in long mode; otherwise restore them. */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            do_xrstor_sse(env, ptr, ra);
        }
    }
}
#if defined(CONFIG_USER_ONLY)
/* Thin wrappers exposing the fxsave/fxrstor helpers to user-mode-only
   code.  NOTE(review): presumably used by the *-user emulation front
   ends (e.g. signal frame handling) — confirm against callers. */
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
{
    helper_fxsave(env, ptr);
}

void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
{
    helper_fxrstor(env, ptr);
}
#endif
/*
 * XRSTOR (standard form): restore the components selected by
 * rfbm & XCR0 from the XSAVE area at @ptr.  Components requested in
 * rfbm but marked absent in the area's XSTATE_BV header are reset to
 * their architectural initial values instead of being loaded.
 */
void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uintptr_t ra = GETPC();
    uint64_t xstate_bv, xcomp_bv, reserve0;

    rfbm &= env->xcr0;

    /* The OS must have enabled XSAVE. */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The operand must be 64 byte aligned. */
    if (ptr & 63) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);

    /* Bit 63 of XCOMP_BV selects the compact format, which we do not
       implement; reject it with #GP. */
    if ((int64_t)xstate_bv < 0) {
        /* FIXME: Compact form. */
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* Standard form. */

    /* The XSTATE_BV field must not set bits not present in XCR0. */
    if (xstate_bv & ~env->xcr0) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* The XCOMP_BV field must be zero. Note that, as of the April 2016
       revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
       describes only XCOMP_BV, but the description of the standard form
       of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
       includes the next 64-bit field. */
    xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
    reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
    if (xcomp_bv || reserve0) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    if (rfbm & XSTATE_FP_MASK) {
        if (xstate_bv & XSTATE_FP_MASK) {
            do_xrstor_fpu(env, ptr, ra);
        } else {
            /* Component absent: reinitialize the x87 unit. */
            helper_fninit(env);
            memset(env->fpregs, 0, sizeof(env->fpregs));
        }
    }
    if (rfbm & XSTATE_SSE_MASK) {
        /* Note that the standard form of XRSTOR loads MXCSR from memory
           whether or not the XSTATE_BV bit is set. */
        do_xrstor_mxcsr(env, ptr, ra);
        if (xstate_bv & XSTATE_SSE_MASK) {
            do_xrstor_sse(env, ptr, ra);
        } else {
            /* ??? When AVX is implemented, we may have to be more
               selective in the clearing. */
            memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
        }
    }
    if (rfbm & XSTATE_BNDREGS_MASK) {
        if (xstate_bv & XSTATE_BNDREGS_MASK) {
            do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
            /* Loaded bounds are now live; track that in hflags. */
            env->hflags |= HF_MPX_IU_MASK;
        } else {
            memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
            env->hflags &= ~HF_MPX_IU_MASK;
        }
    }
    if (rfbm & XSTATE_BNDCSR_MASK) {
        if (xstate_bv & XSTATE_BNDCSR_MASK) {
            do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
        } else {
            memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
        }
        /* BNDCFGU may have changed; recompute the MPX hflags. */
        cpu_sync_bndcs_hflags(env);
    }
    if (rfbm & XSTATE_PKRU_MASK) {
        uint64_t old_pkru = env->pkru;
        if (xstate_bv & XSTATE_PKRU_MASK) {
            do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
        } else {
            env->pkru = 0;
        }
        /* A PKRU change affects page-access permissions, so cached
           translations must be discarded. */
        if (env->pkru != old_pkru) {
            CPUState *cs = env_cpu(env);
            tlb_flush(cs);
        }
    }
}
1534 #undef XO
1536 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1538 /* The OS must have enabled XSAVE. */
1539 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1540 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1543 switch (ecx) {
1544 case 0:
1545 return env->xcr0;
1546 case 1:
1547 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1548 return env->xcr0 & get_xinuse(env);
1550 break;
1552 raise_exception_ra(env, EXCP0D_GPF, GETPC());
/*
 * XSETBV: write extended control register @ecx with EDX:EAX (@mask).
 * Only XCR0 exists; invalid indices, unsupported feature bits, a clear
 * FP bit, or enabling only half of the MPX pair all raise #GP.
 */
void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
{
    uint32_t dummy, ena_lo, ena_hi;
    uint64_t ena;

    /* The OS must have enabled XSAVE. */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
    }

    /* Only XCR0 is defined at present; the FPU may not be disabled. */
    if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
        goto do_gpf;
    }

    /* Disallow enabling unimplemented features: CPUID leaf 0xD,
       subleaf 0 reports the supported XCR0 bits in EDX:EAX. */
    cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
    ena = ((uint64_t)ena_hi << 32) | ena_lo;
    if (mask & ~ena) {
        goto do_gpf;
    }

    /* Disallow enabling only half of MPX.  The multiply shifts the
       BNDREGS bit up to the BNDCSR position (the mask ratio is a power
       of two), so the XOR has the BNDCSR bit set exactly when the two
       MPX bits disagree. */
    if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
        & XSTATE_BNDCSR_MASK) {
        goto do_gpf;
    }

    env->xcr0 = mask;
    cpu_sync_bndcs_hflags(env);
    return;

 do_gpf:
    raise_exception_ra(env, EXCP0D_GPF, GETPC());
}
1591 /* MMX/SSE */
1592 /* XXX: optimize by storing fptt and fptags in the static cpu state */
1594 #define SSE_DAZ 0x0040
1595 #define SSE_RC_MASK 0x6000
1596 #define SSE_RC_NEAR 0x0000
1597 #define SSE_RC_DOWN 0x2000
1598 #define SSE_RC_UP 0x4000
1599 #define SSE_RC_CHOP 0x6000
1600 #define SSE_FZ 0x8000
1602 void update_mxcsr_status(CPUX86State *env)
1604 uint32_t mxcsr = env->mxcsr;
1605 int rnd_type;
1607 /* set rounding mode */
1608 switch (mxcsr & SSE_RC_MASK) {
1609 default:
1610 case SSE_RC_NEAR:
1611 rnd_type = float_round_nearest_even;
1612 break;
1613 case SSE_RC_DOWN:
1614 rnd_type = float_round_down;
1615 break;
1616 case SSE_RC_UP:
1617 rnd_type = float_round_up;
1618 break;
1619 case SSE_RC_CHOP:
1620 rnd_type = float_round_to_zero;
1621 break;
1623 set_float_rounding_mode(rnd_type, &env->sse_status);
1625 /* set denormals are zero */
1626 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
1628 /* set flush to zero */
1629 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
/* LDMXCSR: store @val into MXCSR and refresh the derived SSE
   softfloat state via cpu_set_mxcsr(). */
void helper_ldmxcsr(CPUX86State *env, uint32_t val)
{
    cpu_set_mxcsr(env, val);
}
1637 void helper_enter_mmx(CPUX86State *env)
1639 env->fpstt = 0;
1640 *(uint32_t *)(env->fptags) = 0;
1641 *(uint32_t *)(env->fptags + 4) = 0;
1644 void helper_emms(CPUX86State *env)
1646 /* set to empty state */
1647 *(uint32_t *)(env->fptags) = 0x01010101;
1648 *(uint32_t *)(env->fptags + 4) = 0x01010101;
1651 /* XXX: suppress */
1652 void helper_movq(CPUX86State *env, void *d, void *s)
1654 *(uint64_t *)d = *(uint64_t *)s;
1657 #define SHIFT 0
1658 #include "ops_sse.h"
1660 #define SHIFT 1
1661 #include "ops_sse.h"