target/i386: fix fisttpl, fisttpll handling of out-of-range values
[qemu/ar7.git] / target / i386 / fpu_helper.c
blob9c93f385b14fca0e1a6b967a357d65a07d5cabbc
1 /*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include <math.h>
22 #include "cpu.h"
23 #include "exec/helper-proto.h"
24 #include "qemu/host-utils.h"
25 #include "exec/exec-all.h"
26 #include "exec/cpu_ldst.h"
27 #include "fpu/softfloat.h"
29 #ifdef CONFIG_SOFTMMU
30 #include "hw/irq.h"
31 #endif
/* FPU control word rounding-control field (FPUC bits 10-11) */
#define FPU_RC_MASK 0xc00
#define FPU_RC_NEAR 0x000
#define FPU_RC_DOWN 0x400
#define FPU_RC_UP 0x800
#define FPU_RC_CHOP 0xc00

/* Range limit (2**63) for the double-based trig/tan helpers below */
#define MAXTAN 9223372036854775808.0

/* the following deal with x86 long double-precision numbers */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
#define EXPD(fp) (fp.l.upper & 0x7fff)    /* biased exponent field */
#define SIGND(fp) ((fp.l.upper) & 0x8000) /* sign bit */
#define MANTD(fp) (fp.l.lower)            /* 64-bit mantissa */
/* force the exponent to the bias (keeping the sign bit), i.e. |value| in [1, 2) */
#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS

/* FPU status word exception and status bits */
#define FPUS_IE (1 << 0) /* invalid operation */
#define FPUS_DE (1 << 1) /* denormalized operand */
#define FPUS_ZE (1 << 2) /* zero divide */
#define FPUS_OE (1 << 3) /* overflow */
#define FPUS_UE (1 << 4) /* underflow */
#define FPUS_PE (1 << 5) /* precision */
#define FPUS_SF (1 << 6) /* stack fault */
#define FPUS_SE (1 << 7) /* error summary status */
#define FPUS_B (1 << 15) /* FPU busy */

/* exception-mask bits of the FPU control word */
#define FPUC_EM 0x3f

/* x87 constants; the _d/_u variants are the images rounded down/up,
   used for the non-default rounding-control settings */
#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
#define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
#define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
#define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)
#if !defined(CONFIG_USER_ONLY)
/* IRQ raised on an unmasked x87 exception when CR0.NE is clear (FERR# path) */
static qemu_irq ferr_irq;

/* Register the chipset IRQ used to model the legacy FERR#/FP_IRQ line. */
void x86_register_ferr_irq(qemu_irq irq)
{
    ferr_irq = irq;
}

/* Drop the IGNNE# hflag on the first CPU (chipset deasserted IGNNE#). */
static void cpu_clear_ignne(void)
{
    CPUX86State *env = &X86_CPU(first_cpu)->env;
    env->hflags2 &= ~HF2_IGNNE_MASK;
}

void cpu_set_ignne(void)
{
    CPUX86State *env = &X86_CPU(first_cpu)->env;
    env->hflags2 |= HF2_IGNNE_MASK;
    /*
     * We get here in response to a write to port F0h.  The chipset should
     * deassert FP_IRQ and FERR# instead should stay signaled until FPSW_SE is
     * cleared, because FERR# and FP_IRQ are two separate pins on real
     * hardware.  However, we don't model FERR# as a qemu_irq, so we just
     * do directly what the chipset would do, i.e. deassert FP_IRQ.
     */
    qemu_irq_lower(ferr_irq);
}
#endif
/* Push: rotate the stack-top index down and mark the new top slot valid. */
static inline void fpush(CPUX86State *env)
{
    env->fpstt = (env->fpstt - 1) & 7;
    env->fptags[env->fpstt] = 0; /* validate stack entry */
}

/* Pop: mark the current top slot empty, then rotate the index up. */
static inline void fpop(CPUX86State *env)
{
    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
    env->fpstt = (env->fpstt + 1) & 7;
}
/* Load an 80-bit extended value from guest memory: 64-bit mantissa at
   ptr, 16-bit sign/exponent word at ptr + 8. */
static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
                                   uintptr_t retaddr)
{
    CPU_LDoubleU temp;

    temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
    temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
    return temp.d;
}

/* Store an 80-bit extended value to guest memory (same layout as above). */
static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
                               uintptr_t retaddr)
{
    CPU_LDoubleU temp;

    temp.d = f;
    cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
    cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
}
/* x87 FPU helpers */

/* Convert floatx80 to a host double by way of float64; precision beyond
   53 bits is lost.  Used by the transcendental helpers below. */
static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
{
    union {
        float64 f64;
        double d;
    } u;

    u.f64 = floatx80_to_float64(a, &env->fp_status);
    return u.d;
}

/* Convert a host double back to floatx80 (type-pun through float64). */
static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
{
    union {
        float64 f64;
        double d;
    } u;

    u.d = a;
    return float64_to_floatx80(u.f64, &env->fp_status);
}
/* Merge exception bits into FPUS; if any set exception is unmasked in the
   control word, also set the summary-status and busy flags. */
static void fpu_set_exception(CPUX86State *env, int mask)
{
    env->fpus |= mask;
    if (env->fpus & (~env->fpuc & FPUC_EM)) {
        env->fpus |= FPUS_SE | FPUS_B;
    }
}

/* Division that records the zero-divide exception before dividing. */
static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
{
    if (floatx80_is_zero(b)) {
        fpu_set_exception(env, FPUS_ZE);
    }
    return floatx80_div(a, b, &env->fp_status);
}
/* Deliver a pending x87 exception: #MF if CR0.NE is set, otherwise raise
   the external FERR# interrupt line (legacy PC behaviour, system mode only). */
static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
{
    if (env->cr[0] & CR0_NE_MASK) {
        raise_exception_ra(env, EXCP10_COPR, retaddr);
    }
#if !defined(CONFIG_USER_ONLY)
    else if (ferr_irq && !(env->hflags2 & HF2_IGNNE_MASK)) {
        qemu_irq_raise(ferr_irq);
    }
#endif
}
/* FLDS: load a 32-bit float into the FT0 scratch register. */
void helper_flds_FT0(CPUX86State *env, uint32_t val)
{
    union {
        float32 f;
        uint32_t i;
    } u;

    u.i = val;
    FT0 = float32_to_floatx80(u.f, &env->fp_status);
}

/* FLDL: load a 64-bit double into FT0. */
void helper_fldl_FT0(CPUX86State *env, uint64_t val)
{
    union {
        float64 f;
        uint64_t i;
    } u;

    u.i = val;
    FT0 = float64_to_floatx80(u.f, &env->fp_status);
}

/* FILDL: load a signed 32-bit integer into FT0 (always exact in floatx80). */
void helper_fildl_FT0(CPUX86State *env, int32_t val)
{
    FT0 = int32_to_floatx80(val, &env->fp_status);
}
/* The ST0 loaders below open-code fpush(): the value is written into the
   new slot first, and fpstt/fptags are only updated afterwards. */

/* FLDS: convert a 32-bit float and push it. */
void helper_flds_ST0(CPUX86State *env, uint32_t val)
{
    int new_fpstt;
    union {
        float32 f;
        uint32_t i;
    } u;

    new_fpstt = (env->fpstt - 1) & 7;
    u.i = val;
    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

/* FLDL: convert a 64-bit double and push it. */
void helper_fldl_ST0(CPUX86State *env, uint64_t val)
{
    int new_fpstt;
    union {
        float64 f;
        uint64_t i;
    } u;

    new_fpstt = (env->fpstt - 1) & 7;
    u.i = val;
    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

/* FILDL: convert a signed 32-bit integer and push it. */
void helper_fildl_ST0(CPUX86State *env, int32_t val)
{
    int new_fpstt;

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

/* FILDLL: convert a signed 64-bit integer and push it. */
void helper_fildll_ST0(CPUX86State *env, int64_t val)
{
    int new_fpstt;

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}
/* FSTS: round ST0 to a 32-bit float and return its raw bits. */
uint32_t helper_fsts_ST0(CPUX86State *env)
{
    union {
        float32 f;
        uint32_t i;
    } u;

    u.f = floatx80_to_float32(ST0, &env->fp_status);
    return u.i;
}

/* FSTL: round ST0 to a 64-bit double and return its raw bits. */
uint64_t helper_fstl_ST0(CPUX86State *env)
{
    union {
        float64 f;
        uint64_t i;
    } u;

    u.f = floatx80_to_float64(ST0, &env->fp_status);
    return u.i;
}
/* FIST (16-bit): convert ST0 with the current rounding mode; results that
   do not fit in int16 become the 16-bit integer indefinite (-32768). */
int32_t helper_fist_ST0(CPUX86State *env)
{
    int32_t val;

    val = floatx80_to_int32(ST0, &env->fp_status);
    if (val != (int16_t)val) {
        val = -32768;
    }
    return val;
}
294 int32_t helper_fistl_ST0(CPUX86State *env)
296 int32_t val;
297 signed char old_exp_flags;
299 old_exp_flags = get_float_exception_flags(&env->fp_status);
300 set_float_exception_flags(0, &env->fp_status);
302 val = floatx80_to_int32(ST0, &env->fp_status);
303 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
304 val = 0x80000000;
306 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
307 | old_exp_flags, &env->fp_status);
308 return val;
311 int64_t helper_fistll_ST0(CPUX86State *env)
313 int64_t val;
314 signed char old_exp_flags;
316 old_exp_flags = get_float_exception_flags(&env->fp_status);
317 set_float_exception_flags(0, &env->fp_status);
319 val = floatx80_to_int64(ST0, &env->fp_status);
320 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
321 val = 0x8000000000000000ULL;
323 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
324 | old_exp_flags, &env->fp_status);
325 return val;
/* FISTT (16-bit): truncating conversion; results that do not fit in int16
   become the 16-bit integer indefinite (-32768). */
int32_t helper_fistt_ST0(CPUX86State *env)
{
    int32_t val;

    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    if (val != (int16_t)val) {
        val = -32768;
    }
    return val;
}
339 int32_t helper_fisttl_ST0(CPUX86State *env)
341 int32_t val;
342 signed char old_exp_flags;
344 old_exp_flags = get_float_exception_flags(&env->fp_status);
345 set_float_exception_flags(0, &env->fp_status);
347 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
348 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
349 val = 0x80000000;
351 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
352 | old_exp_flags, &env->fp_status);
353 return val;
356 int64_t helper_fisttll_ST0(CPUX86State *env)
358 int64_t val;
359 signed char old_exp_flags;
361 old_exp_flags = get_float_exception_flags(&env->fp_status);
362 set_float_exception_flags(0, &env->fp_status);
364 val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
365 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
366 val = 0x8000000000000000ULL;
368 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
369 | old_exp_flags, &env->fp_status);
370 return val;
/* FLDT: push an 80-bit value loaded from guest memory. */
void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
{
    int new_fpstt;

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

/* FSTT: store ST0 to guest memory in 80-bit format. */
void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
{
    helper_fstt(env, ST0, ptr, GETPC());
}
void helper_fpush(CPUX86State *env)
{
    fpush(env);
}

void helper_fpop(CPUX86State *env)
{
    fpop(env);
}

/* FDECSTP: rotate TOP down without touching the tags; C0-C3 are cleared. */
void helper_fdecstp(CPUX86State *env)
{
    env->fpstt = (env->fpstt - 1) & 7;
    env->fpus &= ~0x4700;
}

/* FINCSTP: rotate TOP up without touching the tags; C0-C3 are cleared. */
void helper_fincstp(CPUX86State *env)
{
    env->fpstt = (env->fpstt + 1) & 7;
    env->fpus &= ~0x4700;
}
/* FPU move */

/* FFREE: mark ST(st_index) as empty. */
void helper_ffree_STN(CPUX86State *env, int st_index)
{
    env->fptags[(env->fpstt + st_index) & 7] = 1;
}

void helper_fmov_ST0_FT0(CPUX86State *env)
{
    ST0 = FT0;
}

void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
{
    FT0 = ST(st_index);
}

void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
{
    ST0 = ST(st_index);
}

void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = ST0;
}

/* FXCH: exchange ST0 with ST(st_index). */
void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
{
    floatx80 tmp;

    tmp = ST(st_index);
    ST(st_index) = ST0;
    ST0 = tmp;
}
/* FPU operations */

/* Map a FloatRelation (less/equal/greater/unordered, i.e. -1/0/1/2 after
   the +1 bias) to the C3,C2,C0 bit patterns FCOM sets. */
static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};

void helper_fcom_ST0_FT0(CPUX86State *env)
{
    FloatRelation ret;

    ret = floatx80_compare(ST0, FT0, &env->fp_status);
    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
}

/* FUCOM: quiet compare — quiet NaNs do not raise invalid. */
void helper_fucom_ST0_FT0(CPUX86State *env)
{
    FloatRelation ret;

    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
}

/* FCOMI/FUCOMI set EFLAGS ZF/PF/CF instead of the x87 condition codes. */
static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};

void helper_fcomi_ST0_FT0(CPUX86State *env)
{
    int eflags;
    FloatRelation ret;

    ret = floatx80_compare(ST0, FT0, &env->fp_status);
    eflags = cpu_cc_compute_all(env, CC_OP);
    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
    CC_SRC = eflags;
}

void helper_fucomi_ST0_FT0(CPUX86State *env)
{
    int eflags;
    FloatRelation ret;

    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    eflags = cpu_cc_compute_all(env, CC_OP);
    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
    CC_SRC = eflags;
}
/* Two-operand arithmetic between ST0 and the FT0 scratch register. */

void helper_fadd_ST0_FT0(CPUX86State *env)
{
    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
}

void helper_fmul_ST0_FT0(CPUX86State *env)
{
    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
}

void helper_fsub_ST0_FT0(CPUX86State *env)
{
    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
}

/* FSUBR/FDIVR use the reversed operand order. */
void helper_fsubr_ST0_FT0(CPUX86State *env)
{
    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
}

void helper_fdiv_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, ST0, FT0);
}

void helper_fdivr_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, FT0, ST0);
}

/* fp operations between STN and ST0 */

void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
}

void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
}

void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
}

void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
}

void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *p;

    p = &ST(st_index);
    *p = helper_fdiv(env, *p, ST0);
}

void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *p;

    p = &ST(st_index);
    *p = helper_fdiv(env, ST0, *p);
}
/* misc FPU operations */
void helper_fchs_ST0(CPUX86State *env)
{
    ST0 = floatx80_chs(ST0);
}

void helper_fabs_ST0(CPUX86State *env)
{
    ST0 = floatx80_abs(ST0);
}

void helper_fld1_ST0(CPUX86State *env)
{
    ST0 = floatx80_one;
}

/* Constant loaders for the irrational constants: the default image is the
   round-to-nearest one; the _d/_u variants are the images rounded down/up,
   selected according to the rounding-control field of FPUC. */
void helper_fldl2t_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_UP:
        ST0 = floatx80_l2t_u;
        break;
    default:
        ST0 = floatx80_l2t;
        break;
    }
}

void helper_fldl2e_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_l2e_d;
        break;
    default:
        ST0 = floatx80_l2e;
        break;
    }
}

void helper_fldpi_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_pi_d;
        break;
    default:
        ST0 = floatx80_pi;
        break;
    }
}

void helper_fldlg2_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_lg2_d;
        break;
    default:
        ST0 = floatx80_lg2;
        break;
    }
}

void helper_fldln2_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_ln2_d;
        break;
    default:
        ST0 = floatx80_ln2;
        break;
    }
}

void helper_fldz_ST0(CPUX86State *env)
{
    ST0 = floatx80_zero;
}

void helper_fldz_FT0(CPUX86State *env)
{
    FT0 = floatx80_zero;
}
/* FNSTSW: return the status word with the TOP field taken from fpstt. */
uint32_t helper_fnstsw(CPUX86State *env)
{
    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
}

/* FNSTCW: return the control word. */
uint32_t helper_fnstcw(CPUX86State *env)
{
    return env->fpuc;
}
658 void update_fp_status(CPUX86State *env)
660 int rnd_type;
662 /* set rounding mode */
663 switch (env->fpuc & FPU_RC_MASK) {
664 default:
665 case FPU_RC_NEAR:
666 rnd_type = float_round_nearest_even;
667 break;
668 case FPU_RC_DOWN:
669 rnd_type = float_round_down;
670 break;
671 case FPU_RC_UP:
672 rnd_type = float_round_up;
673 break;
674 case FPU_RC_CHOP:
675 rnd_type = float_round_to_zero;
676 break;
678 set_float_rounding_mode(rnd_type, &env->fp_status);
679 switch ((env->fpuc >> 8) & 3) {
680 case 0:
681 rnd_type = 32;
682 break;
683 case 2:
684 rnd_type = 64;
685 break;
686 case 3:
687 default:
688 rnd_type = 80;
689 break;
691 set_floatx80_rounding_precision(rnd_type, &env->fp_status);
/* FLDCW: load the control word and refresh the cached softfloat settings. */
void helper_fldcw(CPUX86State *env, uint32_t val)
{
    cpu_set_fpuc(env, val);
}

/* FNCLEX: clear the exception flags (bits 0-7) and the busy bit. */
void helper_fclex(CPUX86State *env)
{
    env->fpus &= 0x7f00;
}

/* FWAIT: deliver a pending unmasked exception, if any. */
void helper_fwait(CPUX86State *env)
{
    if (env->fpus & FPUS_SE) {
        fpu_raise_exception(env, GETPC());
    }
}
711 void helper_fninit(CPUX86State *env)
713 env->fpus = 0;
714 env->fpstt = 0;
715 cpu_set_fpuc(env, 0x37f);
716 env->fptags[0] = 1;
717 env->fptags[1] = 1;
718 env->fptags[2] = 1;
719 env->fptags[3] = 1;
720 env->fptags[4] = 1;
721 env->fptags[5] = 1;
722 env->fptags[6] = 1;
723 env->fptags[7] = 1;
/* BCD ops */

/* FBLD: load an 18-digit packed-BCD value (9 digit bytes, low byte first,
   plus a sign byte at offset 9) and push it. */
void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
{
    floatx80 tmp;
    uint64_t val;
    unsigned int v;
    int i;

    val = 0;
    /* accumulate from the most-significant digit byte downwards */
    for (i = 8; i >= 0; i--) {
        v = cpu_ldub_data_ra(env, ptr + i, GETPC());
        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
    }
    tmp = int64_to_floatx80(val, &env->fp_status);
    if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
        tmp = floatx80_chs(tmp);
    }
    fpush(env);
    ST0 = tmp;
}
/* FBSTP: store ST0 as an 18-digit packed-BCD value.  Values that do not
   fit raise invalid and store the packed-BCD indefinite encoding. */
void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
{
    int v;
    target_ulong mem_ref, mem_end;
    int64_t val;
    CPU_LDoubleU temp;

    temp.d = ST0;

    val = floatx80_to_int64(ST0, &env->fp_status);
    mem_ref = ptr;
    if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
        /* more than 18 decimal digits: store the BCD indefinite pattern */
        float_raise(float_flag_invalid, &env->fp_status);
        while (mem_ref < ptr + 7) {
            cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
        }
        cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC());
        cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
        cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
        return;
    }
    mem_end = mem_ref + 9;
    /* sign byte comes from the sign bit of the original floatx80 */
    if (SIGND(temp)) {
        cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
        val = -val;
    } else {
        cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
    }
    /* two decimal digits per byte, least significant first */
    while (mem_ref < mem_end) {
        if (val == 0) {
            break;
        }
        v = val % 100;
        val = val / 100;
        v = ((v / 10) << 4) | (v % 10);
        cpu_stb_data_ra(env, mem_ref++, v, GETPC());
    }
    /* zero-pad the remaining digit bytes */
    while (mem_ref < mem_end) {
        cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
    }
}
/* F2XM1: ST0 := 2^ST0 - 1, computed via the host double pow(); like the
   other transcendental helpers it is limited to double precision. */
void helper_f2xm1(CPUX86State *env)
{
    double val = floatx80_to_double(env, ST0);

    val = pow(2.0, val) - 1.0;
    ST0 = double_to_floatx80(env, val);
}

/* FYL2X: ST1 := ST1 * log2(ST0), then pop.  Non-positive ST0 just sets C2. */
void helper_fyl2x(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if (fptemp > 0.0) {
        fptemp = log(fptemp) / log(2.0); /* log2(ST) */
        fptemp *= floatx80_to_double(env, ST1);
        ST1 = double_to_floatx80(env, fptemp);
        fpop(env);
    } else {
        env->fpus &= ~0x4700;
        env->fpus |= 0x400;
    }
}

/* FPTAN: ST0 := tan(ST0), then push 1.0; C2 is set (operation not done)
   when the operand is outside the reduction range. */
void helper_fptan(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        fptemp = tan(fptemp);
        ST0 = double_to_floatx80(env, fptemp);
        fpush(env);
        ST0 = floatx80_one;
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**52 only */
    }
}

/* FPATAN: ST1 := atan2(ST1, ST0), then pop. */
void helper_fpatan(CPUX86State *env)
{
    double fptemp, fpsrcop;

    fpsrcop = floatx80_to_double(env, ST1);
    fptemp = floatx80_to_double(env, ST0);
    ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
    fpop(env);
}
/* FXTRACT: split ST0 into its exponent (replaces ST0) and significand
   (pushed on top).  Zero, invalid encodings, NaNs and infinities get
   their architected special-case results. */
void helper_fxtract(CPUX86State *env)
{
    CPU_LDoubleU temp;

    temp.d = ST0;

    if (floatx80_is_zero(ST0)) {
        /* Easy way to generate -inf and raising division by 0 exception */
        ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
                           &env->fp_status);
        fpush(env);
        ST0 = temp.d;
    } else if (floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
        fpush(env);
        ST0 = ST1;
    } else if (floatx80_is_any_nan(ST0)) {
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
        fpush(env);
        ST0 = ST1;
    } else if (floatx80_is_infinity(ST0)) {
        fpush(env);
        ST0 = ST1;
        ST1 = floatx80_infinity;
    } else {
        int expdif;

        if (EXPD(temp) == 0) {
            /* denormal: normalize the mantissa to recover the true exponent */
            int shift = clz64(temp.l.lower);
            temp.l.lower <<= shift;
            expdif = 1 - EXPBIAS - shift;
            float_raise(float_flag_input_denormal, &env->fp_status);
        } else {
            expdif = EXPD(temp) - EXPBIAS;
        }
        /* DP exponent bias */
        ST0 = int32_to_floatx80(expdif, &env->fp_status);
        fpush(env);
        BIASEXPONENT(temp);
        ST0 = temp.d;
    }
}
/* FPREM1: IEEE remainder of ST0 by ST1, computed in double precision.
   When the exponent difference is >= 53 only a partial reduction is done
   and C2 is set so the guest restarts the instruction. */
void helper_fprem1(CPUX86State *env)
{
    double st0, st1, dblq, fpsrcop, fptemp;
    CPU_LDoubleU fpsrcop1, fptemp1;
    int expdif;
    signed long long int q;

    st0 = floatx80_to_double(env, ST0);
    st1 = floatx80_to_double(env, ST1);

    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        return;
    }

    fpsrcop = st0;
    fptemp = st1;
    fpsrcop1.d = ST0;
    fptemp1.d = ST1;
    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);

    if (expdif < 0) {
        /* optimisation? taken from the AMD docs */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* ST0 is unchanged */
        return;
    }

    if (expdif < 53) {
        dblq = fpsrcop / fptemp;
        /* round dblq towards nearest integer */
        dblq = rint(dblq);
        st0 = fpsrcop - fptemp * dblq;

        /* convert dblq to q by truncating towards zero */
        if (dblq < 0.0) {
            q = (signed long long int)(-dblq);
        } else {
            q = (signed long long int)dblq;
        }

        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* (C0,C3,C1) <-- (q2,q1,q0) */
        env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
        env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
    } else {
        /* partial remainder: reduce by a scaled quotient and flag C2 */
        env->fpus |= 0x400; /* C2 <-- 1 */
        fptemp = pow(2.0, expdif - 50);
        fpsrcop = (st0 / st1) / fptemp;
        /* fpsrcop = integer obtained by chopping */
        fpsrcop = (fpsrcop < 0.0) ?
                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
        st0 -= (st1 * fpsrcop * fptemp);
    }
    ST0 = double_to_floatx80(env, st0);
}
/* FPREM: truncating (x87-style) remainder of ST0 by ST1.  Same structure
   as helper_fprem1 but the quotient is chopped toward zero instead of
   rounded to nearest. */
void helper_fprem(CPUX86State *env)
{
    double st0, st1, dblq, fpsrcop, fptemp;
    CPU_LDoubleU fpsrcop1, fptemp1;
    int expdif;
    signed long long int q;

    st0 = floatx80_to_double(env, ST0);
    st1 = floatx80_to_double(env, ST1);

    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        return;
    }

    fpsrcop = st0;
    fptemp = st1;
    fpsrcop1.d = ST0;
    fptemp1.d = ST1;
    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);

    if (expdif < 0) {
        /* optimisation? taken from the AMD docs */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* ST0 is unchanged */
        return;
    }

    if (expdif < 53) {
        dblq = fpsrcop / fptemp; /* ST0 / ST1 */
        /* round dblq towards zero */
        dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
        st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */

        /* convert dblq to q by truncating towards zero */
        if (dblq < 0.0) {
            q = (signed long long int)(-dblq);
        } else {
            q = (signed long long int)dblq;
        }

        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* (C0,C3,C1) <-- (q2,q1,q0) */
        env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
        env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
    } else {
        int N = 32 + (expdif % 32); /* as per AMD docs */

        /* partial remainder: reduce by a scaled quotient and flag C2 */
        env->fpus |= 0x400; /* C2 <-- 1 */
        fptemp = pow(2.0, (double)(expdif - N));
        fpsrcop = (st0 / st1) / fptemp;
        /* fpsrcop = integer obtained by chopping */
        fpsrcop = (fpsrcop < 0.0) ?
                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
        st0 -= (st1 * fpsrcop * fptemp);
    }
    ST0 = double_to_floatx80(env, st0);
}
/* FYL2XP1: ST1 := ST1 * log2(ST0 + 1), then pop; C2 set on domain error. */
void helper_fyl2xp1(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp + 1.0) > 0.0) {
        fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
        fptemp *= floatx80_to_double(env, ST1);
        ST1 = double_to_floatx80(env, fptemp);
        fpop(env);
    } else {
        env->fpus &= ~0x4700;
        env->fpus |= 0x400;
    }
}

/* FSQRT via softfloat; a negative operand additionally sets C2 before the
   conversion (which produces the NaN / invalid result). */
void helper_fsqrt(CPUX86State *env)
{
    if (floatx80_is_neg(ST0)) {
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        env->fpus |= 0x400;
    }
    ST0 = floatx80_sqrt(ST0, &env->fp_status);
}

/* FSINCOS: ST0 := sin(ST0), then push cos; C2 set when out of range. */
void helper_fsincos(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, sin(fptemp));
        fpush(env);
        ST0 = double_to_floatx80(env, cos(fptemp));
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**63 only */
    }
}

/* FRNDINT: round ST0 to an integer with the current rounding mode. */
void helper_frndint(CPUX86State *env)
{
    ST0 = floatx80_round_to_int(ST0, &env->fp_status);
}
/* FSCALE: ST0 := ST0 * 2^trunc(ST1), with explicit special-case handling
   for invalid encodings, NaNs and infinite ST1. */
void helper_fscale(CPUX86State *env)
{
    if (floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST1)) {
        ST0 = ST1;
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
    } else if (floatx80_is_infinity(ST1) &&
               !floatx80_invalid_encoding(ST0) &&
               !floatx80_is_any_nan(ST0)) {
        if (floatx80_is_neg(ST1)) {
            /* scale by -inf: finite -> signed zero; inf -> invalid */
            if (floatx80_is_infinity(ST0)) {
                float_raise(float_flag_invalid, &env->fp_status);
                ST0 = floatx80_default_nan(&env->fp_status);
            } else {
                ST0 = (floatx80_is_neg(ST0) ?
                       floatx80_chs(floatx80_zero) :
                       floatx80_zero);
            }
        } else {
            /* scale by +inf: zero -> invalid; nonzero -> signed infinity */
            if (floatx80_is_zero(ST0)) {
                float_raise(float_flag_invalid, &env->fp_status);
                ST0 = floatx80_default_nan(&env->fp_status);
            } else {
                ST0 = (floatx80_is_neg(ST0) ?
                       floatx80_chs(floatx80_infinity) :
                       floatx80_infinity);
            }
        }
    } else {
        int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
        signed char save = env->fp_status.floatx80_rounding_precision;
        /* scaling itself is always performed in full 80-bit precision */
        env->fp_status.floatx80_rounding_precision = 80;
        ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save;
    }
}
/* FSIN: double-precision approximation; C2 set when out of range. */
void helper_fsin(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, sin(fptemp));
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**53 only */
    }
}

/* FCOS: double-precision approximation; C2 set when out of range. */
void helper_fcos(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, cos(fptemp));
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**63 only */
    }
}
/* FXAM: classify ST0 into the C3,C2,C0 condition codes; C1 gets the sign. */
void helper_fxam_ST0(CPUX86State *env)
{
    CPU_LDoubleU temp;
    int expdif;

    temp.d = ST0;

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (SIGND(temp)) {
        env->fpus |= 0x200; /* C1 <-- 1 */
    }

    /* empty register: classified regardless of the stale contents */
    if (env->fptags[env->fpstt]) {
        env->fpus |= 0x4100; /* Empty */
        return;
    }

    expdif = EXPD(temp);
    if (expdif == MAXEXPD) {
        if (MANTD(temp) == 0x8000000000000000ULL) {
            env->fpus |= 0x500; /* Infinity */
        } else if (MANTD(temp) & 0x8000000000000000ULL) {
            env->fpus |= 0x100; /* NaN */
        }
    } else if (expdif == 0) {
        if (MANTD(temp) == 0) {
            env->fpus |= 0x4000; /* Zero */
        } else {
            env->fpus |= 0x4400; /* Denormal */
        }
    } else if (MANTD(temp) & 0x8000000000000000ULL) {
        env->fpus |= 0x400; /* normal finite number */
    }
}
/* Write the FPU environment (control/status/tag words plus zeroed
   instruction and operand pointers) in 16- or 32-bit layout. */
static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
                      uintptr_t retaddr)
{
    int fpus, fptag, exp, i;
    uint64_t mant;
    CPU_LDoubleU tmp;

    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    /* build the full 2-bit-per-register tag word from register contents */
    for (i = 7; i >= 0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3; /* empty */
        } else {
            tmp.d = env->fpregs[i].d;
            exp = EXPD(tmp);
            mant = MANTD(tmp);
            if (exp == 0 && mant == 0) {
                /* zero */
                fptag |= 1;
            } else if (exp == 0 || exp == MAXEXPD
                       || (mant & (1LL << 63)) == 0) {
                /* NaNs, infinity, denormal */
                fptag |= 2;
            }
        }
    }
    if (data32) {
        /* 32 bit */
        cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
        cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
        cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
        cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
        cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
        cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
        cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
    } else {
        /* 16 bit */
        cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
        cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
        cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
        cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
        cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
        cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
        cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
    }
}

void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
{
    do_fstenv(env, ptr, data32, GETPC());
}
/* Install a status word: extract TOP into fpstt, recompute the busy bit
   from the summary-status bit, and (system mode) deassert FERR#/IGNNE#
   when no exception is pending. */
static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
{
    env->fpstt = (fpus >> 11) & 7;
    env->fpus = fpus & ~0x3800 & ~FPUS_B;
    env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
#if !defined(CONFIG_USER_ONLY)
    if (!(env->fpus & FPUS_SE)) {
        /*
         * Here the processor deasserts FERR#; in response, the chipset deasserts
         * IGNNE#.
         */
        cpu_clear_ignne();
    }
#endif
}
/* Reload an environment image written by do_fstenv; only the low 16 bits
   of each field are meaningful, so 16-bit loads are used for both layouts. */
static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
                      uintptr_t retaddr)
{
    int i, fpus, fptag;

    if (data32) {
        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
        fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
        fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
    } else {
        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
        fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
        fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
    }
    cpu_set_fpus(env, fpus);
    for (i = 0; i < 8; i++) {
        /* internally only empty (tag 3) vs valid is tracked */
        env->fptags[i] = ((fptag & 3) == 3);
        fptag >>= 2;
    }
}

void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
{
    do_fldenv(env, ptr, data32, GETPC());
}
1248 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1250 floatx80 tmp;
1251 int i;
1253 do_fstenv(env, ptr, data32, GETPC());
1255 ptr += (14 << data32);
1256 for (i = 0; i < 8; i++) {
1257 tmp = ST(i);
1258 helper_fstt(env, tmp, ptr, GETPC());
1259 ptr += 10;
1262 /* fninit */
1263 env->fpus = 0;
1264 env->fpstt = 0;
1265 cpu_set_fpuc(env, 0x37f);
1266 env->fptags[0] = 1;
1267 env->fptags[1] = 1;
1268 env->fptags[2] = 1;
1269 env->fptags[3] = 1;
1270 env->fptags[4] = 1;
1271 env->fptags[5] = 1;
1272 env->fptags[6] = 1;
1273 env->fptags[7] = 1;
/* FRSTOR: reload the environment and then the eight 80-bit data registers. */
void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    floatx80 tmp;
    int i;

    do_fldenv(env, ptr, data32, GETPC());
    ptr += (14 << data32);

    for (i = 0; i < 8; i++) {
        tmp = helper_fldt(env, ptr, GETPC());
        ST(i) = tmp;
        ptr += 10;
    }
}
#if defined(CONFIG_USER_ONLY)
/* Non-helper entry points wrapping fsave/frstor for user-mode-only code. */
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_fsave(env, ptr, data32);
}

void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_frstor(env, ptr, data32);
}
#endif
/* Shorthand for offsets into the FXSAVE/XSAVE area layout */
#define XO(X) offsetof(X86XSaveArea, X)

/* Write the legacy x87 state in FXSAVE layout at ptr. */
static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int fpus, fptag, i;
    target_ulong addr;

    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 0; i < 8; i++) {
        fptag |= (env->fptags[i] << i);
    }

    cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
    cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
    /* abridged tag word: one bit per register, 1 = valid, so invert */
    cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);

    /* In 32-bit mode this is eip, sel, dp, sel.
       In 64-bit mode this is rip, rdp.
       But in either case we don't write actual data, just zeros. */
    cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
    cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */

    addr = ptr + XO(legacy.fpregs);
    for (i = 0; i < 8; i++) {
        floatx80 tmp = ST(i);
        helper_fstt(env, tmp, addr, ra);
        addr += 16; /* each 80-bit register occupies a 16-byte slot */
    }
}

/* Write MXCSR and its mask. */
static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
    cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
}

/* Write the XMM registers: 16 with a 64-bit code segment, 8 otherwise. */
static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int i, nb_xmm_regs;
    target_ulong addr;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    addr = ptr + XO(legacy.xmm_regs);
    for (i = 0; i < nb_xmm_regs; i++) {
        cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
        cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
        addr += 16;
    }
}

/* Write the four MPX bound registers (lower/upper bounds). */
static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
    int i;

    for (i = 0; i < 4; i++, addr += 16) {
        cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
        cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
    }
}

/* Write the MPX configuration and status registers. */
static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
                    env->bndcs_regs.cfgu, ra);
    cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
                    env->bndcs_regs.sts, ra);
}

/* Write the protection-key rights register. */
static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    cpu_stq_data_ra(env, ptr, env->pkru, ra);
}
1383 void helper_fxsave(CPUX86State *env, target_ulong ptr)
1385 uintptr_t ra = GETPC();
1387 /* The operand must be 16 byte aligned */
1388 if (ptr & 0xf) {
1389 raise_exception_ra(env, EXCP0D_GPF, ra);
1392 do_xsave_fpu(env, ptr, ra);
1394 if (env->cr[4] & CR4_OSFXSR_MASK) {
1395 do_xsave_mxcsr(env, ptr, ra);
1396 /* Fast FXSAVE leaves out the XMM registers */
1397 if (!(env->efer & MSR_EFER_FFXSR)
1398 || (env->hflags & HF_CPL_MASK)
1399 || !(env->hflags & HF_LMA_MASK)) {
1400 do_xsave_sse(env, ptr, ra);
1405 static uint64_t get_xinuse(CPUX86State *env)
1407 uint64_t inuse = -1;
1409 /* For the most part, we don't track XINUSE. We could calculate it
1410 here for all components, but it's probably less work to simply
1411 indicate in use. That said, the state of BNDREGS is important
1412 enough to track in HFLAGS, so we might as well use that here. */
1413 if ((env->hflags & HF_MPX_IU_MASK) == 0) {
1414 inuse &= ~XSTATE_BNDREGS_MASK;
1416 return inuse;
1419 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1420 uint64_t inuse, uint64_t opt, uintptr_t ra)
1422 uint64_t old_bv, new_bv;
1424 /* The OS must have enabled XSAVE. */
1425 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1426 raise_exception_ra(env, EXCP06_ILLOP, ra);
1429 /* The operand must be 64 byte aligned. */
1430 if (ptr & 63) {
1431 raise_exception_ra(env, EXCP0D_GPF, ra);
1434 /* Never save anything not enabled by XCR0. */
1435 rfbm &= env->xcr0;
1436 opt &= rfbm;
1438 if (opt & XSTATE_FP_MASK) {
1439 do_xsave_fpu(env, ptr, ra);
1441 if (rfbm & XSTATE_SSE_MASK) {
1442 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
1443 do_xsave_mxcsr(env, ptr, ra);
1445 if (opt & XSTATE_SSE_MASK) {
1446 do_xsave_sse(env, ptr, ra);
1448 if (opt & XSTATE_BNDREGS_MASK) {
1449 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
1451 if (opt & XSTATE_BNDCSR_MASK) {
1452 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
1454 if (opt & XSTATE_PKRU_MASK) {
1455 do_xsave_pkru(env, ptr + XO(pkru_state), ra);
1458 /* Update the XSTATE_BV field. */
1459 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1460 new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
1461 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
1464 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1466 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1469 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1471 uint64_t inuse = get_xinuse(env);
1472 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1475 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1477 int i, fpuc, fpus, fptag;
1478 target_ulong addr;
1480 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
1481 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
1482 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
1483 cpu_set_fpuc(env, fpuc);
1484 cpu_set_fpus(env, fpus);
1485 fptag ^= 0xff;
1486 for (i = 0; i < 8; i++) {
1487 env->fptags[i] = ((fptag >> i) & 1);
1490 addr = ptr + XO(legacy.fpregs);
1491 for (i = 0; i < 8; i++) {
1492 floatx80 tmp = helper_fldt(env, addr, ra);
1493 ST(i) = tmp;
1494 addr += 16;
1498 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1500 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
1503 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1505 int i, nb_xmm_regs;
1506 target_ulong addr;
1508 if (env->hflags & HF_CS64_MASK) {
1509 nb_xmm_regs = 16;
1510 } else {
1511 nb_xmm_regs = 8;
1514 addr = ptr + XO(legacy.xmm_regs);
1515 for (i = 0; i < nb_xmm_regs; i++) {
1516 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1517 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1518 addr += 16;
1522 static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1524 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1525 int i;
1527 for (i = 0; i < 4; i++, addr += 16) {
1528 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1529 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1533 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1535 /* FIXME: Extend highest implemented bit of linear address. */
1536 env->bndcs_regs.cfgu
1537 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
1538 env->bndcs_regs.sts
1539 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
1542 static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1544 env->pkru = cpu_ldq_data_ra(env, ptr, ra);
1547 void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1549 uintptr_t ra = GETPC();
1551 /* The operand must be 16 byte aligned */
1552 if (ptr & 0xf) {
1553 raise_exception_ra(env, EXCP0D_GPF, ra);
1556 do_xrstor_fpu(env, ptr, ra);
1558 if (env->cr[4] & CR4_OSFXSR_MASK) {
1559 do_xrstor_mxcsr(env, ptr, ra);
1560 /* Fast FXRSTOR leaves out the XMM registers */
1561 if (!(env->efer & MSR_EFER_FFXSR)
1562 || (env->hflags & HF_CPL_MASK)
1563 || !(env->hflags & HF_LMA_MASK)) {
1564 do_xrstor_sse(env, ptr, ra);
1569 #if defined(CONFIG_USER_ONLY)
1570 void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
1572 helper_fxsave(env, ptr);
1575 void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
1577 helper_fxrstor(env, ptr);
1579 #endif
1581 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1583 uintptr_t ra = GETPC();
1584 uint64_t xstate_bv, xcomp_bv, reserve0;
1586 rfbm &= env->xcr0;
1588 /* The OS must have enabled XSAVE. */
1589 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1590 raise_exception_ra(env, EXCP06_ILLOP, ra);
1593 /* The operand must be 64 byte aligned. */
1594 if (ptr & 63) {
1595 raise_exception_ra(env, EXCP0D_GPF, ra);
1598 xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1600 if ((int64_t)xstate_bv < 0) {
1601 /* FIXME: Compact form. */
1602 raise_exception_ra(env, EXCP0D_GPF, ra);
1605 /* Standard form. */
1607 /* The XSTATE_BV field must not set bits not present in XCR0. */
1608 if (xstate_bv & ~env->xcr0) {
1609 raise_exception_ra(env, EXCP0D_GPF, ra);
1612 /* The XCOMP_BV field must be zero. Note that, as of the April 2016
1613 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
1614 describes only XCOMP_BV, but the description of the standard form
1615 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
1616 includes the next 64-bit field. */
1617 xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
1618 reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
1619 if (xcomp_bv || reserve0) {
1620 raise_exception_ra(env, EXCP0D_GPF, ra);
1623 if (rfbm & XSTATE_FP_MASK) {
1624 if (xstate_bv & XSTATE_FP_MASK) {
1625 do_xrstor_fpu(env, ptr, ra);
1626 } else {
1627 helper_fninit(env);
1628 memset(env->fpregs, 0, sizeof(env->fpregs));
1631 if (rfbm & XSTATE_SSE_MASK) {
1632 /* Note that the standard form of XRSTOR loads MXCSR from memory
1633 whether or not the XSTATE_BV bit is set. */
1634 do_xrstor_mxcsr(env, ptr, ra);
1635 if (xstate_bv & XSTATE_SSE_MASK) {
1636 do_xrstor_sse(env, ptr, ra);
1637 } else {
1638 /* ??? When AVX is implemented, we may have to be more
1639 selective in the clearing. */
1640 memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
1643 if (rfbm & XSTATE_BNDREGS_MASK) {
1644 if (xstate_bv & XSTATE_BNDREGS_MASK) {
1645 do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
1646 env->hflags |= HF_MPX_IU_MASK;
1647 } else {
1648 memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
1649 env->hflags &= ~HF_MPX_IU_MASK;
1652 if (rfbm & XSTATE_BNDCSR_MASK) {
1653 if (xstate_bv & XSTATE_BNDCSR_MASK) {
1654 do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
1655 } else {
1656 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
1658 cpu_sync_bndcs_hflags(env);
1660 if (rfbm & XSTATE_PKRU_MASK) {
1661 uint64_t old_pkru = env->pkru;
1662 if (xstate_bv & XSTATE_PKRU_MASK) {
1663 do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
1664 } else {
1665 env->pkru = 0;
1667 if (env->pkru != old_pkru) {
1668 CPUState *cs = env_cpu(env);
1669 tlb_flush(cs);
1674 #undef XO
1676 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1678 /* The OS must have enabled XSAVE. */
1679 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1680 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1683 switch (ecx) {
1684 case 0:
1685 return env->xcr0;
1686 case 1:
1687 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1688 return env->xcr0 & get_xinuse(env);
1690 break;
1692 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1695 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1697 uint32_t dummy, ena_lo, ena_hi;
1698 uint64_t ena;
1700 /* The OS must have enabled XSAVE. */
1701 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1702 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1705 /* Only XCR0 is defined at present; the FPU may not be disabled. */
1706 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
1707 goto do_gpf;
1710 /* Disallow enabling unimplemented features. */
1711 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
1712 ena = ((uint64_t)ena_hi << 32) | ena_lo;
1713 if (mask & ~ena) {
1714 goto do_gpf;
1717 /* Disallow enabling only half of MPX. */
1718 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
1719 & XSTATE_BNDCSR_MASK) {
1720 goto do_gpf;
1723 env->xcr0 = mask;
1724 cpu_sync_bndcs_hflags(env);
1725 return;
1727 do_gpf:
1728 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1731 /* MMX/SSE */
1732 /* XXX: optimize by storing fptt and fptags in the static cpu state */
1734 #define SSE_DAZ 0x0040
1735 #define SSE_RC_MASK 0x6000
1736 #define SSE_RC_NEAR 0x0000
1737 #define SSE_RC_DOWN 0x2000
1738 #define SSE_RC_UP 0x4000
1739 #define SSE_RC_CHOP 0x6000
1740 #define SSE_FZ 0x8000
1742 void update_mxcsr_status(CPUX86State *env)
1744 uint32_t mxcsr = env->mxcsr;
1745 int rnd_type;
1747 /* set rounding mode */
1748 switch (mxcsr & SSE_RC_MASK) {
1749 default:
1750 case SSE_RC_NEAR:
1751 rnd_type = float_round_nearest_even;
1752 break;
1753 case SSE_RC_DOWN:
1754 rnd_type = float_round_down;
1755 break;
1756 case SSE_RC_UP:
1757 rnd_type = float_round_up;
1758 break;
1759 case SSE_RC_CHOP:
1760 rnd_type = float_round_to_zero;
1761 break;
1763 set_float_rounding_mode(rnd_type, &env->sse_status);
1765 /* set denormals are zero */
1766 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
1768 /* set flush to zero */
1769 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
1772 void helper_ldmxcsr(CPUX86State *env, uint32_t val)
1774 cpu_set_mxcsr(env, val);
1777 void helper_enter_mmx(CPUX86State *env)
1779 env->fpstt = 0;
1780 *(uint32_t *)(env->fptags) = 0;
1781 *(uint32_t *)(env->fptags + 4) = 0;
1784 void helper_emms(CPUX86State *env)
1786 /* set to empty state */
1787 *(uint32_t *)(env->fptags) = 0x01010101;
1788 *(uint32_t *)(env->fptags + 4) = 0x01010101;
1791 /* XXX: suppress */
1792 void helper_movq(CPUX86State *env, void *d, void *s)
1794 *(uint64_t *)d = *(uint64_t *)s;
1797 #define SHIFT 0
1798 #include "ops_sse.h"
1800 #define SHIFT 1
1801 #include "ops_sse.h"