target/i386: fix IEEE x87 floating-point exception raising
target/i386/fpu_helper.c
1 /*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include <math.h>
22 #include "cpu.h"
23 #include "exec/helper-proto.h"
24 #include "qemu/host-utils.h"
25 #include "exec/exec-all.h"
26 #include "exec/cpu_ldst.h"
27 #include "fpu/softfloat.h"
29 #ifdef CONFIG_SOFTMMU
30 #include "hw/irq.h"
31 #endif
33 #define FPU_RC_MASK 0xc00
34 #define FPU_RC_NEAR 0x000
35 #define FPU_RC_DOWN 0x400
36 #define FPU_RC_UP 0x800
37 #define FPU_RC_CHOP 0xc00
39 #define MAXTAN 9223372036854775808.0
41 /* the following deal with x86 long double-precision numbers */
42 #define MAXEXPD 0x7fff
43 #define EXPBIAS 16383
44 #define EXPD(fp) (fp.l.upper & 0x7fff)
45 #define SIGND(fp) ((fp.l.upper) & 0x8000)
46 #define MANTD(fp) (fp.l.lower)
47 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
49 #define FPUS_IE (1 << 0)
50 #define FPUS_DE (1 << 1)
51 #define FPUS_ZE (1 << 2)
52 #define FPUS_OE (1 << 3)
53 #define FPUS_UE (1 << 4)
54 #define FPUS_PE (1 << 5)
55 #define FPUS_SF (1 << 6)
56 #define FPUS_SE (1 << 7)
57 #define FPUS_B (1 << 15)
59 #define FPUC_EM 0x3f
61 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
62 #define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
63 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
64 #define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
65 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
66 #define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
67 #define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
68 #define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)
70 #if !defined(CONFIG_USER_ONLY)
71 static qemu_irq ferr_irq;
73 void x86_register_ferr_irq(qemu_irq irq)
75 ferr_irq = irq;
78 static void cpu_clear_ignne(void)
80 CPUX86State *env = &X86_CPU(first_cpu)->env;
81 env->hflags2 &= ~HF2_IGNNE_MASK;
84 void cpu_set_ignne(void)
86 CPUX86State *env = &X86_CPU(first_cpu)->env;
87 env->hflags2 |= HF2_IGNNE_MASK;
89  * We get here in response to a write to port F0h. The chipset should
90  * deassert FP_IRQ; FERR# instead should stay signaled until FPSW_SE is
91 * cleared, because FERR# and FP_IRQ are two separate pins on real
92 * hardware. However, we don't model FERR# as a qemu_irq, so we just
93 * do directly what the chipset would do, i.e. deassert FP_IRQ.
95 qemu_irq_lower(ferr_irq);
97 #endif
100 static inline void fpush(CPUX86State *env)
102 env->fpstt = (env->fpstt - 1) & 7;
103 env->fptags[env->fpstt] = 0; /* validate stack entry */
106 static inline void fpop(CPUX86State *env)
108 env->fptags[env->fpstt] = 1; /* invalidate stack entry */
109 env->fpstt = (env->fpstt + 1) & 7;
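/*
 * Illustrative sketch, not part of the original file: the x87 register
 * stack is eight physical slots addressed modulo 8, so fpush() just
 * decrements the top-of-stack index with wrap-around, and ST(n) lives in
 * physical register (fpstt + n) & 7.  The helper name below is
 * hypothetical and exists only for this example.
 */
static inline unsigned example_st_phys_reg(unsigned fpstt, unsigned n)
{
    /* fpstt == 0, n == 1 -> register 1;  fpstt == 7, n == 1 -> wraps to 0 */
    return (fpstt + n) & 7;
}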
112 static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
113 uintptr_t retaddr)
115 CPU_LDoubleU temp;
117 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
118 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
119 return temp.d;
122 static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
123 uintptr_t retaddr)
125 CPU_LDoubleU temp;
127 temp.d = f;
128 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
129 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
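/*
 * Illustrative note, not part of the original file: the ten-byte memory
 * image accessed above is the x87 extended format, a 64-bit significand at
 * ptr followed by a 16-bit sign/exponent word at ptr + 8 (bias 16383, with
 * an explicit integer bit).  For example, 1.0 is stored as significand
 * 0x8000000000000000 and exponent word 0x3fff.  A hypothetical decoder of
 * the unbiased exponent:
 */
static inline int example_unbiased_exp(uint16_t exp_word)
{
    /* 0x3fff (the encoding of 1.0) -> 0 */
    return (exp_word & 0x7fff) - 16383;
}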
132 /* x87 FPU helpers */
134 static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
136 union {
137 float64 f64;
138 double d;
139 } u;
141 u.f64 = floatx80_to_float64(a, &env->fp_status);
142 return u.d;
145 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
147 union {
148 float64 f64;
149 double d;
150 } u;
152 u.d = a;
153 return float64_to_floatx80(u.f64, &env->fp_status);
156 static void fpu_set_exception(CPUX86State *env, int mask)
158 env->fpus |= mask;
159 if (env->fpus & (~env->fpuc & FPUC_EM)) {
160 env->fpus |= FPUS_SE | FPUS_B;
164 static inline uint8_t save_exception_flags(CPUX86State *env)
166 uint8_t old_flags = get_float_exception_flags(&env->fp_status);
167 set_float_exception_flags(0, &env->fp_status);
168 return old_flags;
171 static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
173 uint8_t new_flags = get_float_exception_flags(&env->fp_status);
174 float_raise(old_flags, &env->fp_status);
175 fpu_set_exception(env,
176 ((new_flags & float_flag_invalid ? FPUS_IE : 0) |
177 (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) |
178 (new_flags & float_flag_overflow ? FPUS_OE : 0) |
179 (new_flags & float_flag_underflow ? FPUS_UE : 0) |
180 (new_flags & float_flag_inexact ? FPUS_PE : 0) |
181 (new_flags & float_flag_input_denormal ? FPUS_DE : 0)));
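/*
 * Illustrative sketch, not part of the original file: the softfloat -> FSW
 * translation performed by merge_exception_flags() above, restated as a
 * standalone pure function.  The flag and bit names are the real ones used
 * in this file; only the function name is hypothetical.
 */
static inline int example_softfloat_to_fsw(uint8_t flags)
{
    return (flags & float_flag_invalid        ? FPUS_IE : 0) |
           (flags & float_flag_divbyzero      ? FPUS_ZE : 0) |
           (flags & float_flag_overflow       ? FPUS_OE : 0) |
           (flags & float_flag_underflow      ? FPUS_UE : 0) |
           (flags & float_flag_inexact        ? FPUS_PE : 0) |
           (flags & float_flag_input_denormal ? FPUS_DE : 0);
}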
184 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
186 uint8_t old_flags = save_exception_flags(env);
187 floatx80 ret = floatx80_div(a, b, &env->fp_status);
188 merge_exception_flags(env, old_flags);
189 return ret;
192 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
194 if (env->cr[0] & CR0_NE_MASK) {
195 raise_exception_ra(env, EXCP10_COPR, retaddr);
197 #if !defined(CONFIG_USER_ONLY)
198 else if (ferr_irq && !(env->hflags2 & HF2_IGNNE_MASK)) {
199 qemu_irq_raise(ferr_irq);
201 #endif
204 void helper_flds_FT0(CPUX86State *env, uint32_t val)
206 uint8_t old_flags = save_exception_flags(env);
207 union {
208 float32 f;
209 uint32_t i;
210 } u;
212 u.i = val;
213 FT0 = float32_to_floatx80(u.f, &env->fp_status);
214 merge_exception_flags(env, old_flags);
217 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
219 uint8_t old_flags = save_exception_flags(env);
220 union {
221 float64 f;
222 uint64_t i;
223 } u;
225 u.i = val;
226 FT0 = float64_to_floatx80(u.f, &env->fp_status);
227 merge_exception_flags(env, old_flags);
230 void helper_fildl_FT0(CPUX86State *env, int32_t val)
232 FT0 = int32_to_floatx80(val, &env->fp_status);
235 void helper_flds_ST0(CPUX86State *env, uint32_t val)
237 uint8_t old_flags = save_exception_flags(env);
238 int new_fpstt;
239 union {
240 float32 f;
241 uint32_t i;
242 } u;
244 new_fpstt = (env->fpstt - 1) & 7;
245 u.i = val;
246 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
247 env->fpstt = new_fpstt;
248 env->fptags[new_fpstt] = 0; /* validate stack entry */
249 merge_exception_flags(env, old_flags);
252 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
254 uint8_t old_flags = save_exception_flags(env);
255 int new_fpstt;
256 union {
257 float64 f;
258 uint64_t i;
259 } u;
261 new_fpstt = (env->fpstt - 1) & 7;
262 u.i = val;
263 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
264 env->fpstt = new_fpstt;
265 env->fptags[new_fpstt] = 0; /* validate stack entry */
266 merge_exception_flags(env, old_flags);
269 void helper_fildl_ST0(CPUX86State *env, int32_t val)
271 int new_fpstt;
273 new_fpstt = (env->fpstt - 1) & 7;
274 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
275 env->fpstt = new_fpstt;
276 env->fptags[new_fpstt] = 0; /* validate stack entry */
279 void helper_fildll_ST0(CPUX86State *env, int64_t val)
281 int new_fpstt;
283 new_fpstt = (env->fpstt - 1) & 7;
284 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
285 env->fpstt = new_fpstt;
286 env->fptags[new_fpstt] = 0; /* validate stack entry */
289 uint32_t helper_fsts_ST0(CPUX86State *env)
291 uint8_t old_flags = save_exception_flags(env);
292 union {
293 float32 f;
294 uint32_t i;
295 } u;
297 u.f = floatx80_to_float32(ST0, &env->fp_status);
298 merge_exception_flags(env, old_flags);
299 return u.i;
302 uint64_t helper_fstl_ST0(CPUX86State *env)
304 uint8_t old_flags = save_exception_flags(env);
305 union {
306 float64 f;
307 uint64_t i;
308 } u;
310 u.f = floatx80_to_float64(ST0, &env->fp_status);
311 merge_exception_flags(env, old_flags);
312 return u.i;
315 int32_t helper_fist_ST0(CPUX86State *env)
317 uint8_t old_flags = save_exception_flags(env);
318 int32_t val;
320 val = floatx80_to_int32(ST0, &env->fp_status);
321 if (val != (int16_t)val) {
322 set_float_exception_flags(float_flag_invalid, &env->fp_status);
323 val = -32768;
325 merge_exception_flags(env, old_flags);
326 return val;
329 int32_t helper_fistl_ST0(CPUX86State *env)
331 uint8_t old_flags = save_exception_flags(env);
332 int32_t val;
334 val = floatx80_to_int32(ST0, &env->fp_status);
335 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
336 val = 0x80000000;
338 merge_exception_flags(env, old_flags);
339 return val;
342 int64_t helper_fistll_ST0(CPUX86State *env)
344 uint8_t old_flags = save_exception_flags(env);
345 int64_t val;
347 val = floatx80_to_int64(ST0, &env->fp_status);
348 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
349 val = 0x8000000000000000ULL;
351 merge_exception_flags(env, old_flags);
352 return val;
355 int32_t helper_fistt_ST0(CPUX86State *env)
357 uint8_t old_flags = save_exception_flags(env);
358 int32_t val;
360 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
361 if (val != (int16_t)val) {
362 set_float_exception_flags(float_flag_invalid, &env->fp_status);
363 val = -32768;
365 merge_exception_flags(env, old_flags);
366 return val;
369 int32_t helper_fisttl_ST0(CPUX86State *env)
371 uint8_t old_flags = save_exception_flags(env);
372 int32_t val;
374 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
375 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
376 val = 0x80000000;
378 merge_exception_flags(env, old_flags);
379 return val;
382 int64_t helper_fisttll_ST0(CPUX86State *env)
384 uint8_t old_flags = save_exception_flags(env);
385 int64_t val;
387 val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
388 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
389 val = 0x8000000000000000ULL;
391 merge_exception_flags(env, old_flags);
392 return val;
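/*
 * Illustrative sketch, not part of the original file: when a conversion is
 * invalid or out of range, the helpers above store the x87 "integer
 * indefinite" value, i.e. the most negative integer of the destination
 * width (-32768, 0x80000000 or 0x8000000000000000).  A hypothetical helper:
 */
static inline int64_t example_integer_indefinite(int width_bits)
{
    /* 16 -> -32768, 32 -> -2147483648, 64 -> INT64_MIN */
    return width_bits == 64 ? INT64_MIN : -(INT64_C(1) << (width_bits - 1));
}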
395 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
397 int new_fpstt;
399 new_fpstt = (env->fpstt - 1) & 7;
400 env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
401 env->fpstt = new_fpstt;
402 env->fptags[new_fpstt] = 0; /* validate stack entry */
405 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
407 helper_fstt(env, ST0, ptr, GETPC());
410 void helper_fpush(CPUX86State *env)
412 fpush(env);
415 void helper_fpop(CPUX86State *env)
417 fpop(env);
420 void helper_fdecstp(CPUX86State *env)
422 env->fpstt = (env->fpstt - 1) & 7;
423 env->fpus &= ~0x4700;
426 void helper_fincstp(CPUX86State *env)
428 env->fpstt = (env->fpstt + 1) & 7;
429 env->fpus &= ~0x4700;
432 /* FPU move */
434 void helper_ffree_STN(CPUX86State *env, int st_index)
436 env->fptags[(env->fpstt + st_index) & 7] = 1;
439 void helper_fmov_ST0_FT0(CPUX86State *env)
441 ST0 = FT0;
444 void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
446 FT0 = ST(st_index);
449 void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
451 ST0 = ST(st_index);
454 void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
456 ST(st_index) = ST0;
459 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
461 floatx80 tmp;
463 tmp = ST(st_index);
464 ST(st_index) = ST0;
465 ST0 = tmp;
468 /* FPU operations */
470 static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
472 void helper_fcom_ST0_FT0(CPUX86State *env)
474 uint8_t old_flags = save_exception_flags(env);
475 FloatRelation ret;
477 ret = floatx80_compare(ST0, FT0, &env->fp_status);
478 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
479 merge_exception_flags(env, old_flags);
482 void helper_fucom_ST0_FT0(CPUX86State *env)
484 uint8_t old_flags = save_exception_flags(env);
485 FloatRelation ret;
487 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
488 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
489 merge_exception_flags(env, old_flags);
492 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
494 void helper_fcomi_ST0_FT0(CPUX86State *env)
496 uint8_t old_flags = save_exception_flags(env);
497 int eflags;
498 FloatRelation ret;
500 ret = floatx80_compare(ST0, FT0, &env->fp_status);
501 eflags = cpu_cc_compute_all(env, CC_OP);
502 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
503 CC_SRC = eflags;
504 merge_exception_flags(env, old_flags);
507 void helper_fucomi_ST0_FT0(CPUX86State *env)
509 uint8_t old_flags = save_exception_flags(env);
510 int eflags;
511 FloatRelation ret;
513 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
514 eflags = cpu_cc_compute_all(env, CC_OP);
515 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
516 CC_SRC = eflags;
517 merge_exception_flags(env, old_flags);
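/*
 * Illustrative sketch, not part of the original file: floatx80_compare()
 * returns -1 (less), 0 (equal), 1 (greater) or 2 (unordered), so indexing
 * the tables above with "ret + 1" selects, in that order:
 *   FCOM/FUCOM:   C0=1,   C3=1,   none,   C3=C2=C0=1
 *   FCOMI/FUCOMI: CF=1,   ZF=1,   none,   ZF=PF=CF=1
 * A standalone restatement of the FCOMI mapping (the function name is
 * hypothetical, the table is the same as fcomi_ccval above):
 */
static inline int example_fcomi_eflags(FloatRelation rel)
{
    static const int tbl[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
    return tbl[rel + 1];
}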
520 void helper_fadd_ST0_FT0(CPUX86State *env)
522 uint8_t old_flags = save_exception_flags(env);
523 ST0 = floatx80_add(ST0, FT0, &env->fp_status);
524 merge_exception_flags(env, old_flags);
527 void helper_fmul_ST0_FT0(CPUX86State *env)
529 uint8_t old_flags = save_exception_flags(env);
530 ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
531 merge_exception_flags(env, old_flags);
534 void helper_fsub_ST0_FT0(CPUX86State *env)
536 uint8_t old_flags = save_exception_flags(env);
537 ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
538 merge_exception_flags(env, old_flags);
541 void helper_fsubr_ST0_FT0(CPUX86State *env)
543 uint8_t old_flags = save_exception_flags(env);
544 ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
545 merge_exception_flags(env, old_flags);
548 void helper_fdiv_ST0_FT0(CPUX86State *env)
550 ST0 = helper_fdiv(env, ST0, FT0);
553 void helper_fdivr_ST0_FT0(CPUX86State *env)
555 ST0 = helper_fdiv(env, FT0, ST0);
558 /* fp operations between STN and ST0 */
560 void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
562 uint8_t old_flags = save_exception_flags(env);
563 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
564 merge_exception_flags(env, old_flags);
567 void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
569 uint8_t old_flags = save_exception_flags(env);
570 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
571 merge_exception_flags(env, old_flags);
574 void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
576 uint8_t old_flags = save_exception_flags(env);
577 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
578 merge_exception_flags(env, old_flags);
581 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
583 uint8_t old_flags = save_exception_flags(env);
584 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
585 merge_exception_flags(env, old_flags);
588 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
590 floatx80 *p;
592 p = &ST(st_index);
593 *p = helper_fdiv(env, *p, ST0);
596 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
598 floatx80 *p;
600 p = &ST(st_index);
601 *p = helper_fdiv(env, ST0, *p);
604 /* misc FPU operations */
605 void helper_fchs_ST0(CPUX86State *env)
607 ST0 = floatx80_chs(ST0);
610 void helper_fabs_ST0(CPUX86State *env)
612 ST0 = floatx80_abs(ST0);
615 void helper_fld1_ST0(CPUX86State *env)
617 ST0 = floatx80_one;
620 void helper_fldl2t_ST0(CPUX86State *env)
622 switch (env->fpuc & FPU_RC_MASK) {
623 case FPU_RC_UP:
624 ST0 = floatx80_l2t_u;
625 break;
626 default:
627 ST0 = floatx80_l2t;
628 break;
632 void helper_fldl2e_ST0(CPUX86State *env)
634 switch (env->fpuc & FPU_RC_MASK) {
635 case FPU_RC_DOWN:
636 case FPU_RC_CHOP:
637 ST0 = floatx80_l2e_d;
638 break;
639 default:
640 ST0 = floatx80_l2e;
641 break;
645 void helper_fldpi_ST0(CPUX86State *env)
647 switch (env->fpuc & FPU_RC_MASK) {
648 case FPU_RC_DOWN:
649 case FPU_RC_CHOP:
650 ST0 = floatx80_pi_d;
651 break;
652 default:
653 ST0 = floatx80_pi;
654 break;
658 void helper_fldlg2_ST0(CPUX86State *env)
660 switch (env->fpuc & FPU_RC_MASK) {
661 case FPU_RC_DOWN:
662 case FPU_RC_CHOP:
663 ST0 = floatx80_lg2_d;
664 break;
665 default:
666 ST0 = floatx80_lg2;
667 break;
671 void helper_fldln2_ST0(CPUX86State *env)
673 switch (env->fpuc & FPU_RC_MASK) {
674 case FPU_RC_DOWN:
675 case FPU_RC_CHOP:
676 ST0 = floatx80_ln2_d;
677 break;
678 default:
679 ST0 = floatx80_ln2;
680 break;
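/*
 * Illustrative sketch, not part of the original file: each constant-load
 * helper above keeps two encodings that differ by one unit in the last
 * place (e.g. floatx80_lg2 vs floatx80_lg2_d) and picks whichever is
 * correctly rounded for the rounding direction currently selected in the
 * control word, so FLDLG2 and friends return a correctly rounded constant
 * in every mode.  The choice for lg2, written out with a hypothetical name:
 */
static inline floatx80 example_pick_lg2(uint16_t fpuc)
{
    switch (fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        return floatx80_lg2_d;   /* value rounded toward minus infinity */
    default:
        return floatx80_lg2;     /* nearest value (also correct for up) */
    }
}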
684 void helper_fldz_ST0(CPUX86State *env)
686 ST0 = floatx80_zero;
689 void helper_fldz_FT0(CPUX86State *env)
691 FT0 = floatx80_zero;
694 uint32_t helper_fnstsw(CPUX86State *env)
696 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
699 uint32_t helper_fnstcw(CPUX86State *env)
701 return env->fpuc;
704 void update_fp_status(CPUX86State *env)
706 int rnd_type;
708 /* set rounding mode */
709 switch (env->fpuc & FPU_RC_MASK) {
710 default:
711 case FPU_RC_NEAR:
712 rnd_type = float_round_nearest_even;
713 break;
714 case FPU_RC_DOWN:
715 rnd_type = float_round_down;
716 break;
717 case FPU_RC_UP:
718 rnd_type = float_round_up;
719 break;
720 case FPU_RC_CHOP:
721 rnd_type = float_round_to_zero;
722 break;
724 set_float_rounding_mode(rnd_type, &env->fp_status);
725 switch ((env->fpuc >> 8) & 3) {
726 case 0:
727 rnd_type = 32;
728 break;
729 case 2:
730 rnd_type = 64;
731 break;
732 case 3:
733 default:
734 rnd_type = 80;
735 break;
737 set_floatx80_rounding_precision(rnd_type, &env->fp_status);
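/*
 * Illustrative sketch, not part of the original file: update_fp_status()
 * decodes two control-word fields, RC in bits 11:10 (rounding direction)
 * and PC in bits 9:8 (precision control).  For the reset value 0x037f used
 * by fninit, RC = 0 gives round-to-nearest-even and PC = 3 gives full
 * 80-bit (64-bit significand) precision.  A hypothetical PC decoder:
 */
static inline int example_fpuc_precision_bits(uint16_t fpuc)
{
    switch ((fpuc >> 8) & 3) {
    case 0:
        return 32;   /* single precision */
    case 2:
        return 64;   /* double precision */
    default:
        return 80;   /* extended precision; encoding 1 is reserved */
    }
}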
740 void helper_fldcw(CPUX86State *env, uint32_t val)
742 cpu_set_fpuc(env, val);
745 void helper_fclex(CPUX86State *env)
747 env->fpus &= 0x7f00;
750 void helper_fwait(CPUX86State *env)
752 if (env->fpus & FPUS_SE) {
753 fpu_raise_exception(env, GETPC());
757 void helper_fninit(CPUX86State *env)
759 env->fpus = 0;
760 env->fpstt = 0;
761 cpu_set_fpuc(env, 0x37f);
762 env->fptags[0] = 1;
763 env->fptags[1] = 1;
764 env->fptags[2] = 1;
765 env->fptags[3] = 1;
766 env->fptags[4] = 1;
767 env->fptags[5] = 1;
768 env->fptags[6] = 1;
769 env->fptags[7] = 1;
772 /* BCD ops */
774 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
776 floatx80 tmp;
777 uint64_t val;
778 unsigned int v;
779 int i;
781 val = 0;
782 for (i = 8; i >= 0; i--) {
783 v = cpu_ldub_data_ra(env, ptr + i, GETPC());
784 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
786 tmp = int64_to_floatx80(val, &env->fp_status);
787 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
788 tmp = floatx80_chs(tmp);
790 fpush(env);
791 ST0 = tmp;
794 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
796 uint8_t old_flags = save_exception_flags(env);
797 int v;
798 target_ulong mem_ref, mem_end;
799 int64_t val;
800 CPU_LDoubleU temp;
802 temp.d = ST0;
804 val = floatx80_to_int64(ST0, &env->fp_status);
805 mem_ref = ptr;
806 if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
807 set_float_exception_flags(float_flag_invalid, &env->fp_status);
808 while (mem_ref < ptr + 7) {
809 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
811 cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC());
812 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
813 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
814 merge_exception_flags(env, old_flags);
815 return;
817 mem_end = mem_ref + 9;
818 if (SIGND(temp)) {
819 cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
820 val = -val;
821 } else {
822 cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
824 while (mem_ref < mem_end) {
825 if (val == 0) {
826 break;
828 v = val % 100;
829 val = val / 100;
830 v = ((v / 10) << 4) | (v % 10);
831 cpu_stb_data_ra(env, mem_ref++, v, GETPC());
833 while (mem_ref < mem_end) {
834 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
836 merge_exception_flags(env, old_flags);
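/*
 * Illustrative sketch, not part of the original file: the ten-byte packed
 * BCD operand used by FBLD/FBST above holds 18 decimal digits, two per
 * byte with the low digit in the low nibble, least significant byte first,
 * and the sign in bit 7 of byte 9.  The decimal value 1234 is therefore
 * stored as bytes 0x34, 0x12, 0x00, ...  A hypothetical one-byte encoder:
 */
static inline uint8_t example_bcd_byte(int two_digits)
{
    /* 34 -> 0x34, matching "((v / 10) << 4) | (v % 10)" in helper_fbst_ST0 */
    return ((two_digits / 10) << 4) | (two_digits % 10);
}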
839 void helper_f2xm1(CPUX86State *env)
841 double val = floatx80_to_double(env, ST0);
843 val = pow(2.0, val) - 1.0;
844 ST0 = double_to_floatx80(env, val);
847 void helper_fyl2x(CPUX86State *env)
849 double fptemp = floatx80_to_double(env, ST0);
851 if (fptemp > 0.0) {
852 fptemp = log(fptemp) / log(2.0); /* log2(ST) */
853 fptemp *= floatx80_to_double(env, ST1);
854 ST1 = double_to_floatx80(env, fptemp);
855 fpop(env);
856 } else {
857 env->fpus &= ~0x4700;
858 env->fpus |= 0x400;
862 void helper_fptan(CPUX86State *env)
864 double fptemp = floatx80_to_double(env, ST0);
866 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
867 env->fpus |= 0x400;
868 } else {
869 fptemp = tan(fptemp);
870 ST0 = double_to_floatx80(env, fptemp);
871 fpush(env);
872 ST0 = floatx80_one;
873 env->fpus &= ~0x400; /* C2 <-- 0 */
874 /* the above code is for |arg| < 2**52 only */
878 void helper_fpatan(CPUX86State *env)
880 double fptemp, fpsrcop;
882 fpsrcop = floatx80_to_double(env, ST1);
883 fptemp = floatx80_to_double(env, ST0);
884 ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
885 fpop(env);
888 void helper_fxtract(CPUX86State *env)
890 uint8_t old_flags = save_exception_flags(env);
891 CPU_LDoubleU temp;
893 temp.d = ST0;
895 if (floatx80_is_zero(ST0)) {
896         /* Easy way to generate -inf and raise the divide-by-zero exception */
897 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
898 &env->fp_status);
899 fpush(env);
900 ST0 = temp.d;
901 } else if (floatx80_invalid_encoding(ST0)) {
902 float_raise(float_flag_invalid, &env->fp_status);
903 ST0 = floatx80_default_nan(&env->fp_status);
904 fpush(env);
905 ST0 = ST1;
906 } else if (floatx80_is_any_nan(ST0)) {
907 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
908 float_raise(float_flag_invalid, &env->fp_status);
909 ST0 = floatx80_silence_nan(ST0, &env->fp_status);
911 fpush(env);
912 ST0 = ST1;
913 } else if (floatx80_is_infinity(ST0)) {
914 fpush(env);
915 ST0 = ST1;
916 ST1 = floatx80_infinity;
917 } else {
918 int expdif;
920 if (EXPD(temp) == 0) {
921 int shift = clz64(temp.l.lower);
922 temp.l.lower <<= shift;
923 expdif = 1 - EXPBIAS - shift;
924 float_raise(float_flag_input_denormal, &env->fp_status);
925 } else {
926 expdif = EXPD(temp) - EXPBIAS;
928 /* DP exponent bias */
929 ST0 = int32_to_floatx80(expdif, &env->fp_status);
930 fpush(env);
931 BIASEXPONENT(temp);
932 ST0 = temp.d;
934 merge_exception_flags(env, old_flags);
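/*
 * Illustrative sketch, not part of the original file: for a finite,
 * non-zero operand FXTRACT leaves the unbiased exponent in ST1 and a
 * significand in [1.0, 2.0) with the original sign in ST0; e.g.
 * 6.0 = 1.5 * 2^2 yields ST1 = 2.0 and ST0 = 1.5.  The BIASEXPONENT()
 * step above simply rewrites the exponent field to EXPBIAS so the stored
 * significand reads as that [1.0, 2.0) value.  A hypothetical check of the
 * exponent using the C library (valid for finite non-zero doubles only):
 */
static inline int example_fxtract_exponent(double x)
{
    int e;

    frexp(x, &e);     /* frexp() normalises to [0.5, 1.0), so ...       */
    return e - 1;     /* ... the FXTRACT exponent is one less: 6.0 -> 2 */
}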
937 void helper_fprem1(CPUX86State *env)
939 double st0, st1, dblq, fpsrcop, fptemp;
940 CPU_LDoubleU fpsrcop1, fptemp1;
941 int expdif;
942 signed long long int q;
944 st0 = floatx80_to_double(env, ST0);
945 st1 = floatx80_to_double(env, ST1);
947 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
948 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
949 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
950 return;
953 fpsrcop = st0;
954 fptemp = st1;
955 fpsrcop1.d = ST0;
956 fptemp1.d = ST1;
957 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
959 if (expdif < 0) {
960 /* optimisation? taken from the AMD docs */
961 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
962 /* ST0 is unchanged */
963 return;
966 if (expdif < 53) {
967 dblq = fpsrcop / fptemp;
968 /* round dblq towards nearest integer */
969 dblq = rint(dblq);
970 st0 = fpsrcop - fptemp * dblq;
972 /* convert dblq to q by truncating towards zero */
973 if (dblq < 0.0) {
974 q = (signed long long int)(-dblq);
975 } else {
976 q = (signed long long int)dblq;
979 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
980 /* (C0,C3,C1) <-- (q2,q1,q0) */
981 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
982 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
983 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
984 } else {
985 env->fpus |= 0x400; /* C2 <-- 1 */
986 fptemp = pow(2.0, expdif - 50);
987 fpsrcop = (st0 / st1) / fptemp;
988 /* fpsrcop = integer obtained by chopping */
989 fpsrcop = (fpsrcop < 0.0) ?
990 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
991 st0 -= (st1 * fpsrcop * fptemp);
993 ST0 = double_to_floatx80(env, st0);
996 void helper_fprem(CPUX86State *env)
998 double st0, st1, dblq, fpsrcop, fptemp;
999 CPU_LDoubleU fpsrcop1, fptemp1;
1000 int expdif;
1001 signed long long int q;
1003 st0 = floatx80_to_double(env, ST0);
1004 st1 = floatx80_to_double(env, ST1);
1006 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
1007 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
1008 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1009 return;
1012 fpsrcop = st0;
1013 fptemp = st1;
1014 fpsrcop1.d = ST0;
1015 fptemp1.d = ST1;
1016 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
1018 if (expdif < 0) {
1019 /* optimisation? taken from the AMD docs */
1020 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1021 /* ST0 is unchanged */
1022 return;
1025 if (expdif < 53) {
1026 dblq = fpsrcop / fptemp; /* ST0 / ST1 */
1027 /* round dblq towards zero */
1028 dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
1029 st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
1031 /* convert dblq to q by truncating towards zero */
1032 if (dblq < 0.0) {
1033 q = (signed long long int)(-dblq);
1034 } else {
1035 q = (signed long long int)dblq;
1038 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1039 /* (C0,C3,C1) <-- (q2,q1,q0) */
1040 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
1041 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
1042 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
1043 } else {
1044 int N = 32 + (expdif % 32); /* as per AMD docs */
1046 env->fpus |= 0x400; /* C2 <-- 1 */
1047 fptemp = pow(2.0, (double)(expdif - N));
1048 fpsrcop = (st0 / st1) / fptemp;
1049 /* fpsrcop = integer obtained by chopping */
1050 fpsrcop = (fpsrcop < 0.0) ?
1051 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
1052 st0 -= (st1 * fpsrcop * fptemp);
1054 ST0 = double_to_floatx80(env, st0);
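/*
 * Illustrative sketch, not part of the original file: when the exponent
 * difference is small enough the helpers above finish in one step, clear
 * C2 and expose the low three quotient bits in C0/C3/C1.  E.g. FPREM with
 * ST0 = 17.0, ST1 = 5.0 truncates 17/5 to q = 3, stores remainder 2.0 and
 * sets (C0, C3, C1) = (q bit 2, bit 1, bit 0) = (0, 1, 1).  The bit
 * placement, restated with a hypothetical name:
 */
static inline int example_fprem_quotient_cc(long long q)
{
    return ((q & 0x4) << (8 - 2))       /* q bit 2 -> C0 (FSW bit 8)  */
         | ((q & 0x2) << (14 - 1))      /* q bit 1 -> C3 (FSW bit 14) */
         | ((q & 0x1) << (9 - 0));      /* q bit 0 -> C1 (FSW bit 9)  */
}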
1057 void helper_fyl2xp1(CPUX86State *env)
1059 double fptemp = floatx80_to_double(env, ST0);
1061 if ((fptemp + 1.0) > 0.0) {
1062 fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
1063 fptemp *= floatx80_to_double(env, ST1);
1064 ST1 = double_to_floatx80(env, fptemp);
1065 fpop(env);
1066 } else {
1067 env->fpus &= ~0x4700;
1068 env->fpus |= 0x400;
1072 void helper_fsqrt(CPUX86State *env)
1074 uint8_t old_flags = save_exception_flags(env);
1075 if (floatx80_is_neg(ST0)) {
1076 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1077 env->fpus |= 0x400;
1079 ST0 = floatx80_sqrt(ST0, &env->fp_status);
1080 merge_exception_flags(env, old_flags);
1083 void helper_fsincos(CPUX86State *env)
1085 double fptemp = floatx80_to_double(env, ST0);
1087 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1088 env->fpus |= 0x400;
1089 } else {
1090 ST0 = double_to_floatx80(env, sin(fptemp));
1091 fpush(env);
1092 ST0 = double_to_floatx80(env, cos(fptemp));
1093 env->fpus &= ~0x400; /* C2 <-- 0 */
1094 /* the above code is for |arg| < 2**63 only */
1098 void helper_frndint(CPUX86State *env)
1100 uint8_t old_flags = save_exception_flags(env);
1101 ST0 = floatx80_round_to_int(ST0, &env->fp_status);
1102 merge_exception_flags(env, old_flags);
1105 void helper_fscale(CPUX86State *env)
1107 uint8_t old_flags = save_exception_flags(env);
1108 if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) {
1109 float_raise(float_flag_invalid, &env->fp_status);
1110 ST0 = floatx80_default_nan(&env->fp_status);
1111 } else if (floatx80_is_any_nan(ST1)) {
1112 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1113 float_raise(float_flag_invalid, &env->fp_status);
1115 ST0 = ST1;
1116 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1117 float_raise(float_flag_invalid, &env->fp_status);
1118 ST0 = floatx80_silence_nan(ST0, &env->fp_status);
1120 } else if (floatx80_is_infinity(ST1) &&
1121 !floatx80_invalid_encoding(ST0) &&
1122 !floatx80_is_any_nan(ST0)) {
1123 if (floatx80_is_neg(ST1)) {
1124 if (floatx80_is_infinity(ST0)) {
1125 float_raise(float_flag_invalid, &env->fp_status);
1126 ST0 = floatx80_default_nan(&env->fp_status);
1127 } else {
1128 ST0 = (floatx80_is_neg(ST0) ?
1129 floatx80_chs(floatx80_zero) :
1130 floatx80_zero);
1132 } else {
1133 if (floatx80_is_zero(ST0)) {
1134 float_raise(float_flag_invalid, &env->fp_status);
1135 ST0 = floatx80_default_nan(&env->fp_status);
1136 } else {
1137 ST0 = (floatx80_is_neg(ST0) ?
1138 floatx80_chs(floatx80_infinity) :
1139 floatx80_infinity);
1142 } else {
1143 int n;
1144 signed char save = env->fp_status.floatx80_rounding_precision;
1145 uint8_t save_flags = get_float_exception_flags(&env->fp_status);
1146 set_float_exception_flags(0, &env->fp_status);
1147 n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
1148 set_float_exception_flags(save_flags, &env->fp_status);
1149 env->fp_status.floatx80_rounding_precision = 80;
1150 ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
1151 env->fp_status.floatx80_rounding_precision = save;
1153 merge_exception_flags(env, old_flags);
1156 void helper_fsin(CPUX86State *env)
1158 double fptemp = floatx80_to_double(env, ST0);
1160 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1161 env->fpus |= 0x400;
1162 } else {
1163 ST0 = double_to_floatx80(env, sin(fptemp));
1164 env->fpus &= ~0x400; /* C2 <-- 0 */
1165 /* the above code is for |arg| < 2**53 only */
1169 void helper_fcos(CPUX86State *env)
1171 double fptemp = floatx80_to_double(env, ST0);
1173 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1174 env->fpus |= 0x400;
1175 } else {
1176 ST0 = double_to_floatx80(env, cos(fptemp));
1177 env->fpus &= ~0x400; /* C2 <-- 0 */
1178 /* the above code is for |arg| < 2**63 only */
1182 void helper_fxam_ST0(CPUX86State *env)
1184 CPU_LDoubleU temp;
1185 int expdif;
1187 temp.d = ST0;
1189 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1190 if (SIGND(temp)) {
1191 env->fpus |= 0x200; /* C1 <-- 1 */
1194 if (env->fptags[env->fpstt]) {
1195 env->fpus |= 0x4100; /* Empty */
1196 return;
1199 expdif = EXPD(temp);
1200 if (expdif == MAXEXPD) {
1201 if (MANTD(temp) == 0x8000000000000000ULL) {
1202 env->fpus |= 0x500; /* Infinity */
1203 } else if (MANTD(temp) & 0x8000000000000000ULL) {
1204 env->fpus |= 0x100; /* NaN */
1206 } else if (expdif == 0) {
1207 if (MANTD(temp) == 0) {
1208 env->fpus |= 0x4000; /* Zero */
1209 } else {
1210 env->fpus |= 0x4400; /* Denormal */
1212 } else if (MANTD(temp) & 0x8000000000000000ULL) {
1213 env->fpus |= 0x400;
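/*
 * Illustrative sketch, not part of the original file: FXAM encodes the
 * operand class in C3/C2/C0 (C1 carries the sign):
 *   0x0100 (C0)      NaN           0x0400 (C2)      normal finite
 *   0x0500 (C2|C0)   infinity      0x4000 (C3)      zero
 *   0x4100 (C3|C0)   empty slot    0x4400 (C3|C2)   denormal
 * A hypothetical classifier over the raw exponent/significand fields,
 * mirroring the tests above (0 stands for "unsupported" encodings):
 */
static inline int example_fxam_class(int exp, uint64_t mant)
{
    if (exp == MAXEXPD) {
        if (mant == (1ULL << 63)) {
            return 0x0500;                      /* infinity */
        }
        return (mant >> 63) ? 0x0100 : 0;       /* NaN */
    }
    if (exp == 0) {
        return mant == 0 ? 0x4000 : 0x4400;     /* zero or denormal */
    }
    return (mant >> 63) ? 0x0400 : 0;           /* normal */
}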
1217 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
1218 uintptr_t retaddr)
1220 int fpus, fptag, exp, i;
1221 uint64_t mant;
1222 CPU_LDoubleU tmp;
1224 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1225 fptag = 0;
1226 for (i = 7; i >= 0; i--) {
1227 fptag <<= 2;
1228 if (env->fptags[i]) {
1229 fptag |= 3;
1230 } else {
1231 tmp.d = env->fpregs[i].d;
1232 exp = EXPD(tmp);
1233 mant = MANTD(tmp);
1234 if (exp == 0 && mant == 0) {
1235 /* zero */
1236 fptag |= 1;
1237 } else if (exp == 0 || exp == MAXEXPD
1238 || (mant & (1LL << 63)) == 0) {
1239 /* NaNs, infinity, denormal */
1240 fptag |= 2;
1244 if (data32) {
1245 /* 32 bit */
1246 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1247 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1248 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1249 cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1250 cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1251 cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1252 cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
1253 } else {
1254 /* 16 bit */
1255 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1256 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1257 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1258 cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1259 cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1260 cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1261 cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
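/*
 * Illustrative sketch, not part of the original file: the environment
 * image written above is seven fields (FCW, FSW, FTW and the four
 * instruction/operand pointer slots, which this emulation stores as zero),
 * i.e. 28 bytes with 32-bit operands or 14 bytes with 16-bit operands.
 * That is why helper_fsave() below skips "14 << data32" bytes before the
 * register data.  A hypothetical size helper:
 */
static inline int example_fenv_size(int data32)
{
    /* data32 is 0 or 1: 14-byte 16-bit image or 28-byte 32-bit image */
    return 14 << data32;
}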
1265 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1267 do_fstenv(env, ptr, data32, GETPC());
1270 static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
1272 env->fpstt = (fpus >> 11) & 7;
1273 env->fpus = fpus & ~0x3800 & ~FPUS_B;
1274 env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
1275 #if !defined(CONFIG_USER_ONLY)
1276 if (!(env->fpus & FPUS_SE)) {
1278 * Here the processor deasserts FERR#; in response, the chipset deasserts
1279 * IGNNE#.
1281 cpu_clear_ignne();
1283 #endif
1286 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1287 uintptr_t retaddr)
1289 int i, fpus, fptag;
1291 if (data32) {
1292 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1293 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1294 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1295 } else {
1296 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1297 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1298 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1300 cpu_set_fpus(env, fpus);
1301 for (i = 0; i < 8; i++) {
1302 env->fptags[i] = ((fptag & 3) == 3);
1303 fptag >>= 2;
1307 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1309 do_fldenv(env, ptr, data32, GETPC());
1312 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1314 floatx80 tmp;
1315 int i;
1317 do_fstenv(env, ptr, data32, GETPC());
1319 ptr += (14 << data32);
1320 for (i = 0; i < 8; i++) {
1321 tmp = ST(i);
1322 helper_fstt(env, tmp, ptr, GETPC());
1323 ptr += 10;
1326 /* fninit */
1327 env->fpus = 0;
1328 env->fpstt = 0;
1329 cpu_set_fpuc(env, 0x37f);
1330 env->fptags[0] = 1;
1331 env->fptags[1] = 1;
1332 env->fptags[2] = 1;
1333 env->fptags[3] = 1;
1334 env->fptags[4] = 1;
1335 env->fptags[5] = 1;
1336 env->fptags[6] = 1;
1337 env->fptags[7] = 1;
1340 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
1342 floatx80 tmp;
1343 int i;
1345 do_fldenv(env, ptr, data32, GETPC());
1346 ptr += (14 << data32);
1348 for (i = 0; i < 8; i++) {
1349 tmp = helper_fldt(env, ptr, GETPC());
1350 ST(i) = tmp;
1351 ptr += 10;
1355 #if defined(CONFIG_USER_ONLY)
1356 void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
1358 helper_fsave(env, ptr, data32);
1361 void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
1363 helper_frstor(env, ptr, data32);
1365 #endif
1367 #define XO(X) offsetof(X86XSaveArea, X)
1369 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1371 int fpus, fptag, i;
1372 target_ulong addr;
1374 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1375 fptag = 0;
1376 for (i = 0; i < 8; i++) {
1377 fptag |= (env->fptags[i] << i);
1380 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
1381 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
1382 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
1384 /* In 32-bit mode this is eip, sel, dp, sel.
1385 In 64-bit mode this is rip, rdp.
1386 But in either case we don't write actual data, just zeros. */
1387 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
1388 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
1390 addr = ptr + XO(legacy.fpregs);
1391 for (i = 0; i < 8; i++) {
1392 floatx80 tmp = ST(i);
1393 helper_fstt(env, tmp, addr, ra);
1394 addr += 16;
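/*
 * Illustrative sketch, not part of the original file: FXSAVE/XSAVE store an
 * "abridged" tag word with one bit per register where 1 means valid, while
 * env->fptags[] uses 1 to mean empty; hence the "fptag ^ 0xff" above.  Each
 * 80-bit register also occupies a full 16-byte slot in the legacy area.
 * A hypothetical round trip to the abridged form:
 */
static inline uint8_t example_abridged_ftw(const uint8_t *fptags)
{
    uint8_t ftw = 0;
    int i;

    for (i = 0; i < 8; i++) {
        ftw |= fptags[i] << i;      /* gather the per-register "empty" bits */
    }
    return ftw ^ 0xff;              /* flip so that 1 now means "valid" */
}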
1398 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1400 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
1401 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
1404 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1406 int i, nb_xmm_regs;
1407 target_ulong addr;
1409 if (env->hflags & HF_CS64_MASK) {
1410 nb_xmm_regs = 16;
1411 } else {
1412 nb_xmm_regs = 8;
1415 addr = ptr + XO(legacy.xmm_regs);
1416 for (i = 0; i < nb_xmm_regs; i++) {
1417 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1418 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1419 addr += 16;
1423 static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1425 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1426 int i;
1428 for (i = 0; i < 4; i++, addr += 16) {
1429 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1430 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1434 static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1436 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
1437 env->bndcs_regs.cfgu, ra);
1438 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
1439 env->bndcs_regs.sts, ra);
1442 static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1444 cpu_stq_data_ra(env, ptr, env->pkru, ra);
1447 void helper_fxsave(CPUX86State *env, target_ulong ptr)
1449 uintptr_t ra = GETPC();
1451 /* The operand must be 16 byte aligned */
1452 if (ptr & 0xf) {
1453 raise_exception_ra(env, EXCP0D_GPF, ra);
1456 do_xsave_fpu(env, ptr, ra);
1458 if (env->cr[4] & CR4_OSFXSR_MASK) {
1459 do_xsave_mxcsr(env, ptr, ra);
1460 /* Fast FXSAVE leaves out the XMM registers */
1461 if (!(env->efer & MSR_EFER_FFXSR)
1462 || (env->hflags & HF_CPL_MASK)
1463 || !(env->hflags & HF_LMA_MASK)) {
1464 do_xsave_sse(env, ptr, ra);
1469 static uint64_t get_xinuse(CPUX86State *env)
1471 uint64_t inuse = -1;
1473 /* For the most part, we don't track XINUSE. We could calculate it
1474 here for all components, but it's probably less work to simply
1475 indicate in use. That said, the state of BNDREGS is important
1476 enough to track in HFLAGS, so we might as well use that here. */
1477 if ((env->hflags & HF_MPX_IU_MASK) == 0) {
1478 inuse &= ~XSTATE_BNDREGS_MASK;
1480 return inuse;
1483 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1484 uint64_t inuse, uint64_t opt, uintptr_t ra)
1486 uint64_t old_bv, new_bv;
1488 /* The OS must have enabled XSAVE. */
1489 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1490 raise_exception_ra(env, EXCP06_ILLOP, ra);
1493 /* The operand must be 64 byte aligned. */
1494 if (ptr & 63) {
1495 raise_exception_ra(env, EXCP0D_GPF, ra);
1498 /* Never save anything not enabled by XCR0. */
1499 rfbm &= env->xcr0;
1500 opt &= rfbm;
1502 if (opt & XSTATE_FP_MASK) {
1503 do_xsave_fpu(env, ptr, ra);
1505 if (rfbm & XSTATE_SSE_MASK) {
1506 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
1507 do_xsave_mxcsr(env, ptr, ra);
1509 if (opt & XSTATE_SSE_MASK) {
1510 do_xsave_sse(env, ptr, ra);
1512 if (opt & XSTATE_BNDREGS_MASK) {
1513 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
1515 if (opt & XSTATE_BNDCSR_MASK) {
1516 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
1518 if (opt & XSTATE_PKRU_MASK) {
1519 do_xsave_pkru(env, ptr + XO(pkru_state), ra);
1522 /* Update the XSTATE_BV field. */
1523 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1524 new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
1525 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
1528 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1530 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1533 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1535 uint64_t inuse = get_xinuse(env);
1536 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1539 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1541 int i, fpuc, fpus, fptag;
1542 target_ulong addr;
1544 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
1545 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
1546 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
1547 cpu_set_fpuc(env, fpuc);
1548 cpu_set_fpus(env, fpus);
1549 fptag ^= 0xff;
1550 for (i = 0; i < 8; i++) {
1551 env->fptags[i] = ((fptag >> i) & 1);
1554 addr = ptr + XO(legacy.fpregs);
1555 for (i = 0; i < 8; i++) {
1556 floatx80 tmp = helper_fldt(env, addr, ra);
1557 ST(i) = tmp;
1558 addr += 16;
1562 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1564 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
1567 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1569 int i, nb_xmm_regs;
1570 target_ulong addr;
1572 if (env->hflags & HF_CS64_MASK) {
1573 nb_xmm_regs = 16;
1574 } else {
1575 nb_xmm_regs = 8;
1578 addr = ptr + XO(legacy.xmm_regs);
1579 for (i = 0; i < nb_xmm_regs; i++) {
1580 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1581 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1582 addr += 16;
1586 static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1588 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1589 int i;
1591 for (i = 0; i < 4; i++, addr += 16) {
1592 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1593 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1597 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1599 /* FIXME: Extend highest implemented bit of linear address. */
1600 env->bndcs_regs.cfgu
1601 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
1602 env->bndcs_regs.sts
1603 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
1606 static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1608 env->pkru = cpu_ldq_data_ra(env, ptr, ra);
1611 void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1613 uintptr_t ra = GETPC();
1615 /* The operand must be 16 byte aligned */
1616 if (ptr & 0xf) {
1617 raise_exception_ra(env, EXCP0D_GPF, ra);
1620 do_xrstor_fpu(env, ptr, ra);
1622 if (env->cr[4] & CR4_OSFXSR_MASK) {
1623 do_xrstor_mxcsr(env, ptr, ra);
1624 /* Fast FXRSTOR leaves out the XMM registers */
1625 if (!(env->efer & MSR_EFER_FFXSR)
1626 || (env->hflags & HF_CPL_MASK)
1627 || !(env->hflags & HF_LMA_MASK)) {
1628 do_xrstor_sse(env, ptr, ra);
1633 #if defined(CONFIG_USER_ONLY)
1634 void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
1636 helper_fxsave(env, ptr);
1639 void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
1641 helper_fxrstor(env, ptr);
1643 #endif
1645 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1647 uintptr_t ra = GETPC();
1648 uint64_t xstate_bv, xcomp_bv, reserve0;
1650 rfbm &= env->xcr0;
1652 /* The OS must have enabled XSAVE. */
1653 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1654 raise_exception_ra(env, EXCP06_ILLOP, ra);
1657 /* The operand must be 64 byte aligned. */
1658 if (ptr & 63) {
1659 raise_exception_ra(env, EXCP0D_GPF, ra);
1662 xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1664 if ((int64_t)xstate_bv < 0) {
1665 /* FIXME: Compact form. */
1666 raise_exception_ra(env, EXCP0D_GPF, ra);
1669 /* Standard form. */
1671 /* The XSTATE_BV field must not set bits not present in XCR0. */
1672 if (xstate_bv & ~env->xcr0) {
1673 raise_exception_ra(env, EXCP0D_GPF, ra);
1676 /* The XCOMP_BV field must be zero. Note that, as of the April 2016
1677 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
1678 describes only XCOMP_BV, but the description of the standard form
1679 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
1680 includes the next 64-bit field. */
1681 xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
1682 reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
1683 if (xcomp_bv || reserve0) {
1684 raise_exception_ra(env, EXCP0D_GPF, ra);
1687 if (rfbm & XSTATE_FP_MASK) {
1688 if (xstate_bv & XSTATE_FP_MASK) {
1689 do_xrstor_fpu(env, ptr, ra);
1690 } else {
1691 helper_fninit(env);
1692 memset(env->fpregs, 0, sizeof(env->fpregs));
1695 if (rfbm & XSTATE_SSE_MASK) {
1696 /* Note that the standard form of XRSTOR loads MXCSR from memory
1697 whether or not the XSTATE_BV bit is set. */
1698 do_xrstor_mxcsr(env, ptr, ra);
1699 if (xstate_bv & XSTATE_SSE_MASK) {
1700 do_xrstor_sse(env, ptr, ra);
1701 } else {
1702 /* ??? When AVX is implemented, we may have to be more
1703 selective in the clearing. */
1704 memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
1707 if (rfbm & XSTATE_BNDREGS_MASK) {
1708 if (xstate_bv & XSTATE_BNDREGS_MASK) {
1709 do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
1710 env->hflags |= HF_MPX_IU_MASK;
1711 } else {
1712 memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
1713 env->hflags &= ~HF_MPX_IU_MASK;
1716 if (rfbm & XSTATE_BNDCSR_MASK) {
1717 if (xstate_bv & XSTATE_BNDCSR_MASK) {
1718 do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
1719 } else {
1720 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
1722 cpu_sync_bndcs_hflags(env);
1724 if (rfbm & XSTATE_PKRU_MASK) {
1725 uint64_t old_pkru = env->pkru;
1726 if (xstate_bv & XSTATE_PKRU_MASK) {
1727 do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
1728 } else {
1729 env->pkru = 0;
1731 if (env->pkru != old_pkru) {
1732 CPUState *cs = env_cpu(env);
1733 tlb_flush(cs);
1738 #undef XO
1740 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1742 /* The OS must have enabled XSAVE. */
1743 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1744 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1747 switch (ecx) {
1748 case 0:
1749 return env->xcr0;
1750 case 1:
1751 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1752 return env->xcr0 & get_xinuse(env);
1754 break;
1756 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1759 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1761 uint32_t dummy, ena_lo, ena_hi;
1762 uint64_t ena;
1764 /* The OS must have enabled XSAVE. */
1765 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1766 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1769 /* Only XCR0 is defined at present; the FPU may not be disabled. */
1770 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
1771 goto do_gpf;
1774 /* Disallow enabling unimplemented features. */
1775 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
1776 ena = ((uint64_t)ena_hi << 32) | ena_lo;
1777 if (mask & ~ena) {
1778 goto do_gpf;
1781 /* Disallow enabling only half of MPX. */
1782 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
1783 & XSTATE_BNDCSR_MASK) {
1784 goto do_gpf;
1787 env->xcr0 = mask;
1788 cpu_sync_bndcs_hflags(env);
1789 return;
1791 do_gpf:
1792 raise_exception_ra(env, EXCP0D_GPF, GETPC());
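/*
 * Illustrative sketch, not part of the original file: the MPX consistency
 * check above exploits XSTATE_BNDCSR_MASK being exactly twice
 * XSTATE_BNDREGS_MASK (bits 4 and 3), so "mask * (BNDCSR / BNDREGS)"
 * shifts the BNDREGS bit onto the BNDCSR position and the XOR is non-zero
 * there only when one of the two features is enabled without the other.
 * The same condition written out plainly:
 */
static inline bool example_mpx_half_enabled(uint64_t mask)
{
    return !(mask & XSTATE_BNDREGS_MASK) != !(mask & XSTATE_BNDCSR_MASK);
}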
1795 /* MMX/SSE */
1796 /* XXX: optimize by storing fptt and fptags in the static cpu state */
1798 #define SSE_DAZ 0x0040
1799 #define SSE_RC_MASK 0x6000
1800 #define SSE_RC_NEAR 0x0000
1801 #define SSE_RC_DOWN 0x2000
1802 #define SSE_RC_UP 0x4000
1803 #define SSE_RC_CHOP 0x6000
1804 #define SSE_FZ 0x8000
1806 void update_mxcsr_status(CPUX86State *env)
1808 uint32_t mxcsr = env->mxcsr;
1809 int rnd_type;
1811 /* set rounding mode */
1812 switch (mxcsr & SSE_RC_MASK) {
1813 default:
1814 case SSE_RC_NEAR:
1815 rnd_type = float_round_nearest_even;
1816 break;
1817 case SSE_RC_DOWN:
1818 rnd_type = float_round_down;
1819 break;
1820 case SSE_RC_UP:
1821 rnd_type = float_round_up;
1822 break;
1823 case SSE_RC_CHOP:
1824 rnd_type = float_round_to_zero;
1825 break;
1827 set_float_rounding_mode(rnd_type, &env->sse_status);
1829 /* set denormals are zero */
1830 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
1832 /* set flush to zero */
1833     set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
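/*
 * Illustrative note, not part of the original file: DAZ and FTZ are two
 * distinct non-IEEE shortcuts, and both belong to the SSE unit
 * (env->sse_status): DAZ (MXCSR bit 6) treats denormal inputs as zero
 * before an operation, while FTZ (MXCSR bit 15) flushes a tiny result to
 * zero instead of producing a denormal.  Hypothetical accessors:
 */
static inline bool example_mxcsr_daz(uint32_t mxcsr)
{
    return (mxcsr & SSE_DAZ) != 0;   /* denormals-are-zero, bit 6 */
}

static inline bool example_mxcsr_ftz(uint32_t mxcsr)
{
    return (mxcsr & SSE_FZ) != 0;    /* flush-to-zero, bit 15 */
}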
1836 void helper_ldmxcsr(CPUX86State *env, uint32_t val)
1838 cpu_set_mxcsr(env, val);
1841 void helper_enter_mmx(CPUX86State *env)
1843 env->fpstt = 0;
1844 *(uint32_t *)(env->fptags) = 0;
1845 *(uint32_t *)(env->fptags + 4) = 0;
1848 void helper_emms(CPUX86State *env)
1850 /* set to empty state */
1851 *(uint32_t *)(env->fptags) = 0x01010101;
1852 *(uint32_t *)(env->fptags + 4) = 0x01010101;
1855 /* XXX: suppress */
1856 void helper_movq(CPUX86State *env, void *d, void *s)
1858 *(uint64_t *)d = *(uint64_t *)s;
1861 #define SHIFT 0
1862 #include "ops_sse.h"
1864 #define SHIFT 1
1865 #include "ops_sse.h"