s390x: add compat machine for 2.7
[qemu.git] / target-i386 / fpu_helper.c
blobfee5573a10768b97487762bdbb84152bf7f40e51
1 /*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include <math.h>
22 #include "cpu.h"
23 #include "exec/helper-proto.h"
24 #include "qemu/host-utils.h"
25 #include "exec/cpu_ldst.h"
/* Rounding-control field of the x87 control word and its four encodings;
   update_fp_status() maps these onto the softfloat rounding modes. */
27 #define FPU_RC_MASK 0xc00
28 #define FPU_RC_NEAR 0x000
29 #define FPU_RC_DOWN 0x400
30 #define FPU_RC_UP 0x800
31 #define FPU_RC_CHOP 0xc00
/* Magnitude limit (2^63) tested by the fptan/fsin/fcos/fsincos helpers;
   operands outside +/-MAXTAN only get C2 set. */
33 #define MAXTAN 9223372036854775808.0
35 /* the following deal with x86 long double-precision numbers */
/* Accessors for a CPU_LDoubleU: "upper" holds sign + 15-bit exponent,
   "lower" holds the 64-bit significand. */
36 #define MAXEXPD 0x7fff
37 #define EXPBIAS 16383
38 #define EXPD(fp) (fp.l.upper & 0x7fff)
39 #define SIGND(fp) ((fp.l.upper) & 0x8000)
40 #define MANTD(fp) (fp.l.lower)
/* Overwrites the exponent field with the bias while keeping the sign bit.
   Expands to a bare assignment statement, so use it as a statement only. */
41 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
/* Bits of the FPU status word (env->fpus). fpu_set_exception() sets
   FPUS_SE and FPUS_B when an unmasked exception bit is pending. */
43 #define FPUS_IE (1 << 0)
44 #define FPUS_DE (1 << 1)
45 #define FPUS_ZE (1 << 2)
46 #define FPUS_OE (1 << 3)
47 #define FPUS_UE (1 << 4)
48 #define FPUS_PE (1 << 5)
49 #define FPUS_SF (1 << 6)
50 #define FPUS_SE (1 << 7)
51 #define FPUS_B (1 << 15)
/* Low six bits of the control word, used by fpu_set_exception() as the
   exception-mask field. */
53 #define FPUC_EM 0x3f
/* Constants loaded by the fldlg2 / fldl2e / fldl2t helpers. */
55 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
56 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
57 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
59 static inline void fpush(CPUX86State *env)
61 env->fpstt = (env->fpstt - 1) & 7;
62 env->fptags[env->fpstt] = 0; /* validate stack entry */
65 static inline void fpop(CPUX86State *env)
67 env->fptags[env->fpstt] = 1; /* invalidate stack entry */
68 env->fpstt = (env->fpstt + 1) & 7;
71 static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
72 uintptr_t retaddr)
74 CPU_LDoubleU temp;
76 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
77 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
78 return temp.d;
81 static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
82 uintptr_t retaddr)
84 CPU_LDoubleU temp;
86 temp.d = f;
87 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
88 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
91 /* x87 FPU helpers */
93 static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
95 union {
96 float64 f64;
97 double d;
98 } u;
100 u.f64 = floatx80_to_float64(a, &env->fp_status);
101 return u.d;
104 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
106 union {
107 float64 f64;
108 double d;
109 } u;
111 u.d = a;
112 return float64_to_floatx80(u.f64, &env->fp_status);
115 static void fpu_set_exception(CPUX86State *env, int mask)
117 env->fpus |= mask;
118 if (env->fpus & (~env->fpuc & FPUC_EM)) {
119 env->fpus |= FPUS_SE | FPUS_B;
123 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
125 if (floatx80_is_zero(b)) {
126 fpu_set_exception(env, FPUS_ZE);
128 return floatx80_div(a, b, &env->fp_status);
131 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
133 if (env->cr[0] & CR0_NE_MASK) {
134 raise_exception_ra(env, EXCP10_COPR, retaddr);
136 #if !defined(CONFIG_USER_ONLY)
137 else {
138 cpu_set_ferr(env);
140 #endif
143 void helper_flds_FT0(CPUX86State *env, uint32_t val)
145 union {
146 float32 f;
147 uint32_t i;
148 } u;
150 u.i = val;
151 FT0 = float32_to_floatx80(u.f, &env->fp_status);
154 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
156 union {
157 float64 f;
158 uint64_t i;
159 } u;
161 u.i = val;
162 FT0 = float64_to_floatx80(u.f, &env->fp_status);
165 void helper_fildl_FT0(CPUX86State *env, int32_t val)
167 FT0 = int32_to_floatx80(val, &env->fp_status);
170 void helper_flds_ST0(CPUX86State *env, uint32_t val)
172 int new_fpstt;
173 union {
174 float32 f;
175 uint32_t i;
176 } u;
178 new_fpstt = (env->fpstt - 1) & 7;
179 u.i = val;
180 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
181 env->fpstt = new_fpstt;
182 env->fptags[new_fpstt] = 0; /* validate stack entry */
185 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
187 int new_fpstt;
188 union {
189 float64 f;
190 uint64_t i;
191 } u;
193 new_fpstt = (env->fpstt - 1) & 7;
194 u.i = val;
195 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
196 env->fpstt = new_fpstt;
197 env->fptags[new_fpstt] = 0; /* validate stack entry */
200 void helper_fildl_ST0(CPUX86State *env, int32_t val)
202 int new_fpstt;
204 new_fpstt = (env->fpstt - 1) & 7;
205 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
206 env->fpstt = new_fpstt;
207 env->fptags[new_fpstt] = 0; /* validate stack entry */
210 void helper_fildll_ST0(CPUX86State *env, int64_t val)
212 int new_fpstt;
214 new_fpstt = (env->fpstt - 1) & 7;
215 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
216 env->fpstt = new_fpstt;
217 env->fptags[new_fpstt] = 0; /* validate stack entry */
220 uint32_t helper_fsts_ST0(CPUX86State *env)
222 union {
223 float32 f;
224 uint32_t i;
225 } u;
227 u.f = floatx80_to_float32(ST0, &env->fp_status);
228 return u.i;
231 uint64_t helper_fstl_ST0(CPUX86State *env)
233 union {
234 float64 f;
235 uint64_t i;
236 } u;
238 u.f = floatx80_to_float64(ST0, &env->fp_status);
239 return u.i;
242 int32_t helper_fist_ST0(CPUX86State *env)
244 int32_t val;
246 val = floatx80_to_int32(ST0, &env->fp_status);
247 if (val != (int16_t)val) {
248 val = -32768;
250 return val;
253 int32_t helper_fistl_ST0(CPUX86State *env)
255 int32_t val;
256 signed char old_exp_flags;
258 old_exp_flags = get_float_exception_flags(&env->fp_status);
259 set_float_exception_flags(0, &env->fp_status);
261 val = floatx80_to_int32(ST0, &env->fp_status);
262 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
263 val = 0x80000000;
265 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
266 | old_exp_flags, &env->fp_status);
267 return val;
270 int64_t helper_fistll_ST0(CPUX86State *env)
272 int64_t val;
273 signed char old_exp_flags;
275 old_exp_flags = get_float_exception_flags(&env->fp_status);
276 set_float_exception_flags(0, &env->fp_status);
278 val = floatx80_to_int64(ST0, &env->fp_status);
279 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
280 val = 0x8000000000000000ULL;
282 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
283 | old_exp_flags, &env->fp_status);
284 return val;
287 int32_t helper_fistt_ST0(CPUX86State *env)
289 int32_t val;
291 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
292 if (val != (int16_t)val) {
293 val = -32768;
295 return val;
298 int32_t helper_fisttl_ST0(CPUX86State *env)
300 int32_t val;
302 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
303 return val;
306 int64_t helper_fisttll_ST0(CPUX86State *env)
308 int64_t val;
310 val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
311 return val;
314 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
316 int new_fpstt;
318 new_fpstt = (env->fpstt - 1) & 7;
319 env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
320 env->fpstt = new_fpstt;
321 env->fptags[new_fpstt] = 0; /* validate stack entry */
324 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
326 helper_fstt(env, ST0, ptr, GETPC());
329 void helper_fpush(CPUX86State *env)
331 fpush(env);
334 void helper_fpop(CPUX86State *env)
336 fpop(env);
339 void helper_fdecstp(CPUX86State *env)
341 env->fpstt = (env->fpstt - 1) & 7;
342 env->fpus &= ~0x4700;
345 void helper_fincstp(CPUX86State *env)
347 env->fpstt = (env->fpstt + 1) & 7;
348 env->fpus &= ~0x4700;
351 /* FPU move */
353 void helper_ffree_STN(CPUX86State *env, int st_index)
355 env->fptags[(env->fpstt + st_index) & 7] = 1;
358 void helper_fmov_ST0_FT0(CPUX86State *env)
360 ST0 = FT0;
363 void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
365 FT0 = ST(st_index);
368 void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
370 ST0 = ST(st_index);
373 void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
375 ST(st_index) = ST0;
378 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
380 floatx80 tmp;
382 tmp = ST(st_index);
383 ST(st_index) = ST0;
384 ST0 = tmp;
387 /* FPU operations */
/* Status-word condition bits for a comparison, indexed by (result + 1)
   where result comes from floatx80_compare()/floatx80_compare_quiet().
   NOTE(review): presumably the order is less / equal / greater /
   unordered, following softfloat's relation encoding - confirm. */
389 static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
391 void helper_fcom_ST0_FT0(CPUX86State *env)
393 int ret;
395 ret = floatx80_compare(ST0, FT0, &env->fp_status);
396 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
399 void helper_fucom_ST0_FT0(CPUX86State *env)
401 int ret;
403 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
404 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
/* EFLAGS bits (CC_Z / CC_P / CC_C) for a comparison, indexed by
   (result + 1) where result comes from floatx80_compare() or
   floatx80_compare_quiet().
   NOTE(review): presumably the order is less / equal / greater /
   unordered, following softfloat's relation encoding - confirm. */
407 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
409 void helper_fcomi_ST0_FT0(CPUX86State *env)
411 int eflags;
412 int ret;
414 ret = floatx80_compare(ST0, FT0, &env->fp_status);
415 eflags = cpu_cc_compute_all(env, CC_OP);
416 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
417 CC_SRC = eflags;
420 void helper_fucomi_ST0_FT0(CPUX86State *env)
422 int eflags;
423 int ret;
425 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
426 eflags = cpu_cc_compute_all(env, CC_OP);
427 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
428 CC_SRC = eflags;
431 void helper_fadd_ST0_FT0(CPUX86State *env)
433 ST0 = floatx80_add(ST0, FT0, &env->fp_status);
436 void helper_fmul_ST0_FT0(CPUX86State *env)
438 ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
441 void helper_fsub_ST0_FT0(CPUX86State *env)
443 ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
446 void helper_fsubr_ST0_FT0(CPUX86State *env)
448 ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
451 void helper_fdiv_ST0_FT0(CPUX86State *env)
453 ST0 = helper_fdiv(env, ST0, FT0);
456 void helper_fdivr_ST0_FT0(CPUX86State *env)
458 ST0 = helper_fdiv(env, FT0, ST0);
461 /* fp operations between STN and ST0 */
463 void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
465 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
468 void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
470 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
473 void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
475 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
478 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
480 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
483 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
485 floatx80 *p;
487 p = &ST(st_index);
488 *p = helper_fdiv(env, *p, ST0);
491 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
493 floatx80 *p;
495 p = &ST(st_index);
496 *p = helper_fdiv(env, ST0, *p);
499 /* misc FPU operations */
500 void helper_fchs_ST0(CPUX86State *env)
502 ST0 = floatx80_chs(ST0);
505 void helper_fabs_ST0(CPUX86State *env)
507 ST0 = floatx80_abs(ST0);
510 void helper_fld1_ST0(CPUX86State *env)
512 ST0 = floatx80_one;
515 void helper_fldl2t_ST0(CPUX86State *env)
517 ST0 = floatx80_l2t;
520 void helper_fldl2e_ST0(CPUX86State *env)
522 ST0 = floatx80_l2e;
525 void helper_fldpi_ST0(CPUX86State *env)
527 ST0 = floatx80_pi;
530 void helper_fldlg2_ST0(CPUX86State *env)
532 ST0 = floatx80_lg2;
535 void helper_fldln2_ST0(CPUX86State *env)
537 ST0 = floatx80_ln2;
540 void helper_fldz_ST0(CPUX86State *env)
542 ST0 = floatx80_zero;
545 void helper_fldz_FT0(CPUX86State *env)
547 FT0 = floatx80_zero;
550 uint32_t helper_fnstsw(CPUX86State *env)
552 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
555 uint32_t helper_fnstcw(CPUX86State *env)
557 return env->fpuc;
560 void update_fp_status(CPUX86State *env)
562 int rnd_type;
564 /* set rounding mode */
565 switch (env->fpuc & FPU_RC_MASK) {
566 default:
567 case FPU_RC_NEAR:
568 rnd_type = float_round_nearest_even;
569 break;
570 case FPU_RC_DOWN:
571 rnd_type = float_round_down;
572 break;
573 case FPU_RC_UP:
574 rnd_type = float_round_up;
575 break;
576 case FPU_RC_CHOP:
577 rnd_type = float_round_to_zero;
578 break;
580 set_float_rounding_mode(rnd_type, &env->fp_status);
581 switch ((env->fpuc >> 8) & 3) {
582 case 0:
583 rnd_type = 32;
584 break;
585 case 2:
586 rnd_type = 64;
587 break;
588 case 3:
589 default:
590 rnd_type = 80;
591 break;
593 set_floatx80_rounding_precision(rnd_type, &env->fp_status);
596 void helper_fldcw(CPUX86State *env, uint32_t val)
598 cpu_set_fpuc(env, val);
601 void helper_fclex(CPUX86State *env)
603 env->fpus &= 0x7f00;
606 void helper_fwait(CPUX86State *env)
608 if (env->fpus & FPUS_SE) {
609 fpu_raise_exception(env, GETPC());
613 void helper_fninit(CPUX86State *env)
615 env->fpus = 0;
616 env->fpstt = 0;
617 cpu_set_fpuc(env, 0x37f);
618 env->fptags[0] = 1;
619 env->fptags[1] = 1;
620 env->fptags[2] = 1;
621 env->fptags[3] = 1;
622 env->fptags[4] = 1;
623 env->fptags[5] = 1;
624 env->fptags[6] = 1;
625 env->fptags[7] = 1;
628 /* BCD ops */
630 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
632 floatx80 tmp;
633 uint64_t val;
634 unsigned int v;
635 int i;
637 val = 0;
638 for (i = 8; i >= 0; i--) {
639 v = cpu_ldub_data_ra(env, ptr + i, GETPC());
640 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
642 tmp = int64_to_floatx80(val, &env->fp_status);
643 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
644 tmp = floatx80_chs(tmp);
646 fpush(env);
647 ST0 = tmp;
650 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
652 int v;
653 target_ulong mem_ref, mem_end;
654 int64_t val;
656 val = floatx80_to_int64(ST0, &env->fp_status);
657 mem_ref = ptr;
658 mem_end = mem_ref + 9;
659 if (val < 0) {
660 cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
661 val = -val;
662 } else {
663 cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
665 while (mem_ref < mem_end) {
666 if (val == 0) {
667 break;
669 v = val % 100;
670 val = val / 100;
671 v = ((v / 10) << 4) | (v % 10);
672 cpu_stb_data_ra(env, mem_ref++, v, GETPC());
674 while (mem_ref < mem_end) {
675 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
679 void helper_f2xm1(CPUX86State *env)
681 double val = floatx80_to_double(env, ST0);
683 val = pow(2.0, val) - 1.0;
684 ST0 = double_to_floatx80(env, val);
687 void helper_fyl2x(CPUX86State *env)
689 double fptemp = floatx80_to_double(env, ST0);
691 if (fptemp > 0.0) {
692 fptemp = log(fptemp) / log(2.0); /* log2(ST) */
693 fptemp *= floatx80_to_double(env, ST1);
694 ST1 = double_to_floatx80(env, fptemp);
695 fpop(env);
696 } else {
697 env->fpus &= ~0x4700;
698 env->fpus |= 0x400;
702 void helper_fptan(CPUX86State *env)
704 double fptemp = floatx80_to_double(env, ST0);
706 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
707 env->fpus |= 0x400;
708 } else {
709 fptemp = tan(fptemp);
710 ST0 = double_to_floatx80(env, fptemp);
711 fpush(env);
712 ST0 = floatx80_one;
713 env->fpus &= ~0x400; /* C2 <-- 0 */
714 /* the above code is for |arg| < 2**52 only */
718 void helper_fpatan(CPUX86State *env)
720 double fptemp, fpsrcop;
722 fpsrcop = floatx80_to_double(env, ST1);
723 fptemp = floatx80_to_double(env, ST0);
724 ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
725 fpop(env);
728 void helper_fxtract(CPUX86State *env)
730 CPU_LDoubleU temp;
732 temp.d = ST0;
734 if (floatx80_is_zero(ST0)) {
735 /* Easy way to generate -inf and raising division by 0 exception */
736 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
737 &env->fp_status);
738 fpush(env);
739 ST0 = temp.d;
740 } else {
741 int expdif;
743 expdif = EXPD(temp) - EXPBIAS;
744 /* DP exponent bias */
745 ST0 = int32_to_floatx80(expdif, &env->fp_status);
746 fpush(env);
747 BIASEXPONENT(temp);
748 ST0 = temp.d;
/* Partial remainder of ST0 by ST1 with the quotient rounded to nearest
   (rint), computed in host double precision. The result replaces ST0 and
   the condition bits C0..C3 of env->fpus are updated as noted below. */
752 void helper_fprem1(CPUX86State *env)
754 double st0, st1, dblq, fpsrcop, fptemp;
755 CPU_LDoubleU fpsrcop1, fptemp1;
756 int expdif;
757 signed long long int q;
759 st0 = floatx80_to_double(env, ST0);
760 st1 = floatx80_to_double(env, ST1);
/* Infinite or NaN dividend, NaN divisor, or zero divisor: result is NaN. */
762 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
763 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
764 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
765 return;
768 fpsrcop = st0;
769 fptemp = st1;
770 fpsrcop1.d = ST0;
771 fptemp1.d = ST1;
/* Exponent difference is taken from the raw 80-bit operands, not from the
   narrowed doubles. */
772 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
774 if (expdif < 0) {
775 /* optimisation? taken from the AMD docs */
776 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
777 /* ST0 is unchanged */
778 return;
/* Small exponent difference: finish the reduction in one step and report
   the low three quotient bits in C0/C3/C1. */
781 if (expdif < 53) {
782 dblq = fpsrcop / fptemp;
783 /* round dblq towards nearest integer */
784 dblq = rint(dblq);
785 st0 = fpsrcop - fptemp * dblq;
787 /* convert dblq to q by truncating towards zero */
788 if (dblq < 0.0) {
789 q = (signed long long int)(-dblq);
790 } else {
791 q = (signed long long int)dblq;
794 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
795 /* (C0,C3,C1) <-- (q2,q1,q0) */
796 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
797 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
798 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
/* Large exponent difference: do a partial reduction only. C2 is set here;
   the other condition bits are not modified on this path. */
799 } else {
800 env->fpus |= 0x400; /* C2 <-- 1 */
801 fptemp = pow(2.0, expdif - 50);
802 fpsrcop = (st0 / st1) / fptemp;
803 /* fpsrcop = integer obtained by chopping */
804 fpsrcop = (fpsrcop < 0.0) ?
805 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
806 st0 -= (st1 * fpsrcop * fptemp);
808 ST0 = double_to_floatx80(env, st0);
/* Partial remainder of ST0 by ST1 with the quotient truncated towards
   zero, computed in host double precision. The result replaces ST0 and
   the condition bits C0..C3 of env->fpus are updated as noted below. */
811 void helper_fprem(CPUX86State *env)
813 double st0, st1, dblq, fpsrcop, fptemp;
814 CPU_LDoubleU fpsrcop1, fptemp1;
815 int expdif;
816 signed long long int q;
818 st0 = floatx80_to_double(env, ST0);
819 st1 = floatx80_to_double(env, ST1);
/* Infinite or NaN dividend, NaN divisor, or zero divisor: result is NaN. */
821 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
822 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
823 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
824 return;
827 fpsrcop = st0;
828 fptemp = st1;
829 fpsrcop1.d = ST0;
830 fptemp1.d = ST1;
/* Exponent difference is taken from the raw 80-bit operands, not from the
   narrowed doubles. */
831 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
833 if (expdif < 0) {
834 /* optimisation? taken from the AMD docs */
835 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
836 /* ST0 is unchanged */
837 return;
/* Small exponent difference: finish the reduction in one step and report
   the low three quotient bits in C0/C3/C1. */
840 if (expdif < 53) {
841 dblq = fpsrcop / fptemp; /* ST0 / ST1 */
842 /* round dblq towards zero */
843 dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
844 st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
846 /* convert dblq to q by truncating towards zero */
847 if (dblq < 0.0) {
848 q = (signed long long int)(-dblq);
849 } else {
850 q = (signed long long int)dblq;
853 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
854 /* (C0,C3,C1) <-- (q2,q1,q0) */
855 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
856 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
857 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
/* Large exponent difference: do a partial reduction only. C2 is set here;
   the other condition bits are not modified on this path. */
858 } else {
859 int N = 32 + (expdif % 32); /* as per AMD docs */
861 env->fpus |= 0x400; /* C2 <-- 1 */
862 fptemp = pow(2.0, (double)(expdif - N));
863 fpsrcop = (st0 / st1) / fptemp;
864 /* fpsrcop = integer obtained by chopping */
865 fpsrcop = (fpsrcop < 0.0) ?
866 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
867 st0 -= (st1 * fpsrcop * fptemp);
869 ST0 = double_to_floatx80(env, st0);
872 void helper_fyl2xp1(CPUX86State *env)
874 double fptemp = floatx80_to_double(env, ST0);
876 if ((fptemp + 1.0) > 0.0) {
877 fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
878 fptemp *= floatx80_to_double(env, ST1);
879 ST1 = double_to_floatx80(env, fptemp);
880 fpop(env);
881 } else {
882 env->fpus &= ~0x4700;
883 env->fpus |= 0x400;
887 void helper_fsqrt(CPUX86State *env)
889 if (floatx80_is_neg(ST0)) {
890 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
891 env->fpus |= 0x400;
893 ST0 = floatx80_sqrt(ST0, &env->fp_status);
896 void helper_fsincos(CPUX86State *env)
898 double fptemp = floatx80_to_double(env, ST0);
900 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
901 env->fpus |= 0x400;
902 } else {
903 ST0 = double_to_floatx80(env, sin(fptemp));
904 fpush(env);
905 ST0 = double_to_floatx80(env, cos(fptemp));
906 env->fpus &= ~0x400; /* C2 <-- 0 */
907 /* the above code is for |arg| < 2**63 only */
911 void helper_frndint(CPUX86State *env)
913 ST0 = floatx80_round_to_int(ST0, &env->fp_status);
916 void helper_fscale(CPUX86State *env)
918 if (floatx80_is_any_nan(ST1)) {
919 ST0 = ST1;
920 } else {
921 int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
922 ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
926 void helper_fsin(CPUX86State *env)
928 double fptemp = floatx80_to_double(env, ST0);
930 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
931 env->fpus |= 0x400;
932 } else {
933 ST0 = double_to_floatx80(env, sin(fptemp));
934 env->fpus &= ~0x400; /* C2 <-- 0 */
935 /* the above code is for |arg| < 2**53 only */
939 void helper_fcos(CPUX86State *env)
941 double fptemp = floatx80_to_double(env, ST0);
943 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
944 env->fpus |= 0x400;
945 } else {
946 ST0 = double_to_floatx80(env, cos(fptemp));
947 env->fpus &= ~0x400; /* C2 <-- 0 */
948 /* the above code is for |arg| < 2**63 only */
952 void helper_fxam_ST0(CPUX86State *env)
954 CPU_LDoubleU temp;
955 int expdif;
957 temp.d = ST0;
959 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
960 if (SIGND(temp)) {
961 env->fpus |= 0x200; /* C1 <-- 1 */
964 /* XXX: test fptags too */
965 expdif = EXPD(temp);
966 if (expdif == MAXEXPD) {
967 if (MANTD(temp) == 0x8000000000000000ULL) {
968 env->fpus |= 0x500; /* Infinity */
969 } else {
970 env->fpus |= 0x100; /* NaN */
972 } else if (expdif == 0) {
973 if (MANTD(temp) == 0) {
974 env->fpus |= 0x4000; /* Zero */
975 } else {
976 env->fpus |= 0x4400; /* Denormal */
978 } else {
979 env->fpus |= 0x400;
983 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
984 uintptr_t retaddr)
986 int fpus, fptag, exp, i;
987 uint64_t mant;
988 CPU_LDoubleU tmp;
990 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
991 fptag = 0;
992 for (i = 7; i >= 0; i--) {
993 fptag <<= 2;
994 if (env->fptags[i]) {
995 fptag |= 3;
996 } else {
997 tmp.d = env->fpregs[i].d;
998 exp = EXPD(tmp);
999 mant = MANTD(tmp);
1000 if (exp == 0 && mant == 0) {
1001 /* zero */
1002 fptag |= 1;
1003 } else if (exp == 0 || exp == MAXEXPD
1004 || (mant & (1LL << 63)) == 0) {
1005 /* NaNs, infinity, denormal */
1006 fptag |= 2;
1010 if (data32) {
1011 /* 32 bit */
1012 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1013 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1014 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1015 cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1016 cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1017 cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1018 cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
1019 } else {
1020 /* 16 bit */
1021 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1022 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1023 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1024 cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1025 cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1026 cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1027 cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
1031 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1033 do_fstenv(env, ptr, data32, GETPC());
1036 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1037 uintptr_t retaddr)
1039 int i, fpus, fptag;
1041 if (data32) {
1042 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1043 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1044 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1045 } else {
1046 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1047 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1048 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1050 env->fpstt = (fpus >> 11) & 7;
1051 env->fpus = fpus & ~0x3800;
1052 for (i = 0; i < 8; i++) {
1053 env->fptags[i] = ((fptag & 3) == 3);
1054 fptag >>= 2;
1058 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1060 do_fldenv(env, ptr, data32, GETPC());
1063 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1065 floatx80 tmp;
1066 int i;
1068 do_fstenv(env, ptr, data32, GETPC());
1070 ptr += (14 << data32);
1071 for (i = 0; i < 8; i++) {
1072 tmp = ST(i);
1073 helper_fstt(env, tmp, ptr, GETPC());
1074 ptr += 10;
1077 /* fninit */
1078 env->fpus = 0;
1079 env->fpstt = 0;
1080 cpu_set_fpuc(env, 0x37f);
1081 env->fptags[0] = 1;
1082 env->fptags[1] = 1;
1083 env->fptags[2] = 1;
1084 env->fptags[3] = 1;
1085 env->fptags[4] = 1;
1086 env->fptags[5] = 1;
1087 env->fptags[6] = 1;
1088 env->fptags[7] = 1;
1091 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
1093 floatx80 tmp;
1094 int i;
1096 do_fldenv(env, ptr, data32, GETPC());
1097 ptr += (14 << data32);
1099 for (i = 0; i < 8; i++) {
1100 tmp = helper_fldt(env, ptr, GETPC());
1101 ST(i) = tmp;
1102 ptr += 10;
1106 #if defined(CONFIG_USER_ONLY)
1107 void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
1109 helper_fsave(env, ptr, data32);
1112 void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
1114 helper_frstor(env, ptr, data32);
1116 #endif
1118 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1120 int fpus, fptag, i;
1121 target_ulong addr;
1123 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1124 fptag = 0;
1125 for (i = 0; i < 8; i++) {
1126 fptag |= (env->fptags[i] << i);
1128 cpu_stw_data_ra(env, ptr, env->fpuc, ra);
1129 cpu_stw_data_ra(env, ptr + 2, fpus, ra);
1130 cpu_stw_data_ra(env, ptr + 4, fptag ^ 0xff, ra);
1132 /* In 32-bit mode this is eip, sel, dp, sel.
1133 In 64-bit mode this is rip, rdp.
1134 But in either case we don't write actual data, just zeros. */
1135 cpu_stq_data_ra(env, ptr + 0x08, 0, ra); /* eip+sel; rip */
1136 cpu_stq_data_ra(env, ptr + 0x10, 0, ra); /* edp+sel; rdp */
1138 addr = ptr + 0x20;
1139 for (i = 0; i < 8; i++) {
1140 floatx80 tmp = ST(i);
1141 helper_fstt(env, tmp, addr, ra);
1142 addr += 16;
1146 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1148 cpu_stl_data_ra(env, ptr + 0x18, env->mxcsr, ra); /* mxcsr */
1149 cpu_stl_data_ra(env, ptr + 0x1c, 0x0000ffff, ra); /* mxcsr_mask */
1152 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1154 int i, nb_xmm_regs;
1155 target_ulong addr;
1157 if (env->hflags & HF_CS64_MASK) {
1158 nb_xmm_regs = 16;
1159 } else {
1160 nb_xmm_regs = 8;
1163 addr = ptr + 0xa0;
1164 for (i = 0; i < nb_xmm_regs; i++) {
1165 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1166 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1167 addr += 16;
1171 static void do_xsave_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra)
1173 int i;
1175 for (i = 0; i < 4; i++, addr += 16) {
1176 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1177 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1181 static void do_xsave_bndcsr(CPUX86State *env, target_ulong addr, uintptr_t ra)
1183 cpu_stq_data_ra(env, addr, env->bndcs_regs.cfgu, ra);
1184 cpu_stq_data_ra(env, addr + 8, env->bndcs_regs.sts, ra);
1187 static void do_xsave_pkru(CPUX86State *env, target_ulong addr, uintptr_t ra)
1189 cpu_stq_data_ra(env, addr, env->pkru, ra);
1192 void helper_fxsave(CPUX86State *env, target_ulong ptr)
1194 uintptr_t ra = GETPC();
1196 /* The operand must be 16 byte aligned */
1197 if (ptr & 0xf) {
1198 raise_exception_ra(env, EXCP0D_GPF, ra);
1201 do_xsave_fpu(env, ptr, ra);
1203 if (env->cr[4] & CR4_OSFXSR_MASK) {
1204 do_xsave_mxcsr(env, ptr, ra);
1205 /* Fast FXSAVE leaves out the XMM registers */
1206 if (!(env->efer & MSR_EFER_FFXSR)
1207 || (env->hflags & HF_CPL_MASK)
1208 || !(env->hflags & HF_LMA_MASK)) {
1209 do_xsave_sse(env, ptr, ra);
1214 static uint64_t get_xinuse(CPUX86State *env)
1216 uint64_t inuse = -1;
1218 /* For the most part, we don't track XINUSE. We could calculate it
1219 here for all components, but it's probably less work to simply
1220 indicate in use. That said, the state of BNDREGS is important
1221 enough to track in HFLAGS, so we might as well use that here. */
1222 if ((env->hflags & HF_MPX_IU_MASK) == 0) {
1223 inuse &= ~XSTATE_BNDREGS_MASK;
1225 return inuse;
1228 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1229 uint64_t inuse, uint64_t opt, uintptr_t ra)
1231 uint64_t old_bv, new_bv;
1233 /* The OS must have enabled XSAVE. */
1234 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1235 raise_exception_ra(env, EXCP06_ILLOP, ra);
1238 /* The operand must be 64 byte aligned. */
1239 if (ptr & 63) {
1240 raise_exception_ra(env, EXCP0D_GPF, ra);
1243 /* Never save anything not enabled by XCR0. */
1244 rfbm &= env->xcr0;
1245 opt &= rfbm;
1247 if (opt & XSTATE_FP_MASK) {
1248 do_xsave_fpu(env, ptr, ra);
1250 if (rfbm & XSTATE_SSE_MASK) {
1251 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
1252 do_xsave_mxcsr(env, ptr, ra);
1254 if (opt & XSTATE_SSE_MASK) {
1255 do_xsave_sse(env, ptr, ra);
1257 if (opt & XSTATE_BNDREGS_MASK) {
1258 target_ulong off = x86_ext_save_areas[XSTATE_BNDREGS_BIT].offset;
1259 do_xsave_bndregs(env, ptr + off, ra);
1261 if (opt & XSTATE_BNDCSR_MASK) {
1262 target_ulong off = x86_ext_save_areas[XSTATE_BNDCSR_BIT].offset;
1263 do_xsave_bndcsr(env, ptr + off, ra);
1265 if (opt & XSTATE_PKRU_MASK) {
1266 target_ulong off = x86_ext_save_areas[XSTATE_PKRU_BIT].offset;
1267 do_xsave_pkru(env, ptr + off, ra);
1270 /* Update the XSTATE_BV field. */
1271 old_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
1272 new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
1273 cpu_stq_data_ra(env, ptr + 512, new_bv, ra);
1276 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1278 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1281 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1283 uint64_t inuse = get_xinuse(env);
1284 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1287 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1289 int i, fpus, fptag;
1290 target_ulong addr;
1292 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, ra));
1293 fpus = cpu_lduw_data_ra(env, ptr + 2, ra);
1294 fptag = cpu_lduw_data_ra(env, ptr + 4, ra);
1295 env->fpstt = (fpus >> 11) & 7;
1296 env->fpus = fpus & ~0x3800;
1297 fptag ^= 0xff;
1298 for (i = 0; i < 8; i++) {
1299 env->fptags[i] = ((fptag >> i) & 1);
1302 addr = ptr + 0x20;
1303 for (i = 0; i < 8; i++) {
1304 floatx80 tmp = helper_fldt(env, addr, ra);
1305 ST(i) = tmp;
1306 addr += 16;
1310 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1312 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + 0x18, ra));
1315 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1317 int i, nb_xmm_regs;
1318 target_ulong addr;
1320 if (env->hflags & HF_CS64_MASK) {
1321 nb_xmm_regs = 16;
1322 } else {
1323 nb_xmm_regs = 8;
1326 addr = ptr + 0xa0;
1327 for (i = 0; i < nb_xmm_regs; i++) {
1328 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1329 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1330 addr += 16;
1334 static void do_xrstor_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra)
1336 int i;
1338 for (i = 0; i < 4; i++, addr += 16) {
1339 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1340 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1344 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong addr, uintptr_t ra)
1346 /* FIXME: Extend highest implemented bit of linear address. */
1347 env->bndcs_regs.cfgu = cpu_ldq_data_ra(env, addr, ra);
1348 env->bndcs_regs.sts = cpu_ldq_data_ra(env, addr + 8, ra);
1351 static void do_xrstor_pkru(CPUX86State *env, target_ulong addr, uintptr_t ra)
1353 env->pkru = cpu_ldq_data_ra(env, addr, ra);
1356 void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1358 uintptr_t ra = GETPC();
1360 /* The operand must be 16 byte aligned */
1361 if (ptr & 0xf) {
1362 raise_exception_ra(env, EXCP0D_GPF, ra);
1365 do_xrstor_fpu(env, ptr, ra);
1367 if (env->cr[4] & CR4_OSFXSR_MASK) {
1368 do_xrstor_mxcsr(env, ptr, ra);
1369 /* Fast FXRSTOR leaves out the XMM registers */
1370 if (!(env->efer & MSR_EFER_FFXSR)
1371 || (env->hflags & HF_CPL_MASK)
1372 || !(env->hflags & HF_LMA_MASK)) {
1373 do_xrstor_sse(env, ptr, ra);
1378 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1380 uintptr_t ra = GETPC();
1381 uint64_t xstate_bv, xcomp_bv0, xcomp_bv1;
1383 rfbm &= env->xcr0;
1385 /* The OS must have enabled XSAVE. */
1386 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1387 raise_exception_ra(env, EXCP06_ILLOP, ra);
1390 /* The operand must be 64 byte aligned. */
1391 if (ptr & 63) {
1392 raise_exception_ra(env, EXCP0D_GPF, ra);
1395 xstate_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
1397 if ((int64_t)xstate_bv < 0) {
1398 /* FIXME: Compact form. */
1399 raise_exception_ra(env, EXCP0D_GPF, ra);
1402 /* Standard form. */
1404 /* The XSTATE field must not set bits not present in XCR0. */
1405 if (xstate_bv & ~env->xcr0) {
1406 raise_exception_ra(env, EXCP0D_GPF, ra);
1409 /* The XCOMP field must be zero. */
1410 xcomp_bv0 = cpu_ldq_data_ra(env, ptr + 520, ra);
1411 xcomp_bv1 = cpu_ldq_data_ra(env, ptr + 528, ra);
1412 if (xcomp_bv0 || xcomp_bv1) {
1413 raise_exception_ra(env, EXCP0D_GPF, ra);
1416 if (rfbm & XSTATE_FP_MASK) {
1417 if (xstate_bv & XSTATE_FP_MASK) {
1418 do_xrstor_fpu(env, ptr, ra);
1419 } else {
1420 helper_fninit(env);
1421 memset(env->fpregs, 0, sizeof(env->fpregs));
1424 if (rfbm & XSTATE_SSE_MASK) {
1425 /* Note that the standard form of XRSTOR loads MXCSR from memory
1426 whether or not the XSTATE_BV bit is set. */
1427 do_xrstor_mxcsr(env, ptr, ra);
1428 if (xstate_bv & XSTATE_SSE_MASK) {
1429 do_xrstor_sse(env, ptr, ra);
1430 } else {
1431 /* ??? When AVX is implemented, we may have to be more
1432 selective in the clearing. */
1433 memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
1436 if (rfbm & XSTATE_BNDREGS_MASK) {
1437 if (xstate_bv & XSTATE_BNDREGS_MASK) {
1438 target_ulong off = x86_ext_save_areas[XSTATE_BNDREGS_BIT].offset;
1439 do_xrstor_bndregs(env, ptr + off, ra);
1440 env->hflags |= HF_MPX_IU_MASK;
1441 } else {
1442 memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
1443 env->hflags &= ~HF_MPX_IU_MASK;
1446 if (rfbm & XSTATE_BNDCSR_MASK) {
1447 if (xstate_bv & XSTATE_BNDCSR_MASK) {
1448 target_ulong off = x86_ext_save_areas[XSTATE_BNDCSR_BIT].offset;
1449 do_xrstor_bndcsr(env, ptr + off, ra);
1450 } else {
1451 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
1453 cpu_sync_bndcs_hflags(env);
1455 if (rfbm & XSTATE_PKRU_MASK) {
1456 uint64_t old_pkru = env->pkru;
1457 if (xstate_bv & XSTATE_PKRU_MASK) {
1458 target_ulong off = x86_ext_save_areas[XSTATE_PKRU_BIT].offset;
1459 do_xrstor_pkru(env, ptr + off, ra);
1460 } else {
1461 env->pkru = 0;
1463 if (env->pkru != old_pkru) {
1464 CPUState *cs = CPU(x86_env_get_cpu(env));
1465 tlb_flush(cs, 1);
1470 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1472 /* The OS must have enabled XSAVE. */
1473 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1474 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1477 switch (ecx) {
1478 case 0:
1479 return env->xcr0;
1480 case 1:
1481 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1482 return env->xcr0 & get_xinuse(env);
1484 break;
1486 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1489 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1491 uint32_t dummy, ena_lo, ena_hi;
1492 uint64_t ena;
1494 /* The OS must have enabled XSAVE. */
1495 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1496 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1499 /* Only XCR0 is defined at present; the FPU may not be disabled. */
1500 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
1501 goto do_gpf;
1504 /* Disallow enabling unimplemented features. */
1505 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
1506 ena = ((uint64_t)ena_hi << 32) | ena_lo;
1507 if (mask & ~ena) {
1508 goto do_gpf;
1511 /* Disallow enabling only half of MPX. */
1512 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
1513 & XSTATE_BNDCSR_MASK) {
1514 goto do_gpf;
1517 env->xcr0 = mask;
1518 cpu_sync_bndcs_hflags(env);
1519 return;
1521 do_gpf:
1522 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1525 void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
1527 CPU_LDoubleU temp;
1529 temp.d = f;
1530 *pmant = temp.l.lower;
1531 *pexp = temp.l.upper;
1534 floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper)
1536 CPU_LDoubleU temp;
1538 temp.l.upper = upper;
1539 temp.l.lower = mant;
1540 return temp.d;
/* MMX/SSE */
/* XXX: optimize by storing fpstt and fptags in the static cpu state */

/*
 * MXCSR bits decoded by cpu_set_mxcsr(): SSE_DAZ enables flushing of
 * inputs to zero, SSE_RC_MASK selects the rounding-control field whose
 * four encodings follow, and SSE_FZ enables flush to zero.
 */
#define SSE_DAZ 0x0040
#define SSE_RC_MASK 0x6000
#define SSE_RC_NEAR 0x0000
#define SSE_RC_DOWN 0x2000
#define SSE_RC_UP 0x4000
#define SSE_RC_CHOP 0x6000
#define SSE_FZ 0x8000
1554 void cpu_set_mxcsr(CPUX86State *env, uint32_t mxcsr)
1556 int rnd_type;
1558 env->mxcsr = mxcsr;
1560 /* set rounding mode */
1561 switch (mxcsr & SSE_RC_MASK) {
1562 default:
1563 case SSE_RC_NEAR:
1564 rnd_type = float_round_nearest_even;
1565 break;
1566 case SSE_RC_DOWN:
1567 rnd_type = float_round_down;
1568 break;
1569 case SSE_RC_UP:
1570 rnd_type = float_round_up;
1571 break;
1572 case SSE_RC_CHOP:
1573 rnd_type = float_round_to_zero;
1574 break;
1576 set_float_rounding_mode(rnd_type, &env->sse_status);
1578 /* set denormals are zero */
1579 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
1581 /* set flush to zero */
1582 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
1585 void cpu_set_fpuc(CPUX86State *env, uint16_t val)
1587 env->fpuc = val;
1588 update_fp_status(env);
1591 void helper_ldmxcsr(CPUX86State *env, uint32_t val)
1593 cpu_set_mxcsr(env, val);
1596 void helper_enter_mmx(CPUX86State *env)
1598 env->fpstt = 0;
1599 *(uint32_t *)(env->fptags) = 0;
1600 *(uint32_t *)(env->fptags + 4) = 0;
1603 void helper_emms(CPUX86State *env)
1605 /* set to empty state */
1606 *(uint32_t *)(env->fptags) = 0x01010101;
1607 *(uint32_t *)(env->fptags + 4) = 0x01010101;
1610 /* XXX: suppress */
1611 void helper_movq(CPUX86State *env, void *d, void *s)
1613 *(uint64_t *)d = *(uint64_t *)s;
/*
 * ops_sse.h is included twice, once with SHIFT defined as 0 and once as 1.
 * NOTE(review): presumably SHIFT 0 generates the 64-bit MMX variants and
 * SHIFT 1 the 128-bit SSE variants, and ops_sse.h undefines SHIFT at its
 * end -- confirm against ops_sse.h.
 */
#define SHIFT 0
#include "ops_sse.h"

#define SHIFT 1
#include "ops_sse.h"