[qemu.git] / target-i386 / fpu_helper.c
blob 206e60fdf5f647f868de64dae9b095a8a2386aef

/*
 *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include <math.h>
#include "cpu.h"
#include "exec/helper-proto.h"
#include "qemu/host-utils.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"

#define FPU_RC_MASK 0xc00
#define FPU_RC_NEAR 0x000
#define FPU_RC_DOWN 0x400
#define FPU_RC_UP   0x800
#define FPU_RC_CHOP 0xc00

#define MAXTAN 9223372036854775808.0

/* the following deal with x86 long double-precision numbers */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
#define EXPD(fp)        (fp.l.upper & 0x7fff)
#define SIGND(fp)       ((fp.l.upper) & 0x8000)
#define MANTD(fp)       (fp.l.lower)
#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS

#define FPUS_IE (1 << 0)
#define FPUS_DE (1 << 1)
#define FPUS_ZE (1 << 2)
#define FPUS_OE (1 << 3)
#define FPUS_UE (1 << 4)
#define FPUS_PE (1 << 5)
#define FPUS_SF (1 << 6)
#define FPUS_SE (1 << 7)
#define FPUS_B  (1 << 15)

#define FPUC_EM 0x3f

#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)

static inline void fpush(CPUX86State *env)
{
    env->fpstt = (env->fpstt - 1) & 7;
    env->fptags[env->fpstt] = 0; /* validate stack entry */
}

static inline void fpop(CPUX86State *env)
{
    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
    env->fpstt = (env->fpstt + 1) & 7;
}

static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
                                   uintptr_t retaddr)
{
    CPU_LDoubleU temp;

    temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
    temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
    return temp.d;
}

static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
                               uintptr_t retaddr)
{
    CPU_LDoubleU temp;

    temp.d = f;
    cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
    cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
}
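
/*
 * Note on the memory image used by helper_fldt()/helper_fstt(): an x87
 * extended-precision value occupies 10 bytes, with the 64-bit significand
 * at offset 0 and the 16-bit sign/exponent word at offset 8, matching the
 * CPU_LDoubleU l.lower/l.upper overlay used above.
 */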

/* x87 FPU helpers */

static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
{
    union {
        float64 f64;
        double d;
    } u;

    u.f64 = floatx80_to_float64(a, &env->fp_status);
    return u.d;
}

static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
{
    union {
        float64 f64;
        double d;
    } u;

    u.d = a;
    return float64_to_floatx80(u.f64, &env->fp_status);
}

static void fpu_set_exception(CPUX86State *env, int mask)
{
    env->fpus |= mask;
    if (env->fpus & (~env->fpuc & FPUC_EM)) {
        env->fpus |= FPUS_SE | FPUS_B;
    }
}

static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
{
    if (floatx80_is_zero(b)) {
        fpu_set_exception(env, FPUS_ZE);
    }
    return floatx80_div(a, b, &env->fp_status);
}

static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
{
    if (env->cr[0] & CR0_NE_MASK) {
        raise_exception_ra(env, EXCP10_COPR, retaddr);
    }
#if !defined(CONFIG_USER_ONLY)
    else {
        cpu_set_ferr(env);
    }
#endif
}
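
/*
 * With CR0.NE set, an unmasked x87 fault is delivered as a #MF exception on
 * the faulting instruction; with CR0.NE clear (system emulation only), it is
 * instead signalled externally via cpu_set_ferr(), modelling the legacy
 * FERR# reporting path.
 */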

void helper_flds_FT0(CPUX86State *env, uint32_t val)
{
    union {
        float32 f;
        uint32_t i;
    } u;

    u.i = val;
    FT0 = float32_to_floatx80(u.f, &env->fp_status);
}

void helper_fldl_FT0(CPUX86State *env, uint64_t val)
{
    union {
        float64 f;
        uint64_t i;
    } u;

    u.i = val;
    FT0 = float64_to_floatx80(u.f, &env->fp_status);
}

void helper_fildl_FT0(CPUX86State *env, int32_t val)
{
    FT0 = int32_to_floatx80(val, &env->fp_status);
}

void helper_flds_ST0(CPUX86State *env, uint32_t val)
{
    int new_fpstt;
    union {
        float32 f;
        uint32_t i;
    } u;

    new_fpstt = (env->fpstt - 1) & 7;
    u.i = val;
    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

void helper_fldl_ST0(CPUX86State *env, uint64_t val)
{
    int new_fpstt;
    union {
        float64 f;
        uint64_t i;
    } u;

    new_fpstt = (env->fpstt - 1) & 7;
    u.i = val;
    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

void helper_fildl_ST0(CPUX86State *env, int32_t val)
{
    int new_fpstt;

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

void helper_fildll_ST0(CPUX86State *env, int64_t val)
{
    int new_fpstt;

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

uint32_t helper_fsts_ST0(CPUX86State *env)
{
    union {
        float32 f;
        uint32_t i;
    } u;

    u.f = floatx80_to_float32(ST0, &env->fp_status);
    return u.i;
}

uint64_t helper_fstl_ST0(CPUX86State *env)
{
    union {
        float64 f;
        uint64_t i;
    } u;

    u.f = floatx80_to_float64(ST0, &env->fp_status);
    return u.i;
}

int32_t helper_fist_ST0(CPUX86State *env)
{
    int32_t val;

    val = floatx80_to_int32(ST0, &env->fp_status);
    if (val != (int16_t)val) {
        val = -32768;
    }
    return val;
}

int32_t helper_fistl_ST0(CPUX86State *env)
{
    int32_t val;
    signed char old_exp_flags;

    old_exp_flags = get_float_exception_flags(&env->fp_status);
    set_float_exception_flags(0, &env->fp_status);

    val = floatx80_to_int32(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x80000000;
    }
    set_float_exception_flags(get_float_exception_flags(&env->fp_status)
                              | old_exp_flags, &env->fp_status);
    return val;
}

int64_t helper_fistll_ST0(CPUX86State *env)
{
    int64_t val;
    signed char old_exp_flags;

    old_exp_flags = get_float_exception_flags(&env->fp_status);
    set_float_exception_flags(0, &env->fp_status);

    val = floatx80_to_int64(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x8000000000000000ULL;
    }
    set_float_exception_flags(get_float_exception_flags(&env->fp_status)
                              | old_exp_flags, &env->fp_status);
    return val;
}

int32_t helper_fistt_ST0(CPUX86State *env)
{
    int32_t val;

    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    if (val != (int16_t)val) {
        val = -32768;
    }
    return val;
}

int32_t helper_fisttl_ST0(CPUX86State *env)
{
    int32_t val;

    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    return val;
}

int64_t helper_fisttll_ST0(CPUX86State *env)
{
    int64_t val;

    val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
    return val;
}

void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
{
    int new_fpstt;

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
{
    helper_fstt(env, ST0, ptr, GETPC());
}

void helper_fpush(CPUX86State *env)
{
    fpush(env);
}

void helper_fpop(CPUX86State *env)
{
    fpop(env);
}

void helper_fdecstp(CPUX86State *env)
{
    env->fpstt = (env->fpstt - 1) & 7;
    env->fpus &= ~0x4700;
}

void helper_fincstp(CPUX86State *env)
{
    env->fpstt = (env->fpstt + 1) & 7;
    env->fpus &= ~0x4700;
}

/* FPU move */

void helper_ffree_STN(CPUX86State *env, int st_index)
{
    env->fptags[(env->fpstt + st_index) & 7] = 1;
}

void helper_fmov_ST0_FT0(CPUX86State *env)
{
    ST0 = FT0;
}

void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
{
    FT0 = ST(st_index);
}

void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
{
    ST0 = ST(st_index);
}

void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = ST0;
}

void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
{
    floatx80 tmp;

    tmp = ST(st_index);
    ST(st_index) = ST0;
    ST0 = tmp;
}

/* FPU operations */

static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
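
/*
 * The comparison helpers below index fcom_ccval with (ret + 1), where ret is
 * the softfloat comparison result: -1 (less) selects 0x0100 (C0), 0 (equal)
 * selects 0x4000 (C3), 1 (greater) selects 0x0000, and 2 (unordered) selects
 * 0x4500 (C3|C2|C0), matching the x87 condition-code encoding.
 */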

void helper_fcom_ST0_FT0(CPUX86State *env)
{
    int ret;

    ret = floatx80_compare(ST0, FT0, &env->fp_status);
    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
}

void helper_fucom_ST0_FT0(CPUX86State *env)
{
    int ret;

    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
}

static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};

void helper_fcomi_ST0_FT0(CPUX86State *env)
{
    int eflags;
    int ret;

    ret = floatx80_compare(ST0, FT0, &env->fp_status);
    eflags = cpu_cc_compute_all(env, CC_OP);
    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
    CC_SRC = eflags;
}

void helper_fucomi_ST0_FT0(CPUX86State *env)
{
    int eflags;
    int ret;

    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    eflags = cpu_cc_compute_all(env, CC_OP);
    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
    CC_SRC = eflags;
}

void helper_fadd_ST0_FT0(CPUX86State *env)
{
    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
}

void helper_fmul_ST0_FT0(CPUX86State *env)
{
    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
}

void helper_fsub_ST0_FT0(CPUX86State *env)
{
    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
}

void helper_fsubr_ST0_FT0(CPUX86State *env)
{
    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
}

void helper_fdiv_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, ST0, FT0);
}

void helper_fdivr_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, FT0, ST0);
}

/* fp operations between STN and ST0 */

void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
}

void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
}

void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
}

void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
}

void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *p;

    p = &ST(st_index);
    *p = helper_fdiv(env, *p, ST0);
}

void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *p;

    p = &ST(st_index);
    *p = helper_fdiv(env, ST0, *p);
}

/* misc FPU operations */
void helper_fchs_ST0(CPUX86State *env)
{
    ST0 = floatx80_chs(ST0);
}

void helper_fabs_ST0(CPUX86State *env)
{
    ST0 = floatx80_abs(ST0);
}

void helper_fld1_ST0(CPUX86State *env)
{
    ST0 = floatx80_one;
}

void helper_fldl2t_ST0(CPUX86State *env)
{
    ST0 = floatx80_l2t;
}

void helper_fldl2e_ST0(CPUX86State *env)
{
    ST0 = floatx80_l2e;
}

void helper_fldpi_ST0(CPUX86State *env)
{
    ST0 = floatx80_pi;
}

void helper_fldlg2_ST0(CPUX86State *env)
{
    ST0 = floatx80_lg2;
}

void helper_fldln2_ST0(CPUX86State *env)
{
    ST0 = floatx80_ln2;
}

void helper_fldz_ST0(CPUX86State *env)
{
    ST0 = floatx80_zero;
}

void helper_fldz_FT0(CPUX86State *env)
{
    FT0 = floatx80_zero;
}

uint32_t helper_fnstsw(CPUX86State *env)
{
    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
}

uint32_t helper_fnstcw(CPUX86State *env)
{
    return env->fpuc;
}

void update_fp_status(CPUX86State *env)
{
    int rnd_type;

    /* set rounding mode */
    switch (env->fpuc & FPU_RC_MASK) {
    default:
    case FPU_RC_NEAR:
        rnd_type = float_round_nearest_even;
        break;
    case FPU_RC_DOWN:
        rnd_type = float_round_down;
        break;
    case FPU_RC_UP:
        rnd_type = float_round_up;
        break;
    case FPU_RC_CHOP:
        rnd_type = float_round_to_zero;
        break;
    }
    set_float_rounding_mode(rnd_type, &env->fp_status);
    switch ((env->fpuc >> 8) & 3) {
    case 0:
        rnd_type = 32;
        break;
    case 2:
        rnd_type = 64;
        break;
    case 3:
    default:
        rnd_type = 80;
        break;
    }
    set_floatx80_rounding_precision(rnd_type, &env->fp_status);
}
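
/*
 * update_fp_status() decodes the two dynamic fields of the x87 control word:
 * RC (rounding control) in bits 11:10 and PC (precision control) in bits 9:8,
 * where PC values 0, 2 and 3 select 32-, 64- and 80-bit result precision
 * (value 1 is reserved and falls through to 80-bit here).
 */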

void helper_fldcw(CPUX86State *env, uint32_t val)
{
    cpu_set_fpuc(env, val);
}

void helper_fclex(CPUX86State *env)
{
    env->fpus &= 0x7f00;
}

void helper_fwait(CPUX86State *env)
{
    if (env->fpus & FPUS_SE) {
        fpu_raise_exception(env, GETPC());
    }
}

void helper_fninit(CPUX86State *env)
{
    env->fpus = 0;
    env->fpstt = 0;
    cpu_set_fpuc(env, 0x37f);
    env->fptags[0] = 1;
    env->fptags[1] = 1;
    env->fptags[2] = 1;
    env->fptags[3] = 1;
    env->fptags[4] = 1;
    env->fptags[5] = 1;
    env->fptags[6] = 1;
    env->fptags[7] = 1;
}

/* BCD ops */

void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
{
    floatx80 tmp;
    uint64_t val;
    unsigned int v;
    int i;

    val = 0;
    for (i = 8; i >= 0; i--) {
        v = cpu_ldub_data_ra(env, ptr + i, GETPC());
        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
    }
    tmp = int64_to_floatx80(val, &env->fp_status);
    if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
        tmp = floatx80_chs(tmp);
    }
    fpush(env);
    ST0 = tmp;
}

void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
{
    int v;
    target_ulong mem_ref, mem_end;
    int64_t val;

    val = floatx80_to_int64(ST0, &env->fp_status);
    mem_ref = ptr;
    mem_end = mem_ref + 9;
    if (val < 0) {
        cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
        val = -val;
    } else {
        cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
    }
    while (mem_ref < mem_end) {
        if (val == 0) {
            break;
        }
        v = val % 100;
        val = val / 100;
        v = ((v / 10) << 4) | (v % 10);
        cpu_stb_data_ra(env, mem_ref++, v, GETPC());
    }
    while (mem_ref < mem_end) {
        cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
    }
}
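
/*
 * Packed BCD operands, as handled above, are 10 bytes long: bytes 0..8 hold
 * 18 decimal digits, two per byte and least-significant byte first, and bit 7
 * of byte 9 holds the sign.  For example, storing the value -1234 writes
 * 0x34 0x12 followed by seven zero bytes, then 0x80 in the sign byte.
 */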

void helper_f2xm1(CPUX86State *env)
{
    double val = floatx80_to_double(env, ST0);

    val = pow(2.0, val) - 1.0;
    ST0 = double_to_floatx80(env, val);
}

void helper_fyl2x(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if (fptemp > 0.0) {
        fptemp = log(fptemp) / log(2.0); /* log2(ST) */
        fptemp *= floatx80_to_double(env, ST1);
        ST1 = double_to_floatx80(env, fptemp);
        fpop(env);
    } else {
        env->fpus &= ~0x4700;
        env->fpus |= 0x400;
    }
}

void helper_fptan(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        fptemp = tan(fptemp);
        ST0 = double_to_floatx80(env, fptemp);
        fpush(env);
        ST0 = floatx80_one;
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**52 only */
    }
}

void helper_fpatan(CPUX86State *env)
{
    double fptemp, fpsrcop;

    fpsrcop = floatx80_to_double(env, ST1);
    fptemp = floatx80_to_double(env, ST0);
    ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
    fpop(env);
}

void helper_fxtract(CPUX86State *env)
{
    CPU_LDoubleU temp;

    temp.d = ST0;

    if (floatx80_is_zero(ST0)) {
        /* Easy way to generate -inf and raising division by 0 exception */
        ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
                           &env->fp_status);
        fpush(env);
        ST0 = temp.d;
    } else {
        int expdif;

        expdif = EXPD(temp) - EXPBIAS;
        /* DP exponent bias */
        ST0 = int32_to_floatx80(expdif, &env->fp_status);
        fpush(env);
        BIASEXPONENT(temp);
        ST0 = temp.d;
    }
}

void helper_fprem1(CPUX86State *env)
{
    double st0, st1, dblq, fpsrcop, fptemp;
    CPU_LDoubleU fpsrcop1, fptemp1;
    int expdif;
    signed long long int q;

    st0 = floatx80_to_double(env, ST0);
    st1 = floatx80_to_double(env, ST1);

    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        return;
    }

    fpsrcop = st0;
    fptemp = st1;
    fpsrcop1.d = ST0;
    fptemp1.d = ST1;
    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);

    if (expdif < 0) {
        /* optimisation? taken from the AMD docs */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* ST0 is unchanged */
        return;
    }

    if (expdif < 53) {
        dblq = fpsrcop / fptemp;
        /* round dblq towards nearest integer */
        dblq = rint(dblq);
        st0 = fpsrcop - fptemp * dblq;

        /* convert dblq to q by truncating towards zero */
        if (dblq < 0.0) {
            q = (signed long long int)(-dblq);
        } else {
            q = (signed long long int)dblq;
        }

        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* (C0,C3,C1) <-- (q2,q1,q0) */
        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
    } else {
        env->fpus |= 0x400;  /* C2 <-- 1 */
        fptemp = pow(2.0, expdif - 50);
        fpsrcop = (st0 / st1) / fptemp;
        /* fpsrcop = integer obtained by chopping */
        fpsrcop = (fpsrcop < 0.0) ?
                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
        st0 -= (st1 * fpsrcop * fptemp);
    }
    ST0 = double_to_floatx80(env, st0);
}

void helper_fprem(CPUX86State *env)
{
    double st0, st1, dblq, fpsrcop, fptemp;
    CPU_LDoubleU fpsrcop1, fptemp1;
    int expdif;
    signed long long int q;

    st0 = floatx80_to_double(env, ST0);
    st1 = floatx80_to_double(env, ST1);

    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        return;
    }

    fpsrcop = st0;
    fptemp = st1;
    fpsrcop1.d = ST0;
    fptemp1.d = ST1;
    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);

    if (expdif < 0) {
        /* optimisation? taken from the AMD docs */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* ST0 is unchanged */
        return;
    }

    if (expdif < 53) {
        dblq = fpsrcop / fptemp; /* ST0 / ST1 */
        /* round dblq towards zero */
        dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
        st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */

        /* convert dblq to q by truncating towards zero */
        if (dblq < 0.0) {
            q = (signed long long int)(-dblq);
        } else {
            q = (signed long long int)dblq;
        }

        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* (C0,C3,C1) <-- (q2,q1,q0) */
        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
    } else {
        int N = 32 + (expdif % 32); /* as per AMD docs */

        env->fpus |= 0x400;  /* C2 <-- 1 */
        fptemp = pow(2.0, (double)(expdif - N));
        fpsrcop = (st0 / st1) / fptemp;
        /* fpsrcop = integer obtained by chopping */
        fpsrcop = (fpsrcop < 0.0) ?
                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
        st0 -= (st1 * fpsrcop * fptemp);
    }
    ST0 = double_to_floatx80(env, st0);
}

void helper_fyl2xp1(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp + 1.0) > 0.0) {
        fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
        fptemp *= floatx80_to_double(env, ST1);
        ST1 = double_to_floatx80(env, fptemp);
        fpop(env);
    } else {
        env->fpus &= ~0x4700;
        env->fpus |= 0x400;
    }
}

void helper_fsqrt(CPUX86State *env)
{
    if (floatx80_is_neg(ST0)) {
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        env->fpus |= 0x400;
    }
    ST0 = floatx80_sqrt(ST0, &env->fp_status);
}

void helper_fsincos(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, sin(fptemp));
        fpush(env);
        ST0 = double_to_floatx80(env, cos(fptemp));
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**63 only */
    }
}

void helper_frndint(CPUX86State *env)
{
    ST0 = floatx80_round_to_int(ST0, &env->fp_status);
}

void helper_fscale(CPUX86State *env)
{
    if (floatx80_is_any_nan(ST1)) {
        ST0 = ST1;
    } else {
        int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
        ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
    }
}

void helper_fsin(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, sin(fptemp));
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**53 only */
    }
}

void helper_fcos(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, cos(fptemp));
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**63 only */
    }
}
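
/*
 * The transcendental helpers above (f2xm1, fyl2x, fptan, fpatan, fprem,
 * fsin, fcos, ...) go through the host's double-precision libm routines via
 * floatx80_to_double()/double_to_floatx80(), so their results carry at most
 * 53 bits of precision rather than the full 64-bit x87 significand.
 */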

void helper_fxam_ST0(CPUX86State *env)
{
    CPU_LDoubleU temp;
    int expdif;

    temp.d = ST0;

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (SIGND(temp)) {
        env->fpus |= 0x200; /* C1 <-- 1 */
    }

    /* XXX: test fptags too */
    expdif = EXPD(temp);
    if (expdif == MAXEXPD) {
        if (MANTD(temp) == 0x8000000000000000ULL) {
            env->fpus |= 0x500; /* Infinity */
        } else {
            env->fpus |= 0x100; /* NaN */
        }
    } else if (expdif == 0) {
        if (MANTD(temp) == 0) {
            env->fpus |= 0x4000; /* Zero */
        } else {
            env->fpus |= 0x4400; /* Denormal */
        }
    } else {
        env->fpus |= 0x400;
    }
}

static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
                      uintptr_t retaddr)
{
    int fpus, fptag, exp, i;
    uint64_t mant;
    CPU_LDoubleU tmp;

    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 7; i >= 0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3;
        } else {
            tmp.d = env->fpregs[i].d;
            exp = EXPD(tmp);
            mant = MANTD(tmp);
            if (exp == 0 && mant == 0) {
                /* zero */
                fptag |= 1;
            } else if (exp == 0 || exp == MAXEXPD
                       || (mant & (1LL << 63)) == 0) {
                /* NaNs, infinity, denormal */
                fptag |= 2;
            }
        }
    }
    if (data32) {
        /* 32 bit */
        cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
        cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
        cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
        cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
        cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
        cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
        cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
    } else {
        /* 16 bit */
        cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
        cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
        cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
        cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
        cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
        cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
        cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
    }
}
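
/*
 * do_fstenv() writes the FSTENV environment image: seven 32-bit slots
 * (28 bytes) in 32-bit mode or seven 16-bit slots (14 bytes) in 16-bit mode,
 * holding the control word, status word (with TOP folded in), tag word and
 * zeroed instruction/operand pointer fields.  helper_fsave() below appends
 * the eight 10-byte data registers after this image.
 */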

void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
{
    do_fstenv(env, ptr, data32, GETPC());
}

static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
                      uintptr_t retaddr)
{
    int i, fpus, fptag;

    if (data32) {
        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
        fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
        fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
    } else {
        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
        fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
        fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
    }
    env->fpstt = (fpus >> 11) & 7;
    env->fpus = fpus & ~0x3800;
    for (i = 0; i < 8; i++) {
        env->fptags[i] = ((fptag & 3) == 3);
        fptag >>= 2;
    }
}

void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
{
    do_fldenv(env, ptr, data32, GETPC());
}

void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    floatx80 tmp;
    int i;

    do_fstenv(env, ptr, data32, GETPC());

    ptr += (14 << data32);
    for (i = 0; i < 8; i++) {
        tmp = ST(i);
        helper_fstt(env, tmp, ptr, GETPC());
        ptr += 10;
    }

    /* fninit */
    env->fpus = 0;
    env->fpstt = 0;
    cpu_set_fpuc(env, 0x37f);
    env->fptags[0] = 1;
    env->fptags[1] = 1;
    env->fptags[2] = 1;
    env->fptags[3] = 1;
    env->fptags[4] = 1;
    env->fptags[5] = 1;
    env->fptags[6] = 1;
    env->fptags[7] = 1;
}

void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    floatx80 tmp;
    int i;

    do_fldenv(env, ptr, data32, GETPC());
    ptr += (14 << data32);

    for (i = 0; i < 8; i++) {
        tmp = helper_fldt(env, ptr, GETPC());
        ST(i) = tmp;
        ptr += 10;
    }
}

#if defined(CONFIG_USER_ONLY)
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_fsave(env, ptr, data32);
}

void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_frstor(env, ptr, data32);
}
#endif

static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int fpus, fptag, i;
    target_ulong addr;

    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 0; i < 8; i++) {
        fptag |= (env->fptags[i] << i);
    }
    cpu_stw_data_ra(env, ptr, env->fpuc, ra);
    cpu_stw_data_ra(env, ptr + 2, fpus, ra);
    cpu_stw_data_ra(env, ptr + 4, fptag ^ 0xff, ra);

    /* In 32-bit mode this is eip, sel, dp, sel.
       In 64-bit mode this is rip, rdp.
       But in either case we don't write actual data, just zeros.  */
    cpu_stq_data_ra(env, ptr + 0x08, 0, ra); /* eip+sel; rip */
    cpu_stq_data_ra(env, ptr + 0x10, 0, ra); /* edp+sel; rdp */

    addr = ptr + 0x20;
    for (i = 0; i < 8; i++) {
        floatx80 tmp = ST(i);
        helper_fstt(env, tmp, addr, ra);
        addr += 16;
    }
}

static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    cpu_stl_data_ra(env, ptr + 0x18, env->mxcsr, ra); /* mxcsr */
    cpu_stl_data_ra(env, ptr + 0x1c, 0x0000ffff, ra); /* mxcsr_mask */
}

static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int i, nb_xmm_regs;
    target_ulong addr;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    addr = ptr + 0xa0;
    for (i = 0; i < nb_xmm_regs; i++) {
        cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
        cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
        addr += 16;
    }
}
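
/*
 * do_xsave_fpu(), do_xsave_mxcsr() and do_xsave_sse() together fill in the
 * 512-byte legacy FXSAVE image: FCW/FSW/abridged tag word at offsets 0/2/4,
 * zeroed instruction and operand pointers at 0x08/0x10, MXCSR and its mask
 * at 0x18/0x1c, the x87 registers in 16-byte slots from 0x20, and the XMM
 * registers from 0xa0.
 */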

static void do_xsave_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra)
{
    int i;

    for (i = 0; i < 4; i++, addr += 16) {
        cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
        cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
    }
}

static void do_xsave_bndcsr(CPUX86State *env, target_ulong addr, uintptr_t ra)
{
    cpu_stq_data_ra(env, addr, env->bndcs_regs.cfgu, ra);
    cpu_stq_data_ra(env, addr + 8, env->bndcs_regs.sts, ra);
}

static void do_xsave_pkru(CPUX86State *env, target_ulong addr, uintptr_t ra)
{
    cpu_stq_data_ra(env, addr, env->pkru, ra);
}

void helper_fxsave(CPUX86State *env, target_ulong ptr)
{
    uintptr_t ra = GETPC();

    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    do_xsave_fpu(env, ptr, ra);

    if (env->cr[4] & CR4_OSFXSR_MASK) {
        do_xsave_mxcsr(env, ptr, ra);
        /* Fast FXSAVE leaves out the XMM registers */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            do_xsave_sse(env, ptr, ra);
        }
    }
}

static uint64_t get_xinuse(CPUX86State *env)
{
    uint64_t inuse = -1;

    /* For the most part, we don't track XINUSE.  We could calculate it
       here for all components, but it's probably less work to simply
       indicate in use.  That said, the state of BNDREGS is important
       enough to track in HFLAGS, so we might as well use that here.  */
    if ((env->hflags & HF_MPX_IU_MASK) == 0) {
        inuse &= ~XSTATE_BNDREGS_MASK;
    }
    return inuse;
}

static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
                     uint64_t inuse, uint64_t opt, uintptr_t ra)
{
    uint64_t old_bv, new_bv;

    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The operand must be 64 byte aligned.  */
    if (ptr & 63) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* Never save anything not enabled by XCR0.  */
    rfbm &= env->xcr0;
    opt &= rfbm;

    if (opt & XSTATE_FP_MASK) {
        do_xsave_fpu(env, ptr, ra);
    }
    if (rfbm & XSTATE_SSE_MASK) {
        /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
        do_xsave_mxcsr(env, ptr, ra);
    }
    if (opt & XSTATE_SSE_MASK) {
        do_xsave_sse(env, ptr, ra);
    }
    if (opt & XSTATE_BNDREGS_MASK) {
        target_ulong off = x86_ext_save_areas[XSTATE_BNDREGS_BIT].offset;
        do_xsave_bndregs(env, ptr + off, ra);
    }
    if (opt & XSTATE_BNDCSR_MASK) {
        target_ulong off = x86_ext_save_areas[XSTATE_BNDCSR_BIT].offset;
        do_xsave_bndcsr(env, ptr + off, ra);
    }
    if (opt & XSTATE_PKRU_MASK) {
        target_ulong off = x86_ext_save_areas[XSTATE_PKRU_BIT].offset;
        do_xsave_pkru(env, ptr + off, ra);
    }

    /* Update the XSTATE_BV field.  */
    old_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
    new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
    cpu_stq_data_ra(env, ptr + 512, new_bv, ra);
}
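
/*
 * The XSTATE_BV update above follows the XSAVE rule that only the bits
 * selected by the requested-feature bitmap (RFBM) are rewritten: those bits
 * take the component's in-use status, while bits outside RFBM keep whatever
 * value was already in the header at offset 512.
 */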

void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
}

void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uint64_t inuse = get_xinuse(env);
    do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
}

static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int i, fpus, fptag;
    target_ulong addr;

    cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, ra));
    fpus = cpu_lduw_data_ra(env, ptr + 2, ra);
    fptag = cpu_lduw_data_ra(env, ptr + 4, ra);
    env->fpstt = (fpus >> 11) & 7;
    env->fpus = fpus & ~0x3800;
    fptag ^= 0xff;
    for (i = 0; i < 8; i++) {
        env->fptags[i] = ((fptag >> i) & 1);
    }

    addr = ptr + 0x20;
    for (i = 0; i < 8; i++) {
        floatx80 tmp = helper_fldt(env, addr, ra);
        ST(i) = tmp;
        addr += 16;
    }
}

static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + 0x18, ra));
}

static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int i, nb_xmm_regs;
    target_ulong addr;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    addr = ptr + 0xa0;
    for (i = 0; i < nb_xmm_regs; i++) {
        env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
        env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
        addr += 16;
    }
}

static void do_xrstor_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra)
{
    int i;

    for (i = 0; i < 4; i++, addr += 16) {
        env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
        env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
    }
}

static void do_xrstor_bndcsr(CPUX86State *env, target_ulong addr, uintptr_t ra)
{
    /* FIXME: Extend highest implemented bit of linear address.  */
    env->bndcs_regs.cfgu = cpu_ldq_data_ra(env, addr, ra);
    env->bndcs_regs.sts = cpu_ldq_data_ra(env, addr + 8, ra);
}

static void do_xrstor_pkru(CPUX86State *env, target_ulong addr, uintptr_t ra)
{
    env->pkru = cpu_ldq_data_ra(env, addr, ra);
}

void helper_fxrstor(CPUX86State *env, target_ulong ptr)
{
    uintptr_t ra = GETPC();

    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    do_xrstor_fpu(env, ptr, ra);

    if (env->cr[4] & CR4_OSFXSR_MASK) {
        do_xrstor_mxcsr(env, ptr, ra);
        /* Fast FXRSTOR leaves out the XMM registers */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            do_xrstor_sse(env, ptr, ra);
        }
    }
}

void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uintptr_t ra = GETPC();
    uint64_t xstate_bv, xcomp_bv0, xcomp_bv1;

    rfbm &= env->xcr0;

    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The operand must be 64 byte aligned.  */
    if (ptr & 63) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    xstate_bv = cpu_ldq_data_ra(env, ptr + 512, ra);

    if ((int64_t)xstate_bv < 0) {
        /* FIXME: Compact form.  */
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* Standard form.  */

    /* The XSTATE field must not set bits not present in XCR0.  */
    if (xstate_bv & ~env->xcr0) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* The XCOMP field must be zero.  */
    xcomp_bv0 = cpu_ldq_data_ra(env, ptr + 520, ra);
    xcomp_bv1 = cpu_ldq_data_ra(env, ptr + 528, ra);
    if (xcomp_bv0 || xcomp_bv1) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    if (rfbm & XSTATE_FP_MASK) {
        if (xstate_bv & XSTATE_FP_MASK) {
            do_xrstor_fpu(env, ptr, ra);
        } else {
            helper_fninit(env);
            memset(env->fpregs, 0, sizeof(env->fpregs));
        }
    }
    if (rfbm & XSTATE_SSE_MASK) {
        /* Note that the standard form of XRSTOR loads MXCSR from memory
           whether or not the XSTATE_BV bit is set.  */
        do_xrstor_mxcsr(env, ptr, ra);
        if (xstate_bv & XSTATE_SSE_MASK) {
            do_xrstor_sse(env, ptr, ra);
        } else {
            /* ??? When AVX is implemented, we may have to be more
               selective in the clearing.  */
            memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
        }
    }
    if (rfbm & XSTATE_BNDREGS_MASK) {
        if (xstate_bv & XSTATE_BNDREGS_MASK) {
            target_ulong off = x86_ext_save_areas[XSTATE_BNDREGS_BIT].offset;
            do_xrstor_bndregs(env, ptr + off, ra);
            env->hflags |= HF_MPX_IU_MASK;
        } else {
            memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
            env->hflags &= ~HF_MPX_IU_MASK;
        }
    }
    if (rfbm & XSTATE_BNDCSR_MASK) {
        if (xstate_bv & XSTATE_BNDCSR_MASK) {
            target_ulong off = x86_ext_save_areas[XSTATE_BNDCSR_BIT].offset;
            do_xrstor_bndcsr(env, ptr + off, ra);
        } else {
            memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
        }
        cpu_sync_bndcs_hflags(env);
    }
    if (rfbm & XSTATE_PKRU_MASK) {
        uint64_t old_pkru = env->pkru;
        if (xstate_bv & XSTATE_PKRU_MASK) {
            target_ulong off = x86_ext_save_areas[XSTATE_PKRU_BIT].offset;
            do_xrstor_pkru(env, ptr + off, ra);
        } else {
            env->pkru = 0;
        }
        if (env->pkru != old_pkru) {
            CPUState *cs = CPU(x86_env_get_cpu(env));
            tlb_flush(cs, 1);
        }
    }
}

uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
{
    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
    }

    switch (ecx) {
    case 0:
        return env->xcr0;
    case 1:
        if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
            return env->xcr0 & get_xinuse(env);
        }
        break;
    }
    raise_exception_ra(env, EXCP0D_GPF, GETPC());
}
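
/*
 * helper_xsetbv() below validates the new XCR0 value: CR4.OSXSAVE must be
 * set, only XCR0 (ECX == 0) is writable, the x87 bit may not be cleared, no
 * bit beyond what CPUID leaf 0Dh advertises may be set, and the two MPX
 * components must be enabled or disabled together.  The MPX check multiplies
 * the mask by (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK), which shifts the
 * BNDREGS bit up to the BNDCSR position, so the XOR has the BNDCSR bit set
 * exactly when the two bits differ.
 */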

void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
{
    uint32_t dummy, ena_lo, ena_hi;
    uint64_t ena;

    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
    }

    /* Only XCR0 is defined at present; the FPU may not be disabled.  */
    if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
        goto do_gpf;
    }

    /* Disallow enabling unimplemented features.  */
    cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
    ena = ((uint64_t)ena_hi << 32) | ena_lo;
    if (mask & ~ena) {
        goto do_gpf;
    }

    /* Disallow enabling only half of MPX.  */
    if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
        & XSTATE_BNDCSR_MASK) {
        goto do_gpf;
    }

    env->xcr0 = mask;
    cpu_sync_bndcs_hflags(env);
    return;

 do_gpf:
    raise_exception_ra(env, EXCP0D_GPF, GETPC());
}

void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
{
    CPU_LDoubleU temp;

    temp.d = f;
    *pmant = temp.l.lower;
    *pexp = temp.l.upper;
}

floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper)
{
    CPU_LDoubleU temp;

    temp.l.upper = upper;
    temp.l.lower = mant;
    return temp.d;
}

/* MMX/SSE */
/* XXX: optimize by storing fptt and fptags in the static cpu state */

#define SSE_DAZ     0x0040
#define SSE_RC_MASK 0x6000
#define SSE_RC_NEAR 0x0000
#define SSE_RC_DOWN 0x2000
#define SSE_RC_UP   0x4000
#define SSE_RC_CHOP 0x6000
#define SSE_FZ      0x8000

void cpu_set_mxcsr(CPUX86State *env, uint32_t mxcsr)
{
    int rnd_type;

    env->mxcsr = mxcsr;

    /* set rounding mode */
    switch (mxcsr & SSE_RC_MASK) {
    default:
    case SSE_RC_NEAR:
        rnd_type = float_round_nearest_even;
        break;
    case SSE_RC_DOWN:
        rnd_type = float_round_down;
        break;
    case SSE_RC_UP:
        rnd_type = float_round_up;
        break;
    case SSE_RC_CHOP:
        rnd_type = float_round_to_zero;
        break;
    }
    set_float_rounding_mode(rnd_type, &env->sse_status);

    /* set denormals are zero */
    set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);

    /* set flush to zero */
    set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
}

void cpu_set_fpuc(CPUX86State *env, uint16_t val)
{
    env->fpuc = val;
    update_fp_status(env);
}

void helper_ldmxcsr(CPUX86State *env, uint32_t val)
{
    cpu_set_mxcsr(env, val);
}

void helper_enter_mmx(CPUX86State *env)
{
    env->fpstt = 0;
    *(uint32_t *)(env->fptags) = 0;
    *(uint32_t *)(env->fptags + 4) = 0;
}

void helper_emms(CPUX86State *env)
{
    /* set to empty state */
    *(uint32_t *)(env->fptags) = 0x01010101;
    *(uint32_t *)(env->fptags + 4) = 0x01010101;
}

/* XXX: suppress */
void helper_movq(CPUX86State *env, void *d, void *s)
{
    *(uint64_t *)d = *(uint64_t *)s;
}
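
/*
 * ops_sse.h is included twice below: the first pass, with SHIFT defined to 0,
 * instantiates the 64-bit MMX variants of the vector helpers, and the second
 * pass, with SHIFT defined to 1, instantiates the 128-bit SSE variants.
 */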

#define SHIFT 0
#include "ops_sse.h"

#define SHIFT 1
#include "ops_sse.h"