block: test 'blockdev-snapshot' using a file BDS as the overlay
[qemu/ar7.git] / target-i386 / fpu_helper.c
blobd421a475ffc821ef5a9e1b4372d28c94aeda09b7
1 /*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include <math.h>
21 #include "cpu.h"
22 #include "exec/helper-proto.h"
23 #include "qemu/host-utils.h"
24 #include "exec/cpu_ldst.h"
26 #define FPU_RC_MASK 0xc00
27 #define FPU_RC_NEAR 0x000
28 #define FPU_RC_DOWN 0x400
29 #define FPU_RC_UP 0x800
30 #define FPU_RC_CHOP 0xc00
32 #define MAXTAN 9223372036854775808.0
/*
 * The following deal with x86 long double-precision numbers.
 * Each accessor takes an lvalue with the CPU_LDoubleU layout
 * (.l.upper = sign + exponent word, .l.lower = 64-bit mantissa).
 * Arguments are parenthesised so expressions such as *p are accepted.
 */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
/* biased 15-bit exponent field */
#define EXPD(fp) ((fp).l.upper & 0x7fff)
/* non-zero (0x8000) when the sign bit is set */
#define SIGND(fp) ((fp).l.upper & 0x8000)
/* 64-bit mantissa */
#define MANTD(fp) ((fp).l.lower)
/* overwrite the exponent field with EXPBIAS, keeping the sign bit */
#define BIASEXPONENT(fp) \
    ((fp).l.upper = ((fp).l.upper & ~(0x7fff)) | EXPBIAS)
42 #define FPUS_IE (1 << 0)
43 #define FPUS_DE (1 << 1)
44 #define FPUS_ZE (1 << 2)
45 #define FPUS_OE (1 << 3)
46 #define FPUS_UE (1 << 4)
47 #define FPUS_PE (1 << 5)
48 #define FPUS_SF (1 << 6)
49 #define FPUS_SE (1 << 7)
50 #define FPUS_B (1 << 15)
52 #define FPUC_EM 0x3f
54 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
55 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
56 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
58 static inline void fpush(CPUX86State *env)
60 env->fpstt = (env->fpstt - 1) & 7;
61 env->fptags[env->fpstt] = 0; /* validate stack entry */
64 static inline void fpop(CPUX86State *env)
66 env->fptags[env->fpstt] = 1; /* invalidate stack entry */
67 env->fpstt = (env->fpstt + 1) & 7;
70 static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
71 uintptr_t retaddr)
73 CPU_LDoubleU temp;
75 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
76 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
77 return temp.d;
80 static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
81 uintptr_t retaddr)
83 CPU_LDoubleU temp;
85 temp.d = f;
86 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
87 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
90 /* x87 FPU helpers */
92 static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
94 union {
95 float64 f64;
96 double d;
97 } u;
99 u.f64 = floatx80_to_float64(a, &env->fp_status);
100 return u.d;
103 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
105 union {
106 float64 f64;
107 double d;
108 } u;
110 u.d = a;
111 return float64_to_floatx80(u.f64, &env->fp_status);
114 static void fpu_set_exception(CPUX86State *env, int mask)
116 env->fpus |= mask;
117 if (env->fpus & (~env->fpuc & FPUC_EM)) {
118 env->fpus |= FPUS_SE | FPUS_B;
122 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
124 if (floatx80_is_zero(b)) {
125 fpu_set_exception(env, FPUS_ZE);
127 return floatx80_div(a, b, &env->fp_status);
130 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
132 if (env->cr[0] & CR0_NE_MASK) {
133 raise_exception_ra(env, EXCP10_COPR, retaddr);
135 #if !defined(CONFIG_USER_ONLY)
136 else {
137 cpu_set_ferr(env);
139 #endif
142 void helper_flds_FT0(CPUX86State *env, uint32_t val)
144 union {
145 float32 f;
146 uint32_t i;
147 } u;
149 u.i = val;
150 FT0 = float32_to_floatx80(u.f, &env->fp_status);
153 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
155 union {
156 float64 f;
157 uint64_t i;
158 } u;
160 u.i = val;
161 FT0 = float64_to_floatx80(u.f, &env->fp_status);
164 void helper_fildl_FT0(CPUX86State *env, int32_t val)
166 FT0 = int32_to_floatx80(val, &env->fp_status);
169 void helper_flds_ST0(CPUX86State *env, uint32_t val)
171 int new_fpstt;
172 union {
173 float32 f;
174 uint32_t i;
175 } u;
177 new_fpstt = (env->fpstt - 1) & 7;
178 u.i = val;
179 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
180 env->fpstt = new_fpstt;
181 env->fptags[new_fpstt] = 0; /* validate stack entry */
184 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
186 int new_fpstt;
187 union {
188 float64 f;
189 uint64_t i;
190 } u;
192 new_fpstt = (env->fpstt - 1) & 7;
193 u.i = val;
194 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
195 env->fpstt = new_fpstt;
196 env->fptags[new_fpstt] = 0; /* validate stack entry */
199 void helper_fildl_ST0(CPUX86State *env, int32_t val)
201 int new_fpstt;
203 new_fpstt = (env->fpstt - 1) & 7;
204 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
205 env->fpstt = new_fpstt;
206 env->fptags[new_fpstt] = 0; /* validate stack entry */
209 void helper_fildll_ST0(CPUX86State *env, int64_t val)
211 int new_fpstt;
213 new_fpstt = (env->fpstt - 1) & 7;
214 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
215 env->fpstt = new_fpstt;
216 env->fptags[new_fpstt] = 0; /* validate stack entry */
219 uint32_t helper_fsts_ST0(CPUX86State *env)
221 union {
222 float32 f;
223 uint32_t i;
224 } u;
226 u.f = floatx80_to_float32(ST0, &env->fp_status);
227 return u.i;
230 uint64_t helper_fstl_ST0(CPUX86State *env)
232 union {
233 float64 f;
234 uint64_t i;
235 } u;
237 u.f = floatx80_to_float64(ST0, &env->fp_status);
238 return u.i;
241 int32_t helper_fist_ST0(CPUX86State *env)
243 int32_t val;
245 val = floatx80_to_int32(ST0, &env->fp_status);
246 if (val != (int16_t)val) {
247 val = -32768;
249 return val;
252 int32_t helper_fistl_ST0(CPUX86State *env)
254 int32_t val;
255 signed char old_exp_flags;
257 old_exp_flags = get_float_exception_flags(&env->fp_status);
258 set_float_exception_flags(0, &env->fp_status);
260 val = floatx80_to_int32(ST0, &env->fp_status);
261 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
262 val = 0x80000000;
264 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
265 | old_exp_flags, &env->fp_status);
266 return val;
269 int64_t helper_fistll_ST0(CPUX86State *env)
271 int64_t val;
272 signed char old_exp_flags;
274 old_exp_flags = get_float_exception_flags(&env->fp_status);
275 set_float_exception_flags(0, &env->fp_status);
277 val = floatx80_to_int64(ST0, &env->fp_status);
278 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
279 val = 0x8000000000000000ULL;
281 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
282 | old_exp_flags, &env->fp_status);
283 return val;
286 int32_t helper_fistt_ST0(CPUX86State *env)
288 int32_t val;
290 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
291 if (val != (int16_t)val) {
292 val = -32768;
294 return val;
297 int32_t helper_fisttl_ST0(CPUX86State *env)
299 int32_t val;
301 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
302 return val;
305 int64_t helper_fisttll_ST0(CPUX86State *env)
307 int64_t val;
309 val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
310 return val;
313 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
315 int new_fpstt;
317 new_fpstt = (env->fpstt - 1) & 7;
318 env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
319 env->fpstt = new_fpstt;
320 env->fptags[new_fpstt] = 0; /* validate stack entry */
323 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
325 helper_fstt(env, ST0, ptr, GETPC());
328 void helper_fpush(CPUX86State *env)
330 fpush(env);
333 void helper_fpop(CPUX86State *env)
335 fpop(env);
338 void helper_fdecstp(CPUX86State *env)
340 env->fpstt = (env->fpstt - 1) & 7;
341 env->fpus &= ~0x4700;
344 void helper_fincstp(CPUX86State *env)
346 env->fpstt = (env->fpstt + 1) & 7;
347 env->fpus &= ~0x4700;
350 /* FPU move */
352 void helper_ffree_STN(CPUX86State *env, int st_index)
354 env->fptags[(env->fpstt + st_index) & 7] = 1;
357 void helper_fmov_ST0_FT0(CPUX86State *env)
359 ST0 = FT0;
362 void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
364 FT0 = ST(st_index);
367 void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
369 ST0 = ST(st_index);
372 void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
374 ST(st_index) = ST0;
377 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
379 floatx80 tmp;
381 tmp = ST(st_index);
382 ST(st_index) = ST0;
383 ST0 = tmp;
386 /* FPU operations */
388 static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
390 void helper_fcom_ST0_FT0(CPUX86State *env)
392 int ret;
394 ret = floatx80_compare(ST0, FT0, &env->fp_status);
395 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
398 void helper_fucom_ST0_FT0(CPUX86State *env)
400 int ret;
402 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
403 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
406 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
408 void helper_fcomi_ST0_FT0(CPUX86State *env)
410 int eflags;
411 int ret;
413 ret = floatx80_compare(ST0, FT0, &env->fp_status);
414 eflags = cpu_cc_compute_all(env, CC_OP);
415 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
416 CC_SRC = eflags;
419 void helper_fucomi_ST0_FT0(CPUX86State *env)
421 int eflags;
422 int ret;
424 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
425 eflags = cpu_cc_compute_all(env, CC_OP);
426 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
427 CC_SRC = eflags;
430 void helper_fadd_ST0_FT0(CPUX86State *env)
432 ST0 = floatx80_add(ST0, FT0, &env->fp_status);
435 void helper_fmul_ST0_FT0(CPUX86State *env)
437 ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
440 void helper_fsub_ST0_FT0(CPUX86State *env)
442 ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
445 void helper_fsubr_ST0_FT0(CPUX86State *env)
447 ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
450 void helper_fdiv_ST0_FT0(CPUX86State *env)
452 ST0 = helper_fdiv(env, ST0, FT0);
455 void helper_fdivr_ST0_FT0(CPUX86State *env)
457 ST0 = helper_fdiv(env, FT0, ST0);
460 /* fp operations between STN and ST0 */
462 void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
464 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
467 void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
469 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
472 void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
474 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
477 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
479 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
482 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
484 floatx80 *p;
486 p = &ST(st_index);
487 *p = helper_fdiv(env, *p, ST0);
490 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
492 floatx80 *p;
494 p = &ST(st_index);
495 *p = helper_fdiv(env, ST0, *p);
498 /* misc FPU operations */
499 void helper_fchs_ST0(CPUX86State *env)
501 ST0 = floatx80_chs(ST0);
504 void helper_fabs_ST0(CPUX86State *env)
506 ST0 = floatx80_abs(ST0);
509 void helper_fld1_ST0(CPUX86State *env)
511 ST0 = floatx80_one;
514 void helper_fldl2t_ST0(CPUX86State *env)
516 ST0 = floatx80_l2t;
519 void helper_fldl2e_ST0(CPUX86State *env)
521 ST0 = floatx80_l2e;
524 void helper_fldpi_ST0(CPUX86State *env)
526 ST0 = floatx80_pi;
529 void helper_fldlg2_ST0(CPUX86State *env)
531 ST0 = floatx80_lg2;
534 void helper_fldln2_ST0(CPUX86State *env)
536 ST0 = floatx80_ln2;
539 void helper_fldz_ST0(CPUX86State *env)
541 ST0 = floatx80_zero;
544 void helper_fldz_FT0(CPUX86State *env)
546 FT0 = floatx80_zero;
549 uint32_t helper_fnstsw(CPUX86State *env)
551 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
554 uint32_t helper_fnstcw(CPUX86State *env)
556 return env->fpuc;
559 void update_fp_status(CPUX86State *env)
561 int rnd_type;
563 /* set rounding mode */
564 switch (env->fpuc & FPU_RC_MASK) {
565 default:
566 case FPU_RC_NEAR:
567 rnd_type = float_round_nearest_even;
568 break;
569 case FPU_RC_DOWN:
570 rnd_type = float_round_down;
571 break;
572 case FPU_RC_UP:
573 rnd_type = float_round_up;
574 break;
575 case FPU_RC_CHOP:
576 rnd_type = float_round_to_zero;
577 break;
579 set_float_rounding_mode(rnd_type, &env->fp_status);
580 switch ((env->fpuc >> 8) & 3) {
581 case 0:
582 rnd_type = 32;
583 break;
584 case 2:
585 rnd_type = 64;
586 break;
587 case 3:
588 default:
589 rnd_type = 80;
590 break;
592 set_floatx80_rounding_precision(rnd_type, &env->fp_status);
595 void helper_fldcw(CPUX86State *env, uint32_t val)
597 cpu_set_fpuc(env, val);
600 void helper_fclex(CPUX86State *env)
602 env->fpus &= 0x7f00;
605 void helper_fwait(CPUX86State *env)
607 if (env->fpus & FPUS_SE) {
608 fpu_raise_exception(env, GETPC());
612 void helper_fninit(CPUX86State *env)
614 env->fpus = 0;
615 env->fpstt = 0;
616 cpu_set_fpuc(env, 0x37f);
617 env->fptags[0] = 1;
618 env->fptags[1] = 1;
619 env->fptags[2] = 1;
620 env->fptags[3] = 1;
621 env->fptags[4] = 1;
622 env->fptags[5] = 1;
623 env->fptags[6] = 1;
624 env->fptags[7] = 1;
627 /* BCD ops */
629 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
631 floatx80 tmp;
632 uint64_t val;
633 unsigned int v;
634 int i;
636 val = 0;
637 for (i = 8; i >= 0; i--) {
638 v = cpu_ldub_data_ra(env, ptr + i, GETPC());
639 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
641 tmp = int64_to_floatx80(val, &env->fp_status);
642 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
643 tmp = floatx80_chs(tmp);
645 fpush(env);
646 ST0 = tmp;
649 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
651 int v;
652 target_ulong mem_ref, mem_end;
653 int64_t val;
655 val = floatx80_to_int64(ST0, &env->fp_status);
656 mem_ref = ptr;
657 mem_end = mem_ref + 9;
658 if (val < 0) {
659 cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
660 val = -val;
661 } else {
662 cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
664 while (mem_ref < mem_end) {
665 if (val == 0) {
666 break;
668 v = val % 100;
669 val = val / 100;
670 v = ((v / 10) << 4) | (v % 10);
671 cpu_stb_data_ra(env, mem_ref++, v, GETPC());
673 while (mem_ref < mem_end) {
674 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
678 void helper_f2xm1(CPUX86State *env)
680 double val = floatx80_to_double(env, ST0);
682 val = pow(2.0, val) - 1.0;
683 ST0 = double_to_floatx80(env, val);
686 void helper_fyl2x(CPUX86State *env)
688 double fptemp = floatx80_to_double(env, ST0);
690 if (fptemp > 0.0) {
691 fptemp = log(fptemp) / log(2.0); /* log2(ST) */
692 fptemp *= floatx80_to_double(env, ST1);
693 ST1 = double_to_floatx80(env, fptemp);
694 fpop(env);
695 } else {
696 env->fpus &= ~0x4700;
697 env->fpus |= 0x400;
701 void helper_fptan(CPUX86State *env)
703 double fptemp = floatx80_to_double(env, ST0);
705 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
706 env->fpus |= 0x400;
707 } else {
708 fptemp = tan(fptemp);
709 ST0 = double_to_floatx80(env, fptemp);
710 fpush(env);
711 ST0 = floatx80_one;
712 env->fpus &= ~0x400; /* C2 <-- 0 */
713 /* the above code is for |arg| < 2**52 only */
717 void helper_fpatan(CPUX86State *env)
719 double fptemp, fpsrcop;
721 fpsrcop = floatx80_to_double(env, ST1);
722 fptemp = floatx80_to_double(env, ST0);
723 ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
724 fpop(env);
727 void helper_fxtract(CPUX86State *env)
729 CPU_LDoubleU temp;
731 temp.d = ST0;
733 if (floatx80_is_zero(ST0)) {
734 /* Easy way to generate -inf and raising division by 0 exception */
735 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
736 &env->fp_status);
737 fpush(env);
738 ST0 = temp.d;
739 } else {
740 int expdif;
742 expdif = EXPD(temp) - EXPBIAS;
743 /* DP exponent bias */
744 ST0 = int32_to_floatx80(expdif, &env->fp_status);
745 fpush(env);
746 BIASEXPONENT(temp);
747 ST0 = temp.d;
751 void helper_fprem1(CPUX86State *env)
753 double st0, st1, dblq, fpsrcop, fptemp;
754 CPU_LDoubleU fpsrcop1, fptemp1;
755 int expdif;
756 signed long long int q;
758 st0 = floatx80_to_double(env, ST0);
759 st1 = floatx80_to_double(env, ST1);
761 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
762 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
763 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
764 return;
767 fpsrcop = st0;
768 fptemp = st1;
769 fpsrcop1.d = ST0;
770 fptemp1.d = ST1;
771 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
773 if (expdif < 0) {
774 /* optimisation? taken from the AMD docs */
775 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
776 /* ST0 is unchanged */
777 return;
780 if (expdif < 53) {
781 dblq = fpsrcop / fptemp;
782 /* round dblq towards nearest integer */
783 dblq = rint(dblq);
784 st0 = fpsrcop - fptemp * dblq;
786 /* convert dblq to q by truncating towards zero */
787 if (dblq < 0.0) {
788 q = (signed long long int)(-dblq);
789 } else {
790 q = (signed long long int)dblq;
793 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
794 /* (C0,C3,C1) <-- (q2,q1,q0) */
795 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
796 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
797 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
798 } else {
799 env->fpus |= 0x400; /* C2 <-- 1 */
800 fptemp = pow(2.0, expdif - 50);
801 fpsrcop = (st0 / st1) / fptemp;
802 /* fpsrcop = integer obtained by chopping */
803 fpsrcop = (fpsrcop < 0.0) ?
804 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
805 st0 -= (st1 * fpsrcop * fptemp);
807 ST0 = double_to_floatx80(env, st0);
810 void helper_fprem(CPUX86State *env)
812 double st0, st1, dblq, fpsrcop, fptemp;
813 CPU_LDoubleU fpsrcop1, fptemp1;
814 int expdif;
815 signed long long int q;
817 st0 = floatx80_to_double(env, ST0);
818 st1 = floatx80_to_double(env, ST1);
820 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
821 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
822 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
823 return;
826 fpsrcop = st0;
827 fptemp = st1;
828 fpsrcop1.d = ST0;
829 fptemp1.d = ST1;
830 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
832 if (expdif < 0) {
833 /* optimisation? taken from the AMD docs */
834 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
835 /* ST0 is unchanged */
836 return;
839 if (expdif < 53) {
840 dblq = fpsrcop / fptemp; /* ST0 / ST1 */
841 /* round dblq towards zero */
842 dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
843 st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
845 /* convert dblq to q by truncating towards zero */
846 if (dblq < 0.0) {
847 q = (signed long long int)(-dblq);
848 } else {
849 q = (signed long long int)dblq;
852 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
853 /* (C0,C3,C1) <-- (q2,q1,q0) */
854 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
855 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
856 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
857 } else {
858 int N = 32 + (expdif % 32); /* as per AMD docs */
860 env->fpus |= 0x400; /* C2 <-- 1 */
861 fptemp = pow(2.0, (double)(expdif - N));
862 fpsrcop = (st0 / st1) / fptemp;
863 /* fpsrcop = integer obtained by chopping */
864 fpsrcop = (fpsrcop < 0.0) ?
865 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
866 st0 -= (st1 * fpsrcop * fptemp);
868 ST0 = double_to_floatx80(env, st0);
871 void helper_fyl2xp1(CPUX86State *env)
873 double fptemp = floatx80_to_double(env, ST0);
875 if ((fptemp + 1.0) > 0.0) {
876 fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
877 fptemp *= floatx80_to_double(env, ST1);
878 ST1 = double_to_floatx80(env, fptemp);
879 fpop(env);
880 } else {
881 env->fpus &= ~0x4700;
882 env->fpus |= 0x400;
886 void helper_fsqrt(CPUX86State *env)
888 if (floatx80_is_neg(ST0)) {
889 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
890 env->fpus |= 0x400;
892 ST0 = floatx80_sqrt(ST0, &env->fp_status);
895 void helper_fsincos(CPUX86State *env)
897 double fptemp = floatx80_to_double(env, ST0);
899 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
900 env->fpus |= 0x400;
901 } else {
902 ST0 = double_to_floatx80(env, sin(fptemp));
903 fpush(env);
904 ST0 = double_to_floatx80(env, cos(fptemp));
905 env->fpus &= ~0x400; /* C2 <-- 0 */
906 /* the above code is for |arg| < 2**63 only */
910 void helper_frndint(CPUX86State *env)
912 ST0 = floatx80_round_to_int(ST0, &env->fp_status);
915 void helper_fscale(CPUX86State *env)
917 if (floatx80_is_any_nan(ST1)) {
918 ST0 = ST1;
919 } else {
920 int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
921 ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
925 void helper_fsin(CPUX86State *env)
927 double fptemp = floatx80_to_double(env, ST0);
929 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
930 env->fpus |= 0x400;
931 } else {
932 ST0 = double_to_floatx80(env, sin(fptemp));
933 env->fpus &= ~0x400; /* C2 <-- 0 */
934 /* the above code is for |arg| < 2**53 only */
938 void helper_fcos(CPUX86State *env)
940 double fptemp = floatx80_to_double(env, ST0);
942 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
943 env->fpus |= 0x400;
944 } else {
945 ST0 = double_to_floatx80(env, cos(fptemp));
946 env->fpus &= ~0x400; /* C2 <-- 0 */
947 /* the above code is for |arg| < 2**63 only */
951 void helper_fxam_ST0(CPUX86State *env)
953 CPU_LDoubleU temp;
954 int expdif;
956 temp.d = ST0;
958 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
959 if (SIGND(temp)) {
960 env->fpus |= 0x200; /* C1 <-- 1 */
963 /* XXX: test fptags too */
964 expdif = EXPD(temp);
965 if (expdif == MAXEXPD) {
966 if (MANTD(temp) == 0x8000000000000000ULL) {
967 env->fpus |= 0x500; /* Infinity */
968 } else {
969 env->fpus |= 0x100; /* NaN */
971 } else if (expdif == 0) {
972 if (MANTD(temp) == 0) {
973 env->fpus |= 0x4000; /* Zero */
974 } else {
975 env->fpus |= 0x4400; /* Denormal */
977 } else {
978 env->fpus |= 0x400;
982 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
983 uintptr_t retaddr)
985 int fpus, fptag, exp, i;
986 uint64_t mant;
987 CPU_LDoubleU tmp;
989 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
990 fptag = 0;
991 for (i = 7; i >= 0; i--) {
992 fptag <<= 2;
993 if (env->fptags[i]) {
994 fptag |= 3;
995 } else {
996 tmp.d = env->fpregs[i].d;
997 exp = EXPD(tmp);
998 mant = MANTD(tmp);
999 if (exp == 0 && mant == 0) {
1000 /* zero */
1001 fptag |= 1;
1002 } else if (exp == 0 || exp == MAXEXPD
1003 || (mant & (1LL << 63)) == 0) {
1004 /* NaNs, infinity, denormal */
1005 fptag |= 2;
1009 if (data32) {
1010 /* 32 bit */
1011 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1012 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1013 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1014 cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1015 cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1016 cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1017 cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
1018 } else {
1019 /* 16 bit */
1020 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1021 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1022 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1023 cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1024 cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1025 cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1026 cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
1030 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1032 do_fstenv(env, ptr, data32, GETPC());
1035 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1036 uintptr_t retaddr)
1038 int i, fpus, fptag;
1040 if (data32) {
1041 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1042 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1043 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1044 } else {
1045 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1046 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1047 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1049 env->fpstt = (fpus >> 11) & 7;
1050 env->fpus = fpus & ~0x3800;
1051 for (i = 0; i < 8; i++) {
1052 env->fptags[i] = ((fptag & 3) == 3);
1053 fptag >>= 2;
1057 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1059 do_fldenv(env, ptr, data32, GETPC());
1062 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1064 floatx80 tmp;
1065 int i;
1067 do_fstenv(env, ptr, data32, GETPC());
1069 ptr += (14 << data32);
1070 for (i = 0; i < 8; i++) {
1071 tmp = ST(i);
1072 helper_fstt(env, tmp, ptr, GETPC());
1073 ptr += 10;
1076 /* fninit */
1077 env->fpus = 0;
1078 env->fpstt = 0;
1079 cpu_set_fpuc(env, 0x37f);
1080 env->fptags[0] = 1;
1081 env->fptags[1] = 1;
1082 env->fptags[2] = 1;
1083 env->fptags[3] = 1;
1084 env->fptags[4] = 1;
1085 env->fptags[5] = 1;
1086 env->fptags[6] = 1;
1087 env->fptags[7] = 1;
1090 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
1092 floatx80 tmp;
1093 int i;
1095 do_fldenv(env, ptr, data32, GETPC());
1096 ptr += (14 << data32);
1098 for (i = 0; i < 8; i++) {
1099 tmp = helper_fldt(env, ptr, GETPC());
1100 ST(i) = tmp;
1101 ptr += 10;
1105 #if defined(CONFIG_USER_ONLY)
1106 void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
1108 helper_fsave(env, ptr, data32);
1111 void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
1113 helper_frstor(env, ptr, data32);
1115 #endif
1117 static void do_fxsave(CPUX86State *env, target_ulong ptr, int data64,
1118 uintptr_t retaddr)
1120 int fpus, fptag, i, nb_xmm_regs;
1121 floatx80 tmp;
1122 target_ulong addr;
1124 /* The operand must be 16 byte aligned */
1125 if (ptr & 0xf) {
1126 raise_exception_ra(env, EXCP0D_GPF, retaddr);
1129 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1130 fptag = 0;
1131 for (i = 0; i < 8; i++) {
1132 fptag |= (env->fptags[i] << i);
1134 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1135 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1136 cpu_stw_data_ra(env, ptr + 4, fptag ^ 0xff, retaddr);
1137 #ifdef TARGET_X86_64
1138 if (data64) {
1139 cpu_stq_data_ra(env, ptr + 0x08, 0, retaddr); /* rip */
1140 cpu_stq_data_ra(env, ptr + 0x10, 0, retaddr); /* rdp */
1141 } else
1142 #endif
1144 cpu_stl_data_ra(env, ptr + 0x08, 0, retaddr); /* eip */
1145 cpu_stl_data_ra(env, ptr + 0x0c, 0, retaddr); /* sel */
1146 cpu_stl_data_ra(env, ptr + 0x10, 0, retaddr); /* dp */
1147 cpu_stl_data_ra(env, ptr + 0x14, 0, retaddr); /* sel */
1150 addr = ptr + 0x20;
1151 for (i = 0; i < 8; i++) {
1152 tmp = ST(i);
1153 helper_fstt(env, tmp, addr, retaddr);
1154 addr += 16;
1157 if (env->cr[4] & CR4_OSFXSR_MASK) {
1158 /* XXX: finish it */
1159 cpu_stl_data_ra(env, ptr + 0x18, env->mxcsr, retaddr); /* mxcsr */
1160 cpu_stl_data_ra(env, ptr + 0x1c, 0x0000ffff, retaddr); /* mxcsr_mask */
1161 if (env->hflags & HF_CS64_MASK) {
1162 nb_xmm_regs = 16;
1163 } else {
1164 nb_xmm_regs = 8;
1166 addr = ptr + 0xa0;
1167 /* Fast FXSAVE leaves out the XMM registers */
1168 if (!(env->efer & MSR_EFER_FFXSR)
1169 || (env->hflags & HF_CPL_MASK)
1170 || !(env->hflags & HF_LMA_MASK)) {
1171 for (i = 0; i < nb_xmm_regs; i++) {
1172 cpu_stq_data_ra(env, addr, env->xmm_regs[i].XMM_Q(0), retaddr);
1173 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].XMM_Q(1), retaddr);
1174 addr += 16;
1180 void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
1182 do_fxsave(env, ptr, data64, GETPC());
1185 static void do_fxrstor(CPUX86State *env, target_ulong ptr, int data64,
1186 uintptr_t retaddr)
1188 int i, fpus, fptag, nb_xmm_regs;
1189 floatx80 tmp;
1190 target_ulong addr;
1192 /* The operand must be 16 byte aligned */
1193 if (ptr & 0xf) {
1194 raise_exception_ra(env, EXCP0D_GPF, retaddr);
1197 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1198 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1199 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1200 env->fpstt = (fpus >> 11) & 7;
1201 env->fpus = fpus & ~0x3800;
1202 fptag ^= 0xff;
1203 for (i = 0; i < 8; i++) {
1204 env->fptags[i] = ((fptag >> i) & 1);
1207 addr = ptr + 0x20;
1208 for (i = 0; i < 8; i++) {
1209 tmp = helper_fldt(env, addr, retaddr);
1210 ST(i) = tmp;
1211 addr += 16;
1214 if (env->cr[4] & CR4_OSFXSR_MASK) {
1215 /* XXX: finish it */
1216 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + 0x18, retaddr));
1217 /* cpu_ldl_data_ra(env, ptr + 0x1c, retaddr); */
1218 if (env->hflags & HF_CS64_MASK) {
1219 nb_xmm_regs = 16;
1220 } else {
1221 nb_xmm_regs = 8;
1223 addr = ptr + 0xa0;
1224 /* Fast FXRESTORE leaves out the XMM registers */
1225 if (!(env->efer & MSR_EFER_FFXSR)
1226 || (env->hflags & HF_CPL_MASK)
1227 || !(env->hflags & HF_LMA_MASK)) {
1228 for (i = 0; i < nb_xmm_regs; i++) {
1229 env->xmm_regs[i].XMM_Q(0) = cpu_ldq_data_ra(env, addr, retaddr);
1230 env->xmm_regs[i].XMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, retaddr);
1231 addr += 16;
1237 void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
1239 do_fxrstor(env, ptr, data64, GETPC());
1242 void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
1244 CPU_LDoubleU temp;
1246 temp.d = f;
1247 *pmant = temp.l.lower;
1248 *pexp = temp.l.upper;
1251 floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper)
1253 CPU_LDoubleU temp;
1255 temp.l.upper = upper;
1256 temp.l.lower = mant;
1257 return temp.d;
1260 /* MMX/SSE */
1261 /* XXX: optimize by storing fptt and fptags in the static cpu state */
1263 #define SSE_DAZ 0x0040
1264 #define SSE_RC_MASK 0x6000
1265 #define SSE_RC_NEAR 0x0000
1266 #define SSE_RC_DOWN 0x2000
1267 #define SSE_RC_UP 0x4000
1268 #define SSE_RC_CHOP 0x6000
1269 #define SSE_FZ 0x8000
1271 void cpu_set_mxcsr(CPUX86State *env, uint32_t mxcsr)
1273 int rnd_type;
1275 env->mxcsr = mxcsr;
1277 /* set rounding mode */
1278 switch (mxcsr & SSE_RC_MASK) {
1279 default:
1280 case SSE_RC_NEAR:
1281 rnd_type = float_round_nearest_even;
1282 break;
1283 case SSE_RC_DOWN:
1284 rnd_type = float_round_down;
1285 break;
1286 case SSE_RC_UP:
1287 rnd_type = float_round_up;
1288 break;
1289 case SSE_RC_CHOP:
1290 rnd_type = float_round_to_zero;
1291 break;
1293 set_float_rounding_mode(rnd_type, &env->sse_status);
1295 /* set denormals are zero */
1296 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
1298 /* set flush to zero */
1299 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
1302 void cpu_set_fpuc(CPUX86State *env, uint16_t val)
1304 env->fpuc = val;
1305 update_fp_status(env);
1308 void helper_ldmxcsr(CPUX86State *env, uint32_t val)
1310 cpu_set_mxcsr(env, val);
1313 void helper_enter_mmx(CPUX86State *env)
1315 env->fpstt = 0;
1316 *(uint32_t *)(env->fptags) = 0;
1317 *(uint32_t *)(env->fptags + 4) = 0;
1320 void helper_emms(CPUX86State *env)
1322 /* set to empty state */
1323 *(uint32_t *)(env->fptags) = 0x01010101;
1324 *(uint32_t *)(env->fptags + 4) = 0x01010101;
1327 /* XXX: suppress */
1328 void helper_movq(CPUX86State *env, void *d, void *s)
1330 *(uint64_t *)d = *(uint64_t *)s;
1333 #define SHIFT 0
1334 #include "ops_sse.h"
1336 #define SHIFT 1
1337 #include "ops_sse.h"