target/i386: reimplement fyl2x using floatx80 operations
[qemu/ar7.git] / target/i386/fpu_helper.c
blob 62820bc735b2121e4a61b71610f501f6da2c4aa6
1 /*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include <math.h>
22 #include "cpu.h"
23 #include "exec/helper-proto.h"
24 #include "qemu/host-utils.h"
25 #include "exec/exec-all.h"
26 #include "exec/cpu_ldst.h"
27 #include "fpu/softfloat.h"
28 #include "fpu/softfloat-macros.h"
30 #ifdef CONFIG_SOFTMMU
31 #include "hw/irq.h"
32 #endif
34 #define FPU_RC_MASK 0xc00
35 #define FPU_RC_NEAR 0x000
36 #define FPU_RC_DOWN 0x400
37 #define FPU_RC_UP 0x800
38 #define FPU_RC_CHOP 0xc00
40 #define MAXTAN 9223372036854775808.0
42 /* the following deal with x86 long double-precision numbers */
43 #define MAXEXPD 0x7fff
44 #define EXPBIAS 16383
45 #define EXPD(fp) (fp.l.upper & 0x7fff)
46 #define SIGND(fp) ((fp.l.upper) & 0x8000)
47 #define MANTD(fp) (fp.l.lower)
48 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
50 #define FPUS_IE (1 << 0)
51 #define FPUS_DE (1 << 1)
52 #define FPUS_ZE (1 << 2)
53 #define FPUS_OE (1 << 3)
54 #define FPUS_UE (1 << 4)
55 #define FPUS_PE (1 << 5)
56 #define FPUS_SF (1 << 6)
57 #define FPUS_SE (1 << 7)
58 #define FPUS_B (1 << 15)
60 #define FPUC_EM 0x3f
62 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
63 #define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
64 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
65 #define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
66 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
67 #define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
68 #define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
69 #define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)
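/*
 * The "_u" and "_d" variants are the same constants rounded up and down
 * respectively; the fldl2t/fldl2e/fldpi/fldlg2/fldln2 helpers below pick
 * between them according to the FPU rounding-control field so that the
 * loaded constant is correctly rounded for the current mode.
 */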
71 #if !defined(CONFIG_USER_ONLY)
72 static qemu_irq ferr_irq;
74 void x86_register_ferr_irq(qemu_irq irq)
76 ferr_irq = irq;
79 static void cpu_clear_ignne(void)
81 CPUX86State *env = &X86_CPU(first_cpu)->env;
82 env->hflags2 &= ~HF2_IGNNE_MASK;
85 void cpu_set_ignne(void)
87 CPUX86State *env = &X86_CPU(first_cpu)->env;
88 env->hflags2 |= HF2_IGNNE_MASK;
90 * We get here in response to a write to port F0h. The chipset should
91 * deassert FP_IRQ, while FERR# should instead stay signaled until FPSW_SE
92 * is cleared, because FERR# and FP_IRQ are two separate pins on real
93 * hardware. However, we don't model FERR# as a qemu_irq, so we just
94 * do directly what the chipset would do, i.e. deassert FP_IRQ.
96 qemu_irq_lower(ferr_irq);
98 #endif
101 static inline void fpush(CPUX86State *env)
103 env->fpstt = (env->fpstt - 1) & 7;
104 env->fptags[env->fpstt] = 0; /* validate stack entry */
107 static inline void fpop(CPUX86State *env)
109 env->fptags[env->fpstt] = 1; /* invalidate stack entry */
110 env->fpstt = (env->fpstt + 1) & 7;
113 static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
114 uintptr_t retaddr)
116 CPU_LDoubleU temp;
118 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
119 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
120 return temp.d;
123 static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
124 uintptr_t retaddr)
126 CPU_LDoubleU temp;
128 temp.d = f;
129 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
130 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
133 /* x87 FPU helpers */
135 static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
137 union {
138 float64 f64;
139 double d;
140 } u;
142 u.f64 = floatx80_to_float64(a, &env->fp_status);
143 return u.d;
146 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
148 union {
149 float64 f64;
150 double d;
151 } u;
153 u.d = a;
154 return float64_to_floatx80(u.f64, &env->fp_status);
157 static void fpu_set_exception(CPUX86State *env, int mask)
159 env->fpus |= mask;
160 if (env->fpus & (~env->fpuc & FPUC_EM)) {
161 env->fpus |= FPUS_SE | FPUS_B;
165 static inline uint8_t save_exception_flags(CPUX86State *env)
167 uint8_t old_flags = get_float_exception_flags(&env->fp_status);
168 set_float_exception_flags(0, &env->fp_status);
169 return old_flags;
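/*
 * Helpers follow a common pattern: save_exception_flags() stashes the
 * softfloat flags accumulated so far and clears them, the operation then
 * runs with a clean slate, and merge_exception_flags() re-raises the saved
 * flags and folds the newly generated ones into the x87 status-word bits
 * (IE/ZE/OE/UE/PE/DE).
 */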
172 static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
174 uint8_t new_flags = get_float_exception_flags(&env->fp_status);
175 float_raise(old_flags, &env->fp_status);
176 fpu_set_exception(env,
177 ((new_flags & float_flag_invalid ? FPUS_IE : 0) |
178 (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) |
179 (new_flags & float_flag_overflow ? FPUS_OE : 0) |
180 (new_flags & float_flag_underflow ? FPUS_UE : 0) |
181 (new_flags & float_flag_inexact ? FPUS_PE : 0) |
182 (new_flags & float_flag_input_denormal ? FPUS_DE : 0)));
185 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
187 uint8_t old_flags = save_exception_flags(env);
188 floatx80 ret = floatx80_div(a, b, &env->fp_status);
189 merge_exception_flags(env, old_flags);
190 return ret;
193 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
195 if (env->cr[0] & CR0_NE_MASK) {
196 raise_exception_ra(env, EXCP10_COPR, retaddr);
198 #if !defined(CONFIG_USER_ONLY)
199 else if (ferr_irq && !(env->hflags2 & HF2_IGNNE_MASK)) {
200 qemu_irq_raise(ferr_irq);
202 #endif
205 void helper_flds_FT0(CPUX86State *env, uint32_t val)
207 uint8_t old_flags = save_exception_flags(env);
208 union {
209 float32 f;
210 uint32_t i;
211 } u;
213 u.i = val;
214 FT0 = float32_to_floatx80(u.f, &env->fp_status);
215 merge_exception_flags(env, old_flags);
218 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
220 uint8_t old_flags = save_exception_flags(env);
221 union {
222 float64 f;
223 uint64_t i;
224 } u;
226 u.i = val;
227 FT0 = float64_to_floatx80(u.f, &env->fp_status);
228 merge_exception_flags(env, old_flags);
231 void helper_fildl_FT0(CPUX86State *env, int32_t val)
233 FT0 = int32_to_floatx80(val, &env->fp_status);
236 void helper_flds_ST0(CPUX86State *env, uint32_t val)
238 uint8_t old_flags = save_exception_flags(env);
239 int new_fpstt;
240 union {
241 float32 f;
242 uint32_t i;
243 } u;
245 new_fpstt = (env->fpstt - 1) & 7;
246 u.i = val;
247 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
248 env->fpstt = new_fpstt;
249 env->fptags[new_fpstt] = 0; /* validate stack entry */
250 merge_exception_flags(env, old_flags);
253 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
255 uint8_t old_flags = save_exception_flags(env);
256 int new_fpstt;
257 union {
258 float64 f;
259 uint64_t i;
260 } u;
262 new_fpstt = (env->fpstt - 1) & 7;
263 u.i = val;
264 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
265 env->fpstt = new_fpstt;
266 env->fptags[new_fpstt] = 0; /* validate stack entry */
267 merge_exception_flags(env, old_flags);
270 void helper_fildl_ST0(CPUX86State *env, int32_t val)
272 int new_fpstt;
274 new_fpstt = (env->fpstt - 1) & 7;
275 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
276 env->fpstt = new_fpstt;
277 env->fptags[new_fpstt] = 0; /* validate stack entry */
280 void helper_fildll_ST0(CPUX86State *env, int64_t val)
282 int new_fpstt;
284 new_fpstt = (env->fpstt - 1) & 7;
285 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
286 env->fpstt = new_fpstt;
287 env->fptags[new_fpstt] = 0; /* validate stack entry */
290 uint32_t helper_fsts_ST0(CPUX86State *env)
292 uint8_t old_flags = save_exception_flags(env);
293 union {
294 float32 f;
295 uint32_t i;
296 } u;
298 u.f = floatx80_to_float32(ST0, &env->fp_status);
299 merge_exception_flags(env, old_flags);
300 return u.i;
303 uint64_t helper_fstl_ST0(CPUX86State *env)
305 uint8_t old_flags = save_exception_flags(env);
306 union {
307 float64 f;
308 uint64_t i;
309 } u;
311 u.f = floatx80_to_float64(ST0, &env->fp_status);
312 merge_exception_flags(env, old_flags);
313 return u.i;
316 int32_t helper_fist_ST0(CPUX86State *env)
318 uint8_t old_flags = save_exception_flags(env);
319 int32_t val;
321 val = floatx80_to_int32(ST0, &env->fp_status);
322 if (val != (int16_t)val) {
323 set_float_exception_flags(float_flag_invalid, &env->fp_status);
324 val = -32768;
326 merge_exception_flags(env, old_flags);
327 return val;
330 int32_t helper_fistl_ST0(CPUX86State *env)
332 uint8_t old_flags = save_exception_flags(env);
333 int32_t val;
335 val = floatx80_to_int32(ST0, &env->fp_status);
336 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
337 val = 0x80000000;
339 merge_exception_flags(env, old_flags);
340 return val;
343 int64_t helper_fistll_ST0(CPUX86State *env)
345 uint8_t old_flags = save_exception_flags(env);
346 int64_t val;
348 val = floatx80_to_int64(ST0, &env->fp_status);
349 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
350 val = 0x8000000000000000ULL;
352 merge_exception_flags(env, old_flags);
353 return val;
356 int32_t helper_fistt_ST0(CPUX86State *env)
358 uint8_t old_flags = save_exception_flags(env);
359 int32_t val;
361 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
362 if (val != (int16_t)val) {
363 set_float_exception_flags(float_flag_invalid, &env->fp_status);
364 val = -32768;
366 merge_exception_flags(env, old_flags);
367 return val;
370 int32_t helper_fisttl_ST0(CPUX86State *env)
372 uint8_t old_flags = save_exception_flags(env);
373 int32_t val;
375 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
376 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
377 val = 0x80000000;
379 merge_exception_flags(env, old_flags);
380 return val;
383 int64_t helper_fisttll_ST0(CPUX86State *env)
385 uint8_t old_flags = save_exception_flags(env);
386 int64_t val;
388 val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
389 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
390 val = 0x8000000000000000ULL;
392 merge_exception_flags(env, old_flags);
393 return val;
396 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
398 int new_fpstt;
400 new_fpstt = (env->fpstt - 1) & 7;
401 env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
402 env->fpstt = new_fpstt;
403 env->fptags[new_fpstt] = 0; /* validate stack entry */
406 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
408 helper_fstt(env, ST0, ptr, GETPC());
411 void helper_fpush(CPUX86State *env)
413 fpush(env);
416 void helper_fpop(CPUX86State *env)
418 fpop(env);
421 void helper_fdecstp(CPUX86State *env)
423 env->fpstt = (env->fpstt - 1) & 7;
424 env->fpus &= ~0x4700;
427 void helper_fincstp(CPUX86State *env)
429 env->fpstt = (env->fpstt + 1) & 7;
430 env->fpus &= ~0x4700;
433 /* FPU move */
435 void helper_ffree_STN(CPUX86State *env, int st_index)
437 env->fptags[(env->fpstt + st_index) & 7] = 1;
440 void helper_fmov_ST0_FT0(CPUX86State *env)
442 ST0 = FT0;
445 void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
447 FT0 = ST(st_index);
450 void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
452 ST0 = ST(st_index);
455 void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
457 ST(st_index) = ST0;
460 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
462 floatx80 tmp;
464 tmp = ST(st_index);
465 ST(st_index) = ST0;
466 ST0 = tmp;
469 /* FPU operations */
471 static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
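/*
 * Indexed by FloatRelation + 1: "less" sets C0, "equal" sets C3, "greater"
 * sets nothing, "unordered" sets C3|C2|C0.  The fcomi_ccval table below maps
 * the same four relations onto the EFLAGS CF/ZF/PF bits instead.
 */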
473 void helper_fcom_ST0_FT0(CPUX86State *env)
475 uint8_t old_flags = save_exception_flags(env);
476 FloatRelation ret;
478 ret = floatx80_compare(ST0, FT0, &env->fp_status);
479 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
480 merge_exception_flags(env, old_flags);
483 void helper_fucom_ST0_FT0(CPUX86State *env)
485 uint8_t old_flags = save_exception_flags(env);
486 FloatRelation ret;
488 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
489 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
490 merge_exception_flags(env, old_flags);
493 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
495 void helper_fcomi_ST0_FT0(CPUX86State *env)
497 uint8_t old_flags = save_exception_flags(env);
498 int eflags;
499 FloatRelation ret;
501 ret = floatx80_compare(ST0, FT0, &env->fp_status);
502 eflags = cpu_cc_compute_all(env, CC_OP);
503 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
504 CC_SRC = eflags;
505 merge_exception_flags(env, old_flags);
508 void helper_fucomi_ST0_FT0(CPUX86State *env)
510 uint8_t old_flags = save_exception_flags(env);
511 int eflags;
512 FloatRelation ret;
514 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
515 eflags = cpu_cc_compute_all(env, CC_OP);
516 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
517 CC_SRC = eflags;
518 merge_exception_flags(env, old_flags);
521 void helper_fadd_ST0_FT0(CPUX86State *env)
523 uint8_t old_flags = save_exception_flags(env);
524 ST0 = floatx80_add(ST0, FT0, &env->fp_status);
525 merge_exception_flags(env, old_flags);
528 void helper_fmul_ST0_FT0(CPUX86State *env)
530 uint8_t old_flags = save_exception_flags(env);
531 ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
532 merge_exception_flags(env, old_flags);
535 void helper_fsub_ST0_FT0(CPUX86State *env)
537 uint8_t old_flags = save_exception_flags(env);
538 ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
539 merge_exception_flags(env, old_flags);
542 void helper_fsubr_ST0_FT0(CPUX86State *env)
544 uint8_t old_flags = save_exception_flags(env);
545 ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
546 merge_exception_flags(env, old_flags);
549 void helper_fdiv_ST0_FT0(CPUX86State *env)
551 ST0 = helper_fdiv(env, ST0, FT0);
554 void helper_fdivr_ST0_FT0(CPUX86State *env)
556 ST0 = helper_fdiv(env, FT0, ST0);
559 /* fp operations between STN and ST0 */
561 void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
563 uint8_t old_flags = save_exception_flags(env);
564 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
565 merge_exception_flags(env, old_flags);
568 void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
570 uint8_t old_flags = save_exception_flags(env);
571 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
572 merge_exception_flags(env, old_flags);
575 void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
577 uint8_t old_flags = save_exception_flags(env);
578 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
579 merge_exception_flags(env, old_flags);
582 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
584 uint8_t old_flags = save_exception_flags(env);
585 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
586 merge_exception_flags(env, old_flags);
589 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
591 floatx80 *p;
593 p = &ST(st_index);
594 *p = helper_fdiv(env, *p, ST0);
597 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
599 floatx80 *p;
601 p = &ST(st_index);
602 *p = helper_fdiv(env, ST0, *p);
605 /* misc FPU operations */
606 void helper_fchs_ST0(CPUX86State *env)
608 ST0 = floatx80_chs(ST0);
611 void helper_fabs_ST0(CPUX86State *env)
613 ST0 = floatx80_abs(ST0);
616 void helper_fld1_ST0(CPUX86State *env)
618 ST0 = floatx80_one;
621 void helper_fldl2t_ST0(CPUX86State *env)
623 switch (env->fpuc & FPU_RC_MASK) {
624 case FPU_RC_UP:
625 ST0 = floatx80_l2t_u;
626 break;
627 default:
628 ST0 = floatx80_l2t;
629 break;
633 void helper_fldl2e_ST0(CPUX86State *env)
635 switch (env->fpuc & FPU_RC_MASK) {
636 case FPU_RC_DOWN:
637 case FPU_RC_CHOP:
638 ST0 = floatx80_l2e_d;
639 break;
640 default:
641 ST0 = floatx80_l2e;
642 break;
646 void helper_fldpi_ST0(CPUX86State *env)
648 switch (env->fpuc & FPU_RC_MASK) {
649 case FPU_RC_DOWN:
650 case FPU_RC_CHOP:
651 ST0 = floatx80_pi_d;
652 break;
653 default:
654 ST0 = floatx80_pi;
655 break;
659 void helper_fldlg2_ST0(CPUX86State *env)
661 switch (env->fpuc & FPU_RC_MASK) {
662 case FPU_RC_DOWN:
663 case FPU_RC_CHOP:
664 ST0 = floatx80_lg2_d;
665 break;
666 default:
667 ST0 = floatx80_lg2;
668 break;
672 void helper_fldln2_ST0(CPUX86State *env)
674 switch (env->fpuc & FPU_RC_MASK) {
675 case FPU_RC_DOWN:
676 case FPU_RC_CHOP:
677 ST0 = floatx80_ln2_d;
678 break;
679 default:
680 ST0 = floatx80_ln2;
681 break;
685 void helper_fldz_ST0(CPUX86State *env)
687 ST0 = floatx80_zero;
690 void helper_fldz_FT0(CPUX86State *env)
692 FT0 = floatx80_zero;
695 uint32_t helper_fnstsw(CPUX86State *env)
697 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
700 uint32_t helper_fnstcw(CPUX86State *env)
702 return env->fpuc;
705 void update_fp_status(CPUX86State *env)
707 int rnd_type;
709 /* set rounding mode */
710 switch (env->fpuc & FPU_RC_MASK) {
711 default:
712 case FPU_RC_NEAR:
713 rnd_type = float_round_nearest_even;
714 break;
715 case FPU_RC_DOWN:
716 rnd_type = float_round_down;
717 break;
718 case FPU_RC_UP:
719 rnd_type = float_round_up;
720 break;
721 case FPU_RC_CHOP:
722 rnd_type = float_round_to_zero;
723 break;
725 set_float_rounding_mode(rnd_type, &env->fp_status);
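/*
 * Precision control (FPUC bits 9-8): 00 selects 32-bit, 10 selects 64-bit
 * and 11 selects 80-bit precision; the reserved 01 encoding falls through
 * to the 80-bit default below.
 */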
726 switch ((env->fpuc >> 8) & 3) {
727 case 0:
728 rnd_type = 32;
729 break;
730 case 2:
731 rnd_type = 64;
732 break;
733 case 3:
734 default:
735 rnd_type = 80;
736 break;
738 set_floatx80_rounding_precision(rnd_type, &env->fp_status);
741 void helper_fldcw(CPUX86State *env, uint32_t val)
743 cpu_set_fpuc(env, val);
746 void helper_fclex(CPUX86State *env)
748 env->fpus &= 0x7f00;
751 void helper_fwait(CPUX86State *env)
753 if (env->fpus & FPUS_SE) {
754 fpu_raise_exception(env, GETPC());
758 void helper_fninit(CPUX86State *env)
760 env->fpus = 0;
761 env->fpstt = 0;
762 cpu_set_fpuc(env, 0x37f);
763 env->fptags[0] = 1;
764 env->fptags[1] = 1;
765 env->fptags[2] = 1;
766 env->fptags[3] = 1;
767 env->fptags[4] = 1;
768 env->fptags[5] = 1;
769 env->fptags[6] = 1;
770 env->fptags[7] = 1;
773 /* BCD ops */
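/*
 * fbld/fbst use the x87 packed-BCD format: bytes 0-8 hold 18 decimal digits,
 * two per byte with the lower digit in the low nibble, and bit 7 of byte 9
 * is the sign.  For example, -1234 is stored (byte 0 first) as
 * 34 12 00 ... 00 80.
 */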
775 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
777 floatx80 tmp;
778 uint64_t val;
779 unsigned int v;
780 int i;
782 val = 0;
783 for (i = 8; i >= 0; i--) {
784 v = cpu_ldub_data_ra(env, ptr + i, GETPC());
785 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
787 tmp = int64_to_floatx80(val, &env->fp_status);
788 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
789 tmp = floatx80_chs(tmp);
791 fpush(env);
792 ST0 = tmp;
795 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
797 uint8_t old_flags = save_exception_flags(env);
798 int v;
799 target_ulong mem_ref, mem_end;
800 int64_t val;
801 CPU_LDoubleU temp;
803 temp.d = ST0;
805 val = floatx80_to_int64(ST0, &env->fp_status);
806 mem_ref = ptr;
807 if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
808 set_float_exception_flags(float_flag_invalid, &env->fp_status);
809 while (mem_ref < ptr + 7) {
810 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
812 cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC());
813 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
814 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
815 merge_exception_flags(env, old_flags);
816 return;
818 mem_end = mem_ref + 9;
819 if (SIGND(temp)) {
820 cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
821 val = -val;
822 } else {
823 cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
825 while (mem_ref < mem_end) {
826 if (val == 0) {
827 break;
829 v = val % 100;
830 val = val / 100;
831 v = ((v / 10) << 4) | (v % 10);
832 cpu_stb_data_ra(env, mem_ref++, v, GETPC());
834 while (mem_ref < mem_end) {
835 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
837 merge_exception_flags(env, old_flags);
840 /* 128-bit significand of log(2). */
841 #define ln2_sig_high 0xb17217f7d1cf79abULL
842 #define ln2_sig_low 0xc9e3b39803f2f6afULL
845 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
846 * the interval [-1/64, 1/64].
848 #define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
849 #define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
850 #define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
851 #define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
852 #define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
853 #define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
854 #define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
855 #define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
856 #define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL)
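/*
 * The reduction below writes the argument as x = t + y, where t is (nearly)
 * a multiple of 1/32 taken from f2xm1_table and |y| is at most about 1/64.
 * Then 2^x - 1 = 2^t * (2^y - 1) + (2^t - 1), with 2^y - 1 evaluated as y
 * times the polynomial above and 2^t, 2^t - 1 taken from the table.
 */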
858 struct f2xm1_data {
860 * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
861 * are very close to exact floatx80 values.
863 floatx80 t;
864 /* The value of 2^t. */
865 floatx80 exp2;
866 /* The value of 2^t - 1. */
867 floatx80 exp2m1;
870 static const struct f2xm1_data f2xm1_table[65] = {
871 { make_floatx80(0xbfff, 0x8000000000000000ULL),
872 make_floatx80(0x3ffe, 0x8000000000000000ULL),
873 make_floatx80(0xbffe, 0x8000000000000000ULL) },
874 { make_floatx80(0xbffe, 0xf800000000002e7eULL),
875 make_floatx80(0x3ffe, 0x82cd8698ac2b9160ULL),
876 make_floatx80(0xbffd, 0xfa64f2cea7a8dd40ULL) },
877 { make_floatx80(0xbffe, 0xefffffffffffe960ULL),
878 make_floatx80(0x3ffe, 0x85aac367cc488345ULL),
879 make_floatx80(0xbffd, 0xf4aa7930676ef976ULL) },
880 { make_floatx80(0xbffe, 0xe800000000006f10ULL),
881 make_floatx80(0x3ffe, 0x88980e8092da5c14ULL),
882 make_floatx80(0xbffd, 0xeecfe2feda4b47d8ULL) },
883 { make_floatx80(0xbffe, 0xe000000000008a45ULL),
884 make_floatx80(0x3ffe, 0x8b95c1e3ea8ba2a5ULL),
885 make_floatx80(0xbffd, 0xe8d47c382ae8bab6ULL) },
886 { make_floatx80(0xbffe, 0xd7ffffffffff8a9eULL),
887 make_floatx80(0x3ffe, 0x8ea4398b45cd8116ULL),
888 make_floatx80(0xbffd, 0xe2b78ce97464fdd4ULL) },
889 { make_floatx80(0xbffe, 0xd0000000000019a0ULL),
890 make_floatx80(0x3ffe, 0x91c3d373ab11b919ULL),
891 make_floatx80(0xbffd, 0xdc785918a9dc8dceULL) },
892 { make_floatx80(0xbffe, 0xc7ffffffffff14dfULL),
893 make_floatx80(0x3ffe, 0x94f4efa8fef76836ULL),
894 make_floatx80(0xbffd, 0xd61620ae02112f94ULL) },
895 { make_floatx80(0xbffe, 0xc000000000006530ULL),
896 make_floatx80(0x3ffe, 0x9837f0518db87fbbULL),
897 make_floatx80(0xbffd, 0xcf901f5ce48f008aULL) },
898 { make_floatx80(0xbffe, 0xb7ffffffffff1723ULL),
899 make_floatx80(0x3ffe, 0x9b8d39b9d54eb74cULL),
900 make_floatx80(0xbffd, 0xc8e58c8c55629168ULL) },
901 { make_floatx80(0xbffe, 0xb00000000000b5e1ULL),
902 make_floatx80(0x3ffe, 0x9ef5326091a0c366ULL),
903 make_floatx80(0xbffd, 0xc2159b3edcbe7934ULL) },
904 { make_floatx80(0xbffe, 0xa800000000006f8aULL),
905 make_floatx80(0x3ffe, 0xa27043030c49370aULL),
906 make_floatx80(0xbffd, 0xbb1f79f9e76d91ecULL) },
907 { make_floatx80(0xbffe, 0x9fffffffffff816aULL),
908 make_floatx80(0x3ffe, 0xa5fed6a9b15171cfULL),
909 make_floatx80(0xbffd, 0xb40252ac9d5d1c62ULL) },
910 { make_floatx80(0xbffe, 0x97ffffffffffb621ULL),
911 make_floatx80(0x3ffe, 0xa9a15ab4ea7c30e6ULL),
912 make_floatx80(0xbffd, 0xacbd4a962b079e34ULL) },
913 { make_floatx80(0xbffe, 0x8fffffffffff162bULL),
914 make_floatx80(0x3ffe, 0xad583eea42a1b886ULL),
915 make_floatx80(0xbffd, 0xa54f822b7abc8ef4ULL) },
916 { make_floatx80(0xbffe, 0x87ffffffffff4d34ULL),
917 make_floatx80(0x3ffe, 0xb123f581d2ac7b51ULL),
918 make_floatx80(0xbffd, 0x9db814fc5aa7095eULL) },
919 { make_floatx80(0xbffe, 0x800000000000227dULL),
920 make_floatx80(0x3ffe, 0xb504f333f9de539dULL),
921 make_floatx80(0xbffd, 0x95f619980c4358c6ULL) },
922 { make_floatx80(0xbffd, 0xefffffffffff3978ULL),
923 make_floatx80(0x3ffe, 0xb8fbaf4762fbd0a1ULL),
924 make_floatx80(0xbffd, 0x8e08a1713a085ebeULL) },
925 { make_floatx80(0xbffd, 0xe00000000000df81ULL),
926 make_floatx80(0x3ffe, 0xbd08a39f580bfd8cULL),
927 make_floatx80(0xbffd, 0x85eeb8c14fe804e8ULL) },
928 { make_floatx80(0xbffd, 0xd00000000000bccfULL),
929 make_floatx80(0x3ffe, 0xc12c4cca667062f6ULL),
930 make_floatx80(0xbffc, 0xfb4eccd6663e7428ULL) },
931 { make_floatx80(0xbffd, 0xc00000000000eff0ULL),
932 make_floatx80(0x3ffe, 0xc5672a1155069abeULL),
933 make_floatx80(0xbffc, 0xea6357baabe59508ULL) },
934 { make_floatx80(0xbffd, 0xb000000000000fe6ULL),
935 make_floatx80(0x3ffe, 0xc9b9bd866e2f234bULL),
936 make_floatx80(0xbffc, 0xd91909e6474372d4ULL) },
937 { make_floatx80(0xbffd, 0x9fffffffffff2172ULL),
938 make_floatx80(0x3ffe, 0xce248c151f84bf00ULL),
939 make_floatx80(0xbffc, 0xc76dcfab81ed0400ULL) },
940 { make_floatx80(0xbffd, 0x8fffffffffffafffULL),
941 make_floatx80(0x3ffe, 0xd2a81d91f12afb2bULL),
942 make_floatx80(0xbffc, 0xb55f89b83b541354ULL) },
943 { make_floatx80(0xbffc, 0xffffffffffff81a3ULL),
944 make_floatx80(0x3ffe, 0xd744fccad69d7d5eULL),
945 make_floatx80(0xbffc, 0xa2ec0cd4a58a0a88ULL) },
946 { make_floatx80(0xbffc, 0xdfffffffffff1568ULL),
947 make_floatx80(0x3ffe, 0xdbfbb797daf25a44ULL),
948 make_floatx80(0xbffc, 0x901121a0943696f0ULL) },
949 { make_floatx80(0xbffc, 0xbfffffffffff68daULL),
950 make_floatx80(0x3ffe, 0xe0ccdeec2a94f811ULL),
951 make_floatx80(0xbffb, 0xf999089eab583f78ULL) },
952 { make_floatx80(0xbffc, 0x9fffffffffff4690ULL),
953 make_floatx80(0x3ffe, 0xe5b906e77c83657eULL),
954 make_floatx80(0xbffb, 0xd237c8c41be4d410ULL) },
955 { make_floatx80(0xbffb, 0xffffffffffff8aeeULL),
956 make_floatx80(0x3ffe, 0xeac0c6e7dd24427cULL),
957 make_floatx80(0xbffb, 0xa9f9c8c116ddec20ULL) },
958 { make_floatx80(0xbffb, 0xbfffffffffff2d18ULL),
959 make_floatx80(0x3ffe, 0xefe4b99bdcdb06ebULL),
960 make_floatx80(0xbffb, 0x80da33211927c8a8ULL) },
961 { make_floatx80(0xbffa, 0xffffffffffff8ccbULL),
962 make_floatx80(0x3ffe, 0xf5257d152486d0f4ULL),
963 make_floatx80(0xbffa, 0xada82eadb792f0c0ULL) },
964 { make_floatx80(0xbff9, 0xffffffffffff11feULL),
965 make_floatx80(0x3ffe, 0xfa83b2db722a0846ULL),
966 make_floatx80(0xbff9, 0xaf89a491babef740ULL) },
967 { floatx80_zero,
968 make_floatx80(0x3fff, 0x8000000000000000ULL),
969 floatx80_zero },
970 { make_floatx80(0x3ff9, 0xffffffffffff2680ULL),
971 make_floatx80(0x3fff, 0x82cd8698ac2b9f6fULL),
972 make_floatx80(0x3ff9, 0xb361a62b0ae7dbc0ULL) },
973 { make_floatx80(0x3ffb, 0x800000000000b500ULL),
974 make_floatx80(0x3fff, 0x85aac367cc488345ULL),
975 make_floatx80(0x3ffa, 0xb5586cf9891068a0ULL) },
976 { make_floatx80(0x3ffb, 0xbfffffffffff4b67ULL),
977 make_floatx80(0x3fff, 0x88980e8092da7cceULL),
978 make_floatx80(0x3ffb, 0x8980e8092da7cce0ULL) },
979 { make_floatx80(0x3ffb, 0xffffffffffffff57ULL),
980 make_floatx80(0x3fff, 0x8b95c1e3ea8bd6dfULL),
981 make_floatx80(0x3ffb, 0xb95c1e3ea8bd6df0ULL) },
982 { make_floatx80(0x3ffc, 0x9fffffffffff811fULL),
983 make_floatx80(0x3fff, 0x8ea4398b45cd4780ULL),
984 make_floatx80(0x3ffb, 0xea4398b45cd47800ULL) },
985 { make_floatx80(0x3ffc, 0xbfffffffffff9980ULL),
986 make_floatx80(0x3fff, 0x91c3d373ab11b919ULL),
987 make_floatx80(0x3ffc, 0x8e1e9b9d588dc8c8ULL) },
988 { make_floatx80(0x3ffc, 0xdffffffffffff631ULL),
989 make_floatx80(0x3fff, 0x94f4efa8fef70864ULL),
990 make_floatx80(0x3ffc, 0xa7a77d47f7b84320ULL) },
991 { make_floatx80(0x3ffc, 0xffffffffffff2499ULL),
992 make_floatx80(0x3fff, 0x9837f0518db892d4ULL),
993 make_floatx80(0x3ffc, 0xc1bf828c6dc496a0ULL) },
994 { make_floatx80(0x3ffd, 0x8fffffffffff80fbULL),
995 make_floatx80(0x3fff, 0x9b8d39b9d54e3a79ULL),
996 make_floatx80(0x3ffc, 0xdc69cdceaa71d3c8ULL) },
997 { make_floatx80(0x3ffd, 0x9fffffffffffbc23ULL),
998 make_floatx80(0x3fff, 0x9ef5326091a10313ULL),
999 make_floatx80(0x3ffc, 0xf7a993048d081898ULL) },
1000 { make_floatx80(0x3ffd, 0xafffffffffff20ecULL),
1001 make_floatx80(0x3fff, 0xa27043030c49370aULL),
1002 make_floatx80(0x3ffd, 0x89c10c0c3124dc28ULL) },
1003 { make_floatx80(0x3ffd, 0xc00000000000fd2cULL),
1004 make_floatx80(0x3fff, 0xa5fed6a9b15171cfULL),
1005 make_floatx80(0x3ffd, 0x97fb5aa6c545c73cULL) },
1006 { make_floatx80(0x3ffd, 0xd0000000000093beULL),
1007 make_floatx80(0x3fff, 0xa9a15ab4ea7c30e6ULL),
1008 make_floatx80(0x3ffd, 0xa6856ad3a9f0c398ULL) },
1009 { make_floatx80(0x3ffd, 0xe00000000000c2aeULL),
1010 make_floatx80(0x3fff, 0xad583eea42a17876ULL),
1011 make_floatx80(0x3ffd, 0xb560fba90a85e1d8ULL) },
1012 { make_floatx80(0x3ffd, 0xefffffffffff1e3fULL),
1013 make_floatx80(0x3fff, 0xb123f581d2abef6cULL),
1014 make_floatx80(0x3ffd, 0xc48fd6074aafbdb0ULL) },
1015 { make_floatx80(0x3ffd, 0xffffffffffff1c23ULL),
1016 make_floatx80(0x3fff, 0xb504f333f9de2cadULL),
1017 make_floatx80(0x3ffd, 0xd413cccfe778b2b4ULL) },
1018 { make_floatx80(0x3ffe, 0x8800000000006344ULL),
1019 make_floatx80(0x3fff, 0xb8fbaf4762fbd0a1ULL),
1020 make_floatx80(0x3ffd, 0xe3eebd1d8bef4284ULL) },
1021 { make_floatx80(0x3ffe, 0x9000000000005d67ULL),
1022 make_floatx80(0x3fff, 0xbd08a39f580c668dULL),
1023 make_floatx80(0x3ffd, 0xf4228e7d60319a34ULL) },
1024 { make_floatx80(0x3ffe, 0x9800000000009127ULL),
1025 make_floatx80(0x3fff, 0xc12c4cca6670e042ULL),
1026 make_floatx80(0x3ffe, 0x82589994cce1c084ULL) },
1027 { make_floatx80(0x3ffe, 0x9fffffffffff06f9ULL),
1028 make_floatx80(0x3fff, 0xc5672a11550655c3ULL),
1029 make_floatx80(0x3ffe, 0x8ace5422aa0cab86ULL) },
1030 { make_floatx80(0x3ffe, 0xa7fffffffffff80dULL),
1031 make_floatx80(0x3fff, 0xc9b9bd866e2f234bULL),
1032 make_floatx80(0x3ffe, 0x93737b0cdc5e4696ULL) },
1033 { make_floatx80(0x3ffe, 0xafffffffffff1470ULL),
1034 make_floatx80(0x3fff, 0xce248c151f83fd69ULL),
1035 make_floatx80(0x3ffe, 0x9c49182a3f07fad2ULL) },
1036 { make_floatx80(0x3ffe, 0xb800000000000e0aULL),
1037 make_floatx80(0x3fff, 0xd2a81d91f12aec5cULL),
1038 make_floatx80(0x3ffe, 0xa5503b23e255d8b8ULL) },
1039 { make_floatx80(0x3ffe, 0xc00000000000b7faULL),
1040 make_floatx80(0x3fff, 0xd744fccad69dd630ULL),
1041 make_floatx80(0x3ffe, 0xae89f995ad3bac60ULL) },
1042 { make_floatx80(0x3ffe, 0xc800000000003aa6ULL),
1043 make_floatx80(0x3fff, 0xdbfbb797daf25a44ULL),
1044 make_floatx80(0x3ffe, 0xb7f76f2fb5e4b488ULL) },
1045 { make_floatx80(0x3ffe, 0xd00000000000a6aeULL),
1046 make_floatx80(0x3fff, 0xe0ccdeec2a954685ULL),
1047 make_floatx80(0x3ffe, 0xc199bdd8552a8d0aULL) },
1048 { make_floatx80(0x3ffe, 0xd800000000004165ULL),
1049 make_floatx80(0x3fff, 0xe5b906e77c837155ULL),
1050 make_floatx80(0x3ffe, 0xcb720dcef906e2aaULL) },
1051 { make_floatx80(0x3ffe, 0xe00000000000582cULL),
1052 make_floatx80(0x3fff, 0xeac0c6e7dd24713aULL),
1053 make_floatx80(0x3ffe, 0xd5818dcfba48e274ULL) },
1054 { make_floatx80(0x3ffe, 0xe800000000001a5dULL),
1055 make_floatx80(0x3fff, 0xefe4b99bdcdb06ebULL),
1056 make_floatx80(0x3ffe, 0xdfc97337b9b60dd6ULL) },
1057 { make_floatx80(0x3ffe, 0xefffffffffffc1efULL),
1058 make_floatx80(0x3fff, 0xf5257d152486a2faULL),
1059 make_floatx80(0x3ffe, 0xea4afa2a490d45f4ULL) },
1060 { make_floatx80(0x3ffe, 0xf800000000001069ULL),
1061 make_floatx80(0x3fff, 0xfa83b2db722a0e5cULL),
1062 make_floatx80(0x3ffe, 0xf50765b6e4541cb8ULL) },
1063 { make_floatx80(0x3fff, 0x8000000000000000ULL),
1064 make_floatx80(0x4000, 0x8000000000000000ULL),
1065 make_floatx80(0x3fff, 0x8000000000000000ULL) },
1068 void helper_f2xm1(CPUX86State *env)
1070 uint8_t old_flags = save_exception_flags(env);
1071 uint64_t sig = extractFloatx80Frac(ST0);
1072 int32_t exp = extractFloatx80Exp(ST0);
1073 bool sign = extractFloatx80Sign(ST0);
1075 if (floatx80_invalid_encoding(ST0)) {
1076 float_raise(float_flag_invalid, &env->fp_status);
1077 ST0 = floatx80_default_nan(&env->fp_status);
1078 } else if (floatx80_is_any_nan(ST0)) {
1079 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1080 float_raise(float_flag_invalid, &env->fp_status);
1081 ST0 = floatx80_silence_nan(ST0, &env->fp_status);
1083 } else if (exp > 0x3fff ||
1084 (exp == 0x3fff && sig != (0x8000000000000000ULL))) {
1085 /* Out of range for the instruction, treat as invalid. */
1086 float_raise(float_flag_invalid, &env->fp_status);
1087 ST0 = floatx80_default_nan(&env->fp_status);
1088 } else if (exp == 0x3fff) {
1089 /* Argument 1 or -1, exact result 1 or -0.5. */
1090 if (sign) {
1091 ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL);
1093 } else if (exp < 0x3fb0) {
1094 if (!floatx80_is_zero(ST0)) {
1096 * Multiplying the argument by an extra-precision version
1097 * of log(2) is sufficiently precise. Zero arguments are
1098 * returned unchanged.
1100 uint64_t sig0, sig1, sig2;
1101 if (exp == 0) {
1102 normalizeFloatx80Subnormal(sig, &exp, &sig);
1104 mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1,
1105 &sig2);
1106 /* This result is inexact. */
1107 sig1 |= 1;
1108 ST0 = normalizeRoundAndPackFloatx80(80, sign, exp, sig0, sig1,
1109 &env->fp_status);
1111 } else {
1112 floatx80 tmp, y, accum;
1113 bool asign, bsign;
1114 int32_t n, aexp, bexp;
1115 uint64_t asig0, asig1, asig2, bsig0, bsig1;
1116 FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
1117 signed char save_prec = env->fp_status.floatx80_rounding_precision;
1118 env->fp_status.float_rounding_mode = float_round_nearest_even;
1119 env->fp_status.floatx80_rounding_precision = 80;
1121 /* Find the nearest multiple of 1/32 to the argument. */
1122 tmp = floatx80_scalbn(ST0, 5, &env->fp_status);
1123 n = 32 + floatx80_to_int32(tmp, &env->fp_status);
1124 y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status);
1126 if (floatx80_is_zero(y)) {
1128 * Use the value of 2^t - 1 from the table, to avoid
1129 * needing to special-case zero as a result of
1130 * multiplication below.
1132 ST0 = f2xm1_table[n].exp2m1;
1133 set_float_exception_flags(float_flag_inexact, &env->fp_status);
1134 env->fp_status.float_rounding_mode = save_mode;
1135 } else {
1137 * Compute the lower parts of a polynomial expansion for
1138 * (2^y - 1) / y.
1140 accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status);
1141 accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status);
1142 accum = floatx80_mul(accum, y, &env->fp_status);
1143 accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status);
1144 accum = floatx80_mul(accum, y, &env->fp_status);
1145 accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status);
1146 accum = floatx80_mul(accum, y, &env->fp_status);
1147 accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status);
1148 accum = floatx80_mul(accum, y, &env->fp_status);
1149 accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status);
1150 accum = floatx80_mul(accum, y, &env->fp_status);
1151 accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status);
1152 accum = floatx80_mul(accum, y, &env->fp_status);
1153 accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status);
1156 * The full polynomial expansion is f2xm1_coeff_0 + accum
1157 * (where accum has much lower magnitude, and so, in
1158 * particular, carry out of the addition is not possible).
1159 * (This expansion is only accurate to about 70 bits, not
1160 * 128 bits.)
1162 aexp = extractFloatx80Exp(f2xm1_coeff_0);
1163 asign = extractFloatx80Sign(f2xm1_coeff_0);
1164 shift128RightJamming(extractFloatx80Frac(accum), 0,
1165 aexp - extractFloatx80Exp(accum),
1166 &asig0, &asig1);
1167 bsig0 = extractFloatx80Frac(f2xm1_coeff_0);
1168 bsig1 = 0;
1169 if (asign == extractFloatx80Sign(accum)) {
1170 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1171 } else {
1172 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1174 /* And thus compute an approximation to 2^y - 1. */
1175 mul128By64To192(asig0, asig1, extractFloatx80Frac(y),
1176 &asig0, &asig1, &asig2);
1177 aexp += extractFloatx80Exp(y) - 0x3ffe;
1178 asign ^= extractFloatx80Sign(y);
1179 if (n != 32) {
1181 * Multiply this by the precomputed value of 2^t and
1182 * add that of 2^t - 1.
1184 mul128By64To192(asig0, asig1,
1185 extractFloatx80Frac(f2xm1_table[n].exp2),
1186 &asig0, &asig1, &asig2);
1187 aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe;
1188 bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1);
1189 bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1);
1190 bsig1 = 0;
1191 if (bexp < aexp) {
1192 shift128RightJamming(bsig0, bsig1, aexp - bexp,
1193 &bsig0, &bsig1);
1194 } else if (aexp < bexp) {
1195 shift128RightJamming(asig0, asig1, bexp - aexp,
1196 &asig0, &asig1);
1197 aexp = bexp;
1199 /* The sign of 2^t - 1 is always that of the result. */
1200 bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1);
1201 if (asign == bsign) {
1202 /* Avoid possible carry out of the addition. */
1203 shift128RightJamming(asig0, asig1, 1,
1204 &asig0, &asig1);
1205 shift128RightJamming(bsig0, bsig1, 1,
1206 &bsig0, &bsig1);
1207 ++aexp;
1208 add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1);
1209 } else {
1210 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1211 asign = bsign;
1214 env->fp_status.float_rounding_mode = save_mode;
1215 /* This result is inexact. */
1216 asig1 |= 1;
1217 ST0 = normalizeRoundAndPackFloatx80(80, asign, aexp, asig0, asig1,
1218 &env->fp_status);
1221 env->fp_status.floatx80_rounding_precision = save_prec;
1223 merge_exception_flags(env, old_flags);
1226 void helper_fptan(CPUX86State *env)
1228 double fptemp = floatx80_to_double(env, ST0);
1230 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1231 env->fpus |= 0x400;
1232 } else {
1233 fptemp = tan(fptemp);
1234 ST0 = double_to_floatx80(env, fptemp);
1235 fpush(env);
1236 ST0 = floatx80_one;
1237 env->fpus &= ~0x400; /* C2 <-- 0 */
1238 /* the above code is for |arg| < 2**52 only */
1242 void helper_fpatan(CPUX86State *env)
1244 double fptemp, fpsrcop;
1246 fpsrcop = floatx80_to_double(env, ST1);
1247 fptemp = floatx80_to_double(env, ST0);
1248 ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
1249 fpop(env);
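/*
 * fxtract splits ST0 into exponent and significand: the unbiased exponent
 * replaces ST0 and the significand (rescaled into [1, 2)) is pushed on top.
 * For example, 2.5 = 1.25 * 2^1 leaves ST1 = 1.0 and ST0 = 1.25.
 */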
1252 void helper_fxtract(CPUX86State *env)
1254 uint8_t old_flags = save_exception_flags(env);
1255 CPU_LDoubleU temp;
1257 temp.d = ST0;
1259 if (floatx80_is_zero(ST0)) {
1260 /* Easy way to generate -inf and raise the division-by-zero exception */
1261 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
1262 &env->fp_status);
1263 fpush(env);
1264 ST0 = temp.d;
1265 } else if (floatx80_invalid_encoding(ST0)) {
1266 float_raise(float_flag_invalid, &env->fp_status);
1267 ST0 = floatx80_default_nan(&env->fp_status);
1268 fpush(env);
1269 ST0 = ST1;
1270 } else if (floatx80_is_any_nan(ST0)) {
1271 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1272 float_raise(float_flag_invalid, &env->fp_status);
1273 ST0 = floatx80_silence_nan(ST0, &env->fp_status);
1275 fpush(env);
1276 ST0 = ST1;
1277 } else if (floatx80_is_infinity(ST0)) {
1278 fpush(env);
1279 ST0 = ST1;
1280 ST1 = floatx80_infinity;
1281 } else {
1282 int expdif;
1284 if (EXPD(temp) == 0) {
1285 int shift = clz64(temp.l.lower);
1286 temp.l.lower <<= shift;
1287 expdif = 1 - EXPBIAS - shift;
1288 float_raise(float_flag_input_denormal, &env->fp_status);
1289 } else {
1290 expdif = EXPD(temp) - EXPBIAS;
1292 /* DP exponent bias */
1293 ST0 = int32_to_floatx80(expdif, &env->fp_status);
1294 fpush(env);
1295 BIASEXPONENT(temp);
1296 ST0 = temp.d;
1298 merge_exception_flags(env, old_flags);
1301 static void helper_fprem_common(CPUX86State *env, bool mod)
1303 uint8_t old_flags = save_exception_flags(env);
1304 uint64_t quotient;
1305 CPU_LDoubleU temp0, temp1;
1306 int exp0, exp1, expdiff;
1308 temp0.d = ST0;
1309 temp1.d = ST1;
1310 exp0 = EXPD(temp0);
1311 exp1 = EXPD(temp1);
1313 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1314 if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
1315 exp0 == 0x7fff || exp1 == 0x7fff ||
1316 floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) {
1317 ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
1318 } else {
1319 if (exp0 == 0) {
1320 exp0 = 1 - clz64(temp0.l.lower);
1322 if (exp1 == 0) {
1323 exp1 = 1 - clz64(temp1.l.lower);
1325 expdiff = exp0 - exp1;
1326 if (expdiff < 64) {
1327 ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
1328 env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */
1329 env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */
1330 env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */
1331 } else {
1333 * Partial remainder. This choice of how many bits to
1334 * process at once is specified in AMD instruction set
1335 * manuals, and empirically is followed by Intel
1336 * processors as well; it ensures that the final remainder
1337 * operation in a loop does produce the correct low three
1338 * bits of the quotient. AMD manuals specify that the
1339 * flags other than C2 are cleared, and empirically Intel
1340 * processors clear them as well.
1342 int n = 32 + (expdiff % 32);
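/*
 * For example, with expdiff == 100 this gives n == 36: ST1 is scaled
 * up by 2^(100 - 36) so the remaining exponent difference is 36, one
 * reduction step is performed, C2 is set, and software is expected to
 * loop until C2 clears.
 */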
1343 temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status);
1344 ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status);
1345 env->fpus |= 0x400; /* C2 <-- 1 */
1348 merge_exception_flags(env, old_flags);
1351 void helper_fprem1(CPUX86State *env)
1353 helper_fprem_common(env, false);
1356 void helper_fprem(CPUX86State *env)
1358 helper_fprem_common(env, true);
1361 /* 128-bit significand of log2(e). */
1362 #define log2_e_sig_high 0xb8aa3b295c17f0bbULL
1363 #define log2_e_sig_low 0xbe87fed0691d3e89ULL
1366 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)),
1367 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3,
1368 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the
1369 * interval [sqrt(2)/2, sqrt(2)].
1371 #define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
1372 #define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
1373 #define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
1374 #define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
1375 #define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
1376 #define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
1377 #define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
1378 #define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
1379 #define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
1380 #define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
1381 #define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)
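/*
 * The substitution used below is u = arg / (2 + arg), for which
 * (1 + u) / (1 - u) = 1 + arg exactly, so log2(1 + arg) is obtained by
 * evaluating the odd polynomial above at u: u * (c0 + c1*u^2 + ...).
 */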
1384 * Compute an approximation of log2(1+arg), where 1+arg is in the
1385 * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this
1386 * function is called, rounding precision is set to 80 and the
1387 * round-to-nearest mode is in effect. arg must not be exactly zero,
1388 * and must not be so close to zero that underflow might occur.
1390 static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
1391 uint64_t *sig0, uint64_t *sig1)
1393 uint64_t arg0_sig = extractFloatx80Frac(arg);
1394 int32_t arg0_exp = extractFloatx80Exp(arg);
1395 bool arg0_sign = extractFloatx80Sign(arg);
1396 bool asign;
1397 int32_t dexp, texp, aexp;
1398 uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
1399 uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
1400 uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
1401 floatx80 t2, accum;
1404 * Compute an approximation of arg/(2+arg), with extra precision,
1405 * as the argument to a polynomial approximation. The extra
1406 * precision is only needed for the first term of the
1407 * approximation, with subsequent terms being significantly
1408 * smaller; the approximation only uses odd exponents, and the
1409 * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
1411 if (arg0_sign) {
1412 dexp = 0x3fff;
1413 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
1414 sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
1415 } else {
1416 dexp = 0x4000;
1417 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
1418 dsig0 |= 0x8000000000000000ULL;
1420 texp = arg0_exp - dexp + 0x3ffe;
1421 rsig0 = arg0_sig;
1422 rsig1 = 0;
1423 rsig2 = 0;
1424 if (dsig0 <= rsig0) {
1425 shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
1426 ++texp;
1428 tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
1429 mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
1430 sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
1431 &rsig0, &rsig1, &rsig2);
1432 while ((int64_t) rsig0 < 0) {
1433 --tsig0;
1434 add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
1435 &rsig0, &rsig1, &rsig2);
1437 tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
1439 * No need to correct any estimation error in tsig1; even with
1440 * such error, it is accurate enough. Now compute the square of
1441 * that approximation.
1443 mul128To256(tsig0, tsig1, tsig0, tsig1,
1444 &t2sig0, &t2sig1, &t2sig2, &t2sig3);
1445 t2 = normalizeRoundAndPackFloatx80(80, false, texp + texp - 0x3ffe,
1446 t2sig0, t2sig1, &env->fp_status);
1448 /* Compute the lower parts of the polynomial expansion. */
1449 accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
1450 accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
1451 accum = floatx80_mul(accum, t2, &env->fp_status);
1452 accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
1453 accum = floatx80_mul(accum, t2, &env->fp_status);
1454 accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
1455 accum = floatx80_mul(accum, t2, &env->fp_status);
1456 accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
1457 accum = floatx80_mul(accum, t2, &env->fp_status);
1458 accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
1459 accum = floatx80_mul(accum, t2, &env->fp_status);
1460 accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
1461 accum = floatx80_mul(accum, t2, &env->fp_status);
1462 accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
1463 accum = floatx80_mul(accum, t2, &env->fp_status);
1464 accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
1465 accum = floatx80_mul(accum, t2, &env->fp_status);
1466 accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);
1469 * The full polynomial expansion is fyl2x_coeff_0 + accum (where
1470 * accum has much lower magnitude, and so, in particular, carry
1471 * out of the addition is not possible), multiplied by t. (This
1472 * expansion is only accurate to about 70 bits, not 128 bits.)
1474 aexp = extractFloatx80Exp(fyl2x_coeff_0);
1475 asign = extractFloatx80Sign(fyl2x_coeff_0);
1476 shift128RightJamming(extractFloatx80Frac(accum), 0,
1477 aexp - extractFloatx80Exp(accum),
1478 &asig0, &asig1);
1479 bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
1480 bsig1 = 0;
1481 if (asign == extractFloatx80Sign(accum)) {
1482 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1483 } else {
1484 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1486 /* Multiply by t to compute the required result. */
1487 mul128To256(asig0, asig1, tsig0, tsig1,
1488 &asig0, &asig1, &asig2, &asig3);
1489 aexp += texp - 0x3ffe;
1490 *exp = aexp;
1491 *sig0 = asig0;
1492 *sig1 = asig1;
1495 void helper_fyl2xp1(CPUX86State *env)
1497 uint8_t old_flags = save_exception_flags(env);
1498 uint64_t arg0_sig = extractFloatx80Frac(ST0);
1499 int32_t arg0_exp = extractFloatx80Exp(ST0);
1500 bool arg0_sign = extractFloatx80Sign(ST0);
1501 uint64_t arg1_sig = extractFloatx80Frac(ST1);
1502 int32_t arg1_exp = extractFloatx80Exp(ST1);
1503 bool arg1_sign = extractFloatx80Sign(ST1);
1505 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1506 float_raise(float_flag_invalid, &env->fp_status);
1507 ST1 = floatx80_silence_nan(ST0, &env->fp_status);
1508 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
1509 float_raise(float_flag_invalid, &env->fp_status);
1510 ST1 = floatx80_silence_nan(ST1, &env->fp_status);
1511 } else if (floatx80_invalid_encoding(ST0) ||
1512 floatx80_invalid_encoding(ST1)) {
1513 float_raise(float_flag_invalid, &env->fp_status);
1514 ST1 = floatx80_default_nan(&env->fp_status);
1515 } else if (floatx80_is_any_nan(ST0)) {
1516 ST1 = ST0;
1517 } else if (floatx80_is_any_nan(ST1)) {
1518 /* Pass this NaN through. */
1519 } else if (arg0_exp > 0x3ffd ||
1520 (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ?
1521 0x95f619980c4336f7ULL :
1522 0xd413cccfe7799211ULL))) {
1524 * Out of range for the instruction (ST0 must have absolute
1525 * value less than 1 - sqrt(2)/2 = 0.292..., according to
1526 * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
1527 * to sqrt(2) - 1, which we allow here), treat as invalid.
1529 float_raise(float_flag_invalid, &env->fp_status);
1530 ST1 = floatx80_default_nan(&env->fp_status);
1531 } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
1532 arg1_exp == 0x7fff) {
1534 * One argument is zero, or multiplying by infinity; correct
1535 * result is exact and can be obtained by multiplying the
1536 * arguments.
1538 ST1 = floatx80_mul(ST0, ST1, &env->fp_status);
1539 } else if (arg0_exp < 0x3fb0) {
1541 * Multiplying both arguments and an extra-precision version
1542 * of log2(e) is sufficiently precise.
1544 uint64_t sig0, sig1, sig2;
1545 int32_t exp;
1546 if (arg0_exp == 0) {
1547 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
1549 if (arg1_exp == 0) {
1550 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
1552 mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig,
1553 &sig0, &sig1, &sig2);
1554 exp = arg0_exp + 1;
1555 mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2);
1556 exp += arg1_exp - 0x3ffe;
1557 /* This result is inexact. */
1558 sig1 |= 1;
1559 ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, exp,
1560 sig0, sig1, &env->fp_status);
1561 } else {
1562 int32_t aexp;
1563 uint64_t asig0, asig1, asig2;
1564 FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
1565 signed char save_prec = env->fp_status.floatx80_rounding_precision;
1566 env->fp_status.float_rounding_mode = float_round_nearest_even;
1567 env->fp_status.floatx80_rounding_precision = 80;
1569 helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1);
1571 * Multiply by the second argument to compute the required
1572 * result.
1574 if (arg1_exp == 0) {
1575 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
1577 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
1578 aexp += arg1_exp - 0x3ffe;
1579 /* This result is inexact. */
1580 asig1 |= 1;
1581 env->fp_status.float_rounding_mode = save_mode;
1582 ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, aexp,
1583 asig0, asig1, &env->fp_status);
1584 env->fp_status.floatx80_rounding_precision = save_prec;
1586 fpop(env);
1587 merge_exception_flags(env, old_flags);
1590 void helper_fyl2x(CPUX86State *env)
1592 uint8_t old_flags = save_exception_flags(env);
1593 uint64_t arg0_sig = extractFloatx80Frac(ST0);
1594 int32_t arg0_exp = extractFloatx80Exp(ST0);
1595 bool arg0_sign = extractFloatx80Sign(ST0);
1596 uint64_t arg1_sig = extractFloatx80Frac(ST1);
1597 int32_t arg1_exp = extractFloatx80Exp(ST1);
1598 bool arg1_sign = extractFloatx80Sign(ST1);
1600 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1601 float_raise(float_flag_invalid, &env->fp_status);
1602 ST1 = floatx80_silence_nan(ST0, &env->fp_status);
1603 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
1604 float_raise(float_flag_invalid, &env->fp_status);
1605 ST1 = floatx80_silence_nan(ST1, &env->fp_status);
1606 } else if (floatx80_invalid_encoding(ST0) ||
1607 floatx80_invalid_encoding(ST1)) {
1608 float_raise(float_flag_invalid, &env->fp_status);
1609 ST1 = floatx80_default_nan(&env->fp_status);
1610 } else if (floatx80_is_any_nan(ST0)) {
1611 ST1 = ST0;
1612 } else if (floatx80_is_any_nan(ST1)) {
1613 /* Pass this NaN through. */
1614 } else if (arg0_sign && !floatx80_is_zero(ST0)) {
1615 float_raise(float_flag_invalid, &env->fp_status);
1616 ST1 = floatx80_default_nan(&env->fp_status);
1617 } else if (floatx80_is_infinity(ST1)) {
1618 FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
1619 &env->fp_status);
1620 switch (cmp) {
1621 case float_relation_less:
1622 ST1 = floatx80_chs(ST1);
1623 break;
1624 case float_relation_greater:
1625 /* Result is infinity of the same sign as ST1. */
1626 break;
1627 default:
1628 float_raise(float_flag_invalid, &env->fp_status);
1629 ST1 = floatx80_default_nan(&env->fp_status);
1630 break;
1632 } else if (floatx80_is_infinity(ST0)) {
1633 if (floatx80_is_zero(ST1)) {
1634 float_raise(float_flag_invalid, &env->fp_status);
1635 ST1 = floatx80_default_nan(&env->fp_status);
1636 } else if (arg1_sign) {
1637 ST1 = floatx80_chs(ST0);
1638 } else {
1639 ST1 = ST0;
1641 } else if (floatx80_is_zero(ST0)) {
1642 if (floatx80_is_zero(ST1)) {
1643 float_raise(float_flag_invalid, &env->fp_status);
1644 ST1 = floatx80_default_nan(&env->fp_status);
1645 } else {
1646 /* Result is infinity with opposite sign to ST1. */
1647 float_raise(float_flag_divbyzero, &env->fp_status);
1648 ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
1649 0x8000000000000000ULL);
1651 } else if (floatx80_is_zero(ST1)) {
1652 if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
1653 ST1 = floatx80_chs(ST1);
1655 /* Otherwise, ST1 is already the correct result. */
1656 } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
1657 if (arg1_sign) {
1658 ST1 = floatx80_chs(floatx80_zero);
1659 } else {
1660 ST1 = floatx80_zero;
1662 } else {
1663 int32_t int_exp;
1664 floatx80 arg0_m1;
1665 FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
1666 signed char save_prec = env->fp_status.floatx80_rounding_precision;
1667 env->fp_status.float_rounding_mode = float_round_nearest_even;
1668 env->fp_status.floatx80_rounding_precision = 80;
1670 if (arg0_exp == 0) {
1671 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
1673 if (arg1_exp == 0) {
1674 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
1676 int_exp = arg0_exp - 0x3fff;
1677 if (arg0_sig > 0xb504f333f9de6484ULL) {
1678 ++int_exp;
1680 arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
1681 &env->fp_status),
1682 floatx80_one, &env->fp_status);
1683 if (floatx80_is_zero(arg0_m1)) {
1684 /* Exact power of 2; multiply by ST1. */
1685 env->fp_status.float_rounding_mode = save_mode;
1686 ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
1687 ST1, &env->fp_status);
1688 } else {
1689 bool asign = extractFloatx80Sign(arg0_m1);
1690 int32_t aexp;
1691 uint64_t asig0, asig1, asig2;
1692 helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
1693 if (int_exp != 0) {
1694 bool isign = (int_exp < 0);
1695 int32_t iexp;
1696 uint64_t isig;
1697 int shift;
1698 int_exp = isign ? -int_exp : int_exp;
1699 shift = clz32(int_exp) + 32;
1700 isig = int_exp;
1701 isig <<= shift;
1702 iexp = 0x403e - shift;
1703 shift128RightJamming(asig0, asig1, iexp - aexp,
1704 &asig0, &asig1);
1705 if (asign == isign) {
1706 add128(isig, 0, asig0, asig1, &asig0, &asig1);
1707 } else {
1708 sub128(isig, 0, asig0, asig1, &asig0, &asig1);
1710 aexp = iexp;
1711 asign = isign;
1714 * Multiply by the second argument to compute the required
1715 * result.
1717 if (arg1_exp == 0) {
1718 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
1720 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
1721 aexp += arg1_exp - 0x3ffe;
1722 /* This result is inexact. */
1723 asig1 |= 1;
1724 env->fp_status.float_rounding_mode = save_mode;
1725 ST1 = normalizeRoundAndPackFloatx80(80, asign ^ arg1_sign, aexp,
1726 asig0, asig1, &env->fp_status);
1729 env->fp_status.floatx80_rounding_precision = save_prec;
1731 fpop(env);
1732 merge_exception_flags(env, old_flags);
1735 void helper_fsqrt(CPUX86State *env)
1737 uint8_t old_flags = save_exception_flags(env);
1738 if (floatx80_is_neg(ST0)) {
1739 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1740 env->fpus |= 0x400;
1742 ST0 = floatx80_sqrt(ST0, &env->fp_status);
1743 merge_exception_flags(env, old_flags);
1746 void helper_fsincos(CPUX86State *env)
1748 double fptemp = floatx80_to_double(env, ST0);
1750 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1751 env->fpus |= 0x400;
1752 } else {
1753 ST0 = double_to_floatx80(env, sin(fptemp));
1754 fpush(env);
1755 ST0 = double_to_floatx80(env, cos(fptemp));
1756 env->fpus &= ~0x400; /* C2 <-- 0 */
1757 /* the above code is for |arg| < 2**63 only */
1761 void helper_frndint(CPUX86State *env)
1763 uint8_t old_flags = save_exception_flags(env);
1764 ST0 = floatx80_round_to_int(ST0, &env->fp_status);
1765 merge_exception_flags(env, old_flags);
1768 void helper_fscale(CPUX86State *env)
1769 {
1770 uint8_t old_flags = save_exception_flags(env);
1771 if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) {
1772 float_raise(float_flag_invalid, &env->fp_status);
1773 ST0 = floatx80_default_nan(&env->fp_status);
1774 } else if (floatx80_is_any_nan(ST1)) {
1775 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1776 float_raise(float_flag_invalid, &env->fp_status);
1777 }
1778 ST0 = ST1;
1779 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1780 float_raise(float_flag_invalid, &env->fp_status);
1781 ST0 = floatx80_silence_nan(ST0, &env->fp_status);
1782 }
1783 } else if (floatx80_is_infinity(ST1) &&
1784 !floatx80_invalid_encoding(ST0) &&
1785 !floatx80_is_any_nan(ST0)) {
1786 if (floatx80_is_neg(ST1)) {
1787 if (floatx80_is_infinity(ST0)) {
1788 float_raise(float_flag_invalid, &env->fp_status);
1789 ST0 = floatx80_default_nan(&env->fp_status);
1790 } else {
1791 ST0 = (floatx80_is_neg(ST0) ?
1792 floatx80_chs(floatx80_zero) :
1793 floatx80_zero);
1794 }
1795 } else {
1796 if (floatx80_is_zero(ST0)) {
1797 float_raise(float_flag_invalid, &env->fp_status);
1798 ST0 = floatx80_default_nan(&env->fp_status);
1799 } else {
1800 ST0 = (floatx80_is_neg(ST0) ?
1801 floatx80_chs(floatx80_infinity) :
1802 floatx80_infinity);
1803 }
1804 }
1805 } else {
1806 int n;
1807 signed char save = env->fp_status.floatx80_rounding_precision;
1808 uint8_t save_flags = get_float_exception_flags(&env->fp_status);
1809 set_float_exception_flags(0, &env->fp_status);
1810 n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
1811 set_float_exception_flags(save_flags, &env->fp_status);
1812 env->fp_status.floatx80_rounding_precision = 80;
1813 ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
1814 env->fp_status.floatx80_rounding_precision = save;
1815 }
1816 merge_exception_flags(env, old_flags);
1817 }
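/*
 * Note on helper_fscale() above (explanatory, not in the source): apart
 * from the NaN/infinity special cases, FSCALE computes ST0 * 2^trunc(ST1).
 * The exception flags appear to be saved and restored around
 * floatx80_to_int32_round_to_zero() so that truncating ST1 does not leak
 * invalid/inexact flags into the visible status word; an ST1 far outside
 * int32 range merely saturates n, and floatx80_scalbn() then produces the
 * expected overflow or underflow result at full 80-bit precision.
 */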
1819 void helper_fsin(CPUX86State *env)
1820 {
1821 double fptemp = floatx80_to_double(env, ST0);
1823 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1824 env->fpus |= 0x400;
1825 } else {
1826 ST0 = double_to_floatx80(env, sin(fptemp));
1827 env->fpus &= ~0x400; /* C2 <-- 0 */
1828 /* the above code is for |arg| < 2**53 only */
1829 }
1830 }
1832 void helper_fcos(CPUX86State *env)
1833 {
1834 double fptemp = floatx80_to_double(env, ST0);
1836 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1837 env->fpus |= 0x400;
1838 } else {
1839 ST0 = double_to_floatx80(env, cos(fptemp));
1840 env->fpus &= ~0x400; /* C2 <-- 0 */
1841 /* the above code is for |arg| < 2**63 only */
1842 }
1843 }
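/*
 * Note on helper_fsin()/helper_fcos()/helper_fsincos() above (explanatory,
 * not in the source): unlike helper_fyl2x(), these still go through the
 * host libm with an intermediate double, so the results carry only double
 * precision.  Arguments with |arg| >= 2^63 (MAXTAN) leave the stack
 * untouched and set C2, telling the guest to reduce the argument itself,
 * which matches the hardware's out-of-range behaviour.
 */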
1845 void helper_fxam_ST0(CPUX86State *env)
1846 {
1847 CPU_LDoubleU temp;
1848 int expdif;
1850 temp.d = ST0;
1852 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1853 if (SIGND(temp)) {
1854 env->fpus |= 0x200; /* C1 <-- 1 */
1855 }
1857 if (env->fptags[env->fpstt]) {
1858 env->fpus |= 0x4100; /* Empty */
1859 return;
1860 }
1862 expdif = EXPD(temp);
1863 if (expdif == MAXEXPD) {
1864 if (MANTD(temp) == 0x8000000000000000ULL) {
1865 env->fpus |= 0x500; /* Infinity */
1866 } else if (MANTD(temp) & 0x8000000000000000ULL) {
1867 env->fpus |= 0x100; /* NaN */
1868 }
1869 } else if (expdif == 0) {
1870 if (MANTD(temp) == 0) {
1871 env->fpus |= 0x4000; /* Zero */
1872 } else {
1873 env->fpus |= 0x4400; /* Denormal */
1874 }
1875 } else if (MANTD(temp) & 0x8000000000000000ULL) {
1876 env->fpus |= 0x400;
1877 }
1878 }
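/*
 * Note (explanatory, not in the source): FXAM reports its class in the
 * C3/C2/C0 condition codes; in env->fpus those bits are 0x4000, 0x400 and
 * 0x100, with C1 (0x200) holding the sign.  The constants above encode:
 *
 *     0x4100  C3=1 C2=0 C0=1  empty register
 *     0x0500  C3=0 C2=1 C0=1  infinity
 *     0x0100  C3=0 C2=0 C0=1  NaN
 *     0x4000  C3=1 C2=0 C0=0  zero
 *     0x4400  C3=1 C2=1 C0=0  denormal
 *     0x0400  C3=0 C2=1 C0=0  normal finite
 *
 * Anything that falls through (an unnormal or otherwise invalid encoding)
 * leaves C3=C2=C0=0, i.e. "unsupported".
 */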
1880 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
1881 uintptr_t retaddr)
1882 {
1883 int fpus, fptag, exp, i;
1884 uint64_t mant;
1885 CPU_LDoubleU tmp;
1887 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1888 fptag = 0;
1889 for (i = 7; i >= 0; i--) {
1890 fptag <<= 2;
1891 if (env->fptags[i]) {
1892 fptag |= 3;
1893 } else {
1894 tmp.d = env->fpregs[i].d;
1895 exp = EXPD(tmp);
1896 mant = MANTD(tmp);
1897 if (exp == 0 && mant == 0) {
1898 /* zero */
1899 fptag |= 1;
1900 } else if (exp == 0 || exp == MAXEXPD
1901 || (mant & (1LL << 63)) == 0) {
1902 /* NaNs, infinity, denormal */
1903 fptag |= 2;
1904 }
1905 }
1906 }
1907 if (data32) {
1908 /* 32 bit */
1909 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1910 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1911 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1912 cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1913 cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1914 cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1915 cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
1916 } else {
1917 /* 16 bit */
1918 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1919 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1920 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1921 cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1922 cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1923 cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1924 cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
1925 }
1926 }
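/*
 * Note (explanatory, not in the source): the x87 tag word holds two bits
 * per physical register, 00 = valid, 01 = zero, 10 = special (NaN,
 * infinity, denormal, unnormal), 11 = empty, with register i's field at
 * bit position 2 * i; that is why the loop above walks i from 7 down to 0
 * while shifting left.  Bits 11..13 of the stored status word carry TOP.
 * As a rough illustration (not part of the build), an all-empty stack
 * produces the familiar reset value:
 *
 *     int fptag = 0, i;
 *     for (i = 7; i >= 0; i--) {
 *         fptag = (fptag << 2) | 3;   // every register tagged empty
 *     }
 *     // fptag == 0xffff
 */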
1928 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1929 {
1930 do_fstenv(env, ptr, data32, GETPC());
1931 }
1933 static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
1934 {
1935 env->fpstt = (fpus >> 11) & 7;
1936 env->fpus = fpus & ~0x3800 & ~FPUS_B;
1937 env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
1938 #if !defined(CONFIG_USER_ONLY)
1939 if (!(env->fpus & FPUS_SE)) {
1940 /*
1941 * Here the processor deasserts FERR#; in response, the chipset deasserts
1942 * IGNNE#.
1943 */
1944 cpu_clear_ignne();
1945 }
1946 #endif
1947 }
1949 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1950 uintptr_t retaddr)
1951 {
1952 int i, fpus, fptag;
1954 if (data32) {
1955 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1956 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1957 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1958 } else {
1959 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1960 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1961 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1962 }
1963 cpu_set_fpus(env, fpus);
1964 for (i = 0; i < 8; i++) {
1965 env->fptags[i] = ((fptag & 3) == 3);
1966 fptag >>= 2;
1967 }
1968 }
1970 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1971 {
1972 do_fldenv(env, ptr, data32, GETPC());
1973 }
1975 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1976 {
1977 floatx80 tmp;
1978 int i;
1980 do_fstenv(env, ptr, data32, GETPC());
1982 ptr += (14 << data32);
1983 for (i = 0; i < 8; i++) {
1984 tmp = ST(i);
1985 helper_fstt(env, tmp, ptr, GETPC());
1986 ptr += 10;
1987 }
1989 /* fninit */
1990 env->fpus = 0;
1991 env->fpstt = 0;
1992 cpu_set_fpuc(env, 0x37f);
1993 env->fptags[0] = 1;
1994 env->fptags[1] = 1;
1995 env->fptags[2] = 1;
1996 env->fptags[3] = 1;
1997 env->fptags[4] = 1;
1998 env->fptags[5] = 1;
1999 env->fptags[6] = 1;
2000 env->fptags[7] = 1;
2001 }
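/*
 * Note (explanatory, not in the source): FSAVE writes the environment
 * block first (14 bytes in 16-bit format, 28 bytes in 32-bit format,
 * hence ptr += 14 << data32) and then the eight stack registers as packed
 * 10-byte extended reals, ST(0) (the current top of stack) first.  After
 * the image is written, the instruction behaves like FNINIT: control word
 * back to 0x37f (all exceptions masked, extended precision, round to
 * nearest), status word cleared and every register tagged empty.
 */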
2003 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
2004 {
2005 floatx80 tmp;
2006 int i;
2008 do_fldenv(env, ptr, data32, GETPC());
2009 ptr += (14 << data32);
2011 for (i = 0; i < 8; i++) {
2012 tmp = helper_fldt(env, ptr, GETPC());
2013 ST(i) = tmp;
2014 ptr += 10;
2015 }
2016 }
2018 #if defined(CONFIG_USER_ONLY)
2019 void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
2020 {
2021 helper_fsave(env, ptr, data32);
2022 }
2024 void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
2025 {
2026 helper_frstor(env, ptr, data32);
2027 }
2028 #endif
2030 #define XO(X) offsetof(X86XSaveArea, X)
2032 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2033 {
2034 int fpus, fptag, i;
2035 target_ulong addr;
2037 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
2038 fptag = 0;
2039 for (i = 0; i < 8; i++) {
2040 fptag |= (env->fptags[i] << i);
2041 }
2043 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
2044 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
2045 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
2047 /* In 32-bit mode this is eip, sel, dp, sel.
2048 In 64-bit mode this is rip, rdp.
2049 But in either case we don't write actual data, just zeros. */
2050 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
2051 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
2053 addr = ptr + XO(legacy.fpregs);
2054 for (i = 0; i < 8; i++) {
2055 floatx80 tmp = ST(i);
2056 helper_fstt(env, tmp, addr, ra);
2057 addr += 16;
2058 }
2059 }
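/*
 * Note (explanatory, not in the source): unlike FSTENV, the FXSAVE/XSAVE
 * legacy area stores an abridged one-bit-per-register tag word where 1
 * means "valid" and 0 means "empty".  env->fptags[] uses the opposite
 * sense (1 = empty), which is why the accumulated byte is XORed with 0xff
 * before being written to legacy.ftw.  Each ST(i) then occupies a 16-byte
 * slot even though only 10 bytes of it are significant.
 */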
2061 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2062 {
2063 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
2064 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
2065 }
2067 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2068 {
2069 int i, nb_xmm_regs;
2070 target_ulong addr;
2072 if (env->hflags & HF_CS64_MASK) {
2073 nb_xmm_regs = 16;
2074 } else {
2075 nb_xmm_regs = 8;
2076 }
2078 addr = ptr + XO(legacy.xmm_regs);
2079 for (i = 0; i < nb_xmm_regs; i++) {
2080 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
2081 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
2082 addr += 16;
2083 }
2084 }
2086 static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2087 {
2088 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
2089 int i;
2091 for (i = 0; i < 4; i++, addr += 16) {
2092 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
2093 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
2094 }
2095 }
2097 static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2098 {
2099 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
2100 env->bndcs_regs.cfgu, ra);
2101 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
2102 env->bndcs_regs.sts, ra);
2103 }
2105 static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2106 {
2107 cpu_stq_data_ra(env, ptr, env->pkru, ra);
2108 }
2110 void helper_fxsave(CPUX86State *env, target_ulong ptr)
2111 {
2112 uintptr_t ra = GETPC();
2114 /* The operand must be 16 byte aligned */
2115 if (ptr & 0xf) {
2116 raise_exception_ra(env, EXCP0D_GPF, ra);
2117 }
2119 do_xsave_fpu(env, ptr, ra);
2121 if (env->cr[4] & CR4_OSFXSR_MASK) {
2122 do_xsave_mxcsr(env, ptr, ra);
2123 /* Fast FXSAVE leaves out the XMM registers */
2124 if (!(env->efer & MSR_EFER_FFXSR)
2125 || (env->hflags & HF_CPL_MASK)
2126 || !(env->hflags & HF_LMA_MASK)) {
2127 do_xsave_sse(env, ptr, ra);
2128 }
2129 }
2130 }
2132 static uint64_t get_xinuse(CPUX86State *env)
2133 {
2134 uint64_t inuse = -1;
2136 /* For the most part, we don't track XINUSE. We could calculate it
2137 here for all components, but it's probably less work to simply
2138 indicate in use. That said, the state of BNDREGS is important
2139 enough to track in HFLAGS, so we might as well use that here. */
2140 if ((env->hflags & HF_MPX_IU_MASK) == 0) {
2141 inuse &= ~XSTATE_BNDREGS_MASK;
2142 }
2143 return inuse;
2144 }
2146 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
2147 uint64_t inuse, uint64_t opt, uintptr_t ra)
2148 {
2149 uint64_t old_bv, new_bv;
2151 /* The OS must have enabled XSAVE. */
2152 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2153 raise_exception_ra(env, EXCP06_ILLOP, ra);
2154 }
2156 /* The operand must be 64 byte aligned. */
2157 if (ptr & 63) {
2158 raise_exception_ra(env, EXCP0D_GPF, ra);
2159 }
2161 /* Never save anything not enabled by XCR0. */
2162 rfbm &= env->xcr0;
2163 opt &= rfbm;
2165 if (opt & XSTATE_FP_MASK) {
2166 do_xsave_fpu(env, ptr, ra);
2167 }
2168 if (rfbm & XSTATE_SSE_MASK) {
2169 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
2170 do_xsave_mxcsr(env, ptr, ra);
2171 }
2172 if (opt & XSTATE_SSE_MASK) {
2173 do_xsave_sse(env, ptr, ra);
2174 }
2175 if (opt & XSTATE_BNDREGS_MASK) {
2176 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
2177 }
2178 if (opt & XSTATE_BNDCSR_MASK) {
2179 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
2180 }
2181 if (opt & XSTATE_PKRU_MASK) {
2182 do_xsave_pkru(env, ptr + XO(pkru_state), ra);
2183 }
2185 /* Update the XSTATE_BV field. */
2186 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
2187 new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
2188 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
2189 }
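/*
 * Note (explanatory, not in the source): rfbm is the requested-feature
 * bitmap (the instruction's EDX:EAX mask ANDed with XCR0) and opt is the
 * subset whose data is actually written; for XSAVEOPT that is only the
 * components currently in use.  XSTATE_BV is then updated so that bits
 * outside rfbm keep their old value while bits inside rfbm reflect whether
 * the component is in use.  A small worked example (illustrative values
 * only):
 *
 *     old_bv = 0x6, rfbm = 0x3, inuse = 0x1
 *     new_bv = (0x6 & ~0x3) | (0x1 & 0x3) = 0x5
 */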
2191 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
2192 {
2193 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
2194 }
2196 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
2197 {
2198 uint64_t inuse = get_xinuse(env);
2199 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
2200 }
2202 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2203 {
2204 int i, fpuc, fpus, fptag;
2205 target_ulong addr;
2207 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
2208 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
2209 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
2210 cpu_set_fpuc(env, fpuc);
2211 cpu_set_fpus(env, fpus);
2212 fptag ^= 0xff;
2213 for (i = 0; i < 8; i++) {
2214 env->fptags[i] = ((fptag >> i) & 1);
2215 }
2217 addr = ptr + XO(legacy.fpregs);
2218 for (i = 0; i < 8; i++) {
2219 floatx80 tmp = helper_fldt(env, addr, ra);
2220 ST(i) = tmp;
2221 addr += 16;
2222 }
2223 }
2225 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2226 {
2227 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
2228 }
2230 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2231 {
2232 int i, nb_xmm_regs;
2233 target_ulong addr;
2235 if (env->hflags & HF_CS64_MASK) {
2236 nb_xmm_regs = 16;
2237 } else {
2238 nb_xmm_regs = 8;
2239 }
2241 addr = ptr + XO(legacy.xmm_regs);
2242 for (i = 0; i < nb_xmm_regs; i++) {
2243 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
2244 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
2245 addr += 16;
2246 }
2247 }
2249 static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2250 {
2251 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
2252 int i;
2254 for (i = 0; i < 4; i++, addr += 16) {
2255 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
2256 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
2257 }
2258 }
2260 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2261 {
2262 /* FIXME: Extend highest implemented bit of linear address. */
2263 env->bndcs_regs.cfgu
2264 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
2265 env->bndcs_regs.sts
2266 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
2267 }
2269 static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2270 {
2271 env->pkru = cpu_ldq_data_ra(env, ptr, ra);
2272 }
2274 void helper_fxrstor(CPUX86State *env, target_ulong ptr)
2275 {
2276 uintptr_t ra = GETPC();
2278 /* The operand must be 16 byte aligned */
2279 if (ptr & 0xf) {
2280 raise_exception_ra(env, EXCP0D_GPF, ra);
2281 }
2283 do_xrstor_fpu(env, ptr, ra);
2285 if (env->cr[4] & CR4_OSFXSR_MASK) {
2286 do_xrstor_mxcsr(env, ptr, ra);
2287 /* Fast FXRSTOR leaves out the XMM registers */
2288 if (!(env->efer & MSR_EFER_FFXSR)
2289 || (env->hflags & HF_CPL_MASK)
2290 || !(env->hflags & HF_LMA_MASK)) {
2291 do_xrstor_sse(env, ptr, ra);
2292 }
2293 }
2294 }
2296 #if defined(CONFIG_USER_ONLY)
2297 void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
2298 {
2299 helper_fxsave(env, ptr);
2300 }
2302 void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
2303 {
2304 helper_fxrstor(env, ptr);
2305 }
2306 #endif
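/*
 * Note (explanatory, not in the source): helper_xrstor() below implements
 * only the standard (non-compacted) XSAVE format.  It therefore requires
 * XSTATE_BV to be a subset of XCR0 and the XCOMP_BV word plus the next
 * reserved header bytes to be zero; a set bit 63 in the header, which
 * would indicate the compacted format, is rejected with #GP (see the
 * FIXME).  For each component selected in the requested-feature bitmap, a
 * clear XSTATE_BV bit means "load the initial configuration" rather than
 * data from memory.
 */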
2308 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
2309 {
2310 uintptr_t ra = GETPC();
2311 uint64_t xstate_bv, xcomp_bv, reserve0;
2313 rfbm &= env->xcr0;
2315 /* The OS must have enabled XSAVE. */
2316 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2317 raise_exception_ra(env, EXCP06_ILLOP, ra);
2318 }
2320 /* The operand must be 64 byte aligned. */
2321 if (ptr & 63) {
2322 raise_exception_ra(env, EXCP0D_GPF, ra);
2323 }
2325 xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
2327 if ((int64_t)xstate_bv < 0) {
2328 /* FIXME: Compact form. */
2329 raise_exception_ra(env, EXCP0D_GPF, ra);
2330 }
2332 /* Standard form. */
2334 /* The XSTATE_BV field must not set bits not present in XCR0. */
2335 if (xstate_bv & ~env->xcr0) {
2336 raise_exception_ra(env, EXCP0D_GPF, ra);
2337 }
2339 /* The XCOMP_BV field must be zero. Note that, as of the April 2016
2340 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
2341 describes only XCOMP_BV, but the description of the standard form
2342 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
2343 includes the next 64-bit field. */
2344 xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
2345 reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
2346 if (xcomp_bv || reserve0) {
2347 raise_exception_ra(env, EXCP0D_GPF, ra);
2348 }
2350 if (rfbm & XSTATE_FP_MASK) {
2351 if (xstate_bv & XSTATE_FP_MASK) {
2352 do_xrstor_fpu(env, ptr, ra);
2353 } else {
2354 helper_fninit(env);
2355 memset(env->fpregs, 0, sizeof(env->fpregs));
2356 }
2357 }
2358 if (rfbm & XSTATE_SSE_MASK) {
2359 /* Note that the standard form of XRSTOR loads MXCSR from memory
2360 whether or not the XSTATE_BV bit is set. */
2361 do_xrstor_mxcsr(env, ptr, ra);
2362 if (xstate_bv & XSTATE_SSE_MASK) {
2363 do_xrstor_sse(env, ptr, ra);
2364 } else {
2365 /* ??? When AVX is implemented, we may have to be more
2366 selective in the clearing. */
2367 memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
2368 }
2369 }
2370 if (rfbm & XSTATE_BNDREGS_MASK) {
2371 if (xstate_bv & XSTATE_BNDREGS_MASK) {
2372 do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
2373 env->hflags |= HF_MPX_IU_MASK;
2374 } else {
2375 memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
2376 env->hflags &= ~HF_MPX_IU_MASK;
2377 }
2378 }
2379 if (rfbm & XSTATE_BNDCSR_MASK) {
2380 if (xstate_bv & XSTATE_BNDCSR_MASK) {
2381 do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
2382 } else {
2383 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
2384 }
2385 cpu_sync_bndcs_hflags(env);
2386 }
2387 if (rfbm & XSTATE_PKRU_MASK) {
2388 uint64_t old_pkru = env->pkru;
2389 if (xstate_bv & XSTATE_PKRU_MASK) {
2390 do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
2391 } else {
2392 env->pkru = 0;
2393 }
2394 if (env->pkru != old_pkru) {
2395 CPUState *cs = env_cpu(env);
2396 tlb_flush(cs);
2397 }
2398 }
2399 }
2401 #undef XO
2403 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
2404 {
2405 /* The OS must have enabled XSAVE. */
2406 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2407 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
2408 }
2410 switch (ecx) {
2411 case 0:
2412 return env->xcr0;
2413 case 1:
2414 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
2415 return env->xcr0 & get_xinuse(env);
2416 }
2417 break;
2418 }
2419 raise_exception_ra(env, EXCP0D_GPF, GETPC());
2420 }
2422 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
2423 {
2424 uint32_t dummy, ena_lo, ena_hi;
2425 uint64_t ena;
2427 /* The OS must have enabled XSAVE. */
2428 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2429 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
2430 }
2432 /* Only XCR0 is defined at present; the FPU may not be disabled. */
2433 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
2434 goto do_gpf;
2435 }
2437 /* Disallow enabling unimplemented features. */
2438 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
2439 ena = ((uint64_t)ena_hi << 32) | ena_lo;
2440 if (mask & ~ena) {
2441 goto do_gpf;
2442 }
2444 /* Disallow enabling only half of MPX. */
2445 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
2446 & XSTATE_BNDCSR_MASK) {
2447 goto do_gpf;
2448 }
2450 env->xcr0 = mask;
2451 cpu_sync_bndcs_hflags(env);
2452 return;
2454 do_gpf:
2455 raise_exception_ra(env, EXCP0D_GPF, GETPC());
2456 }
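/*
 * Note (explanatory, not in the source): the "half of MPX" test above
 * relies on XSTATE_BNDCSR_MASK being exactly twice XSTATE_BNDREGS_MASK
 * (bits 4 and 3 of XCR0).  Multiplying mask by that ratio shifts the
 * BNDREGS bit up into the BNDCSR position, so the XOR has the BNDCSR bit
 * set precisely when the two MPX bits differ, and the final AND turns
 * "bits differ" into the #GP condition.  Illustrative checks (not part of
 * the build):
 *
 *     mask = XSTATE_FP_MASK | XSTATE_BNDREGS_MASK;                        // #GP
 *     mask = XSTATE_FP_MASK | XSTATE_BNDREGS_MASK | XSTATE_BNDCSR_MASK;   // ok
 */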
2458 /* MMX/SSE */
2459 /* XXX: optimize by storing fptt and fptags in the static cpu state */
2461 #define SSE_DAZ 0x0040
2462 #define SSE_RC_MASK 0x6000
2463 #define SSE_RC_NEAR 0x0000
2464 #define SSE_RC_DOWN 0x2000
2465 #define SSE_RC_UP 0x4000
2466 #define SSE_RC_CHOP 0x6000
2467 #define SSE_FZ 0x8000
2469 void update_mxcsr_status(CPUX86State *env)
2470 {
2471 uint32_t mxcsr = env->mxcsr;
2472 int rnd_type;
2474 /* set rounding mode */
2475 switch (mxcsr & SSE_RC_MASK) {
2476 default:
2477 case SSE_RC_NEAR:
2478 rnd_type = float_round_nearest_even;
2479 break;
2480 case SSE_RC_DOWN:
2481 rnd_type = float_round_down;
2482 break;
2483 case SSE_RC_UP:
2484 rnd_type = float_round_up;
2485 break;
2486 case SSE_RC_CHOP:
2487 rnd_type = float_round_to_zero;
2488 break;
2489 }
2490 set_float_rounding_mode(rnd_type, &env->sse_status);
2492 /* set denormals are zero */
2493 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
2495 /* set flush to zero */
2496 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
2497 }
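/*
 * Note (explanatory, not in the source): MXCSR only governs the SSE unit,
 * so all three controls above act on env->sse_status rather than the x87
 * env->fp_status.  For reference, the bits decoded here are RC in bits
 * 13-14, DAZ in bit 6 (treat denormal inputs as zero) and FZ in bit 15
 * (flush denormal results to zero).
 */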
2499 void helper_ldmxcsr(CPUX86State *env, uint32_t val)
2500 {
2501 cpu_set_mxcsr(env, val);
2502 }
2504 void helper_enter_mmx(CPUX86State *env)
2505 {
2506 env->fpstt = 0;
2507 *(uint32_t *)(env->fptags) = 0;
2508 *(uint32_t *)(env->fptags + 4) = 0;
2509 }
2511 void helper_emms(CPUX86State *env)
2512 {
2513 /* set to empty state */
2514 *(uint32_t *)(env->fptags) = 0x01010101;
2515 *(uint32_t *)(env->fptags + 4) = 0x01010101;
2516 }
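/*
 * Note (explanatory, not in the source): env->fptags[] is an array of
 * eight one-byte flags (1 = empty), so the two 32-bit stores above are
 * just a compact way of writing all eight bytes at once: 0x01010101 twice
 * marks every register empty for EMMS, while helper_enter_mmx() clears
 * them all to mark every register valid for MMX use and resets TOP to 0.
 */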
2518 /* XXX: suppress */
2519 void helper_movq(CPUX86State *env, void *d, void *s)
2520 {
2521 *(uint64_t *)d = *(uint64_t *)s;
2522 }
2524 #define SHIFT 0
2525 #include "ops_sse.h"
2527 #define SHIFT 1
2528 #include "ops_sse.h"