Merge remote-tracking branch 'remotes/vivier2/tags/trivial-branch-pull-request' into...
[qemu/ar7.git] / target / i386 / fpu_helper.c
blob99f28f267f60f7b9b1b79bc5461bbcad71d50650
1 /*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include <math.h>
22 #include "cpu.h"
23 #include "exec/helper-proto.h"
24 #include "qemu/host-utils.h"
25 #include "exec/exec-all.h"
26 #include "exec/cpu_ldst.h"
27 #include "fpu/softfloat.h"
29 #ifdef CONFIG_SOFTMMU
30 #include "hw/irq.h"
31 #endif
33 #define FPU_RC_MASK 0xc00
34 #define FPU_RC_NEAR 0x000
35 #define FPU_RC_DOWN 0x400
36 #define FPU_RC_UP 0x800
37 #define FPU_RC_CHOP 0xc00
39 #define MAXTAN 9223372036854775808.0
41 /* the following deal with x86 long double-precision numbers */
42 #define MAXEXPD 0x7fff
43 #define EXPBIAS 16383
44 #define EXPD(fp) (fp.l.upper & 0x7fff)
45 #define SIGND(fp) ((fp.l.upper) & 0x8000)
46 #define MANTD(fp) (fp.l.lower)
47 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
49 #define FPUS_IE (1 << 0)
50 #define FPUS_DE (1 << 1)
51 #define FPUS_ZE (1 << 2)
52 #define FPUS_OE (1 << 3)
53 #define FPUS_UE (1 << 4)
54 #define FPUS_PE (1 << 5)
55 #define FPUS_SF (1 << 6)
56 #define FPUS_SE (1 << 7)
57 #define FPUS_B (1 << 15)
59 #define FPUC_EM 0x3f
61 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
62 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
63 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
65 #if !defined(CONFIG_USER_ONLY)
66 static qemu_irq ferr_irq;
68 void x86_register_ferr_irq(qemu_irq irq)
70 ferr_irq = irq;
73 static void cpu_clear_ignne(void)
75 CPUX86State *env = &X86_CPU(first_cpu)->env;
76 env->hflags2 &= ~HF2_IGNNE_MASK;
79 void cpu_set_ignne(void)
81 CPUX86State *env = &X86_CPU(first_cpu)->env;
82 env->hflags2 |= HF2_IGNNE_MASK;
84 * We get here in response to a write to port F0h. The chipset should
85 * deassert FP_IRQ and FERR# instead should stay signaled until FPSW_SE is
86 * cleared, because FERR# and FP_IRQ are two separate pins on real
87 * hardware. However, we don't model FERR# as a qemu_irq, so we just
88 * do directly what the chipset would do, i.e. deassert FP_IRQ.
90 qemu_irq_lower(ferr_irq);
92 #endif
95 static inline void fpush(CPUX86State *env)
97 env->fpstt = (env->fpstt - 1) & 7;
98 env->fptags[env->fpstt] = 0; /* validate stack entry */
101 static inline void fpop(CPUX86State *env)
103 env->fptags[env->fpstt] = 1; /* invalidate stack entry */
104 env->fpstt = (env->fpstt + 1) & 7;
107 static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
108 uintptr_t retaddr)
110 CPU_LDoubleU temp;
112 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
113 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
114 return temp.d;
117 static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
118 uintptr_t retaddr)
120 CPU_LDoubleU temp;
122 temp.d = f;
123 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
124 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
127 /* x87 FPU helpers */
129 static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
131 union {
132 float64 f64;
133 double d;
134 } u;
136 u.f64 = floatx80_to_float64(a, &env->fp_status);
137 return u.d;
140 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
142 union {
143 float64 f64;
144 double d;
145 } u;
147 u.d = a;
148 return float64_to_floatx80(u.f64, &env->fp_status);
151 static void fpu_set_exception(CPUX86State *env, int mask)
153 env->fpus |= mask;
154 if (env->fpus & (~env->fpuc & FPUC_EM)) {
155 env->fpus |= FPUS_SE | FPUS_B;
159 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
161 if (floatx80_is_zero(b)) {
162 fpu_set_exception(env, FPUS_ZE);
164 return floatx80_div(a, b, &env->fp_status);
167 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
169 if (env->cr[0] & CR0_NE_MASK) {
170 raise_exception_ra(env, EXCP10_COPR, retaddr);
172 #if !defined(CONFIG_USER_ONLY)
173 else if (ferr_irq && !(env->hflags2 & HF2_IGNNE_MASK)) {
174 qemu_irq_raise(ferr_irq);
176 #endif
179 void helper_flds_FT0(CPUX86State *env, uint32_t val)
181 union {
182 float32 f;
183 uint32_t i;
184 } u;
186 u.i = val;
187 FT0 = float32_to_floatx80(u.f, &env->fp_status);
190 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
192 union {
193 float64 f;
194 uint64_t i;
195 } u;
197 u.i = val;
198 FT0 = float64_to_floatx80(u.f, &env->fp_status);
201 void helper_fildl_FT0(CPUX86State *env, int32_t val)
203 FT0 = int32_to_floatx80(val, &env->fp_status);
206 void helper_flds_ST0(CPUX86State *env, uint32_t val)
208 int new_fpstt;
209 union {
210 float32 f;
211 uint32_t i;
212 } u;
214 new_fpstt = (env->fpstt - 1) & 7;
215 u.i = val;
216 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
217 env->fpstt = new_fpstt;
218 env->fptags[new_fpstt] = 0; /* validate stack entry */
221 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
223 int new_fpstt;
224 union {
225 float64 f;
226 uint64_t i;
227 } u;
229 new_fpstt = (env->fpstt - 1) & 7;
230 u.i = val;
231 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
232 env->fpstt = new_fpstt;
233 env->fptags[new_fpstt] = 0; /* validate stack entry */
236 void helper_fildl_ST0(CPUX86State *env, int32_t val)
238 int new_fpstt;
240 new_fpstt = (env->fpstt - 1) & 7;
241 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
242 env->fpstt = new_fpstt;
243 env->fptags[new_fpstt] = 0; /* validate stack entry */
246 void helper_fildll_ST0(CPUX86State *env, int64_t val)
248 int new_fpstt;
250 new_fpstt = (env->fpstt - 1) & 7;
251 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
252 env->fpstt = new_fpstt;
253 env->fptags[new_fpstt] = 0; /* validate stack entry */
256 uint32_t helper_fsts_ST0(CPUX86State *env)
258 union {
259 float32 f;
260 uint32_t i;
261 } u;
263 u.f = floatx80_to_float32(ST0, &env->fp_status);
264 return u.i;
267 uint64_t helper_fstl_ST0(CPUX86State *env)
269 union {
270 float64 f;
271 uint64_t i;
272 } u;
274 u.f = floatx80_to_float64(ST0, &env->fp_status);
275 return u.i;
278 int32_t helper_fist_ST0(CPUX86State *env)
280 int32_t val;
282 val = floatx80_to_int32(ST0, &env->fp_status);
283 if (val != (int16_t)val) {
284 val = -32768;
286 return val;
289 int32_t helper_fistl_ST0(CPUX86State *env)
291 int32_t val;
292 signed char old_exp_flags;
294 old_exp_flags = get_float_exception_flags(&env->fp_status);
295 set_float_exception_flags(0, &env->fp_status);
297 val = floatx80_to_int32(ST0, &env->fp_status);
298 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
299 val = 0x80000000;
301 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
302 | old_exp_flags, &env->fp_status);
303 return val;
306 int64_t helper_fistll_ST0(CPUX86State *env)
308 int64_t val;
309 signed char old_exp_flags;
311 old_exp_flags = get_float_exception_flags(&env->fp_status);
312 set_float_exception_flags(0, &env->fp_status);
314 val = floatx80_to_int64(ST0, &env->fp_status);
315 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
316 val = 0x8000000000000000ULL;
318 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
319 | old_exp_flags, &env->fp_status);
320 return val;
323 int32_t helper_fistt_ST0(CPUX86State *env)
325 int32_t val;
327 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
328 if (val != (int16_t)val) {
329 val = -32768;
331 return val;
334 int32_t helper_fisttl_ST0(CPUX86State *env)
336 return floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
339 int64_t helper_fisttll_ST0(CPUX86State *env)
341 return floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
344 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
346 int new_fpstt;
348 new_fpstt = (env->fpstt - 1) & 7;
349 env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
350 env->fpstt = new_fpstt;
351 env->fptags[new_fpstt] = 0; /* validate stack entry */
354 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
356 helper_fstt(env, ST0, ptr, GETPC());
359 void helper_fpush(CPUX86State *env)
361 fpush(env);
364 void helper_fpop(CPUX86State *env)
366 fpop(env);
369 void helper_fdecstp(CPUX86State *env)
371 env->fpstt = (env->fpstt - 1) & 7;
372 env->fpus &= ~0x4700;
375 void helper_fincstp(CPUX86State *env)
377 env->fpstt = (env->fpstt + 1) & 7;
378 env->fpus &= ~0x4700;
381 /* FPU move */
383 void helper_ffree_STN(CPUX86State *env, int st_index)
385 env->fptags[(env->fpstt + st_index) & 7] = 1;
388 void helper_fmov_ST0_FT0(CPUX86State *env)
390 ST0 = FT0;
393 void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
395 FT0 = ST(st_index);
398 void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
400 ST0 = ST(st_index);
403 void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
405 ST(st_index) = ST0;
408 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
410 floatx80 tmp;
412 tmp = ST(st_index);
413 ST(st_index) = ST0;
414 ST0 = tmp;
417 /* FPU operations */
/* FPUS C3/C2/C0 patterns indexed by (floatx80_compare result + 1):
   less, equal, greater, unordered. */
static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
421 void helper_fcom_ST0_FT0(CPUX86State *env)
423 int ret;
425 ret = floatx80_compare(ST0, FT0, &env->fp_status);
426 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
429 void helper_fucom_ST0_FT0(CPUX86State *env)
431 int ret;
433 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
434 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
/* EFLAGS ZF/PF/CF patterns indexed by (floatx80_compare result + 1):
   less, equal, greater, unordered. */
static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
439 void helper_fcomi_ST0_FT0(CPUX86State *env)
441 int eflags;
442 int ret;
444 ret = floatx80_compare(ST0, FT0, &env->fp_status);
445 eflags = cpu_cc_compute_all(env, CC_OP);
446 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
447 CC_SRC = eflags;
450 void helper_fucomi_ST0_FT0(CPUX86State *env)
452 int eflags;
453 int ret;
455 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
456 eflags = cpu_cc_compute_all(env, CC_OP);
457 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
458 CC_SRC = eflags;
461 void helper_fadd_ST0_FT0(CPUX86State *env)
463 ST0 = floatx80_add(ST0, FT0, &env->fp_status);
466 void helper_fmul_ST0_FT0(CPUX86State *env)
468 ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
471 void helper_fsub_ST0_FT0(CPUX86State *env)
473 ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
476 void helper_fsubr_ST0_FT0(CPUX86State *env)
478 ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
481 void helper_fdiv_ST0_FT0(CPUX86State *env)
483 ST0 = helper_fdiv(env, ST0, FT0);
486 void helper_fdivr_ST0_FT0(CPUX86State *env)
488 ST0 = helper_fdiv(env, FT0, ST0);
491 /* fp operations between STN and ST0 */
493 void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
495 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
498 void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
500 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
503 void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
505 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
508 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
510 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
513 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
515 floatx80 *p;
517 p = &ST(st_index);
518 *p = helper_fdiv(env, *p, ST0);
521 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
523 floatx80 *p;
525 p = &ST(st_index);
526 *p = helper_fdiv(env, ST0, *p);
529 /* misc FPU operations */
530 void helper_fchs_ST0(CPUX86State *env)
532 ST0 = floatx80_chs(ST0);
535 void helper_fabs_ST0(CPUX86State *env)
537 ST0 = floatx80_abs(ST0);
540 void helper_fld1_ST0(CPUX86State *env)
542 ST0 = floatx80_one;
545 void helper_fldl2t_ST0(CPUX86State *env)
547 ST0 = floatx80_l2t;
550 void helper_fldl2e_ST0(CPUX86State *env)
552 ST0 = floatx80_l2e;
555 void helper_fldpi_ST0(CPUX86State *env)
557 ST0 = floatx80_pi;
560 void helper_fldlg2_ST0(CPUX86State *env)
562 ST0 = floatx80_lg2;
565 void helper_fldln2_ST0(CPUX86State *env)
567 ST0 = floatx80_ln2;
570 void helper_fldz_ST0(CPUX86State *env)
572 ST0 = floatx80_zero;
575 void helper_fldz_FT0(CPUX86State *env)
577 FT0 = floatx80_zero;
580 uint32_t helper_fnstsw(CPUX86State *env)
582 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
585 uint32_t helper_fnstcw(CPUX86State *env)
587 return env->fpuc;
590 void update_fp_status(CPUX86State *env)
592 int rnd_type;
594 /* set rounding mode */
595 switch (env->fpuc & FPU_RC_MASK) {
596 default:
597 case FPU_RC_NEAR:
598 rnd_type = float_round_nearest_even;
599 break;
600 case FPU_RC_DOWN:
601 rnd_type = float_round_down;
602 break;
603 case FPU_RC_UP:
604 rnd_type = float_round_up;
605 break;
606 case FPU_RC_CHOP:
607 rnd_type = float_round_to_zero;
608 break;
610 set_float_rounding_mode(rnd_type, &env->fp_status);
611 switch ((env->fpuc >> 8) & 3) {
612 case 0:
613 rnd_type = 32;
614 break;
615 case 2:
616 rnd_type = 64;
617 break;
618 case 3:
619 default:
620 rnd_type = 80;
621 break;
623 set_floatx80_rounding_precision(rnd_type, &env->fp_status);
626 void helper_fldcw(CPUX86State *env, uint32_t val)
628 cpu_set_fpuc(env, val);
631 void helper_fclex(CPUX86State *env)
633 env->fpus &= 0x7f00;
636 void helper_fwait(CPUX86State *env)
638 if (env->fpus & FPUS_SE) {
639 fpu_raise_exception(env, GETPC());
643 void helper_fninit(CPUX86State *env)
645 env->fpus = 0;
646 env->fpstt = 0;
647 cpu_set_fpuc(env, 0x37f);
648 env->fptags[0] = 1;
649 env->fptags[1] = 1;
650 env->fptags[2] = 1;
651 env->fptags[3] = 1;
652 env->fptags[4] = 1;
653 env->fptags[5] = 1;
654 env->fptags[6] = 1;
655 env->fptags[7] = 1;
658 /* BCD ops */
660 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
662 floatx80 tmp;
663 uint64_t val;
664 unsigned int v;
665 int i;
667 val = 0;
668 for (i = 8; i >= 0; i--) {
669 v = cpu_ldub_data_ra(env, ptr + i, GETPC());
670 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
672 tmp = int64_to_floatx80(val, &env->fp_status);
673 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
674 tmp = floatx80_chs(tmp);
676 fpush(env);
677 ST0 = tmp;
680 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
682 int v;
683 target_ulong mem_ref, mem_end;
684 int64_t val;
686 val = floatx80_to_int64(ST0, &env->fp_status);
687 mem_ref = ptr;
688 mem_end = mem_ref + 9;
689 if (val < 0) {
690 cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
691 val = -val;
692 } else {
693 cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
695 while (mem_ref < mem_end) {
696 if (val == 0) {
697 break;
699 v = val % 100;
700 val = val / 100;
701 v = ((v / 10) << 4) | (v % 10);
702 cpu_stb_data_ra(env, mem_ref++, v, GETPC());
704 while (mem_ref < mem_end) {
705 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
709 void helper_f2xm1(CPUX86State *env)
711 double val = floatx80_to_double(env, ST0);
713 val = pow(2.0, val) - 1.0;
714 ST0 = double_to_floatx80(env, val);
717 void helper_fyl2x(CPUX86State *env)
719 double fptemp = floatx80_to_double(env, ST0);
721 if (fptemp > 0.0) {
722 fptemp = log(fptemp) / log(2.0); /* log2(ST) */
723 fptemp *= floatx80_to_double(env, ST1);
724 ST1 = double_to_floatx80(env, fptemp);
725 fpop(env);
726 } else {
727 env->fpus &= ~0x4700;
728 env->fpus |= 0x400;
732 void helper_fptan(CPUX86State *env)
734 double fptemp = floatx80_to_double(env, ST0);
736 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
737 env->fpus |= 0x400;
738 } else {
739 fptemp = tan(fptemp);
740 ST0 = double_to_floatx80(env, fptemp);
741 fpush(env);
742 ST0 = floatx80_one;
743 env->fpus &= ~0x400; /* C2 <-- 0 */
744 /* the above code is for |arg| < 2**52 only */
748 void helper_fpatan(CPUX86State *env)
750 double fptemp, fpsrcop;
752 fpsrcop = floatx80_to_double(env, ST1);
753 fptemp = floatx80_to_double(env, ST0);
754 ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
755 fpop(env);
758 void helper_fxtract(CPUX86State *env)
760 CPU_LDoubleU temp;
762 temp.d = ST0;
764 if (floatx80_is_zero(ST0)) {
765 /* Easy way to generate -inf and raising division by 0 exception */
766 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
767 &env->fp_status);
768 fpush(env);
769 ST0 = temp.d;
770 } else {
771 int expdif;
773 expdif = EXPD(temp) - EXPBIAS;
774 /* DP exponent bias */
775 ST0 = int32_to_floatx80(expdif, &env->fp_status);
776 fpush(env);
777 BIASEXPONENT(temp);
778 ST0 = temp.d;
/*
 * FPREM1: IEEE partial remainder of ST0 by ST1 (round-to-nearest
 * quotient), computed via host doubles.  When the exponent difference is
 * 53 or more, only a partial reduction is performed and C2 is set so the
 * guest loops; otherwise C0/C3/C1 receive the low three quotient bits.
 */
void helper_fprem1(CPUX86State *env)
{
    double st0, st1, dblq, fpsrcop, fptemp;
    CPU_LDoubleU fpsrcop1, fptemp1;
    int expdif;
    signed long long int q;

    st0 = floatx80_to_double(env, ST0);
    st1 = floatx80_to_double(env, ST1);

    /* Invalid operands: produce a NaN and clear the condition codes. */
    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        return;
    }

    fpsrcop = st0;
    fptemp = st1;
    fpsrcop1.d = ST0;
    fptemp1.d = ST1;
    /* Compare raw 80-bit exponents to choose full vs partial reduction. */
    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);

    if (expdif < 0) {
        /* optimisation? taken from the AMD docs */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* ST0 is unchanged */
        return;
    }

    if (expdif < 53) {
        dblq = fpsrcop / fptemp;
        /* round dblq towards nearest integer */
        dblq = rint(dblq);
        st0 = fpsrcop - fptemp * dblq;

        /* convert dblq to q by truncating towards zero */
        if (dblq < 0.0) {
            q = (signed long long int)(-dblq);
        } else {
            q = (signed long long int)dblq;
        }

        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* (C0,C3,C1) <-- (q2,q1,q0) */
        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
    } else {
        env->fpus |= 0x400; /* C2 <-- 1: reduction incomplete */
        fptemp = pow(2.0, expdif - 50);
        fpsrcop = (st0 / st1) / fptemp;
        /* fpsrcop = integer obtained by chopping */
        fpsrcop = (fpsrcop < 0.0) ?
                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
        st0 -= (st1 * fpsrcop * fptemp);
    }
    ST0 = double_to_floatx80(env, st0);
}
/*
 * FPREM: partial remainder of ST0 by ST1 with truncated (chop) quotient,
 * computed via host doubles.  When the exponent difference is 53 or more
 * only a partial reduction is performed (step size N per the AMD docs)
 * and C2 is set so the guest loops; otherwise C0/C3/C1 receive the low
 * three quotient bits.
 */
void helper_fprem(CPUX86State *env)
{
    double st0, st1, dblq, fpsrcop, fptemp;
    CPU_LDoubleU fpsrcop1, fptemp1;
    int expdif;
    signed long long int q;

    st0 = floatx80_to_double(env, ST0);
    st1 = floatx80_to_double(env, ST1);

    /* Invalid operands: produce a NaN and clear the condition codes. */
    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        return;
    }

    fpsrcop = st0;
    fptemp = st1;
    fpsrcop1.d = ST0;
    fptemp1.d = ST1;
    /* Compare raw 80-bit exponents to choose full vs partial reduction. */
    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);

    if (expdif < 0) {
        /* optimisation? taken from the AMD docs */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* ST0 is unchanged */
        return;
    }

    if (expdif < 53) {
        dblq = fpsrcop / fptemp; /* ST0 / ST1 */
        /* round dblq towards zero */
        dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
        st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */

        /* convert dblq to q by truncating towards zero */
        if (dblq < 0.0) {
            q = (signed long long int)(-dblq);
        } else {
            q = (signed long long int)dblq;
        }

        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* (C0,C3,C1) <-- (q2,q1,q0) */
        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
    } else {
        int N = 32 + (expdif % 32); /* as per AMD docs */

        env->fpus |= 0x400; /* C2 <-- 1: reduction incomplete */
        fptemp = pow(2.0, (double)(expdif - N));
        fpsrcop = (st0 / st1) / fptemp;
        /* fpsrcop = integer obtained by chopping */
        fpsrcop = (fpsrcop < 0.0) ?
                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
        st0 -= (st1 * fpsrcop * fptemp);
    }
    ST0 = double_to_floatx80(env, st0);
}
902 void helper_fyl2xp1(CPUX86State *env)
904 double fptemp = floatx80_to_double(env, ST0);
906 if ((fptemp + 1.0) > 0.0) {
907 fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
908 fptemp *= floatx80_to_double(env, ST1);
909 ST1 = double_to_floatx80(env, fptemp);
910 fpop(env);
911 } else {
912 env->fpus &= ~0x4700;
913 env->fpus |= 0x400;
917 void helper_fsqrt(CPUX86State *env)
919 if (floatx80_is_neg(ST0)) {
920 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
921 env->fpus |= 0x400;
923 ST0 = floatx80_sqrt(ST0, &env->fp_status);
926 void helper_fsincos(CPUX86State *env)
928 double fptemp = floatx80_to_double(env, ST0);
930 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
931 env->fpus |= 0x400;
932 } else {
933 ST0 = double_to_floatx80(env, sin(fptemp));
934 fpush(env);
935 ST0 = double_to_floatx80(env, cos(fptemp));
936 env->fpus &= ~0x400; /* C2 <-- 0 */
937 /* the above code is for |arg| < 2**63 only */
941 void helper_frndint(CPUX86State *env)
943 ST0 = floatx80_round_to_int(ST0, &env->fp_status);
946 void helper_fscale(CPUX86State *env)
948 if (floatx80_is_any_nan(ST1)) {
949 ST0 = ST1;
950 } else {
951 int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
952 ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
956 void helper_fsin(CPUX86State *env)
958 double fptemp = floatx80_to_double(env, ST0);
960 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
961 env->fpus |= 0x400;
962 } else {
963 ST0 = double_to_floatx80(env, sin(fptemp));
964 env->fpus &= ~0x400; /* C2 <-- 0 */
965 /* the above code is for |arg| < 2**53 only */
969 void helper_fcos(CPUX86State *env)
971 double fptemp = floatx80_to_double(env, ST0);
973 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
974 env->fpus |= 0x400;
975 } else {
976 ST0 = double_to_floatx80(env, cos(fptemp));
977 env->fpus &= ~0x400; /* C2 <-- 0 */
978 /* the above code is for |arg| < 2**63 only */
982 void helper_fxam_ST0(CPUX86State *env)
984 CPU_LDoubleU temp;
985 int expdif;
987 temp.d = ST0;
989 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
990 if (SIGND(temp)) {
991 env->fpus |= 0x200; /* C1 <-- 1 */
994 /* XXX: test fptags too */
995 expdif = EXPD(temp);
996 if (expdif == MAXEXPD) {
997 if (MANTD(temp) == 0x8000000000000000ULL) {
998 env->fpus |= 0x500; /* Infinity */
999 } else {
1000 env->fpus |= 0x100; /* NaN */
1002 } else if (expdif == 0) {
1003 if (MANTD(temp) == 0) {
1004 env->fpus |= 0x4000; /* Zero */
1005 } else {
1006 env->fpus |= 0x4400; /* Denormal */
1008 } else {
1009 env->fpus |= 0x400;
/*
 * Store the x87 environment (FSTENV layout) to guest memory at ptr:
 * control word, status word (with live TOP merged in), tag word, and
 * zeroed instruction/operand pointer slots.  data32 selects the 32-bit
 * (28-byte) vs 16-bit (14-byte) layout.
 */
static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
                      uintptr_t retaddr)
{
    int fpus, fptag, exp, i;
    uint64_t mant;
    CPU_LDoubleU tmp;

    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    /* Rebuild the 2-bit-per-register tag word from fptags and the
       register contents (00 valid, 01 zero, 10 special, 11 empty). */
    for (i = 7; i >= 0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3;
        } else {
            tmp.d = env->fpregs[i].d;
            exp = EXPD(tmp);
            mant = MANTD(tmp);
            if (exp == 0 && mant == 0) {
                /* zero */
                fptag |= 1;
            } else if (exp == 0 || exp == MAXEXPD
                       || (mant & (1LL << 63)) == 0) {
                /* NaNs, infinity, denormal */
                fptag |= 2;
            }
        }
    }
    if (data32) {
        /* 32 bit */
        cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
        cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
        cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
        cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
        cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
        cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
        cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
    } else {
        /* 16 bit */
        cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
        cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
        cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
        cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
        cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
        cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
        cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
    }
}
1061 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1063 do_fstenv(env, ptr, data32, GETPC());
1066 static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
1068 env->fpstt = (fpus >> 11) & 7;
1069 env->fpus = fpus & ~0x3800 & ~FPUS_B;
1070 env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
1071 #if !defined(CONFIG_USER_ONLY)
1072 if (!(env->fpus & FPUS_SE)) {
1074 * Here the processor deasserts FERR#; in response, the chipset deasserts
1075 * IGNNE#.
1077 cpu_clear_ignne();
1079 #endif
1082 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1083 uintptr_t retaddr)
1085 int i, fpus, fptag;
1087 if (data32) {
1088 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1089 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1090 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1091 } else {
1092 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1093 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1094 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1096 cpu_set_fpus(env, fpus);
1097 for (i = 0; i < 8; i++) {
1098 env->fptags[i] = ((fptag & 3) == 3);
1099 fptag >>= 2;
1103 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1105 do_fldenv(env, ptr, data32, GETPC());
1108 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1110 floatx80 tmp;
1111 int i;
1113 do_fstenv(env, ptr, data32, GETPC());
1115 ptr += (14 << data32);
1116 for (i = 0; i < 8; i++) {
1117 tmp = ST(i);
1118 helper_fstt(env, tmp, ptr, GETPC());
1119 ptr += 10;
1122 /* fninit */
1123 env->fpus = 0;
1124 env->fpstt = 0;
1125 cpu_set_fpuc(env, 0x37f);
1126 env->fptags[0] = 1;
1127 env->fptags[1] = 1;
1128 env->fptags[2] = 1;
1129 env->fptags[3] = 1;
1130 env->fptags[4] = 1;
1131 env->fptags[5] = 1;
1132 env->fptags[6] = 1;
1133 env->fptags[7] = 1;
1136 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
1138 floatx80 tmp;
1139 int i;
1141 do_fldenv(env, ptr, data32, GETPC());
1142 ptr += (14 << data32);
1144 for (i = 0; i < 8; i++) {
1145 tmp = helper_fldt(env, ptr, GETPC());
1146 ST(i) = tmp;
1147 ptr += 10;
1151 #if defined(CONFIG_USER_ONLY)
1152 void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
1154 helper_fsave(env, ptr, data32);
1157 void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
1159 helper_frstor(env, ptr, data32);
1161 #endif
1163 #define XO(X) offsetof(X86XSaveArea, X)
1165 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1167 int fpus, fptag, i;
1168 target_ulong addr;
1170 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1171 fptag = 0;
1172 for (i = 0; i < 8; i++) {
1173 fptag |= (env->fptags[i] << i);
1176 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
1177 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
1178 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
1180 /* In 32-bit mode this is eip, sel, dp, sel.
1181 In 64-bit mode this is rip, rdp.
1182 But in either case we don't write actual data, just zeros. */
1183 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
1184 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
1186 addr = ptr + XO(legacy.fpregs);
1187 for (i = 0; i < 8; i++) {
1188 floatx80 tmp = ST(i);
1189 helper_fstt(env, tmp, addr, ra);
1190 addr += 16;
1194 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1196 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
1197 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
1200 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1202 int i, nb_xmm_regs;
1203 target_ulong addr;
1205 if (env->hflags & HF_CS64_MASK) {
1206 nb_xmm_regs = 16;
1207 } else {
1208 nb_xmm_regs = 8;
1211 addr = ptr + XO(legacy.xmm_regs);
1212 for (i = 0; i < nb_xmm_regs; i++) {
1213 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1214 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1215 addr += 16;
1219 static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1221 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1222 int i;
1224 for (i = 0; i < 4; i++, addr += 16) {
1225 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1226 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1230 static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1232 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
1233 env->bndcs_regs.cfgu, ra);
1234 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
1235 env->bndcs_regs.sts, ra);
1238 static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1240 cpu_stq_data_ra(env, ptr, env->pkru, ra);
1243 void helper_fxsave(CPUX86State *env, target_ulong ptr)
1245 uintptr_t ra = GETPC();
1247 /* The operand must be 16 byte aligned */
1248 if (ptr & 0xf) {
1249 raise_exception_ra(env, EXCP0D_GPF, ra);
1252 do_xsave_fpu(env, ptr, ra);
1254 if (env->cr[4] & CR4_OSFXSR_MASK) {
1255 do_xsave_mxcsr(env, ptr, ra);
1256 /* Fast FXSAVE leaves out the XMM registers */
1257 if (!(env->efer & MSR_EFER_FFXSR)
1258 || (env->hflags & HF_CPL_MASK)
1259 || !(env->hflags & HF_LMA_MASK)) {
1260 do_xsave_sse(env, ptr, ra);
1265 static uint64_t get_xinuse(CPUX86State *env)
1267 uint64_t inuse = -1;
1269 /* For the most part, we don't track XINUSE. We could calculate it
1270 here for all components, but it's probably less work to simply
1271 indicate in use. That said, the state of BNDREGS is important
1272 enough to track in HFLAGS, so we might as well use that here. */
1273 if ((env->hflags & HF_MPX_IU_MASK) == 0) {
1274 inuse &= ~XSTATE_BNDREGS_MASK;
1276 return inuse;
1279 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1280 uint64_t inuse, uint64_t opt, uintptr_t ra)
1282 uint64_t old_bv, new_bv;
1284 /* The OS must have enabled XSAVE. */
1285 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1286 raise_exception_ra(env, EXCP06_ILLOP, ra);
1289 /* The operand must be 64 byte aligned. */
1290 if (ptr & 63) {
1291 raise_exception_ra(env, EXCP0D_GPF, ra);
1294 /* Never save anything not enabled by XCR0. */
1295 rfbm &= env->xcr0;
1296 opt &= rfbm;
1298 if (opt & XSTATE_FP_MASK) {
1299 do_xsave_fpu(env, ptr, ra);
1301 if (rfbm & XSTATE_SSE_MASK) {
1302 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
1303 do_xsave_mxcsr(env, ptr, ra);
1305 if (opt & XSTATE_SSE_MASK) {
1306 do_xsave_sse(env, ptr, ra);
1308 if (opt & XSTATE_BNDREGS_MASK) {
1309 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
1311 if (opt & XSTATE_BNDCSR_MASK) {
1312 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
1314 if (opt & XSTATE_PKRU_MASK) {
1315 do_xsave_pkru(env, ptr + XO(pkru_state), ra);
1318 /* Update the XSTATE_BV field. */
1319 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1320 new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
1321 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
1324 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1326 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1329 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1331 uint64_t inuse = get_xinuse(env);
1332 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1335 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1337 int i, fpuc, fpus, fptag;
1338 target_ulong addr;
1340 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
1341 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
1342 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
1343 cpu_set_fpuc(env, fpuc);
1344 cpu_set_fpus(env, fpus);
1345 fptag ^= 0xff;
1346 for (i = 0; i < 8; i++) {
1347 env->fptags[i] = ((fptag >> i) & 1);
1350 addr = ptr + XO(legacy.fpregs);
1351 for (i = 0; i < 8; i++) {
1352 floatx80 tmp = helper_fldt(env, addr, ra);
1353 ST(i) = tmp;
1354 addr += 16;
1358 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1360 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
1363 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1365 int i, nb_xmm_regs;
1366 target_ulong addr;
1368 if (env->hflags & HF_CS64_MASK) {
1369 nb_xmm_regs = 16;
1370 } else {
1371 nb_xmm_regs = 8;
1374 addr = ptr + XO(legacy.xmm_regs);
1375 for (i = 0; i < nb_xmm_regs; i++) {
1376 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1377 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1378 addr += 16;
1382 static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1384 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1385 int i;
1387 for (i = 0; i < 4; i++, addr += 16) {
1388 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1389 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1393 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1395 /* FIXME: Extend highest implemented bit of linear address. */
1396 env->bndcs_regs.cfgu
1397 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
1398 env->bndcs_regs.sts
1399 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
1402 static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1404 env->pkru = cpu_ldq_data_ra(env, ptr, ra);
/*
 * FXRSTOR: restore x87 state, and (if CR4.OSFXSR is set) MXCSR and
 * the XMM registers, from a 512-byte FXSAVE image at @ptr.
 */
void helper_fxrstor(CPUX86State *env, target_ulong ptr)
{
    uintptr_t ra = GETPC();

    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    do_xrstor_fpu(env, ptr, ra);

    if (env->cr[4] & CR4_OSFXSR_MASK) {
        do_xrstor_mxcsr(env, ptr, ra);
        /* Fast FXRSTOR leaves out the XMM registers */
        /* With EFER.FFXSR set, the XMM registers are skipped only when
           running at CPL 0 in long mode; otherwise restore them. */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            do_xrstor_sse(env, ptr, ra);
        }
    }
}
#if defined(CONFIG_USER_ONLY)
/* Thin wrappers exposing the fxsave/fxrstor helpers to user-mode-only
   code.  NOTE(review): presumably used by the *-user emulation front
   ends (e.g. signal frame handling) — confirm against callers. */
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
{
    helper_fxsave(env, ptr);
}

void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
{
    helper_fxrstor(env, ptr);
}
#endif
/*
 * XRSTOR (standard form): restore the components selected by
 * rfbm & XCR0 from the XSAVE area at @ptr.  Components requested in
 * rfbm but marked absent in the area's XSTATE_BV header are reset to
 * their architectural initial values instead of being loaded.
 */
void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uintptr_t ra = GETPC();
    uint64_t xstate_bv, xcomp_bv, reserve0;

    rfbm &= env->xcr0;

    /* The OS must have enabled XSAVE. */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The operand must be 64 byte aligned. */
    if (ptr & 63) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);

    /* Bit 63 of XCOMP_BV selects the compact format, which we do not
       implement; reject it with #GP. */
    if ((int64_t)xstate_bv < 0) {
        /* FIXME: Compact form. */
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* Standard form. */

    /* The XSTATE_BV field must not set bits not present in XCR0. */
    if (xstate_bv & ~env->xcr0) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* The XCOMP_BV field must be zero. Note that, as of the April 2016
       revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
       describes only XCOMP_BV, but the description of the standard form
       of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
       includes the next 64-bit field. */
    xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
    reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
    if (xcomp_bv || reserve0) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    if (rfbm & XSTATE_FP_MASK) {
        if (xstate_bv & XSTATE_FP_MASK) {
            do_xrstor_fpu(env, ptr, ra);
        } else {
            /* Component absent: reinitialize the x87 unit. */
            helper_fninit(env);
            memset(env->fpregs, 0, sizeof(env->fpregs));
        }
    }
    if (rfbm & XSTATE_SSE_MASK) {
        /* Note that the standard form of XRSTOR loads MXCSR from memory
           whether or not the XSTATE_BV bit is set. */
        do_xrstor_mxcsr(env, ptr, ra);
        if (xstate_bv & XSTATE_SSE_MASK) {
            do_xrstor_sse(env, ptr, ra);
        } else {
            /* ??? When AVX is implemented, we may have to be more
               selective in the clearing. */
            memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
        }
    }
    if (rfbm & XSTATE_BNDREGS_MASK) {
        if (xstate_bv & XSTATE_BNDREGS_MASK) {
            do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
            /* Loaded bounds are now live; track that in hflags. */
            env->hflags |= HF_MPX_IU_MASK;
        } else {
            memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
            env->hflags &= ~HF_MPX_IU_MASK;
        }
    }
    if (rfbm & XSTATE_BNDCSR_MASK) {
        if (xstate_bv & XSTATE_BNDCSR_MASK) {
            do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
        } else {
            memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
        }
        /* BNDCFGU may have changed; recompute the MPX hflags. */
        cpu_sync_bndcs_hflags(env);
    }
    if (rfbm & XSTATE_PKRU_MASK) {
        uint64_t old_pkru = env->pkru;
        if (xstate_bv & XSTATE_PKRU_MASK) {
            do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
        } else {
            env->pkru = 0;
        }
        /* A PKRU change affects page-access permissions, so cached
           translations must be discarded. */
        if (env->pkru != old_pkru) {
            CPUState *cs = env_cpu(env);
            tlb_flush(cs);
        }
    }
}
1534 #undef XO
1536 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1538 /* The OS must have enabled XSAVE. */
1539 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1540 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1543 switch (ecx) {
1544 case 0:
1545 return env->xcr0;
1546 case 1:
1547 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1548 return env->xcr0 & get_xinuse(env);
1550 break;
1552 raise_exception_ra(env, EXCP0D_GPF, GETPC());
/*
 * XSETBV: write extended control register @ecx with EDX:EAX (@mask).
 * Only XCR0 exists; invalid indices, unsupported feature bits, a clear
 * FP bit, or enabling only half of the MPX pair all raise #GP.
 */
void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
{
    uint32_t dummy, ena_lo, ena_hi;
    uint64_t ena;

    /* The OS must have enabled XSAVE. */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
    }

    /* Only XCR0 is defined at present; the FPU may not be disabled. */
    if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
        goto do_gpf;
    }

    /* Disallow enabling unimplemented features: CPUID leaf 0xD,
       subleaf 0 reports the supported XCR0 bits in EDX:EAX. */
    cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
    ena = ((uint64_t)ena_hi << 32) | ena_lo;
    if (mask & ~ena) {
        goto do_gpf;
    }

    /* Disallow enabling only half of MPX.  The multiply shifts the
       BNDREGS bit up to the BNDCSR position (the mask ratio is a power
       of two), so the XOR has the BNDCSR bit set exactly when the two
       MPX bits disagree. */
    if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
        & XSTATE_BNDCSR_MASK) {
        goto do_gpf;
    }

    env->xcr0 = mask;
    cpu_sync_bndcs_hflags(env);
    return;

 do_gpf:
    raise_exception_ra(env, EXCP0D_GPF, GETPC());
}
1591 /* MMX/SSE */
1592 /* XXX: optimize by storing fptt and fptags in the static cpu state */
1594 #define SSE_DAZ 0x0040
1595 #define SSE_RC_MASK 0x6000
1596 #define SSE_RC_NEAR 0x0000
1597 #define SSE_RC_DOWN 0x2000
1598 #define SSE_RC_UP 0x4000
1599 #define SSE_RC_CHOP 0x6000
1600 #define SSE_FZ 0x8000
1602 void update_mxcsr_status(CPUX86State *env)
1604 uint32_t mxcsr = env->mxcsr;
1605 int rnd_type;
1607 /* set rounding mode */
1608 switch (mxcsr & SSE_RC_MASK) {
1609 default:
1610 case SSE_RC_NEAR:
1611 rnd_type = float_round_nearest_even;
1612 break;
1613 case SSE_RC_DOWN:
1614 rnd_type = float_round_down;
1615 break;
1616 case SSE_RC_UP:
1617 rnd_type = float_round_up;
1618 break;
1619 case SSE_RC_CHOP:
1620 rnd_type = float_round_to_zero;
1621 break;
1623 set_float_rounding_mode(rnd_type, &env->sse_status);
1625 /* set denormals are zero */
1626 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
1628 /* set flush to zero */
1629 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
/* LDMXCSR: store @val into MXCSR and refresh the derived SSE
   softfloat state via cpu_set_mxcsr(). */
void helper_ldmxcsr(CPUX86State *env, uint32_t val)
{
    cpu_set_mxcsr(env, val);
}
1637 void helper_enter_mmx(CPUX86State *env)
1639 env->fpstt = 0;
1640 *(uint32_t *)(env->fptags) = 0;
1641 *(uint32_t *)(env->fptags + 4) = 0;
1644 void helper_emms(CPUX86State *env)
1646 /* set to empty state */
1647 *(uint32_t *)(env->fptags) = 0x01010101;
1648 *(uint32_t *)(env->fptags + 4) = 0x01010101;
1651 /* XXX: suppress */
1652 void helper_movq(CPUX86State *env, void *d, void *s)
1654 *(uint64_t *)d = *(uint64_t *)s;
1657 #define SHIFT 0
1658 #include "ops_sse.h"
1660 #define SHIFT 1
1661 #include "ops_sse.h"