[qemu/ar7.git] / target / i386 / fpu_helper.c
blob 792a128a6da2d6cd2b6b99634c84fe615ef82972
1 /*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
20 #include "qemu/osdep.h"
21 #include <math.h>
22 #include "cpu.h"
23 #include "exec/helper-proto.h"
24 #include "qemu/host-utils.h"
25 #include "exec/exec-all.h"
26 #include "exec/cpu_ldst.h"
27 #include "fpu/softfloat.h"
29 #ifdef CONFIG_SOFTMMU
30 #include "hw/irq.h"
31 #endif
33 #define FPU_RC_MASK 0xc00
34 #define FPU_RC_NEAR 0x000
35 #define FPU_RC_DOWN 0x400
36 #define FPU_RC_UP 0x800
37 #define FPU_RC_CHOP 0xc00
39 #define MAXTAN 9223372036854775808.0
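/* 9223372036854775808.0 == 2**63; the trigonometric helpers below (fptan,
   fsin, fcos, fsincos) report C2=1 and leave the operand unreduced when the
   magnitude of the argument exceeds this bound. */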
41 /* the following deal with x86 long double-precision numbers */
42 #define MAXEXPD 0x7fff
43 #define EXPBIAS 16383
44 #define EXPD(fp) (fp.l.upper & 0x7fff)
45 #define SIGND(fp) ((fp.l.upper) & 0x8000)
46 #define MANTD(fp) (fp.l.lower)
47 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
49 #define FPUS_IE (1 << 0)
50 #define FPUS_DE (1 << 1)
51 #define FPUS_ZE (1 << 2)
52 #define FPUS_OE (1 << 3)
53 #define FPUS_UE (1 << 4)
54 #define FPUS_PE (1 << 5)
55 #define FPUS_SF (1 << 6)
56 #define FPUS_SE (1 << 7)
57 #define FPUS_B (1 << 15)
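/*
 * x87 status word layout behind the FPUS_* bits above: bits 0-5 are the
 * exception flags (invalid, denormal, zero-divide, overflow, underflow,
 * precision), bit 6 is the stack fault flag, bit 7 the exception summary,
 * bits 8/9/10/14 the condition codes C0/C1/C2/C3, bits 11-13 the top-of-stack
 * pointer (hence the 0x3800 and 0x4700 masks used below) and bit 15 busy.
 */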
59 #define FPUC_EM 0x3f
61 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
62 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
63 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
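/*
 * These are x87 load-constant values in 80-bit format, built as
 * make_floatx80(sign+biased exponent, significand): floatx80_lg2 is log10(2),
 * floatx80_l2e is log2(e) and floatx80_l2t is log2(10).
 */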
65 #if !defined(CONFIG_USER_ONLY)
66 static qemu_irq ferr_irq;
67
68 void x86_register_ferr_irq(qemu_irq irq)
69 {
70 ferr_irq = irq;
71 }
72
73 static void cpu_clear_ignne(void)
74 {
75 CPUX86State *env = &X86_CPU(first_cpu)->env;
76 env->hflags2 &= ~HF2_IGNNE_MASK;
77 }
78
79 void cpu_set_ignne(void)
80 {
81 CPUX86State *env = &X86_CPU(first_cpu)->env;
82 env->hflags2 |= HF2_IGNNE_MASK;
83 /*
84 * We get here in response to a write to port F0h. The chipset should
85 * deassert FP_IRQ and FERR# instead should stay signaled until FPSW_SE is
86 * cleared, because FERR# and FP_IRQ are two separate pins on real
87 * hardware. However, we don't model FERR# as a qemu_irq, so we just
88 * do directly what the chipset would do, i.e. deassert FP_IRQ.
89 */
90 qemu_irq_lower(ferr_irq);
91 }
92 #endif
95 static inline void fpush(CPUX86State *env)
97 env->fpstt = (env->fpstt - 1) & 7;
98 env->fptags[env->fpstt] = 0; /* validate stack entry */
101 static inline void fpop(CPUX86State *env)
103 env->fptags[env->fpstt] = 1; /* invalidate stack entry */
104 env->fpstt = (env->fpstt + 1) & 7;
107 static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
108 uintptr_t retaddr)
110 CPU_LDoubleU temp;
112 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
113 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
114 return temp.d;
117 static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
118 uintptr_t retaddr)
120 CPU_LDoubleU temp;
122 temp.d = f;
123 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
124 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
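/*
 * helper_fldt()/helper_fstt() above use the 10-byte memory image of an x87
 * extended-precision value: bytes 0-7 hold the 64-bit significand and
 * bytes 8-9 hold the sign bit plus the 15-bit biased exponent.
 */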
127 /* x87 FPU helpers */
129 static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
131 union {
132 float64 f64;
133 double d;
134 } u;
136 u.f64 = floatx80_to_float64(a, &env->fp_status);
137 return u.d;
140 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
142 union {
143 float64 f64;
144 double d;
145 } u;
147 u.d = a;
148 return float64_to_floatx80(u.f64, &env->fp_status);
151 static void fpu_set_exception(CPUX86State *env, int mask)
153 env->fpus |= mask;
154 if (env->fpus & (~env->fpuc & FPUC_EM)) {
155 env->fpus |= FPUS_SE | FPUS_B;
159 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
161 if (floatx80_is_zero(b)) {
162 fpu_set_exception(env, FPUS_ZE);
164 return floatx80_div(a, b, &env->fp_status);
167 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
169 if (env->cr[0] & CR0_NE_MASK) {
170 raise_exception_ra(env, EXCP10_COPR, retaddr);
172 #if !defined(CONFIG_USER_ONLY)
173 else if (ferr_irq && !(env->hflags2 & HF2_IGNNE_MASK)) {
174 qemu_irq_raise(ferr_irq);
176 #endif
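/*
 * Note on fpu_raise_exception() above: CR0.NE selects how a pending x87
 * error is reported. With NE=1 the CPU raises #MF (EXCP10_COPR) directly;
 * with NE=0 the legacy path is used and the error is signalled through the
 * external FERR# line, modelled here by raising ferr_irq unless IGNNE is
 * asserted.
 */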
179 void helper_flds_FT0(CPUX86State *env, uint32_t val)
181 union {
182 float32 f;
183 uint32_t i;
184 } u;
186 u.i = val;
187 FT0 = float32_to_floatx80(u.f, &env->fp_status);
190 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
192 union {
193 float64 f;
194 uint64_t i;
195 } u;
197 u.i = val;
198 FT0 = float64_to_floatx80(u.f, &env->fp_status);
201 void helper_fildl_FT0(CPUX86State *env, int32_t val)
203 FT0 = int32_to_floatx80(val, &env->fp_status);
206 void helper_flds_ST0(CPUX86State *env, uint32_t val)
208 int new_fpstt;
209 union {
210 float32 f;
211 uint32_t i;
212 } u;
214 new_fpstt = (env->fpstt - 1) & 7;
215 u.i = val;
216 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
217 env->fpstt = new_fpstt;
218 env->fptags[new_fpstt] = 0; /* validate stack entry */
221 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
223 int new_fpstt;
224 union {
225 float64 f;
226 uint64_t i;
227 } u;
229 new_fpstt = (env->fpstt - 1) & 7;
230 u.i = val;
231 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
232 env->fpstt = new_fpstt;
233 env->fptags[new_fpstt] = 0; /* validate stack entry */
236 void helper_fildl_ST0(CPUX86State *env, int32_t val)
238 int new_fpstt;
240 new_fpstt = (env->fpstt - 1) & 7;
241 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
242 env->fpstt = new_fpstt;
243 env->fptags[new_fpstt] = 0; /* validate stack entry */
246 void helper_fildll_ST0(CPUX86State *env, int64_t val)
248 int new_fpstt;
250 new_fpstt = (env->fpstt - 1) & 7;
251 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
252 env->fpstt = new_fpstt;
253 env->fptags[new_fpstt] = 0; /* validate stack entry */
256 uint32_t helper_fsts_ST0(CPUX86State *env)
258 union {
259 float32 f;
260 uint32_t i;
261 } u;
263 u.f = floatx80_to_float32(ST0, &env->fp_status);
264 return u.i;
267 uint64_t helper_fstl_ST0(CPUX86State *env)
269 union {
270 float64 f;
271 uint64_t i;
272 } u;
274 u.f = floatx80_to_float64(ST0, &env->fp_status);
275 return u.i;
278 int32_t helper_fist_ST0(CPUX86State *env)
280 int32_t val;
282 val = floatx80_to_int32(ST0, &env->fp_status);
283 if (val != (int16_t)val) {
284 val = -32768;
286 return val;
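/*
 * helper_fist_ST0() above implements FIST/FISTP with a 16-bit destination:
 * the conversion is done as int32 and any result outside the int16 range is
 * replaced by -32768, the 16-bit "integer indefinite" value.
 */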
289 int32_t helper_fistl_ST0(CPUX86State *env)
291 int32_t val;
292 signed char old_exp_flags;
294 old_exp_flags = get_float_exception_flags(&env->fp_status);
295 set_float_exception_flags(0, &env->fp_status);
297 val = floatx80_to_int32(ST0, &env->fp_status);
298 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
299 val = 0x80000000;
301 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
302 | old_exp_flags, &env->fp_status);
303 return val;
306 int64_t helper_fistll_ST0(CPUX86State *env)
308 int64_t val;
309 signed char old_exp_flags;
311 old_exp_flags = get_float_exception_flags(&env->fp_status);
312 set_float_exception_flags(0, &env->fp_status);
314 val = floatx80_to_int64(ST0, &env->fp_status);
315 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
316 val = 0x8000000000000000ULL;
318 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
319 | old_exp_flags, &env->fp_status);
320 return val;
323 int32_t helper_fistt_ST0(CPUX86State *env)
325 int32_t val;
327 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
328 if (val != (int16_t)val) {
329 val = -32768;
331 return val;
334 int32_t helper_fisttl_ST0(CPUX86State *env)
336 return floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
339 int64_t helper_fisttll_ST0(CPUX86State *env)
341 return floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
344 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
346 int new_fpstt;
348 new_fpstt = (env->fpstt - 1) & 7;
349 env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
350 env->fpstt = new_fpstt;
351 env->fptags[new_fpstt] = 0; /* validate stack entry */
354 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
356 helper_fstt(env, ST0, ptr, GETPC());
359 void helper_fpush(CPUX86State *env)
361 fpush(env);
364 void helper_fpop(CPUX86State *env)
366 fpop(env);
369 void helper_fdecstp(CPUX86State *env)
371 env->fpstt = (env->fpstt - 1) & 7;
372 env->fpus &= ~0x4700;
375 void helper_fincstp(CPUX86State *env)
377 env->fpstt = (env->fpstt + 1) & 7;
378 env->fpus &= ~0x4700;
381 /* FPU move */
383 void helper_ffree_STN(CPUX86State *env, int st_index)
385 env->fptags[(env->fpstt + st_index) & 7] = 1;
388 void helper_fmov_ST0_FT0(CPUX86State *env)
390 ST0 = FT0;
393 void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
395 FT0 = ST(st_index);
398 void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
400 ST0 = ST(st_index);
403 void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
405 ST(st_index) = ST0;
408 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
410 floatx80 tmp;
412 tmp = ST(st_index);
413 ST(st_index) = ST0;
414 ST0 = tmp;
417 /* FPU operations */
419 static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
421 void helper_fcom_ST0_FT0(CPUX86State *env)
423 int ret;
425 ret = floatx80_compare(ST0, FT0, &env->fp_status);
426 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
429 void helper_fucom_ST0_FT0(CPUX86State *env)
431 int ret;
433 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
434 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
437 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
439 void helper_fcomi_ST0_FT0(CPUX86State *env)
441 int eflags;
442 int ret;
444 ret = floatx80_compare(ST0, FT0, &env->fp_status);
445 eflags = cpu_cc_compute_all(env, CC_OP);
446 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
447 CC_SRC = eflags;
450 void helper_fucomi_ST0_FT0(CPUX86State *env)
452 int eflags;
453 int ret;
455 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
456 eflags = cpu_cc_compute_all(env, CC_OP);
457 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
458 CC_SRC = eflags;
461 void helper_fadd_ST0_FT0(CPUX86State *env)
463 ST0 = floatx80_add(ST0, FT0, &env->fp_status);
466 void helper_fmul_ST0_FT0(CPUX86State *env)
468 ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
471 void helper_fsub_ST0_FT0(CPUX86State *env)
473 ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
476 void helper_fsubr_ST0_FT0(CPUX86State *env)
478 ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
481 void helper_fdiv_ST0_FT0(CPUX86State *env)
483 ST0 = helper_fdiv(env, ST0, FT0);
486 void helper_fdivr_ST0_FT0(CPUX86State *env)
488 ST0 = helper_fdiv(env, FT0, ST0);
491 /* fp operations between STN and ST0 */
493 void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
495 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
498 void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
500 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
503 void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
505 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
508 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
510 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
513 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
515 floatx80 *p;
517 p = &ST(st_index);
518 *p = helper_fdiv(env, *p, ST0);
521 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
523 floatx80 *p;
525 p = &ST(st_index);
526 *p = helper_fdiv(env, ST0, *p);
529 /* misc FPU operations */
530 void helper_fchs_ST0(CPUX86State *env)
532 ST0 = floatx80_chs(ST0);
535 void helper_fabs_ST0(CPUX86State *env)
537 ST0 = floatx80_abs(ST0);
540 void helper_fld1_ST0(CPUX86State *env)
542 ST0 = floatx80_one;
545 void helper_fldl2t_ST0(CPUX86State *env)
547 ST0 = floatx80_l2t;
550 void helper_fldl2e_ST0(CPUX86State *env)
552 ST0 = floatx80_l2e;
555 void helper_fldpi_ST0(CPUX86State *env)
557 ST0 = floatx80_pi;
560 void helper_fldlg2_ST0(CPUX86State *env)
562 ST0 = floatx80_lg2;
565 void helper_fldln2_ST0(CPUX86State *env)
567 ST0 = floatx80_ln2;
570 void helper_fldz_ST0(CPUX86State *env)
572 ST0 = floatx80_zero;
575 void helper_fldz_FT0(CPUX86State *env)
577 FT0 = floatx80_zero;
580 uint32_t helper_fnstsw(CPUX86State *env)
582 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
585 uint32_t helper_fnstcw(CPUX86State *env)
587 return env->fpuc;
590 void update_fp_status(CPUX86State *env)
592 int rnd_type;
594 /* set rounding mode */
595 switch (env->fpuc & FPU_RC_MASK) {
596 default:
597 case FPU_RC_NEAR:
598 rnd_type = float_round_nearest_even;
599 break;
600 case FPU_RC_DOWN:
601 rnd_type = float_round_down;
602 break;
603 case FPU_RC_UP:
604 rnd_type = float_round_up;
605 break;
606 case FPU_RC_CHOP:
607 rnd_type = float_round_to_zero;
608 break;
610 set_float_rounding_mode(rnd_type, &env->fp_status);
611 switch ((env->fpuc >> 8) & 3) {
612 case 0:
613 rnd_type = 32;
614 break;
615 case 2:
616 rnd_type = 64;
617 break;
618 case 3:
619 default:
620 rnd_type = 80;
621 break;
623 set_floatx80_rounding_precision(rnd_type, &env->fp_status);
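/*
 * Control word fields decoded above: bits 11:10 are the rounding control
 * matching the FPU_RC_* masks, and bits 9:8 are the precision control, where
 * 0 selects single, 2 double and 3 extended precision (the reserved value 1
 * falls through to extended here); the 32/64/80 values passed to
 * set_floatx80_rounding_precision() select the corresponding softfloat
 * rounding precision.
 */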
626 void helper_fldcw(CPUX86State *env, uint32_t val)
628 cpu_set_fpuc(env, val);
631 void helper_fclex(CPUX86State *env)
633 env->fpus &= 0x7f00;
636 void helper_fwait(CPUX86State *env)
638 if (env->fpus & FPUS_SE) {
639 fpu_raise_exception(env, GETPC());
643 void helper_fninit(CPUX86State *env)
645 env->fpus = 0;
646 env->fpstt = 0;
647 cpu_set_fpuc(env, 0x37f);
648 env->fptags[0] = 1;
649 env->fptags[1] = 1;
650 env->fptags[2] = 1;
651 env->fptags[3] = 1;
652 env->fptags[4] = 1;
653 env->fptags[5] = 1;
654 env->fptags[6] = 1;
655 env->fptags[7] = 1;
658 /* BCD ops */
660 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
662 floatx80 tmp;
663 uint64_t val;
664 unsigned int v;
665 int i;
667 val = 0;
668 for (i = 8; i >= 0; i--) {
669 v = cpu_ldub_data_ra(env, ptr + i, GETPC());
670 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
672 tmp = int64_to_floatx80(val, &env->fp_status);
673 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
674 tmp = floatx80_chs(tmp);
676 fpush(env);
677 ST0 = tmp;
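/*
 * FBLD (helper_fbld_ST0 above) operates on a 10-byte packed BCD operand:
 * bytes 0-8 hold 18 decimal digits, two per byte with the low nibble less
 * significant, and bit 7 of byte 9 is the sign. As an illustration, +1234
 * would be stored as 34 12 00 00 00 00 00 00 00 00 (byte 0 first).
 */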
680 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
682 int v;
683 target_ulong mem_ref, mem_end;
684 int64_t val;
686 val = floatx80_to_int64(ST0, &env->fp_status);
687 mem_ref = ptr;
688 mem_end = mem_ref + 9;
689 if (val < 0) {
690 cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
691 val = -val;
692 } else {
693 cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
695 while (mem_ref < mem_end) {
696 if (val == 0) {
697 break;
699 v = val % 100;
700 val = val / 100;
701 v = ((v / 10) << 4) | (v % 10);
702 cpu_stb_data_ra(env, mem_ref++, v, GETPC());
704 while (mem_ref < mem_end) {
705 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
709 void helper_f2xm1(CPUX86State *env)
711 double val = floatx80_to_double(env, ST0);
713 val = pow(2.0, val) - 1.0;
714 ST0 = double_to_floatx80(env, val);
717 void helper_fyl2x(CPUX86State *env)
719 double fptemp = floatx80_to_double(env, ST0);
721 if (fptemp > 0.0) {
722 fptemp = log(fptemp) / log(2.0); /* log2(ST) */
723 fptemp *= floatx80_to_double(env, ST1);
724 ST1 = double_to_floatx80(env, fptemp);
725 fpop(env);
726 } else {
727 env->fpus &= ~0x4700;
728 env->fpus |= 0x400;
732 void helper_fptan(CPUX86State *env)
734 double fptemp = floatx80_to_double(env, ST0);
736 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
737 env->fpus |= 0x400;
738 } else {
739 fptemp = tan(fptemp);
740 ST0 = double_to_floatx80(env, fptemp);
741 fpush(env);
742 ST0 = floatx80_one;
743 env->fpus &= ~0x400; /* C2 <-- 0 */
744 /* the above code is for |arg| < 2**52 only */
748 void helper_fpatan(CPUX86State *env)
750 double fptemp, fpsrcop;
752 fpsrcop = floatx80_to_double(env, ST1);
753 fptemp = floatx80_to_double(env, ST0);
754 ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
755 fpop(env);
758 void helper_fxtract(CPUX86State *env)
760 CPU_LDoubleU temp;
762 temp.d = ST0;
764 if (floatx80_is_zero(ST0)) {
765 /* Easy way to generate -inf and raising division by 0 exception */
766 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
767 &env->fp_status);
768 fpush(env);
769 ST0 = temp.d;
770 } else {
771 int expdif;
773 expdif = EXPD(temp) - EXPBIAS;
774 /* DP exponent bias */
775 ST0 = int32_to_floatx80(expdif, &env->fp_status);
776 fpush(env);
777 BIASEXPONENT(temp);
778 ST0 = temp.d;
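/*
 * FXTRACT (helper_fxtract above) splits ST0 into exponent and significand,
 * e.g. with ST0 = 6.0 (1.5 * 2**2) the result is ST1 = 2.0 and ST0 = 1.5;
 * BIASEXPONENT() forces the copied exponent field to the bias so the
 * significand lands in [1, 2).
 */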
782 void helper_fprem1(CPUX86State *env)
784 double st0, st1, dblq, fpsrcop, fptemp;
785 CPU_LDoubleU fpsrcop1, fptemp1;
786 int expdif;
787 signed long long int q;
789 st0 = floatx80_to_double(env, ST0);
790 st1 = floatx80_to_double(env, ST1);
792 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
793 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
794 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
795 return;
798 fpsrcop = st0;
799 fptemp = st1;
800 fpsrcop1.d = ST0;
801 fptemp1.d = ST1;
802 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
804 if (expdif < 0) {
805 /* optimisation? taken from the AMD docs */
806 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
807 /* ST0 is unchanged */
808 return;
811 if (expdif < 53) {
812 dblq = fpsrcop / fptemp;
813 /* round dblq towards nearest integer */
814 dblq = rint(dblq);
815 st0 = fpsrcop - fptemp * dblq;
817 /* convert dblq to q by truncating towards zero */
818 if (dblq < 0.0) {
819 q = (signed long long int)(-dblq);
820 } else {
821 q = (signed long long int)dblq;
824 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
825 /* (C0,C3,C1) <-- (q2,q1,q0) */
826 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
827 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
828 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
829 } else {
830 env->fpus |= 0x400; /* C2 <-- 1 */
831 fptemp = pow(2.0, expdif - 50);
832 fpsrcop = (st0 / st1) / fptemp;
833 /* fpsrcop = integer obtained by chopping */
834 fpsrcop = (fpsrcop < 0.0) ?
835 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
836 st0 -= (st1 * fpsrcop * fptemp);
838 ST0 = double_to_floatx80(env, st0);
841 void helper_fprem(CPUX86State *env)
843 double st0, st1, dblq, fpsrcop, fptemp;
844 CPU_LDoubleU fpsrcop1, fptemp1;
845 int expdif;
846 signed long long int q;
848 st0 = floatx80_to_double(env, ST0);
849 st1 = floatx80_to_double(env, ST1);
851 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
852 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
853 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
854 return;
857 fpsrcop = st0;
858 fptemp = st1;
859 fpsrcop1.d = ST0;
860 fptemp1.d = ST1;
861 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
863 if (expdif < 0) {
864 /* optimisation? taken from the AMD docs */
865 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
866 /* ST0 is unchanged */
867 return;
870 if (expdif < 53) {
871 dblq = fpsrcop / fptemp; /* ST0 / ST1 */
872 /* round dblq towards zero */
873 dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
874 st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
876 /* convert dblq to q by truncating towards zero */
877 if (dblq < 0.0) {
878 q = (signed long long int)(-dblq);
879 } else {
880 q = (signed long long int)dblq;
883 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
884 /* (C0,C3,C1) <-- (q2,q1,q0) */
885 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
886 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
887 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
888 } else {
889 int N = 32 + (expdif % 32); /* as per AMD docs */
891 env->fpus |= 0x400; /* C2 <-- 1 */
892 fptemp = pow(2.0, (double)(expdif - N));
893 fpsrcop = (st0 / st1) / fptemp;
894 /* fpsrcop = integer obtained by chopping */
895 fpsrcop = (fpsrcop < 0.0) ?
896 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
897 st0 -= (st1 * fpsrcop * fptemp);
899 ST0 = double_to_floatx80(env, st0);
902 void helper_fyl2xp1(CPUX86State *env)
904 double fptemp = floatx80_to_double(env, ST0);
906 if ((fptemp + 1.0) > 0.0) {
907 fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
908 fptemp *= floatx80_to_double(env, ST1);
909 ST1 = double_to_floatx80(env, fptemp);
910 fpop(env);
911 } else {
912 env->fpus &= ~0x4700;
913 env->fpus |= 0x400;
917 void helper_fsqrt(CPUX86State *env)
919 if (floatx80_is_neg(ST0)) {
920 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
921 env->fpus |= 0x400;
923 ST0 = floatx80_sqrt(ST0, &env->fp_status);
926 void helper_fsincos(CPUX86State *env)
928 double fptemp = floatx80_to_double(env, ST0);
930 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
931 env->fpus |= 0x400;
932 } else {
933 ST0 = double_to_floatx80(env, sin(fptemp));
934 fpush(env);
935 ST0 = double_to_floatx80(env, cos(fptemp));
936 env->fpus &= ~0x400; /* C2 <-- 0 */
937 /* the above code is for |arg| < 2**63 only */
941 void helper_frndint(CPUX86State *env)
943 ST0 = floatx80_round_to_int(ST0, &env->fp_status);
946 void helper_fscale(CPUX86State *env)
948 if (floatx80_is_any_nan(ST1)) {
949 ST0 = ST1;
950 } else {
951 int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
952 ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
956 void helper_fsin(CPUX86State *env)
958 double fptemp = floatx80_to_double(env, ST0);
960 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
961 env->fpus |= 0x400;
962 } else {
963 ST0 = double_to_floatx80(env, sin(fptemp));
964 env->fpus &= ~0x400; /* C2 <-- 0 */
965 /* the above code is for |arg| < 2**53 only */
969 void helper_fcos(CPUX86State *env)
971 double fptemp = floatx80_to_double(env, ST0);
973 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
974 env->fpus |= 0x400;
975 } else {
976 ST0 = double_to_floatx80(env, cos(fptemp));
977 env->fpus &= ~0x400; /* C2 <-- 0 */
978 /* the above code is for |arg| < 2**63 only */
982 void helper_fxam_ST0(CPUX86State *env)
984 CPU_LDoubleU temp;
985 int expdif;
987 temp.d = ST0;
989 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
990 if (SIGND(temp)) {
991 env->fpus |= 0x200; /* C1 <-- 1 */
994 if (env->fptags[env->fpstt]) {
995 env->fpus |= 0x4100; /* Empty */
996 return;
999 expdif = EXPD(temp);
1000 if (expdif == MAXEXPD) {
1001 if (MANTD(temp) == 0x8000000000000000ULL) {
1002 env->fpus |= 0x500; /* Infinity */
1003 } else {
1004 env->fpus |= 0x100; /* NaN */
1006 } else if (expdif == 0) {
1007 if (MANTD(temp) == 0) {
1008 env->fpus |= 0x4000; /* Zero */
1009 } else {
1010 env->fpus |= 0x4400; /* Denormal */
1012 } else {
1013 env->fpus |= 0x400;
1017 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
1018 uintptr_t retaddr)
1020 int fpus, fptag, exp, i;
1021 uint64_t mant;
1022 CPU_LDoubleU tmp;
1024 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1025 fptag = 0;
1026 for (i = 7; i >= 0; i--) {
1027 fptag <<= 2;
1028 if (env->fptags[i]) {
1029 fptag |= 3;
1030 } else {
1031 tmp.d = env->fpregs[i].d;
1032 exp = EXPD(tmp);
1033 mant = MANTD(tmp);
1034 if (exp == 0 && mant == 0) {
1035 /* zero */
1036 fptag |= 1;
1037 } else if (exp == 0 || exp == MAXEXPD
1038 || (mant & (1LL << 63)) == 0) {
1039 /* NaNs, infinity, denormal */
1040 fptag |= 2;
1044 if (data32) {
1045 /* 32 bit */
1046 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1047 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1048 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1049 cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1050 cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1051 cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1052 cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
1053 } else {
1054 /* 16 bit */
1055 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1056 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1057 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1058 cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1059 cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1060 cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1061 cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
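/*
 * The tag word written by do_fstenv() above uses the architectural 2-bit
 * encoding per register: 0 = valid, 1 = zero, 2 = special (NaN, infinity or
 * denormal), 3 = empty. env->fptags[] only records empty/non-empty, so the
 * finer classes are recomputed from the register contents.
 */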
1065 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1067 do_fstenv(env, ptr, data32, GETPC());
1070 static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
1071 {
1072 env->fpstt = (fpus >> 11) & 7;
1073 env->fpus = fpus & ~0x3800 & ~FPUS_B;
1074 env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
1075 #if !defined(CONFIG_USER_ONLY)
1076 if (!(env->fpus & FPUS_SE)) {
1077 /*
1078 * Here the processor deasserts FERR#; in response, the chipset deasserts
1079 * IGNNE#.
1080 */
1081 cpu_clear_ignne();
1082 }
1083 #endif
1084 }
1086 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1087 uintptr_t retaddr)
1089 int i, fpus, fptag;
1091 if (data32) {
1092 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1093 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1094 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1095 } else {
1096 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1097 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1098 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1100 cpu_set_fpus(env, fpus);
1101 for (i = 0; i < 8; i++) {
1102 env->fptags[i] = ((fptag & 3) == 3);
1103 fptag >>= 2;
1107 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1109 do_fldenv(env, ptr, data32, GETPC());
1112 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1114 floatx80 tmp;
1115 int i;
1117 do_fstenv(env, ptr, data32, GETPC());
1119 ptr += (14 << data32);
1120 for (i = 0; i < 8; i++) {
1121 tmp = ST(i);
1122 helper_fstt(env, tmp, ptr, GETPC());
1123 ptr += 10;
1126 /* fninit */
1127 env->fpus = 0;
1128 env->fpstt = 0;
1129 cpu_set_fpuc(env, 0x37f);
1130 env->fptags[0] = 1;
1131 env->fptags[1] = 1;
1132 env->fptags[2] = 1;
1133 env->fptags[3] = 1;
1134 env->fptags[4] = 1;
1135 env->fptags[5] = 1;
1136 env->fptags[6] = 1;
1137 env->fptags[7] = 1;
1140 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
1142 floatx80 tmp;
1143 int i;
1145 do_fldenv(env, ptr, data32, GETPC());
1146 ptr += (14 << data32);
1148 for (i = 0; i < 8; i++) {
1149 tmp = helper_fldt(env, ptr, GETPC());
1150 ST(i) = tmp;
1151 ptr += 10;
1155 #if defined(CONFIG_USER_ONLY)
1156 void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
1158 helper_fsave(env, ptr, data32);
1161 void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
1163 helper_frstor(env, ptr, data32);
1165 #endif
1167 #define XO(X) offsetof(X86XSaveArea, X)
1169 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1171 int fpus, fptag, i;
1172 target_ulong addr;
1174 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1175 fptag = 0;
1176 for (i = 0; i < 8; i++) {
1177 fptag |= (env->fptags[i] << i);
1180 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
1181 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
1182 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
1184 /* In 32-bit mode this is eip, sel, dp, sel.
1185 In 64-bit mode this is rip, rdp.
1186 But in either case we don't write actual data, just zeros. */
1187 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
1188 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
1190 addr = ptr + XO(legacy.fpregs);
1191 for (i = 0; i < 8; i++) {
1192 floatx80 tmp = ST(i);
1193 helper_fstt(env, tmp, addr, ra);
1194 addr += 16;
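/*
 * The FXSAVE/XSAVE legacy area stores the abridged tag word: one bit per
 * register with 1 meaning non-empty, hence the "fptag ^ 0xff" above since
 * env->fptags[] uses 1 for empty. Each ST/MM register occupies a 16-byte
 * slot of which the first 10 bytes are the 80-bit value.
 */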
1198 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1200 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
1201 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
1204 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1206 int i, nb_xmm_regs;
1207 target_ulong addr;
1209 if (env->hflags & HF_CS64_MASK) {
1210 nb_xmm_regs = 16;
1211 } else {
1212 nb_xmm_regs = 8;
1215 addr = ptr + XO(legacy.xmm_regs);
1216 for (i = 0; i < nb_xmm_regs; i++) {
1217 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1218 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1219 addr += 16;
1223 static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1225 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1226 int i;
1228 for (i = 0; i < 4; i++, addr += 16) {
1229 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1230 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1234 static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1236 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
1237 env->bndcs_regs.cfgu, ra);
1238 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
1239 env->bndcs_regs.sts, ra);
1242 static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1244 cpu_stq_data_ra(env, ptr, env->pkru, ra);
1247 void helper_fxsave(CPUX86State *env, target_ulong ptr)
1249 uintptr_t ra = GETPC();
1251 /* The operand must be 16 byte aligned */
1252 if (ptr & 0xf) {
1253 raise_exception_ra(env, EXCP0D_GPF, ra);
1256 do_xsave_fpu(env, ptr, ra);
1258 if (env->cr[4] & CR4_OSFXSR_MASK) {
1259 do_xsave_mxcsr(env, ptr, ra);
1260 /* Fast FXSAVE leaves out the XMM registers */
1261 if (!(env->efer & MSR_EFER_FFXSR)
1262 || (env->hflags & HF_CPL_MASK)
1263 || !(env->hflags & HF_LMA_MASK)) {
1264 do_xsave_sse(env, ptr, ra);
1269 static uint64_t get_xinuse(CPUX86State *env)
1271 uint64_t inuse = -1;
1273 /* For the most part, we don't track XINUSE. We could calculate it
1274 here for all components, but it's probably less work to simply
1275 indicate in use. That said, the state of BNDREGS is important
1276 enough to track in HFLAGS, so we might as well use that here. */
1277 if ((env->hflags & HF_MPX_IU_MASK) == 0) {
1278 inuse &= ~XSTATE_BNDREGS_MASK;
1280 return inuse;
1283 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1284 uint64_t inuse, uint64_t opt, uintptr_t ra)
1286 uint64_t old_bv, new_bv;
1288 /* The OS must have enabled XSAVE. */
1289 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1290 raise_exception_ra(env, EXCP06_ILLOP, ra);
1293 /* The operand must be 64 byte aligned. */
1294 if (ptr & 63) {
1295 raise_exception_ra(env, EXCP0D_GPF, ra);
1298 /* Never save anything not enabled by XCR0. */
1299 rfbm &= env->xcr0;
1300 opt &= rfbm;
1302 if (opt & XSTATE_FP_MASK) {
1303 do_xsave_fpu(env, ptr, ra);
1305 if (rfbm & XSTATE_SSE_MASK) {
1306 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
1307 do_xsave_mxcsr(env, ptr, ra);
1309 if (opt & XSTATE_SSE_MASK) {
1310 do_xsave_sse(env, ptr, ra);
1312 if (opt & XSTATE_BNDREGS_MASK) {
1313 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
1315 if (opt & XSTATE_BNDCSR_MASK) {
1316 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
1318 if (opt & XSTATE_PKRU_MASK) {
1319 do_xsave_pkru(env, ptr + XO(pkru_state), ra);
1322 /* Update the XSTATE_BV field. */
1323 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1324 new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
1325 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
1328 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1330 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1333 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1335 uint64_t inuse = get_xinuse(env);
1336 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1339 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1341 int i, fpuc, fpus, fptag;
1342 target_ulong addr;
1344 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
1345 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
1346 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
1347 cpu_set_fpuc(env, fpuc);
1348 cpu_set_fpus(env, fpus);
1349 fptag ^= 0xff;
1350 for (i = 0; i < 8; i++) {
1351 env->fptags[i] = ((fptag >> i) & 1);
1354 addr = ptr + XO(legacy.fpregs);
1355 for (i = 0; i < 8; i++) {
1356 floatx80 tmp = helper_fldt(env, addr, ra);
1357 ST(i) = tmp;
1358 addr += 16;
1362 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1364 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
1367 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1369 int i, nb_xmm_regs;
1370 target_ulong addr;
1372 if (env->hflags & HF_CS64_MASK) {
1373 nb_xmm_regs = 16;
1374 } else {
1375 nb_xmm_regs = 8;
1378 addr = ptr + XO(legacy.xmm_regs);
1379 for (i = 0; i < nb_xmm_regs; i++) {
1380 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1381 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1382 addr += 16;
1386 static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1388 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1389 int i;
1391 for (i = 0; i < 4; i++, addr += 16) {
1392 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1393 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1397 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1399 /* FIXME: Extend highest implemented bit of linear address. */
1400 env->bndcs_regs.cfgu
1401 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
1402 env->bndcs_regs.sts
1403 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
1406 static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1408 env->pkru = cpu_ldq_data_ra(env, ptr, ra);
1411 void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1413 uintptr_t ra = GETPC();
1415 /* The operand must be 16 byte aligned */
1416 if (ptr & 0xf) {
1417 raise_exception_ra(env, EXCP0D_GPF, ra);
1420 do_xrstor_fpu(env, ptr, ra);
1422 if (env->cr[4] & CR4_OSFXSR_MASK) {
1423 do_xrstor_mxcsr(env, ptr, ra);
1424 /* Fast FXRSTOR leaves out the XMM registers */
1425 if (!(env->efer & MSR_EFER_FFXSR)
1426 || (env->hflags & HF_CPL_MASK)
1427 || !(env->hflags & HF_LMA_MASK)) {
1428 do_xrstor_sse(env, ptr, ra);
1433 #if defined(CONFIG_USER_ONLY)
1434 void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
1436 helper_fxsave(env, ptr);
1439 void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
1441 helper_fxrstor(env, ptr);
1443 #endif
1445 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1447 uintptr_t ra = GETPC();
1448 uint64_t xstate_bv, xcomp_bv, reserve0;
1450 rfbm &= env->xcr0;
1452 /* The OS must have enabled XSAVE. */
1453 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1454 raise_exception_ra(env, EXCP06_ILLOP, ra);
1457 /* The operand must be 64 byte aligned. */
1458 if (ptr & 63) {
1459 raise_exception_ra(env, EXCP0D_GPF, ra);
1462 xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1464 if ((int64_t)xstate_bv < 0) {
1465 /* FIXME: Compact form. */
1466 raise_exception_ra(env, EXCP0D_GPF, ra);
1469 /* Standard form. */
1471 /* The XSTATE_BV field must not set bits not present in XCR0. */
1472 if (xstate_bv & ~env->xcr0) {
1473 raise_exception_ra(env, EXCP0D_GPF, ra);
1476 /* The XCOMP_BV field must be zero. Note that, as of the April 2016
1477 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
1478 describes only XCOMP_BV, but the description of the standard form
1479 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
1480 includes the next 64-bit field. */
1481 xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
1482 reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
1483 if (xcomp_bv || reserve0) {
1484 raise_exception_ra(env, EXCP0D_GPF, ra);
1487 if (rfbm & XSTATE_FP_MASK) {
1488 if (xstate_bv & XSTATE_FP_MASK) {
1489 do_xrstor_fpu(env, ptr, ra);
1490 } else {
1491 helper_fninit(env);
1492 memset(env->fpregs, 0, sizeof(env->fpregs));
1495 if (rfbm & XSTATE_SSE_MASK) {
1496 /* Note that the standard form of XRSTOR loads MXCSR from memory
1497 whether or not the XSTATE_BV bit is set. */
1498 do_xrstor_mxcsr(env, ptr, ra);
1499 if (xstate_bv & XSTATE_SSE_MASK) {
1500 do_xrstor_sse(env, ptr, ra);
1501 } else {
1502 /* ??? When AVX is implemented, we may have to be more
1503 selective in the clearing. */
1504 memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
1507 if (rfbm & XSTATE_BNDREGS_MASK) {
1508 if (xstate_bv & XSTATE_BNDREGS_MASK) {
1509 do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
1510 env->hflags |= HF_MPX_IU_MASK;
1511 } else {
1512 memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
1513 env->hflags &= ~HF_MPX_IU_MASK;
1516 if (rfbm & XSTATE_BNDCSR_MASK) {
1517 if (xstate_bv & XSTATE_BNDCSR_MASK) {
1518 do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
1519 } else {
1520 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
1522 cpu_sync_bndcs_hflags(env);
1524 if (rfbm & XSTATE_PKRU_MASK) {
1525 uint64_t old_pkru = env->pkru;
1526 if (xstate_bv & XSTATE_PKRU_MASK) {
1527 do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
1528 } else {
1529 env->pkru = 0;
1531 if (env->pkru != old_pkru) {
1532 CPUState *cs = env_cpu(env);
1533 tlb_flush(cs);
1538 #undef XO
1540 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1542 /* The OS must have enabled XSAVE. */
1543 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1544 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1547 switch (ecx) {
1548 case 0:
1549 return env->xcr0;
1550 case 1:
1551 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1552 return env->xcr0 & get_xinuse(env);
1554 break;
1556 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1559 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1561 uint32_t dummy, ena_lo, ena_hi;
1562 uint64_t ena;
1564 /* The OS must have enabled XSAVE. */
1565 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1566 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1569 /* Only XCR0 is defined at present; the FPU may not be disabled. */
1570 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
1571 goto do_gpf;
1574 /* Disallow enabling unimplemented features. */
1575 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
1576 ena = ((uint64_t)ena_hi << 32) | ena_lo;
1577 if (mask & ~ena) {
1578 goto do_gpf;
1581 /* Disallow enabling only half of MPX. */
1582 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
1583 & XSTATE_BNDCSR_MASK) {
1584 goto do_gpf;
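/*
 * The check above relies on XSTATE_BNDCSR_MASK being exactly twice
 * XSTATE_BNDREGS_MASK: the multiplication shifts the BNDREGS bit into the
 * BNDCSR position, so the XOR has the BNDCSR bit set precisely when only one
 * of the two MPX components would be enabled.
 */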
1587 env->xcr0 = mask;
1588 cpu_sync_bndcs_hflags(env);
1589 return;
1591 do_gpf:
1592 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1595 /* MMX/SSE */
1596 /* XXX: optimize by storing fptt and fptags in the static cpu state */
1598 #define SSE_DAZ 0x0040
1599 #define SSE_RC_MASK 0x6000
1600 #define SSE_RC_NEAR 0x0000
1601 #define SSE_RC_DOWN 0x2000
1602 #define SSE_RC_UP 0x4000
1603 #define SSE_RC_CHOP 0x6000
1604 #define SSE_FZ 0x8000
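/*
 * MXCSR layout referenced by these masks: bits 0-5 are the exception flags,
 * bit 6 is DAZ (treat denormal inputs as zero), bits 7-12 the exception
 * masks, bits 14:13 the rounding control and bit 15 FZ (flush tiny results
 * to zero).
 */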
1606 void update_mxcsr_status(CPUX86State *env)
1608 uint32_t mxcsr = env->mxcsr;
1609 int rnd_type;
1611 /* set rounding mode */
1612 switch (mxcsr & SSE_RC_MASK) {
1613 default:
1614 case SSE_RC_NEAR:
1615 rnd_type = float_round_nearest_even;
1616 break;
1617 case SSE_RC_DOWN:
1618 rnd_type = float_round_down;
1619 break;
1620 case SSE_RC_UP:
1621 rnd_type = float_round_up;
1622 break;
1623 case SSE_RC_CHOP:
1624 rnd_type = float_round_to_zero;
1625 break;
1627 set_float_rounding_mode(rnd_type, &env->sse_status);
1629 /* set denormals are zero */
1630 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
1632 /* set flush to zero */
633 /* FZ applies to SSE arithmetic, so it belongs in sse_status */
633 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
1636 void helper_ldmxcsr(CPUX86State *env, uint32_t val)
1638 cpu_set_mxcsr(env, val);
1641 void helper_enter_mmx(CPUX86State *env)
1643 env->fpstt = 0;
1644 *(uint32_t *)(env->fptags) = 0;
1645 *(uint32_t *)(env->fptags + 4) = 0;
1648 void helper_emms(CPUX86State *env)
1650 /* set to empty state */
1651 *(uint32_t *)(env->fptags) = 0x01010101;
1652 *(uint32_t *)(env->fptags + 4) = 0x01010101;
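/*
 * MMX/x87 aliasing: executing an MMX instruction (helper_enter_mmx) resets
 * TOP to 0 and clears all eight tag bytes so every register reads as valid,
 * while EMMS sets each tag byte back to 1 (empty). The 32-bit stores assume
 * env->fptags is an array of eight uint8_t.
 */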
1655 /* XXX: suppress */
1656 void helper_movq(CPUX86State *env, void *d, void *s)
1658 *(uint64_t *)d = *(uint64_t *)s;
1661 #define SHIFT 0
1662 #include "ops_sse.h"
1664 #define SHIFT 1
1665 #include "ops_sse.h"