target-i386/fpu_helper.c

   1 /*
   2  *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include <math.h>
  22 #include "cpu.h"
  23 #include "exec/helper-proto.h"
  24 #include "qemu/host-utils.h"
  25 #include "exec/exec-all.h"
  26 #include "exec/cpu_ldst.h"
  27
  28 #define FPU_RC_MASK         0xc00
  29 #define FPU_RC_NEAR         0x000
  30 #define FPU_RC_DOWN         0x400
  31 #define FPU_RC_UP           0x800
  32 #define FPU_RC_CHOP         0xc00
  33
  34 #define MAXTAN 9223372036854775808.0
  35
  36 /* the following deal with x86 long double-precision numbers */
  37 #define MAXEXPD 0x7fff
  38 #define EXPBIAS 16383
  39 #define EXPD(fp)        (fp.l.upper & 0x7fff)
  40 #define SIGND(fp)       ((fp.l.upper) & 0x8000)
  41 #define MANTD(fp)       (fp.l.lower)
  42 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
  43
  44 #define FPUS_IE (1 << 0)
  45 #define FPUS_DE (1 << 1)
  46 #define FPUS_ZE (1 << 2)
  47 #define FPUS_OE (1 << 3)
  48 #define FPUS_UE (1 << 4)
  49 #define FPUS_PE (1 << 5)
  50 #define FPUS_SF (1 << 6)
  51 #define FPUS_SE (1 << 7)
  52 #define FPUS_B  (1 << 15)
  53
  54 #define FPUC_EM 0x3f
  55
  56 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
  57 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
  58 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
  59
  60 static inline void fpush(CPUX86State *env)
  61 {
  62     env->fpstt = (env->fpstt - 1) & 7;
  63     env->fptags[env->fpstt] = 0; /* validate stack entry */
  64 }
  65
  66 static inline void fpop(CPUX86State *env)
  67 {
  68     env->fptags[env->fpstt] = 1; /* invalidate stack entry */
  69     env->fpstt = (env->fpstt + 1) & 7;
  70 }
  71
  72 static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
  73                                    uintptr_t retaddr)
  74 {
  75     CPU_LDoubleU temp;
  76
  77     temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
  78     temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
  79     return temp.d;
  80 }
  81
  82 static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
  83                                uintptr_t retaddr)
  84 {
  85     CPU_LDoubleU temp;
  86
  87     temp.d = f;
  88     cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
  89     cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
  90 }
  91
  92 /* x87 FPU helpers */
  93
  94 static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
  95 {
  96     union {
  97         float64 f64;
  98         double d;
  99     } u;
 100
 101     u.f64 = floatx80_to_float64(a, &env->fp_status);
 102     return u.d;
 103 }
 104
 105 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
 106 {
 107     union {
 108         float64 f64;
 109         double d;
 110     } u;
 111
 112     u.d = a;
 113     return float64_to_floatx80(u.f64, &env->fp_status);
 114 }
 115
 116 static void fpu_set_exception(CPUX86State *env, int mask)
 117 {
 118     env->fpus |= mask;
 119     if (env->fpus & (~env->fpuc & FPUC_EM)) {
 120         env->fpus |= FPUS_SE | FPUS_B;
 121     }
 122 }
 123
 124 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
 125 {
 126     if (floatx80_is_zero(b)) {
 127         fpu_set_exception(env, FPUS_ZE);
 128     }
 129     return floatx80_div(a, b, &env->fp_status);
 130 }
 131
 132 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
 133 {
 134     if (env->cr[0] & CR0_NE_MASK) {
 135         raise_exception_ra(env, EXCP10_COPR, retaddr);
 136     }
 137 #if !defined(CONFIG_USER_ONLY)
 138     else {
 139         cpu_set_ferr(env);
 140     }
 141 #endif
 142 }
 143
 144 void helper_flds_FT0(CPUX86State *env, uint32_t val)
 145 {
 146     union {
 147         float32 f;
 148         uint32_t i;
 149     } u;
 150
 151     u.i = val;
 152     FT0 = float32_to_floatx80(u.f, &env->fp_status);
 153 }
 154
 155 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
 156 {
 157     union {
 158         float64 f;
 159         uint64_t i;
 160     } u;
 161
 162     u.i = val;
 163     FT0 = float64_to_floatx80(u.f, &env->fp_status);
 164 }
 165
 166 void helper_fildl_FT0(CPUX86State *env, int32_t val)
 167 {
 168     FT0 = int32_to_floatx80(val, &env->fp_status);
 169 }
 170
 171 void helper_flds_ST0(CPUX86State *env, uint32_t val)
 172 {
 173     int new_fpstt;
 174     union {
 175         float32 f;
 176         uint32_t i;
 177     } u;
 178
 179     new_fpstt = (env->fpstt - 1) & 7;
 180     u.i = val;
 181     env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
 182     env->fpstt = new_fpstt;
 183     env->fptags[new_fpstt] = 0; /* validate stack entry */
 184 }
 185
 186 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
 187 {
 188     int new_fpstt;
 189     union {
 190         float64 f;
 191         uint64_t i;
 192     } u;
 193
 194     new_fpstt = (env->fpstt - 1) & 7;
 195     u.i = val;
 196     env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
 197     env->fpstt = new_fpstt;
 198     env->fptags[new_fpstt] = 0; /* validate stack entry */
 199 }
 200
 201 void helper_fildl_ST0(CPUX86State *env, int32_t val)
 202 {
 203     int new_fpstt;
 204
 205     new_fpstt = (env->fpstt - 1) & 7;
 206     env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
 207     env->fpstt = new_fpstt;
 208     env->fptags[new_fpstt] = 0; /* validate stack entry */
 209 }
 210
 211 void helper_fildll_ST0(CPUX86State *env, int64_t val)
 212 {
 213     int new_fpstt;
 214
 215     new_fpstt = (env->fpstt - 1) & 7;
 216     env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
 217     env->fpstt = new_fpstt;
 218     env->fptags[new_fpstt] = 0; /* validate stack entry */
 219 }
 220
 221 uint32_t helper_fsts_ST0(CPUX86State *env)
 222 {
 223     union {
 224         float32 f;
 225         uint32_t i;
 226     } u;
 227
 228     u.f = floatx80_to_float32(ST0, &env->fp_status);
 229     return u.i;
 230 }
 231
 232 uint64_t helper_fstl_ST0(CPUX86State *env)
 233 {
 234     union {
 235         float64 f;
 236         uint64_t i;
 237     } u;
 238
 239     u.f = floatx80_to_float64(ST0, &env->fp_status);
 240     return u.i;
 241 }
 242
 243 int32_t helper_fist_ST0(CPUX86State *env)
 244 {
 245     int32_t val;
 246
 247     val = floatx80_to_int32(ST0, &env->fp_status);
 248     if (val != (int16_t)val) {
 249         val = -32768;
 250     }
 251     return val;
 252 }
 253
 254 int32_t helper_fistl_ST0(CPUX86State *env)
 255 {
 256     int32_t val;
 257     signed char old_exp_flags;
 258
 259     old_exp_flags = get_float_exception_flags(&env->fp_status);
 260     set_float_exception_flags(0, &env->fp_status);
 261
 262     val = floatx80_to_int32(ST0, &env->fp_status);
 263     if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 264         val = 0x80000000;
 265     }
 266     set_float_exception_flags(get_float_exception_flags(&env->fp_status)
 267                                 | old_exp_flags, &env->fp_status);
 268     return val;
 269 }
 270
 271 int64_t helper_fistll_ST0(CPUX86State *env)
 272 {
 273     int64_t val;
 274     signed char old_exp_flags;
 275
 276     old_exp_flags = get_float_exception_flags(&env->fp_status);
 277     set_float_exception_flags(0, &env->fp_status);
 278
 279     val = floatx80_to_int64(ST0, &env->fp_status);
 280     if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 281         val = 0x8000000000000000ULL;
 282     }
 283     set_float_exception_flags(get_float_exception_flags(&env->fp_status)
 284                                 | old_exp_flags, &env->fp_status);
 285     return val;
 286 }
 287
 288 int32_t helper_fistt_ST0(CPUX86State *env)
 289 {
 290     int32_t val;
 291
 292     val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
 293     if (val != (int16_t)val) {
 294         val = -32768;
 295     }
 296     return val;
 297 }
 298
 299 int32_t helper_fisttl_ST0(CPUX86State *env)
 300 {
 301     return floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
 302 }
 303
 304 int64_t helper_fisttll_ST0(CPUX86State *env)
 305 {
 306     return floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
 307 }
 308
 309 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
 310 {
 311     int new_fpstt;
 312
 313     new_fpstt = (env->fpstt - 1) & 7;
 314     env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
 315     env->fpstt = new_fpstt;
 316     env->fptags[new_fpstt] = 0; /* validate stack entry */
 317 }
 318
 319 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
 320 {
 321     helper_fstt(env, ST0, ptr, GETPC());
 322 }
 323
 324 void helper_fpush(CPUX86State *env)
 325 {
 326     fpush(env);
 327 }
 328
 329 void helper_fpop(CPUX86State *env)
 330 {
 331     fpop(env);
 332 }
 333
 334 void helper_fdecstp(CPUX86State *env)
 335 {
 336     env->fpstt = (env->fpstt - 1) & 7;
 337     env->fpus &= ~0x4700;
 338 }
 339
 340 void helper_fincstp(CPUX86State *env)
 341 {
 342     env->fpstt = (env->fpstt + 1) & 7;
 343     env->fpus &= ~0x4700;
 344 }
 345
 346 /* FPU move */
 347
 348 void helper_ffree_STN(CPUX86State *env, int st_index)
 349 {
 350     env->fptags[(env->fpstt + st_index) & 7] = 1;
 351 }
 352
 353 void helper_fmov_ST0_FT0(CPUX86State *env)
 354 {
 355     ST0 = FT0;
 356 }
 357
 358 void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
 359 {
 360     FT0 = ST(st_index);
 361 }
 362
 363 void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
 364 {
 365     ST0 = ST(st_index);
 366 }
 367
 368 void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
 369 {
 370     ST(st_index) = ST0;
 371 }
 372
 373 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
 374 {
 375     floatx80 tmp;
 376
 377     tmp = ST(st_index);
 378     ST(st_index) = ST0;
 379     ST0 = tmp;
 380 }
 381
 382 /* FPU operations */
 383
 384 static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
 385
 386 void helper_fcom_ST0_FT0(CPUX86State *env)
 387 {
 388     int ret;
 389
 390     ret = floatx80_compare(ST0, FT0, &env->fp_status);
 391     env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
 392 }
 393
 394 void helper_fucom_ST0_FT0(CPUX86State *env)
 395 {
 396     int ret;
 397
 398     ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
 399     env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
 400 }
 401
 402 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
 403
 404 void helper_fcomi_ST0_FT0(CPUX86State *env)
 405 {
 406     int eflags;
 407     int ret;
 408
 409     ret = floatx80_compare(ST0, FT0, &env->fp_status);
 410     eflags = cpu_cc_compute_all(env, CC_OP);
 411     eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
 412     CC_SRC = eflags;
 413 }
 414
 415 void helper_fucomi_ST0_FT0(CPUX86State *env)
 416 {
 417     int eflags;
 418     int ret;
 419
 420     ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
 421     eflags = cpu_cc_compute_all(env, CC_OP);
 422     eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
 423     CC_SRC = eflags;
 424 }
 425
 426 void helper_fadd_ST0_FT0(CPUX86State *env)
 427 {
 428     ST0 = floatx80_add(ST0, FT0, &env->fp_status);
 429 }
 430
 431 void helper_fmul_ST0_FT0(CPUX86State *env)
 432 {
 433     ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
 434 }
 435
 436 void helper_fsub_ST0_FT0(CPUX86State *env)
 437 {
 438     ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
 439 }
 440
 441 void helper_fsubr_ST0_FT0(CPUX86State *env)
 442 {
 443     ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
 444 }
 445
 446 void helper_fdiv_ST0_FT0(CPUX86State *env)
 447 {
 448     ST0 = helper_fdiv(env, ST0, FT0);
 449 }
 450
 451 void helper_fdivr_ST0_FT0(CPUX86State *env)
 452 {
 453     ST0 = helper_fdiv(env, FT0, ST0);
 454 }
 455
 456 /* fp operations between STN and ST0 */
 457
 458 void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
 459 {
 460     ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
 461 }
 462
 463 void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
 464 {
 465     ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
 466 }
 467
 468 void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
 469 {
 470     ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
 471 }
 472
 473 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
 474 {
 475     ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
 476 }
 477
 478 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
 479 {
 480     floatx80 *p;
 481
 482     p = &ST(st_index);
 483     *p = helper_fdiv(env, *p, ST0);
 484 }
 485
 486 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
 487 {
 488     floatx80 *p;
 489
 490     p = &ST(st_index);
 491     *p = helper_fdiv(env, ST0, *p);
 492 }
 493
 494 /* misc FPU operations */
 495 void helper_fchs_ST0(CPUX86State *env)
 496 {
 497     ST0 = floatx80_chs(ST0);
 498 }
 499
 500 void helper_fabs_ST0(CPUX86State *env)
 501 {
 502     ST0 = floatx80_abs(ST0);
 503 }
 504
 505 void helper_fld1_ST0(CPUX86State *env)
 506 {
 507     ST0 = floatx80_one;
 508 }
 509
 510 void helper_fldl2t_ST0(CPUX86State *env)
 511 {
 512     ST0 = floatx80_l2t;
 513 }
 514
 515 void helper_fldl2e_ST0(CPUX86State *env)
 516 {
 517     ST0 = floatx80_l2e;
 518 }
 519
 520 void helper_fldpi_ST0(CPUX86State *env)
 521 {
 522     ST0 = floatx80_pi;
 523 }
 524
 525 void helper_fldlg2_ST0(CPUX86State *env)
 526 {
 527     ST0 = floatx80_lg2;
 528 }
 529
 530 void helper_fldln2_ST0(CPUX86State *env)
 531 {
 532     ST0 = floatx80_ln2;
 533 }
 534
 535 void helper_fldz_ST0(CPUX86State *env)
 536 {
 537     ST0 = floatx80_zero;
 538 }
 539
 540 void helper_fldz_FT0(CPUX86State *env)
 541 {
 542     FT0 = floatx80_zero;
 543 }
 544
 545 uint32_t helper_fnstsw(CPUX86State *env)
 546 {
 547     return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
 548 }
 549
 550 uint32_t helper_fnstcw(CPUX86State *env)
 551 {
 552     return env->fpuc;
 553 }
 554
 555 void update_fp_status(CPUX86State *env)
 556 {
 557     int rnd_type;
 558
 559     /* set rounding mode */
 560     switch (env->fpuc & FPU_RC_MASK) {
 561     default:
 562     case FPU_RC_NEAR:
 563         rnd_type = float_round_nearest_even;
 564         break;
 565     case FPU_RC_DOWN:
 566         rnd_type = float_round_down;
 567         break;
 568     case FPU_RC_UP:
 569         rnd_type = float_round_up;
 570         break;
 571     case FPU_RC_CHOP:
 572         rnd_type = float_round_to_zero;
 573         break;
 574     }
 575     set_float_rounding_mode(rnd_type, &env->fp_status);
 576     switch ((env->fpuc >> 8) & 3) {
 577     case 0:
 578         rnd_type = 32;
 579         break;
 580     case 2:
 581         rnd_type = 64;
 582         break;
 583     case 3:
 584     default:
 585         rnd_type = 80;
 586         break;
 587     }
 588     set_floatx80_rounding_precision(rnd_type, &env->fp_status);
 589 }
 590
 591 void helper_fldcw(CPUX86State *env, uint32_t val)
 592 {
 593     cpu_set_fpuc(env, val);
 594 }
 595
 596 void helper_fclex(CPUX86State *env)
 597 {
 598     env->fpus &= 0x7f00;
 599 }
 600
 601 void helper_fwait(CPUX86State *env)
 602 {
 603     if (env->fpus & FPUS_SE) {
 604         fpu_raise_exception(env, GETPC());
 605     }
 606 }
 607
 608 void helper_fninit(CPUX86State *env)
 609 {
 610     env->fpus = 0;
 611     env->fpstt = 0;
 612     cpu_set_fpuc(env, 0x37f);
 613     env->fptags[0] = 1;
 614     env->fptags[1] = 1;
 615     env->fptags[2] = 1;
 616     env->fptags[3] = 1;
 617     env->fptags[4] = 1;
 618     env->fptags[5] = 1;
 619     env->fptags[6] = 1;
 620     env->fptags[7] = 1;
 621 }
 622
 623 /* BCD ops */
 624
 625 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
 626 {
 627     floatx80 tmp;
 628     uint64_t val;
 629     unsigned int v;
 630     int i;
 631
 632     val = 0;
 633     for (i = 8; i >= 0; i--) {
 634         v = cpu_ldub_data_ra(env, ptr + i, GETPC());
 635         val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
 636     }
 637     tmp = int64_to_floatx80(val, &env->fp_status);
 638     if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
 639         tmp = floatx80_chs(tmp);
 640     }
 641     fpush(env);
 642     ST0 = tmp;
 643 }
 644
 645 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
 646 {
 647     int v;
 648     target_ulong mem_ref, mem_end;
 649     int64_t val;
 650
 651     val = floatx80_to_int64(ST0, &env->fp_status);
 652     mem_ref = ptr;
 653     mem_end = mem_ref + 9;
 654     if (val < 0) {
 655         cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
 656         val = -val;
 657     } else {
 658         cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
 659     }
 660     while (mem_ref < mem_end) {
 661         if (val == 0) {
 662             break;
 663         }
 664         v = val % 100;
 665         val = val / 100;
 666         v = ((v / 10) << 4) | (v % 10);
 667         cpu_stb_data_ra(env, mem_ref++, v, GETPC());
 668     }
 669     while (mem_ref < mem_end) {
 670         cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
 671     }
 672 }
 673
 674 void helper_f2xm1(CPUX86State *env)
 675 {
 676     double val = floatx80_to_double(env, ST0);
 677
 678     val = pow(2.0, val) - 1.0;
 679     ST0 = double_to_floatx80(env, val);
 680 }
 681
 682 void helper_fyl2x(CPUX86State *env)
 683 {
 684     double fptemp = floatx80_to_double(env, ST0);
 685
 686     if (fptemp > 0.0) {
 687         fptemp = log(fptemp) / log(2.0); /* log2(ST) */
 688         fptemp *= floatx80_to_double(env, ST1);
 689         ST1 = double_to_floatx80(env, fptemp);
 690         fpop(env);
 691     } else {
 692         env->fpus &= ~0x4700;
 693         env->fpus |= 0x400;
 694     }
 695 }
 696
 697 void helper_fptan(CPUX86State *env)
 698 {
 699     double fptemp = floatx80_to_double(env, ST0);
 700
 701     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 702         env->fpus |= 0x400;
 703     } else {
 704         fptemp = tan(fptemp);
 705         ST0 = double_to_floatx80(env, fptemp);
 706         fpush(env);
 707         ST0 = floatx80_one;
 708         env->fpus &= ~0x400; /* C2 <-- 0 */
 709         /* the above code is for |arg| < 2**52 only */
 710     }
 711 }
 712
 713 void helper_fpatan(CPUX86State *env)
 714 {
 715     double fptemp, fpsrcop;
 716
 717     fpsrcop = floatx80_to_double(env, ST1);
 718     fptemp = floatx80_to_double(env, ST0);
 719     ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
 720     fpop(env);
 721 }
 722
 723 void helper_fxtract(CPUX86State *env)
 724 {
 725     CPU_LDoubleU temp;
 726
 727     temp.d = ST0;
 728
 729     if (floatx80_is_zero(ST0)) {
 730         /* Easy way to generate -inf and raising division by 0 exception */
 731         ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
 732                            &env->fp_status);
 733         fpush(env);
 734         ST0 = temp.d;
 735     } else {
 736         int expdif;
 737
 738         expdif = EXPD(temp) - EXPBIAS;
 739         /* DP exponent bias */
 740         ST0 = int32_to_floatx80(expdif, &env->fp_status);
 741         fpush(env);
 742         BIASEXPONENT(temp);
 743         ST0 = temp.d;
 744     }
 745 }
 746
/*
 * Partial remainder of ST0 / ST1 with the quotient rounded by rint(),
 * evaluated in host double precision.
 *
 * - Infinite/NaN ST0, NaN ST1 or zero ST1: ST0 becomes NaN, C3..C0 are
 *   cleared.
 * - Exponent of ST0 below that of ST1: ST0 unchanged, C3..C0 cleared.
 * - Exponent difference below 53: full reduction; the low three
 *   quotient bits go to C0, C3 and C1.
 * - Otherwise: one partial reduction step and C2 is set.  Note that this
 *   path only ORs in C2; the other condition bits are left as they were.
 */
void helper_fprem1(CPUX86State *env)
{
    double st0, st1, dblq, fpsrcop, fptemp;
    CPU_LDoubleU fpsrcop1, fptemp1;
    int expdif;
    signed long long int q;

    st0 = floatx80_to_double(env, ST0);
    st1 = floatx80_to_double(env, ST1);

    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        return;
    }

    fpsrcop = st0;
    fptemp = st1;
    /* the exponent difference is taken from the 80-bit encodings */
    fpsrcop1.d = ST0;
    fptemp1.d = ST1;
    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);

    if (expdif < 0) {
        /* optimisation? taken from the AMD docs */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* ST0 is unchanged */
        return;
    }

    if (expdif < 53) {
        dblq = fpsrcop / fptemp;
        /* round dblq towards nearest integer */
        dblq = rint(dblq);
        st0 = fpsrcop - fptemp * dblq;

        /* convert dblq to q by truncating towards zero */
        if (dblq < 0.0) {
            q = (signed long long int)(-dblq);
        } else {
            q = (signed long long int)dblq;
        }

        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* (C0,C3,C1) <-- (q2,q1,q0) */
        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
    } else {
        env->fpus |= 0x400;  /* C2 <-- 1 */
        /* scale so the chopped quotient stays within double precision */
        fptemp = pow(2.0, expdif - 50);
        fpsrcop = (st0 / st1) / fptemp;
        /* fpsrcop = integer obtained by chopping */
        fpsrcop = (fpsrcop < 0.0) ?
                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
        st0 -= (st1 * fpsrcop * fptemp);
    }
    ST0 = double_to_floatx80(env, st0);
}
 805
/*
 * Partial remainder of ST0 / ST1 with the quotient truncated towards
 * zero, evaluated in host double precision.
 *
 * - Infinite/NaN ST0, NaN ST1 or zero ST1: ST0 becomes NaN, C3..C0 are
 *   cleared.
 * - Exponent of ST0 below that of ST1: ST0 unchanged, C3..C0 cleared.
 * - Exponent difference below 53: full reduction; the low three
 *   quotient bits go to C0, C3 and C1.
 * - Otherwise: one partial reduction step and C2 is set.  Note that this
 *   path only ORs in C2; the other condition bits are left as they were.
 */
void helper_fprem(CPUX86State *env)
{
    double st0, st1, dblq, fpsrcop, fptemp;
    CPU_LDoubleU fpsrcop1, fptemp1;
    int expdif;
    signed long long int q;

    st0 = floatx80_to_double(env, ST0);
    st1 = floatx80_to_double(env, ST1);

    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        return;
    }

    fpsrcop = st0;
    fptemp = st1;
    /* the exponent difference is taken from the 80-bit encodings */
    fpsrcop1.d = ST0;
    fptemp1.d = ST1;
    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);

    if (expdif < 0) {
        /* optimisation? taken from the AMD docs */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* ST0 is unchanged */
        return;
    }

    if (expdif < 53) {
        dblq = fpsrcop / fptemp; /* ST0 / ST1 */
        /* round dblq towards zero */
        dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
        st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */

        /* convert dblq to q by truncating towards zero */
        if (dblq < 0.0) {
            q = (signed long long int)(-dblq);
        } else {
            q = (signed long long int)dblq;
        }

        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* (C0,C3,C1) <-- (q2,q1,q0) */
        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
    } else {
        int N = 32 + (expdif % 32); /* as per AMD docs */

        env->fpus |= 0x400;  /* C2 <-- 1 */
        /* reduce by N exponent bits in this step */
        fptemp = pow(2.0, (double)(expdif - N));
        fpsrcop = (st0 / st1) / fptemp;
        /* fpsrcop = integer obtained by chopping */
        fpsrcop = (fpsrcop < 0.0) ?
                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
        st0 -= (st1 * fpsrcop * fptemp);
    }
    ST0 = double_to_floatx80(env, st0);
}
 866
 867 void helper_fyl2xp1(CPUX86State *env)
 868 {
 869     double fptemp = floatx80_to_double(env, ST0);
 870
 871     if ((fptemp + 1.0) > 0.0) {
 872         fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
 873         fptemp *= floatx80_to_double(env, ST1);
 874         ST1 = double_to_floatx80(env, fptemp);
 875         fpop(env);
 876     } else {
 877         env->fpus &= ~0x4700;
 878         env->fpus |= 0x400;
 879     }
 880 }
 881
 882 void helper_fsqrt(CPUX86State *env)
 883 {
 884     if (floatx80_is_neg(ST0)) {
 885         env->fpus &= ~0x4700;  /* (C3,C2,C1,C0) <-- 0000 */
 886         env->fpus |= 0x400;
 887     }
 888     ST0 = floatx80_sqrt(ST0, &env->fp_status);
 889 }
 890
 891 void helper_fsincos(CPUX86State *env)
 892 {
 893     double fptemp = floatx80_to_double(env, ST0);
 894
 895     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 896         env->fpus |= 0x400;
 897     } else {
 898         ST0 = double_to_floatx80(env, sin(fptemp));
 899         fpush(env);
 900         ST0 = double_to_floatx80(env, cos(fptemp));
 901         env->fpus &= ~0x400;  /* C2 <-- 0 */
 902         /* the above code is for |arg| < 2**63 only */
 903     }
 904 }
 905
 906 void helper_frndint(CPUX86State *env)
 907 {
 908     ST0 = floatx80_round_to_int(ST0, &env->fp_status);
 909 }
 910
 911 void helper_fscale(CPUX86State *env)
 912 {
 913     if (floatx80_is_any_nan(ST1)) {
 914         ST0 = ST1;
 915     } else {
 916         int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
 917         ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
 918     }
 919 }
 920
 921 void helper_fsin(CPUX86State *env)
 922 {
 923     double fptemp = floatx80_to_double(env, ST0);
 924
 925     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 926         env->fpus |= 0x400;
 927     } else {
 928         ST0 = double_to_floatx80(env, sin(fptemp));
 929         env->fpus &= ~0x400;  /* C2 <-- 0 */
 930         /* the above code is for |arg| < 2**53 only */
 931     }
 932 }
 933
 934 void helper_fcos(CPUX86State *env)
 935 {
 936     double fptemp = floatx80_to_double(env, ST0);
 937
 938     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 939         env->fpus |= 0x400;
 940     } else {
 941         ST0 = double_to_floatx80(env, cos(fptemp));
 942         env->fpus &= ~0x400;  /* C2 <-- 0 */
 943         /* the above code is for |arg| < 2**63 only */
 944     }
 945 }
 946
 947 void helper_fxam_ST0(CPUX86State *env)
 948 {
 949     CPU_LDoubleU temp;
 950     int expdif;
 951
 952     temp.d = ST0;
 953
 954     env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 955     if (SIGND(temp)) {
 956         env->fpus |= 0x200; /* C1 <-- 1 */
 957     }
 958
 959     /* XXX: test fptags too */
 960     expdif = EXPD(temp);
 961     if (expdif == MAXEXPD) {
 962         if (MANTD(temp) == 0x8000000000000000ULL) {
 963             env->fpus |= 0x500; /* Infinity */
 964         } else {
 965             env->fpus |= 0x100; /* NaN */
 966         }
 967     } else if (expdif == 0) {
 968         if (MANTD(temp) == 0) {
 969             env->fpus |=  0x4000; /* Zero */
 970         } else {
 971             env->fpus |= 0x4400; /* Denormal */
 972         }
 973     } else {
 974         env->fpus |= 0x400;
 975     }
 976 }
 977
 978 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
 979                       uintptr_t retaddr)
 980 {
 981     int fpus, fptag, exp, i;
 982     uint64_t mant;
 983     CPU_LDoubleU tmp;
 984
 985     fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
 986     fptag = 0;
 987     for (i = 7; i >= 0; i--) {
 988         fptag <<= 2;
 989         if (env->fptags[i]) {
 990             fptag |= 3;
 991         } else {
 992             tmp.d = env->fpregs[i].d;
 993             exp = EXPD(tmp);
 994             mant = MANTD(tmp);
 995             if (exp == 0 && mant == 0) {
 996                 /* zero */
 997                 fptag |= 1;
 998             } else if (exp == 0 || exp == MAXEXPD
 999                        || (mant & (1LL << 63)) == 0) {
1000                 /* NaNs, infinity, denormal */
1001                 fptag |= 2;
1002             }
1003         }
1004     }
1005     if (data32) {
1006         /* 32 bit */
1007         cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1008         cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1009         cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1010         cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1011         cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1012         cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1013         cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
1014     } else {
1015         /* 16 bit */
1016         cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1017         cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1018         cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1019         cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1020         cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1021         cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1022         cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
1023     }
1024 }
1025
1026 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1027 {
1028     do_fstenv(env, ptr, data32, GETPC());
1029 }
1030
1031 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1032                       uintptr_t retaddr)
1033 {
1034     int i, fpus, fptag;
1035
1036     if (data32) {
1037         cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1038         fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1039         fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1040     } else {
1041         cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1042         fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1043         fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1044     }
1045     env->fpstt = (fpus >> 11) & 7;
1046     env->fpus = fpus & ~0x3800;
1047     for (i = 0; i < 8; i++) {
1048         env->fptags[i] = ((fptag & 3) == 3);
1049         fptag >>= 2;
1050     }
1051 }
1052
1053 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1054 {
1055     do_fldenv(env, ptr, data32, GETPC());
1056 }
1057
1058 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1059 {
1060     floatx80 tmp;
1061     int i;
1062
1063     do_fstenv(env, ptr, data32, GETPC());
1064
1065     ptr += (14 << data32);
1066     for (i = 0; i < 8; i++) {
1067         tmp = ST(i);
1068         helper_fstt(env, tmp, ptr, GETPC());
1069         ptr += 10;
1070     }
1071
1072     /* fninit */
1073     env->fpus = 0;
1074     env->fpstt = 0;
1075     cpu_set_fpuc(env, 0x37f);
1076     env->fptags[0] = 1;
1077     env->fptags[1] = 1;
1078     env->fptags[2] = 1;
1079     env->fptags[3] = 1;
1080     env->fptags[4] = 1;
1081     env->fptags[5] = 1;
1082     env->fptags[6] = 1;
1083     env->fptags[7] = 1;
1084 }
1085
1086 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
1087 {
1088     floatx80 tmp;
1089     int i;
1090
1091     do_fldenv(env, ptr, data32, GETPC());
1092     ptr += (14 << data32);
1093
1094     for (i = 0; i < 8; i++) {
1095         tmp = helper_fldt(env, ptr, GETPC());
1096         ST(i) = tmp;
1097         ptr += 10;
1098     }
1099 }
1100
1101 #if defined(CONFIG_USER_ONLY)
/* Non-helper entry point (only built when CONFIG_USER_ONLY is defined)
   that forwards unchanged to helper_fsave().  */
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_fsave(env, ptr, data32);
}
1106
/* Non-helper entry point (only built when CONFIG_USER_ONLY is defined)
   that forwards unchanged to helper_frstor().  */
void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_frstor(env, ptr, data32);
}
1111 #endif
1112
1113 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1114 {
1115     int fpus, fptag, i;
1116     target_ulong addr;
1117
1118     fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1119     fptag = 0;
1120     for (i = 0; i < 8; i++) {
1121         fptag |= (env->fptags[i] << i);
1122     }
1123     cpu_stw_data_ra(env, ptr, env->fpuc, ra);
1124     cpu_stw_data_ra(env, ptr + 2, fpus, ra);
1125     cpu_stw_data_ra(env, ptr + 4, fptag ^ 0xff, ra);
1126
1127     /* In 32-bit mode this is eip, sel, dp, sel.
1128        In 64-bit mode this is rip, rdp.
1129        But in either case we don't write actual data, just zeros.  */
1130     cpu_stq_data_ra(env, ptr + 0x08, 0, ra); /* eip+sel; rip */
1131     cpu_stq_data_ra(env, ptr + 0x10, 0, ra); /* edp+sel; rdp */
1132
1133     addr = ptr + 0x20;
1134     for (i = 0; i < 8; i++) {
1135         floatx80 tmp = ST(i);
1136         helper_fstt(env, tmp, addr, ra);
1137         addr += 16;
1138     }
1139 }
1140
/* Store env->mxcsr at ptr + 0x18 and a constant mask value of 0xffff
   at ptr + 0x1c, both as 32-bit writes.  */
static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    cpu_stl_data_ra(env, ptr + 0x18, env->mxcsr, ra); /* mxcsr */
    cpu_stl_data_ra(env, ptr + 0x1c, 0x0000ffff, ra); /* mxcsr_mask */
}
1146
1147 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1148 {
1149     int i, nb_xmm_regs;
1150     target_ulong addr;
1151
1152     if (env->hflags & HF_CS64_MASK) {
1153         nb_xmm_regs = 16;
1154     } else {
1155         nb_xmm_regs = 8;
1156     }
1157
1158     addr = ptr + 0xa0;
1159     for (i = 0; i < nb_xmm_regs; i++) {
1160         cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1161         cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1162         addr += 16;
1163     }
1164 }
1165
1166 static void do_xsave_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra)
1167 {
1168     int i;
1169
1170     for (i = 0; i < 4; i++, addr += 16) {
1171         cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1172         cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1173     }
1174 }
1175
/* Store the bound configuration (cfgu) and status (sts) registers as
   two consecutive 64-bit values at addr.  */
static void do_xsave_bndcsr(CPUX86State *env, target_ulong addr, uintptr_t ra)
{
    cpu_stq_data_ra(env, addr, env->bndcs_regs.cfgu, ra);
    cpu_stq_data_ra(env, addr + 8, env->bndcs_regs.sts, ra);
}
1181
/* Store env->pkru at addr using a single 64-bit write.  */
static void do_xsave_pkru(CPUX86State *env, target_ulong addr, uintptr_t ra)
{
    cpu_stq_data_ra(env, addr, env->pkru, ra);
}
1186
1187 void helper_fxsave(CPUX86State *env, target_ulong ptr)
1188 {
1189     uintptr_t ra = GETPC();
1190
1191     /* The operand must be 16 byte aligned */
1192     if (ptr & 0xf) {
1193         raise_exception_ra(env, EXCP0D_GPF, ra);
1194     }
1195
1196     do_xsave_fpu(env, ptr, ra);
1197
1198     if (env->cr[4] & CR4_OSFXSR_MASK) {
1199         do_xsave_mxcsr(env, ptr, ra);
1200         /* Fast FXSAVE leaves out the XMM registers */
1201         if (!(env->efer & MSR_EFER_FFXSR)
1202             || (env->hflags & HF_CPL_MASK)
1203             || !(env->hflags & HF_LMA_MASK)) {
1204             do_xsave_sse(env, ptr, ra);
1205         }
1206     }
1207 }
1208
1209 static uint64_t get_xinuse(CPUX86State *env)
1210 {
1211     uint64_t inuse = -1;
1212
1213     /* For the most part, we don't track XINUSE.  We could calculate it
1214        here for all components, but it's probably less work to simply
1215        indicate in use.  That said, the state of BNDREGS is important
1216        enough to track in HFLAGS, so we might as well use that here.  */
1217     if ((env->hflags & HF_MPX_IU_MASK) == 0) {
1218        inuse &= ~XSTATE_BNDREGS_MASK;
1219     }
1220     return inuse;
1221 }
1222
1223 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1224                      uint64_t inuse, uint64_t opt, uintptr_t ra)
1225 {
1226     uint64_t old_bv, new_bv;
1227
1228     /* The OS must have enabled XSAVE.  */
1229     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1230         raise_exception_ra(env, EXCP06_ILLOP, ra);
1231     }
1232
1233     /* The operand must be 64 byte aligned.  */
1234     if (ptr & 63) {
1235         raise_exception_ra(env, EXCP0D_GPF, ra);
1236     }
1237
1238     /* Never save anything not enabled by XCR0.  */
1239     rfbm &= env->xcr0;
1240     opt &= rfbm;
1241
1242     if (opt & XSTATE_FP_MASK) {
1243         do_xsave_fpu(env, ptr, ra);
1244     }
1245     if (rfbm & XSTATE_SSE_MASK) {
1246         /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
1247         do_xsave_mxcsr(env, ptr, ra);
1248     }
1249     if (opt & XSTATE_SSE_MASK) {
1250         do_xsave_sse(env, ptr, ra);
1251     }
1252     if (opt & XSTATE_BNDREGS_MASK) {
1253         target_ulong off = x86_ext_save_areas[XSTATE_BNDREGS_BIT].offset;
1254         do_xsave_bndregs(env, ptr + off, ra);
1255     }
1256     if (opt & XSTATE_BNDCSR_MASK) {
1257         target_ulong off = x86_ext_save_areas[XSTATE_BNDCSR_BIT].offset;
1258         do_xsave_bndcsr(env, ptr + off, ra);
1259     }
1260     if (opt & XSTATE_PKRU_MASK) {
1261         target_ulong off = x86_ext_save_areas[XSTATE_PKRU_BIT].offset;
1262         do_xsave_pkru(env, ptr + off, ra);
1263     }
1264
1265     /* Update the XSTATE_BV field.  */
1266     old_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
1267     new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
1268     cpu_stq_data_ra(env, ptr + 512, new_bv, ra);
1269 }
1270
1271 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1272 {
1273     do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1274 }
1275
1276 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1277 {
1278     uint64_t inuse = get_xinuse(env);
1279     do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1280 }
1281
1282 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1283 {
1284     int i, fpus, fptag;
1285     target_ulong addr;
1286
1287     cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, ra));
1288     fpus = cpu_lduw_data_ra(env, ptr + 2, ra);
1289     fptag = cpu_lduw_data_ra(env, ptr + 4, ra);
1290     env->fpstt = (fpus >> 11) & 7;
1291     env->fpus = fpus & ~0x3800;
1292     fptag ^= 0xff;
1293     for (i = 0; i < 8; i++) {
1294         env->fptags[i] = ((fptag >> i) & 1);
1295     }
1296
1297     addr = ptr + 0x20;
1298     for (i = 0; i < 8; i++) {
1299         floatx80 tmp = helper_fldt(env, addr, ra);
1300         ST(i) = tmp;
1301         addr += 16;
1302     }
1303 }
1304
/* Load the 32-bit MXCSR image at ptr + 0x18 and install it through
   cpu_set_mxcsr().  */
static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + 0x18, ra));
}
1309
1310 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1311 {
1312     int i, nb_xmm_regs;
1313     target_ulong addr;
1314
1315     if (env->hflags & HF_CS64_MASK) {
1316         nb_xmm_regs = 16;
1317     } else {
1318         nb_xmm_regs = 8;
1319     }
1320
1321     addr = ptr + 0xa0;
1322     for (i = 0; i < nb_xmm_regs; i++) {
1323         env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1324         env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1325         addr += 16;
1326     }
1327 }
1328
1329 static void do_xrstor_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra)
1330 {
1331     int i;
1332
1333     for (i = 0; i < 4; i++, addr += 16) {
1334         env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1335         env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1336     }
1337 }
1338
/* Load the bound configuration (cfgu) and status (sts) registers from
   two consecutive 64-bit values at addr.  The loaded values are stored
   as-is; see the FIXME below.  */
static void do_xrstor_bndcsr(CPUX86State *env, target_ulong addr, uintptr_t ra)
{
    /* FIXME: Extend highest implemented bit of linear address.  */
    env->bndcs_regs.cfgu = cpu_ldq_data_ra(env, addr, ra);
    env->bndcs_regs.sts = cpu_ldq_data_ra(env, addr + 8, ra);
}
1345
/* Load env->pkru from a 64-bit read at addr.  The caller decides whether
   the change requires any follow-up action.  */
static void do_xrstor_pkru(CPUX86State *env, target_ulong addr, uintptr_t ra)
{
    env->pkru = cpu_ldq_data_ra(env, addr, ra);
}
1350
1351 void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1352 {
1353     uintptr_t ra = GETPC();
1354
1355     /* The operand must be 16 byte aligned */
1356     if (ptr & 0xf) {
1357         raise_exception_ra(env, EXCP0D_GPF, ra);
1358     }
1359
1360     do_xrstor_fpu(env, ptr, ra);
1361
1362     if (env->cr[4] & CR4_OSFXSR_MASK) {
1363         do_xrstor_mxcsr(env, ptr, ra);
1364         /* Fast FXRSTOR leaves out the XMM registers */
1365         if (!(env->efer & MSR_EFER_FFXSR)
1366             || (env->hflags & HF_CPL_MASK)
1367             || !(env->hflags & HF_LMA_MASK)) {
1368             do_xrstor_sse(env, ptr, ra);
1369         }
1370     }
1371 }
1372
/* XRSTOR helper (standard form only).
   ptr:  guest address of the XSAVE area; must be 64-byte aligned.
   rfbm: requested-feature bitmap; it is masked with XCR0 first.
   For every component selected by rfbm, the state is loaded from memory
   when its XSTATE_BV bit is set and reset to an initial value otherwise.
   Raises #UD when CR4.OSXSAVE is clear, and #GP for a misaligned
   operand, for XSTATE_BV bit 63 set or bits not enabled in XCR0, or for
   a non-zero value in the 16 bytes following XSTATE_BV.  */
void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uintptr_t ra = GETPC();
    uint64_t xstate_bv, xcomp_bv0, xcomp_bv1;

    rfbm &= env->xcr0;

    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The operand must be 64 byte aligned.  */
    if (ptr & 63) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    xstate_bv = cpu_ldq_data_ra(env, ptr + 512, ra);

    /* NOTE(review): this tests bit 63 of XSTATE_BV; confirm against the
       architecture manual which header field carries the compact-form
       indicator.  */
    if ((int64_t)xstate_bv < 0) {
        /* FIXME: Compact form.  */
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* Standard form.  */

    /* The XSTATE field must not set bits not present in XCR0.  */
    if (xstate_bv & ~env->xcr0) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* The XCOMP field must be zero.  */
    xcomp_bv0 = cpu_ldq_data_ra(env, ptr + 520, ra);
    xcomp_bv1 = cpu_ldq_data_ra(env, ptr + 528, ra);
    if (xcomp_bv0 || xcomp_bv1) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* x87: load from the image, or FNINIT and zero the registers.  */
    if (rfbm & XSTATE_FP_MASK) {
        if (xstate_bv & XSTATE_FP_MASK) {
            do_xrstor_fpu(env, ptr, ra);
        } else {
            helper_fninit(env);
            memset(env->fpregs, 0, sizeof(env->fpregs));
        }
    }
    if (rfbm & XSTATE_SSE_MASK) {
        /* Note that the standard form of XRSTOR loads MXCSR from memory
           whether or not the XSTATE_BV bit is set.  */
        do_xrstor_mxcsr(env, ptr, ra);
        if (xstate_bv & XSTATE_SSE_MASK) {
            do_xrstor_sse(env, ptr, ra);
        } else {
            /* ??? When AVX is implemented, we may have to be more
               selective in the clearing.  */
            memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
        }
    }
    /* BNDREGS: HF_MPX_IU_MASK in hflags mirrors whether the bound
       registers were loaded (set) or cleared (reset).  */
    if (rfbm & XSTATE_BNDREGS_MASK) {
        if (xstate_bv & XSTATE_BNDREGS_MASK) {
            target_ulong off = x86_ext_save_areas[XSTATE_BNDREGS_BIT].offset;
            do_xrstor_bndregs(env, ptr + off, ra);
            env->hflags |= HF_MPX_IU_MASK;
        } else {
            memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
            env->hflags &= ~HF_MPX_IU_MASK;
        }
    }
    /* BNDCSR: hflags are resynchronised on both the load and the
       clear path.  */
    if (rfbm & XSTATE_BNDCSR_MASK) {
        if (xstate_bv & XSTATE_BNDCSR_MASK) {
            target_ulong off = x86_ext_save_areas[XSTATE_BNDCSR_BIT].offset;
            do_xrstor_bndcsr(env, ptr + off, ra);
        } else {
            memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
        }
        cpu_sync_bndcs_hflags(env);
    }
    /* PKRU: the TLB is flushed only when the value actually changed.  */
    if (rfbm & XSTATE_PKRU_MASK) {
        uint64_t old_pkru = env->pkru;
        if (xstate_bv & XSTATE_PKRU_MASK) {
            target_ulong off = x86_ext_save_areas[XSTATE_PKRU_BIT].offset;
            do_xrstor_pkru(env, ptr + off, ra);
        } else {
            env->pkru = 0;
        }
        if (env->pkru != old_pkru) {
            CPUState *cs = CPU(x86_env_get_cpu(env));
            tlb_flush(cs, 1);
        }
    }
}
1464
1465 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1466 {
1467     /* The OS must have enabled XSAVE.  */
1468     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1469         raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1470     }
1471
1472     switch (ecx) {
1473     case 0:
1474         return env->xcr0;
1475     case 1:
1476         if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1477             return env->xcr0 & get_xinuse(env);
1478         }
1479         break;
1480     }
1481     raise_exception_ra(env, EXCP0D_GPF, GETPC());
1482 }
1483
1484 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1485 {
1486     uint32_t dummy, ena_lo, ena_hi;
1487     uint64_t ena;
1488
1489     /* The OS must have enabled XSAVE.  */
1490     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1491         raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1492     }
1493
1494     /* Only XCR0 is defined at present; the FPU may not be disabled.  */
1495     if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
1496         goto do_gpf;
1497     }
1498
1499     /* Disallow enabling unimplemented features.  */
1500     cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
1501     ena = ((uint64_t)ena_hi << 32) | ena_lo;
1502     if (mask & ~ena) {
1503         goto do_gpf;
1504     }
1505
1506     /* Disallow enabling only half of MPX.  */
1507     if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
1508         & XSTATE_BNDCSR_MASK) {
1509         goto do_gpf;
1510     }
1511
1512     env->xcr0 = mask;
1513     cpu_sync_bndcs_hflags(env);
1514     return;
1515
1516  do_gpf:
1517     raise_exception_ra(env, EXCP0D_GPF, GETPC());
1518 }
1519
1520 void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
1521 {
1522     CPU_LDoubleU temp;
1523
1524     temp.d = f;
1525     *pmant = temp.l.lower;
1526     *pexp = temp.l.upper;
1527 }
1528
1529 floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper)
1530 {
1531     CPU_LDoubleU temp;
1532
1533     temp.l.upper = upper;
1534     temp.l.lower = mant;
1535     return temp.d;
1536 }
1537
1538 /* MMX/SSE */
/* XXX: optimize by storing fpstt and fptags in the static cpu state */
1540
/* MXCSR control bits decoded by cpu_set_mxcsr().  */
#define SSE_DAZ             0x0040  /* denormals are zero */
#define SSE_RC_MASK         0x6000  /* rounding-control field */
#define SSE_RC_NEAR         0x0000  /* round to nearest even */
#define SSE_RC_DOWN         0x2000  /* round down */
#define SSE_RC_UP           0x4000  /* round up */
#define SSE_RC_CHOP         0x6000  /* round toward zero */
#define SSE_FZ              0x8000  /* flush to zero */
1548
1549 void cpu_set_mxcsr(CPUX86State *env, uint32_t mxcsr)
1550 {
1551     int rnd_type;
1552
1553     env->mxcsr = mxcsr;
1554
1555     /* set rounding mode */
1556     switch (mxcsr & SSE_RC_MASK) {
1557     default:
1558     case SSE_RC_NEAR:
1559         rnd_type = float_round_nearest_even;
1560         break;
1561     case SSE_RC_DOWN:
1562         rnd_type = float_round_down;
1563         break;
1564     case SSE_RC_UP:
1565         rnd_type = float_round_up;
1566         break;
1567     case SSE_RC_CHOP:
1568         rnd_type = float_round_to_zero;
1569         break;
1570     }
1571     set_float_rounding_mode(rnd_type, &env->sse_status);
1572
1573     /* set denormals are zero */
1574     set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
1575
1576     /* set flush to zero */
1577     set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
1578 }
1579
/* Install a new x87 control word and call update_fp_status() so that
   state derived from it is recomputed.  */
void cpu_set_fpuc(CPUX86State *env, uint16_t val)
{
    env->fpuc = val;
    update_fp_status(env);
}
1585
/* LDMXCSR helper: forwards val unchanged to cpu_set_mxcsr().  */
void helper_ldmxcsr(CPUX86State *env, uint32_t val)
{
    cpu_set_mxcsr(env, val);
}
1590
1591 void helper_enter_mmx(CPUX86State *env)
1592 {
1593     env->fpstt = 0;
1594     *(uint32_t *)(env->fptags) = 0;
1595     *(uint32_t *)(env->fptags + 4) = 0;
1596 }
1597
1598 void helper_emms(CPUX86State *env)
1599 {
1600     /* set to empty state */
1601     *(uint32_t *)(env->fptags) = 0x01010101;
1602     *(uint32_t *)(env->fptags + 4) = 0x01010101;
1603 }
1604
1605 /* XXX: suppress */
1606 void helper_movq(CPUX86State *env, void *d, void *s)
1607 {
1608     *(uint64_t *)d = *(uint64_t *)s;
1609 }
1610
1611 #define SHIFT 0
1612 #include "ops_sse.h"
1613
1614 #define SHIFT 1
1615 #include "ops_sse.h"