target-i386/fpu_helper.c

   1 /*
   2  *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include <math.h>
  22 #include "cpu.h"
  23 #include "exec/helper-proto.h"
  24 #include "qemu/host-utils.h"
  25 #include "exec/exec-all.h"
  26 #include "exec/cpu_ldst.h"
  27
  28 #define FPU_RC_MASK         0xc00
  29 #define FPU_RC_NEAR         0x000
  30 #define FPU_RC_DOWN         0x400
  31 #define FPU_RC_UP           0x800
  32 #define FPU_RC_CHOP         0xc00
  33
  34 #define MAXTAN 9223372036854775808.0
  35
  36 /* the following deal with x86 long double-precision numbers */
  37 #define MAXEXPD 0x7fff
  38 #define EXPBIAS 16383
  39 #define EXPD(fp)        (fp.l.upper & 0x7fff)
  40 #define SIGND(fp)       ((fp.l.upper) & 0x8000)
  41 #define MANTD(fp)       (fp.l.lower)
  42 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
  43
  44 #define FPUS_IE (1 << 0)
  45 #define FPUS_DE (1 << 1)
  46 #define FPUS_ZE (1 << 2)
  47 #define FPUS_OE (1 << 3)
  48 #define FPUS_UE (1 << 4)
  49 #define FPUS_PE (1 << 5)
  50 #define FPUS_SF (1 << 6)
  51 #define FPUS_SE (1 << 7)
  52 #define FPUS_B  (1 << 15)
  53
  54 #define FPUC_EM 0x3f
  55
  56 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
  57 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
  58 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
  59
  60 static inline void fpush(CPUX86State *env)
  61 {
  62     env->fpstt = (env->fpstt - 1) & 7;
  63     env->fptags[env->fpstt] = 0; /* validate stack entry */
  64 }
  65
  66 static inline void fpop(CPUX86State *env)
  67 {
  68     env->fptags[env->fpstt] = 1; /* invalidate stack entry */
  69     env->fpstt = (env->fpstt + 1) & 7;
  70 }
  71
  72 static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
  73                                    uintptr_t retaddr)
  74 {
  75     CPU_LDoubleU temp;
  76
  77     temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
  78     temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
  79     return temp.d;
  80 }
  81
  82 static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
  83                                uintptr_t retaddr)
  84 {
  85     CPU_LDoubleU temp;
  86
  87     temp.d = f;
  88     cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
  89     cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
  90 }
  91
  92 /* x87 FPU helpers */
  93
  94 static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
  95 {
  96     union {
  97         float64 f64;
  98         double d;
  99     } u;
 100
 101     u.f64 = floatx80_to_float64(a, &env->fp_status);
 102     return u.d;
 103 }
 104
 105 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
 106 {
 107     union {
 108         float64 f64;
 109         double d;
 110     } u;
 111
 112     u.d = a;
 113     return float64_to_floatx80(u.f64, &env->fp_status);
 114 }
 115
 116 static void fpu_set_exception(CPUX86State *env, int mask)
 117 {
 118     env->fpus |= mask;
 119     if (env->fpus & (~env->fpuc & FPUC_EM)) {
 120         env->fpus |= FPUS_SE | FPUS_B;
 121     }
 122 }
 123
 124 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
 125 {
 126     if (floatx80_is_zero(b)) {
 127         fpu_set_exception(env, FPUS_ZE);
 128     }
 129     return floatx80_div(a, b, &env->fp_status);
 130 }
 131
 132 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
 133 {
 134     if (env->cr[0] & CR0_NE_MASK) {
 135         raise_exception_ra(env, EXCP10_COPR, retaddr);
 136     }
 137 #if !defined(CONFIG_USER_ONLY)
 138     else {
 139         cpu_set_ferr(env);
 140     }
 141 #endif
 142 }
 143
 144 void helper_flds_FT0(CPUX86State *env, uint32_t val)
 145 {
 146     union {
 147         float32 f;
 148         uint32_t i;
 149     } u;
 150
 151     u.i = val;
 152     FT0 = float32_to_floatx80(u.f, &env->fp_status);
 153 }
 154
 155 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
 156 {
 157     union {
 158         float64 f;
 159         uint64_t i;
 160     } u;
 161
 162     u.i = val;
 163     FT0 = float64_to_floatx80(u.f, &env->fp_status);
 164 }
 165
 166 void helper_fildl_FT0(CPUX86State *env, int32_t val)
 167 {
 168     FT0 = int32_to_floatx80(val, &env->fp_status);
 169 }
 170
 171 void helper_flds_ST0(CPUX86State *env, uint32_t val)
 172 {
 173     int new_fpstt;
 174     union {
 175         float32 f;
 176         uint32_t i;
 177     } u;
 178
 179     new_fpstt = (env->fpstt - 1) & 7;
 180     u.i = val;
 181     env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
 182     env->fpstt = new_fpstt;
 183     env->fptags[new_fpstt] = 0; /* validate stack entry */
 184 }
 185
 186 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
 187 {
 188     int new_fpstt;
 189     union {
 190         float64 f;
 191         uint64_t i;
 192     } u;
 193
 194     new_fpstt = (env->fpstt - 1) & 7;
 195     u.i = val;
 196     env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
 197     env->fpstt = new_fpstt;
 198     env->fptags[new_fpstt] = 0; /* validate stack entry */
 199 }
 200
 201 void helper_fildl_ST0(CPUX86State *env, int32_t val)
 202 {
 203     int new_fpstt;
 204
 205     new_fpstt = (env->fpstt - 1) & 7;
 206     env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
 207     env->fpstt = new_fpstt;
 208     env->fptags[new_fpstt] = 0; /* validate stack entry */
 209 }
 210
 211 void helper_fildll_ST0(CPUX86State *env, int64_t val)
 212 {
 213     int new_fpstt;
 214
 215     new_fpstt = (env->fpstt - 1) & 7;
 216     env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
 217     env->fpstt = new_fpstt;
 218     env->fptags[new_fpstt] = 0; /* validate stack entry */
 219 }
 220
 221 uint32_t helper_fsts_ST0(CPUX86State *env)
 222 {
 223     union {
 224         float32 f;
 225         uint32_t i;
 226     } u;
 227
 228     u.f = floatx80_to_float32(ST0, &env->fp_status);
 229     return u.i;
 230 }
 231
 232 uint64_t helper_fstl_ST0(CPUX86State *env)
 233 {
 234     union {
 235         float64 f;
 236         uint64_t i;
 237     } u;
 238
 239     u.f = floatx80_to_float64(ST0, &env->fp_status);
 240     return u.i;
 241 }
 242
 243 int32_t helper_fist_ST0(CPUX86State *env)
 244 {
 245     int32_t val;
 246
 247     val = floatx80_to_int32(ST0, &env->fp_status);
 248     if (val != (int16_t)val) {
 249         val = -32768;
 250     }
 251     return val;
 252 }
 253
 254 int32_t helper_fistl_ST0(CPUX86State *env)
 255 {
 256     int32_t val;
 257     signed char old_exp_flags;
 258
 259     old_exp_flags = get_float_exception_flags(&env->fp_status);
 260     set_float_exception_flags(0, &env->fp_status);
 261
 262     val = floatx80_to_int32(ST0, &env->fp_status);
 263     if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 264         val = 0x80000000;
 265     }
 266     set_float_exception_flags(get_float_exception_flags(&env->fp_status)
 267                                 | old_exp_flags, &env->fp_status);
 268     return val;
 269 }
 270
 271 int64_t helper_fistll_ST0(CPUX86State *env)
 272 {
 273     int64_t val;
 274     signed char old_exp_flags;
 275
 276     old_exp_flags = get_float_exception_flags(&env->fp_status);
 277     set_float_exception_flags(0, &env->fp_status);
 278
 279     val = floatx80_to_int64(ST0, &env->fp_status);
 280     if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 281         val = 0x8000000000000000ULL;
 282     }
 283     set_float_exception_flags(get_float_exception_flags(&env->fp_status)
 284                                 | old_exp_flags, &env->fp_status);
 285     return val;
 286 }
 287
 288 int32_t helper_fistt_ST0(CPUX86State *env)
 289 {
 290     int32_t val;
 291
 292     val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
 293     if (val != (int16_t)val) {
 294         val = -32768;
 295     }
 296     return val;
 297 }
 298
 299 int32_t helper_fisttl_ST0(CPUX86State *env)
 300 {
 301     int32_t val;
 302
 303     val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
 304     return val;
 305 }
 306
 307 int64_t helper_fisttll_ST0(CPUX86State *env)
 308 {
 309     int64_t val;
 310
 311     val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
 312     return val;
 313 }
 314
 315 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
 316 {
 317     int new_fpstt;
 318
 319     new_fpstt = (env->fpstt - 1) & 7;
 320     env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
 321     env->fpstt = new_fpstt;
 322     env->fptags[new_fpstt] = 0; /* validate stack entry */
 323 }
 324
 325 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
 326 {
 327     helper_fstt(env, ST0, ptr, GETPC());
 328 }
 329
 330 void helper_fpush(CPUX86State *env)
 331 {
 332     fpush(env);
 333 }
 334
 335 void helper_fpop(CPUX86State *env)
 336 {
 337     fpop(env);
 338 }
 339
 340 void helper_fdecstp(CPUX86State *env)
 341 {
 342     env->fpstt = (env->fpstt - 1) & 7;
 343     env->fpus &= ~0x4700;
 344 }
 345
 346 void helper_fincstp(CPUX86State *env)
 347 {
 348     env->fpstt = (env->fpstt + 1) & 7;
 349     env->fpus &= ~0x4700;
 350 }
 351
 352 /* FPU move */
 353
 354 void helper_ffree_STN(CPUX86State *env, int st_index)
 355 {
 356     env->fptags[(env->fpstt + st_index) & 7] = 1;
 357 }
 358
 359 void helper_fmov_ST0_FT0(CPUX86State *env)
 360 {
 361     ST0 = FT0;
 362 }
 363
 364 void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
 365 {
 366     FT0 = ST(st_index);
 367 }
 368
 369 void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
 370 {
 371     ST0 = ST(st_index);
 372 }
 373
 374 void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
 375 {
 376     ST(st_index) = ST0;
 377 }
 378
 379 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
 380 {
 381     floatx80 tmp;
 382
 383     tmp = ST(st_index);
 384     ST(st_index) = ST0;
 385     ST0 = tmp;
 386 }
 387
 388 /* FPU operations */
 389
 390 static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
 391
 392 void helper_fcom_ST0_FT0(CPUX86State *env)
 393 {
 394     int ret;
 395
 396     ret = floatx80_compare(ST0, FT0, &env->fp_status);
 397     env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
 398 }
 399
 400 void helper_fucom_ST0_FT0(CPUX86State *env)
 401 {
 402     int ret;
 403
 404     ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
 405     env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
 406 }
 407
 408 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
 409
 410 void helper_fcomi_ST0_FT0(CPUX86State *env)
 411 {
 412     int eflags;
 413     int ret;
 414
 415     ret = floatx80_compare(ST0, FT0, &env->fp_status);
 416     eflags = cpu_cc_compute_all(env, CC_OP);
 417     eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
 418     CC_SRC = eflags;
 419 }
 420
 421 void helper_fucomi_ST0_FT0(CPUX86State *env)
 422 {
 423     int eflags;
 424     int ret;
 425
 426     ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
 427     eflags = cpu_cc_compute_all(env, CC_OP);
 428     eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
 429     CC_SRC = eflags;
 430 }
 431
 432 void helper_fadd_ST0_FT0(CPUX86State *env)
 433 {
 434     ST0 = floatx80_add(ST0, FT0, &env->fp_status);
 435 }
 436
 437 void helper_fmul_ST0_FT0(CPUX86State *env)
 438 {
 439     ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
 440 }
 441
 442 void helper_fsub_ST0_FT0(CPUX86State *env)
 443 {
 444     ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
 445 }
 446
 447 void helper_fsubr_ST0_FT0(CPUX86State *env)
 448 {
 449     ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
 450 }
 451
 452 void helper_fdiv_ST0_FT0(CPUX86State *env)
 453 {
 454     ST0 = helper_fdiv(env, ST0, FT0);
 455 }
 456
 457 void helper_fdivr_ST0_FT0(CPUX86State *env)
 458 {
 459     ST0 = helper_fdiv(env, FT0, ST0);
 460 }
 461
 462 /* fp operations between STN and ST0 */
 463
 464 void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
 465 {
 466     ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
 467 }
 468
 469 void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
 470 {
 471     ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
 472 }
 473
 474 void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
 475 {
 476     ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
 477 }
 478
 479 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
 480 {
 481     ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
 482 }
 483
 484 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
 485 {
 486     floatx80 *p;
 487
 488     p = &ST(st_index);
 489     *p = helper_fdiv(env, *p, ST0);
 490 }
 491
 492 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
 493 {
 494     floatx80 *p;
 495
 496     p = &ST(st_index);
 497     *p = helper_fdiv(env, ST0, *p);
 498 }
 499
 500 /* misc FPU operations */
 501 void helper_fchs_ST0(CPUX86State *env)
 502 {
 503     ST0 = floatx80_chs(ST0);
 504 }
 505
 506 void helper_fabs_ST0(CPUX86State *env)
 507 {
 508     ST0 = floatx80_abs(ST0);
 509 }
 510
 511 void helper_fld1_ST0(CPUX86State *env)
 512 {
 513     ST0 = floatx80_one;
 514 }
 515
 516 void helper_fldl2t_ST0(CPUX86State *env)
 517 {
 518     ST0 = floatx80_l2t;
 519 }
 520
 521 void helper_fldl2e_ST0(CPUX86State *env)
 522 {
 523     ST0 = floatx80_l2e;
 524 }
 525
 526 void helper_fldpi_ST0(CPUX86State *env)
 527 {
 528     ST0 = floatx80_pi;
 529 }
 530
 531 void helper_fldlg2_ST0(CPUX86State *env)
 532 {
 533     ST0 = floatx80_lg2;
 534 }
 535
 536 void helper_fldln2_ST0(CPUX86State *env)
 537 {
 538     ST0 = floatx80_ln2;
 539 }
 540
 541 void helper_fldz_ST0(CPUX86State *env)
 542 {
 543     ST0 = floatx80_zero;
 544 }
 545
 546 void helper_fldz_FT0(CPUX86State *env)
 547 {
 548     FT0 = floatx80_zero;
 549 }
 550
 551 uint32_t helper_fnstsw(CPUX86State *env)
 552 {
 553     return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
 554 }
 555
 556 uint32_t helper_fnstcw(CPUX86State *env)
 557 {
 558     return env->fpuc;
 559 }
 560
 561 void update_fp_status(CPUX86State *env)
 562 {
 563     int rnd_type;
 564
 565     /* set rounding mode */
 566     switch (env->fpuc & FPU_RC_MASK) {
 567     default:
 568     case FPU_RC_NEAR:
 569         rnd_type = float_round_nearest_even;
 570         break;
 571     case FPU_RC_DOWN:
 572         rnd_type = float_round_down;
 573         break;
 574     case FPU_RC_UP:
 575         rnd_type = float_round_up;
 576         break;
 577     case FPU_RC_CHOP:
 578         rnd_type = float_round_to_zero;
 579         break;
 580     }
 581     set_float_rounding_mode(rnd_type, &env->fp_status);
 582     switch ((env->fpuc >> 8) & 3) {
 583     case 0:
 584         rnd_type = 32;
 585         break;
 586     case 2:
 587         rnd_type = 64;
 588         break;
 589     case 3:
 590     default:
 591         rnd_type = 80;
 592         break;
 593     }
 594     set_floatx80_rounding_precision(rnd_type, &env->fp_status);
 595 }
 596
 597 void helper_fldcw(CPUX86State *env, uint32_t val)
 598 {
 599     cpu_set_fpuc(env, val);
 600 }
 601
 602 void helper_fclex(CPUX86State *env)
 603 {
 604     env->fpus &= 0x7f00;
 605 }
 606
 607 void helper_fwait(CPUX86State *env)
 608 {
 609     if (env->fpus & FPUS_SE) {
 610         fpu_raise_exception(env, GETPC());
 611     }
 612 }
 613
 614 void helper_fninit(CPUX86State *env)
 615 {
 616     env->fpus = 0;
 617     env->fpstt = 0;
 618     cpu_set_fpuc(env, 0x37f);
 619     env->fptags[0] = 1;
 620     env->fptags[1] = 1;
 621     env->fptags[2] = 1;
 622     env->fptags[3] = 1;
 623     env->fptags[4] = 1;
 624     env->fptags[5] = 1;
 625     env->fptags[6] = 1;
 626     env->fptags[7] = 1;
 627 }
 628
 629 /* BCD ops */
 630
 631 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
 632 {
 633     floatx80 tmp;
 634     uint64_t val;
 635     unsigned int v;
 636     int i;
 637
 638     val = 0;
 639     for (i = 8; i >= 0; i--) {
 640         v = cpu_ldub_data_ra(env, ptr + i, GETPC());
 641         val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
 642     }
 643     tmp = int64_to_floatx80(val, &env->fp_status);
 644     if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
 645         tmp = floatx80_chs(tmp);
 646     }
 647     fpush(env);
 648     ST0 = tmp;
 649 }
 650
 651 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
 652 {
 653     int v;
 654     target_ulong mem_ref, mem_end;
 655     int64_t val;
 656
 657     val = floatx80_to_int64(ST0, &env->fp_status);
 658     mem_ref = ptr;
 659     mem_end = mem_ref + 9;
 660     if (val < 0) {
 661         cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
 662         val = -val;
 663     } else {
 664         cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
 665     }
 666     while (mem_ref < mem_end) {
 667         if (val == 0) {
 668             break;
 669         }
 670         v = val % 100;
 671         val = val / 100;
 672         v = ((v / 10) << 4) | (v % 10);
 673         cpu_stb_data_ra(env, mem_ref++, v, GETPC());
 674     }
 675     while (mem_ref < mem_end) {
 676         cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
 677     }
 678 }
 679
 680 void helper_f2xm1(CPUX86State *env)
 681 {
 682     double val = floatx80_to_double(env, ST0);
 683
 684     val = pow(2.0, val) - 1.0;
 685     ST0 = double_to_floatx80(env, val);
 686 }
 687
 688 void helper_fyl2x(CPUX86State *env)
 689 {
 690     double fptemp = floatx80_to_double(env, ST0);
 691
 692     if (fptemp > 0.0) {
 693         fptemp = log(fptemp) / log(2.0); /* log2(ST) */
 694         fptemp *= floatx80_to_double(env, ST1);
 695         ST1 = double_to_floatx80(env, fptemp);
 696         fpop(env);
 697     } else {
 698         env->fpus &= ~0x4700;
 699         env->fpus |= 0x400;
 700     }
 701 }
 702
 703 void helper_fptan(CPUX86State *env)
 704 {
 705     double fptemp = floatx80_to_double(env, ST0);
 706
 707     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 708         env->fpus |= 0x400;
 709     } else {
 710         fptemp = tan(fptemp);
 711         ST0 = double_to_floatx80(env, fptemp);
 712         fpush(env);
 713         ST0 = floatx80_one;
 714         env->fpus &= ~0x400; /* C2 <-- 0 */
 715         /* the above code is for |arg| < 2**52 only */
 716     }
 717 }
 718
 719 void helper_fpatan(CPUX86State *env)
 720 {
 721     double fptemp, fpsrcop;
 722
 723     fpsrcop = floatx80_to_double(env, ST1);
 724     fptemp = floatx80_to_double(env, ST0);
 725     ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
 726     fpop(env);
 727 }
 728
 729 void helper_fxtract(CPUX86State *env)
 730 {
 731     CPU_LDoubleU temp;
 732
 733     temp.d = ST0;
 734
 735     if (floatx80_is_zero(ST0)) {
 736         /* Easy way to generate -inf and raising division by 0 exception */
 737         ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
 738                            &env->fp_status);
 739         fpush(env);
 740         ST0 = temp.d;
 741     } else {
 742         int expdif;
 743
 744         expdif = EXPD(temp) - EXPBIAS;
 745         /* DP exponent bias */
 746         ST0 = int32_to_floatx80(expdif, &env->fp_status);
 747         fpush(env);
 748         BIASEXPONENT(temp);
 749         ST0 = temp.d;
 750     }
 751 }
 752
 753 void helper_fprem1(CPUX86State *env)
 754 {
 755     double st0, st1, dblq, fpsrcop, fptemp;
 756     CPU_LDoubleU fpsrcop1, fptemp1;
 757     int expdif;
 758     signed long long int q;
 759
 760     st0 = floatx80_to_double(env, ST0);
 761     st1 = floatx80_to_double(env, ST1);
 762
 763     if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
 764         ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
 765         env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 766         return;
 767     }
 768
 769     fpsrcop = st0;
 770     fptemp = st1;
 771     fpsrcop1.d = ST0;
 772     fptemp1.d = ST1;
 773     expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
 774
 775     if (expdif < 0) {
 776         /* optimisation? taken from the AMD docs */
 777         env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 778         /* ST0 is unchanged */
 779         return;
 780     }
 781
 782     if (expdif < 53) {
 783         dblq = fpsrcop / fptemp;
 784         /* round dblq towards nearest integer */
 785         dblq = rint(dblq);
 786         st0 = fpsrcop - fptemp * dblq;
 787
 788         /* convert dblq to q by truncating towards zero */
 789         if (dblq < 0.0) {
 790             q = (signed long long int)(-dblq);
 791         } else {
 792             q = (signed long long int)dblq;
 793         }
 794
 795         env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 796         /* (C0,C3,C1) <-- (q2,q1,q0) */
 797         env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
 798         env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
 799         env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
 800     } else {
 801         env->fpus |= 0x400;  /* C2 <-- 1 */
 802         fptemp = pow(2.0, expdif - 50);
 803         fpsrcop = (st0 / st1) / fptemp;
 804         /* fpsrcop = integer obtained by chopping */
 805         fpsrcop = (fpsrcop < 0.0) ?
 806                   -(floor(fabs(fpsrcop))) : floor(fpsrcop);
 807         st0 -= (st1 * fpsrcop * fptemp);
 808     }
 809     ST0 = double_to_floatx80(env, st0);
 810 }
 811
 812 void helper_fprem(CPUX86State *env)
 813 {
 814     double st0, st1, dblq, fpsrcop, fptemp;
 815     CPU_LDoubleU fpsrcop1, fptemp1;
 816     int expdif;
 817     signed long long int q;
 818
 819     st0 = floatx80_to_double(env, ST0);
 820     st1 = floatx80_to_double(env, ST1);
 821
 822     if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
 823         ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
 824         env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 825         return;
 826     }
 827
 828     fpsrcop = st0;
 829     fptemp = st1;
 830     fpsrcop1.d = ST0;
 831     fptemp1.d = ST1;
 832     expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
 833
 834     if (expdif < 0) {
 835         /* optimisation? taken from the AMD docs */
 836         env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 837         /* ST0 is unchanged */
 838         return;
 839     }
 840
 841     if (expdif < 53) {
 842         dblq = fpsrcop / fptemp; /* ST0 / ST1 */
 843         /* round dblq towards zero */
 844         dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
 845         st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
 846
 847         /* convert dblq to q by truncating towards zero */
 848         if (dblq < 0.0) {
 849             q = (signed long long int)(-dblq);
 850         } else {
 851             q = (signed long long int)dblq;
 852         }
 853
 854         env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 855         /* (C0,C3,C1) <-- (q2,q1,q0) */
 856         env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
 857         env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
 858         env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
 859     } else {
 860         int N = 32 + (expdif % 32); /* as per AMD docs */
 861
 862         env->fpus |= 0x400;  /* C2 <-- 1 */
 863         fptemp = pow(2.0, (double)(expdif - N));
 864         fpsrcop = (st0 / st1) / fptemp;
 865         /* fpsrcop = integer obtained by chopping */
 866         fpsrcop = (fpsrcop < 0.0) ?
 867                   -(floor(fabs(fpsrcop))) : floor(fpsrcop);
 868         st0 -= (st1 * fpsrcop * fptemp);
 869     }
 870     ST0 = double_to_floatx80(env, st0);
 871 }
 872
 873 void helper_fyl2xp1(CPUX86State *env)
 874 {
 875     double fptemp = floatx80_to_double(env, ST0);
 876
 877     if ((fptemp + 1.0) > 0.0) {
 878         fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
 879         fptemp *= floatx80_to_double(env, ST1);
 880         ST1 = double_to_floatx80(env, fptemp);
 881         fpop(env);
 882     } else {
 883         env->fpus &= ~0x4700;
 884         env->fpus |= 0x400;
 885     }
 886 }
 887
 888 void helper_fsqrt(CPUX86State *env)
 889 {
 890     if (floatx80_is_neg(ST0)) {
 891         env->fpus &= ~0x4700;  /* (C3,C2,C1,C0) <-- 0000 */
 892         env->fpus |= 0x400;
 893     }
 894     ST0 = floatx80_sqrt(ST0, &env->fp_status);
 895 }
 896
 897 void helper_fsincos(CPUX86State *env)
 898 {
 899     double fptemp = floatx80_to_double(env, ST0);
 900
 901     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 902         env->fpus |= 0x400;
 903     } else {
 904         ST0 = double_to_floatx80(env, sin(fptemp));
 905         fpush(env);
 906         ST0 = double_to_floatx80(env, cos(fptemp));
 907         env->fpus &= ~0x400;  /* C2 <-- 0 */
 908         /* the above code is for |arg| < 2**63 only */
 909     }
 910 }
 911
 912 void helper_frndint(CPUX86State *env)
 913 {
 914     ST0 = floatx80_round_to_int(ST0, &env->fp_status);
 915 }
 916
 917 void helper_fscale(CPUX86State *env)
 918 {
 919     if (floatx80_is_any_nan(ST1)) {
 920         ST0 = ST1;
 921     } else {
 922         int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
 923         ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
 924     }
 925 }
 926
 927 void helper_fsin(CPUX86State *env)
 928 {
 929     double fptemp = floatx80_to_double(env, ST0);
 930
 931     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 932         env->fpus |= 0x400;
 933     } else {
 934         ST0 = double_to_floatx80(env, sin(fptemp));
 935         env->fpus &= ~0x400;  /* C2 <-- 0 */
 936         /* the above code is for |arg| < 2**53 only */
 937     }
 938 }
 939
 940 void helper_fcos(CPUX86State *env)
 941 {
 942     double fptemp = floatx80_to_double(env, ST0);
 943
 944     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 945         env->fpus |= 0x400;
 946     } else {
 947         ST0 = double_to_floatx80(env, cos(fptemp));
 948         env->fpus &= ~0x400;  /* C2 <-- 0 */
 949         /* the above code is for |arg| < 2**63 only */
 950     }
 951 }
 952
 953 void helper_fxam_ST0(CPUX86State *env)
 954 {
 955     CPU_LDoubleU temp;
 956     int expdif;
 957
 958     temp.d = ST0;
 959
 960     env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 961     if (SIGND(temp)) {
 962         env->fpus |= 0x200; /* C1 <-- 1 */
 963     }
 964
 965     /* XXX: test fptags too */
 966     expdif = EXPD(temp);
 967     if (expdif == MAXEXPD) {
 968         if (MANTD(temp) == 0x8000000000000000ULL) {
 969             env->fpus |= 0x500; /* Infinity */
 970         } else {
 971             env->fpus |= 0x100; /* NaN */
 972         }
 973     } else if (expdif == 0) {
 974         if (MANTD(temp) == 0) {
 975             env->fpus |=  0x4000; /* Zero */
 976         } else {
 977             env->fpus |= 0x4400; /* Denormal */
 978         }
 979     } else {
 980         env->fpus |= 0x400;
 981     }
 982 }
 983
 984 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
 985                       uintptr_t retaddr)
 986 {
 987     int fpus, fptag, exp, i;
 988     uint64_t mant;
 989     CPU_LDoubleU tmp;
 990
 991     fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
 992     fptag = 0;
 993     for (i = 7; i >= 0; i--) {
 994         fptag <<= 2;
 995         if (env->fptags[i]) {
 996             fptag |= 3;
 997         } else {
 998             tmp.d = env->fpregs[i].d;
 999             exp = EXPD(tmp);
1000             mant = MANTD(tmp);
1001             if (exp == 0 && mant == 0) {
1002                 /* zero */
1003                 fptag |= 1;
1004             } else if (exp == 0 || exp == MAXEXPD
1005                        || (mant & (1LL << 63)) == 0) {
1006                 /* NaNs, infinity, denormal */
1007                 fptag |= 2;
1008             }
1009         }
1010     }
1011     if (data32) {
1012         /* 32 bit */
1013         cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1014         cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1015         cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1016         cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1017         cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1018         cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1019         cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
1020     } else {
1021         /* 16 bit */
1022         cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1023         cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1024         cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1025         cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1026         cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1027         cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1028         cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
1029     }
1030 }
1031
1032 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1033 {
1034     do_fstenv(env, ptr, data32, GETPC());
1035 }
1036
1037 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1038                       uintptr_t retaddr)
1039 {
1040     int i, fpus, fptag;
1041
1042     if (data32) {
1043         cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1044         fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1045         fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1046     } else {
1047         cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1048         fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1049         fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1050     }
1051     env->fpstt = (fpus >> 11) & 7;
1052     env->fpus = fpus & ~0x3800;
1053     for (i = 0; i < 8; i++) {
1054         env->fptags[i] = ((fptag & 3) == 3);
1055         fptag >>= 2;
1056     }
1057 }
1058
1059 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1060 {
1061     do_fldenv(env, ptr, data32, GETPC());
1062 }
1063
1064 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1065 {
1066     floatx80 tmp;
1067     int i;
1068
1069     do_fstenv(env, ptr, data32, GETPC());
1070
1071     ptr += (14 << data32);
1072     for (i = 0; i < 8; i++) {
1073         tmp = ST(i);
1074         helper_fstt(env, tmp, ptr, GETPC());
1075         ptr += 10;
1076     }
1077
1078     /* fninit */
1079     env->fpus = 0;
1080     env->fpstt = 0;
1081     cpu_set_fpuc(env, 0x37f);
1082     env->fptags[0] = 1;
1083     env->fptags[1] = 1;
1084     env->fptags[2] = 1;
1085     env->fptags[3] = 1;
1086     env->fptags[4] = 1;
1087     env->fptags[5] = 1;
1088     env->fptags[6] = 1;
1089     env->fptags[7] = 1;
1090 }
1091
1092 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
1093 {
1094     floatx80 tmp;
1095     int i;
1096
1097     do_fldenv(env, ptr, data32, GETPC());
1098     ptr += (14 << data32);
1099
1100     for (i = 0; i < 8; i++) {
1101         tmp = helper_fldt(env, ptr, GETPC());
1102         ST(i) = tmp;
1103         ptr += 10;
1104     }
1105 }
1106
1107 #if defined(CONFIG_USER_ONLY)
1108 void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
1109 {
1110     helper_fsave(env, ptr, data32);
1111 }
1112
1113 void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
1114 {
1115     helper_frstor(env, ptr, data32);
1116 }
1117 #endif
1118
1119 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1120 {
1121     int fpus, fptag, i;
1122     target_ulong addr;
1123
1124     fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1125     fptag = 0;
1126     for (i = 0; i < 8; i++) {
1127         fptag |= (env->fptags[i] << i);
1128     }
1129     cpu_stw_data_ra(env, ptr, env->fpuc, ra);
1130     cpu_stw_data_ra(env, ptr + 2, fpus, ra);
1131     cpu_stw_data_ra(env, ptr + 4, fptag ^ 0xff, ra);
1132
1133     /* In 32-bit mode this is eip, sel, dp, sel.
1134        In 64-bit mode this is rip, rdp.
1135        But in either case we don't write actual data, just zeros.  */
1136     cpu_stq_data_ra(env, ptr + 0x08, 0, ra); /* eip+sel; rip */
1137     cpu_stq_data_ra(env, ptr + 0x10, 0, ra); /* edp+sel; rdp */
1138
1139     addr = ptr + 0x20;
1140     for (i = 0; i < 8; i++) {
1141         floatx80 tmp = ST(i);
1142         helper_fstt(env, tmp, addr, ra);
1143         addr += 16;
1144     }
1145 }
1146
1147 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1148 {
1149     cpu_stl_data_ra(env, ptr + 0x18, env->mxcsr, ra); /* mxcsr */
1150     cpu_stl_data_ra(env, ptr + 0x1c, 0x0000ffff, ra); /* mxcsr_mask */
1151 }
1152
1153 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1154 {
1155     int i, nb_xmm_regs;
1156     target_ulong addr;
1157
1158     if (env->hflags & HF_CS64_MASK) {
1159         nb_xmm_regs = 16;
1160     } else {
1161         nb_xmm_regs = 8;
1162     }
1163
1164     addr = ptr + 0xa0;
1165     for (i = 0; i < nb_xmm_regs; i++) {
1166         cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1167         cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1168         addr += 16;
1169     }
1170 }
1171
1172 static void do_xsave_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra)
1173 {
1174     int i;
1175
1176     for (i = 0; i < 4; i++, addr += 16) {
1177         cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1178         cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1179     }
1180 }
1181
1182 static void do_xsave_bndcsr(CPUX86State *env, target_ulong addr, uintptr_t ra)
1183 {
1184     cpu_stq_data_ra(env, addr, env->bndcs_regs.cfgu, ra);
1185     cpu_stq_data_ra(env, addr + 8, env->bndcs_regs.sts, ra);
1186 }
1187
1188 static void do_xsave_pkru(CPUX86State *env, target_ulong addr, uintptr_t ra)
1189 {
1190     cpu_stq_data_ra(env, addr, env->pkru, ra);
1191 }
1192
1193 void helper_fxsave(CPUX86State *env, target_ulong ptr)
1194 {
1195     uintptr_t ra = GETPC();
1196
1197     /* The operand must be 16 byte aligned */
1198     if (ptr & 0xf) {
1199         raise_exception_ra(env, EXCP0D_GPF, ra);
1200     }
1201
1202     do_xsave_fpu(env, ptr, ra);
1203
1204     if (env->cr[4] & CR4_OSFXSR_MASK) {
1205         do_xsave_mxcsr(env, ptr, ra);
1206         /* Fast FXSAVE leaves out the XMM registers */
1207         if (!(env->efer & MSR_EFER_FFXSR)
1208             || (env->hflags & HF_CPL_MASK)
1209             || !(env->hflags & HF_LMA_MASK)) {
1210             do_xsave_sse(env, ptr, ra);
1211         }
1212     }
1213 }
1214
1215 static uint64_t get_xinuse(CPUX86State *env)
1216 {
1217     uint64_t inuse = -1;
1218
1219     /* For the most part, we don't track XINUSE.  We could calculate it
1220        here for all components, but it's probably less work to simply
1221        indicate in use.  That said, the state of BNDREGS is important
1222        enough to track in HFLAGS, so we might as well use that here.  */
1223     if ((env->hflags & HF_MPX_IU_MASK) == 0) {
1224        inuse &= ~XSTATE_BNDREGS_MASK;
1225     }
1226     return inuse;
1227 }
1228
1229 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1230                      uint64_t inuse, uint64_t opt, uintptr_t ra)
1231 {
1232     uint64_t old_bv, new_bv;
1233
1234     /* The OS must have enabled XSAVE.  */
1235     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1236         raise_exception_ra(env, EXCP06_ILLOP, ra);
1237     }
1238
1239     /* The operand must be 64 byte aligned.  */
1240     if (ptr & 63) {
1241         raise_exception_ra(env, EXCP0D_GPF, ra);
1242     }
1243
1244     /* Never save anything not enabled by XCR0.  */
1245     rfbm &= env->xcr0;
1246     opt &= rfbm;
1247
1248     if (opt & XSTATE_FP_MASK) {
1249         do_xsave_fpu(env, ptr, ra);
1250     }
1251     if (rfbm & XSTATE_SSE_MASK) {
1252         /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
1253         do_xsave_mxcsr(env, ptr, ra);
1254     }
1255     if (opt & XSTATE_SSE_MASK) {
1256         do_xsave_sse(env, ptr, ra);
1257     }
1258     if (opt & XSTATE_BNDREGS_MASK) {
1259         target_ulong off = x86_ext_save_areas[XSTATE_BNDREGS_BIT].offset;
1260         do_xsave_bndregs(env, ptr + off, ra);
1261     }
1262     if (opt & XSTATE_BNDCSR_MASK) {
1263         target_ulong off = x86_ext_save_areas[XSTATE_BNDCSR_BIT].offset;
1264         do_xsave_bndcsr(env, ptr + off, ra);
1265     }
1266     if (opt & XSTATE_PKRU_MASK) {
1267         target_ulong off = x86_ext_save_areas[XSTATE_PKRU_BIT].offset;
1268         do_xsave_pkru(env, ptr + off, ra);
1269     }
1270
1271     /* Update the XSTATE_BV field.  */
1272     old_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
1273     new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
1274     cpu_stq_data_ra(env, ptr + 512, new_bv, ra);
1275 }
1276
1277 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1278 {
1279     do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1280 }
1281
1282 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1283 {
1284     uint64_t inuse = get_xinuse(env);
1285     do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1286 }
1287
1288 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1289 {
1290     int i, fpus, fptag;
1291     target_ulong addr;
1292
1293     cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, ra));
1294     fpus = cpu_lduw_data_ra(env, ptr + 2, ra);
1295     fptag = cpu_lduw_data_ra(env, ptr + 4, ra);
1296     env->fpstt = (fpus >> 11) & 7;
1297     env->fpus = fpus & ~0x3800;
1298     fptag ^= 0xff;
1299     for (i = 0; i < 8; i++) {
1300         env->fptags[i] = ((fptag >> i) & 1);
1301     }
1302
1303     addr = ptr + 0x20;
1304     for (i = 0; i < 8; i++) {
1305         floatx80 tmp = helper_fldt(env, addr, ra);
1306         ST(i) = tmp;
1307         addr += 16;
1308     }
1309 }
1310
1311 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1312 {
1313     cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + 0x18, ra));
1314 }
1315
1316 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1317 {
1318     int i, nb_xmm_regs;
1319     target_ulong addr;
1320
1321     if (env->hflags & HF_CS64_MASK) {
1322         nb_xmm_regs = 16;
1323     } else {
1324         nb_xmm_regs = 8;
1325     }
1326
1327     addr = ptr + 0xa0;
1328     for (i = 0; i < nb_xmm_regs; i++) {
1329         env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1330         env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1331         addr += 16;
1332     }
1333 }
1334
1335 static void do_xrstor_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra)
1336 {
1337     int i;
1338
1339     for (i = 0; i < 4; i++, addr += 16) {
1340         env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1341         env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1342     }
1343 }
1344
1345 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong addr, uintptr_t ra)
1346 {
1347     /* FIXME: Extend highest implemented bit of linear address.  */
1348     env->bndcs_regs.cfgu = cpu_ldq_data_ra(env, addr, ra);
1349     env->bndcs_regs.sts = cpu_ldq_data_ra(env, addr + 8, ra);
1350 }
1351
1352 static void do_xrstor_pkru(CPUX86State *env, target_ulong addr, uintptr_t ra)
1353 {
1354     env->pkru = cpu_ldq_data_ra(env, addr, ra);
1355 }
1356
1357 void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1358 {
1359     uintptr_t ra = GETPC();
1360
1361     /* The operand must be 16 byte aligned */
1362     if (ptr & 0xf) {
1363         raise_exception_ra(env, EXCP0D_GPF, ra);
1364     }
1365
1366     do_xrstor_fpu(env, ptr, ra);
1367
1368     if (env->cr[4] & CR4_OSFXSR_MASK) {
1369         do_xrstor_mxcsr(env, ptr, ra);
1370         /* Fast FXRSTOR leaves out the XMM registers */
1371         if (!(env->efer & MSR_EFER_FFXSR)
1372             || (env->hflags & HF_CPL_MASK)
1373             || !(env->hflags & HF_LMA_MASK)) {
1374             do_xrstor_sse(env, ptr, ra);
1375         }
1376     }
1377 }
1378
1379 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1380 {
1381     uintptr_t ra = GETPC();
1382     uint64_t xstate_bv, xcomp_bv0, xcomp_bv1;
1383
1384     rfbm &= env->xcr0;
1385
1386     /* The OS must have enabled XSAVE.  */
1387     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1388         raise_exception_ra(env, EXCP06_ILLOP, ra);
1389     }
1390
1391     /* The operand must be 64 byte aligned.  */
1392     if (ptr & 63) {
1393         raise_exception_ra(env, EXCP0D_GPF, ra);
1394     }
1395
1396     xstate_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
1397
1398     if ((int64_t)xstate_bv < 0) {
1399         /* FIXME: Compact form.  */
1400         raise_exception_ra(env, EXCP0D_GPF, ra);
1401     }
1402
1403     /* Standard form.  */
1404
1405     /* The XSTATE field must not set bits not present in XCR0.  */
1406     if (xstate_bv & ~env->xcr0) {
1407         raise_exception_ra(env, EXCP0D_GPF, ra);
1408     }
1409
1410     /* The XCOMP field must be zero.  */
1411     xcomp_bv0 = cpu_ldq_data_ra(env, ptr + 520, ra);
1412     xcomp_bv1 = cpu_ldq_data_ra(env, ptr + 528, ra);
1413     if (xcomp_bv0 || xcomp_bv1) {
1414         raise_exception_ra(env, EXCP0D_GPF, ra);
1415     }
1416
1417     if (rfbm & XSTATE_FP_MASK) {
1418         if (xstate_bv & XSTATE_FP_MASK) {
1419             do_xrstor_fpu(env, ptr, ra);
1420         } else {
1421             helper_fninit(env);
1422             memset(env->fpregs, 0, sizeof(env->fpregs));
1423         }
1424     }
1425     if (rfbm & XSTATE_SSE_MASK) {
1426         /* Note that the standard form of XRSTOR loads MXCSR from memory
1427            whether or not the XSTATE_BV bit is set.  */
1428         do_xrstor_mxcsr(env, ptr, ra);
1429         if (xstate_bv & XSTATE_SSE_MASK) {
1430             do_xrstor_sse(env, ptr, ra);
1431         } else {
1432             /* ??? When AVX is implemented, we may have to be more
1433                selective in the clearing.  */
1434             memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
1435         }
1436     }
1437     if (rfbm & XSTATE_BNDREGS_MASK) {
1438         if (xstate_bv & XSTATE_BNDREGS_MASK) {
1439             target_ulong off = x86_ext_save_areas[XSTATE_BNDREGS_BIT].offset;
1440             do_xrstor_bndregs(env, ptr + off, ra);
1441             env->hflags |= HF_MPX_IU_MASK;
1442         } else {
1443             memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
1444             env->hflags &= ~HF_MPX_IU_MASK;
1445         }
1446     }
1447     if (rfbm & XSTATE_BNDCSR_MASK) {
1448         if (xstate_bv & XSTATE_BNDCSR_MASK) {
1449             target_ulong off = x86_ext_save_areas[XSTATE_BNDCSR_BIT].offset;
1450             do_xrstor_bndcsr(env, ptr + off, ra);
1451         } else {
1452             memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
1453         }
1454         cpu_sync_bndcs_hflags(env);
1455     }
1456     if (rfbm & XSTATE_PKRU_MASK) {
1457         uint64_t old_pkru = env->pkru;
1458         if (xstate_bv & XSTATE_PKRU_MASK) {
1459             target_ulong off = x86_ext_save_areas[XSTATE_PKRU_BIT].offset;
1460             do_xrstor_pkru(env, ptr + off, ra);
1461         } else {
1462             env->pkru = 0;
1463         }
1464         if (env->pkru != old_pkru) {
1465             CPUState *cs = CPU(x86_env_get_cpu(env));
1466             tlb_flush(cs, 1);
1467         }
1468     }
1469 }
1470
1471 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1472 {
1473     /* The OS must have enabled XSAVE.  */
1474     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1475         raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1476     }
1477
1478     switch (ecx) {
1479     case 0:
1480         return env->xcr0;
1481     case 1:
1482         if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1483             return env->xcr0 & get_xinuse(env);
1484         }
1485         break;
1486     }
1487     raise_exception_ra(env, EXCP0D_GPF, GETPC());
1488 }
1489
1490 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1491 {
1492     uint32_t dummy, ena_lo, ena_hi;
1493     uint64_t ena;
1494
1495     /* The OS must have enabled XSAVE.  */
1496     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1497         raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1498     }
1499
1500     /* Only XCR0 is defined at present; the FPU may not be disabled.  */
1501     if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
1502         goto do_gpf;
1503     }
1504
1505     /* Disallow enabling unimplemented features.  */
1506     cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
1507     ena = ((uint64_t)ena_hi << 32) | ena_lo;
1508     if (mask & ~ena) {
1509         goto do_gpf;
1510     }
1511
1512     /* Disallow enabling only half of MPX.  */
1513     if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
1514         & XSTATE_BNDCSR_MASK) {
1515         goto do_gpf;
1516     }
1517
1518     env->xcr0 = mask;
1519     cpu_sync_bndcs_hflags(env);
1520     return;
1521
1522  do_gpf:
1523     raise_exception_ra(env, EXCP0D_GPF, GETPC());
1524 }
1525
1526 void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
1527 {
1528     CPU_LDoubleU temp;
1529
1530     temp.d = f;
1531     *pmant = temp.l.lower;
1532     *pexp = temp.l.upper;
1533 }
1534
1535 floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper)
1536 {
1537     CPU_LDoubleU temp;
1538
1539     temp.l.upper = upper;
1540     temp.l.lower = mant;
1541     return temp.d;
1542 }
1543
1544 /* MMX/SSE */
/* XXX: optimize by storing fpstt and fptags in the static cpu state */
1546
/* MXCSR fields decoded by cpu_set_mxcsr(). */
#define SSE_DAZ             0x0040  /* selects flush-inputs-to-zero */
#define SSE_RC_MASK         0x6000  /* rounding-control field */
#define SSE_RC_NEAR         0x0000  /* round to nearest even */
#define SSE_RC_DOWN         0x2000  /* round down */
#define SSE_RC_UP           0x4000  /* round up */
#define SSE_RC_CHOP         0x6000  /* round toward zero */
#define SSE_FZ              0x8000  /* selects flush-to-zero */
1554
1555 void cpu_set_mxcsr(CPUX86State *env, uint32_t mxcsr)
1556 {
1557     int rnd_type;
1558
1559     env->mxcsr = mxcsr;
1560
1561     /* set rounding mode */
1562     switch (mxcsr & SSE_RC_MASK) {
1563     default:
1564     case SSE_RC_NEAR:
1565         rnd_type = float_round_nearest_even;
1566         break;
1567     case SSE_RC_DOWN:
1568         rnd_type = float_round_down;
1569         break;
1570     case SSE_RC_UP:
1571         rnd_type = float_round_up;
1572         break;
1573     case SSE_RC_CHOP:
1574         rnd_type = float_round_to_zero;
1575         break;
1576     }
1577     set_float_rounding_mode(rnd_type, &env->sse_status);
1578
1579     /* set denormals are zero */
1580     set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
1581
1582     /* set flush to zero */
1583     set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
1584 }
1585
/*
 * Install VAL as the x87 control word and call update_fp_status() so
 * the derived floating-point status follows the new value.
 */
void cpu_set_fpuc(CPUX86State *env, uint16_t val)
{
    env->fpuc = val;
    update_fp_status(env);
}
1591
/* TCG helper: load VAL into MXCSR via cpu_set_mxcsr(). */
void helper_ldmxcsr(CPUX86State *env, uint32_t val)
{
    cpu_set_mxcsr(env, val);
}
1596
1597 void helper_enter_mmx(CPUX86State *env)
1598 {
1599     env->fpstt = 0;
1600     *(uint32_t *)(env->fptags) = 0;
1601     *(uint32_t *)(env->fptags + 4) = 0;
1602 }
1603
1604 void helper_emms(CPUX86State *env)
1605 {
1606     /* set to empty state */
1607     *(uint32_t *)(env->fptags) = 0x01010101;
1608     *(uint32_t *)(env->fptags + 4) = 0x01010101;
1609 }
1610
1611 /* XXX: suppress */
1612 void helper_movq(CPUX86State *env, void *d, void *s)
1613 {
1614     *(uint64_t *)d = *(uint64_t *)s;
1615 }
1616
1617 #define SHIFT 0
1618 #include "ops_sse.h"
1619
1620 #define SHIFT 1
1621 #include "ops_sse.h"