target-i386/fpu_helper.c

   1 /*
   2  *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include <math.h>
  22 #include "cpu.h"
  23 #include "exec/helper-proto.h"
  24 #include "qemu/host-utils.h"
  25 #include "exec/cpu_ldst.h"
  26
/*
 * Rounding-control (RC) field of the x87 control word (env->fpuc).
 * update_fp_status() masks the control word with FPU_RC_MASK and maps the
 * four possible values onto softfloat rounding modes.
 */
#define FPU_RC_MASK         0xc00
#define FPU_RC_NEAR         0x000
#define FPU_RC_DOWN         0x400
#define FPU_RC_UP           0x800
#define FPU_RC_CHOP         0xc00

/*
 * 2^63 as a double.  fptan/fsin/fcos/fsincos refuse operands whose
 * magnitude exceeds this and set C2 instead of computing a result.
 */
#define MAXTAN 9223372036854775808.0
  34
  35 /* the following deal with x86 long double-precision numbers */
  36 #define MAXEXPD 0x7fff
  37 #define EXPBIAS 16383
  38 #define EXPD(fp)        (fp.l.upper & 0x7fff)
  39 #define SIGND(fp)       ((fp.l.upper) & 0x8000)
  40 #define MANTD(fp)       (fp.l.lower)
  41 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
  42
/*
 * Bits of the x87 status word (env->fpus).  fpu_set_exception() ORs one of
 * these masks into the status word.
 * NOTE(review): the per-bit meanings below follow the usual x87 naming
 * (invalid, denormal, zero-divide, overflow, underflow, precision,
 * stack fault, summary, busy) - confirm against the architecture manual.
 */
#define FPUS_IE (1 << 0)
#define FPUS_DE (1 << 1)
#define FPUS_ZE (1 << 2)
#define FPUS_OE (1 << 3)
#define FPUS_UE (1 << 4)
#define FPUS_PE (1 << 5)
#define FPUS_SF (1 << 6)
#define FPUS_SE (1 << 7)
#define FPUS_B  (1 << 15)

/*
 * Low six bits of the control word; fpu_set_exception() compares them
 * against the status flags to detect an unmasked pending exception.
 */
#define FPUC_EM 0x3f

/*
 * Extended-precision constants loaded by helper_fldlg2_ST0(),
 * helper_fldl2e_ST0() and helper_fldl2t_ST0() respectively
 * (approximately 0.30103, 1.44270 and 3.32193).
 */
#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
  58
  59 static inline void fpush(CPUX86State *env)
  60 {
  61     env->fpstt = (env->fpstt - 1) & 7;
  62     env->fptags[env->fpstt] = 0; /* validate stack entry */
  63 }
  64
  65 static inline void fpop(CPUX86State *env)
  66 {
  67     env->fptags[env->fpstt] = 1; /* invalidate stack entry */
  68     env->fpstt = (env->fpstt + 1) & 7;
  69 }
  70
  71 static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
  72                                    uintptr_t retaddr)
  73 {
  74     CPU_LDoubleU temp;
  75
  76     temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
  77     temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
  78     return temp.d;
  79 }
  80
  81 static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
  82                                uintptr_t retaddr)
  83 {
  84     CPU_LDoubleU temp;
  85
  86     temp.d = f;
  87     cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
  88     cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
  89 }
  90
  91 /* x87 FPU helpers */
  92
  93 static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
  94 {
  95     union {
  96         float64 f64;
  97         double d;
  98     } u;
  99
 100     u.f64 = floatx80_to_float64(a, &env->fp_status);
 101     return u.d;
 102 }
 103
 104 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
 105 {
 106     union {
 107         float64 f64;
 108         double d;
 109     } u;
 110
 111     u.d = a;
 112     return float64_to_floatx80(u.f64, &env->fp_status);
 113 }
 114
 115 static void fpu_set_exception(CPUX86State *env, int mask)
 116 {
 117     env->fpus |= mask;
 118     if (env->fpus & (~env->fpuc & FPUC_EM)) {
 119         env->fpus |= FPUS_SE | FPUS_B;
 120     }
 121 }
 122
 123 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
 124 {
 125     if (floatx80_is_zero(b)) {
 126         fpu_set_exception(env, FPUS_ZE);
 127     }
 128     return floatx80_div(a, b, &env->fp_status);
 129 }
 130
 131 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
 132 {
 133     if (env->cr[0] & CR0_NE_MASK) {
 134         raise_exception_ra(env, EXCP10_COPR, retaddr);
 135     }
 136 #if !defined(CONFIG_USER_ONLY)
 137     else {
 138         cpu_set_ferr(env);
 139     }
 140 #endif
 141 }
 142
 143 void helper_flds_FT0(CPUX86State *env, uint32_t val)
 144 {
 145     union {
 146         float32 f;
 147         uint32_t i;
 148     } u;
 149
 150     u.i = val;
 151     FT0 = float32_to_floatx80(u.f, &env->fp_status);
 152 }
 153
 154 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
 155 {
 156     union {
 157         float64 f;
 158         uint64_t i;
 159     } u;
 160
 161     u.i = val;
 162     FT0 = float64_to_floatx80(u.f, &env->fp_status);
 163 }
 164
 165 void helper_fildl_FT0(CPUX86State *env, int32_t val)
 166 {
 167     FT0 = int32_to_floatx80(val, &env->fp_status);
 168 }
 169
 170 void helper_flds_ST0(CPUX86State *env, uint32_t val)
 171 {
 172     int new_fpstt;
 173     union {
 174         float32 f;
 175         uint32_t i;
 176     } u;
 177
 178     new_fpstt = (env->fpstt - 1) & 7;
 179     u.i = val;
 180     env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
 181     env->fpstt = new_fpstt;
 182     env->fptags[new_fpstt] = 0; /* validate stack entry */
 183 }
 184
 185 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
 186 {
 187     int new_fpstt;
 188     union {
 189         float64 f;
 190         uint64_t i;
 191     } u;
 192
 193     new_fpstt = (env->fpstt - 1) & 7;
 194     u.i = val;
 195     env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
 196     env->fpstt = new_fpstt;
 197     env->fptags[new_fpstt] = 0; /* validate stack entry */
 198 }
 199
 200 void helper_fildl_ST0(CPUX86State *env, int32_t val)
 201 {
 202     int new_fpstt;
 203
 204     new_fpstt = (env->fpstt - 1) & 7;
 205     env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
 206     env->fpstt = new_fpstt;
 207     env->fptags[new_fpstt] = 0; /* validate stack entry */
 208 }
 209
 210 void helper_fildll_ST0(CPUX86State *env, int64_t val)
 211 {
 212     int new_fpstt;
 213
 214     new_fpstt = (env->fpstt - 1) & 7;
 215     env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
 216     env->fpstt = new_fpstt;
 217     env->fptags[new_fpstt] = 0; /* validate stack entry */
 218 }
 219
 220 uint32_t helper_fsts_ST0(CPUX86State *env)
 221 {
 222     union {
 223         float32 f;
 224         uint32_t i;
 225     } u;
 226
 227     u.f = floatx80_to_float32(ST0, &env->fp_status);
 228     return u.i;
 229 }
 230
 231 uint64_t helper_fstl_ST0(CPUX86State *env)
 232 {
 233     union {
 234         float64 f;
 235         uint64_t i;
 236     } u;
 237
 238     u.f = floatx80_to_float64(ST0, &env->fp_status);
 239     return u.i;
 240 }
 241
 242 int32_t helper_fist_ST0(CPUX86State *env)
 243 {
 244     int32_t val;
 245
 246     val = floatx80_to_int32(ST0, &env->fp_status);
 247     if (val != (int16_t)val) {
 248         val = -32768;
 249     }
 250     return val;
 251 }
 252
 253 int32_t helper_fistl_ST0(CPUX86State *env)
 254 {
 255     int32_t val;
 256     signed char old_exp_flags;
 257
 258     old_exp_flags = get_float_exception_flags(&env->fp_status);
 259     set_float_exception_flags(0, &env->fp_status);
 260
 261     val = floatx80_to_int32(ST0, &env->fp_status);
 262     if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 263         val = 0x80000000;
 264     }
 265     set_float_exception_flags(get_float_exception_flags(&env->fp_status)
 266                                 | old_exp_flags, &env->fp_status);
 267     return val;
 268 }
 269
 270 int64_t helper_fistll_ST0(CPUX86State *env)
 271 {
 272     int64_t val;
 273     signed char old_exp_flags;
 274
 275     old_exp_flags = get_float_exception_flags(&env->fp_status);
 276     set_float_exception_flags(0, &env->fp_status);
 277
 278     val = floatx80_to_int64(ST0, &env->fp_status);
 279     if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 280         val = 0x8000000000000000ULL;
 281     }
 282     set_float_exception_flags(get_float_exception_flags(&env->fp_status)
 283                                 | old_exp_flags, &env->fp_status);
 284     return val;
 285 }
 286
 287 int32_t helper_fistt_ST0(CPUX86State *env)
 288 {
 289     int32_t val;
 290
 291     val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
 292     if (val != (int16_t)val) {
 293         val = -32768;
 294     }
 295     return val;
 296 }
 297
 298 int32_t helper_fisttl_ST0(CPUX86State *env)
 299 {
 300     int32_t val;
 301
 302     val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
 303     return val;
 304 }
 305
 306 int64_t helper_fisttll_ST0(CPUX86State *env)
 307 {
 308     int64_t val;
 309
 310     val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
 311     return val;
 312 }
 313
 314 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
 315 {
 316     int new_fpstt;
 317
 318     new_fpstt = (env->fpstt - 1) & 7;
 319     env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
 320     env->fpstt = new_fpstt;
 321     env->fptags[new_fpstt] = 0; /* validate stack entry */
 322 }
 323
 324 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
 325 {
 326     helper_fstt(env, ST0, ptr, GETPC());
 327 }
 328
 329 void helper_fpush(CPUX86State *env)
 330 {
 331     fpush(env);
 332 }
 333
 334 void helper_fpop(CPUX86State *env)
 335 {
 336     fpop(env);
 337 }
 338
 339 void helper_fdecstp(CPUX86State *env)
 340 {
 341     env->fpstt = (env->fpstt - 1) & 7;
 342     env->fpus &= ~0x4700;
 343 }
 344
 345 void helper_fincstp(CPUX86State *env)
 346 {
 347     env->fpstt = (env->fpstt + 1) & 7;
 348     env->fpus &= ~0x4700;
 349 }
 350
 351 /* FPU move */
 352
 353 void helper_ffree_STN(CPUX86State *env, int st_index)
 354 {
 355     env->fptags[(env->fpstt + st_index) & 7] = 1;
 356 }
 357
 358 void helper_fmov_ST0_FT0(CPUX86State *env)
 359 {
 360     ST0 = FT0;
 361 }
 362
 363 void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
 364 {
 365     FT0 = ST(st_index);
 366 }
 367
 368 void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
 369 {
 370     ST0 = ST(st_index);
 371 }
 372
 373 void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
 374 {
 375     ST(st_index) = ST0;
 376 }
 377
 378 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
 379 {
 380     floatx80 tmp;
 381
 382     tmp = ST(st_index);
 383     ST(st_index) = ST0;
 384     ST0 = tmp;
 385 }
 386
 387 /* FPU operations */
 388
 389 static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
 390
 391 void helper_fcom_ST0_FT0(CPUX86State *env)
 392 {
 393     int ret;
 394
 395     ret = floatx80_compare(ST0, FT0, &env->fp_status);
 396     env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
 397 }
 398
 399 void helper_fucom_ST0_FT0(CPUX86State *env)
 400 {
 401     int ret;
 402
 403     ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
 404     env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
 405 }
 406
 407 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
 408
 409 void helper_fcomi_ST0_FT0(CPUX86State *env)
 410 {
 411     int eflags;
 412     int ret;
 413
 414     ret = floatx80_compare(ST0, FT0, &env->fp_status);
 415     eflags = cpu_cc_compute_all(env, CC_OP);
 416     eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
 417     CC_SRC = eflags;
 418 }
 419
 420 void helper_fucomi_ST0_FT0(CPUX86State *env)
 421 {
 422     int eflags;
 423     int ret;
 424
 425     ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
 426     eflags = cpu_cc_compute_all(env, CC_OP);
 427     eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
 428     CC_SRC = eflags;
 429 }
 430
 431 void helper_fadd_ST0_FT0(CPUX86State *env)
 432 {
 433     ST0 = floatx80_add(ST0, FT0, &env->fp_status);
 434 }
 435
 436 void helper_fmul_ST0_FT0(CPUX86State *env)
 437 {
 438     ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
 439 }
 440
 441 void helper_fsub_ST0_FT0(CPUX86State *env)
 442 {
 443     ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
 444 }
 445
 446 void helper_fsubr_ST0_FT0(CPUX86State *env)
 447 {
 448     ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
 449 }
 450
 451 void helper_fdiv_ST0_FT0(CPUX86State *env)
 452 {
 453     ST0 = helper_fdiv(env, ST0, FT0);
 454 }
 455
 456 void helper_fdivr_ST0_FT0(CPUX86State *env)
 457 {
 458     ST0 = helper_fdiv(env, FT0, ST0);
 459 }
 460
 461 /* fp operations between STN and ST0 */
 462
 463 void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
 464 {
 465     ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
 466 }
 467
 468 void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
 469 {
 470     ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
 471 }
 472
 473 void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
 474 {
 475     ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
 476 }
 477
 478 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
 479 {
 480     ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
 481 }
 482
 483 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
 484 {
 485     floatx80 *p;
 486
 487     p = &ST(st_index);
 488     *p = helper_fdiv(env, *p, ST0);
 489 }
 490
 491 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
 492 {
 493     floatx80 *p;
 494
 495     p = &ST(st_index);
 496     *p = helper_fdiv(env, ST0, *p);
 497 }
 498
 499 /* misc FPU operations */
 500 void helper_fchs_ST0(CPUX86State *env)
 501 {
 502     ST0 = floatx80_chs(ST0);
 503 }
 504
 505 void helper_fabs_ST0(CPUX86State *env)
 506 {
 507     ST0 = floatx80_abs(ST0);
 508 }
 509
 510 void helper_fld1_ST0(CPUX86State *env)
 511 {
 512     ST0 = floatx80_one;
 513 }
 514
 515 void helper_fldl2t_ST0(CPUX86State *env)
 516 {
 517     ST0 = floatx80_l2t;
 518 }
 519
 520 void helper_fldl2e_ST0(CPUX86State *env)
 521 {
 522     ST0 = floatx80_l2e;
 523 }
 524
 525 void helper_fldpi_ST0(CPUX86State *env)
 526 {
 527     ST0 = floatx80_pi;
 528 }
 529
 530 void helper_fldlg2_ST0(CPUX86State *env)
 531 {
 532     ST0 = floatx80_lg2;
 533 }
 534
 535 void helper_fldln2_ST0(CPUX86State *env)
 536 {
 537     ST0 = floatx80_ln2;
 538 }
 539
 540 void helper_fldz_ST0(CPUX86State *env)
 541 {
 542     ST0 = floatx80_zero;
 543 }
 544
 545 void helper_fldz_FT0(CPUX86State *env)
 546 {
 547     FT0 = floatx80_zero;
 548 }
 549
 550 uint32_t helper_fnstsw(CPUX86State *env)
 551 {
 552     return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
 553 }
 554
 555 uint32_t helper_fnstcw(CPUX86State *env)
 556 {
 557     return env->fpuc;
 558 }
 559
 560 void update_fp_status(CPUX86State *env)
 561 {
 562     int rnd_type;
 563
 564     /* set rounding mode */
 565     switch (env->fpuc & FPU_RC_MASK) {
 566     default:
 567     case FPU_RC_NEAR:
 568         rnd_type = float_round_nearest_even;
 569         break;
 570     case FPU_RC_DOWN:
 571         rnd_type = float_round_down;
 572         break;
 573     case FPU_RC_UP:
 574         rnd_type = float_round_up;
 575         break;
 576     case FPU_RC_CHOP:
 577         rnd_type = float_round_to_zero;
 578         break;
 579     }
 580     set_float_rounding_mode(rnd_type, &env->fp_status);
 581     switch ((env->fpuc >> 8) & 3) {
 582     case 0:
 583         rnd_type = 32;
 584         break;
 585     case 2:
 586         rnd_type = 64;
 587         break;
 588     case 3:
 589     default:
 590         rnd_type = 80;
 591         break;
 592     }
 593     set_floatx80_rounding_precision(rnd_type, &env->fp_status);
 594 }
 595
 596 void helper_fldcw(CPUX86State *env, uint32_t val)
 597 {
 598     cpu_set_fpuc(env, val);
 599 }
 600
 601 void helper_fclex(CPUX86State *env)
 602 {
 603     env->fpus &= 0x7f00;
 604 }
 605
 606 void helper_fwait(CPUX86State *env)
 607 {
 608     if (env->fpus & FPUS_SE) {
 609         fpu_raise_exception(env, GETPC());
 610     }
 611 }
 612
 613 void helper_fninit(CPUX86State *env)
 614 {
 615     env->fpus = 0;
 616     env->fpstt = 0;
 617     cpu_set_fpuc(env, 0x37f);
 618     env->fptags[0] = 1;
 619     env->fptags[1] = 1;
 620     env->fptags[2] = 1;
 621     env->fptags[3] = 1;
 622     env->fptags[4] = 1;
 623     env->fptags[5] = 1;
 624     env->fptags[6] = 1;
 625     env->fptags[7] = 1;
 626 }
 627
 628 /* BCD ops */
 629
 630 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
 631 {
 632     floatx80 tmp;
 633     uint64_t val;
 634     unsigned int v;
 635     int i;
 636
 637     val = 0;
 638     for (i = 8; i >= 0; i--) {
 639         v = cpu_ldub_data_ra(env, ptr + i, GETPC());
 640         val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
 641     }
 642     tmp = int64_to_floatx80(val, &env->fp_status);
 643     if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
 644         tmp = floatx80_chs(tmp);
 645     }
 646     fpush(env);
 647     ST0 = tmp;
 648 }
 649
 650 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
 651 {
 652     int v;
 653     target_ulong mem_ref, mem_end;
 654     int64_t val;
 655
 656     val = floatx80_to_int64(ST0, &env->fp_status);
 657     mem_ref = ptr;
 658     mem_end = mem_ref + 9;
 659     if (val < 0) {
 660         cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
 661         val = -val;
 662     } else {
 663         cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
 664     }
 665     while (mem_ref < mem_end) {
 666         if (val == 0) {
 667             break;
 668         }
 669         v = val % 100;
 670         val = val / 100;
 671         v = ((v / 10) << 4) | (v % 10);
 672         cpu_stb_data_ra(env, mem_ref++, v, GETPC());
 673     }
 674     while (mem_ref < mem_end) {
 675         cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
 676     }
 677 }
 678
 679 void helper_f2xm1(CPUX86State *env)
 680 {
 681     double val = floatx80_to_double(env, ST0);
 682
 683     val = pow(2.0, val) - 1.0;
 684     ST0 = double_to_floatx80(env, val);
 685 }
 686
 687 void helper_fyl2x(CPUX86State *env)
 688 {
 689     double fptemp = floatx80_to_double(env, ST0);
 690
 691     if (fptemp > 0.0) {
 692         fptemp = log(fptemp) / log(2.0); /* log2(ST) */
 693         fptemp *= floatx80_to_double(env, ST1);
 694         ST1 = double_to_floatx80(env, fptemp);
 695         fpop(env);
 696     } else {
 697         env->fpus &= ~0x4700;
 698         env->fpus |= 0x400;
 699     }
 700 }
 701
 702 void helper_fptan(CPUX86State *env)
 703 {
 704     double fptemp = floatx80_to_double(env, ST0);
 705
 706     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 707         env->fpus |= 0x400;
 708     } else {
 709         fptemp = tan(fptemp);
 710         ST0 = double_to_floatx80(env, fptemp);
 711         fpush(env);
 712         ST0 = floatx80_one;
 713         env->fpus &= ~0x400; /* C2 <-- 0 */
 714         /* the above code is for |arg| < 2**52 only */
 715     }
 716 }
 717
 718 void helper_fpatan(CPUX86State *env)
 719 {
 720     double fptemp, fpsrcop;
 721
 722     fpsrcop = floatx80_to_double(env, ST1);
 723     fptemp = floatx80_to_double(env, ST0);
 724     ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
 725     fpop(env);
 726 }
 727
 728 void helper_fxtract(CPUX86State *env)
 729 {
 730     CPU_LDoubleU temp;
 731
 732     temp.d = ST0;
 733
 734     if (floatx80_is_zero(ST0)) {
 735         /* Easy way to generate -inf and raising division by 0 exception */
 736         ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
 737                            &env->fp_status);
 738         fpush(env);
 739         ST0 = temp.d;
 740     } else {
 741         int expdif;
 742
 743         expdif = EXPD(temp) - EXPBIAS;
 744         /* DP exponent bias */
 745         ST0 = int32_to_floatx80(expdif, &env->fp_status);
 746         fpush(env);
 747         BIASEXPONENT(temp);
 748         ST0 = temp.d;
 749     }
 750 }
 751
/*
 * Partial remainder of ST0 / ST1 with the quotient rounded to nearest
 * (rint), computed in host double precision.
 *
 * - Infinite/NaN ST0, NaN ST1 or zero ST1: ST0 becomes NaN, C3..C0 cleared.
 * - ST0's exponent smaller than ST1's: ST0 unchanged, C3..C0 cleared.
 * - Exponent difference below 53: full reduction; the low three quotient
 *   bits are reported in C0, C3 and C1.
 * - Otherwise: a partial reduction step is performed and C2 is set.
 */
void helper_fprem1(CPUX86State *env)
{
    double st0, st1, dblq, fpsrcop, fptemp;
    CPU_LDoubleU fpsrcop1, fptemp1;
    int expdif;
    signed long long int q;

    st0 = floatx80_to_double(env, ST0);
    st1 = floatx80_to_double(env, ST1);

    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        return;
    }

    fpsrcop = st0;
    fptemp = st1;
    /* The exponent difference is taken from the raw 80-bit operands. */
    fpsrcop1.d = ST0;
    fptemp1.d = ST1;
    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);

    if (expdif < 0) {
        /* optimisation? taken from the AMD docs */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* ST0 is unchanged */
        return;
    }

    if (expdif < 53) {
        dblq = fpsrcop / fptemp;
        /* round dblq towards nearest integer */
        dblq = rint(dblq);
        st0 = fpsrcop - fptemp * dblq;

        /* convert dblq to q by truncating towards zero */
        if (dblq < 0.0) {
            q = (signed long long int)(-dblq);
        } else {
            q = (signed long long int)dblq;
        }

        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* (C0,C3,C1) <-- (q2,q1,q0) */
        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
    } else {
        /* Reduction incomplete: only C2 is updated on this path. */
        env->fpus |= 0x400;  /* C2 <-- 1 */
        fptemp = pow(2.0, expdif - 50);
        fpsrcop = (st0 / st1) / fptemp;
        /* fpsrcop = integer obtained by chopping */
        fpsrcop = (fpsrcop < 0.0) ?
                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
        st0 -= (st1 * fpsrcop * fptemp);
    }
    ST0 = double_to_floatx80(env, st0);
}
 810
/*
 * Partial remainder of ST0 / ST1 with the quotient truncated toward zero,
 * computed in host double precision.
 *
 * - Infinite/NaN ST0, NaN ST1 or zero ST1: ST0 becomes NaN, C3..C0 cleared.
 * - ST0's exponent smaller than ST1's: ST0 unchanged, C3..C0 cleared.
 * - Exponent difference below 53: full reduction; the low three quotient
 *   bits are reported in C0, C3 and C1.
 * - Otherwise: a partial reduction step is performed and C2 is set.
 */
void helper_fprem(CPUX86State *env)
{
    double st0, st1, dblq, fpsrcop, fptemp;
    CPU_LDoubleU fpsrcop1, fptemp1;
    int expdif;
    signed long long int q;

    st0 = floatx80_to_double(env, ST0);
    st1 = floatx80_to_double(env, ST1);

    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        return;
    }

    fpsrcop = st0;
    fptemp = st1;
    /* The exponent difference is taken from the raw 80-bit operands. */
    fpsrcop1.d = ST0;
    fptemp1.d = ST1;
    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);

    if (expdif < 0) {
        /* optimisation? taken from the AMD docs */
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* ST0 is unchanged */
        return;
    }

    if (expdif < 53) {
        dblq = fpsrcop / fptemp; /* ST0 / ST1 */
        /* round dblq towards zero */
        dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
        st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */

        /* convert dblq to q by truncating towards zero */
        if (dblq < 0.0) {
            q = (signed long long int)(-dblq);
        } else {
            q = (signed long long int)dblq;
        }

        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        /* (C0,C3,C1) <-- (q2,q1,q0) */
        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
    } else {
        int N = 32 + (expdif % 32); /* as per AMD docs */

        /* Reduction incomplete: only C2 is updated on this path. */
        env->fpus |= 0x400;  /* C2 <-- 1 */
        fptemp = pow(2.0, (double)(expdif - N));
        fpsrcop = (st0 / st1) / fptemp;
        /* fpsrcop = integer obtained by chopping */
        fpsrcop = (fpsrcop < 0.0) ?
                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
        st0 -= (st1 * fpsrcop * fptemp);
    }
    ST0 = double_to_floatx80(env, st0);
}
 871
 872 void helper_fyl2xp1(CPUX86State *env)
 873 {
 874     double fptemp = floatx80_to_double(env, ST0);
 875
 876     if ((fptemp + 1.0) > 0.0) {
 877         fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
 878         fptemp *= floatx80_to_double(env, ST1);
 879         ST1 = double_to_floatx80(env, fptemp);
 880         fpop(env);
 881     } else {
 882         env->fpus &= ~0x4700;
 883         env->fpus |= 0x400;
 884     }
 885 }
 886
 887 void helper_fsqrt(CPUX86State *env)
 888 {
 889     if (floatx80_is_neg(ST0)) {
 890         env->fpus &= ~0x4700;  /* (C3,C2,C1,C0) <-- 0000 */
 891         env->fpus |= 0x400;
 892     }
 893     ST0 = floatx80_sqrt(ST0, &env->fp_status);
 894 }
 895
 896 void helper_fsincos(CPUX86State *env)
 897 {
 898     double fptemp = floatx80_to_double(env, ST0);
 899
 900     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 901         env->fpus |= 0x400;
 902     } else {
 903         ST0 = double_to_floatx80(env, sin(fptemp));
 904         fpush(env);
 905         ST0 = double_to_floatx80(env, cos(fptemp));
 906         env->fpus &= ~0x400;  /* C2 <-- 0 */
 907         /* the above code is for |arg| < 2**63 only */
 908     }
 909 }
 910
 911 void helper_frndint(CPUX86State *env)
 912 {
 913     ST0 = floatx80_round_to_int(ST0, &env->fp_status);
 914 }
 915
 916 void helper_fscale(CPUX86State *env)
 917 {
 918     if (floatx80_is_any_nan(ST1)) {
 919         ST0 = ST1;
 920     } else {
 921         int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
 922         ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
 923     }
 924 }
 925
 926 void helper_fsin(CPUX86State *env)
 927 {
 928     double fptemp = floatx80_to_double(env, ST0);
 929
 930     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 931         env->fpus |= 0x400;
 932     } else {
 933         ST0 = double_to_floatx80(env, sin(fptemp));
 934         env->fpus &= ~0x400;  /* C2 <-- 0 */
 935         /* the above code is for |arg| < 2**53 only */
 936     }
 937 }
 938
 939 void helper_fcos(CPUX86State *env)
 940 {
 941     double fptemp = floatx80_to_double(env, ST0);
 942
 943     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 944         env->fpus |= 0x400;
 945     } else {
 946         ST0 = double_to_floatx80(env, cos(fptemp));
 947         env->fpus &= ~0x400;  /* C2 <-- 0 */
 948         /* the above code is for |arg| < 2**63 only */
 949     }
 950 }
 951
 952 void helper_fxam_ST0(CPUX86State *env)
 953 {
 954     CPU_LDoubleU temp;
 955     int expdif;
 956
 957     temp.d = ST0;
 958
 959     env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 960     if (SIGND(temp)) {
 961         env->fpus |= 0x200; /* C1 <-- 1 */
 962     }
 963
 964     /* XXX: test fptags too */
 965     expdif = EXPD(temp);
 966     if (expdif == MAXEXPD) {
 967         if (MANTD(temp) == 0x8000000000000000ULL) {
 968             env->fpus |= 0x500; /* Infinity */
 969         } else {
 970             env->fpus |= 0x100; /* NaN */
 971         }
 972     } else if (expdif == 0) {
 973         if (MANTD(temp) == 0) {
 974             env->fpus |=  0x4000; /* Zero */
 975         } else {
 976             env->fpus |= 0x4400; /* Denormal */
 977         }
 978     } else {
 979         env->fpus |= 0x400;
 980     }
 981 }
 982
 983 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
 984                       uintptr_t retaddr)
 985 {
 986     int fpus, fptag, exp, i;
 987     uint64_t mant;
 988     CPU_LDoubleU tmp;
 989
 990     fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
 991     fptag = 0;
 992     for (i = 7; i >= 0; i--) {
 993         fptag <<= 2;
 994         if (env->fptags[i]) {
 995             fptag |= 3;
 996         } else {
 997             tmp.d = env->fpregs[i].d;
 998             exp = EXPD(tmp);
 999             mant = MANTD(tmp);
1000             if (exp == 0 && mant == 0) {
1001                 /* zero */
1002                 fptag |= 1;
1003             } else if (exp == 0 || exp == MAXEXPD
1004                        || (mant & (1LL << 63)) == 0) {
1005                 /* NaNs, infinity, denormal */
1006                 fptag |= 2;
1007             }
1008         }
1009     }
1010     if (data32) {
1011         /* 32 bit */
1012         cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1013         cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1014         cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1015         cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1016         cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1017         cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1018         cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
1019     } else {
1020         /* 16 bit */
1021         cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1022         cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1023         cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1024         cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1025         cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1026         cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1027         cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
1028     }
1029 }
1030
1031 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1032 {
1033     do_fstenv(env, ptr, data32, GETPC());
1034 }
1035
1036 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1037                       uintptr_t retaddr)
1038 {
1039     int i, fpus, fptag;
1040
1041     if (data32) {
1042         cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1043         fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1044         fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1045     } else {
1046         cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1047         fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1048         fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1049     }
1050     env->fpstt = (fpus >> 11) & 7;
1051     env->fpus = fpus & ~0x3800;
1052     for (i = 0; i < 8; i++) {
1053         env->fptags[i] = ((fptag & 3) == 3);
1054         fptag >>= 2;
1055     }
1056 }
1057
1058 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1059 {
1060     do_fldenv(env, ptr, data32, GETPC());
1061 }
1062
1063 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1064 {
1065     floatx80 tmp;
1066     int i;
1067
1068     do_fstenv(env, ptr, data32, GETPC());
1069
1070     ptr += (14 << data32);
1071     for (i = 0; i < 8; i++) {
1072         tmp = ST(i);
1073         helper_fstt(env, tmp, ptr, GETPC());
1074         ptr += 10;
1075     }
1076
1077     /* fninit */
1078     env->fpus = 0;
1079     env->fpstt = 0;
1080     cpu_set_fpuc(env, 0x37f);
1081     env->fptags[0] = 1;
1082     env->fptags[1] = 1;
1083     env->fptags[2] = 1;
1084     env->fptags[3] = 1;
1085     env->fptags[4] = 1;
1086     env->fptags[5] = 1;
1087     env->fptags[6] = 1;
1088     env->fptags[7] = 1;
1089 }
1090
1091 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
1092 {
1093     floatx80 tmp;
1094     int i;
1095
1096     do_fldenv(env, ptr, data32, GETPC());
1097     ptr += (14 << data32);
1098
1099     for (i = 0; i < 8; i++) {
1100         tmp = helper_fldt(env, ptr, GETPC());
1101         ST(i) = tmp;
1102         ptr += 10;
1103     }
1104 }
1105
#if defined(CONFIG_USER_ONLY)
/* User-mode-only entry point that forwards to helper_fsave(). */
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_fsave(env, ptr, data32);
}

/* User-mode-only entry point that forwards to helper_frstor(). */
void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_frstor(env, ptr, data32);
}
#endif
1117
1118 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1119 {
1120     int fpus, fptag, i;
1121     target_ulong addr;
1122
1123     fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1124     fptag = 0;
1125     for (i = 0; i < 8; i++) {
1126         fptag |= (env->fptags[i] << i);
1127     }
1128     cpu_stw_data_ra(env, ptr, env->fpuc, ra);
1129     cpu_stw_data_ra(env, ptr + 2, fpus, ra);
1130     cpu_stw_data_ra(env, ptr + 4, fptag ^ 0xff, ra);
1131
1132     /* In 32-bit mode this is eip, sel, dp, sel.
1133        In 64-bit mode this is rip, rdp.
1134        But in either case we don't write actual data, just zeros.  */
1135     cpu_stq_data_ra(env, ptr + 0x08, 0, ra); /* eip+sel; rip */
1136     cpu_stq_data_ra(env, ptr + 0x10, 0, ra); /* edp+sel; rdp */
1137
1138     addr = ptr + 0x20;
1139     for (i = 0; i < 8; i++) {
1140         floatx80 tmp = ST(i);
1141         helper_fstt(env, tmp, addr, ra);
1142         addr += 16;
1143     }
1144 }
1145
1146 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1147 {
1148     cpu_stl_data_ra(env, ptr + 0x18, env->mxcsr, ra); /* mxcsr */
1149     cpu_stl_data_ra(env, ptr + 0x1c, 0x0000ffff, ra); /* mxcsr_mask */
1150 }
1151
1152 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1153 {
1154     int i, nb_xmm_regs;
1155     target_ulong addr;
1156
1157     if (env->hflags & HF_CS64_MASK) {
1158         nb_xmm_regs = 16;
1159     } else {
1160         nb_xmm_regs = 8;
1161     }
1162
1163     addr = ptr + 0xa0;
1164     for (i = 0; i < nb_xmm_regs; i++) {
1165         cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1166         cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1167         addr += 16;
1168     }
1169 }
1170
1171 static void do_xsave_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra)
1172 {
1173     int i;
1174
1175     for (i = 0; i < 4; i++, addr += 16) {
1176         cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1177         cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1178     }
1179 }
1180
1181 static void do_xsave_bndcsr(CPUX86State *env, target_ulong addr, uintptr_t ra)
1182 {
1183     cpu_stq_data_ra(env, addr, env->bndcs_regs.cfgu, ra);
1184     cpu_stq_data_ra(env, addr + 8, env->bndcs_regs.sts, ra);
1185 }
1186
1187 static void do_xsave_pkru(CPUX86State *env, target_ulong addr, uintptr_t ra)
1188 {
1189     cpu_stq_data_ra(env, addr, env->pkru, ra);
1190 }
1191
1192 void helper_fxsave(CPUX86State *env, target_ulong ptr)
1193 {
1194     uintptr_t ra = GETPC();
1195
1196     /* The operand must be 16 byte aligned */
1197     if (ptr & 0xf) {
1198         raise_exception_ra(env, EXCP0D_GPF, ra);
1199     }
1200
1201     do_xsave_fpu(env, ptr, ra);
1202
1203     if (env->cr[4] & CR4_OSFXSR_MASK) {
1204         do_xsave_mxcsr(env, ptr, ra);
1205         /* Fast FXSAVE leaves out the XMM registers */
1206         if (!(env->efer & MSR_EFER_FFXSR)
1207             || (env->hflags & HF_CPL_MASK)
1208             || !(env->hflags & HF_LMA_MASK)) {
1209             do_xsave_sse(env, ptr, ra);
1210         }
1211     }
1212 }
1213
1214 static uint64_t get_xinuse(CPUX86State *env)
1215 {
1216     uint64_t inuse = -1;
1217
1218     /* For the most part, we don't track XINUSE.  We could calculate it
1219        here for all components, but it's probably less work to simply
1220        indicate in use.  That said, the state of BNDREGS is important
1221        enough to track in HFLAGS, so we might as well use that here.  */
1222     if ((env->hflags & HF_MPX_IU_MASK) == 0) {
1223        inuse &= ~XSTATE_BNDREGS_MASK;
1224     }
1225     return inuse;
1226 }
1227
1228 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1229                      uint64_t inuse, uint64_t opt, uintptr_t ra)
1230 {
1231     uint64_t old_bv, new_bv;
1232
1233     /* The OS must have enabled XSAVE.  */
1234     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1235         raise_exception_ra(env, EXCP06_ILLOP, ra);
1236     }
1237
1238     /* The operand must be 64 byte aligned.  */
1239     if (ptr & 63) {
1240         raise_exception_ra(env, EXCP0D_GPF, ra);
1241     }
1242
1243     /* Never save anything not enabled by XCR0.  */
1244     rfbm &= env->xcr0;
1245     opt &= rfbm;
1246
1247     if (opt & XSTATE_FP_MASK) {
1248         do_xsave_fpu(env, ptr, ra);
1249     }
1250     if (rfbm & XSTATE_SSE_MASK) {
1251         /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
1252         do_xsave_mxcsr(env, ptr, ra);
1253     }
1254     if (opt & XSTATE_SSE_MASK) {
1255         do_xsave_sse(env, ptr, ra);
1256     }
1257     if (opt & XSTATE_BNDREGS_MASK) {
1258         target_ulong off = x86_ext_save_areas[XSTATE_BNDREGS_BIT].offset;
1259         do_xsave_bndregs(env, ptr + off, ra);
1260     }
1261     if (opt & XSTATE_BNDCSR_MASK) {
1262         target_ulong off = x86_ext_save_areas[XSTATE_BNDCSR_BIT].offset;
1263         do_xsave_bndcsr(env, ptr + off, ra);
1264     }
1265     if (opt & XSTATE_PKRU_MASK) {
1266         target_ulong off = x86_ext_save_areas[XSTATE_PKRU_BIT].offset;
1267         do_xsave_pkru(env, ptr + off, ra);
1268     }
1269
1270     /* Update the XSTATE_BV field.  */
1271     old_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
1272     new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
1273     cpu_stq_data_ra(env, ptr + 512, new_bv, ra);
1274 }
1275
1276 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1277 {
1278     do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1279 }
1280
1281 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1282 {
1283     uint64_t inuse = get_xinuse(env);
1284     do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1285 }
1286
1287 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1288 {
1289     int i, fpus, fptag;
1290     target_ulong addr;
1291
1292     cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, ra));
1293     fpus = cpu_lduw_data_ra(env, ptr + 2, ra);
1294     fptag = cpu_lduw_data_ra(env, ptr + 4, ra);
1295     env->fpstt = (fpus >> 11) & 7;
1296     env->fpus = fpus & ~0x3800;
1297     fptag ^= 0xff;
1298     for (i = 0; i < 8; i++) {
1299         env->fptags[i] = ((fptag >> i) & 1);
1300     }
1301
1302     addr = ptr + 0x20;
1303     for (i = 0; i < 8; i++) {
1304         floatx80 tmp = helper_fldt(env, addr, ra);
1305         ST(i) = tmp;
1306         addr += 16;
1307     }
1308 }
1309
1310 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1311 {
1312     cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + 0x18, ra));
1313 }
1314
1315 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1316 {
1317     int i, nb_xmm_regs;
1318     target_ulong addr;
1319
1320     if (env->hflags & HF_CS64_MASK) {
1321         nb_xmm_regs = 16;
1322     } else {
1323         nb_xmm_regs = 8;
1324     }
1325
1326     addr = ptr + 0xa0;
1327     for (i = 0; i < nb_xmm_regs; i++) {
1328         env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1329         env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1330         addr += 16;
1331     }
1332 }
1333
1334 static void do_xrstor_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra)
1335 {
1336     int i;
1337
1338     for (i = 0; i < 4; i++, addr += 16) {
1339         env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1340         env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1341     }
1342 }
1343
1344 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong addr, uintptr_t ra)
1345 {
1346     /* FIXME: Extend highest implemented bit of linear address.  */
1347     env->bndcs_regs.cfgu = cpu_ldq_data_ra(env, addr, ra);
1348     env->bndcs_regs.sts = cpu_ldq_data_ra(env, addr + 8, ra);
1349 }
1350
1351 static void do_xrstor_pkru(CPUX86State *env, target_ulong addr, uintptr_t ra)
1352 {
1353     env->pkru = cpu_ldq_data_ra(env, addr, ra);
1354 }
1355
1356 void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1357 {
1358     uintptr_t ra = GETPC();
1359
1360     /* The operand must be 16 byte aligned */
1361     if (ptr & 0xf) {
1362         raise_exception_ra(env, EXCP0D_GPF, ra);
1363     }
1364
1365     do_xrstor_fpu(env, ptr, ra);
1366
1367     if (env->cr[4] & CR4_OSFXSR_MASK) {
1368         do_xrstor_mxcsr(env, ptr, ra);
1369         /* Fast FXRSTOR leaves out the XMM registers */
1370         if (!(env->efer & MSR_EFER_FFXSR)
1371             || (env->hflags & HF_CPL_MASK)
1372             || !(env->hflags & HF_LMA_MASK)) {
1373             do_xrstor_sse(env, ptr, ra);
1374         }
1375     }
1376 }
1377
1378 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1379 {
1380     uintptr_t ra = GETPC();
1381     uint64_t xstate_bv, xcomp_bv0, xcomp_bv1;
1382
1383     rfbm &= env->xcr0;
1384
1385     /* The OS must have enabled XSAVE.  */
1386     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1387         raise_exception_ra(env, EXCP06_ILLOP, ra);
1388     }
1389
1390     /* The operand must be 64 byte aligned.  */
1391     if (ptr & 63) {
1392         raise_exception_ra(env, EXCP0D_GPF, ra);
1393     }
1394
1395     xstate_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
1396
1397     if ((int64_t)xstate_bv < 0) {
1398         /* FIXME: Compact form.  */
1399         raise_exception_ra(env, EXCP0D_GPF, ra);
1400     }
1401
1402     /* Standard form.  */
1403
1404     /* The XSTATE field must not set bits not present in XCR0.  */
1405     if (xstate_bv & ~env->xcr0) {
1406         raise_exception_ra(env, EXCP0D_GPF, ra);
1407     }
1408
1409     /* The XCOMP field must be zero.  */
1410     xcomp_bv0 = cpu_ldq_data_ra(env, ptr + 520, ra);
1411     xcomp_bv1 = cpu_ldq_data_ra(env, ptr + 528, ra);
1412     if (xcomp_bv0 || xcomp_bv1) {
1413         raise_exception_ra(env, EXCP0D_GPF, ra);
1414     }
1415
1416     if (rfbm & XSTATE_FP_MASK) {
1417         if (xstate_bv & XSTATE_FP_MASK) {
1418             do_xrstor_fpu(env, ptr, ra);
1419         } else {
1420             helper_fninit(env);
1421             memset(env->fpregs, 0, sizeof(env->fpregs));
1422         }
1423     }
1424     if (rfbm & XSTATE_SSE_MASK) {
1425         /* Note that the standard form of XRSTOR loads MXCSR from memory
1426            whether or not the XSTATE_BV bit is set.  */
1427         do_xrstor_mxcsr(env, ptr, ra);
1428         if (xstate_bv & XSTATE_SSE_MASK) {
1429             do_xrstor_sse(env, ptr, ra);
1430         } else {
1431             /* ??? When AVX is implemented, we may have to be more
1432                selective in the clearing.  */
1433             memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
1434         }
1435     }
1436     if (rfbm & XSTATE_BNDREGS_MASK) {
1437         if (xstate_bv & XSTATE_BNDREGS_MASK) {
1438             target_ulong off = x86_ext_save_areas[XSTATE_BNDREGS_BIT].offset;
1439             do_xrstor_bndregs(env, ptr + off, ra);
1440             env->hflags |= HF_MPX_IU_MASK;
1441         } else {
1442             memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
1443             env->hflags &= ~HF_MPX_IU_MASK;
1444         }
1445     }
1446     if (rfbm & XSTATE_BNDCSR_MASK) {
1447         if (xstate_bv & XSTATE_BNDCSR_MASK) {
1448             target_ulong off = x86_ext_save_areas[XSTATE_BNDCSR_BIT].offset;
1449             do_xrstor_bndcsr(env, ptr + off, ra);
1450         } else {
1451             memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
1452         }
1453         cpu_sync_bndcs_hflags(env);
1454     }
1455     if (rfbm & XSTATE_PKRU_MASK) {
1456         uint64_t old_pkru = env->pkru;
1457         if (xstate_bv & XSTATE_PKRU_MASK) {
1458             target_ulong off = x86_ext_save_areas[XSTATE_PKRU_BIT].offset;
1459             do_xrstor_pkru(env, ptr + off, ra);
1460         } else {
1461             env->pkru = 0;
1462         }
1463         if (env->pkru != old_pkru) {
1464             CPUState *cs = CPU(x86_env_get_cpu(env));
1465             tlb_flush(cs, 1);
1466         }
1467     }
1468 }
1469
1470 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1471 {
1472     /* The OS must have enabled XSAVE.  */
1473     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1474         raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1475     }
1476
1477     switch (ecx) {
1478     case 0:
1479         return env->xcr0;
1480     case 1:
1481         if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1482             return env->xcr0 & get_xinuse(env);
1483         }
1484         break;
1485     }
1486     raise_exception_ra(env, EXCP0D_GPF, GETPC());
1487 }
1488
1489 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1490 {
1491     uint32_t dummy, ena_lo, ena_hi;
1492     uint64_t ena;
1493
1494     /* The OS must have enabled XSAVE.  */
1495     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1496         raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1497     }
1498
1499     /* Only XCR0 is defined at present; the FPU may not be disabled.  */
1500     if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
1501         goto do_gpf;
1502     }
1503
1504     /* Disallow enabling unimplemented features.  */
1505     cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
1506     ena = ((uint64_t)ena_hi << 32) | ena_lo;
1507     if (mask & ~ena) {
1508         goto do_gpf;
1509     }
1510
1511     /* Disallow enabling only half of MPX.  */
1512     if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
1513         & XSTATE_BNDCSR_MASK) {
1514         goto do_gpf;
1515     }
1516
1517     env->xcr0 = mask;
1518     cpu_sync_bndcs_hflags(env);
1519     return;
1520
1521  do_gpf:
1522     raise_exception_ra(env, EXCP0D_GPF, GETPC());
1523 }
1524
1525 void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
1526 {
1527     CPU_LDoubleU temp;
1528
1529     temp.d = f;
1530     *pmant = temp.l.lower;
1531     *pexp = temp.l.upper;
1532 }
1533
1534 floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper)
1535 {
1536     CPU_LDoubleU temp;
1537
1538     temp.l.upper = upper;
1539     temp.l.lower = mant;
1540     return temp.d;
1541 }
1542
1543 /* MMX/SSE */
1544 /* XXX: optimize by storing fptt and fptags in the static cpu state */
1545
/* MXCSR control bits interpreted by cpu_set_mxcsr().  */
#define SSE_DAZ             (1 << 6)    /* denormals are zero */
#define SSE_RC_MASK         (3 << 13)   /* rounding mode field */
#define SSE_RC_NEAR         (0 << 13)
#define SSE_RC_DOWN         (1 << 13)
#define SSE_RC_UP           (2 << 13)
#define SSE_RC_CHOP         (3 << 13)
#define SSE_FZ              (1 << 15)   /* flush to zero */
1553
1554 void cpu_set_mxcsr(CPUX86State *env, uint32_t mxcsr)
1555 {
1556     int rnd_type;
1557
1558     env->mxcsr = mxcsr;
1559
1560     /* set rounding mode */
1561     switch (mxcsr & SSE_RC_MASK) {
1562     default:
1563     case SSE_RC_NEAR:
1564         rnd_type = float_round_nearest_even;
1565         break;
1566     case SSE_RC_DOWN:
1567         rnd_type = float_round_down;
1568         break;
1569     case SSE_RC_UP:
1570         rnd_type = float_round_up;
1571         break;
1572     case SSE_RC_CHOP:
1573         rnd_type = float_round_to_zero;
1574         break;
1575     }
1576     set_float_rounding_mode(rnd_type, &env->sse_status);
1577
1578     /* set denormals are zero */
1579     set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
1580
1581     /* set flush to zero */
1582     set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
1583 }
1584
1585 void cpu_set_fpuc(CPUX86State *env, uint16_t val)
1586 {
1587     env->fpuc = val;
1588     update_fp_status(env);
1589 }
1590
1591 void helper_ldmxcsr(CPUX86State *env, uint32_t val)
1592 {
1593     cpu_set_mxcsr(env, val);
1594 }
1595
1596 void helper_enter_mmx(CPUX86State *env)
1597 {
1598     env->fpstt = 0;
1599     *(uint32_t *)(env->fptags) = 0;
1600     *(uint32_t *)(env->fptags + 4) = 0;
1601 }
1602
1603 void helper_emms(CPUX86State *env)
1604 {
1605     /* set to empty state */
1606     *(uint32_t *)(env->fptags) = 0x01010101;
1607     *(uint32_t *)(env->fptags + 4) = 0x01010101;
1608 }
1609
1610 /* XXX: suppress */
1611 void helper_movq(CPUX86State *env, void *d, void *s)
1612 {
1613     *(uint64_t *)d = *(uint64_t *)s;
1614 }
1615
1616 #define SHIFT 0
1617 #include "ops_sse.h"
1618
1619 #define SHIFT 1
1620 #include "ops_sse.h"