target/i386: reimplement fyl2x using floatx80 operations
[qemu/ar7.git] / target/i386/fpu_helper.c
blob 62820bc735b2121e4a61b71610f501f6da2c4aa6
1 /*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include <math.h>
22 #include "cpu.h"
23 #include "exec/helper-proto.h"
24 #include "qemu/host-utils.h"
25 #include "exec/exec-all.h"
26 #include "exec/cpu_ldst.h"
27 #include "fpu/softfloat.h"
28 #include "fpu/softfloat-macros.h"
30 #ifdef CONFIG_SOFTMMU
31 #include "hw/irq.h"
32 #endif
34 #define FPU_RC_MASK 0xc00
35 #define FPU_RC_NEAR 0x000
36 #define FPU_RC_DOWN 0x400
37 #define FPU_RC_UP 0x800
38 #define FPU_RC_CHOP 0xc00
40 #define MAXTAN 9223372036854775808.0
42 /* the following deal with x86 long double-precision numbers */
43 #define MAXEXPD 0x7fff
44 #define EXPBIAS 16383
45 #define EXPD(fp) (fp.l.upper & 0x7fff)
46 #define SIGND(fp) ((fp.l.upper) & 0x8000)
47 #define MANTD(fp) (fp.l.lower)
48 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
50 #define FPUS_IE (1 << 0)
51 #define FPUS_DE (1 << 1)
52 #define FPUS_ZE (1 << 2)
53 #define FPUS_OE (1 << 3)
54 #define FPUS_UE (1 << 4)
55 #define FPUS_PE (1 << 5)
56 #define FPUS_SF (1 << 6)
57 #define FPUS_SE (1 << 7)
58 #define FPUS_B (1 << 15)
60 #define FPUC_EM 0x3f
62 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
63 #define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
64 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
65 #define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
66 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
67 #define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
68 #define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
69 #define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)
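/*
 * The "_u" and "_d" variants are the same constants rounded up and down
 * respectively; the fldl2t/fldl2e/fldpi/fldlg2/fldln2 helpers below pick
 * between them according to the FPU rounding-control field so that the
 * loaded constant is correctly rounded for the current mode.
 */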
71 #if !defined(CONFIG_USER_ONLY)
72 static qemu_irq ferr_irq;
74 void x86_register_ferr_irq(qemu_irq irq)
76 ferr_irq = irq;
79 static void cpu_clear_ignne(void)
81 CPUX86State *env = &X86_CPU(first_cpu)->env;
82 env->hflags2 &= ~HF2_IGNNE_MASK;
85 void cpu_set_ignne(void)
87 CPUX86State *env = &X86_CPU(first_cpu)->env;
88 env->hflags2 |= HF2_IGNNE_MASK;
90 * We get here in response to a write to port F0h. The chipset should
91 * deassert FP_IRQ, while FERR# should instead stay signaled until FPSW_SE
92 * is cleared, because FERR# and FP_IRQ are two separate pins on real
93 * hardware. However, we don't model FERR# as a qemu_irq, so we just
94 * do directly what the chipset would do, i.e. deassert FP_IRQ.
96 qemu_irq_lower(ferr_irq);
98 #endif
101 static inline void fpush(CPUX86State *env)
103 env->fpstt = (env->fpstt - 1) & 7;
104 env->fptags[env->fpstt] = 0; /* validate stack entry */
107 static inline void fpop(CPUX86State *env)
109 env->fptags[env->fpstt] = 1; /* invalidate stack entry */
110 env->fpstt = (env->fpstt + 1) & 7;
113 static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
114 uintptr_t retaddr)
116 CPU_LDoubleU temp;
118 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
119 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
120 return temp.d;
123 static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
124 uintptr_t retaddr)
126 CPU_LDoubleU temp;
128 temp.d = f;
129 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
130 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
133 /* x87 FPU helpers */
135 static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
137 union {
138 float64 f64;
139 double d;
140 } u;
142 u.f64 = floatx80_to_float64(a, &env->fp_status);
143 return u.d;
146 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
148 union {
149 float64 f64;
150 double d;
151 } u;
153 u.d = a;
154 return float64_to_floatx80(u.f64, &env->fp_status);
157 static void fpu_set_exception(CPUX86State *env, int mask)
159 env->fpus |= mask;
160 if (env->fpus & (~env->fpuc & FPUC_EM)) {
161 env->fpus |= FPUS_SE | FPUS_B;
165 static inline uint8_t save_exception_flags(CPUX86State *env)
167 uint8_t old_flags = get_float_exception_flags(&env->fp_status);
168 set_float_exception_flags(0, &env->fp_status);
169 return old_flags;
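/*
 * Helpers follow a common pattern: save_exception_flags() stashes the
 * softfloat flags accumulated so far and clears them, the operation then
 * runs with a clean slate, and merge_exception_flags() re-raises the saved
 * flags and folds the newly generated ones into the x87 status-word bits
 * (IE/ZE/OE/UE/PE/DE).
 */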
172 static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
174 uint8_t new_flags = get_float_exception_flags(&env->fp_status);
175 float_raise(old_flags, &env->fp_status);
176 fpu_set_exception(env,
177 ((new_flags & float_flag_invalid ? FPUS_IE : 0) |
178 (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) |
179 (new_flags & float_flag_overflow ? FPUS_OE : 0) |
180 (new_flags & float_flag_underflow ? FPUS_UE : 0) |
181 (new_flags & float_flag_inexact ? FPUS_PE : 0) |
182 (new_flags & float_flag_input_denormal ? FPUS_DE : 0)));
185 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
187 uint8_t old_flags = save_exception_flags(env);
188 floatx80 ret = floatx80_div(a, b, &env->fp_status);
189 merge_exception_flags(env, old_flags);
190 return ret;
193 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
195 if (env->cr[0] & CR0_NE_MASK) {
196 raise_exception_ra(env, EXCP10_COPR, retaddr);
198 #if !defined(CONFIG_USER_ONLY)
199 else if (ferr_irq && !(env->hflags2 & HF2_IGNNE_MASK)) {
200 qemu_irq_raise(ferr_irq);
202 #endif
205 void helper_flds_FT0(CPUX86State *env, uint32_t val)
207 uint8_t old_flags = save_exception_flags(env);
208 union {
209 float32 f;
210 uint32_t i;
211 } u;
213 u.i = val;
214 FT0 = float32_to_floatx80(u.f, &env->fp_status);
215 merge_exception_flags(env, old_flags);
218 void helper_fldl_FT0(CPUX86State *env, uint64_t val)
220 uint8_t old_flags = save_exception_flags(env);
221 union {
222 float64 f;
223 uint64_t i;
224 } u;
226 u.i = val;
227 FT0 = float64_to_floatx80(u.f, &env->fp_status);
228 merge_exception_flags(env, old_flags);
231 void helper_fildl_FT0(CPUX86State *env, int32_t val)
233 FT0 = int32_to_floatx80(val, &env->fp_status);
236 void helper_flds_ST0(CPUX86State *env, uint32_t val)
238 uint8_t old_flags = save_exception_flags(env);
239 int new_fpstt;
240 union {
241 float32 f;
242 uint32_t i;
243 } u;
245 new_fpstt = (env->fpstt - 1) & 7;
246 u.i = val;
247 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
248 env->fpstt = new_fpstt;
249 env->fptags[new_fpstt] = 0; /* validate stack entry */
250 merge_exception_flags(env, old_flags);
253 void helper_fldl_ST0(CPUX86State *env, uint64_t val)
255 uint8_t old_flags = save_exception_flags(env);
256 int new_fpstt;
257 union {
258 float64 f;
259 uint64_t i;
260 } u;
262 new_fpstt = (env->fpstt - 1) & 7;
263 u.i = val;
264 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
265 env->fpstt = new_fpstt;
266 env->fptags[new_fpstt] = 0; /* validate stack entry */
267 merge_exception_flags(env, old_flags);
270 void helper_fildl_ST0(CPUX86State *env, int32_t val)
272 int new_fpstt;
274 new_fpstt = (env->fpstt - 1) & 7;
275 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
276 env->fpstt = new_fpstt;
277 env->fptags[new_fpstt] = 0; /* validate stack entry */
280 void helper_fildll_ST0(CPUX86State *env, int64_t val)
282 int new_fpstt;
284 new_fpstt = (env->fpstt - 1) & 7;
285 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
286 env->fpstt = new_fpstt;
287 env->fptags[new_fpstt] = 0; /* validate stack entry */
290 uint32_t helper_fsts_ST0(CPUX86State *env)
292 uint8_t old_flags = save_exception_flags(env);
293 union {
294 float32 f;
295 uint32_t i;
296 } u;
298 u.f = floatx80_to_float32(ST0, &env->fp_status);
299 merge_exception_flags(env, old_flags);
300 return u.i;
303 uint64_t helper_fstl_ST0(CPUX86State *env)
305 uint8_t old_flags = save_exception_flags(env);
306 union {
307 float64 f;
308 uint64_t i;
309 } u;
311 u.f = floatx80_to_float64(ST0, &env->fp_status);
312 merge_exception_flags(env, old_flags);
313 return u.i;
316 int32_t helper_fist_ST0(CPUX86State *env)
318 uint8_t old_flags = save_exception_flags(env);
319 int32_t val;
321 val = floatx80_to_int32(ST0, &env->fp_status);
322 if (val != (int16_t)val) {
323 set_float_exception_flags(float_flag_invalid, &env->fp_status);
324 val = -32768;
326 merge_exception_flags(env, old_flags);
327 return val;
330 int32_t helper_fistl_ST0(CPUX86State *env)
332 uint8_t old_flags = save_exception_flags(env);
333 int32_t val;
335 val = floatx80_to_int32(ST0, &env->fp_status);
336 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
337 val = 0x80000000;
339 merge_exception_flags(env, old_flags);
340 return val;
343 int64_t helper_fistll_ST0(CPUX86State *env)
345 uint8_t old_flags = save_exception_flags(env);
346 int64_t val;
348 val = floatx80_to_int64(ST0, &env->fp_status);
349 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
350 val = 0x8000000000000000ULL;
352 merge_exception_flags(env, old_flags);
353 return val;
356 int32_t helper_fistt_ST0(CPUX86State *env)
358 uint8_t old_flags = save_exception_flags(env);
359 int32_t val;
361 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
362 if (val != (int16_t)val) {
363 set_float_exception_flags(float_flag_invalid, &env->fp_status);
364 val = -32768;
366 merge_exception_flags(env, old_flags);
367 return val;
370 int32_t helper_fisttl_ST0(CPUX86State *env)
372 uint8_t old_flags = save_exception_flags(env);
373 int32_t val;
375 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
376 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
377 val = 0x80000000;
379 merge_exception_flags(env, old_flags);
380 return val;
383 int64_t helper_fisttll_ST0(CPUX86State *env)
385 uint8_t old_flags = save_exception_flags(env);
386 int64_t val;
388 val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
389 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
390 val = 0x8000000000000000ULL;
392 merge_exception_flags(env, old_flags);
393 return val;
396 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
398 int new_fpstt;
400 new_fpstt = (env->fpstt - 1) & 7;
401 env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
402 env->fpstt = new_fpstt;
403 env->fptags[new_fpstt] = 0; /* validate stack entry */
406 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
408 helper_fstt(env, ST0, ptr, GETPC());
411 void helper_fpush(CPUX86State *env)
413 fpush(env);
416 void helper_fpop(CPUX86State *env)
418 fpop(env);
421 void helper_fdecstp(CPUX86State *env)
423 env->fpstt = (env->fpstt - 1) & 7;
424 env->fpus &= ~0x4700;
427 void helper_fincstp(CPUX86State *env)
429 env->fpstt = (env->fpstt + 1) & 7;
430 env->fpus &= ~0x4700;
433 /* FPU move */
435 void helper_ffree_STN(CPUX86State *env, int st_index)
437 env->fptags[(env->fpstt + st_index) & 7] = 1;
440 void helper_fmov_ST0_FT0(CPUX86State *env)
442 ST0 = FT0;
445 void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
447 FT0 = ST(st_index);
450 void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
452 ST0 = ST(st_index);
455 void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
457 ST(st_index) = ST0;
460 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
462 floatx80 tmp;
464 tmp = ST(st_index);
465 ST(st_index) = ST0;
466 ST0 = tmp;
469 /* FPU operations */
471 static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
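/*
 * Indexed by FloatRelation + 1: "less" sets C0, "equal" sets C3, "greater"
 * sets nothing, "unordered" sets C3|C2|C0.  The fcomi_ccval table below maps
 * the same four relations onto the EFLAGS CF/ZF/PF bits instead.
 */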
473 void helper_fcom_ST0_FT0(CPUX86State *env)
475 uint8_t old_flags = save_exception_flags(env);
476 FloatRelation ret;
478 ret = floatx80_compare(ST0, FT0, &env->fp_status);
479 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
480 merge_exception_flags(env, old_flags);
483 void helper_fucom_ST0_FT0(CPUX86State *env)
485 uint8_t old_flags = save_exception_flags(env);
486 FloatRelation ret;
488 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
489 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
490 merge_exception_flags(env, old_flags);
493 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
495 void helper_fcomi_ST0_FT0(CPUX86State *env)
497 uint8_t old_flags = save_exception_flags(env);
498 int eflags;
499 FloatRelation ret;
501 ret = floatx80_compare(ST0, FT0, &env->fp_status);
502 eflags = cpu_cc_compute_all(env, CC_OP);
503 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
504 CC_SRC = eflags;
505 merge_exception_flags(env, old_flags);
508 void helper_fucomi_ST0_FT0(CPUX86State *env)
510 uint8_t old_flags = save_exception_flags(env);
511 int eflags;
512 FloatRelation ret;
514 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
515 eflags = cpu_cc_compute_all(env, CC_OP);
516 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
517 CC_SRC = eflags;
518 merge_exception_flags(env, old_flags);
521 void helper_fadd_ST0_FT0(CPUX86State *env)
523 uint8_t old_flags = save_exception_flags(env);
524 ST0 = floatx80_add(ST0, FT0, &env->fp_status);
525 merge_exception_flags(env, old_flags);
528 void helper_fmul_ST0_FT0(CPUX86State *env)
530 uint8_t old_flags = save_exception_flags(env);
531 ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
532 merge_exception_flags(env, old_flags);
535 void helper_fsub_ST0_FT0(CPUX86State *env)
537 uint8_t old_flags = save_exception_flags(env);
538 ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
539 merge_exception_flags(env, old_flags);
542 void helper_fsubr_ST0_FT0(CPUX86State *env)
544 uint8_t old_flags = save_exception_flags(env);
545 ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
546 merge_exception_flags(env, old_flags);
549 void helper_fdiv_ST0_FT0(CPUX86State *env)
551 ST0 = helper_fdiv(env, ST0, FT0);
554 void helper_fdivr_ST0_FT0(CPUX86State *env)
556 ST0 = helper_fdiv(env, FT0, ST0);
559 /* fp operations between STN and ST0 */
561 void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
563 uint8_t old_flags = save_exception_flags(env);
564 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
565 merge_exception_flags(env, old_flags);
568 void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
570 uint8_t old_flags = save_exception_flags(env);
571 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
572 merge_exception_flags(env, old_flags);
575 void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
577 uint8_t old_flags = save_exception_flags(env);
578 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
579 merge_exception_flags(env, old_flags);
582 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
584 uint8_t old_flags = save_exception_flags(env);
585 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
586 merge_exception_flags(env, old_flags);
589 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
591 floatx80 *p;
593 p = &ST(st_index);
594 *p = helper_fdiv(env, *p, ST0);
597 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
599 floatx80 *p;
601 p = &ST(st_index);
602 *p = helper_fdiv(env, ST0, *p);
605 /* misc FPU operations */
606 void helper_fchs_ST0(CPUX86State *env)
608 ST0 = floatx80_chs(ST0);
611 void helper_fabs_ST0(CPUX86State *env)
613 ST0 = floatx80_abs(ST0);
616 void helper_fld1_ST0(CPUX86State *env)
618 ST0 = floatx80_one;
621 void helper_fldl2t_ST0(CPUX86State *env)
623 switch (env->fpuc & FPU_RC_MASK) {
624 case FPU_RC_UP:
625 ST0 = floatx80_l2t_u;
626 break;
627 default:
628 ST0 = floatx80_l2t;
629 break;
633 void helper_fldl2e_ST0(CPUX86State *env)
635 switch (env->fpuc & FPU_RC_MASK) {
636 case FPU_RC_DOWN:
637 case FPU_RC_CHOP:
638 ST0 = floatx80_l2e_d;
639 break;
640 default:
641 ST0 = floatx80_l2e;
642 break;
646 void helper_fldpi_ST0(CPUX86State *env)
648 switch (env->fpuc & FPU_RC_MASK) {
649 case FPU_RC_DOWN:
650 case FPU_RC_CHOP:
651 ST0 = floatx80_pi_d;
652 break;
653 default:
654 ST0 = floatx80_pi;
655 break;
659 void helper_fldlg2_ST0(CPUX86State *env)
661 switch (env->fpuc & FPU_RC_MASK) {
662 case FPU_RC_DOWN:
663 case FPU_RC_CHOP:
664 ST0 = floatx80_lg2_d;
665 break;
666 default:
667 ST0 = floatx80_lg2;
668 break;
672 void helper_fldln2_ST0(CPUX86State *env)
674 switch (env->fpuc & FPU_RC_MASK) {
675 case FPU_RC_DOWN:
676 case FPU_RC_CHOP:
677 ST0 = floatx80_ln2_d;
678 break;
679 default:
680 ST0 = floatx80_ln2;
681 break;
685 void helper_fldz_ST0(CPUX86State *env)
687 ST0 = floatx80_zero;
690 void helper_fldz_FT0(CPUX86State *env)
692 FT0 = floatx80_zero;
695 uint32_t helper_fnstsw(CPUX86State *env)
697 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
700 uint32_t helper_fnstcw(CPUX86State *env)
702 return env->fpuc;
705 void update_fp_status(CPUX86State *env)
707 int rnd_type;
709 /* set rounding mode */
710 switch (env->fpuc & FPU_RC_MASK) {
711 default:
712 case FPU_RC_NEAR:
713 rnd_type = float_round_nearest_even;
714 break;
715 case FPU_RC_DOWN:
716 rnd_type = float_round_down;
717 break;
718 case FPU_RC_UP:
719 rnd_type = float_round_up;
720 break;
721 case FPU_RC_CHOP:
722 rnd_type = float_round_to_zero;
723 break;
725 set_float_rounding_mode(rnd_type, &env->fp_status);
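/*
 * Precision control (FPUC bits 9-8): 00 selects 32-bit, 10 selects 64-bit
 * and 11 selects 80-bit precision; the reserved 01 encoding falls through
 * to the 80-bit default below.
 */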
726 switch ((env->fpuc >> 8) & 3) {
727 case 0:
728 rnd_type = 32;
729 break;
730 case 2:
731 rnd_type = 64;
732 break;
733 case 3:
734 default:
735 rnd_type = 80;
736 break;
738 set_floatx80_rounding_precision(rnd_type, &env->fp_status);
741 void helper_fldcw(CPUX86State *env, uint32_t val)
743 cpu_set_fpuc(env, val);
746 void helper_fclex(CPUX86State *env)
748 env->fpus &= 0x7f00;
751 void helper_fwait(CPUX86State *env)
753 if (env->fpus & FPUS_SE) {
754 fpu_raise_exception(env, GETPC());
758 void helper_fninit(CPUX86State *env)
760 env->fpus = 0;
761 env->fpstt = 0;
762 cpu_set_fpuc(env, 0x37f);
763 env->fptags[0] = 1;
764 env->fptags[1] = 1;
765 env->fptags[2] = 1;
766 env->fptags[3] = 1;
767 env->fptags[4] = 1;
768 env->fptags[5] = 1;
769 env->fptags[6] = 1;
770 env->fptags[7] = 1;
773 /* BCD ops */
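/*
 * fbld/fbst use the x87 packed-BCD format: bytes 0-8 hold 18 decimal digits,
 * two per byte with the lower digit in the low nibble, and bit 7 of byte 9
 * is the sign.  For example, -1234 is stored (byte 0 first) as
 * 34 12 00 ... 00 80.
 */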
775 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
777 floatx80 tmp;
778 uint64_t val;
779 unsigned int v;
780 int i;
782 val = 0;
783 for (i = 8; i >= 0; i--) {
784 v = cpu_ldub_data_ra(env, ptr + i, GETPC());
785 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
787 tmp = int64_to_floatx80(val, &env->fp_status);
788 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
789 tmp = floatx80_chs(tmp);
791 fpush(env);
792 ST0 = tmp;
795 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
797 uint8_t old_flags = save_exception_flags(env);
798 int v;
799 target_ulong mem_ref, mem_end;
800 int64_t val;
801 CPU_LDoubleU temp;
803 temp.d = ST0;
805 val = floatx80_to_int64(ST0, &env->fp_status);
806 mem_ref = ptr;
807 if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
808 set_float_exception_flags(float_flag_invalid, &env->fp_status);
809 while (mem_ref < ptr + 7) {
810 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
812 cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC());
813 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
814 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
815 merge_exception_flags(env, old_flags);
816 return;
818 mem_end = mem_ref + 9;
819 if (SIGND(temp)) {
820 cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
821 val = -val;
822 } else {
823 cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
825 while (mem_ref < mem_end) {
826 if (val == 0) {
827 break;
829 v = val % 100;
830 val = val / 100;
831 v = ((v / 10) << 4) | (v % 10);
832 cpu_stb_data_ra(env, mem_ref++, v, GETPC());
834 while (mem_ref < mem_end) {
835 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
837 merge_exception_flags(env, old_flags);
840 /* 128-bit significand of log(2). */
841 #define ln2_sig_high 0xb17217f7d1cf79abULL
842 #define ln2_sig_low 0xc9e3b39803f2f6afULL
845 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
846 * the interval [-1/64, 1/64].
848 #define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
849 #define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
850 #define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
851 #define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
852 #define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
853 #define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
854 #define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
855 #define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
856 #define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL)
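/*
 * The reduction below writes the argument as x = t + y, where t is (nearly)
 * a multiple of 1/32 taken from f2xm1_table and |y| is at most about 1/64.
 * Then 2^x - 1 = 2^t * (2^y - 1) + (2^t - 1), with 2^y - 1 evaluated as y
 * times the polynomial above and 2^t, 2^t - 1 taken from the table.
 */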
858 struct f2xm1_data {
860 * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
861 * are very close to exact floatx80 values.
863 floatx80 t;
864 /* The value of 2^t. */
865 floatx80 exp2;
866 /* The value of 2^t - 1. */
867 floatx80 exp2m1;
870 static const struct f2xm1_data f2xm1_table[65] = {
871 { make_floatx80(0xbfff, 0x8000000000000000ULL),
872 make_floatx80(0x3ffe, 0x8000000000000000ULL),
873 make_floatx80(0xbffe, 0x8000000000000000ULL) },
874 { make_floatx80(0xbffe, 0xf800000000002e7eULL),
875 make_floatx80(0x3ffe, 0x82cd8698ac2b9160ULL),
876 make_floatx80(0xbffd, 0xfa64f2cea7a8dd40ULL) },
877 { make_floatx80(0xbffe, 0xefffffffffffe960ULL),
878 make_floatx80(0x3ffe, 0x85aac367cc488345ULL),
879 make_floatx80(0xbffd, 0xf4aa7930676ef976ULL) },
880 { make_floatx80(0xbffe, 0xe800000000006f10ULL),
881 make_floatx80(0x3ffe, 0x88980e8092da5c14ULL),
882 make_floatx80(0xbffd, 0xeecfe2feda4b47d8ULL) },
883 { make_floatx80(0xbffe, 0xe000000000008a45ULL),
884 make_floatx80(0x3ffe, 0x8b95c1e3ea8ba2a5ULL),
885 make_floatx80(0xbffd, 0xe8d47c382ae8bab6ULL) },
886 { make_floatx80(0xbffe, 0xd7ffffffffff8a9eULL),
887 make_floatx80(0x3ffe, 0x8ea4398b45cd8116ULL),
888 make_floatx80(0xbffd, 0xe2b78ce97464fdd4ULL) },
889 { make_floatx80(0xbffe, 0xd0000000000019a0ULL),
890 make_floatx80(0x3ffe, 0x91c3d373ab11b919ULL),
891 make_floatx80(0xbffd, 0xdc785918a9dc8dceULL) },
892 { make_floatx80(0xbffe, 0xc7ffffffffff14dfULL),
893 make_floatx80(0x3ffe, 0x94f4efa8fef76836ULL),
894 make_floatx80(0xbffd, 0xd61620ae02112f94ULL) },
895 { make_floatx80(0xbffe, 0xc000000000006530ULL),
896 make_floatx80(0x3ffe, 0x9837f0518db87fbbULL),
897 make_floatx80(0xbffd, 0xcf901f5ce48f008aULL) },
898 { make_floatx80(0xbffe, 0xb7ffffffffff1723ULL),
899 make_floatx80(0x3ffe, 0x9b8d39b9d54eb74cULL),
900 make_floatx80(0xbffd, 0xc8e58c8c55629168ULL) },
901 { make_floatx80(0xbffe, 0xb00000000000b5e1ULL),
902 make_floatx80(0x3ffe, 0x9ef5326091a0c366ULL),
903 make_floatx80(0xbffd, 0xc2159b3edcbe7934ULL) },
904 { make_floatx80(0xbffe, 0xa800000000006f8aULL),
905 make_floatx80(0x3ffe, 0xa27043030c49370aULL),
906 make_floatx80(0xbffd, 0xbb1f79f9e76d91ecULL) },
907 { make_floatx80(0xbffe, 0x9fffffffffff816aULL),
908 make_floatx80(0x3ffe, 0xa5fed6a9b15171cfULL),
909 make_floatx80(0xbffd, 0xb40252ac9d5d1c62ULL) },
910 { make_floatx80(0xbffe, 0x97ffffffffffb621ULL),
911 make_floatx80(0x3ffe, 0xa9a15ab4ea7c30e6ULL),
912 make_floatx80(0xbffd, 0xacbd4a962b079e34ULL) },
913 { make_floatx80(0xbffe, 0x8fffffffffff162bULL),
914 make_floatx80(0x3ffe, 0xad583eea42a1b886ULL),
915 make_floatx80(0xbffd, 0xa54f822b7abc8ef4ULL) },
916 { make_floatx80(0xbffe, 0x87ffffffffff4d34ULL),
917 make_floatx80(0x3ffe, 0xb123f581d2ac7b51ULL),
918 make_floatx80(0xbffd, 0x9db814fc5aa7095eULL) },
919 { make_floatx80(0xbffe, 0x800000000000227dULL),
920 make_floatx80(0x3ffe, 0xb504f333f9de539dULL),
921 make_floatx80(0xbffd, 0x95f619980c4358c6ULL) },
922 { make_floatx80(0xbffd, 0xefffffffffff3978ULL),
923 make_floatx80(0x3ffe, 0xb8fbaf4762fbd0a1ULL),
924 make_floatx80(0xbffd, 0x8e08a1713a085ebeULL) },
925 { make_floatx80(0xbffd, 0xe00000000000df81ULL),
926 make_floatx80(0x3ffe, 0xbd08a39f580bfd8cULL),
927 make_floatx80(0xbffd, 0x85eeb8c14fe804e8ULL) },
928 { make_floatx80(0xbffd, 0xd00000000000bccfULL),
929 make_floatx80(0x3ffe, 0xc12c4cca667062f6ULL),
930 make_floatx80(0xbffc, 0xfb4eccd6663e7428ULL) },
931 { make_floatx80(0xbffd, 0xc00000000000eff0ULL),
932 make_floatx80(0x3ffe, 0xc5672a1155069abeULL),
933 make_floatx80(0xbffc, 0xea6357baabe59508ULL) },
934 { make_floatx80(0xbffd, 0xb000000000000fe6ULL),
935 make_floatx80(0x3ffe, 0xc9b9bd866e2f234bULL),
936 make_floatx80(0xbffc, 0xd91909e6474372d4ULL) },
937 { make_floatx80(0xbffd, 0x9fffffffffff2172ULL),
938 make_floatx80(0x3ffe, 0xce248c151f84bf00ULL),
939 make_floatx80(0xbffc, 0xc76dcfab81ed0400ULL) },
940 { make_floatx80(0xbffd, 0x8fffffffffffafffULL),
941 make_floatx80(0x3ffe, 0xd2a81d91f12afb2bULL),
942 make_floatx80(0xbffc, 0xb55f89b83b541354ULL) },
943 { make_floatx80(0xbffc, 0xffffffffffff81a3ULL),
944 make_floatx80(0x3ffe, 0xd744fccad69d7d5eULL),
945 make_floatx80(0xbffc, 0xa2ec0cd4a58a0a88ULL) },
946 { make_floatx80(0xbffc, 0xdfffffffffff1568ULL),
947 make_floatx80(0x3ffe, 0xdbfbb797daf25a44ULL),
948 make_floatx80(0xbffc, 0x901121a0943696f0ULL) },
949 { make_floatx80(0xbffc, 0xbfffffffffff68daULL),
950 make_floatx80(0x3ffe, 0xe0ccdeec2a94f811ULL),
951 make_floatx80(0xbffb, 0xf999089eab583f78ULL) },
952 { make_floatx80(0xbffc, 0x9fffffffffff4690ULL),
953 make_floatx80(0x3ffe, 0xe5b906e77c83657eULL),
954 make_floatx80(0xbffb, 0xd237c8c41be4d410ULL) },
955 { make_floatx80(0xbffb, 0xffffffffffff8aeeULL),
956 make_floatx80(0x3ffe, 0xeac0c6e7dd24427cULL),
957 make_floatx80(0xbffb, 0xa9f9c8c116ddec20ULL) },
958 { make_floatx80(0xbffb, 0xbfffffffffff2d18ULL),
959 make_floatx80(0x3ffe, 0xefe4b99bdcdb06ebULL),
960 make_floatx80(0xbffb, 0x80da33211927c8a8ULL) },
961 { make_floatx80(0xbffa, 0xffffffffffff8ccbULL),
962 make_floatx80(0x3ffe, 0xf5257d152486d0f4ULL),
963 make_floatx80(0xbffa, 0xada82eadb792f0c0ULL) },
964 { make_floatx80(0xbff9, 0xffffffffffff11feULL),
965 make_floatx80(0x3ffe, 0xfa83b2db722a0846ULL),
966 make_floatx80(0xbff9, 0xaf89a491babef740ULL) },
967 { floatx80_zero,
968 make_floatx80(0x3fff, 0x8000000000000000ULL),
969 floatx80_zero },
970 { make_floatx80(0x3ff9, 0xffffffffffff2680ULL),
971 make_floatx80(0x3fff, 0x82cd8698ac2b9f6fULL),
972 make_floatx80(0x3ff9, 0xb361a62b0ae7dbc0ULL) },
973 { make_floatx80(0x3ffb, 0x800000000000b500ULL),
974 make_floatx80(0x3fff, 0x85aac367cc488345ULL),
975 make_floatx80(0x3ffa, 0xb5586cf9891068a0ULL) },
976 { make_floatx80(0x3ffb, 0xbfffffffffff4b67ULL),
977 make_floatx80(0x3fff, 0x88980e8092da7cceULL),
978 make_floatx80(0x3ffb, 0x8980e8092da7cce0ULL) },
979 { make_floatx80(0x3ffb, 0xffffffffffffff57ULL),
980 make_floatx80(0x3fff, 0x8b95c1e3ea8bd6dfULL),
981 make_floatx80(0x3ffb, 0xb95c1e3ea8bd6df0ULL) },
982 { make_floatx80(0x3ffc, 0x9fffffffffff811fULL),
983 make_floatx80(0x3fff, 0x8ea4398b45cd4780ULL),
984 make_floatx80(0x3ffb, 0xea4398b45cd47800ULL) },
985 { make_floatx80(0x3ffc, 0xbfffffffffff9980ULL),
986 make_floatx80(0x3fff, 0x91c3d373ab11b919ULL),
987 make_floatx80(0x3ffc, 0x8e1e9b9d588dc8c8ULL) },
988 { make_floatx80(0x3ffc, 0xdffffffffffff631ULL),
989 make_floatx80(0x3fff, 0x94f4efa8fef70864ULL),
990 make_floatx80(0x3ffc, 0xa7a77d47f7b84320ULL) },
991 { make_floatx80(0x3ffc, 0xffffffffffff2499ULL),
992 make_floatx80(0x3fff, 0x9837f0518db892d4ULL),
993 make_floatx80(0x3ffc, 0xc1bf828c6dc496a0ULL) },
994 { make_floatx80(0x3ffd, 0x8fffffffffff80fbULL),
995 make_floatx80(0x3fff, 0x9b8d39b9d54e3a79ULL),
996 make_floatx80(0x3ffc, 0xdc69cdceaa71d3c8ULL) },
997 { make_floatx80(0x3ffd, 0x9fffffffffffbc23ULL),
998 make_floatx80(0x3fff, 0x9ef5326091a10313ULL),
999 make_floatx80(0x3ffc, 0xf7a993048d081898ULL) },
1000 { make_floatx80(0x3ffd, 0xafffffffffff20ecULL),
1001 make_floatx80(0x3fff, 0xa27043030c49370aULL),
1002 make_floatx80(0x3ffd, 0x89c10c0c3124dc28ULL) },
1003 { make_floatx80(0x3ffd, 0xc00000000000fd2cULL),
1004 make_floatx80(0x3fff, 0xa5fed6a9b15171cfULL),
1005 make_floatx80(0x3ffd, 0x97fb5aa6c545c73cULL) },
1006 { make_floatx80(0x3ffd, 0xd0000000000093beULL),
1007 make_floatx80(0x3fff, 0xa9a15ab4ea7c30e6ULL),
1008 make_floatx80(0x3ffd, 0xa6856ad3a9f0c398ULL) },
1009 { make_floatx80(0x3ffd, 0xe00000000000c2aeULL),
1010 make_floatx80(0x3fff, 0xad583eea42a17876ULL),
1011 make_floatx80(0x3ffd, 0xb560fba90a85e1d8ULL) },
1012 { make_floatx80(0x3ffd, 0xefffffffffff1e3fULL),
1013 make_floatx80(0x3fff, 0xb123f581d2abef6cULL),
1014 make_floatx80(0x3ffd, 0xc48fd6074aafbdb0ULL) },
1015 { make_floatx80(0x3ffd, 0xffffffffffff1c23ULL),
1016 make_floatx80(0x3fff, 0xb504f333f9de2cadULL),
1017 make_floatx80(0x3ffd, 0xd413cccfe778b2b4ULL) },
1018 { make_floatx80(0x3ffe, 0x8800000000006344ULL),
1019 make_floatx80(0x3fff, 0xb8fbaf4762fbd0a1ULL),
1020 make_floatx80(0x3ffd, 0xe3eebd1d8bef4284ULL) },
1021 { make_floatx80(0x3ffe, 0x9000000000005d67ULL),
1022 make_floatx80(0x3fff, 0xbd08a39f580c668dULL),
1023 make_floatx80(0x3ffd, 0xf4228e7d60319a34ULL) },
1024 { make_floatx80(0x3ffe, 0x9800000000009127ULL),
1025 make_floatx80(0x3fff, 0xc12c4cca6670e042ULL),
1026 make_floatx80(0x3ffe, 0x82589994cce1c084ULL) },
1027 { make_floatx80(0x3ffe, 0x9fffffffffff06f9ULL),
1028 make_floatx80(0x3fff, 0xc5672a11550655c3ULL),
1029 make_floatx80(0x3ffe, 0x8ace5422aa0cab86ULL) },
1030 { make_floatx80(0x3ffe, 0xa7fffffffffff80dULL),
1031 make_floatx80(0x3fff, 0xc9b9bd866e2f234bULL),
1032 make_floatx80(0x3ffe, 0x93737b0cdc5e4696ULL) },
1033 { make_floatx80(0x3ffe, 0xafffffffffff1470ULL),
1034 make_floatx80(0x3fff, 0xce248c151f83fd69ULL),
1035 make_floatx80(0x3ffe, 0x9c49182a3f07fad2ULL) },
1036 { make_floatx80(0x3ffe, 0xb800000000000e0aULL),
1037 make_floatx80(0x3fff, 0xd2a81d91f12aec5cULL),
1038 make_floatx80(0x3ffe, 0xa5503b23e255d8b8ULL) },
1039 { make_floatx80(0x3ffe, 0xc00000000000b7faULL),
1040 make_floatx80(0x3fff, 0xd744fccad69dd630ULL),
1041 make_floatx80(0x3ffe, 0xae89f995ad3bac60ULL) },
1042 { make_floatx80(0x3ffe, 0xc800000000003aa6ULL),
1043 make_floatx80(0x3fff, 0xdbfbb797daf25a44ULL),
1044 make_floatx80(0x3ffe, 0xb7f76f2fb5e4b488ULL) },
1045 { make_floatx80(0x3ffe, 0xd00000000000a6aeULL),
1046 make_floatx80(0x3fff, 0xe0ccdeec2a954685ULL),
1047 make_floatx80(0x3ffe, 0xc199bdd8552a8d0aULL) },
1048 { make_floatx80(0x3ffe, 0xd800000000004165ULL),
1049 make_floatx80(0x3fff, 0xe5b906e77c837155ULL),
1050 make_floatx80(0x3ffe, 0xcb720dcef906e2aaULL) },
1051 { make_floatx80(0x3ffe, 0xe00000000000582cULL),
1052 make_floatx80(0x3fff, 0xeac0c6e7dd24713aULL),
1053 make_floatx80(0x3ffe, 0xd5818dcfba48e274ULL) },
1054 { make_floatx80(0x3ffe, 0xe800000000001a5dULL),
1055 make_floatx80(0x3fff, 0xefe4b99bdcdb06ebULL),
1056 make_floatx80(0x3ffe, 0xdfc97337b9b60dd6ULL) },
1057 { make_floatx80(0x3ffe, 0xefffffffffffc1efULL),
1058 make_floatx80(0x3fff, 0xf5257d152486a2faULL),
1059 make_floatx80(0x3ffe, 0xea4afa2a490d45f4ULL) },
1060 { make_floatx80(0x3ffe, 0xf800000000001069ULL),
1061 make_floatx80(0x3fff, 0xfa83b2db722a0e5cULL),
1062 make_floatx80(0x3ffe, 0xf50765b6e4541cb8ULL) },
1063 { make_floatx80(0x3fff, 0x8000000000000000ULL),
1064 make_floatx80(0x4000, 0x8000000000000000ULL),
1065 make_floatx80(0x3fff, 0x8000000000000000ULL) },
1068 void helper_f2xm1(CPUX86State *env)
1070 uint8_t old_flags = save_exception_flags(env);
1071 uint64_t sig = extractFloatx80Frac(ST0);
1072 int32_t exp = extractFloatx80Exp(ST0);
1073 bool sign = extractFloatx80Sign(ST0);
1075 if (floatx80_invalid_encoding(ST0)) {
1076 float_raise(float_flag_invalid, &env->fp_status);
1077 ST0 = floatx80_default_nan(&env->fp_status);
1078 } else if (floatx80_is_any_nan(ST0)) {
1079 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1080 float_raise(float_flag_invalid, &env->fp_status);
1081 ST0 = floatx80_silence_nan(ST0, &env->fp_status);
1083 } else if (exp > 0x3fff ||
1084 (exp == 0x3fff && sig != (0x8000000000000000ULL))) {
1085 /* Out of range for the instruction, treat as invalid. */
1086 float_raise(float_flag_invalid, &env->fp_status);
1087 ST0 = floatx80_default_nan(&env->fp_status);
1088 } else if (exp == 0x3fff) {
1089 /* Argument 1 or -1, exact result 1 or -0.5. */
1090 if (sign) {
1091 ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL);
1093 } else if (exp < 0x3fb0) {
1094 if (!floatx80_is_zero(ST0)) {
1096 * Multiplying the argument by an extra-precision version
1097 * of log(2) is sufficiently precise. Zero arguments are
1098 * returned unchanged.
1100 uint64_t sig0, sig1, sig2;
1101 if (exp == 0) {
1102 normalizeFloatx80Subnormal(sig, &exp, &sig);
1104 mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1,
1105 &sig2);
1106 /* This result is inexact. */
1107 sig1 |= 1;
1108 ST0 = normalizeRoundAndPackFloatx80(80, sign, exp, sig0, sig1,
1109 &env->fp_status);
1111 } else {
1112 floatx80 tmp, y, accum;
1113 bool asign, bsign;
1114 int32_t n, aexp, bexp;
1115 uint64_t asig0, asig1, asig2, bsig0, bsig1;
1116 FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
1117 signed char save_prec = env->fp_status.floatx80_rounding_precision;
1118 env->fp_status.float_rounding_mode = float_round_nearest_even;
1119 env->fp_status.floatx80_rounding_precision = 80;
1121 /* Find the nearest multiple of 1/32 to the argument. */
1122 tmp = floatx80_scalbn(ST0, 5, &env->fp_status);
1123 n = 32 + floatx80_to_int32(tmp, &env->fp_status);
1124 y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status);
1126 if (floatx80_is_zero(y)) {
1128 * Use the value of 2^t - 1 from the table, to avoid
1129 * needing to special-case zero as a result of
1130 * multiplication below.
1132 ST0 = f2xm1_table[n].exp2m1;
1133 set_float_exception_flags(float_flag_inexact, &env->fp_status);
1134 env->fp_status.float_rounding_mode = save_mode;
1135 } else {
1137 * Compute the lower parts of a polynomial expansion for
1138 * (2^y - 1) / y.
1140 accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status);
1141 accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status);
1142 accum = floatx80_mul(accum, y, &env->fp_status);
1143 accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status);
1144 accum = floatx80_mul(accum, y, &env->fp_status);
1145 accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status);
1146 accum = floatx80_mul(accum, y, &env->fp_status);
1147 accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status);
1148 accum = floatx80_mul(accum, y, &env->fp_status);
1149 accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status);
1150 accum = floatx80_mul(accum, y, &env->fp_status);
1151 accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status);
1152 accum = floatx80_mul(accum, y, &env->fp_status);
1153 accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status);
1156 * The full polynomial expansion is f2xm1_coeff_0 + accum
1157 * (where accum has much lower magnitude, and so, in
1158 * particular, carry out of the addition is not possible).
1159 * (This expansion is only accurate to about 70 bits, not
1160 * 128 bits.)
1162 aexp = extractFloatx80Exp(f2xm1_coeff_0);
1163 asign = extractFloatx80Sign(f2xm1_coeff_0);
1164 shift128RightJamming(extractFloatx80Frac(accum), 0,
1165 aexp - extractFloatx80Exp(accum),
1166 &asig0, &asig1);
1167 bsig0 = extractFloatx80Frac(f2xm1_coeff_0);
1168 bsig1 = 0;
1169 if (asign == extractFloatx80Sign(accum)) {
1170 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1171 } else {
1172 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1174 /* And thus compute an approximation to 2^y - 1. */
1175 mul128By64To192(asig0, asig1, extractFloatx80Frac(y),
1176 &asig0, &asig1, &asig2);
1177 aexp += extractFloatx80Exp(y) - 0x3ffe;
1178 asign ^= extractFloatx80Sign(y);
1179 if (n != 32) {
1181 * Multiply this by the precomputed value of 2^t and
1182 * add that of 2^t - 1.
1184 mul128By64To192(asig0, asig1,
1185 extractFloatx80Frac(f2xm1_table[n].exp2),
1186 &asig0, &asig1, &asig2);
1187 aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe;
1188 bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1);
1189 bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1);
1190 bsig1 = 0;
1191 if (bexp < aexp) {
1192 shift128RightJamming(bsig0, bsig1, aexp - bexp,
1193 &bsig0, &bsig1);
1194 } else if (aexp < bexp) {
1195 shift128RightJamming(asig0, asig1, bexp - aexp,
1196 &asig0, &asig1);
1197 aexp = bexp;
1199 /* The sign of 2^t - 1 is always that of the result. */
1200 bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1);
1201 if (asign == bsign) {
1202 /* Avoid possible carry out of the addition. */
1203 shift128RightJamming(asig0, asig1, 1,
1204 &asig0, &asig1);
1205 shift128RightJamming(bsig0, bsig1, 1,
1206 &bsig0, &bsig1);
1207 ++aexp;
1208 add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1);
1209 } else {
1210 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1211 asign = bsign;
1214 env->fp_status.float_rounding_mode = save_mode;
1215 /* This result is inexact. */
1216 asig1 |= 1;
1217 ST0 = normalizeRoundAndPackFloatx80(80, asign, aexp, asig0, asig1,
1218 &env->fp_status);
1221 env->fp_status.floatx80_rounding_precision = save_prec;
1223 merge_exception_flags(env, old_flags);
1226 void helper_fptan(CPUX86State *env)
1228 double fptemp = floatx80_to_double(env, ST0);
1230 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1231 env->fpus |= 0x400;
1232 } else {
1233 fptemp = tan(fptemp);
1234 ST0 = double_to_floatx80(env, fptemp);
1235 fpush(env);
1236 ST0 = floatx80_one;
1237 env->fpus &= ~0x400; /* C2 <-- 0 */
1238 /* the above code is for |arg| < 2**52 only */
1242 void helper_fpatan(CPUX86State *env)
1244 double fptemp, fpsrcop;
1246 fpsrcop = floatx80_to_double(env, ST1);
1247 fptemp = floatx80_to_double(env, ST0);
1248 ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
1249 fpop(env);
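/*
 * fxtract splits ST0 into exponent and significand: the unbiased exponent
 * replaces ST0 and the significand (rescaled into [1, 2)) is pushed on top.
 * For example, 2.5 = 1.25 * 2^1 leaves ST1 = 1.0 and ST0 = 1.25.
 */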
1252 void helper_fxtract(CPUX86State *env)
1254 uint8_t old_flags = save_exception_flags(env);
1255 CPU_LDoubleU temp;
1257 temp.d = ST0;
1259 if (floatx80_is_zero(ST0)) {
1260 /* Easy way to generate -inf and raise the division-by-zero exception */
1261 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
1262 &env->fp_status);
1263 fpush(env);
1264 ST0 = temp.d;
1265 } else if (floatx80_invalid_encoding(ST0)) {
1266 float_raise(float_flag_invalid, &env->fp_status);
1267 ST0 = floatx80_default_nan(&env->fp_status);
1268 fpush(env);
1269 ST0 = ST1;
1270 } else if (floatx80_is_any_nan(ST0)) {
1271 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1272 float_raise(float_flag_invalid, &env->fp_status);
1273 ST0 = floatx80_silence_nan(ST0, &env->fp_status);
1275 fpush(env);
1276 ST0 = ST1;
1277 } else if (floatx80_is_infinity(ST0)) {
1278 fpush(env);
1279 ST0 = ST1;
1280 ST1 = floatx80_infinity;
1281 } else {
1282 int expdif;
1284 if (EXPD(temp) == 0) {
1285 int shift = clz64(temp.l.lower);
1286 temp.l.lower <<= shift;
1287 expdif = 1 - EXPBIAS - shift;
1288 float_raise(float_flag_input_denormal, &env->fp_status);
1289 } else {
1290 expdif = EXPD(temp) - EXPBIAS;
1292 /* DP exponent bias */
1293 ST0 = int32_to_floatx80(expdif, &env->fp_status);
1294 fpush(env);
1295 BIASEXPONENT(temp);
1296 ST0 = temp.d;
1298 merge_exception_flags(env, old_flags);
1301 static void helper_fprem_common(CPUX86State *env, bool mod)
1303 uint8_t old_flags = save_exception_flags(env);
1304 uint64_t quotient;
1305 CPU_LDoubleU temp0, temp1;
1306 int exp0, exp1, expdiff;
1308 temp0.d = ST0;
1309 temp1.d = ST1;
1310 exp0 = EXPD(temp0);
1311 exp1 = EXPD(temp1);
1313 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1314 if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
1315 exp0 == 0x7fff || exp1 == 0x7fff ||
1316 floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) {
1317 ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
1318 } else {
1319 if (exp0 == 0) {
1320 exp0 = 1 - clz64(temp0.l.lower);
1322 if (exp1 == 0) {
1323 exp1 = 1 - clz64(temp1.l.lower);
1325 expdiff = exp0 - exp1;
1326 if (expdiff < 64) {
1327 ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
1328 env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */
1329 env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */
1330 env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */
1331 } else {
1333 * Partial remainder. This choice of how many bits to
1334 * process at once is specified in AMD instruction set
1335 * manuals, and empirically is followed by Intel
1336 * processors as well; it ensures that the final remainder
1337 * operation in a loop does produce the correct low three
1338 * bits of the quotient. AMD manuals specify that the
1339 * flags other than C2 are cleared, and empirically Intel
1340 * processors clear them as well.
1342 int n = 32 + (expdiff % 32);
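/*
 * For example, with expdiff == 100 this gives n == 36: ST1 is scaled
 * up by 2^(100 - 36) so the remaining exponent difference is 36, one
 * reduction step is performed, C2 is set, and software is expected to
 * loop until C2 clears.
 */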
1343 temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status);
1344 ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status);
1345 env->fpus |= 0x400; /* C2 <-- 1 */
1348 merge_exception_flags(env, old_flags);
1351 void helper_fprem1(CPUX86State *env)
1353 helper_fprem_common(env, false);
1356 void helper_fprem(CPUX86State *env)
1358 helper_fprem_common(env, true);
1361 /* 128-bit significand of log2(e). */
1362 #define log2_e_sig_high 0xb8aa3b295c17f0bbULL
1363 #define log2_e_sig_low 0xbe87fed0691d3e89ULL
1366 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)),
1367 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3,
1368 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the
1369 * interval [sqrt(2)/2, sqrt(2)].
1371 #define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
1372 #define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
1373 #define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
1374 #define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
1375 #define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
1376 #define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
1377 #define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
1378 #define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
1379 #define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
1380 #define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
1381 #define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)
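/*
 * The substitution used below is u = arg / (2 + arg), for which
 * (1 + u) / (1 - u) = 1 + arg exactly, so log2(1 + arg) is obtained by
 * evaluating the odd polynomial above at u: u * (c0 + c1*u^2 + ...).
 */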
1384 * Compute an approximation of log2(1+arg), where 1+arg is in the
1385 * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this
1386 * function is called, rounding precision is set to 80 and the
1387 * round-to-nearest mode is in effect. arg must not be exactly zero,
1388 * and must not be so close to zero that underflow might occur.
1390 static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
1391 uint64_t *sig0, uint64_t *sig1)
1393 uint64_t arg0_sig = extractFloatx80Frac(arg);
1394 int32_t arg0_exp = extractFloatx80Exp(arg);
1395 bool arg0_sign = extractFloatx80Sign(arg);
1396 bool asign;
1397 int32_t dexp, texp, aexp;
1398 uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
1399 uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
1400 uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
1401 floatx80 t2, accum;
1404 * Compute an approximation of arg/(2+arg), with extra precision,
1405 * as the argument to a polynomial approximation. The extra
1406 * precision is only needed for the first term of the
1407 * approximation, with subsequent terms being significantly
1408 * smaller; the approximation only uses odd exponents, and the
1409 * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
1411 if (arg0_sign) {
1412 dexp = 0x3fff;
1413 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
1414 sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
1415 } else {
1416 dexp = 0x4000;
1417 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
1418 dsig0 |= 0x8000000000000000ULL;
1420 texp = arg0_exp - dexp + 0x3ffe;
1421 rsig0 = arg0_sig;
1422 rsig1 = 0;
1423 rsig2 = 0;
1424 if (dsig0 <= rsig0) {
1425 shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
1426 ++texp;
1428 tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
1429 mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
1430 sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
1431 &rsig0, &rsig1, &rsig2);
1432 while ((int64_t) rsig0 < 0) {
1433 --tsig0;
1434 add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
1435 &rsig0, &rsig1, &rsig2);
1437 tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
1439 * No need to correct any estimation error in tsig1; even with
1440 * such error, it is accurate enough. Now compute the square of
1441 * that approximation.
1443 mul128To256(tsig0, tsig1, tsig0, tsig1,
1444 &t2sig0, &t2sig1, &t2sig2, &t2sig3);
1445 t2 = normalizeRoundAndPackFloatx80(80, false, texp + texp - 0x3ffe,
1446 t2sig0, t2sig1, &env->fp_status);
1448 /* Compute the lower parts of the polynomial expansion. */
1449 accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
1450 accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
1451 accum = floatx80_mul(accum, t2, &env->fp_status);
1452 accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
1453 accum = floatx80_mul(accum, t2, &env->fp_status);
1454 accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
1455 accum = floatx80_mul(accum, t2, &env->fp_status);
1456 accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
1457 accum = floatx80_mul(accum, t2, &env->fp_status);
1458 accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
1459 accum = floatx80_mul(accum, t2, &env->fp_status);
1460 accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
1461 accum = floatx80_mul(accum, t2, &env->fp_status);
1462 accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
1463 accum = floatx80_mul(accum, t2, &env->fp_status);
1464 accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
1465 accum = floatx80_mul(accum, t2, &env->fp_status);
1466 accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);
1469 * The full polynomial expansion is fyl2x_coeff_0 + accum (where
1470 * accum has much lower magnitude, and so, in particular, carry
1471 * out of the addition is not possible), multiplied by t. (This
1472 * expansion is only accurate to about 70 bits, not 128 bits.)
1474 aexp = extractFloatx80Exp(fyl2x_coeff_0);
1475 asign = extractFloatx80Sign(fyl2x_coeff_0);
1476 shift128RightJamming(extractFloatx80Frac(accum), 0,
1477 aexp - extractFloatx80Exp(accum),
1478 &asig0, &asig1);
1479 bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
1480 bsig1 = 0;
1481 if (asign == extractFloatx80Sign(accum)) {
1482 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1483 } else {
1484 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1486 /* Multiply by t to compute the required result. */
1487 mul128To256(asig0, asig1, tsig0, tsig1,
1488 &asig0, &asig1, &asig2, &asig3);
1489 aexp += texp - 0x3ffe;
1490 *exp = aexp;
1491 *sig0 = asig0;
1492 *sig1 = asig1;
1495 void helper_fyl2xp1(CPUX86State *env)
1497 uint8_t old_flags = save_exception_flags(env);
1498 uint64_t arg0_sig = extractFloatx80Frac(ST0);
1499 int32_t arg0_exp = extractFloatx80Exp(ST0);
1500 bool arg0_sign = extractFloatx80Sign(ST0);
1501 uint64_t arg1_sig = extractFloatx80Frac(ST1);
1502 int32_t arg1_exp = extractFloatx80Exp(ST1);
1503 bool arg1_sign = extractFloatx80Sign(ST1);
1505 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1506 float_raise(float_flag_invalid, &env->fp_status);
1507 ST1 = floatx80_silence_nan(ST0, &env->fp_status);
1508 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
1509 float_raise(float_flag_invalid, &env->fp_status);
1510 ST1 = floatx80_silence_nan(ST1, &env->fp_status);
1511 } else if (floatx80_invalid_encoding(ST0) ||
1512 floatx80_invalid_encoding(ST1)) {
1513 float_raise(float_flag_invalid, &env->fp_status);
1514 ST1 = floatx80_default_nan(&env->fp_status);
1515 } else if (floatx80_is_any_nan(ST0)) {
1516 ST1 = ST0;
1517 } else if (floatx80_is_any_nan(ST1)) {
1518 /* Pass this NaN through. */
1519 } else if (arg0_exp > 0x3ffd ||
1520 (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ?
1521 0x95f619980c4336f7ULL :
1522 0xd413cccfe7799211ULL))) {
1524 * Out of range for the instruction (ST0 must have absolute
1525 * value less than 1 - sqrt(2)/2 = 0.292..., according to
1526 * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
1527 * to sqrt(2) - 1, which we allow here), treat as invalid.
1529 float_raise(float_flag_invalid, &env->fp_status);
1530 ST1 = floatx80_default_nan(&env->fp_status);
1531 } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
1532 arg1_exp == 0x7fff) {
1534 * One argument is zero, or multiplying by infinity; correct
1535 * result is exact and can be obtained by multiplying the
1536 * arguments.
1538 ST1 = floatx80_mul(ST0, ST1, &env->fp_status);
1539 } else if (arg0_exp < 0x3fb0) {
1541 * Multiplying both arguments and an extra-precision version
1542 * of log2(e) is sufficiently precise.
1544 uint64_t sig0, sig1, sig2;
1545 int32_t exp;
1546 if (arg0_exp == 0) {
1547 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
1549 if (arg1_exp == 0) {
1550 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
1552 mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig,
1553 &sig0, &sig1, &sig2);
1554 exp = arg0_exp + 1;
1555 mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2);
1556 exp += arg1_exp - 0x3ffe;
1557 /* This result is inexact. */
1558 sig1 |= 1;
1559 ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, exp,
1560 sig0, sig1, &env->fp_status);
1561 } else {
1562 int32_t aexp;
1563 uint64_t asig0, asig1, asig2;
1564 FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
1565 signed char save_prec = env->fp_status.floatx80_rounding_precision;
1566 env->fp_status.float_rounding_mode = float_round_nearest_even;
1567 env->fp_status.floatx80_rounding_precision = 80;
1569 helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1);
1571 * Multiply by the second argument to compute the required
1572 * result.
1574 if (arg1_exp == 0) {
1575 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
1577 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
1578 aexp += arg1_exp - 0x3ffe;
1579 /* This result is inexact. */
1580 asig1 |= 1;
1581 env->fp_status.float_rounding_mode = save_mode;
1582 ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, aexp,
1583 asig0, asig1, &env->fp_status);
1584 env->fp_status.floatx80_rounding_precision = save_prec;
1586 fpop(env);
1587 merge_exception_flags(env, old_flags);
1590 void helper_fyl2x(CPUX86State *env)
1592 uint8_t old_flags = save_exception_flags(env);
1593 uint64_t arg0_sig = extractFloatx80Frac(ST0);
1594 int32_t arg0_exp = extractFloatx80Exp(ST0);
1595 bool arg0_sign = extractFloatx80Sign(ST0);
1596 uint64_t arg1_sig = extractFloatx80Frac(ST1);
1597 int32_t arg1_exp = extractFloatx80Exp(ST1);
1598 bool arg1_sign = extractFloatx80Sign(ST1);
1600 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1601 float_raise(float_flag_invalid, &env->fp_status);
1602 ST1 = floatx80_silence_nan(ST0, &env->fp_status);
1603 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
1604 float_raise(float_flag_invalid, &env->fp_status);
1605 ST1 = floatx80_silence_nan(ST1, &env->fp_status);
1606 } else if (floatx80_invalid_encoding(ST0) ||
1607 floatx80_invalid_encoding(ST1)) {
1608 float_raise(float_flag_invalid, &env->fp_status);
1609 ST1 = floatx80_default_nan(&env->fp_status);
1610 } else if (floatx80_is_any_nan(ST0)) {
1611 ST1 = ST0;
1612 } else if (floatx80_is_any_nan(ST1)) {
1613 /* Pass this NaN through. */
1614 } else if (arg0_sign && !floatx80_is_zero(ST0)) {
1615 float_raise(float_flag_invalid, &env->fp_status);
1616 ST1 = floatx80_default_nan(&env->fp_status);
1617 } else if (floatx80_is_infinity(ST1)) {
1618 FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
1619 &env->fp_status);
1620 switch (cmp) {
1621 case float_relation_less:
1622 ST1 = floatx80_chs(ST1);
1623 break;
1624 case float_relation_greater:
1625 /* Result is infinity of the same sign as ST1. */
1626 break;
1627 default:
1628 float_raise(float_flag_invalid, &env->fp_status);
1629 ST1 = floatx80_default_nan(&env->fp_status);
1630 break;
1632 } else if (floatx80_is_infinity(ST0)) {
1633 if (floatx80_is_zero(ST1)) {
1634 float_raise(float_flag_invalid, &env->fp_status);
1635 ST1 = floatx80_default_nan(&env->fp_status);
1636 } else if (arg1_sign) {
1637 ST1 = floatx80_chs(ST0);
1638 } else {
1639 ST1 = ST0;
1641 } else if (floatx80_is_zero(ST0)) {
1642 if (floatx80_is_zero(ST1)) {
1643 float_raise(float_flag_invalid, &env->fp_status);
1644 ST1 = floatx80_default_nan(&env->fp_status);
1645 } else {
1646 /* Result is infinity with opposite sign to ST1. */
1647 float_raise(float_flag_divbyzero, &env->fp_status);
1648 ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
1649 0x8000000000000000ULL);
1651 } else if (floatx80_is_zero(ST1)) {
1652 if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
1653 ST1 = floatx80_chs(ST1);
1655 /* Otherwise, ST1 is already the correct result. */
1656 } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
1657 if (arg1_sign) {
1658 ST1 = floatx80_chs(floatx80_zero);
1659 } else {
1660 ST1 = floatx80_zero;
1662 } else {
1663 int32_t int_exp;
1664 floatx80 arg0_m1;
1665 FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
1666 signed char save_prec = env->fp_status.floatx80_rounding_precision;
1667 env->fp_status.float_rounding_mode = float_round_nearest_even;
1668 env->fp_status.floatx80_rounding_precision = 80;
1670 if (arg0_exp == 0) {
1671 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
1673 if (arg1_exp == 0) {
1674 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
1676 int_exp = arg0_exp - 0x3fff;
1677 if (arg0_sig > 0xb504f333f9de6484ULL) {
1678 ++int_exp;
1680 arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
1681 &env->fp_status),
1682 floatx80_one, &env->fp_status);
1683 if (floatx80_is_zero(arg0_m1)) {
1684 /* Exact power of 2; multiply by ST1. */
1685 env->fp_status.float_rounding_mode = save_mode;
1686 ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
1687 ST1, &env->fp_status);
1688 } else {
1689 bool asign = extractFloatx80Sign(arg0_m1);
1690 int32_t aexp;
1691 uint64_t asig0, asig1, asig2;
1692 helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
1693 if (int_exp != 0) {
1694 bool isign = (int_exp < 0);
1695 int32_t iexp;
1696 uint64_t isig;
1697 int shift;
1698 int_exp = isign ? -int_exp : int_exp;
1699 shift = clz32(int_exp) + 32;
1700 isig = int_exp;
1701 isig <<= shift;
1702 iexp = 0x403e - shift;
1703 shift128RightJamming(asig0, asig1, iexp - aexp,
1704 &asig0, &asig1);
1705 if (asign == isign) {
1706 add128(isig, 0, asig0, asig1, &asig0, &asig1);
1707 } else {
1708 sub128(isig, 0, asig0, asig1, &asig0, &asig1);
1710 aexp = iexp;
1711 asign = isign;
1714 * Multiply by the second argument to compute the required
1715 * result.
1717 if (arg1_exp == 0) {
1718 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
1720 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
1721 aexp += arg1_exp - 0x3ffe;
1722 /* This result is inexact. */
1723 asig1 |= 1;
1724 env->fp_status.float_rounding_mode = save_mode;
1725 ST1 = normalizeRoundAndPackFloatx80(80, asign ^ arg1_sign, aexp,
1726 asig0, asig1, &env->fp_status);
1729 env->fp_status.floatx80_rounding_precision = save_prec;
1731 fpop(env);
1732 merge_exception_flags(env, old_flags);
1735 void helper_fsqrt(CPUX86State *env)
1737 uint8_t old_flags = save_exception_flags(env);
1738 if (floatx80_is_neg(ST0)) {
1739 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1740 env->fpus |= 0x400;
1742 ST0 = floatx80_sqrt(ST0, &env->fp_status);
1743 merge_exception_flags(env, old_flags);
1746 void helper_fsincos(CPUX86State *env)
1748 double fptemp = floatx80_to_double(env, ST0);
1750 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1751 env->fpus |= 0x400;
1752 } else {
1753 ST0 = double_to_floatx80(env, sin(fptemp));
1754 fpush(env);
1755 ST0 = double_to_floatx80(env, cos(fptemp));
1756 env->fpus &= ~0x400; /* C2 <-- 0 */
1757 /* the above code is for |arg| < 2**63 only */
1761 void helper_frndint(CPUX86State *env)
1763 uint8_t old_flags = save_exception_flags(env);
1764 ST0 = floatx80_round_to_int(ST0, &env->fp_status);
1765 merge_exception_flags(env, old_flags);
1768 void helper_fscale(CPUX86State *env)
1769 {
1770 uint8_t old_flags = save_exception_flags(env);
1771 if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) {
1772 float_raise(float_flag_invalid, &env->fp_status);
1773 ST0 = floatx80_default_nan(&env->fp_status);
1774 } else if (floatx80_is_any_nan(ST1)) {
1775 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1776 float_raise(float_flag_invalid, &env->fp_status);
1777 }
1778 ST0 = ST1;
1779 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1780 float_raise(float_flag_invalid, &env->fp_status);
1781 ST0 = floatx80_silence_nan(ST0, &env->fp_status);
1782 }
1783 } else if (floatx80_is_infinity(ST1) &&
1784 !floatx80_invalid_encoding(ST0) &&
1785 !floatx80_is_any_nan(ST0)) {
1786 if (floatx80_is_neg(ST1)) {
1787 if (floatx80_is_infinity(ST0)) {
1788 float_raise(float_flag_invalid, &env->fp_status);
1789 ST0 = floatx80_default_nan(&env->fp_status);
1790 } else {
1791 ST0 = (floatx80_is_neg(ST0) ?
1792 floatx80_chs(floatx80_zero) :
1793 floatx80_zero);
1794 }
1795 } else {
1796 if (floatx80_is_zero(ST0)) {
1797 float_raise(float_flag_invalid, &env->fp_status);
1798 ST0 = floatx80_default_nan(&env->fp_status);
1799 } else {
1800 ST0 = (floatx80_is_neg(ST0) ?
1801 floatx80_chs(floatx80_infinity) :
1802 floatx80_infinity);
1803 }
1804 }
1805 } else {
1806 int n;
1807 signed char save = env->fp_status.floatx80_rounding_precision;
1808 uint8_t save_flags = get_float_exception_flags(&env->fp_status);
1809 set_float_exception_flags(0, &env->fp_status);
1810 n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
1811 set_float_exception_flags(save_flags, &env->fp_status);
1812 env->fp_status.floatx80_rounding_precision = 80;
1813 ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
1814 env->fp_status.floatx80_rounding_precision = save;
1815 }
1816 merge_exception_flags(env, old_flags);
1817 }
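/*
 * Note on helper_fscale() above (explanatory, not in the source): apart
 * from the NaN/infinity special cases, FSCALE computes ST0 * 2^trunc(ST1).
 * The exception flags appear to be saved and restored around
 * floatx80_to_int32_round_to_zero() so that truncating ST1 does not leak
 * invalid/inexact flags into the visible status word; an ST1 far outside
 * int32 range merely saturates n, and floatx80_scalbn() then produces the
 * expected overflow or underflow result at full 80-bit precision.
 */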
1819 void helper_fsin(CPUX86State *env)
1820 {
1821 double fptemp = floatx80_to_double(env, ST0);
1823 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1824 env->fpus |= 0x400;
1825 } else {
1826 ST0 = double_to_floatx80(env, sin(fptemp));
1827 env->fpus &= ~0x400; /* C2 <-- 0 */
1828 /* the above code is for |arg| < 2**53 only */
1829 }
1830 }
1832 void helper_fcos(CPUX86State *env)
1833 {
1834 double fptemp = floatx80_to_double(env, ST0);
1836 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1837 env->fpus |= 0x400;
1838 } else {
1839 ST0 = double_to_floatx80(env, cos(fptemp));
1840 env->fpus &= ~0x400; /* C2 <-- 0 */
1841 /* the above code is for |arg| < 2**63 only */
1842 }
1843 }
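/*
 * Note on helper_fsin()/helper_fcos()/helper_fsincos() above (explanatory,
 * not in the source): unlike helper_fyl2x(), these still go through the
 * host libm with an intermediate double, so the results carry only double
 * precision.  Arguments with |arg| >= 2^63 (MAXTAN) leave the stack
 * untouched and set C2, telling the guest to reduce the argument itself,
 * which matches the hardware's out-of-range behaviour.
 */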
1845 void helper_fxam_ST0(CPUX86State *env)
1846 {
1847 CPU_LDoubleU temp;
1848 int expdif;
1850 temp.d = ST0;
1852 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1853 if (SIGND(temp)) {
1854 env->fpus |= 0x200; /* C1 <-- 1 */
1855 }
1857 if (env->fptags[env->fpstt]) {
1858 env->fpus |= 0x4100; /* Empty */
1859 return;
1860 }
1862 expdif = EXPD(temp);
1863 if (expdif == MAXEXPD) {
1864 if (MANTD(temp) == 0x8000000000000000ULL) {
1865 env->fpus |= 0x500; /* Infinity */
1866 } else if (MANTD(temp) & 0x8000000000000000ULL) {
1867 env->fpus |= 0x100; /* NaN */
1868 }
1869 } else if (expdif == 0) {
1870 if (MANTD(temp) == 0) {
1871 env->fpus |= 0x4000; /* Zero */
1872 } else {
1873 env->fpus |= 0x4400; /* Denormal */
1874 }
1875 } else if (MANTD(temp) & 0x8000000000000000ULL) {
1876 env->fpus |= 0x400;
1877 }
1878 }
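/*
 * Note (explanatory, not in the source): FXAM reports its class in the
 * C3/C2/C0 condition codes; in env->fpus those bits are 0x4000, 0x400 and
 * 0x100, with C1 (0x200) holding the sign.  The constants above encode:
 *
 *     0x4100  C3=1 C2=0 C0=1  empty register
 *     0x0500  C3=0 C2=1 C0=1  infinity
 *     0x0100  C3=0 C2=0 C0=1  NaN
 *     0x4000  C3=1 C2=0 C0=0  zero
 *     0x4400  C3=1 C2=1 C0=0  denormal
 *     0x0400  C3=0 C2=1 C0=0  normal finite
 *
 * Anything that falls through (an unnormal or otherwise invalid encoding)
 * leaves C3=C2=C0=0, i.e. "unsupported".
 */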
1880 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
1881 uintptr_t retaddr)
1882 {
1883 int fpus, fptag, exp, i;
1884 uint64_t mant;
1885 CPU_LDoubleU tmp;
1887 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1888 fptag = 0;
1889 for (i = 7; i >= 0; i--) {
1890 fptag <<= 2;
1891 if (env->fptags[i]) {
1892 fptag |= 3;
1893 } else {
1894 tmp.d = env->fpregs[i].d;
1895 exp = EXPD(tmp);
1896 mant = MANTD(tmp);
1897 if (exp == 0 && mant == 0) {
1898 /* zero */
1899 fptag |= 1;
1900 } else if (exp == 0 || exp == MAXEXPD
1901 || (mant & (1LL << 63)) == 0) {
1902 /* NaNs, infinity, denormal */
1903 fptag |= 2;
1904 }
1905 }
1906 }
1907 if (data32) {
1908 /* 32 bit */
1909 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1910 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1911 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1912 cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1913 cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1914 cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1915 cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
1916 } else {
1917 /* 16 bit */
1918 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1919 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1920 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1921 cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1922 cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1923 cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1924 cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
1925 }
1926 }
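/*
 * Note (explanatory, not in the source): the x87 tag word holds two bits
 * per physical register, 00 = valid, 01 = zero, 10 = special (NaN,
 * infinity, denormal, unnormal), 11 = empty, with register i's field at
 * bit position 2 * i; that is why the loop above walks i from 7 down to 0
 * while shifting left.  Bits 11..13 of the stored status word carry TOP.
 * As a rough illustration (not part of the build), an all-empty stack
 * produces the familiar reset value:
 *
 *     int fptag = 0, i;
 *     for (i = 7; i >= 0; i--) {
 *         fptag = (fptag << 2) | 3;   // every register tagged empty
 *     }
 *     // fptag == 0xffff
 */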
1928 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1929 {
1930 do_fstenv(env, ptr, data32, GETPC());
1931 }
1933 static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
1934 {
1935 env->fpstt = (fpus >> 11) & 7;
1936 env->fpus = fpus & ~0x3800 & ~FPUS_B;
1937 env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
1938 #if !defined(CONFIG_USER_ONLY)
1939 if (!(env->fpus & FPUS_SE)) {
1940 /*
1941 * Here the processor deasserts FERR#; in response, the chipset deasserts
1942 * IGNNE#.
1943 */
1944 cpu_clear_ignne();
1945 }
1946 #endif
1947 }
1949 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1950 uintptr_t retaddr)
1951 {
1952 int i, fpus, fptag;
1954 if (data32) {
1955 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1956 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1957 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1958 } else {
1959 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1960 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1961 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1962 }
1963 cpu_set_fpus(env, fpus);
1964 for (i = 0; i < 8; i++) {
1965 env->fptags[i] = ((fptag & 3) == 3);
1966 fptag >>= 2;
1967 }
1968 }
1970 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1971 {
1972 do_fldenv(env, ptr, data32, GETPC());
1973 }
1975 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1976 {
1977 floatx80 tmp;
1978 int i;
1980 do_fstenv(env, ptr, data32, GETPC());
1982 ptr += (14 << data32);
1983 for (i = 0; i < 8; i++) {
1984 tmp = ST(i);
1985 helper_fstt(env, tmp, ptr, GETPC());
1986 ptr += 10;
1987 }
1989 /* fninit */
1990 env->fpus = 0;
1991 env->fpstt = 0;
1992 cpu_set_fpuc(env, 0x37f);
1993 env->fptags[0] = 1;
1994 env->fptags[1] = 1;
1995 env->fptags[2] = 1;
1996 env->fptags[3] = 1;
1997 env->fptags[4] = 1;
1998 env->fptags[5] = 1;
1999 env->fptags[6] = 1;
2000 env->fptags[7] = 1;
2001 }
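/*
 * Note (explanatory, not in the source): FSAVE writes the environment
 * block first (14 bytes in 16-bit format, 28 bytes in 32-bit format,
 * hence ptr += 14 << data32) and then the eight stack registers as packed
 * 10-byte extended reals, ST(0) (the current top of stack) first.  After
 * the image is written, the instruction behaves like FNINIT: control word
 * back to 0x37f (all exceptions masked, extended precision, round to
 * nearest), status word cleared and every register tagged empty.
 */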
2003 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
2004 {
2005 floatx80 tmp;
2006 int i;
2008 do_fldenv(env, ptr, data32, GETPC());
2009 ptr += (14 << data32);
2011 for (i = 0; i < 8; i++) {
2012 tmp = helper_fldt(env, ptr, GETPC());
2013 ST(i) = tmp;
2014 ptr += 10;
2015 }
2016 }
2018 #if defined(CONFIG_USER_ONLY)
2019 void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
2020 {
2021 helper_fsave(env, ptr, data32);
2022 }
2024 void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
2025 {
2026 helper_frstor(env, ptr, data32);
2027 }
2028 #endif
2030 #define XO(X) offsetof(X86XSaveArea, X)
2032 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2033 {
2034 int fpus, fptag, i;
2035 target_ulong addr;
2037 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
2038 fptag = 0;
2039 for (i = 0; i < 8; i++) {
2040 fptag |= (env->fptags[i] << i);
2041 }
2043 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
2044 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
2045 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
2047 /* In 32-bit mode this is eip, sel, dp, sel.
2048 In 64-bit mode this is rip, rdp.
2049 But in either case we don't write actual data, just zeros. */
2050 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
2051 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
2053 addr = ptr + XO(legacy.fpregs);
2054 for (i = 0; i < 8; i++) {
2055 floatx80 tmp = ST(i);
2056 helper_fstt(env, tmp, addr, ra);
2057 addr += 16;
2058 }
2059 }
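/*
 * Note (explanatory, not in the source): unlike FSTENV, the FXSAVE/XSAVE
 * legacy area stores an abridged one-bit-per-register tag word where 1
 * means "valid" and 0 means "empty".  env->fptags[] uses the opposite
 * sense (1 = empty), which is why the accumulated byte is XORed with 0xff
 * before being written to legacy.ftw.  Each ST(i) then occupies a 16-byte
 * slot even though only 10 bytes of it are significant.
 */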
2061 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2062 {
2063 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
2064 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
2065 }
2067 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2068 {
2069 int i, nb_xmm_regs;
2070 target_ulong addr;
2072 if (env->hflags & HF_CS64_MASK) {
2073 nb_xmm_regs = 16;
2074 } else {
2075 nb_xmm_regs = 8;
2076 }
2078 addr = ptr + XO(legacy.xmm_regs);
2079 for (i = 0; i < nb_xmm_regs; i++) {
2080 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
2081 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
2082 addr += 16;
2083 }
2084 }
2086 static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2087 {
2088 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
2089 int i;
2091 for (i = 0; i < 4; i++, addr += 16) {
2092 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
2093 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
2094 }
2095 }
2097 static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2098 {
2099 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
2100 env->bndcs_regs.cfgu, ra);
2101 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
2102 env->bndcs_regs.sts, ra);
2103 }
2105 static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2106 {
2107 cpu_stq_data_ra(env, ptr, env->pkru, ra);
2108 }
2110 void helper_fxsave(CPUX86State *env, target_ulong ptr)
2111 {
2112 uintptr_t ra = GETPC();
2114 /* The operand must be 16 byte aligned */
2115 if (ptr & 0xf) {
2116 raise_exception_ra(env, EXCP0D_GPF, ra);
2117 }
2119 do_xsave_fpu(env, ptr, ra);
2121 if (env->cr[4] & CR4_OSFXSR_MASK) {
2122 do_xsave_mxcsr(env, ptr, ra);
2123 /* Fast FXSAVE leaves out the XMM registers */
2124 if (!(env->efer & MSR_EFER_FFXSR)
2125 || (env->hflags & HF_CPL_MASK)
2126 || !(env->hflags & HF_LMA_MASK)) {
2127 do_xsave_sse(env, ptr, ra);
2128 }
2129 }
2130 }
2132 static uint64_t get_xinuse(CPUX86State *env)
2133 {
2134 uint64_t inuse = -1;
2136 /* For the most part, we don't track XINUSE. We could calculate it
2137 here for all components, but it's probably less work to simply
2138 indicate in use. That said, the state of BNDREGS is important
2139 enough to track in HFLAGS, so we might as well use that here. */
2140 if ((env->hflags & HF_MPX_IU_MASK) == 0) {
2141 inuse &= ~XSTATE_BNDREGS_MASK;
2142 }
2143 return inuse;
2144 }
2146 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
2147 uint64_t inuse, uint64_t opt, uintptr_t ra)
2148 {
2149 uint64_t old_bv, new_bv;
2151 /* The OS must have enabled XSAVE. */
2152 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2153 raise_exception_ra(env, EXCP06_ILLOP, ra);
2154 }
2156 /* The operand must be 64 byte aligned. */
2157 if (ptr & 63) {
2158 raise_exception_ra(env, EXCP0D_GPF, ra);
2159 }
2161 /* Never save anything not enabled by XCR0. */
2162 rfbm &= env->xcr0;
2163 opt &= rfbm;
2165 if (opt & XSTATE_FP_MASK) {
2166 do_xsave_fpu(env, ptr, ra);
2167 }
2168 if (rfbm & XSTATE_SSE_MASK) {
2169 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
2170 do_xsave_mxcsr(env, ptr, ra);
2171 }
2172 if (opt & XSTATE_SSE_MASK) {
2173 do_xsave_sse(env, ptr, ra);
2174 }
2175 if (opt & XSTATE_BNDREGS_MASK) {
2176 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
2177 }
2178 if (opt & XSTATE_BNDCSR_MASK) {
2179 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
2180 }
2181 if (opt & XSTATE_PKRU_MASK) {
2182 do_xsave_pkru(env, ptr + XO(pkru_state), ra);
2183 }
2185 /* Update the XSTATE_BV field. */
2186 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
2187 new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
2188 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
2189 }
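/*
 * Note (explanatory, not in the source): rfbm is the requested-feature
 * bitmap (the instruction's EDX:EAX mask ANDed with XCR0) and opt is the
 * subset whose data is actually written; for XSAVEOPT that is only the
 * components currently in use.  XSTATE_BV is then updated so that bits
 * outside rfbm keep their old value while bits inside rfbm reflect whether
 * the component is in use.  A small worked example (illustrative values
 * only):
 *
 *     old_bv = 0x6, rfbm = 0x3, inuse = 0x1
 *     new_bv = (0x6 & ~0x3) | (0x1 & 0x3) = 0x5
 */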
2191 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
2192 {
2193 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
2194 }
2196 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
2197 {
2198 uint64_t inuse = get_xinuse(env);
2199 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
2200 }
2202 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2203 {
2204 int i, fpuc, fpus, fptag;
2205 target_ulong addr;
2207 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
2208 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
2209 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
2210 cpu_set_fpuc(env, fpuc);
2211 cpu_set_fpus(env, fpus);
2212 fptag ^= 0xff;
2213 for (i = 0; i < 8; i++) {
2214 env->fptags[i] = ((fptag >> i) & 1);
2215 }
2217 addr = ptr + XO(legacy.fpregs);
2218 for (i = 0; i < 8; i++) {
2219 floatx80 tmp = helper_fldt(env, addr, ra);
2220 ST(i) = tmp;
2221 addr += 16;
2222 }
2223 }
2225 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2226 {
2227 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
2228 }
2230 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2231 {
2232 int i, nb_xmm_regs;
2233 target_ulong addr;
2235 if (env->hflags & HF_CS64_MASK) {
2236 nb_xmm_regs = 16;
2237 } else {
2238 nb_xmm_regs = 8;
2239 }
2241 addr = ptr + XO(legacy.xmm_regs);
2242 for (i = 0; i < nb_xmm_regs; i++) {
2243 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
2244 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
2245 addr += 16;
2246 }
2247 }
2249 static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2250 {
2251 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
2252 int i;
2254 for (i = 0; i < 4; i++, addr += 16) {
2255 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
2256 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
2257 }
2258 }
2260 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2261 {
2262 /* FIXME: Extend highest implemented bit of linear address. */
2263 env->bndcs_regs.cfgu
2264 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
2265 env->bndcs_regs.sts
2266 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
2267 }
2269 static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2270 {
2271 env->pkru = cpu_ldq_data_ra(env, ptr, ra);
2272 }
2274 void helper_fxrstor(CPUX86State *env, target_ulong ptr)
2275 {
2276 uintptr_t ra = GETPC();
2278 /* The operand must be 16 byte aligned */
2279 if (ptr & 0xf) {
2280 raise_exception_ra(env, EXCP0D_GPF, ra);
2281 }
2283 do_xrstor_fpu(env, ptr, ra);
2285 if (env->cr[4] & CR4_OSFXSR_MASK) {
2286 do_xrstor_mxcsr(env, ptr, ra);
2287 /* Fast FXRSTOR leaves out the XMM registers */
2288 if (!(env->efer & MSR_EFER_FFXSR)
2289 || (env->hflags & HF_CPL_MASK)
2290 || !(env->hflags & HF_LMA_MASK)) {
2291 do_xrstor_sse(env, ptr, ra);
2292 }
2293 }
2294 }
2296 #if defined(CONFIG_USER_ONLY)
2297 void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
2298 {
2299 helper_fxsave(env, ptr);
2300 }
2302 void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
2303 {
2304 helper_fxrstor(env, ptr);
2305 }
2306 #endif
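/*
 * Note (explanatory, not in the source): helper_xrstor() below implements
 * only the standard (non-compacted) XSAVE format.  It therefore requires
 * XSTATE_BV to be a subset of XCR0 and the XCOMP_BV word plus the next
 * reserved header bytes to be zero; a set bit 63 in the header, which
 * would indicate the compacted format, is rejected with #GP (see the
 * FIXME).  For each component selected in the requested-feature bitmap, a
 * clear XSTATE_BV bit means "load the initial configuration" rather than
 * data from memory.
 */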
2308 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
2309 {
2310 uintptr_t ra = GETPC();
2311 uint64_t xstate_bv, xcomp_bv, reserve0;
2313 rfbm &= env->xcr0;
2315 /* The OS must have enabled XSAVE. */
2316 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2317 raise_exception_ra(env, EXCP06_ILLOP, ra);
2318 }
2320 /* The operand must be 64 byte aligned. */
2321 if (ptr & 63) {
2322 raise_exception_ra(env, EXCP0D_GPF, ra);
2323 }
2325 xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
2327 if ((int64_t)xstate_bv < 0) {
2328 /* FIXME: Compact form. */
2329 raise_exception_ra(env, EXCP0D_GPF, ra);
2330 }
2332 /* Standard form. */
2334 /* The XSTATE_BV field must not set bits not present in XCR0. */
2335 if (xstate_bv & ~env->xcr0) {
2336 raise_exception_ra(env, EXCP0D_GPF, ra);
2337 }
2339 /* The XCOMP_BV field must be zero. Note that, as of the April 2016
2340 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
2341 describes only XCOMP_BV, but the description of the standard form
2342 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
2343 includes the next 64-bit field. */
2344 xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
2345 reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
2346 if (xcomp_bv || reserve0) {
2347 raise_exception_ra(env, EXCP0D_GPF, ra);
2348 }
2350 if (rfbm & XSTATE_FP_MASK) {
2351 if (xstate_bv & XSTATE_FP_MASK) {
2352 do_xrstor_fpu(env, ptr, ra);
2353 } else {
2354 helper_fninit(env);
2355 memset(env->fpregs, 0, sizeof(env->fpregs));
2356 }
2357 }
2358 if (rfbm & XSTATE_SSE_MASK) {
2359 /* Note that the standard form of XRSTOR loads MXCSR from memory
2360 whether or not the XSTATE_BV bit is set. */
2361 do_xrstor_mxcsr(env, ptr, ra);
2362 if (xstate_bv & XSTATE_SSE_MASK) {
2363 do_xrstor_sse(env, ptr, ra);
2364 } else {
2365 /* ??? When AVX is implemented, we may have to be more
2366 selective in the clearing. */
2367 memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
2368 }
2369 }
2370 if (rfbm & XSTATE_BNDREGS_MASK) {
2371 if (xstate_bv & XSTATE_BNDREGS_MASK) {
2372 do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
2373 env->hflags |= HF_MPX_IU_MASK;
2374 } else {
2375 memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
2376 env->hflags &= ~HF_MPX_IU_MASK;
2377 }
2378 }
2379 if (rfbm & XSTATE_BNDCSR_MASK) {
2380 if (xstate_bv & XSTATE_BNDCSR_MASK) {
2381 do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
2382 } else {
2383 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
2384 }
2385 cpu_sync_bndcs_hflags(env);
2386 }
2387 if (rfbm & XSTATE_PKRU_MASK) {
2388 uint64_t old_pkru = env->pkru;
2389 if (xstate_bv & XSTATE_PKRU_MASK) {
2390 do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
2391 } else {
2392 env->pkru = 0;
2393 }
2394 if (env->pkru != old_pkru) {
2395 CPUState *cs = env_cpu(env);
2396 tlb_flush(cs);
2397 }
2398 }
2399 }
2401 #undef XO
2403 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
2404 {
2405 /* The OS must have enabled XSAVE. */
2406 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2407 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
2408 }
2410 switch (ecx) {
2411 case 0:
2412 return env->xcr0;
2413 case 1:
2414 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
2415 return env->xcr0 & get_xinuse(env);
2416 }
2417 break;
2418 }
2419 raise_exception_ra(env, EXCP0D_GPF, GETPC());
2420 }
2422 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
2423 {
2424 uint32_t dummy, ena_lo, ena_hi;
2425 uint64_t ena;
2427 /* The OS must have enabled XSAVE. */
2428 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2429 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
2430 }
2432 /* Only XCR0 is defined at present; the FPU may not be disabled. */
2433 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
2434 goto do_gpf;
2435 }
2437 /* Disallow enabling unimplemented features. */
2438 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
2439 ena = ((uint64_t)ena_hi << 32) | ena_lo;
2440 if (mask & ~ena) {
2441 goto do_gpf;
2442 }
2444 /* Disallow enabling only half of MPX. */
2445 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
2446 & XSTATE_BNDCSR_MASK) {
2447 goto do_gpf;
2448 }
2450 env->xcr0 = mask;
2451 cpu_sync_bndcs_hflags(env);
2452 return;
2454 do_gpf:
2455 raise_exception_ra(env, EXCP0D_GPF, GETPC());
2456 }
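/*
 * Note (explanatory, not in the source): the "half of MPX" test above
 * relies on XSTATE_BNDCSR_MASK being exactly twice XSTATE_BNDREGS_MASK
 * (bits 4 and 3 of XCR0).  Multiplying mask by that ratio shifts the
 * BNDREGS bit up into the BNDCSR position, so the XOR has the BNDCSR bit
 * set precisely when the two MPX bits differ, and the final AND turns
 * "bits differ" into the #GP condition.  Illustrative checks (not part of
 * the build):
 *
 *     mask = XSTATE_FP_MASK | XSTATE_BNDREGS_MASK;                        // #GP
 *     mask = XSTATE_FP_MASK | XSTATE_BNDREGS_MASK | XSTATE_BNDCSR_MASK;   // ok
 */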
2458 /* MMX/SSE */
2459 /* XXX: optimize by storing fptt and fptags in the static cpu state */
2461 #define SSE_DAZ 0x0040
2462 #define SSE_RC_MASK 0x6000
2463 #define SSE_RC_NEAR 0x0000
2464 #define SSE_RC_DOWN 0x2000
2465 #define SSE_RC_UP 0x4000
2466 #define SSE_RC_CHOP 0x6000
2467 #define SSE_FZ 0x8000
2469 void update_mxcsr_status(CPUX86State *env)
2470 {
2471 uint32_t mxcsr = env->mxcsr;
2472 int rnd_type;
2474 /* set rounding mode */
2475 switch (mxcsr & SSE_RC_MASK) {
2476 default:
2477 case SSE_RC_NEAR:
2478 rnd_type = float_round_nearest_even;
2479 break;
2480 case SSE_RC_DOWN:
2481 rnd_type = float_round_down;
2482 break;
2483 case SSE_RC_UP:
2484 rnd_type = float_round_up;
2485 break;
2486 case SSE_RC_CHOP:
2487 rnd_type = float_round_to_zero;
2488 break;
2489 }
2490 set_float_rounding_mode(rnd_type, &env->sse_status);
2492 /* set denormals are zero */
2493 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
2495 /* set flush to zero */
2496 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
2497 }
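/*
 * Note (explanatory, not in the source): MXCSR only governs the SSE unit,
 * so all three controls above act on env->sse_status rather than the x87
 * env->fp_status.  For reference, the bits decoded here are RC in bits
 * 13-14, DAZ in bit 6 (treat denormal inputs as zero) and FZ in bit 15
 * (flush denormal results to zero).
 */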
2499 void helper_ldmxcsr(CPUX86State *env, uint32_t val)
2500 {
2501 cpu_set_mxcsr(env, val);
2502 }
2504 void helper_enter_mmx(CPUX86State *env)
2505 {
2506 env->fpstt = 0;
2507 *(uint32_t *)(env->fptags) = 0;
2508 *(uint32_t *)(env->fptags + 4) = 0;
2509 }
2511 void helper_emms(CPUX86State *env)
2512 {
2513 /* set to empty state */
2514 *(uint32_t *)(env->fptags) = 0x01010101;
2515 *(uint32_t *)(env->fptags + 4) = 0x01010101;
2516 }
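/*
 * Note (explanatory, not in the source): env->fptags[] is an array of
 * eight one-byte flags (1 = empty), so the two 32-bit stores above are
 * just a compact way of writing all eight bytes at once: 0x01010101 twice
 * marks every register empty for EMMS, while helper_enter_mmx() clears
 * them all to mark every register valid for MMX use and resets TOP to 0.
 */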
2518 /* XXX: suppress */
2519 void helper_movq(CPUX86State *env, void *d, void *s)
2520 {
2521 *(uint64_t *)d = *(uint64_t *)s;
2522 }
2524 #define SHIFT 0
2525 #include "ops_sse.h"
2527 #define SHIFT 1
2528 #include "ops_sse.h"