dlls/msvcrt/math.c

   1 /*
   2  * msvcrt.dll math functions
   3  *
   4  * Copyright 2000 Jon Griffiths
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  19  *
  20  *
  21  * For functions copied from musl libc (http://musl.libc.org/):
  22  * ====================================================
  23  * Copyright 2005-2020 Rich Felker, et al.
  24  *
  25  * Permission is hereby granted, free of charge, to any person obtaining
  26  * a copy of this software and associated documentation files (the
  27  * "Software"), to deal in the Software without restriction, including
  28  * without limitation the rights to use, copy, modify, merge, publish,
  29  * distribute, sublicense, and/or sell copies of the Software, and to
  30  * permit persons to whom the Software is furnished to do so, subject to
  31  * the following conditions:
  32  *
  33  * The above copyright notice and this permission notice shall be
  34  * included in all copies or substantial portions of the Software.
  35  * ====================================================
  36  */
  37
  38 #include <complex.h>
  39 #include <stdio.h>
  40 #include <fenv.h>
  41 #include <fpieee.h>
  42 #include <limits.h>
  43 #include <locale.h>
  44 #include <math.h>
  45
  46 #include "msvcrt.h"
  47 #include "winternl.h"
  48
  49 #include "wine/asm.h"
  50 #include "wine/debug.h"
  51
  52 WINE_DEFAULT_DEBUG_CHANNEL(msvcrt);
  53
  54 #undef div
  55 #undef ldiv
  56
  /* Error categories; math_error() receives one of these (or 0) as its 'type' argument. */
  57 #define _DOMAIN         1       /* domain error in argument */
  58 #define _SING           2       /* singularity */
  59 #define _OVERFLOW       3       /* range overflow */
  60 #define _UNDERFLOW      4       /* range underflow */
  61
  /* Signature of the math error callback installed with __setusermatherr(). */
  62 typedef int (CDECL *MSVCRT_matherr_func)(struct _exception *);
  63
  /* Callback consulted first by math_error(); stays NULL until __setusermatherr() stores one. */
  64 static MSVCRT_matherr_func MSVCRT_default_matherr_func = NULL;
  65
  /* sse2_supported: result of the processor feature query made in msvcrt_init_math().
   * sse2_enabled:   current SSE2 setting, written by msvcrt_init_math() and _set_SSE2_enable(). */
  66 BOOL sse2_supported;
  67 static BOOL sse2_enabled;
  68
  /* Filled in by the __wine_init_unix_lib() call in msvcrt_init_math(). */
  69 static const struct unix_funcs *unix_funcs;
  70
  71 void msvcrt_init_math( void *module )
  72 {
  73     sse2_supported = IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE );
  74 #if _MSVCR_VER <=71
  75     sse2_enabled = FALSE;
  76 #else
  77     sse2_enabled = sse2_supported;
  78 #endif
  79     __wine_init_unix_lib( module, DLL_PROCESS_ATTACH, NULL, &unix_funcs );
  80 }
  81
  /* Copied from musl: src/internal/libm.h
   *
   * Returns x unchanged after writing it to, and reading it back from, a
   * volatile object.  Callers in this file use it to evaluate an expression
   * whose result is otherwise discarded (see the "raise overflow/underflow"
   * call sites).
   */
  static inline float fp_barrierf(float x)
  {
      volatile float forced;

      forced = x;
      return forced;
  }
  88
  /*
   * Double precision counterpart of fp_barrierf(): returns x unchanged after
   * a round trip through a volatile object.
   */
  static inline double fp_barrier(double x)
  {
      volatile double forced;

      forced = x;
      return forced;
  }
  94
  95 static inline double CDECL ret_nan( BOOL update_sw )
  96 {
  97     double x = 1.0;
  98     if (!update_sw) return -NAN;
  99     return (x - x) / (x - x);
 100 }
 101
 /*
  * i386 inline-assembly fragments (AT&T syntax) that adjust the x87 control
  * word around an operation.  The two macros are meant to be used as a pair:
  * SET_X87_CW lowers %esp by 4 and RESET_X87_CW raises it again.
  *
  * SET_X87_CW(MASK):
  *   - reserves 4 bytes of stack and stores the current control word at (%esp)
  *   - keeps a working copy at 2(%esp)
  *   - if any bit of MASK is set in the control word, clears those bits in the
  *     copy and loads the copy with fldcw; otherwise the control word is left
  *     alone
  *   - clobbers %ax
  */
 102 #define SET_X87_CW(MASK) \
 103     "subl $4, %esp\n\t" \
 104     __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \
 105     "fnstcw (%esp)\n\t" \
 106     "movw (%esp), %ax\n\t" \
 107     "movw %ax, 2(%esp)\n\t" \
 108     "testw $" #MASK ", %ax\n\t" \
 109     "jz 1f\n\t" \
 110     "andw $~" #MASK ", %ax\n\t" \
 111     "movw %ax, 2(%esp)\n\t" \
 112     "fldcw 2(%esp)\n\t" \
 113     "1:\n\t"
 114
 /*
  * RESET_X87_CW:
  *   - compares the saved control word at (%esp) with the working copy at
  *     2(%esp); when they are equal nothing has to be restored
  *   - otherwise pops the top of the x87 stack to 8(%esp) as a 64-bit value,
  *     reloads the saved control word, pushes the value back and waits
  *   - releases the 4 bytes reserved by SET_X87_CW
  *   - clobbers %ax
  * NOTE(review): 8(%esp) lies outside the 4 bytes reserved here, so the code
  * using these macros has to provide that slot - confirm at the use sites.
  */
 115 #define RESET_X87_CW \
 116     "movw (%esp), %ax\n\t" \
 117     "cmpw %ax, 2(%esp)\n\t" \
 118     "je 1f\n\t" \
 119     "fstpl 8(%esp)\n\t" \
 120     "fldcw (%esp)\n\t" \
 121     "fldl 8(%esp)\n\t" \
 122     "fwait\n\t" \
 123     "1:\n\t" \
 124     "addl $4, %esp\n\t" \
 125     __ASM_CFI(".cfi_adjust_cfa_offset -4\n\t")
 126
 127 /*********************************************************************
 128  *      _matherr (CRTDLL.@)
 129  */
 130 int CDECL _matherr(struct _exception *e)
 131 {
 132     return 0;
 133 }
 134
 135
 136 static double math_error(int type, const char *name, double arg1, double arg2, double retval)
 137 {
 138     struct _exception exception = {type, (char *)name, arg1, arg2, retval};
 139
 140     TRACE("(%d, %s, %g, %g, %g)\n", type, debugstr_a(name), arg1, arg2, retval);
 141
 142     if (MSVCRT_default_matherr_func && MSVCRT_default_matherr_func(&exception))
 143         return exception.retval;
 144
 145     switch (type)
 146     {
 147     case 0:
 148         /* don't set errno */
 149         break;
 150     case _DOMAIN:
 151         *_errno() = EDOM;
 152         break;
 153     case _SING:
 154     case _OVERFLOW:
 155         *_errno() = ERANGE;
 156         break;
 157     case _UNDERFLOW:
 158         /* don't set errno */
 159         break;
 160     default:
 161         ERR("Unhandled math error!\n");
 162     }
 163
 164     return exception.retval;
 165 }
 166
 167 /*********************************************************************
 168  *      __setusermatherr (MSVCRT.@)
 169  */
 170 void CDECL __setusermatherr(MSVCRT_matherr_func func)
 171 {
 172     MSVCRT_default_matherr_func = func;
 173     TRACE("new matherr handler %p\n", func);
 174 }
 175
 176 /*********************************************************************
 177  *      _set_SSE2_enable (MSVCRT.@)
 178  */
 179 int CDECL _set_SSE2_enable(int flag)
 180 {
 181     sse2_enabled = flag && sse2_supported;
 182     return sse2_enabled;
 183 }
 184
 185 #if defined(_WIN64)
 186 # if _MSVCR_VER>=140
 187 /*********************************************************************
 188  *      _get_FMA3_enable (UCRTBASE.@)
 189  */
 190 int CDECL _get_FMA3_enable(void)
 191 {
 192     FIXME("() stub\n");
 193     return 0;
 194 }
 195 # endif
 196
 197 # if _MSVCR_VER>=120
 198 /*********************************************************************
 199  *      _set_FMA3_enable (MSVCR120.@)
 200  */
 201 int CDECL _set_FMA3_enable(int flag)
 202 {
 203     FIXME("(%x) stub\n", flag);
 204     return 0;
 205 }
 206 # endif
 207 #endif
 208
 209 #if !defined(__i386__) || _MSVCR_VER>=120
 210
 211 /*********************************************************************
 212  *      _chgsignf (MSVCRT.@)
 213  */
 214 float CDECL _chgsignf( float num )
 215 {
 216     union { float f; UINT32 i; } u = { num };
 217     u.i ^= 0x80000000;
 218     return u.f;
 219 }
 220
 221 /*********************************************************************
 222  *      _copysignf (MSVCRT.@)
 223  *
 224  * Copied from musl: src/math/copysignf.c
 225  */
 226 float CDECL _copysignf( float x, float y )
 227 {
 228     union { float f; UINT32 i; } ux = { x }, uy = { y };
 229     ux.i &= 0x7fffffff;
 230     ux.i |= uy.i & 0x80000000;
 231     return ux.f;
 232 }
 233
 234 /*********************************************************************
 235  *      _nextafterf (MSVCRT.@)
 236  *
 237  * Copied from musl: src/math/nextafterf.c
 238  */
 239 float CDECL _nextafterf( float x, float y )
 240 {
 241     unsigned int ix = *(unsigned int*)&x;
 242     unsigned int iy = *(unsigned int*)&y;
 243     unsigned int ax, ay, e;
 244
 245     if (isnan(x) || isnan(y))
 246         return x + y;
 247     if (x == y) {
 248         if (_fpclassf(y) & (_FPCLASS_ND | _FPCLASS_PD | _FPCLASS_NZ | _FPCLASS_PZ ))
 249             *_errno() = ERANGE;
 250         return y;
 251     }
 252     ax = ix & 0x7fffffff;
 253     ay = iy & 0x7fffffff;
 254     if (ax == 0) {
 255         if (ay == 0)
 256             return y;
 257         ix = (iy & 0x80000000) | 1;
 258     } else if (ax > ay || ((ix ^ iy) & 0x80000000))
 259         ix--;
 260     else
 261         ix++;
 262     e = ix & 0x7f800000;
 263     /* raise overflow if ix is infinite and x is finite */
 264     if (e == 0x7f800000) {
 265         fp_barrierf(x + x);
 266         *_errno() = ERANGE;
 267     }
 268     /* raise underflow if ix is subnormal or zero */
 269     y = *(float*)&ix;
 270     if (e == 0) {
 271         fp_barrierf(x * x + y * y);
 272         *_errno() = ERANGE;
 273     }
 274     return y;
 275 }
 276
 277 /* Copied from musl: src/math/ilogbf.c */
 278 static int __ilogbf(float x)
 279 {
 280     union { float f; UINT32 i; } u = { x };
 281     int e = u.i >> 23 & 0xff;
 282
 283     if (!e)
 284     {
 285         u.i <<= 9;
 286         if (u.i == 0) return FP_ILOGB0;
 287         /* subnormal x */
 288         for (e = -0x7f; u.i >> 31 == 0; e--, u.i <<= 1);
 289         return e;
 290     }
 291     if (e == 0xff) return u.i << 9 ? FP_ILOGBNAN : INT_MAX;
 292     return e - 0x7f;
 293 }
 294
 295 /*********************************************************************
 296  *      _logbf (MSVCRT.@)
 297  *
 298  * Copied from musl: src/math/logbf.c
 299  */
 300 float CDECL _logbf(float x)
 301 {
 302     if (!isfinite(x))
 303         return x * x;
 304     if (x == 0) {
 305         *_errno() = ERANGE;
 306         return -1 / (x * x);
 307     }
 308     return __ilogbf(x);
 309 }
 310
 311 #endif
 312
 313 /* Copied from musl: src/math/scalbn.c */
 314 static double __scalbn(double x, int n)
 315 {
 316     union {double f; UINT64 i;} u;
 317     double y = x;
 318
 319     if (n > 1023) {
 320         y *= 0x1p1023;
 321         n -= 1023;
 322         if (n > 1023) {
 323             y *= 0x1p1023;
 324             n -= 1023;
 325             if (n > 1023)
 326                 n = 1023;
 327         }
 328     } else if (n < -1022) {
 329         /* make sure final n < -53 to avoid double
 330            rounding in the subnormal range */
 331         y *= 0x1p-1022 * 0x1p53;
 332         n += 1022 - 53;
 333         if (n < -1022) {
 334             y *= 0x1p-1022 * 0x1p53;
 335             n += 1022 - 53;
 336             if (n < -1022)
 337                 n = -1022;
 338         }
 339     }
 340     u.i = (UINT64)(0x3ff + n) << 52;
 341     x = y * u.f;
 342     return x;
 343 }
 344
 345 /* Copied from musl: src/math/__rem_pio2_large.c */
     /*
      * Argument reduction modulo pi/2 for large inputs.
      *
      * The code reads x[0..nx-1], multiplies it by the table of 24-bit pieces in
      * ipio2[] (selected through e0), and writes the reduced value to y[]; one
      * element is written for prec 0, two for prec 1 and 2, three for prec 3.
      * The return value is the low three bits of the quotient n.
      *
      * NOTE(review): per the upstream musl documentation x[] holds the input
      * broken into 24-bit chunks and e0 is the exponent of x[0] - neither is
      * checked here, confirm against the callers.
      * NOTE(review): init_jk[] below has only two entries although the final
      * switch has cases for prec 2 and 3; a prec above 1 would index past the
      * table.
      */
 346 static int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec)
 347 {
 348     static const int init_jk[] = {3, 4};
 349     static const INT32 ipio2[] = {
 350         0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
 351         0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
 352         0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
 353         0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
 354         0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
 355         0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
 356         0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
 357         0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
 358         0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
 359         0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
 360         0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
 361     };
 362     static const double PIo2[] = {
 363         1.57079625129699707031e+00,
 364         7.54978941586159635335e-08,
 365         5.39030252995776476554e-15,
 366         3.28200341580791294123e-22,
 367         1.27065575308067607349e-29,
 368         1.22933308981111328932e-36,
 369         2.73370053816464559624e-44,
 370         2.16741683877804819444e-51,
 371     };
 372
 373     INT32 jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
 374     double z, fw, f[20], fq[20] = {0}, q[20];
 375
 376     /* initialize jk*/
 377     jk = init_jk[prec];
 378     jp = jk;
 379
 380     /* determine jx,jv,q0, note that 3>q0 */
 381     jx = nx - 1;
 382     jv = (e0 - 3) / 24;
 383     if(jv < 0) jv = 0;
 384     q0 = e0 - 24 * (jv + 1);
 385
 386     /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
 387     j = jv - jx;
 388     m = jx + jk;
 389     for (i = 0; i <= m; i++, j++)
 390         f[i] = j < 0 ? 0.0 : (double)ipio2[j];
 391
 392     /* compute q[0],q[1],...q[jk] */
 393     for (i = 0; i <= jk; i++) {
 394         for (j = 0, fw = 0.0; j <= jx; j++)
 395             fw += x[j] * f[jx + i - j];
 396         q[i] = fw;
 397     }
 398
 399     jz = jk;
 400 recompute:
 401     /* distill q[] into iq[] reversingly */
 402     for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) {
 403         fw = (double)(INT32)(0x1p-24 * z);
 404         iq[i] = (INT32)(z - 0x1p24 * fw);
 405         z = q[j - 1] + fw;
 406     }
 407
 408     /* compute n */
 409     z = __scalbn(z, q0); /* actual value of z */
 410     z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */
 411     n = (INT32)z;
 412     z -= (double)n;
 413     ih = 0;
 414     if (q0 > 0) {  /* need iq[jz-1] to determine n */
 415         i = iq[jz - 1] >> (24 - q0);
 416         n += i;
 417         iq[jz - 1] -= i << (24 - q0);
 418         ih = iq[jz - 1] >> (23 - q0);
 419     }
 420     else if (q0 == 0) ih = iq[jz - 1] >> 23;
 421     else if (z >= 0.5) ih = 2;
 422
 423     if (ih > 0) {  /* q > 0.5 */
 424         n += 1;
 425         carry = 0;
 426         for (i = 0; i < jz; i++) {  /* compute 1-q */
 427             j = iq[i];
 428             if (carry == 0) {
 429                 if (j != 0) {
 430                     carry = 1;
 431                     iq[i] = 0x1000000 - j;
 432                 }
 433             } else
 434                 iq[i] = 0xffffff - j;
 435         }
 436         if (q0 > 0) {  /* rare case: chance is 1 in 12 */
 437             switch(q0) {
 438             case 1:
 439                 iq[jz - 1] &= 0x7fffff;
 440                 break;
 441             case 2:
 442                 iq[jz - 1] &= 0x3fffff;
 443                 break;
 444             }
 445         }
 446         if (ih == 2) {
 447             z = 1.0 - z;
 448             if (carry != 0)
 449                 z -= __scalbn(1.0, q0);
 450         }
 451     }
 452
 453     /* check if recomputation is needed */
 454     if (z == 0.0) {
 455         j = 0;
 456         for (i = jz - 1; i >= jk; i--) j |= iq[i];
 457         if (j == 0) {  /* need recomputation */
 458             for (k = 1; iq[jk - k] == 0; k++);  /* k = no. of terms needed */
 459
 460             for (i = jz + 1; i <= jz + k; i++) {  /* add q[jz+1] to q[jz+k] */
 461                 f[jx + i] = (double)ipio2[jv + i];
 462                 for (j = 0, fw = 0.0; j <= jx; j++)
 463                     fw += x[j] * f[jx + i - j];
 464                 q[i] = fw;
 465             }
 466             jz += k;
 467             goto recompute;
 468         }
 469     }
 470
 471     /* chop off zero terms */
 472     if (z == 0.0) {
 473         jz -= 1;
 474         q0 -= 24;
 475         while (iq[jz] == 0) {
 476             jz--;
 477             q0 -= 24;
 478         }
 479     } else { /* break z into 24-bit if necessary */
 480         z = __scalbn(z, -q0);
 481         if (z >= 0x1p24) {
 482             fw = (double)(INT32)(0x1p-24 * z);
 483             iq[jz] = (INT32)(z - 0x1p24 * fw);
 484             jz += 1;
 485             q0 += 24;
 486             iq[jz] = (INT32)fw;
 487         } else
 488             iq[jz] = (INT32)z;
 489     }
 490
 491     /* convert integer "bit" chunk to floating-point value */
 492     fw = __scalbn(1.0, q0);
 493     for (i = jz; i >= 0; i--) {
 494         q[i] = fw * (double)iq[i];
 495         fw *= 0x1p-24;
 496     }
 497
 498     /* compute PIo2[0,...,jp]*q[jz,...,0] */
 499     for(i = jz; i >= 0; i--) {
 500         for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++)
 501             fw += PIo2[k] * q[i + k];
 502         fq[jz - i] = fw;
 503     }
 504
 505     /* compress fq[] into y[] */
         /* every case negates the outputs when ih is non-zero */
 506     switch(prec) {
 507     case 0:
 508         fw = 0.0;
 509         for (i = jz; i >= 0; i--)
 510             fw += fq[i];
 511         y[0] = ih == 0 ? fw : -fw;
 512         break;
 513     case 1:
 514     case 2:
 515         fw = 0.0;
 516         for (i = jz; i >= 0; i--)
 517             fw += fq[i];
 518         fw = (double)fw;
 519         y[0] = ih==0 ? fw : -fw;
 520         fw = fq[0] - fw;
 521         for (i = 1; i <= jz; i++)
 522             fw += fq[i];
 523         y[1] = ih == 0 ? fw : -fw;
 524         break;
 525     case 3:  /* painful */
 526         for (i = jz; i > 0; i--) {
 527             fw = fq[i - 1] + fq[i];
 528             fq[i] += fq[i - 1] - fw;
 529             fq[i - 1] = fw;
 530         }
 531         for (i = jz; i > 1; i--) {
 532             fw = fq[i - 1] + fq[i];
 533             fq[i] += fq[i - 1] - fw;
 534             fq[i - 1] = fw;
 535         }
 536         for (fw = 0.0, i = jz; i >= 2; i--)
 537             fw += fq[i];
 538         if (ih == 0) {
 539             y[0] = fq[0];
 540             y[1] = fq[1];
 541             y[2] = fw;
 542         } else {
 543             y[0] = -fq[0];
 544             y[1] = -fq[1];
 545             y[2] = -fw;
 546         }
 547     }
         /* only the low three bits of the quotient are reported */
 548     return n & 7;
 549 }
 550
 551 /* Based on musl implementation: src/math/round.c */
 552 static double __round(double x)
 553 {
 554     ULONGLONG llx = *(ULONGLONG*)&x, tmp;
 555     int e = (llx >> 52 & 0x7ff) - 0x3ff;
 556
 557     if (e >= 52)
 558         return x;
 559     if (e < -1)
 560         return 0 * x;
 561     else if (e == -1)
 562         return signbit(x) ? -1 : 1;
 563
 564     tmp = 0x000fffffffffffffULL >> e;
 565     if (!(llx & tmp))
 566         return x;
 567     llx += 0x0008000000000000ULL >> e;
 568     llx &= ~tmp;
 569     return *(double*)&llx;
 570 }
 571
 572 #if !defined(__i386__) || _MSVCR_VER >= 120
 573 /* Copied from musl: src/math/expm1f.c */
     /*
      * Single precision exp(x) - 1.
      *
      * Special cases handled before the polynomial evaluation:
      *   - -infinity returns -1; +infinity and NaN are returned unchanged
      *   - finite negative x with |x| >= 27*ln2 goes through math_error() as
      *     _UNDERFLOW with result -1
      *   - x > log(FLT_MAX) goes through math_error() as _OVERFLOW
      *   - |x| < 2**-25 returns x itself
      * Both math_error() calls pass "exp" as the function name.
      *
      * Otherwise x is reduced to hi - lo = x - k*ln2, a rational approximation
      * is evaluated on the reduced argument and the result is scaled by 2**k.
      */
 574 static float __expm1f(float x)
 575 {
 576     static const float ln2_hi = 6.9313812256e-01,
 577         ln2_lo = 9.0580006145e-06,
 578         invln2 = 1.4426950216e+00,
 579         Q1 = -3.3333212137e-2,
 580         Q2 = 1.5807170421e-3;
 581
 582     float y, hi, lo, c, t, e, hxs, hfx, r1, twopk;
 583     union {float f; UINT32 i;} u = {x};
 584     UINT32 hx = u.i & 0x7fffffff;
 585     int k, sign = u.i >> 31;
 586
 587     /* filter out huge and non-finite argument */
 588     if (hx >= 0x4195b844) { /* if |x|>=27*ln2 */
 589         if (hx >= 0x7f800000) /* NaN */
 590             return u.i == 0xff800000 ? -1 : x;
 591         if (sign)
 592             return math_error(_UNDERFLOW, "exp", x, 0, -1);
 593         if (hx > 0x42b17217) /* x > log(FLT_MAX) */
 594             return math_error(_OVERFLOW, "exp", x, 0, fp_barrierf(x * FLT_MAX));
 595     }
 596
 597     /* argument reduction */
 598     if (hx > 0x3eb17218) { /* if |x| > 0.5 ln2 */
 599         if (hx < 0x3F851592) { /* and |x| < 1.5 ln2 */
 600             if (!sign) {
 601                 hi = x - ln2_hi;
 602                 lo = ln2_lo;
 603                 k = 1;
 604             } else {
 605                 hi = x + ln2_hi;
 606                 lo = -ln2_lo;
 607                 k = -1;
 608             }
 609         } else {
 610             k = invln2 * x + (sign ? -0.5f : 0.5f);
 611             t = k;
 612             hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
 613             lo = t * ln2_lo;
 614         }
 615         x = hi - lo;
 616         c = (hi - x) - lo;
 617     } else if (hx < 0x33000000) { /* when |x|<2**-25, return x */
 618         if (hx < 0x00800000)
 619             fp_barrierf(x * x);
 620         return x;
 621     } else
 622         k = 0;
 623
 624     /* x is now in primary range */
 625     hfx = 0.5f * x;
 626     hxs = x * hfx;
 627     r1 = 1.0f + hxs * (Q1 + hxs * Q2);
 628     t = 3.0f - r1 * hfx;
 629     e = hxs * ((r1 - t) / (6.0f - x * t));
 630     if (k == 0) /* c is 0 */
 631         return x - (x * e - hxs);
 632     e = x * (e - c) - c;
 633     e -= hxs;
 634     /* exp(x) ~ 2^k (x_reduced - e + 1) */
 635     if (k == -1)
 636         return 0.5f * (x - e) - 0.5f;
 637     if (k == 1) {
 638         if (x < -0.25f)
 639             return -2.0f * (e - (x + 0.5f));
 640         return 1.0f + 2.0f * (x - e);
 641     }
 642     u.i = (0x7f + k) << 23; /* 2^k */
 643     twopk = u.f;
 644     if (k < 0 || k > 56) { /* suffice to return exp(x)-1 */
 645         y = x - e + 1.0f;
 646         if (k == 128)
 647             y = y * 2.0f * 0x1p127f;
 648         else
 649             y = y * twopk;
 650         return y - 1.0f;
 651     }
 652     u.i = (0x7f-k) << 23; /* 2^-k */
 653     if (k < 23)
 654         y = (x - e + (1 - u.f)) * twopk;
 655     else
 656         y = (x - (e + u.f) + 1) * twopk;
 657     return y;
 658 }
 659
 /* Copied from musl: src/math/__sindf.c
  *
  * Polynomial approximation of sin(x), evaluated in double precision and
  * returned as float.  Arguments with |x| below about 7.8e-3 use the two
  * term form x * (1 + S1 * x^2); larger ones use the full odd polynomial up
  * to x^9.
  * NOTE(review): no argument reduction happens here, so callers presumably
  * pass an already reduced x - confirm at the call sites.
  */
 static float __sindf(double x)
 {
     static const double S1 = -0x1.5555555555555p-3,
         S2 = 0x1.1111111111111p-7,
         S3 = -0x1.a01a01a01a01ap-13,
         S4 = 0x1.71de3a556c734p-19;

     const double x2 = x * x;
     double x3, x4, tail;

     if (x > -7.8175831586122513e-03 && x < 7.8175831586122513e-03)
         return x * (1 + S1 * x2);

     x4 = x2 * x2;
     tail = S3 + x2 * S4;
     x3 = x2 * x;
     return (x + x3 * (S1 + x2 * S2)) + x3 * x4 * tail;
 }
 679
 /* Copied from musl: src/math/__cosdf.c
  *
  * Polynomial approximation of cos(x), evaluated in double precision and
  * returned as float.  Arguments with |x| below about 7.8e-3 use the short
  * form 1 + C0 * x^2; everything else uses the even polynomial up to x^10.
  * NOTE(review): no argument reduction happens here, so callers presumably
  * pass an already reduced x - confirm at the call sites.
  */
 static float __cosdf(double x)
 {
     static const double C0 = -0x1.0000000000000p-1,
         C1 = 0x1.5555555555555p-5,
         C2 = -0x1.6c16c16c16c17p-10,
         C3 = 0x1.a01a01a01a01ap-16,
         C4 = -0x1.27e4fb7789f5cp-22;
     const double x2 = x * x;

     if (!(x > -7.8163146972656250e-03 && x < 7.8163146972656250e-03))
         return 1.0 + x2 * (C0 + x2 * (C1 + x2 * (C2 + x2 * (C3 + x2 * C4))));

     return 1 + C0 * x2;
 }
 695
 /*
  * Lookup table of 32 64-bit constants; the first entry is the bit pattern
  * of the double 1.0.
  * NOTE(review): presumably these are the bit patterns of the doubles used
  * by an exp2f-style table lookup - the code that reads the table is
  * outside this part of the file, confirm there.
  */
 696 static const UINT64 exp2f_T[] = {
 697     0x3ff0000000000000ULL, 0x3fefd9b0d3158574ULL, 0x3fefb5586cf9890fULL, 0x3fef9301d0125b51ULL,
 698     0x3fef72b83c7d517bULL, 0x3fef54873168b9aaULL, 0x3fef387a6e756238ULL, 0x3fef1e9df51fdee1ULL,
 699     0x3fef06fe0a31b715ULL, 0x3feef1a7373aa9cbULL, 0x3feedea64c123422ULL, 0x3feece086061892dULL,
 700     0x3feebfdad5362a27ULL, 0x3feeb42b569d4f82ULL, 0x3feeab07dd485429ULL, 0x3feea47eb03a5585ULL,
 701     0x3feea09e667f3bcdULL, 0x3fee9f75e8ec5f74ULL, 0x3feea11473eb0187ULL, 0x3feea589994cce13ULL,
 702     0x3feeace5422aa0dbULL, 0x3feeb737b0cdc5e5ULL, 0x3feec49182a3f090ULL, 0x3feed503b23e255dULL,
 703     0x3feee89f995ad3adULL, 0x3feeff76f2fb5e47ULL, 0x3fef199bdd85529cULL, 0x3fef3720dcef9069ULL,
 704     0x3fef5818dcfba487ULL, 0x3fef7c97337b9b5fULL, 0x3fefa4afa2a490daULL, 0x3fefd0765b6e4540ULL
 705 };
 706 #endif
 707
 708 #ifndef __i386__
 709
 710 /*********************************************************************
 711  *      _fpclassf (MSVCRT.@)
 712  */
 713 int CDECL _fpclassf( float num )
 714 {
 715     union { float f; UINT32 i; } u = { num };
 716     int e = u.i >> 23 & 0xff;
 717     int s = u.i >> 31;
 718
 719     switch (e)
 720     {
 721     case 0:
 722         if (u.i << 1) return s ? _FPCLASS_ND : _FPCLASS_PD;
 723         return s ? _FPCLASS_NZ : _FPCLASS_PZ;
 724     case 0xff:
 725         if (u.i << 9) return ((u.i >> 22) & 1) ? _FPCLASS_QNAN : _FPCLASS_SNAN;
 726         return s ? _FPCLASS_NINF : _FPCLASS_PINF;
 727     default:
 728         return s ? _FPCLASS_NN : _FPCLASS_PN;
 729     }
 730 }
 731
 732 /*********************************************************************
 733  *      _finitef (MSVCRT.@)
 734  */
 735 int CDECL _finitef( float num )
 736 {
 737     union { float f; UINT32 i; } u = { num };
 738     return (u.i & 0x7fffffff) < 0x7f800000;
 739 }
 740
 741 /*********************************************************************
 742  *      _isnanf (MSVCRT.@)
 743  */
 744 int CDECL _isnanf( float num )
 745 {
 746     union { float f; UINT32 i; } u = { num };
 747     return (u.i & 0x7fffffff) > 0x7f800000;
 748 }
 749
 /*
  * Rational approximation R(z) = P(z) / Q(z) shared by acosf() and asinf(),
  * which use it as asin(x) ~ x + x * R(x^2).
  */
 static float asinf_R(float z)
 {
     /* coefficients for R(x^2) */
     static const float p1 = 1.66666672e-01,
                  p2 = -5.11644611e-02,
                  p3 = -1.21124933e-02,
                  p4 = -3.58742251e-03,
                  q1 = -7.56982703e-01;

     float numer, denom;

     denom = 1.0f + z * q1;
     numer = z * (p1 + z * (p2 + z * (p3 + z * p4)));
     return numer / denom;
 }
 764
 765 /*********************************************************************
 766  *      acosf (MSVCRT.@)
 767  *
 768  * Copied from musl: src/math/acosf.c
 769  */
 770 float CDECL acosf( float x )
 771 {
 772     static const double pio2_lo = 6.12323399573676603587e-17;
 773
 774     float z, w, s, c, df;
 775     unsigned int hx, ix;
 776
 777     hx = *(unsigned int*)&x;
 778     ix = hx & 0x7fffffff;
 779     /* |x| >= 1 or nan */
 780     if (ix >= 0x3f800000) {
 781         if (ix == 0x3f800000) {
 782             if (hx >> 31)
 783                 return M_PI;
 784             return 0;
 785         }
 786         if (isnan(x)) return x;
 787         return math_error(_DOMAIN, "acosf", x, 0, 0 / (x - x));
 788     }
 789     /* |x| < 0.5 */
 790     if (ix < 0x3f000000) {
 791         if (ix <= 0x32800000) /* |x| < 2**-26 */
 792             return M_PI_2;
 793         return M_PI_2 - (x - (pio2_lo - x * asinf_R(x * x)));
 794     }
 795     /* x < -0.5 */
 796     if (hx >> 31) {
 797         z = (1 + x) * 0.5f;
 798         s = sqrtf(z);
 799         return M_PI - 2 * (s + ((double)s * asinf_R(z)));
 800     }
 801     /* x > 0.5 */
 802     z = (1 - x) * 0.5f;
 803     s = sqrtf(z);
 804     hx = *(unsigned int*)&s & 0xffff0000;
 805     df = *(float*)&hx;
 806     c = (z - df * df) / (s + df);
 807     w = asinf_R(z) * s + c;
 808     return 2 * (df + w);
 809 }
 810
 811 /*********************************************************************
 812  *      asinf (MSVCRT.@)
 813  *
 814  * Copied from musl: src/math/asinf.c
 815  */
 816 float CDECL asinf( float x )
 817 {
 818     static const double pio2 = 1.570796326794896558e+00;
 819     static const float pio4_hi = 0.785398125648;
 820     static const float pio2_lo = 7.54978941586e-08;
 821
 822     float s, z, f, c;
 823     unsigned int hx, ix;
 824
 825     hx = *(unsigned int*)&x;
 826     ix = hx & 0x7fffffff;
 827     if (ix >= 0x3f800000) {  /* |x| >= 1 */
 828         if (ix == 0x3f800000)  /* |x| == 1 */
 829             return x * pio2 + 7.5231638453e-37;  /* asin(+-1) = +-pi/2 with inexact */
 830         if (isnan(x)) return x;
 831         return math_error(_DOMAIN, "asinf", x, 0, 0 / (x - x));
 832     }
 833     if (ix < 0x3f000000) {  /* |x| < 0.5 */
 834         /* if 0x1p-126 <= |x| < 0x1p-12, avoid raising underflow */
 835         if (ix < 0x39800000 && ix >= 0x00800000)
 836             return x;
 837         return x + x * asinf_R(x * x);
 838     }
 839     /* 1 > |x| >= 0.5 */
 840     z = (1 - fabsf(x)) * 0.5f;
 841     s = sqrtf(z);
 842     /* f+c = sqrt(z) */
 843     *(unsigned int*)&f = *(unsigned int*)&s & 0xffff0000;
 844     c = (z - f * f) / (s + f);
 845     x = pio4_hi - (2 * s * asinf_R(z) - (pio2_lo - 2 * c) - (pio4_hi - 2 * f));
 846     if (hx >> 31)
 847         return -x;
 848     return x;
 849 }
 850
 851 /*********************************************************************
 852  *      atanf (MSVCRT.@)
 853  *
 854  * Copied from musl: src/math/atanf.c
 855  */
 856 float CDECL atanf( float x )
 857 {
 858     static const float atanhi[] = {
 859         4.6364760399e-01,
 860         7.8539812565e-01,
 861         9.8279368877e-01,
 862         1.5707962513e+00,
 863     };
 864     static const float atanlo[] = {
 865         5.0121582440e-09,
 866         3.7748947079e-08,
 867         3.4473217170e-08,
 868         7.5497894159e-08,
 869     };
 870     static const float aT[] = {
 871         3.3333328366e-01,
 872         -1.9999158382e-01,
 873         1.4253635705e-01,
 874         -1.0648017377e-01,
 875         6.1687607318e-02,
 876     };
 877
 878     float w, s1, s2, z;
 879     unsigned int ix, sign;
 880     int id;
 881
 882 #if _MSVCR_VER == 0
 883     if (isnan(x)) return math_error(_DOMAIN, "atanf", x, 0, x);
 884 #endif
 885
 886     ix = *(unsigned int*)&x;
 887     sign = ix >> 31;
 888     ix &= 0x7fffffff;
 889     if (ix >= 0x4c800000) {  /* if |x| >= 2**26 */
 890         if (isnan(x))
 891             return x;
 892         z = atanhi[3] + 7.5231638453e-37;
 893         return sign ? -z : z;
 894     }
 895     if (ix < 0x3ee00000) {   /* |x| < 0.4375 */
 896         if (ix < 0x39800000) {  /* |x| < 2**-12 */
 897             if (ix < 0x00800000)
 898                 /* raise underflow for subnormal x */
 899                 fp_barrierf(x*x);
 900             return x;
 901         }
 902         id = -1;
 903     } else {
 904         x = fabsf(x);
 905         if (ix < 0x3f980000) {  /* |x| < 1.1875 */
 906             if (ix < 0x3f300000) {  /*  7/16 <= |x| < 11/16 */
 907                 id = 0;
 908                 x = (2.0f * x - 1.0f) / (2.0f + x);
 909             } else {                /* 11/16 <= |x| < 19/16 */
 910                 id = 1;
 911                 x = (x - 1.0f) / (x + 1.0f);
 912             }
 913         } else {
 914             if (ix < 0x401c0000) {  /* |x| < 2.4375 */
 915                 id = 2;
 916                 x = (x - 1.5f) / (1.0f + 1.5f * x);
 917             } else {                /* 2.4375 <= |x| < 2**26 */
 918                 id = 3;
 919                 x = -1.0f / x;
 920             }
 921         }
 922     }
 923     /* end of argument reduction */
 924     z = x * x;
 925     w = z * z;
 926     /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
 927     s1 = z * (aT[0] + w * (aT[2] + w * aT[4]));
 928     s2 = w * (aT[1] + w * aT[3]);
 929     if (id < 0)
 930         return x - x * (s1 + s2);
 931     z = atanhi[id] - ((x * (s1 + s2) - atanlo[id]) - x);
 932     return sign ? -z : z;
 933 }
 934
 935 /*********************************************************************
 936  *              atan2f (MSVCRT.@)
 937  *
 938  * Copied from musl: src/math/atan2f.c
 939  */
 940 float CDECL atan2f( float y, float x )
 941 {
 942     static const float pi     = 3.1415927410e+00,
 943                  pi_lo  = -8.7422776573e-08;
 944
 945     float z;
 946     unsigned int m, ix, iy;
 947
 948     if (isnan(x) || isnan(y))
 949         return x + y;
 950     ix = *(unsigned int*)&x;
 951     iy = *(unsigned int*)&y;
 952     if (ix == 0x3f800000)  /* x=1.0 */
 953         return atanf(y);
 954     m = ((iy >> 31) & 1) | ((ix >> 30) & 2);  /* 2*sign(x)+sign(y) */
 955     ix &= 0x7fffffff;
 956     iy &= 0x7fffffff;
 957
 958     /* when y = 0 */
 959     if (iy == 0) {
 960         switch (m) {
 961         case 0:
 962         case 1: return y;   /* atan(+-0,+anything)=+-0 */
 963         case 2: return pi;  /* atan(+0,-anything) = pi */
 964         case 3: return -pi; /* atan(-0,-anything) =-pi */
 965         }
 966     }
 967     /* when x = 0 */
 968     if (ix == 0)
 969         return m & 1 ? -pi / 2 : pi / 2;
 970     /* when x is INF */
 971     if (ix == 0x7f800000) {
 972         if (iy == 0x7f800000) {
 973             switch (m) {
 974             case 0: return pi / 4;      /* atan(+INF,+INF) */
 975             case 1: return -pi / 4;     /* atan(-INF,+INF) */
 976             case 2: return 3 * pi / 4;  /*atan(+INF,-INF)*/
 977             case 3: return -3 * pi / 4; /*atan(-INF,-INF)*/
 978             }
 979         } else {
 980             switch (m) {
 981             case 0: return 0.0f;    /* atan(+...,+INF) */
 982             case 1: return -0.0f;   /* atan(-...,+INF) */
 983             case 2: return pi;      /* atan(+...,-INF) */
 984             case 3: return -pi;     /* atan(-...,-INF) */
 985             }
 986         }
 987     }
 988     /* |y/x| > 0x1p26 */
 989     if (ix + (26 << 23) < iy || iy == 0x7f800000)
 990         return m & 1 ? -pi / 2 : pi / 2;
 991
 992     /* z = atan(|y/x|) with correct underflow */
 993     if ((m & 2) && iy + (26 << 23) < ix)  /*|y/x| < 0x1p-26, x < 0 */
 994         z = 0.0;
 995     else
 996         z = atanf(fabsf(y / x));
 997     switch (m) {
 998     case 0: return z;                /* atan(+,+) */
 999     case 1: return -z;               /* atan(-,+) */
1000     case 2: return pi - (z - pi_lo); /* atan(+,-) */
1001     default: /* case 3 */
1002         return (z - pi_lo) - pi;     /* atan(-,-) */
1003     }
1004 }
1005
1006 /* Copied from musl: src/math/__rem_pio2f.c */
1007 static int __rem_pio2f(float x, double *y)
1008 {
1009     static const double toint = 1.5 / DBL_EPSILON,
1010         pio4 = 0x1.921fb6p-1,
1011         invpio2 = 6.36619772367581382433e-01,
1012         pio2_1 = 1.57079631090164184570e+00,
1013         pio2_1t = 1.58932547735281966916e-08;
1014
1015     union {float f; uint32_t i;} u = {x};
1016     double tx[1], ty[1], fn;
1017     UINT32 ix;
1018     int n, sign, e0;
1019
1020     ix = u.i & 0x7fffffff;
1021     /* 25+53 bit pi is good enough for medium size */
1022     if (ix < 0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */
1023         /* Use a specialized rint() to get fn. */
1024         fn = fp_barrier(x * invpio2 + toint) - toint;
1025         n  = (int)fn;
1026         *y = x - fn * pio2_1 - fn * pio2_1t;
1027         /* Matters with directed rounding. */
1028         if (*y < -pio4) {
1029             n--;
1030             fn--;
1031             *y = x - fn * pio2_1 - fn * pio2_1t;
1032         } else if (*y > pio4) {
1033             n++;
1034             fn++;
1035             *y = x - fn * pio2_1 - fn * pio2_1t;
1036         }
1037         return n;
1038     }
1039     if(ix >= 0x7f800000) { /* x is inf or NaN */
1040         *y = x - x;
1041         return 0;
1042     }
1043     /* scale x into [2^23, 2^24-1] */
1044     sign = u.i >> 31;
1045     e0 = (ix >> 23) - (0x7f + 23); /* e0 = ilogb(|x|)-23, positive */
1046     u.i = ix - (e0 << 23);
1047     tx[0] = u.f;
1048     n = __rem_pio2_large(tx, ty, e0, 1, 0);
1049     if (sign) {
1050         *y = -ty[0];
1051         return -n;
1052     }
1053     *y = ty[0];
1054     return n;
1055 }
1056
1057 /*********************************************************************
1058  *      cosf (MSVCRT.@)
1059  *
1060  * Copied from musl: src/math/cosf.c
1061  */
1062 float CDECL cosf( float x )
1063 {
1064     static const double c1pio2 = 1*M_PI_2,
1065         c2pio2 = 2*M_PI_2,
1066         c3pio2 = 3*M_PI_2,
1067         c4pio2 = 4*M_PI_2;
1068
1069     double y;
1070     UINT32 ix;
1071     unsigned n, sign;
1072
1073     ix = *(UINT32*)&x;
1074     sign = ix >> 31;
1075     ix &= 0x7fffffff;
1076
1077     if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
1078         if (ix < 0x39800000) { /* |x| < 2**-12 */
1079             /* raise inexact if x != 0 */
1080             fp_barrierf(x + 0x1p120f);
1081             return 1.0f;
1082         }
1083         return __cosdf(x);
1084     }
1085     if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
1086         if (ix > 0x4016cbe3) /* |x| ~> 3*pi/4 */
1087             return -__cosdf(sign ? x + c2pio2 : x - c2pio2);
1088         else {
1089             if (sign)
1090                 return __sindf(x + c1pio2);
1091             else
1092                 return __sindf(c1pio2 - x);
1093         }
1094     }
1095     if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
1096         if (ix > 0x40afeddf) /* |x| ~> 7*pi/4 */
1097             return __cosdf(sign ? x + c4pio2 : x - c4pio2);
1098         else {
1099             if (sign)
1100                 return __sindf(-x - c3pio2);
1101             else
1102                 return __sindf(x - c3pio2);
1103         }
1104     }
1105
1106     /* cos(Inf or NaN) is NaN */
1107     if (isinf(x)) return math_error(_DOMAIN, "cosf", x, 0, x - x);
1108     if (ix >= 0x7f800000)
1109         return x - x;
1110
1111     /* general argument reduction needed */
1112     n = __rem_pio2f(x, &y);
1113     switch (n & 3) {
1114     case 0: return __cosdf(y);
1115     case 1: return __sindf(-y);
1116     case 2: return -__cosdf(y);
1117     default: return __sindf(y);
1118     }
1119 }
1120
1121 /* Copied from musl: src/math/__expo2f.c */
1122 static float __expo2f(float x, float sign)
1123 {
1124     static const int k = 235;
1125     static const float kln2 = 0x1.45c778p+7f;
1126     float scale;
1127
1128     *(UINT32*)&scale = (UINT32)(0x7f + k/2) << 23;
1129     return expf(x - kln2) * (sign * scale) * scale;
1130 }
1131
1132 /*********************************************************************
1133  *      coshf (MSVCRT.@)
1134  *
1135  * Copied from musl: src/math/coshf.c
1136  */
1137 float CDECL coshf( float x )
1138 {
1139     UINT32 ui = *(UINT32*)&x;
1140     float t;
1141
1142     /* |x| */
1143     ui &= 0x7fffffff;
1144     x = *(float*)&ui;
1145
1146     /* |x| < log(2) */
1147     if (ui < 0x3f317217) {
1148         if (ui < 0x3f800000 - (12 << 23)) {
1149             fp_barrierf(x + 0x1p120f);
1150             return 1;
1151         }
1152         t = __expm1f(x);
1153         return 1 + t * t / (2 * (1 + t));
1154     }
1155
1156     /* |x| < log(FLT_MAX) */
1157     if (ui < 0x42b17217) {
1158         t = expf(x);
1159         return 0.5f * (t + 1 / t);
1160     }
1161
1162     /* |x| > log(FLT_MAX) or nan */
1163     t = __expo2f(x, 1.0f);
1164     return t;
1165 }
1166
1167 /*********************************************************************
1168  *      expf (MSVCRT.@)
1169  */
1170 float CDECL expf( float x )
1171 {
1172     static const double C[] = {
1173         0x1.c6af84b912394p-5 / (1 << 5) / (1 << 5) / (1 << 5),
1174         0x1.ebfce50fac4f3p-3 / (1 << 5) / (1 << 5),
1175         0x1.62e42ff0c52d6p-1 / (1 << 5)
1176     };
1177     static const double invln2n = 0x1.71547652b82fep+0 * (1 << 5);
1178
1179     double kd, z, r, r2, y, s;
1180     UINT32 abstop;
1181     UINT64 ki, t;
1182
1183     abstop = (*(UINT32*)&x >> 20) & 0x7ff;
1184     if (abstop >= 0x42b) {
1185         /* |x| >= 88 or x is nan.  */
1186         if (*(UINT32*)&x == 0xff800000)
1187             return 0.0f;
1188         if (abstop >= 0x7f8)
1189             return x + x;
1190         if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
1191             return math_error(_OVERFLOW, "expf", x, 0, x * FLT_MAX);
1192         if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
1193             return math_error(_UNDERFLOW, "expf", x, 0, fp_barrierf(FLT_MIN) * FLT_MIN);
1194     }
1195
1196     /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k.  */
1197     z = invln2n * x;
1198
1199     /* Round and convert z to int, the result is in [-150*N, 128*N] and
1200        ideally ties-to-even rule is used, otherwise the magnitude of r
1201        can be bigger which gives larger approximation error.  */
1202     kd = __round(z);
1203     ki = kd;
1204     r = z - kd;
1205
1206     /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
1207     t = exp2f_T[ki % (1 << 5)];
1208     t += ki << (52 - 5);
1209     s = *(double*)&t;
1210     z = C[0] * r + C[1];
1211     r2 = r * r;
1212     y = C[2] * r + 1;
1213     y = z * r2 + y;
1214     y = y * s;
1215     return y;
1216 }
1217
1218 /*********************************************************************
1219  *      fmodf (MSVCRT.@)
1220  *
1221  * Copied from musl: src/math/fmodf.c
1222  */
1223 float CDECL fmodf( float x, float y )
1224 {
1225     UINT32 xi = *(UINT32*)&x;
1226     UINT32 yi = *(UINT32*)&y;
1227     int ex = xi>>23 & 0xff;
1228     int ey = yi>>23 & 0xff;
1229     UINT32 sx = xi & 0x80000000;
1230     UINT32 i;
1231
1232     if (isinf(x)) return math_error(_DOMAIN, "fmodf", x, y, (x * y) / (x * y));
1233     if (yi << 1 == 0 || isnan(y) || ex == 0xff)
1234         return (x * y) / (x * y);
1235     if (xi << 1 <= yi << 1) {
1236         if (xi << 1 == yi << 1)
1237             return 0 * x;
1238         return x;
1239     }
1240
1241     /* normalize x and y */
1242     if (!ex) {
1243         for (i = xi << 9; i >> 31 == 0; ex--, i <<= 1);
1244         xi <<= -ex + 1;
1245     } else {
1246         xi &= -1U >> 9;
1247         xi |= 1U << 23;
1248     }
1249     if (!ey) {
1250         for (i = yi << 9; i >> 31 == 0; ey--, i <<= 1);
1251         yi <<= -ey + 1;
1252     } else {
1253         yi &= -1U >> 9;
1254         yi |= 1U << 23;
1255     }
1256
1257     /* x mod y */
1258     for (; ex > ey; ex--) {
1259         i = xi - yi;
1260         if (i >> 31 == 0) {
1261             if (i == 0)
1262                 return 0 * x;
1263             xi = i;
1264         }
1265         xi <<= 1;
1266     }
1267     i = xi - yi;
1268     if (i >> 31 == 0) {
1269         if (i == 0)
1270             return 0 * x;
1271         xi = i;
1272     }
1273     for (; xi>>23 == 0; xi <<= 1, ex--);
1274
1275     /* scale result up */
1276     if (ex > 0) {
1277         xi -= 1U << 23;
1278         xi |= (UINT32)ex << 23;
1279     } else {
1280         xi >>= -ex + 1;
1281     }
1282     xi |= sx;
1283     return *(float*)&xi;
1284 }
1285
1286 /*********************************************************************
1287  *      logf (MSVCRT.@)
1288  *
1289  * Copied from musl: src/math/logf.c src/math/logf_data.c
1290  */
1291 float CDECL logf( float x )
1292 {
1293     static const double Ln2 = 0x1.62e42fefa39efp-1;
1294     static const double A[] = {
1295         -0x1.00ea348b88334p-2,
1296         0x1.5575b0be00b6ap-2,
1297         -0x1.ffffef20a4123p-2
1298     };
1299     static const struct {
1300         double invc, logc;
1301     } T[] = {
1302         { 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 },
1303         { 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 },
1304         { 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 },
1305         { 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 },
1306         { 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 },
1307         { 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 },
1308         { 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 },
1309         { 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 },
1310         { 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 },
1311         { 0x1p+0, 0x0p+0 },
1312         { 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 },
1313         { 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 },
1314         { 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 },
1315         { 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 },
1316         { 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 },
1317         { 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 }
1318     };
1319
1320     double z, r, r2, y, y0, invc, logc;
1321     UINT32 ix, iz, tmp;
1322     int k, i;
1323
1324     ix = *(UINT32*)&x;
1325     /* Fix sign of zero with downward rounding when x==1. */
1326     if (ix == 0x3f800000)
1327         return 0;
1328     if (ix - 0x00800000 >= 0x7f800000 - 0x00800000) {
1329         /* x < 0x1p-126 or inf or nan. */
1330         if (ix * 2 == 0)
1331             return math_error(_SING, "logf", x, 0, (ix & 0x80000000 ? 1.0 : -1.0) / x);
1332         if (ix == 0x7f800000) /* log(inf) == inf. */
1333             return x;
1334         if (ix * 2 > 0xff000000)
1335             return x;
1336         if (ix & 0x80000000)
1337             return math_error(_DOMAIN, "logf", x, 0, (x - x) / (x - x));
1338         /* x is subnormal, normalize it. */
1339         x *= 0x1p23f;
1340         ix = *(UINT32*)&x;
1341         ix -= 23 << 23;
1342     }
1343
1344     /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
1345        The range is split into N subintervals.
1346        The ith subinterval contains z and c is near its center. */
1347     tmp = ix - 0x3f330000;
1348     i = (tmp >> (23 - 4)) % (1 << 4);
1349     k = (INT32)tmp >> 23; /* arithmetic shift */
1350     iz = ix - (tmp & (0x1ffu << 23));
1351     invc = T[i].invc;
1352     logc = T[i].logc;
1353     z = *(float*)&iz;
1354
1355     /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
1356     r = z * invc - 1;
1357     y0 = logc + (double)k * Ln2;
1358
1359     /* Pipelined polynomial evaluation to approximate log1p(r). */
1360     r2 = r * r;
1361     y = A[1] * r + A[2];
1362     y = A[0] * r2 + y;
1363     y = y * r2 + (y0 + r);
1364     return y;
1365 }
1366
1367 /*********************************************************************
1368  *      log10f (MSVCRT.@)
1369  */
1370 float CDECL log10f( float x )
1371 {
1372     static const float ivln10hi = 4.3432617188e-01,
1373         ivln10lo = -3.1689971365e-05,
1374         log10_2hi = 3.0102920532e-01,
1375         log10_2lo = 7.9034151668e-07,
1376         Lg1 = 0xaaaaaa.0p-24,
1377         Lg2 = 0xccce13.0p-25,
1378         Lg3 = 0x91e9ee.0p-25,
1379         Lg4 = 0xf89e26.0p-26;
1380
1381     union {float f; UINT32 i;} u = {x};
1382     float hfsq, f, s, z, R, w, t1, t2, dk, hi, lo;
1383     UINT32 ix;
1384     int k;
1385
1386     ix = u.i;
1387     k = 0;
1388     if (ix < 0x00800000 || ix >> 31) { /* x < 2**-126 */
1389         if (ix << 1 == 0)
1390             return math_error(_SING, "log10f", x, 0, -1 / (x * x));
1391         if ((ix & ~(1u << 31)) > 0x7f800000)
1392             return x;
1393         if (ix >> 31)
1394             return math_error(_DOMAIN, "log10f", x, 0, (x - x) / (x - x));
1395         /* subnormal number, scale up x */
1396         k -= 25;
1397         x *= 0x1p25f;
1398         u.f = x;
1399         ix = u.i;
1400     } else if (ix >= 0x7f800000) {
1401         return x;
1402     } else if (ix == 0x3f800000)
1403         return 0;
1404
1405     /* reduce x into [sqrt(2)/2, sqrt(2)] */
1406     ix += 0x3f800000 - 0x3f3504f3;
1407     k += (int)(ix >> 23) - 0x7f;
1408     ix = (ix & 0x007fffff) + 0x3f3504f3;
1409     u.i = ix;
1410     x = u.f;
1411
1412     f = x - 1.0f;
1413     s = f / (2.0f + f);
1414     z = s * s;
1415     w = z * z;
1416     t1= w * (Lg2 + w * Lg4);
1417     t2= z * (Lg1 + w * Lg3);
1418     R = t2 + t1;
1419     hfsq = 0.5f * f * f;
1420
1421     hi = f - hfsq;
1422     u.f = hi;
1423     u.i &= 0xfffff000;
1424     hi = u.f;
1425     lo = f - hi - hfsq + s * (hfsq + R);
1426     dk = k;
1427     return dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi + hi * ivln10hi + dk * log10_2hi;
1428 }
1429
1430 /* Subnormal input is normalized so ix has negative biased exponent.
1431    Output is multiplied by POWF_SCALE (where 1 << 5). */
1432 static double powf_log2(UINT32 ix)
1433 {
1434     static const struct {
1435         double invc, logc;
1436     } T[] = {
1437         { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * (1 << 5) },
1438         { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * (1 << 5) },
1439         { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * (1 << 5) },
1440         { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * (1 << 5) },
1441         { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * (1 << 5) },
1442         { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * (1 << 5) },
1443         { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * (1 << 5) },
1444         { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * (1 << 5) },
1445         { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * (1 << 5) },
1446         { 0x1p+0, 0x0p+0 * (1 << 4) },
1447         { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * (1 << 5) },
1448         { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * (1 << 5) },
1449         { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * (1 << 5) },
1450         { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * (1 << 5) },
1451         { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * (1 << 5) },
1452         { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * (1 << 5) }
1453     };
1454     static const double A[] = {
1455         0x1.27616c9496e0bp-2 * (1 << 5), -0x1.71969a075c67ap-2 * (1 << 5),
1456         0x1.ec70a6ca7baddp-2 * (1 << 5), -0x1.7154748bef6c8p-1 * (1 << 5),
1457         0x1.71547652ab82bp0 * (1 << 5)
1458     };
1459
1460     double z, r, r2, r4, p, q, y, y0, invc, logc;
1461     UINT32 iz, top, tmp;
1462     int k, i;
1463
1464     /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
1465        The range is split into N subintervals.
1466        The ith subinterval contains z and c is near its center. */
1467     tmp = ix - 0x3f330000;
1468     i = (tmp >> (23 - 4)) % (1 << 4);
1469     top = tmp & 0xff800000;
1470     iz = ix - top;
1471     k = (INT32)top >> (23 - 5); /* arithmetic shift */
1472     invc = T[i].invc;
1473     logc = T[i].logc;
1474     z = *(float*)&iz;
1475
1476     /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
1477     r = z * invc - 1;
1478     y0 = logc + (double)k;
1479
1480     /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
1481     r2 = r * r;
1482     y = A[0] * r + A[1];
1483     p = A[2] * r + A[3];
1484     r4 = r2 * r2;
1485     q = A[4] * r + y0;
1486     q = p * r2 + q;
1487     y = y * r4 + q;
1488     return y;
1489 }
1490
1491 /* The output of log2 and thus the input of exp2 is either scaled by N
1492    (in case of fast toint intrinsics) or not. The unscaled xd must be
1493    in [-1021,1023], sign_bias sets the sign of the result. */
1494 static float powf_exp2(double xd, UINT32 sign_bias)
1495 {
1496     static const double C[] = {
1497         0x1.c6af84b912394p-5 / (1 << 5) / (1 << 5) / (1 << 5),
1498         0x1.ebfce50fac4f3p-3 / (1 << 5) / (1 << 5),
1499         0x1.62e42ff0c52d6p-1 / (1 << 5)
1500     };
1501
1502     UINT64 ki, ski, t;
1503     double kd, z, r, r2, y, s;
1504
1505     /* N*x = k + r with r in [-1/2, 1/2] */
1506     kd = __round(xd); /* k */
1507     ki = kd;
1508     r = xd - kd;
1509
1510     /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
1511     t = exp2f_T[ki % (1 << 5)];
1512     ski = ki + sign_bias;
1513     t += ski << (52 - 5);
1514     s = *(double*)&t;
1515     z = C[0] * r + C[1];
1516     r2 = r * r;
1517     y = C[2] * r + 1;
1518     y = z * r2 + y;
1519     y = y * s;
1520     return y;
1521 }
1522
1523 /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
1524    the bit representation of a non-zero finite floating-point value. */
1525 static int powf_checkint(UINT32 iy)
1526 {
1527     int e = iy >> 23 & 0xff;
1528     if (e < 0x7f)
1529         return 0;
1530     if (e > 0x7f + 23)
1531         return 2;
1532     if (iy & ((1 << (0x7f + 23 - e)) - 1))
1533         return 0;
1534     if (iy & (1 << (0x7f + 23 - e)))
1535         return 1;
1536     return 2;
1537 }
1538
1539 /*********************************************************************
1540  *      powf (MSVCRT.@)
1541  *
1542  * Copied from musl: src/math/powf.c src/math/powf_data.c
1543  */
1544 float CDECL powf( float x, float y )
1545 {
1546     UINT32 sign_bias = 0;
1547     UINT32 ix, iy;
1548     double logx, ylogx;
1549
1550     ix = *(UINT32*)&x;
1551     iy = *(UINT32*)&y;
1552     if (ix - 0x00800000 >= 0x7f800000 - 0x00800000 ||
1553             2 * iy - 1 >= 2u * 0x7f800000 - 1) {
1554         /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */
1555         if (2 * iy - 1 >= 2u * 0x7f800000 - 1) {
1556             if (2 * iy == 0)
1557                 return 1.0f;
1558             if (ix == 0x3f800000)
1559                 return 1.0f;
1560             if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000)
1561                 return x + y;
1562             if (2 * ix == 2 * 0x3f800000)
1563                 return 1.0f;
1564             if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
1565                 return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
1566             return y * y;
1567         }
1568         if (2 * ix - 1 >= 2u * 0x7f800000 - 1) {
1569             float x2 = x * x;
1570             if (ix & 0x80000000 && powf_checkint(iy) == 1)
1571                 x2 = -x2;
1572             if (iy & 0x80000000 && x2 == 0.0)
1573                 return math_error(_SING, "powf", x, y, 1 / x2);
1574             /* Without the barrier some versions of clang hoist the 1/x2 and
1575                thus division by zero exception can be signaled spuriously. */
1576             return iy & 0x80000000 ? fp_barrierf(1 / x2) : x2;
1577         }
1578         /* x and y are non-zero finite. */
1579         if (ix & 0x80000000) {
1580             /* Finite x < 0. */
1581             int yint = powf_checkint(iy);
1582             if (yint == 0)
1583                 return math_error(_DOMAIN, "powf", x, y, 0 / (x - x));
1584             if (yint == 1)
1585                 sign_bias = 1 << (5 + 11);
1586             ix &= 0x7fffffff;
1587         }
1588         if (ix < 0x00800000) {
1589             /* Normalize subnormal x so exponent becomes negative. */
1590             x *= 0x1p23f;
1591             ix = *(UINT32*)&x;
1592             ix &= 0x7fffffff;
1593             ix -= 23 << 23;
1594         }
1595     }
1596     logx = powf_log2(ix);
1597     ylogx = y * logx; /* cannot overflow, y is single prec. */
1598     if ((*(UINT64*)&ylogx >> 47 & 0xffff) >= 0x40af800000000000llu >> 47) {
1599         /* |y*log(x)| >= 126. */
1600         if (ylogx > 0x1.fffffffd1d571p+6 * (1 << 5))
1601             return math_error(_OVERFLOW, "powf", x, y, (sign_bias ? -1.0 : 1.0) * 0x1p1023);
1602         if (ylogx <= -150.0 * (1 << 5))
1603             return math_error(_UNDERFLOW, "powf", x, y, (sign_bias ? -1.0 : 1.0) / 0x1p1023);
1604     }
1605     return powf_exp2(ylogx, sign_bias);
1606 }
1607
1608 /*********************************************************************
1609  *      sinf (MSVCRT.@)
1610  *
1611  * Copied from musl: src/math/sinf.c
1612  */
1613 float CDECL sinf( float x )
1614 {
1615     static const double s1pio2 = 1*M_PI_2,
1616         s2pio2 = 2*M_PI_2,
1617         s3pio2 = 3*M_PI_2,
1618         s4pio2 = 4*M_PI_2;
1619
1620     double y;
1621     UINT32 ix;
1622     int n, sign;
1623
1624     ix = *(UINT32*)&x;
1625     sign = ix >> 31;
1626     ix &= 0x7fffffff;
1627
1628     if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
1629         if (ix < 0x39800000) { /* |x| < 2**-12 */
1630             /* raise inexact if x!=0 and underflow if subnormal */
1631             fp_barrierf(ix < 0x00800000 ? x / 0x1p120f : x + 0x1p120f);
1632             return x;
1633         }
1634         return __sindf(x);
1635     }
1636     if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
1637         if (ix <= 0x4016cbe3) { /* |x| ~<= 3pi/4 */
1638             if (sign)
1639                 return -__cosdf(x + s1pio2);
1640             else
1641                 return __cosdf(x - s1pio2);
1642         }
1643         return __sindf(sign ? -(x + s2pio2) : -(x - s2pio2));
1644     }
1645     if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
1646         if (ix <= 0x40afeddf) { /* |x| ~<= 7*pi/4 */
1647             if (sign)
1648                 return __cosdf(x + s3pio2);
1649             else
1650                 return -__cosdf(x - s3pio2);
1651         }
1652         return __sindf(sign ? x + s4pio2 : x - s4pio2);
1653     }
1654
1655     /* sin(Inf or NaN) is NaN */
1656     if (isinf(x))
1657         return math_error(_DOMAIN, "sinf", x, 0, x - x);
1658     if (ix >= 0x7f800000)
1659         return x - x;
1660
1661     /* general argument reduction needed */
1662     n = __rem_pio2f(x, &y);
1663     switch (n&3) {
1664     case 0: return __sindf(y);
1665     case 1: return __cosdf(y);
1666     case 2: return __sindf(-y);
1667     default: return -__cosdf(y);
1668     }
1669 }
1670
1671 /*********************************************************************
1672  *      sinhf (MSVCRT.@)
1673  */
1674 float CDECL sinhf( float x )
1675 {
1676     UINT32 ui = *(UINT32*)&x;
1677     float t, h, absx;
1678
1679     h = 0.5;
1680     if (ui >> 31)
1681         h = -h;
1682     /* |x| */
1683     ui &= 0x7fffffff;
1684     absx = *(float*)&ui;
1685
1686     /* |x| < log(FLT_MAX) */
1687     if (ui < 0x42b17217) {
1688         t = __expm1f(absx);
1689         if (ui < 0x3f800000) {
1690             if (ui < 0x3f800000 - (12 << 23))
1691                 return x;
1692             return h * (2 * t - t * t / (t + 1));
1693         }
1694         return h * (t + t / (t + 1));
1695     }
1696
1697     /* |x| > logf(FLT_MAX) or nan */
1698     t = __expo2f(absx, 2 * h);
1699     return t;
1700 }
1701
1702 static BOOL sqrtf_validate( float *x )
1703 {
1704     short c = _fdclass(*x);
1705
1706     if (c == FP_ZERO) return FALSE;
1707     if (c == FP_NAN) return FALSE;
1708     if (signbit(*x))
1709     {
1710         *x = math_error(_DOMAIN, "sqrtf", *x, 0, ret_nan(TRUE));
1711         return FALSE;
1712     }
1713     if (c == FP_INFINITE) return FALSE;
1714     return TRUE;
1715 }
1716
/* Assembly stub that executes the SSE sqrtss instruction on xmm0 and
 * returns.  Only built for x86 targets; sqrtf() calls it on x86-64
 * after sqrtf_validate() has accepted the argument. */
#if defined(__x86_64__) || defined(__i386__)
float CDECL sse2_sqrtf(float);
__ASM_GLOBAL_FUNC( sse2_sqrtf,
        "sqrtss %xmm0, %xmm0\n\t"
        "ret" )
#endif
1723
1724 /*********************************************************************
1725  *      sqrtf (MSVCRT.@)
1726  *
1727  * Copied from musl: src/math/sqrtf.c
1728  */
1729 float CDECL sqrtf( float x )
1730 {
1731 #ifdef __x86_64__
1732     if (!sqrtf_validate(&x))
1733         return x;
1734
1735     return sse2_sqrtf(x);
1736 #else
1737     static const float tiny = 1.0e-30;
1738
1739     float z;
1740     int ix,s,q,m,t,i;
1741     unsigned int r;
1742
1743     ix = *(int*)&x;
1744
1745     if (!sqrtf_validate(&x))
1746         return x;
1747
1748     /* normalize x */
1749     m = ix >> 23;
1750     if (m == 0) {  /* subnormal x */
1751         for (i = 0; (ix & 0x00800000) == 0; i++)
1752             ix <<= 1;
1753         m -= i - 1;
1754     }
1755     m -= 127;  /* unbias exponent */
1756     ix = (ix & 0x007fffff) | 0x00800000;
1757     if (m & 1)  /* odd m, double x to make it even */
1758         ix += ix;
1759     m >>= 1;  /* m = [m/2] */
1760
1761     /* generate sqrt(x) bit by bit */
1762     ix += ix;
1763     q = s = 0;       /* q = sqrt(x) */
1764     r = 0x01000000;  /* r = moving bit from right to left */
1765
1766     while (r != 0) {
1767         t = s + r;
1768         if (t <= ix) {
1769             s = t + r;
1770             ix -= t;
1771             q += r;
1772         }
1773         ix += ix;
1774         r >>= 1;
1775     }
1776
1777     /* use floating add to find out rounding direction */
1778     if (ix != 0) {
1779         z = 1.0f - tiny; /* raise inexact flag */
1780         if (z >= 1.0f) {
1781             z = 1.0f + tiny;
1782             if (z > 1.0f)
1783                 q += 2;
1784             else
1785                 q += q & 1;
1786         }
1787     }
1788     ix = (q >> 1) + 0x3f000000;
1789     r = ix + ((unsigned int)m << 23);
1790     z = *(float*)&r;
1791     return z;
1792 #endif
1793 }
1794
/* Copied from musl: src/math/__tandf.c
 *
 * Tangent kernel: evaluates an odd polynomial approximation of tan(x)
 * in double precision.  Returns that value when odd is zero and
 * -1/value when odd is non-zero.
 */
static float __tandf(double x, int odd)
{
    static const double T[] = {
        0x15554d3418c99f.0p-54,
        0x1112fd38999f72.0p-55,
        0x1b54c91d865afe.0p-57,
        0x191df3908c33ce.0p-58,
        0x185dadfcecf44e.0p-61,
        0x1362b9bf971bcd.0p-59,
    };

    double x2, x3, x4, low, mid, high, val;

    x2 = x * x;
    x4 = x2 * x2;
    x3 = x2 * x;

    /* the polynomial is evaluated as three independent pairs */
    high = T[4] + x2 * T[5];
    mid = T[2] + x2 * T[3];
    low = T[0] + x2 * T[1];

    val = (x + x3 * low) + (x3 * x4) * (mid + x4 * high);
    if (odd)
        return -1.0 / val;
    return val;
}
1818
1819 /*********************************************************************
1820  *      tanf (MSVCRT.@)
1821  *
1822  * Copied from musl: src/math/tanf.c
1823  */
1824 float CDECL tanf( float x )
1825 {
1826     static const double t1pio2 = 1*M_PI_2,
1827         t2pio2 = 2*M_PI_2,
1828         t3pio2 = 3*M_PI_2,
1829         t4pio2 = 4*M_PI_2;
1830
1831     double y;
1832     UINT32 ix;
1833     unsigned n, sign;
1834
1835     ix = *(UINT32*)&x;
1836     sign = ix >> 31;
1837     ix &= 0x7fffffff;
1838
1839     if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
1840         if (ix < 0x39800000) { /* |x| < 2**-12 */
1841             /* raise inexact if x!=0 and underflow if subnormal */
1842             fp_barrierf(ix < 0x00800000 ? x / 0x1p120f : x + 0x1p120f);
1843             return x;
1844         }
1845         return __tandf(x, 0);
1846     }
1847     if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
1848         if (ix <= 0x4016cbe3) /* |x| ~<= 3pi/4 */
1849             return __tandf((sign ? x + t1pio2 : x - t1pio2), 1);
1850         else
1851             return __tandf((sign ? x + t2pio2 : x - t2pio2), 0);
1852     }
1853     if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
1854         if (ix <= 0x40afeddf) /* |x| ~<= 7*pi/4 */
1855             return __tandf((sign ? x + t3pio2 : x - t3pio2), 1);
1856         else
1857             return __tandf((sign ? x + t4pio2 : x - t4pio2), 0);
1858     }
1859
1860     /* tan(Inf or NaN) is NaN */
1861     if (isinf(x))
1862         return math_error(_DOMAIN, "tanf", x, 0, x - x);
1863     if (ix >= 0x7f800000)
1864         return x - x;
1865
1866     /* argument reduction */
1867     n = __rem_pio2f(x, &y);
1868     return __tandf(y, n & 1);
1869 }
1870
1871 /*********************************************************************
1872  *      tanhf (MSVCRT.@)
1873  */
1874 float CDECL tanhf( float x )
1875 {
1876     UINT32 ui = *(UINT32*)&x;
1877     int sign;
1878     float t;
1879
1880     /* x = |x| */
1881     sign = ui >> 31;
1882     ui &= 0x7fffffff;
1883     x = *(float*)&ui;
1884
1885     if (ui > 0x3f0c9f54) {
1886         /* |x| > log(3)/2 ~= 0.5493 or nan */
1887         if (ui > 0x41200000) {
1888 #if _MSVCR_VER < 140
1889             if (isnan(x))
1890                 return math_error(_DOMAIN, "tanhf", x, 0, x);
1891 #endif
1892             /* |x| > 10 */
1893             fp_barrierf(x + 0x1p120f);
1894             t = 1 + 0 / x;
1895         } else {
1896             t = __expm1f(2 * x);
1897             t = 1 - 2 / (t + 2);
1898         }
1899     } else if (ui > 0x3e82c578) {
1900         /* |x| > log(5/3)/2 ~= 0.2554 */
1901         t = __expm1f(2 * x);
1902         t = t / (t + 2);
1903     } else if (ui >= 0x00800000) {
1904         /* |x| >= 0x1p-126 */
1905         t = __expm1f(-2 * x);
1906         t = -t / (t + 2);
1907     } else {
1908         /* |x| is subnormal */
1909         fp_barrierf(x * x);
1910         t = x;
1911     }
1912     return sign ? -t : t;
1913 }
1914
1915 /*********************************************************************
1916  *      ceilf (MSVCRT.@)
1917  *
1918  * Copied from musl: src/math/ceilf.c
1919  */
1920 float CDECL ceilf( float x )
1921 {
1922     union {float f; UINT32 i;} u = {x};
1923     int e = (int)(u.i >> 23 & 0xff) - 0x7f;
1924     UINT32 m;
1925
1926     if (e >= 23)
1927         return x;
1928     if (e >= 0) {
1929         m = 0x007fffff >> e;
1930         if ((u.i & m) == 0)
1931             return x;
1932         if (u.i >> 31 == 0)
1933             u.i += m;
1934         u.i &= ~m;
1935     } else {
1936         if (u.i >> 31)
1937             return -0.0;
1938         else if (u.i << 1)
1939             return 1.0;
1940     }
1941     return u.f;
1942 }
1943
1944 /*********************************************************************
1945  *      floorf (MSVCRT.@)
1946  *
1947  * Copied from musl: src/math/floorf.c
1948  */
1949 float CDECL floorf( float x )
1950 {
1951     union {float f; UINT32 i;} u = {x};
1952     int e = (int)(u.i >> 23 & 0xff) - 0x7f;
1953     UINT32 m;
1954
1955     if (e >= 23)
1956         return x;
1957     if (e >= 0) {
1958         m = 0x007fffff >> e;
1959         if ((u.i & m) == 0)
1960             return x;
1961         if (u.i >> 31)
1962             u.i += m;
1963         u.i &= ~m;
1964     } else {
1965         if (u.i >> 31 == 0)
1966             return 0;
1967         else if (u.i << 1)
1968             return -1;
1969     }
1970     return u.f;
1971 }
1972
1973 /*********************************************************************
1974  *      frexpf (MSVCRT.@)
1975  *
1976  * Copied from musl: src/math/frexpf.c
1977  */
1978 float CDECL frexpf( float x, int *e )
1979 {
1980     UINT32 ux = *(UINT32*)&x;
1981     int ee = ux >> 23 & 0xff;
1982
1983     if (!ee) {
1984         if (x) {
1985             x = frexpf(x * 0x1p64, e);
1986             *e -= 64;
1987         } else *e = 0;
1988         return x;
1989     } else if (ee == 0xff) {
1990         return x;
1991     }
1992
1993     *e = ee - 0x7e;
1994     ux &= 0x807ffffful;
1995     ux |= 0x3f000000ul;
1996     return *(float*)&ux;
1997 }
1998
1999 /*********************************************************************
2000  *      modff (MSVCRT.@)
2001  *
2002  * Copied from musl: src/math/modff.c
2003  */
2004 float CDECL modff( float x, float *iptr )
2005 {
2006     union {float f; UINT32 i;} u = {x};
2007     UINT32 mask;
2008     int e = (u.i >> 23 & 0xff) - 0x7f;
2009
2010     /* no fractional part */
2011     if (e >= 23) {
2012         *iptr = x;
2013         if (e == 0x80 && u.i << 9 != 0) { /* nan */
2014             return x;
2015         }
2016         u.i &= 0x80000000;
2017         return u.f;
2018     }
2019     /* no integral part */
2020     if (e < 0) {
2021         u.i &= 0x80000000;
2022         *iptr = u.f;
2023         return x;
2024     }
2025
2026     mask = 0x007fffff >> e;
2027     if ((u.i & mask) == 0) {
2028         *iptr = x;
2029         u.i &= 0x80000000;
2030         return u.f;
2031     }
2032     u.i &= ~mask;
2033     *iptr = u.f;
2034     return x - u.f;
2035 }
2036
2037 #endif
2038
2039 #if !defined(__i386__) && !defined(__x86_64__) && (_MSVCR_VER == 0 || _MSVCR_VER >= 110)
2040
2041 /*********************************************************************
2042  *      fabsf (MSVCRT.@)
2043  *
2044  * Copied from musl: src/math/fabsf.c
2045  */
2046 float CDECL fabsf( float x )
2047 {
2048     union { float f; UINT32 i; } u = { x };
2049     u.i &= 0x7fffffff;
2050     return u.f;
2051 }
2052
2053 #endif
2054
2055 /*********************************************************************
2056  *              acos (MSVCRT.@)
2057  *
2058  * Copied from musl: src/math/acos.c
2059  */
/* Rational approximation R(z) = p(z) / q(z) used by acos(). */
static double acos_R(double z)
{
    /* numerator coefficients */
    static const double pS0 =  1.66666666666666657415e-01;
    static const double pS1 = -3.25565818622400915405e-01;
    static const double pS2 =  2.01212532134862925881e-01;
    static const double pS3 = -4.00555345006794114027e-02;
    static const double pS4 =  7.91534994289814532176e-04;
    static const double pS5 =  3.47933107596021167570e-05;
    /* denominator coefficients (the constant term is 1) */
    static const double qS1 = -2.40339491173441421878e+00;
    static const double qS2 =  2.02094576023350569471e+00;
    static const double qS3 = -6.88283971605453293030e-01;
    static const double qS4 =  7.70381505559019352791e-02;

    double numer, denom;

    /* both polynomials are evaluated with Horner's scheme */
    numer = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
    denom = 1.0 + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
    return numer / denom;
}
2078
2079 double CDECL acos( double x )
2080 {
2081     static const double pio2_hi = 1.57079632679489655800e+00,
2082                  pio2_lo = 6.12323399573676603587e-17;
2083
2084     double z, w, s, c, df;
2085     unsigned int hx, ix;
2086     ULONGLONG llx;
2087
2088     hx = *(ULONGLONG*)&x >> 32;
2089     ix = hx & 0x7fffffff;
2090     /* |x| >= 1 or nan */
2091     if (ix >= 0x3ff00000) {
2092         unsigned int lx;
2093
2094         lx = *(ULONGLONG*)&x;
2095         if (((ix - 0x3ff00000) | lx) == 0) {
2096             /* acos(1)=0, acos(-1)=pi */
2097             if (hx >> 31)
2098                 return 2 * pio2_hi + 7.5231638452626401e-37;
2099             return 0;
2100         }
2101         if (isnan(x)) return x;
2102         return math_error(_DOMAIN, "acos", x, 0, 0 / (x - x));
2103     }
2104     /* |x| < 0.5 */
2105     if (ix < 0x3fe00000) {
2106         if (ix <= 0x3c600000)  /* |x| < 2**-57 */
2107             return pio2_hi + 7.5231638452626401e-37;
2108         return pio2_hi - (x - (pio2_lo - x * acos_R(x * x)));
2109     }
2110     /* x < -0.5 */
2111     if (hx >> 31) {
2112         z = (1.0 + x) * 0.5;
2113         s = sqrt(z);
2114         w = acos_R(z) * s - pio2_lo;
2115         return 2 * (pio2_hi - (s + w));
2116     }
2117     /* x > 0.5 */
2118     z = (1.0 - x) * 0.5;
2119     s = sqrt(z);
2120     df = s;
2121     llx = (*(ULONGLONG*)&df >> 32) << 32;
2122     df = *(double*)&llx;
2123     c = (z - df * df) / (s + df);
2124     w = acos_R(z) * s + c;
2125     return 2 * (df + w);
2126 }
2127
2128 /*********************************************************************
2129  *              asin (MSVCRT.@)
2130  *
2131  * Copied from musl: src/math/asin.c
2132  */
/* Rational approximation R(z) = p(z) / q(z) used by asin(); the callers
 * pass either x^2 or (1 - |x|) / 2 as z. */
static double asin_R(double z)
{
    /* numerator coefficients */
    static const double pS0 =  1.66666666666666657415e-01;
    static const double pS1 = -3.25565818622400915405e-01;
    static const double pS2 =  2.01212532134862925881e-01;
    static const double pS3 = -4.00555345006794114027e-02;
    static const double pS4 =  7.91534994289814532176e-04;
    static const double pS5 =  3.47933107596021167570e-05;
    /* denominator coefficients (the constant term is 1) */
    static const double qS1 = -2.40339491173441421878e+00;
    static const double qS2 =  2.02094576023350569471e+00;
    static const double qS3 = -6.88283971605453293030e-01;
    static const double qS4 =  7.70381505559019352791e-02;

    double numer, denom;

    /* both polynomials are evaluated with Horner's scheme */
    numer = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
    denom = 1.0 + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
    return numer / denom;
}
2152
#ifdef __i386__
/*
 * x87 implementation of asin, only built for i386.  asin() falls back to
 * it when SSE2 is not enabled or the x87/SSE2 control words are not in
 * the state it checks for.
 *
 * The argument is loaded from the stack (4(%esp)) and the result is left
 * in st(0).
 * NOTE(review): the sequence looks like atan2(x, sqrt((1 - x) * (1 + x)))
 * evaluated with fpatan - confirm the fsubp operand order.
 * NOTE(review): SET_X87_CW / RESET_X87_CW presumably switch the x87
 * control word for the duration of the computation and restore it
 * afterwards; they are defined outside this section - verify.
 */
double CDECL x87_asin(double);
__ASM_GLOBAL_FUNC( x87_asin,
        "fldl 4(%esp)\n\t"
        SET_X87_CW(~0x37f)
        "fld %st\n\t"
        "fld1\n\t"
        "fsubp\n\t"
        "fld1\n\t"
        "fadd %st(2)\n\t"
        "fmulp\n\t"
        "fsqrt\n\t"
        "fpatan\n\t"
        RESET_X87_CW
        "ret" )
#endif
2169
2170 double CDECL asin( double x )
2171 {
2172     static const double pio2_hi = 1.57079632679489655800e+00,
2173                  pio2_lo = 6.12323399573676603587e-17;
2174
2175     double z, r, s;
2176     unsigned int hx, ix;
2177     ULONGLONG llx;
2178 #ifdef __i386__
2179     unsigned int x87_cw, sse2_cw;
2180 #endif
2181
2182     hx = *(ULONGLONG*)&x >> 32;
2183     ix = hx & 0x7fffffff;
2184     /* |x| >= 1 or nan */
2185     if (ix >= 0x3ff00000) {
2186         unsigned int lx;
2187         lx = *(ULONGLONG*)&x;
2188         if (((ix - 0x3ff00000) | lx) == 0)
2189             /* asin(1) = +-pi/2 with inexact */
2190             return x * pio2_hi + 7.5231638452626401e-37;
2191         if (isnan(x))
2192         {
2193 #ifdef __i386__
2194             return math_error(_DOMAIN, "asin", x, 0, x);
2195 #else
2196             return x;
2197 #endif
2198         }
2199         return math_error(_DOMAIN, "asin", x, 0, 0 / (x - x));
2200     }
2201
2202 #ifdef __i386__
2203     __control87_2(0, 0, &x87_cw, &sse2_cw);
2204     if (!sse2_enabled || (x87_cw & _MCW_EM) != _MCW_EM
2205             || (sse2_cw & (_MCW_EM | _MCW_RC)) != _MCW_EM)
2206         return x87_asin(x);
2207 #endif
2208
2209     /* |x| < 0.5 */
2210     if (ix < 0x3fe00000) {
2211         /* if 0x1p-1022 <= |x| < 0x1p-26, avoid raising underflow */
2212         if (ix < 0x3e500000 && ix >= 0x00100000)
2213             return x;
2214         return x + x * asin_R(x * x);
2215     }
2216     /* 1 > |x| >= 0.5 */
2217     z = (1 - fabs(x)) * 0.5;
2218     s = sqrt(z);
2219     r = asin_R(z);
2220     if (ix >= 0x3fef3333) {  /* if |x| > 0.975 */
2221         x = pio2_hi - (2 * (s + s * r) - pio2_lo);
2222     } else {
2223         double f, c;
2224         /* f+c = sqrt(z) */
2225         f = s;
2226         llx = (*(ULONGLONG*)&f >> 32) << 32;
2227         f = *(double*)&llx;
2228         c = (z - f * f) / (s + f);
2229         x = 0.5 * pio2_hi - (2 * s * r - (pio2_lo - 2 * c) - (0.5 * pio2_hi - 2 * f));
2230     }
2231     if (hx >> 31)
2232         return -x;
2233     return x;
2234 }
2235
2236 /*********************************************************************
2237  *              atan (MSVCRT.@)
2238  *
2239  * Copied from musl: src/math/atan.c
2240  */
2241 double CDECL atan( double x )
2242 {
2243     static const double atanhi[] = {
2244         4.63647609000806093515e-01,
2245         7.85398163397448278999e-01,
2246         9.82793723247329054082e-01,
2247         1.57079632679489655800e+00,
2248     };
2249     static const double atanlo[] = {
2250         2.26987774529616870924e-17,
2251         3.06161699786838301793e-17,
2252         1.39033110312309984516e-17,
2253         6.12323399573676603587e-17,
2254     };
2255     static const double aT[] = {
2256         3.33333333333329318027e-01,
2257         -1.99999999998764832476e-01,
2258         1.42857142725034663711e-01,
2259         -1.11111104054623557880e-01,
2260         9.09088713343650656196e-02,
2261         -7.69187620504482999495e-02,
2262         6.66107313738753120669e-02,
2263         -5.83357013379057348645e-02,
2264         4.97687799461593236017e-02,
2265         -3.65315727442169155270e-02,
2266         1.62858201153657823623e-02,
2267     };
2268
2269     double w, s1, s2, z;
2270     unsigned int ix, sign;
2271     int id;
2272
2273 #if _MSVCR_VER == 0
2274     if (isnan(x)) return math_error(_DOMAIN, "atan", x, 0, x);
2275 #endif
2276
2277     ix = *(ULONGLONG*)&x >> 32;
2278     sign = ix >> 31;
2279     ix &= 0x7fffffff;
2280     if (ix >= 0x44100000) {   /* if |x| >= 2^66 */
2281         if (isnan(x))
2282             return x;
2283         z = atanhi[3] + 7.5231638452626401e-37;
2284         return sign ? -z : z;
2285     }
2286     if (ix < 0x3fdc0000) {    /* |x| < 0.4375 */
2287         if (ix < 0x3e400000) {  /* |x| < 2^-27 */
2288             if (ix < 0x00100000)
2289                 /* raise underflow for subnormal x */
2290                 fp_barrierf((float)x);
2291             return x;
2292         }
2293         id = -1;
2294     } else {
2295         x = fabs(x);
2296         if (ix < 0x3ff30000) {  /* |x| < 1.1875 */
2297             if (ix < 0x3fe60000) {  /*  7/16 <= |x| < 11/16 */
2298                 id = 0;
2299                 x = (2.0 * x - 1.0) / (2.0 + x);
2300             } else {                /* 11/16 <= |x| < 19/16 */
2301                 id = 1;
2302                 x = (x - 1.0) / (x + 1.0);
2303             }
2304         } else {
2305             if (ix < 0x40038000) {  /* |x| < 2.4375 */
2306                 id = 2;
2307                 x = (x - 1.5) / (1.0 + 1.5 * x);
2308             } else {                /* 2.4375 <= |x| < 2^66 */
2309                 id = 3;
2310                 x = -1.0 / x;
2311             }
2312         }
2313     }
2314     /* end of argument reduction */
2315     z = x * x;
2316     w = z * z;
2317     /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
2318     s1 = z * (aT[0] + w * (aT[2] + w * (aT[4] + w * (aT[6] + w * (aT[8] + w * aT[10])))));
2319     s2 = w * (aT[1] + w * (aT[3] + w * (aT[5] + w * (aT[7] + w * aT[9]))));
2320     if (id < 0)
2321         return x - x * (s1 + s2);
2322     z = atanhi[id] - (x * (s1 + s2) - atanlo[id] - x);
2323     return sign ? -z : z;
2324 }
2325
2326 /*********************************************************************
2327  *              atan2 (MSVCRT.@)
2328  *
2329  * Copied from musl: src/math/atan2.c
2330  */
2331 double CDECL atan2( double y, double x )
2332 {
2333     static const double pi     = 3.1415926535897931160E+00,
2334                  pi_lo  = 1.2246467991473531772E-16;
2335
2336     double z;
2337     unsigned int m, lx, ly, ix, iy;
2338
2339     if (isnan(x) || isnan(y))
2340         return x+y;
2341     ix = *(ULONGLONG*)&x >> 32;
2342     lx = *(ULONGLONG*)&x;
2343     iy = *(ULONGLONG*)&y >> 32;
2344     ly = *(ULONGLONG*)&y;
2345     if (((ix - 0x3ff00000) | lx) == 0)  /* x = 1.0 */
2346         return atan(y);
2347     m = ((iy >> 31) & 1) | ((ix >> 30) & 2);  /* 2*sign(x)+sign(y) */
2348     ix = ix & 0x7fffffff;
2349     iy = iy & 0x7fffffff;
2350
2351     /* when y = 0 */
2352     if ((iy | ly) == 0) {
2353         switch(m) {
2354         case 0:
2355         case 1: return y;   /* atan(+-0,+anything)=+-0 */
2356         case 2: return pi;  /* atan(+0,-anything) = pi */
2357         case 3: return -pi; /* atan(-0,-anything) =-pi */
2358         }
2359     }
2360     /* when x = 0 */
2361     if ((ix | lx) == 0)
2362         return m & 1 ? -pi / 2 : pi / 2;
2363     /* when x is INF */
2364     if (ix == 0x7ff00000) {
2365         if (iy == 0x7ff00000) {
2366             switch(m) {
2367             case 0: return pi / 4;      /* atan(+INF,+INF) */
2368             case 1: return -pi / 4;     /* atan(-INF,+INF) */
2369             case 2: return 3 * pi / 4;  /* atan(+INF,-INF) */
2370             case 3: return -3 * pi / 4; /* atan(-INF,-INF) */
2371             }
2372         } else {
2373             switch(m) {
2374             case 0: return 0.0;  /* atan(+...,+INF) */
2375             case 1: return -0.0; /* atan(-...,+INF) */
2376             case 2: return pi;   /* atan(+...,-INF) */
2377             case 3: return -pi;  /* atan(-...,-INF) */
2378             }
2379         }
2380     }
2381     /* |y/x| > 0x1p64 */
2382     if (ix + (64 << 20) < iy || iy == 0x7ff00000)
2383         return m & 1 ? -pi / 2 : pi / 2;
2384
2385     /* z = atan(|y/x|) without spurious underflow */
2386     if ((m & 2) && iy + (64 << 20) < ix)  /* |y/x| < 0x1p-64, x<0 */
2387         z = 0;
2388     else
2389         z = atan(fabs(y / x));
2390     switch (m) {
2391     case 0: return z;                /* atan(+,+) */
2392     case 1: return -z;               /* atan(-,+) */
2393     case 2: return pi - (z - pi_lo); /* atan(+,-) */
2394     default: /* case 3 */
2395         return (z - pi_lo) - pi;     /* atan(-,-) */
2396     }
2397 }
2398
2399 /* Copied from musl: src/math/rint.c */
2400 static double __rint(double x)
2401 {
2402     static const double toint = 1 / DBL_EPSILON;
2403
2404     ULONGLONG llx = *(ULONGLONG*)&x;
2405     int e = llx >> 52 & 0x7ff;
2406     int s = llx >> 63;
2407     unsigned cw;
2408     double y;
2409
2410     if (e >= 0x3ff+52)
2411         return x;
2412     cw = _controlfp(0, 0);
2413     if ((cw & _MCW_PC) != _PC_53)
2414         _controlfp(_PC_53, _MCW_PC);
2415     if (s)
2416         y = fp_barrier(x - toint) + toint;
2417     else
2418         y = fp_barrier(x + toint) - toint;
2419     if ((cw & _MCW_PC) != _PC_53)
2420         _controlfp(cw, _MCW_PC);
2421     if (y == 0)
2422         return s ? -0.0 : 0;
2423     return y;
2424 }
2425
/* Copied from musl: src/math/__rem_pio2.c */
/*
 * Argument reduction helper for the trigonometric functions.
 *
 * Subtracts a multiple n of pi/2 from x, stores the reduced value as the
 * pair y[0] (leading part) and y[1] (the part lost when rounding y[0])
 * and returns n.  For inf or nan both outputs are set to x - x and 0 is
 * returned.
 *
 * Three strategies are used depending on |x|: explicit subtraction of
 * 1..4 times pi/2 for small arguments, a rounded quotient with up to
 * three refinement rounds for medium arguments, and __rem_pio2_large()
 * for everything else.
 */
static int __rem_pio2(double x, double *y)
{
    /* NOTE(review): per the musl original, invpio2 is 2/pi, pio2_N are
     * successive leading chunks of pi/2 and pio2_Nt the remaining tail
     * after the first N chunks - confirm against the musl source. */
    static const double pio4    = 0x1.921fb54442d18p-1,
                 invpio2 = 6.36619772367581382433e-01,
                 pio2_1  = 1.57079632673412561417e+00,
                 pio2_1t = 6.07710050650619224932e-11,
                 pio2_2  = 6.07710050630396597660e-11,
                 pio2_2t = 2.02226624879595063154e-21,
                 pio2_3  = 2.02226624871116645580e-21,
                 pio2_3t = 8.47842766036889956997e-32;

    union {double f; UINT64 i;} u = {x};
    double z, w, t, r, fn, tx[3], ty[2];
    UINT32 ix;
    int sign, n, ex, ey, i;

    sign = u.i >> 63;
    /* high word of |x| */
    ix = u.i >> 32 & 0x7fffffff;
    if (ix <= 0x400f6a7a) { /* |x| ~<= 5pi/4 */
        if ((ix & 0xfffff) == 0x921fb) /* |x| ~= pi/2 or 2pi/2 */
            goto medium; /* cancellation -- use medium case */
        if (ix <= 0x4002d97c) { /* |x| ~<= 3pi/4 */
            if (!sign) {
                z = x - pio2_1; /* one round good to 85 bits */
                y[0] = z - pio2_1t;
                y[1] = (z - y[0]) - pio2_1t;
                return 1;
            } else {
                z = x + pio2_1;
                y[0] = z + pio2_1t;
                y[1] = (z - y[0]) + pio2_1t;
                return -1;
            }
        } else {
            if (!sign) {
                z = x - 2 * pio2_1;
                y[0] = z - 2 * pio2_1t;
                y[1] = (z - y[0]) - 2 * pio2_1t;
                return 2;
            } else {
                z = x + 2 * pio2_1;
                y[0] = z + 2 * pio2_1t;
                y[1] = (z - y[0]) + 2 * pio2_1t;
                return -2;
            }
        }
    }
    if (ix <= 0x401c463b) { /* |x| ~<= 9pi/4 */
        if (ix <= 0x4015fdbc) { /* |x| ~<= 7pi/4 */
            if (ix == 0x4012d97c) /* |x| ~= 3pi/2 */
                goto medium;
            if (!sign) {
                z = x - 3 * pio2_1;
                y[0] = z - 3 * pio2_1t;
                y[1] = (z - y[0]) - 3 * pio2_1t;
                return 3;
            } else {
                z = x + 3 * pio2_1;
                y[0] = z + 3 * pio2_1t;
                y[1] = (z - y[0]) + 3 * pio2_1t;
                return -3;
            }
        } else {
            if (ix == 0x401921fb) /* |x| ~= 4pi/2 */
                goto medium;
            if (!sign) {
                z = x - 4 * pio2_1;
                y[0] = z - 4 * pio2_1t;
                y[1] = (z - y[0]) - 4 * pio2_1t;
                return 4;
            } else {
                z = x + 4 * pio2_1;
                y[0] = z + 4 * pio2_1t;
                y[1] = (z - y[0]) + 4 * pio2_1t;
                return -4;
            }
        }
    }
    if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */
medium:
        /* fn is x * (2/pi) rounded to an integral value */
        fn = __rint(x * invpio2);
        n = (INT32)fn;
        r = x - fn * pio2_1;
        w = fn * pio2_1t; /* 1st round, good to 85 bits */
        /* Matters with directed rounding. */
        if (r - w < -pio4) {
            n--;
            fn--;
            r = x - fn * pio2_1;
            w = fn * pio2_1t;
        } else if (r - w > pio4) {
            n++;
            fn++;
            r = x - fn * pio2_1;
            w = fn * pio2_1t;
        }
        y[0] = r - w;
        u.f = y[0];
        /* compare the biased exponents of the result (ey) and of x (ex):
         * a large gap means many leading bits cancelled */
        ey = u.i >> 52 & 0x7ff;
        ex = ix >> 20;
        if (ex - ey > 16) { /* 2nd round, good to 118 bits */
            t = r;
            w = fn * pio2_2;
            r = t - w;
            w = fn * pio2_2t - ((t - r) - w);
            y[0] = r - w;
            u.f = y[0];
            ey = u.i >> 52 & 0x7ff;
            if (ex - ey > 49) { /* 3rd round, good to 151 bits, covers all cases */
                t = r;
                w = fn * pio2_3;
                r = t - w;
                w = fn * pio2_3t - ((t - r) - w);
                y[0] = r - w;
            }
        }
        /* whatever was lost when rounding y[0] */
        y[1] = (r - y[0]) - w;
        return n;
    }
    /*
     * all other (large) arguments
     */
    if (ix >= 0x7ff00000) {  /* x is inf or NaN */
        y[0] = y[1] = x - x;
        return 0;
    }
    /* set z = scalbn(|x|,-ilogb(x)+23) */
    u.f = x;
    u.i &= (UINT64)-1 >> 12;
    u.i |= (UINT64)(0x3ff + 23) << 52;
    z = u.f;
    /* split z into pieces tx[0..2]; the first two hold the integer part
     * of z at each step, the last one takes whatever remains */
    for (i = 0; i < 2; i++) {
        tx[i] = (double)(INT32)z;
        z = (z - tx[i]) * 0x1p24;
    }
    tx[i] = z;
    /* skip zero terms, first term is non-zero */
    while (tx[i] == 0.0)
        i--;
    n = __rem_pio2_large(tx, ty, (int)(ix >> 20) - (0x3ff + 23), i + 1, 1);
    /* the reduction was done on |x|; mirror the result for negative x */
    if (sign) {
        y[0] = -ty[0];
        y[1] = -ty[1];
        return -n;
    }
    y[0] = ty[0];
    y[1] = ty[1];
    return n;
}
2576
/* Copied from musl: src/math/__sin.c */
/*
 * Polynomial sine kernel for an already reduced argument.
 *
 * x   reduced argument
 * y   tail of the reduced argument
 * iy  0 when y is to be ignored, non-zero when it has to be taken into
 *     account
 */
static double __sin(double x, double y, int iy)
{
    static const double S1  = -1.66666666666666324348e-01,
                 S2  =  8.33333333332248946124e-03,
                 S3  = -1.98412698298579493134e-04,
                 S4  =  2.75573137070700676789e-06,
                 S5  = -2.50507602534068634195e-08,
                 S6  =  1.58969099521155010221e-10;

    const double x2 = x * x;
    const double x4 = x2 * x2;
    const double x3 = x2 * x;
    /* higher order part of the polynomial */
    const double poly = S2 + x2 * (S3 + x2 * S4) + x2 * x4 * (S5 + x2 * S6);

    if (iy == 0)
        return x + x3 * (S1 + x2 * poly);

    /* fold the tail y into the result */
    return x - ((x2 * (0.5 * y - x3 * poly) - y) - x3 * S1);
}
2598
/* Copied from musl: src/math/__cos.c */
/*
 * Polynomial cosine kernel for an already reduced argument.
 *
 * x  reduced argument
 * y  tail of the reduced argument (pass 0 when there is none)
 */
static double __cos(double x, double y)
{
    static const double C1  =  4.16666666666666019037e-02,
                 C2  = -1.38888888888741095749e-03,
                 C3  =  2.48015872894767294178e-05,
                 C4  = -2.75573143513906633035e-07,
                 C5  =  2.08757232129817482790e-09,
                 C6  = -1.13596475577881948265e-11;

    const double x2 = x * x;
    const double x4 = x2 * x2;
    /* polynomial part beyond 1 - x^2/2 */
    const double poly = x2 * (C1 + x2 * (C2 + x2 * C3)) + x4 * x4 * (C4 + x2 * (C5 + x2 * C6));
    const double half_x2 = 0.5 * x2;
    const double lead = 1.0 - half_x2;

    /* (1.0 - lead) - half_x2 recovers what was lost when rounding lead */
    return lead + (((1.0 - lead) - half_x2) + (x2 * poly - x * y));
}
2617
2618 /*********************************************************************
2619  *              cos (MSVCRT.@)
2620  *
2621  * Copied from musl: src/math/cos.c
2622  */
2623 double CDECL cos( double x )
2624 {
2625     double y[2];
2626     UINT32 ix;
2627     unsigned n;
2628
2629     ix = *(ULONGLONG*)&x >> 32;
2630     ix &= 0x7fffffff;
2631
2632     /* |x| ~< pi/4 */
2633     if (ix <= 0x3fe921fb) {
2634         if (ix < 0x3e46a09e) { /* |x| < 2**-27 * sqrt(2) */
2635             /* raise inexact if x!=0 */
2636             fp_barrier(x + 0x1p120f);
2637             return 1.0;
2638         }
2639         return __cos(x, 0);
2640     }
2641
2642     /* cos(Inf or NaN) is NaN */
2643     if (isinf(x)) return math_error(_DOMAIN, "cos", x, 0, x - x);
2644     if (ix >= 0x7ff00000)
2645         return x - x;
2646
2647     /* argument reduction */
2648     n = __rem_pio2(x, y);
2649     switch (n & 3) {
2650     case 0: return __cos(y[0], y[1]);
2651     case 1: return -__sin(y[0], y[1], 1);
2652     case 2: return -__cos(y[0], y[1]);
2653     default: return __sin(y[0], y[1], 1);
2654     }
2655 }
2656
/* Copied from musl: src/math/expm1.c */
/*
 * Internal exp(x) - 1 helper.
 *
 * nan is returned unchanged, -inf gives -1 and +inf gives +inf.  Finite
 * arguments at or below -56*ln2 are reported through
 * math_error(_UNDERFLOW, ...) with result -1, arguments above
 * o_threshold through math_error(_OVERFLOW, ...).
 * NOTE(review): both errors are reported under the function name "exp",
 * not "expm1" - presumably on purpose; confirm.
 */
static double CDECL __expm1(double x)
{
    /* o_threshold: largest argument that does not overflow;
     * ln2_hi + ln2_lo: ln(2) split in two parts; invln2: 1/ln(2);
     * Q1..Q5: coefficients of the polynomial used for r1 below */
    static const double o_threshold = 7.09782712893383973096e+02,
        ln2_hi = 6.93147180369123816490e-01,
        ln2_lo = 1.90821492927058770002e-10,
        invln2 = 1.44269504088896338700e+00,
        Q1 = -3.33333333333331316428e-02,
        Q2 = 1.58730158725481460165e-03,
        Q3 = -7.93650757867487942473e-05,
        Q4 = 4.00821782732936239552e-06,
        Q5 = -2.01099218183624371326e-07;

    double y, hi, lo, c, t, e, hxs, hfx, r1, twopk;
    union {double f; UINT64 i;} u = {x};
    /* high word of |x| */
    UINT32 hx = u.i >> 32 & 0x7fffffff;
    int k, sign = u.i >> 63;

    /* filter out huge and non-finite argument */
    if (hx >= 0x4043687A) { /* if |x|>=56*ln2 */
        if (isnan(x))
            return x;
        if (isinf(x))
            return sign ? -1 : x;
        if (sign)
            return math_error(_UNDERFLOW, "exp", x, 0, -1);
        if (x > o_threshold)
            return math_error(_OVERFLOW, "exp", x, 0, x * 0x1p1023);
    }

    /* argument reduction */
    if (hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */
        if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
            if (!sign) {
                hi = x - ln2_hi;
                lo = ln2_lo;
                k = 1;
            } else {
                hi = x + ln2_hi;
                lo = -ln2_lo;
                k = -1;
            }
        } else {
            /* k is x / ln2 rounded to the nearest integer */
            k = invln2 * x + (sign ? -0.5 : 0.5);
            t = k;
            hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
            lo = t * ln2_lo;
        }
        x = hi - lo;
        /* c is the rounding error of the subtraction above */
        c = (hi - x) - lo;
    } else if (hx < 0x3c900000) { /* |x| < 2**-54, return x */
        fp_barrier(x + 0x1p120f);
        if (hx < 0x00100000)
            fp_barrier((float)x);
        return x;
    } else
        /* no reduction; c stays unset and is not read when k == 0 */
        k = 0;

    /* x is now in primary range */
    hfx = 0.5 * x;
    hxs = x * hfx;
    r1 = 1.0 + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5))));
    t = 3.0 - r1 * hfx;
    e = hxs * ((r1 - t) / (6.0 - x * t));
    if (k == 0) /* c is 0 */
        return x - (x * e - hxs);
    e = x * (e - c) - c;
    e -= hxs;
    /* exp(x) ~ 2^k (x_reduced - e + 1) */
    if (k == -1)
        return 0.5 * (x - e) - 0.5;
    if (k == 1) {
        if (x < -0.25)
            return -2.0 * (e - (x + 0.5));
        return 1.0 + 2.0 * (x - e);
    }
    /* build the scale factor by writing the biased exponent directly */
    u.i = (UINT64)(0x3ff + k) << 52; /* 2^k */
    twopk = u.f;
    if (k < 0 || k > 56) { /* suffice to return exp(x)-1 */
        y = x - e + 1.0;
        /* 2^1024 is not representable, so scale in two steps */
        if (k == 1024)
            y = y * 2.0 * 0x1p1023;
        else
            y = y * twopk;
        return y - 1.0;
    }
    u.i = (UINT64)(0x3ff - k) << 52; /* 2^-k */
    if (k < 20)
        y = (x - e + (1 - u.f)) * twopk;
    else
        y = (x - (e + u.f) + 1) * twopk;
    return y;
}
2750
2751 static double __expo2(double x, double sign)
2752 {
2753     static const int k = 2043;
2754     static const double kln2 = 0x1.62066151add8bp+10;
2755     double scale;
2756
2757     *(UINT64*)&scale = (UINT64)(0x3ff + k / 2) << 52;
2758     return exp(x - kln2) * (sign * scale) * scale;
2759 }
2760
2761 /*********************************************************************
2762  *              cosh (MSVCRT.@)
2763  *
2764  * Copied from musl: src/math/cosh.c
2765  */
2766 double CDECL cosh( double x )
2767 {
2768     UINT64 ux = *(UINT64*)&x;
2769     UINT32 w;
2770     double t;
2771
2772     /* |x| */
2773     ux &= (uint64_t)-1 / 2;
2774     x = *(double*)&ux;
2775     w = ux >> 32;
2776
2777     /* |x| < log(2) */
2778     if (w < 0x3fe62e42) {
2779         if (w < 0x3ff00000 - (26 << 20)) {
2780             fp_barrier(x + 0x1p120f);
2781             return 1;
2782         }
2783         t = __expm1(x);
2784         return 1 + t * t / (2 * (1 + t));
2785     }
2786
2787     /* |x| < log(DBL_MAX) */
2788     if (w < 0x40862e42) {
2789         t = exp(x);
2790         /* note: if x>log(0x1p26) then the 1/t is not needed */
2791         return 0.5 * (t + 1 / t);
2792     }
2793
2794     /* |x| > log(DBL_MAX) or nan */
2795     /* note: the result is stored to handle overflow */
2796     t = __expo2(x, 1.0);
2797     return t;
2798 }
2799
2800 /* Copied from musl: src/math/exp_data.c */
2801 static const UINT64 exp_T[] = {
2802     0x0ULL, 0x3ff0000000000000ULL,
2803     0x3c9b3b4f1a88bf6eULL, 0x3feff63da9fb3335ULL,
2804     0xbc7160139cd8dc5dULL, 0x3fefec9a3e778061ULL,
2805     0xbc905e7a108766d1ULL, 0x3fefe315e86e7f85ULL,
2806     0x3c8cd2523567f613ULL, 0x3fefd9b0d3158574ULL,
2807     0xbc8bce8023f98efaULL, 0x3fefd06b29ddf6deULL,
2808     0x3c60f74e61e6c861ULL, 0x3fefc74518759bc8ULL,
2809     0x3c90a3e45b33d399ULL, 0x3fefbe3ecac6f383ULL,
2810     0x3c979aa65d837b6dULL, 0x3fefb5586cf9890fULL,
2811     0x3c8eb51a92fdeffcULL, 0x3fefac922b7247f7ULL,
2812     0x3c3ebe3d702f9cd1ULL, 0x3fefa3ec32d3d1a2ULL,
2813     0xbc6a033489906e0bULL, 0x3fef9b66affed31bULL,
2814     0xbc9556522a2fbd0eULL, 0x3fef9301d0125b51ULL,
2815     0xbc5080ef8c4eea55ULL, 0x3fef8abdc06c31ccULL,
2816     0xbc91c923b9d5f416ULL, 0x3fef829aaea92de0ULL,
2817     0x3c80d3e3e95c55afULL, 0x3fef7a98c8a58e51ULL,
2818     0xbc801b15eaa59348ULL, 0x3fef72b83c7d517bULL,
2819     0xbc8f1ff055de323dULL, 0x3fef6af9388c8deaULL,
2820     0x3c8b898c3f1353bfULL, 0x3fef635beb6fcb75ULL,
2821     0xbc96d99c7611eb26ULL, 0x3fef5be084045cd4ULL,
2822     0x3c9aecf73e3a2f60ULL, 0x3fef54873168b9aaULL,
2823     0xbc8fe782cb86389dULL, 0x3fef4d5022fcd91dULL,
2824     0x3c8a6f4144a6c38dULL, 0x3fef463b88628cd6ULL,
2825     0x3c807a05b0e4047dULL, 0x3fef3f49917ddc96ULL,
2826     0x3c968efde3a8a894ULL, 0x3fef387a6e756238ULL,
2827     0x3c875e18f274487dULL, 0x3fef31ce4fb2a63fULL,
2828     0x3c80472b981fe7f2ULL, 0x3fef2b4565e27cddULL,
2829     0xbc96b87b3f71085eULL, 0x3fef24dfe1f56381ULL,
2830     0x3c82f7e16d09ab31ULL, 0x3fef1e9df51fdee1ULL,
2831     0xbc3d219b1a6fbffaULL, 0x3fef187fd0dad990ULL,
2832     0x3c8b3782720c0ab4ULL, 0x3fef1285a6e4030bULL,
2833     0x3c6e149289cecb8fULL, 0x3fef0cafa93e2f56ULL,
2834     0x3c834d754db0abb6ULL, 0x3fef06fe0a31b715ULL,
2835     0x3c864201e2ac744cULL, 0x3fef0170fc4cd831ULL,
2836     0x3c8fdd395dd3f84aULL, 0x3feefc08b26416ffULL,
2837     0xbc86a3803b8e5b04ULL, 0x3feef6c55f929ff1ULL,
2838     0xbc924aedcc4b5068ULL, 0x3feef1a7373aa9cbULL,
2839     0xbc9907f81b512d8eULL, 0x3feeecae6d05d866ULL,
2840     0xbc71d1e83e9436d2ULL, 0x3feee7db34e59ff7ULL,
2841     0xbc991919b3ce1b15ULL, 0x3feee32dc313a8e5ULL,
2842     0x3c859f48a72a4c6dULL, 0x3feedea64c123422ULL,
2843     0xbc9312607a28698aULL, 0x3feeda4504ac801cULL,
2844     0xbc58a78f4817895bULL, 0x3feed60a21f72e2aULL,
2845     0xbc7c2c9b67499a1bULL, 0x3feed1f5d950a897ULL,
2846     0x3c4363ed60c2ac11ULL, 0x3feece086061892dULL,
2847     0x3c9666093b0664efULL, 0x3feeca41ed1d0057ULL,
2848     0x3c6ecce1daa10379ULL, 0x3feec6a2b5c13cd0ULL,
2849     0x3c93ff8e3f0f1230ULL, 0x3feec32af0d7d3deULL,
2850     0x3c7690cebb7aafb0ULL, 0x3feebfdad5362a27ULL,
2851     0x3c931dbdeb54e077ULL, 0x3feebcb299fddd0dULL,
2852     0xbc8f94340071a38eULL, 0x3feeb9b2769d2ca7ULL,
2853     0xbc87deccdc93a349ULL, 0x3feeb6daa2cf6642ULL,
2854     0xbc78dec6bd0f385fULL, 0x3feeb42b569d4f82ULL,
2855     0xbc861246ec7b5cf6ULL, 0x3feeb1a4ca5d920fULL,
2856     0x3c93350518fdd78eULL, 0x3feeaf4736b527daULL,
2857     0x3c7b98b72f8a9b05ULL, 0x3feead12d497c7fdULL,
2858     0x3c9063e1e21c5409ULL, 0x3feeab07dd485429ULL,
2859     0x3c34c7855019c6eaULL, 0x3feea9268a5946b7ULL,
2860     0x3c9432e62b64c035ULL, 0x3feea76f15ad2148ULL,
2861     0xbc8ce44a6199769fULL, 0x3feea5e1b976dc09ULL,
2862     0xbc8c33c53bef4da8ULL, 0x3feea47eb03a5585ULL,
2863     0xbc845378892be9aeULL, 0x3feea34634ccc320ULL,
2864     0xbc93cedd78565858ULL, 0x3feea23882552225ULL,
2865     0x3c5710aa807e1964ULL, 0x3feea155d44ca973ULL,
2866     0xbc93b3efbf5e2228ULL, 0x3feea09e667f3bcdULL,
2867     0xbc6a12ad8734b982ULL, 0x3feea012750bdabfULL,
2868     0xbc6367efb86da9eeULL, 0x3fee9fb23c651a2fULL,
2869     0xbc80dc3d54e08851ULL, 0x3fee9f7df9519484ULL,
2870     0xbc781f647e5a3ecfULL, 0x3fee9f75e8ec5f74ULL,
2871     0xbc86ee4ac08b7db0ULL, 0x3fee9f9a48a58174ULL,
2872     0xbc8619321e55e68aULL, 0x3fee9feb564267c9ULL,
2873     0x3c909ccb5e09d4d3ULL, 0x3feea0694fde5d3fULL,
2874     0xbc7b32dcb94da51dULL, 0x3feea11473eb0187ULL,
2875     0x3c94ecfd5467c06bULL, 0x3feea1ed0130c132ULL,
2876     0x3c65ebe1abd66c55ULL, 0x3feea2f336cf4e62ULL,
2877     0xbc88a1c52fb3cf42ULL, 0x3feea427543e1a12ULL,
2878     0xbc9369b6f13b3734ULL, 0x3feea589994cce13ULL,
2879     0xbc805e843a19ff1eULL, 0x3feea71a4623c7adULL,
2880     0xbc94d450d872576eULL, 0x3feea8d99b4492edULL,
2881     0x3c90ad675b0e8a00ULL, 0x3feeaac7d98a6699ULL,
2882     0x3c8db72fc1f0eab4ULL, 0x3feeace5422aa0dbULL,
2883     0xbc65b6609cc5e7ffULL, 0x3feeaf3216b5448cULL,
2884     0x3c7bf68359f35f44ULL, 0x3feeb1ae99157736ULL,
2885     0xbc93091fa71e3d83ULL, 0x3feeb45b0b91ffc6ULL,
2886     0xbc5da9b88b6c1e29ULL, 0x3feeb737b0cdc5e5ULL,
2887     0xbc6c23f97c90b959ULL, 0x3feeba44cbc8520fULL,
2888     0xbc92434322f4f9aaULL, 0x3feebd829fde4e50ULL,
2889     0xbc85ca6cd7668e4bULL, 0x3feec0f170ca07baULL,
2890     0x3c71affc2b91ce27ULL, 0x3feec49182a3f090ULL,
2891     0x3c6dd235e10a73bbULL, 0x3feec86319e32323ULL,
2892     0xbc87c50422622263ULL, 0x3feecc667b5de565ULL,
2893     0x3c8b1c86e3e231d5ULL, 0x3feed09bec4a2d33ULL,
2894     0xbc91bbd1d3bcbb15ULL, 0x3feed503b23e255dULL,
2895     0x3c90cc319cee31d2ULL, 0x3feed99e1330b358ULL,
2896     0x3c8469846e735ab3ULL, 0x3feede6b5579fdbfULL,
2897     0xbc82dfcd978e9db4ULL, 0x3feee36bbfd3f37aULL,
2898     0x3c8c1a7792cb3387ULL, 0x3feee89f995ad3adULL,
2899     0xbc907b8f4ad1d9faULL, 0x3feeee07298db666ULL,
2900     0xbc55c3d956dcaebaULL, 0x3feef3a2b84f15fbULL,
2901     0xbc90a40e3da6f640ULL, 0x3feef9728de5593aULL,
2902     0xbc68d6f438ad9334ULL, 0x3feeff76f2fb5e47ULL,
2903     0xbc91eee26b588a35ULL, 0x3fef05b030a1064aULL,
2904     0x3c74ffd70a5fddcdULL, 0x3fef0c1e904bc1d2ULL,
2905     0xbc91bdfbfa9298acULL, 0x3fef12c25bd71e09ULL,
2906     0x3c736eae30af0cb3ULL, 0x3fef199bdd85529cULL,
2907     0x3c8ee3325c9ffd94ULL, 0x3fef20ab5fffd07aULL,
2908     0x3c84e08fd10959acULL, 0x3fef27f12e57d14bULL,
2909     0x3c63cdaf384e1a67ULL, 0x3fef2f6d9406e7b5ULL,
2910     0x3c676b2c6c921968ULL, 0x3fef3720dcef9069ULL,
2911     0xbc808a1883ccb5d2ULL, 0x3fef3f0b555dc3faULL,
2912     0xbc8fad5d3ffffa6fULL, 0x3fef472d4a07897cULL,
2913     0xbc900dae3875a949ULL, 0x3fef4f87080d89f2ULL,
2914     0x3c74a385a63d07a7ULL, 0x3fef5818dcfba487ULL,
2915     0xbc82919e2040220fULL, 0x3fef60e316c98398ULL,
2916     0x3c8e5a50d5c192acULL, 0x3fef69e603db3285ULL,
2917     0x3c843a59ac016b4bULL, 0x3fef7321f301b460ULL,
2918     0xbc82d52107b43e1fULL, 0x3fef7c97337b9b5fULL,
2919     0xbc892ab93b470dc9ULL, 0x3fef864614f5a129ULL,
2920     0x3c74b604603a88d3ULL, 0x3fef902ee78b3ff6ULL,
2921     0x3c83c5ec519d7271ULL, 0x3fef9a51fbc74c83ULL,
2922     0xbc8ff7128fd391f0ULL, 0x3fefa4afa2a490daULL,
2923     0xbc8dae98e223747dULL, 0x3fefaf482d8e67f1ULL,
2924     0x3c8ec3bc41aa2008ULL, 0x3fefba1bee615a27ULL,
2925     0x3c842b94c3a9eb32ULL, 0x3fefc52b376bba97ULL,
2926     0x3c8a64a931d185eeULL, 0x3fefd0765b6e4540ULL,
2927     0xbc8e37bae43be3edULL, 0x3fefdbfdad9cbe14ULL,
2928     0x3c77893b4d91cd9dULL, 0x3fefe7c1819e90d8ULL,
2929     0x3c5305c14160cc89ULL, 0x3feff3c22b8f71f1ULL
2930 };
2931
2932 /*********************************************************************
2933  *              exp (MSVCRT.@)
2934  *
2935  * Copied from musl: src/math/exp.c
2936  */
2937 double CDECL exp( double x )
2938 {
2939     static const double C[] = {
2940         0x1.ffffffffffdbdp-2,
2941         0x1.555555555543cp-3,
2942         0x1.55555cf172b91p-5,
2943         0x1.1111167a4d017p-7
2944     };
2945     static const double invln2N = 0x1.71547652b82fep0 * (1 << 7),
2946         negln2hiN = -0x1.62e42fefa0000p-8,
2947         negln2loN = -0x1.cf79abc9e3b3ap-47;
2948
2949     UINT32 abstop;
2950     UINT64 ki, idx, top, sbits;
2951     double kd, z, r, r2, scale, tail, tmp;
2952
2953     abstop = (*(UINT64*)&x >> 52) & 0x7ff;
2954     if (abstop -  0x3c9 >= 0x408 - 0x3c9) {
2955         if (abstop - 0x3c9 >= 0x80000000)
2956             /* Avoid spurious underflow for tiny x. */
2957             /* Note: 0 is common input. */
2958             return 1.0 + x;
2959         if (abstop >= 0x409) {
2960             if (*(UINT64*)&x == 0xfff0000000000000ULL)
2961                 return 0.0;
2962 #if _MSVCR_VER == 0
2963             if (*(UINT64*)&x > 0x7ff0000000000000ULL)
2964                 return math_error(_DOMAIN, "exp", x, 0, 1.0 + x);
2965 #endif
2966             if (abstop >= 0x7ff)
2967                 return 1.0 + x;
2968             if (*(UINT64*)&x >> 63)
2969                 return math_error(_UNDERFLOW, "exp", x, 0, fp_barrier(DBL_MIN) * DBL_MIN);
2970             else
2971                 return math_error(_OVERFLOW, "exp", x, 0, fp_barrier(DBL_MAX) * DBL_MAX);
2972         }
2973         /* Large x is special cased below. */
2974         abstop = 0;
2975     }
2976
2977     /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
2978     /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
2979     z = invln2N * x;
2980     kd = __round(z);
2981     ki = (INT64)kd;
2982
2983     r = x + kd * negln2hiN + kd * negln2loN;
2984     /* 2^(k/N) ~= scale * (1 + tail). */
2985     idx = 2 * (ki % (1 << 7));
2986     top = ki << (52 - 7);
2987     tail = *(double*)&exp_T[idx];
2988     /* This is only a valid scale when -1023*N < k < 1024*N. */
2989     sbits = exp_T[idx + 1] + top;
2990     /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
2991     /* Evaluation is optimized assuming superscalar pipelined execution. */
2992     r2 = r * r;
2993     /* Without fma the worst case error is 0.25/N ulp larger. */
2994     /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
2995     tmp = tail + r + r2 * (C[0] + r * C[1]) + r2 * r2 * (C[2] + r * C[3]);
2996     if (abstop == 0) {
2997         /* Handle cases that may overflow or underflow when computing the result that
2998            is scale*(1+TMP) without intermediate rounding. The bit representation of
2999            scale is in SBITS, however it has a computed exponent that may have
3000            overflown into the sign bit so that needs to be adjusted before using it as
3001            a double. (int32_t)KI is the k used in the argument reduction and exponent
3002            adjustment of scale, positive k here means the result may overflow and
3003            negative k means the result may underflow. */
3004         double scale, y;
3005
3006         if ((ki & 0x80000000) == 0) {
3007             /* k > 0, the exponent of scale might have overflowed by <= 460. */
3008             sbits -= 1009ull << 52;
3009             scale = *(double*)&sbits;
3010             y = 0x1p1009 * (scale + scale * tmp);
3011             if (isinf(y))
3012                 return math_error(_OVERFLOW, "exp", x, 0, y);
3013             return y;
3014         }
3015         /* k < 0, need special care in the subnormal range. */
3016         sbits += 1022ull << 52;
3017         scale = *(double*)&sbits;
3018         y = scale + scale * tmp;
3019         if (y < 1.0) {
3020             /* Round y to the right precision before scaling it into the subnormal
3021                range to avoid double rounding that can cause 0.5+E/2 ulp error where
3022                E is the worst-case ulp error outside the subnormal range. So this
3023                is only useful if the goal is better than 1 ulp worst-case error. */
3024             double hi, lo;
3025             lo = scale - y + scale * tmp;
3026             hi = 1.0 + y;
3027             lo = 1.0 - hi + y + lo;
3028             y = hi + lo - 1.0;
3029             /* Avoid -0.0 with downward rounding. */
3030             if (y == 0.0)
3031                 y = 0.0;
3032             /* The underflow exception needs to be signaled explicitly. */
3033             fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022);
3034             y = 0x1p-1022 * y;
3035             return math_error(_UNDERFLOW, "exp", x, 0, y);
3036         }
3037         y = 0x1p-1022 * y;
3038         return y;
3039     }
3040     scale = *(double*)&sbits;
3041     /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
3042        is no spurious underflow here even without fma. */
3043     return scale + scale * tmp;
3044 }
3045
3046 /*********************************************************************
3047  *              fmod (MSVCRT.@)
3048  *
3049  * Copied from musl: src/math/fmod.c
3050  */
3051 double CDECL fmod( double x, double y )
3052 {
3053     UINT64 xi = *(UINT64*)&x;
3054     UINT64 yi = *(UINT64*)&y;
3055     int ex = xi >> 52 & 0x7ff;
3056     int ey = yi >> 52 & 0x7ff;
3057     int sx = xi >> 63;
3058     UINT64 i;
3059
3060     if (isinf(x)) return math_error(_DOMAIN, "fmod", x, y, (x * y) / (x * y));
3061     if (yi << 1 == 0 || isnan(y) || ex == 0x7ff)
3062         return (x * y) / (x * y);
3063     if (xi << 1 <= yi << 1) {
3064         if (xi << 1 == yi << 1)
3065             return 0 * x;
3066         return x;
3067     }
3068
3069     /* normalize x and y */
3070     if (!ex) {
3071         for (i = xi << 12; i >> 63 == 0; ex--, i <<= 1);
3072         xi <<= -ex + 1;
3073     } else {
3074         xi &= -1ULL >> 12;
3075         xi |= 1ULL << 52;
3076     }
3077     if (!ey) {
3078         for (i = yi << 12; i >> 63 == 0; ey--, i <<= 1);
3079         yi <<= -ey + 1;
3080     } else {
3081         yi &= -1ULL >> 12;
3082         yi |= 1ULL << 52;
3083     }
3084
3085     /* x mod y */
3086     for (; ex > ey; ex--) {
3087         i = xi - yi;
3088         if (i >> 63 == 0) {
3089             if (i == 0)
3090                 return 0 * x;
3091             xi = i;
3092         }
3093         xi <<= 1;
3094     }
3095     i = xi - yi;
3096     if (i >> 63 == 0) {
3097         if (i == 0)
3098             return 0 * x;
3099         xi = i;
3100     }
3101     for (; xi >> 52 == 0; xi <<= 1, ex--);
3102
3103     /* scale result */
3104     if (ex > 0) {
3105         xi -= 1ULL << 52;
3106         xi |= (UINT64)ex << 52;
3107     } else {
3108         xi >>= -ex + 1;
3109     }
3110     xi |= (UINT64)sx << 63;
3111     return *(double*)&xi;
3112 }
3113
3114 /*********************************************************************
3115  *              log (MSVCRT.@)
3116  *
3117  * Copied from musl: src/math/log.c src/math/log_data.c
3118  */
3119 double CDECL log( double x )
3120 {
3121     static const double Ln2hi = 0x1.62e42fefa3800p-1,
3122         Ln2lo = 0x1.ef35793c76730p-45;
3123     static const double A[] = {
3124         -0x1.0000000000001p-1,
3125         0x1.555555551305bp-2,
3126         -0x1.fffffffeb459p-3,
3127         0x1.999b324f10111p-3,
3128         -0x1.55575e506c89fp-3
3129     };
3130     static const double B[] = {
3131         -0x1p-1,
3132         0x1.5555555555577p-2,
3133         -0x1.ffffffffffdcbp-3,
3134         0x1.999999995dd0cp-3,
3135         -0x1.55555556745a7p-3,
3136         0x1.24924a344de3p-3,
3137         -0x1.fffffa4423d65p-4,
3138         0x1.c7184282ad6cap-4,
3139         -0x1.999eb43b068ffp-4,
3140         0x1.78182f7afd085p-4,
3141         -0x1.5521375d145cdp-4
3142     };
3143     static const struct {
3144         double invc, logc;
3145     } T[] = {
3146         {0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
3147         {0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
3148         {0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
3149         {0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2},
3150         {0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2},
3151         {0x1.69147332f0cbap+0, -0x1.602d076180000p-2},
3152         {0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2},
3153         {0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2},
3154         {0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2},
3155         {0x1.614b36b9ddc14p+0, -0x1.49da7fda85000p-2},
3156         {0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2},
3157         {0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2},
3158         {0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2},
3159         {0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2},
3160         {0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2},
3161         {0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2},
3162         {0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2},
3163         {0x1.52aff42064583p+0, -0x1.1e9e129279000p-2},
3164         {0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2},
3165         {0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2},
3166         {0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2},
3167         {0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2},
3168         {0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2},
3169         {0x1.4880524d48434p+0, -0x1.feb224586f000p-3},
3170         {0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3},
3171         {0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3},
3172         {0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3},
3173         {0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3},
3174         {0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3},
3175         {0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3},
3176         {0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3},
3177         {0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3},
3178         {0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3},
3179         {0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3},
3180         {0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3},
3181         {0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3},
3182         {0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3},
3183         {0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3},
3184         {0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3},
3185         {0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3},
3186         {0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3},
3187         {0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3},
3188         {0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3},
3189         {0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3},
3190         {0x1.293726014b530p+0, -0x1.31b996b490000p-3},
3191         {0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3},
3192         {0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3},
3193         {0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3},
3194         {0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3},
3195         {0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3},
3196         {0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4},
3197         {0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4},
3198         {0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4},
3199         {0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4},
3200         {0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4},
3201         {0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4},
3202         {0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4},
3203         {0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4},
3204         {0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4},
3205         {0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4},
3206         {0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4},
3207         {0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4},
3208         {0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4},
3209         {0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4},
3210         {0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5},
3211         {0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5},
3212         {0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5},
3213         {0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5},
3214         {0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5},
3215         {0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5},
3216         {0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5},
3217         {0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5},
3218         {0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6},
3219         {0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6},
3220         {0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6},
3221         {0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6},
3222         {0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7},
3223         {0x1.02865137932a9p+0, -0x1.419355daa0000p-7},
3224         {0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8},
3225         {0x1.008040614b195p+0, -0x1.0040979240000p-9},
3226         {0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9},
3227         {0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7},
3228         {0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6},
3229         {0x1.f25f63ceeadcdp-1, 0x1.b9fc114890000p-6},
3230         {0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5},
3231         {0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5},
3232         {0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5},
3233         {0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5},
3234         {0x1.e01e009609a56p-1, 0x1.07598e598c000p-4},
3235         {0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4},
3236         {0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4},
3237         {0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4},
3238         {0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4},
3239         {0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4},
3240         {0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4},
3241         {0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4},
3242         {0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4},
3243         {0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3},
3244         {0x1.bf583eeece73fp-1, 0x1.147858292b000p-3},
3245         {0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3},
3246         {0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3},
3247         {0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3},
3248         {0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3},
3249         {0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3},
3250         {0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3},
3251         {0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3},
3252         {0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3},
3253         {0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3},
3254         {0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3},
3255         {0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3},
3256         {0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3},
3257         {0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3},
3258         {0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3},
3259         {0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3},
3260         {0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3},
3261         {0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3},
3262         {0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2},
3263         {0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2},
3264         {0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2},
3265         {0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2},
3266         {0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2},
3267         {0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2},
3268         {0x1.8060195f40260p-1, 0x1.2595fd7636800p-2},
3269         {0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2},
3270         {0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2},
3271         {0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
3272         {0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
3273         {0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2}
3274     };
3275     static const struct {
3276         double chi, clo;
3277     } T2[] = {
3278         {0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
3279         {0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
3280         {0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
3281         {0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57},
3282         {0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56},
3283         {0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55},
3284         {0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55},
3285         {0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56},
3286         {0x1.710000e86978p-1, 0x1.bff6671097952p-56},
3287         {0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55},
3288         {0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57},
3289         {0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57},
3290         {0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55},
3291         {0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56},
3292         {0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55},
3293         {0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55},
3294         {0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55},
3295         {0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55},
3296         {0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55},
3297         {0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55},
3298         {0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55},
3299         {0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56},
3300         {0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55},
3301         {0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55},
3302         {0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55},
3303         {0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56},
3304         {0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55},
3305         {0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56},
3306         {0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55},
3307         {0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55},
3308         {0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60},
3309         {0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55},
3310         {0x1.a10001145b006p-1, 0x1.4ff489958da56p-56},
3311         {0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55},
3312         {0x1.a500010971d79p-1, 0x1.8fecadd78793p-55},
3313         {0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55},
3314         {0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55},
3315         {0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57},
3316         {0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55},
3317         {0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57},
3318         {0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58},
3319         {0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56},
3320         {0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56},
3321         {0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55},
3322         {0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56},
3323         {0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57},
3324         {0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57},
3325         {0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55},
3326         {0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55},
3327         {0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57},
3328         {0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55},
3329         {0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55},
3330         {0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56},
3331         {0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57},
3332         {0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55},
3333         {0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55},
3334         {0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56},
3335         {0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55},
3336         {0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58},
3337         {0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56},
3338         {0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56},
3339         {0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55},
3340         {0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55},
3341         {0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57},
3342         {0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56},
3343         {0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56},
3344         {0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56},
3345         {0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58},
3346         {0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55},
3347         {0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56},
3348         {0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58},
3349         {0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55},
3350         {0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59},
3351         {0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55},
3352         {0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55},
3353         {0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57},
3354         {0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56},
3355         {0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57},
3356         {0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56},
3357         {0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57},
3358         {0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55},
3359         {0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54},
3360         {0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54},
3361         {0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55},
3362         {0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57},
3363         {0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54},
3364         {0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55},
3365         {0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56},
3366         {0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55},
3367         {0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54},
3368         {0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54},
3369         {0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55},
3370         {0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54},
3371         {0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54},
3372         {0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57},
3373         {0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54},
3374         {0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54},
3375         {0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54},
3376         {0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56},
3377         {0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56},
3378         {0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56},
3379         {0x1.2b00014556313p+0, -0x1.2808233f21f02p-54},
3380         {0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55},
3381         {0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55},
3382         {0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55},
3383         {0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54},
3384         {0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54},
3385         {0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55},
3386         {0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54},
3387         {0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55},
3388         {0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56},
3389         {0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54},
3390         {0x1.410001532aff4p+0, 0x1.7f8375f198524p-57},
3391         {0x1.4300017478b29p+0, 0x1.301e672dc5143p-55},
3392         {0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55},
3393         {0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54},
3394         {0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54},
3395         {0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54},
3396         {0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54},
3397         {0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54},
3398         {0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57},
3399         {0x1.530001605277ap+0, -0x1.6bfcece233209p-54},
3400         {0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55},
3401         {0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54},
3402         {0x1.5900017e61012p+0, 0x1.87ec581afef9p-55},
3403         {0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
3404         {0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
3405         {0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54}
3406     };
3407
3408     double w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
3409     UINT64 ix, iz, tmp;
3410     UINT32 top;
3411     int k, i;
3412
3413     ix = *(UINT64*)&x;
3414     top = ix >> 48;
3415     if (ix - 0x3fee000000000000ULL < 0x3090000000000ULL) {
3416         double rhi, rlo;
3417
3418         /* Handle close to 1.0 inputs separately. */
3419         /* Fix sign of zero with downward rounding when x==1. */
3420         if (ix == 0x3ff0000000000000ULL)
3421             return 0;
3422         r = x - 1.0;
3423         r2 = r * r;
3424         r3 = r * r2;
3425         y = r3 * (B[1] + r * B[2] + r2 * B[3] + r3 * (B[4] + r * B[5] + r2 * B[6] +
3426                     r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
3427         /* Worst-case error is around 0.507 ULP. */
3428         w = r * 0x1p27;
3429         rhi = r + w - w;
3430         rlo = r - rhi;
3431         w = rhi * rhi * B[0]; /* B[0] == -0.5. */
3432         hi = r + w;
3433         lo = r - hi + w;
3434         lo += B[0] * rlo * (rhi + r);
3435         y += lo;
3436         y += hi;
3437         return y;
3438     }
3439     if (top - 0x0010 >= 0x7ff0 - 0x0010) {
3440         /* x < 0x1p-1022 or inf or nan. */
3441         if (ix * 2 == 0)
3442             return math_error(_SING, "log", x, 0, (top & 0x8000 ? 1.0 : -1.0) / x);
3443         if (ix == 0x7ff0000000000000ULL) /* log(inf) == inf. */
3444             return x;
3445         if ((top & 0x7ff0) == 0x7ff0 && (ix & 0xfffffffffffffULL))
3446             return x;
3447         if (top & 0x8000)
3448             return math_error(_DOMAIN, "log", x, 0, (x - x) / (x - x));
3449         /* x is subnormal, normalize it. */
3450         x *= 0x1p52;
3451         ix = *(UINT64*)&x;
3452         ix -= 52ULL << 52;
3453     }
3454
3455     /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
3456        The range is split into N subintervals.
3457        The ith subinterval contains z and c is near its center. */
3458     tmp = ix - 0x3fe6000000000000ULL;
3459     i = (tmp >> (52 - 7)) % (1 << 7);
3460     k = (INT64)tmp >> 52; /* arithmetic shift */
3461     iz = ix - (tmp & 0xfffULL << 52);
3462     invc = T[i].invc;
3463     logc = T[i].logc;
3464     z = *(double*)&iz;
3465
3466     /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
3467     /* r ~= z/c - 1, |r| < 1/(2*N). */
3468     r = (z - T2[i].chi - T2[i].clo) * invc;
3469     kd = (double)k;
3470
3471     /* hi + lo = r + log(c) + k*Ln2. */
3472     w = kd * Ln2hi + logc;
3473     hi = w + r;
3474     lo = w - hi + r + kd * Ln2lo;
3475
3476     /* log(x) = lo + (log1p(r) - r) + hi. */
3477     r2 = r * r; /* rounding error: 0x1p-54/N^2. */
3478     /* Worst case error if |y| > 0x1p-5:
3479        0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
3480        Worst case error if |y| > 0x1p-4:
3481        0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
3482     y = lo + r2 * A[0] +
3483         r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
3484     return y;
3485 }
3486
3487 /*********************************************************************
3488  *              log10 (MSVCRT.@)
3489  */
3490 double CDECL log10( double x )
3491 {
3492     static const double ivln10hi = 4.34294481878168880939e-01,
3493         ivln10lo = 2.50829467116452752298e-11,
3494         log10_2hi = 3.01029995663611771306e-01,
3495         log10_2lo = 3.69423907715893078616e-13,
3496         Lg1 = 6.666666666666735130e-01,
3497         Lg2 = 3.999999999940941908e-01,
3498         Lg3 = 2.857142874366239149e-01,
3499         Lg4 = 2.222219843214978396e-01,
3500         Lg5 = 1.818357216161805012e-01,
3501         Lg6 = 1.531383769920937332e-01,
3502         Lg7 = 1.479819860511658591e-01;
3503
3504     union {double f; UINT64 i;} u = {x};
3505     double hfsq, f, s, z, R, w, t1, t2, dk, y, hi, lo, val_hi, val_lo;
3506     UINT32 hx;
3507     int k;
3508
3509     hx = u.i >> 32;
3510     k = 0;
3511     if (hx < 0x00100000 || hx >> 31) {
3512         if (u.i << 1 == 0)
3513             return math_error(_SING, "log10", x, 0, -1 / (x * x));
3514         if ((u.i & ~(1ULL << 63)) > 0x7ff0000000000000ULL)
3515             return x;
3516         if (hx >> 31)
3517             return math_error(_DOMAIN, "log10", x, 0, (x - x) / (x - x));
3518         /* subnormal number, scale x up */
3519         k -= 54;
3520         x *= 0x1p54;
3521         u.f = x;
3522         hx = u.i >> 32;
3523     } else if (hx >= 0x7ff00000) {
3524         return x;
3525     } else if (hx == 0x3ff00000 && u.i<<32 == 0)
3526         return 0;
3527
3528     /* reduce x into [sqrt(2)/2, sqrt(2)] */
3529     hx += 0x3ff00000 - 0x3fe6a09e;
3530     k += (int)(hx >> 20) - 0x3ff;
3531     hx = (hx & 0x000fffff) + 0x3fe6a09e;
3532     u.i = (UINT64)hx << 32 | (u.i & 0xffffffff);
3533     x = u.f;
3534
3535     f = x - 1.0;
3536     hfsq = 0.5 * f * f;
3537     s = f / (2.0 + f);
3538     z = s * s;
3539     w = z * z;
3540     t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
3541     t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
3542     R = t2 + t1;
3543
3544     /* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */
3545     hi = f - hfsq;
3546     u.f = hi;
3547     u.i &= (UINT64)-1 << 32;
3548     hi = u.f;
3549     lo = f - hi - hfsq + s * (hfsq + R);
3550
3551     /* val_hi+val_lo ~ log10(1+f) + k*log10(2) */
3552     val_hi = hi * ivln10hi;
3553     dk = k;
3554     y = dk * log10_2hi;
3555     val_lo = dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi;
3556
3557     /*
3558      * Extra precision in for adding y is not strictly needed
3559      * since there is no very large cancellation near x = sqrt(2) or
3560      * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs
3561      * with some parallelism and it reduces the error for many args.
3562      */
3563     w = y + val_hi;
3564     val_lo += (y - w) + val_hi;
3565     val_hi = w;
3566
3567     return val_lo + val_hi;
3568 }
3569
/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
   15 additional bits of precision. IX is the bit representation of x, but
   normalized in the subnormal range using the sign bit for the exponent.

   ix:   bit pattern of the argument as prepared by the caller (see pow()).
   tail: out parameter, always written; receives the low-order correction
         such that y + *tail approximates log(x) more closely than y alone.
   Returns y, the high part of the logarithm. */
static double pow_log(UINT64 ix, double *tail)
{
    /* Lookup data for the 128 subintervals selected by the 7-bit index i
       computed below. Per the formula used further down,
       log(x) = k*Ln2 + log(c) + log1p(z/c-1), invc plays the role of 1/c
       and logc + logctail is log(c) split into a high and a low part.
       Neighbouring entries are sometimes identical. */
    static const struct {
        double invc, logc, logctail;
    } T[] = {
        {0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48},
        {0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46},
        {0x1.6600000000000p+0, -0x1.5767717455800p-2, -0x1.362a4d5b6506dp-45},
        {0x1.6400000000000p+0, -0x1.51aad872df800p-2, -0x1.684e49eb067d5p-49},
        {0x1.6200000000000p+0, -0x1.4be5f95777800p-2, -0x1.41b6993293ee0p-47},
        {0x1.6000000000000p+0, -0x1.4618bc21c6000p-2, 0x1.3d82f484c84ccp-46},
        {0x1.5e00000000000p+0, -0x1.404308686a800p-2, 0x1.c42f3ed820b3ap-50},
        {0x1.5c00000000000p+0, -0x1.3a64c55694800p-2, 0x1.0b1c686519460p-45},
        {0x1.5a00000000000p+0, -0x1.347dd9a988000p-2, 0x1.5594dd4c58092p-45},
        {0x1.5800000000000p+0, -0x1.2e8e2bae12000p-2, 0x1.67b1e99b72bd8p-45},
        {0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46},
        {0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46},
        {0x1.5400000000000p+0, -0x1.22941fbcf7800p-2, -0x1.65a242853da76p-46},
        {0x1.5200000000000p+0, -0x1.1c898c1699800p-2, -0x1.fafbc68e75404p-46},
        {0x1.5000000000000p+0, -0x1.1675cababa800p-2, 0x1.f1fc63382a8f0p-46},
        {0x1.4e00000000000p+0, -0x1.1058bf9ae4800p-2, -0x1.6a8c4fd055a66p-45},
        {0x1.4c00000000000p+0, -0x1.0a324e2739000p-2, -0x1.c6bee7ef4030ep-47},
        {0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48},
        {0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48},
        {0x1.4800000000000p+0, -0x1.fb9186d5e4000p-3, 0x1.d572aab993c87p-47},
        {0x1.4600000000000p+0, -0x1.ef0adcbdc6000p-3, 0x1.b26b79c86af24p-45},
        {0x1.4400000000000p+0, -0x1.e27076e2af000p-3, -0x1.72f4f543fff10p-46},
        {0x1.4200000000000p+0, -0x1.d5c216b4fc000p-3, 0x1.1ba91bbca681bp-45},
        {0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45},
        {0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45},
        {0x1.3e00000000000p+0, -0x1.bc286742d9000p-3, 0x1.94eb0318bb78fp-46},
        {0x1.3c00000000000p+0, -0x1.af3c94e80c000p-3, 0x1.a4e633fcd9066p-52},
        {0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45},
        {0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45},
        {0x1.3800000000000p+0, -0x1.9525a9cf45000p-3, -0x1.ad1d904c1d4e3p-45},
        {0x1.3600000000000p+0, -0x1.87fa06520d000p-3, 0x1.bbdbf7fdbfa09p-45},
        {0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45},
        {0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45},
        {0x1.3200000000000p+0, -0x1.6d60fe719d000p-3, -0x1.0e46aa3b2e266p-46},
        {0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46},
        {0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46},
        {0x1.2e00000000000p+0, -0x1.526e5e3a1b000p-3, -0x1.0de8b90075b8fp-45},
        {0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46},
        {0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46},
        {0x1.2a00000000000p+0, -0x1.371fc201e9000p-3, 0x1.178864d27543ap-48},
        {0x1.2800000000000p+0, -0x1.29552f81ff000p-3, -0x1.48d301771c408p-45},
        {0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45},
        {0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45},
        {0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47},
        {0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47},
        {0x1.2200000000000p+0, -0x1.fec9131dbe000p-4, -0x1.575545ca333f2p-45},
        {0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45},
        {0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45},
        {0x1.1e00000000000p+0, -0x1.c5e548f5bc000p-4, -0x1.d0c57585fbe06p-46},
        {0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45},
        {0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45},
        {0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46},
        {0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46},
        {0x1.1800000000000p+0, -0x1.6f0d28ae56000p-4, -0x1.69737c93373dap-45},
        {0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46},
        {0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46},
        {0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45},
        {0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45},
        {0x1.1200000000000p+0, -0x1.16536eea38000p-4, 0x1.47c5e768fa309p-46},
        {0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45},
        {0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45},
        {0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46},
        {0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46},
        {0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45},
        {0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45},
        {0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48},
        {0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48},
        {0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45},
        {0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45},
        {0x1.0600000000000p+0, -0x1.7b91b07d58000p-6, -0x1.88d5493faa639p-45},
        {0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50},
        {0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50},
        {0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46},
        {0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46},
        {0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0},
        {0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0},
        {0x1.fc00000000000p-1, 0x1.0101575890000p-7, -0x1.0c76b999d2be8p-46},
        {0x1.f800000000000p-1, 0x1.0205658938000p-6, -0x1.3dc5b06e2f7d2p-45},
        {0x1.f400000000000p-1, 0x1.8492528c90000p-6, -0x1.aa0ba325a0c34p-45},
        {0x1.f000000000000p-1, 0x1.0415d89e74000p-5, 0x1.111c05cf1d753p-47},
        {0x1.ec00000000000p-1, 0x1.466aed42e0000p-5, -0x1.c167375bdfd28p-45},
        {0x1.e800000000000p-1, 0x1.894aa149fc000p-5, -0x1.97995d05a267dp-46},
        {0x1.e400000000000p-1, 0x1.ccb73cdddc000p-5, -0x1.a68f247d82807p-46},
        {0x1.e200000000000p-1, 0x1.eea31c006c000p-5, -0x1.e113e4fc93b7bp-47},
        {0x1.de00000000000p-1, 0x1.1973bd1466000p-4, -0x1.5325d560d9e9bp-45},
        {0x1.da00000000000p-1, 0x1.3bdf5a7d1e000p-4, 0x1.cc85ea5db4ed7p-45},
        {0x1.d600000000000p-1, 0x1.5e95a4d97a000p-4, -0x1.c69063c5d1d1ep-45},
        {0x1.d400000000000p-1, 0x1.700d30aeac000p-4, 0x1.c1e8da99ded32p-49},
        {0x1.d000000000000p-1, 0x1.9335e5d594000p-4, 0x1.3115c3abd47dap-45},
        {0x1.cc00000000000p-1, 0x1.b6ac88dad6000p-4, -0x1.390802bf768e5p-46},
        {0x1.ca00000000000p-1, 0x1.c885801bc4000p-4, 0x1.646d1c65aacd3p-45},
        {0x1.c600000000000p-1, 0x1.ec739830a2000p-4, -0x1.dc068afe645e0p-45},
        {0x1.c400000000000p-1, 0x1.fe89139dbe000p-4, -0x1.534d64fa10afdp-45},
        {0x1.c000000000000p-1, 0x1.1178e8227e000p-3, 0x1.1ef78ce2d07f2p-45},
        {0x1.be00000000000p-1, 0x1.1aa2b7e23f000p-3, 0x1.ca78e44389934p-45},
        {0x1.ba00000000000p-1, 0x1.2d1610c868000p-3, 0x1.39d6ccb81b4a1p-47},
        {0x1.b800000000000p-1, 0x1.365fcb0159000p-3, 0x1.62fa8234b7289p-51},
        {0x1.b400000000000p-1, 0x1.4913d8333b000p-3, 0x1.5837954fdb678p-45},
        {0x1.b200000000000p-1, 0x1.527e5e4a1b000p-3, 0x1.633e8e5697dc7p-45},
        {0x1.ae00000000000p-1, 0x1.6574ebe8c1000p-3, 0x1.9cf8b2c3c2e78p-46},
        {0x1.ac00000000000p-1, 0x1.6f0128b757000p-3, -0x1.5118de59c21e1p-45},
        {0x1.aa00000000000p-1, 0x1.7898d85445000p-3, -0x1.c661070914305p-46},
        {0x1.a600000000000p-1, 0x1.8beafeb390000p-3, -0x1.73d54aae92cd1p-47},
        {0x1.a400000000000p-1, 0x1.95a5adcf70000p-3, 0x1.7f22858a0ff6fp-47},
        {0x1.a000000000000p-1, 0x1.a93ed3c8ae000p-3, -0x1.8724350562169p-45},
        {0x1.9e00000000000p-1, 0x1.b31d8575bd000p-3, -0x1.c358d4eace1aap-47},
        {0x1.9c00000000000p-1, 0x1.bd087383be000p-3, -0x1.d4bc4595412b6p-45},
        {0x1.9a00000000000p-1, 0x1.c6ffbc6f01000p-3, -0x1.1ec72c5962bd2p-48},
        {0x1.9600000000000p-1, 0x1.db13db0d49000p-3, -0x1.aff2af715b035p-45},
        {0x1.9400000000000p-1, 0x1.e530effe71000p-3, 0x1.212276041f430p-51},
        {0x1.9200000000000p-1, 0x1.ef5ade4dd0000p-3, -0x1.a211565bb8e11p-51},
        {0x1.9000000000000p-1, 0x1.f991c6cb3b000p-3, 0x1.bcbecca0cdf30p-46},
        {0x1.8c00000000000p-1, 0x1.07138604d5800p-2, 0x1.89cdb16ed4e91p-48},
        {0x1.8a00000000000p-1, 0x1.0c42d67616000p-2, 0x1.7188b163ceae9p-45},
        {0x1.8800000000000p-1, 0x1.1178e8227e800p-2, -0x1.c210e63a5f01cp-45},
        {0x1.8600000000000p-1, 0x1.16b5ccbacf800p-2, 0x1.b9acdf7a51681p-45},
        {0x1.8400000000000p-1, 0x1.1bf99635a6800p-2, 0x1.ca6ed5147bdb7p-45},
        {0x1.8200000000000p-1, 0x1.214456d0eb800p-2, 0x1.a87deba46baeap-47},
        {0x1.7e00000000000p-1, 0x1.2bef07cdc9000p-2, 0x1.a9cfa4a5004f4p-45},
        {0x1.7c00000000000p-1, 0x1.314f1e1d36000p-2, -0x1.8e27ad3213cb8p-45},
        {0x1.7a00000000000p-1, 0x1.36b6776be1000p-2, 0x1.16ecdb0f177c8p-46},
        {0x1.7800000000000p-1, 0x1.3c25277333000p-2, 0x1.83b54b606bd5cp-46},
        {0x1.7600000000000p-1, 0x1.419b423d5e800p-2, 0x1.8e436ec90e09dp-47},
        {0x1.7400000000000p-1, 0x1.4718dc271c800p-2, -0x1.f27ce0967d675p-45},
        {0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45},
        {0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45},
        {0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46},
        {0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47},
    };
    /* Polynomial coefficients used for p below. A[0] is -0.5; the others
       carry explicit factors (-2, 4, -8) that match the ar2/ar3 scaling
       used during evaluation. */
    static const double A[] = {
        -0x1p-1,
        0x1.555555555556p-2 * -2,
        -0x1.0000000000006p-2 * -2,
        0x1.999999959554ep-3 * 4,
        -0x1.555555529a47ap-3 * 4,
        0x1.2495b9b4845e9p-3 * -8,
        -0x1.0002b8b263fc3p-3 * -8
    };
    /* ln(2) split into a high and a low part, multiplied by k below. */
    static const double ln2hi = 0x1.62e42fefa3800p-1,
        ln2lo = 0x1.ef35793c76730p-45;

    double z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
    double zhi, zlo, rhi, rlo, ar, ar2, ar3, lo3, lo4, arhi, arhi2;
    UINT64 iz, tmp;
    int k, i;

    /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
       The range is split into N subintervals.
       The ith subinterval contains z and c is near its center.
       Here OFF is the constant 0x3fe6955500000000 and N is 1 << 7. */
    tmp = ix - 0x3fe6955500000000ULL;
    i = (tmp >> (52 - 7)) % (1 << 7);   /* top 7 significand bits select T[i] */
    k = (INT64)tmp >> 52; /* arithmetic shift */
    iz = ix - (tmp & 0xfffULL << 52);   /* strip the 2^k factor from the bit pattern */
    z = *(double*)&iz;
    kd = k;

    /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
    invc = T[i].invc;
    logc = T[i].logc;
    logctail = T[i].logctail;

    /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
     |z/c - 1| < 1/N, so r = z/c - 1 is exactly representable. */
    /* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */
    iz = (iz + (1ULL << 31)) & (-1ULL << 32);   /* round z to its upper 32 bits */
    zhi = *(double*)&iz;
    zlo = z - zhi;
    rhi = zhi * invc - 1.0;
    rlo = zlo * invc;
    r = rhi + rlo;

    /* k*Ln2 + log(c) + r. */
    t1 = kd * ln2hi + logc;
    t2 = t1 + r;
    lo1 = kd * ln2lo + logctail;
    lo2 = t1 - t2 + r;   /* rounding error of the t1 + r addition */

    /* Evaluation is optimized assuming superscalar pipelined execution. */
    ar = A[0] * r; /* A[0] = -0.5. */
    ar2 = r * ar;
    ar3 = r * ar2;
    /* k*Ln2 + log(c) + r + A[0]*r*r. */
    arhi = A[0] * rhi;
    arhi2 = rhi * arhi;
    hi = t2 + arhi2;
    lo3 = rlo * (ar + arhi);
    lo4 = t2 - hi + arhi2;   /* rounding error of the t2 + arhi2 addition */
    /* p = log1p(r) - r - A[0]*r*r. */
    p = (ar3 * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
    /* Gather all low-order pieces, then renormalize into (y, *tail). */
    lo = lo1 + lo2 + lo3 + lo4 + p;
    y = hi + lo;
    *tail = hi - y + lo;
    return y;
}
3772
3773 /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
3774    The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */
3775 static double pow_exp(double argx, double argy, double x, double xtail, UINT32 sign_bias)
3776 {
3777     static const double C[] = {
3778         0x1.ffffffffffdbdp-2,
3779         0x1.555555555543cp-3,
3780         0x1.55555cf172b91p-5,
3781         0x1.1111167a4d017p-7
3782     };
3783     static const double invln2N = 0x1.71547652b82fep0 * (1 << 7),
3784         negln2hiN = -0x1.62e42fefa0000p-8,
3785         negln2loN = -0x1.cf79abc9e3b3ap-47;
3786
3787     UINT32 abstop;
3788     UINT64 ki, idx, top, sbits;
3789     double kd, z, r, r2, scale, tail, tmp;
3790
3791     abstop = (*(UINT64*)&x >> 52) & 0x7ff;
3792     if (abstop - 0x3c9 >= 0x408 - 0x3c9) {
3793         if (abstop - 0x3c9 >= 0x80000000) {
3794             /* Avoid spurious underflow for tiny x. */
3795             /* Note: 0 is common input. */
3796             double one = 1.0 + x;
3797             return sign_bias ? -one : one;
3798         }
3799         if (abstop >= 0x409) {
3800             /* Note: inf and nan are already handled. */
3801             if (*(UINT64*)&x >> 63)
3802                 return math_error(_UNDERFLOW, "pow", argx, argy, (sign_bias ? -DBL_MIN : DBL_MIN) * DBL_MIN);
3803             return math_error(_OVERFLOW, "pow", argx, argy, (sign_bias ? -DBL_MAX : DBL_MAX) * DBL_MAX);
3804         }
3805         /* Large x is special cased below. */
3806         abstop = 0;
3807     }
3808
3809     /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
3810     /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
3811     z = invln2N * x;
3812     kd = __round(z);
3813     ki = kd;
3814     r = x + kd * negln2hiN + kd * negln2loN;
3815     /* The code assumes 2^-200 < |xtail| < 2^-8/N. */
3816     r += xtail;
3817     /* 2^(k/N) ~= scale * (1 + tail). */
3818     idx = 2 * (ki % (1 << 7));
3819     top = (ki + sign_bias) << (52 - 7);
3820     tail = *(double*)&exp_T[idx];
3821     /* This is only a valid scale when -1023*N < k < 1024*N. */
3822     sbits = exp_T[idx + 1] + top;
3823     /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
3824     /* Evaluation is optimized assuming superscalar pipelined execution. */
3825     r2 = r * r;
3826     /* Without fma the worst case error is 0.25/N ulp larger. */
3827     /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
3828     tmp = tail + r + r2 * (C[0] + r * C[1]) + r2 * r2 * (C[2] + r * C[3]);
3829     if (abstop == 0) {
3830         /* Handle cases that may overflow or underflow when computing the result that
3831            is scale*(1+TMP) without intermediate rounding. The bit representation of
3832            scale is in SBITS, however it has a computed exponent that may have
3833            overflown into the sign bit so that needs to be adjusted before using it as
3834            a double. (int32_t)KI is the k used in the argument reduction and exponent
3835            adjustment of scale, positive k here means the result may overflow and
3836            negative k means the result may underflow. */
3837         double scale, y;
3838
3839         if ((ki & 0x80000000) == 0) {
3840             /* k > 0, the exponent of scale might have overflowed by <= 460. */
3841             sbits -= 1009ull << 52;
3842             scale = *(double*)&sbits;
3843             y = 0x1p1009 * (scale + scale * tmp);
3844             if (isinf(y))
3845                 return math_error(_OVERFLOW, "pow", argx, argy, y);
3846             return y;
3847         }
3848         /* k < 0, need special care in the subnormal range. */
3849         sbits += 1022ull << 52;
3850         /* Note: sbits is signed scale. */
3851         scale = *(double*)&sbits;
3852         y = scale + scale * tmp;
3853         if (fabs(y) < 1.0) {
3854             /* Round y to the right precision before scaling it into the subnormal
3855                range to avoid double rounding that can cause 0.5+E/2 ulp error where
3856                E is the worst-case ulp error outside the subnormal range. So this
3857                is only useful if the goal is better than 1 ulp worst-case error. */
3858             double hi, lo, one = 1.0;
3859             if (y < 0.0)
3860                 one = -1.0;
3861             lo = scale - y + scale * tmp;
3862             hi = one + y;
3863             lo = one - hi + y + lo;
3864             y = hi + lo - one;
3865             /* Fix the sign of 0. */
3866             if (y == 0.0) {
3867                 sbits &= 0x8000000000000000ULL;
3868                 y = *(double*)&sbits;
3869             }
3870             /* The underflow exception needs to be signaled explicitly. */
3871             fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022);
3872             y = 0x1p-1022 * y;
3873             return math_error(_UNDERFLOW, "pow", argx, argy, y);
3874         }
3875         y = 0x1p-1022 * y;
3876         return y;
3877     }
3878     scale = *(double*)&sbits;
3879     /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
3880        is no spurious underflow here even without fma. */
3881     return scale + scale * tmp;
3882 }
3883
3884 /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
3885    the bit representation of a non-zero finite floating-point value. */
3886 static inline int pow_checkint(UINT64 iy)
3887 {
3888     int e = iy >> 52 & 0x7ff;
3889     if (e < 0x3ff)
3890         return 0;
3891     if (e > 0x3ff + 52)
3892         return 2;
3893     if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
3894         return 0;
3895     if (iy & (1ULL << (0x3ff + 52 - e)))
3896         return 1;
3897     return 2;
3898 }
3899
/*********************************************************************
 *              pow (MSVCRT.@)
 *
 * Copied from musl: src/math/pow.c
 *
 * Computes x raised to the power y.
 *
 * Special operands (zero, infinity, NaN, negative x, very small or very
 * large |y|, subnormal x) are resolved first; everything else is computed
 * as exp(y * log(x)) using pow_log() and pow_exp() with the logarithm
 * carried in two parts. Singularity, domain, overflow and underflow
 * conditions are reported through math_error() with the name "pow".
 */
double CDECL pow( double x, double y )
{
    UINT32 sign_bias = 0;
    UINT64 ix, iy;
    UINT32 topx, topy;
    double lo, hi, ehi, elo, yhi, ylo, lhi, llo;

    /* Raw bit patterns and their top 12 bits (sign + biased exponent). */
    ix = *(UINT64*)&x;
    iy = *(UINT64*)&y;
    topx = ix >> 52;
    topy = iy >> 52;
    /* The unsigned subtractions turn each range test into one comparison. */
    if (topx - 0x001 >= 0x7ff - 0x001 ||
            (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
        /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
           and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
        /* Special cases: (x < 0x1p-126 or inf or nan) or
           (|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
        /* y is zero, infinite or NaN (2 * iy discards the sign bit). */
        if (2 * iy - 1 >= 2 * 0x7ff0000000000000ULL - 1) {
            if (2 * iy == 0)
                return 1.0;
            if (ix == 0x3ff0000000000000ULL)
                return 1.0;
            /* Either operand is NaN: let the addition propagate it. */
            if (2 * ix > 2 * 0x7ff0000000000000ULL ||
                    2 * iy > 2 * 0x7ff0000000000000ULL)
                return x + y;
            if (2 * ix == 2 * 0x3ff0000000000000ULL)
                return 1.0;
            if ((2 * ix < 2 * 0x3ff0000000000000ULL) == !(iy >> 63))
                return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
            return y * y;
        }
        /* x is zero, infinite or NaN. */
        if (2 * ix - 1 >= 2 * 0x7ff0000000000000ULL - 1) {
            double x2 = x * x;
            /* Negative x with an odd integer y gives a negative result. */
            if (ix >> 63 && pow_checkint(iy) == 1)
                x2 = -x2;
            /* Zero base with negative exponent: reported as a singularity. */
            if (iy & 0x8000000000000000ULL && x2 == 0.0)
                return math_error(_SING, "pow", x, y, 1 / x2);
            /* Without the barrier some versions of clang hoist the 1/x2 and
               thus division by zero exception can be signaled spuriously. */
            return iy >> 63 ? fp_barrier(1 / x2) : x2;
        }
        /* Here x and y are non-zero finite. */
        if (ix >> 63) {
            /* Finite x < 0. */
            int yint = pow_checkint(iy);
            if (yint == 0)
                return math_error(_DOMAIN, "pow", x, y, 0 / (x - x));
            if (yint == 1)
                sign_bias = 0x800 << 7;
            /* Continue with |x|; the sign is carried by sign_bias. */
            ix &= 0x7fffffffffffffff;
            topx &= 0x7ff;
        }
        if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
            /* Note: sign_bias == 0 here because y is not odd. */
            if (ix == 0x3ff0000000000000ULL)
                return 1.0;
            if ((topy & 0x7ff) < 0x3be) {
                /* |y| < 2^-65, x^y ~= 1 + y*log(x). */
                return ix > 0x3ff0000000000000ULL ? 1.0 + y : 1.0 - y;
            }
            /* Huge |y|: overflow when |x| > 1 and y > 0 or |x| < 1 and y < 0,
               underflow otherwise. */
            if ((ix > 0x3ff0000000000000ULL) == (topy < 0x800))
                return math_error(_OVERFLOW, "pow", x, y, fp_barrier(DBL_MAX) * DBL_MAX);
            return math_error(_UNDERFLOW, "pow", x, y, fp_barrier(DBL_MIN) * DBL_MIN);
        }
        if (topx == 0) {
            /* Normalize subnormal x so exponent becomes negative. */
            /* NOTE(review): x itself is rescaled here and is later handed to
               pow_exp() as argx, so a range error on this path reports the
               scaled value rather than the caller's x - confirm intended. */
            x *= 0x1p52;
            ix = *(UINT64*)&x;
            ix &= 0x7fffffffffffffff;
            ix -= 52ULL << 52;
        }
    }

    /* hi + lo ~= log(|x|). */
    hi = pow_log(ix, &lo);
    /* Clear the low 27 bits of y and of hi to obtain short high parts
       (yhi, lhi); the remainders go to ylo and llo. */
    iy &= -1ULL << 27;
    yhi = *(double*)&iy;
    ylo = y - yhi;
    *(UINT64*)&lhi = *(UINT64*)&hi & -1ULL << 27;
    llo = fp_barrier(hi - lhi + lo);
    /* ehi + elo ~= y * log(|x|). */
    ehi = yhi * lhi;
    elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */
    return pow_exp(x, y, ehi, elo, sign_bias);
}
3988
3989 /*********************************************************************
3990  *              sin (MSVCRT.@)
3991  *
3992  * Copied from musl: src/math/sin.c
3993  */
3994 double CDECL sin( double x )
3995 {
3996     double y[2];
3997     UINT32 ix;
3998     unsigned n;
3999
4000     ix = *(ULONGLONG*)&x >> 32;
4001     ix &= 0x7fffffff;
4002
4003     /* |x| ~< pi/4 */
4004     if (ix <= 0x3fe921fb) {
4005         if (ix < 0x3e500000) { /* |x| < 2**-26 */
4006             /* raise inexact if x != 0 and underflow if subnormal*/
4007             fp_barrier(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f);
4008             return x;
4009         }
4010         return __sin(x, 0.0, 0);
4011     }
4012
4013     /* sin(Inf or NaN) is NaN */
4014     if (isinf(x))
4015         return math_error(_DOMAIN, "sin", x, 0, x - x);
4016     if (ix >= 0x7ff00000)
4017         return x - x;
4018
4019     /* argument reduction needed */
4020     n = __rem_pio2(x, y);
4021     switch (n&3) {
4022     case 0: return  __sin(y[0], y[1], 1);
4023     case 1: return  __cos(y[0], y[1]);
4024     case 2: return -__sin(y[0], y[1], 1);
4025     default: return -__cos(y[0], y[1]);
4026     }
4027 }
4028
4029 /*********************************************************************
4030  *              sinh (MSVCRT.@)
4031  */
4032 double CDECL sinh( double x )
4033 {
4034     UINT64 ux = *(UINT64*)&x;
4035     UINT32 w;
4036     double t, h, absx;
4037
4038     h = 0.5;
4039     if (ux >> 63)
4040         h = -h;
4041     /* |x| */
4042     ux &= (UINT64)-1 / 2;
4043     absx = *(double*)&ux;
4044     w = ux >> 32;
4045
4046     /* |x| < log(DBL_MAX) */
4047     if (w < 0x40862e42) {
4048         t = __expm1(absx);
4049         if (w < 0x3ff00000) {
4050             if (w < 0x3ff00000 - (26 << 20))
4051                 return x;
4052             return h * (2 * t - t * t / (t + 1));
4053         }
4054         return h * (t + t / (t + 1));
4055     }
4056
4057     /* |x| > log(DBL_MAX) or nan */
4058     /* note: the result is stored to handle overflow */
4059     t = __expo2(absx, 2 * h);
4060     return t;
4061 }
4062
4063 static BOOL sqrt_validate( double *x, BOOL update_sw )
4064 {
4065     short c = _dclass(*x);
4066
4067     if (c == FP_ZERO) return FALSE;
4068     if (c == FP_NAN)
4069     {
4070 #ifdef __i386__
4071         if (update_sw)
4072             *x = math_error(_DOMAIN, "sqrt", *x, 0, *x);
4073 #else
4074         /* set signaling bit */
4075         *(ULONGLONG*)x |= 0x8000000000000ULL;
4076 #endif
4077         return FALSE;
4078     }
4079     if (signbit(*x))
4080     {
4081         *x = math_error(_DOMAIN, "sqrt", *x, 0, ret_nan(update_sw));
4082         return FALSE;
4083     }
4084     if (c == FP_INFINITE) return FALSE;
4085     return TRUE;
4086 }
4087
#if defined(__x86_64__) || defined(__i386__)
/* Assembly stub, built on x86 and x86_64 only: executes the SSE2 "sqrtsd"
   instruction on %xmm0 (source and destination) and returns. It performs
   no operand validation of its own.
   NOTE(review): the operand is taken from %xmm0; in the code visible here
   only the x86_64 branch of sqrt() calls this helper - confirm how any
   i386 caller passes the argument. */
double CDECL sse2_sqrt(double);
__ASM_GLOBAL_FUNC( sse2_sqrt,
        "sqrtsd %xmm0, %xmm0\n\t"
        "ret" )
#endif
4094
#ifdef __i386__
/* Assembly stub, built on i386 only: loads the double found at 4(%esp)
   onto the x87 stack, executes "fsqrt" and returns with the result on the
   x87 stack. The fsqrt is bracketed by SET_X87_CW(0xc00) / RESET_X87_CW,
   macros defined elsewhere - presumably they switch the x87 control word
   for the duration of the operation and restore it afterwards (TODO
   confirm against their definition). No operand validation is done here. */
double CDECL x87_sqrt(double);
__ASM_GLOBAL_FUNC( x87_sqrt,
        "fldl 4(%esp)\n\t"
        SET_X87_CW(0xc00)
        "fsqrt\n\t"
        RESET_X87_CW
        "ret" )
#endif
4104
/*********************************************************************
 *              sqrt (MSVCRT.@)
 *
 * Copied from musl: src/math/sqrt.c
 *
 * Returns the square root of x.
 *
 * Every variant first calls sqrt_validate(); when it returns FALSE the
 * (possibly replaced) operand is returned as the result. Otherwise x86_64
 * uses sse2_sqrt(), i386 uses x87_sqrt(), and all other targets use the
 * integer bit-by-bit algorithm below.
 */
double CDECL sqrt( double x )
{
#ifdef __x86_64__
    if (!sqrt_validate(&x, TRUE))
        return x;

    return sse2_sqrt(x);
#elif defined( __i386__ )
    if (!sqrt_validate(&x, TRUE))
        return x;

    return x87_sqrt(x);
#else
    static const double tiny = 1.0e-300;

    double z;
    int sign = 0x80000000;
    int ix0,s0,q,m,t,i;
    unsigned int r,t1,s1,ix1,q1;
    ULONGLONG ix;

    if (!sqrt_validate(&x, TRUE))
        return x;

    /* Split the bit pattern into high (ix0) and low (ix1) 32-bit words. */
    ix = *(ULONGLONG*)&x;
    ix0 = ix >> 32;
    ix1 = ix;

    /* normalize x */
    m = ix0 >> 20;
    if (m == 0) {  /* subnormal x */
        /* Shift the significand up until its leading bit reaches bit 20 of
           ix0, adjusting the exponent m to match. */
        while (ix0 == 0) {
            m -= 21;
            ix0 |= (ix1 >> 11);
            ix1 <<= 21;
        }
        for (i=0; (ix0 & 0x00100000) == 0; i++)
            ix0 <<= 1;
        m -= i - 1;
        ix0 |= ix1 >> (32 - i);
        ix1 <<= i;
    }
    m -= 1023;    /* unbias exponent */
    ix0 = (ix0 & 0x000fffff) | 0x00100000;   /* make the implicit leading bit explicit */
    if (m & 1) {  /* odd m, double x to make it even */
        ix0 += ix0 + ((ix1 & sign) >> 31);
        ix1 += ix1;
    }
    m >>= 1;      /* m = [m/2] */

    /* generate sqrt(x) bit by bit */
    ix0 += ix0 + ((ix1 & sign) >> 31);
    ix1 += ix1;
    q = q1 = s0 = s1 = 0;  /* [q,q1] = sqrt(x) */
    r = 0x00200000;        /* r = moving bit from right to left */

    /* High word of the result: try each bit in turn and keep it when the
       trial value still fits into the remainder. */
    while (r != 0) {
        t = s0 + r;
        if (t <= ix0) {
            s0   = t + r;
            ix0 -= t;
            q   += r;
        }
        ix0 += ix0 + ((ix1 & sign) >> 31);
        ix1 += ix1;
        r >>= 1;
    }

    /* Low word of the result: same scheme on the 64-bit remainder
       [ix0,ix1], with manual carry/borrow between the two words. */
    r = sign;
    while (r != 0) {
        t1 = s1 + r;
        t  = s0;
        if (t < ix0 || (t == ix0 && t1 <= ix1)) {
            s1 = t1 + r;
            if ((t1&sign) == sign && (s1 & sign) == 0)
                s0++;
            ix0 -= t;
            if (ix1 < t1)
                ix0--;
            ix1 -= t1;
            q1 += r;
        }
        ix0 += ix0 + ((ix1 & sign) >> 31);
        ix1 += ix1;
        r >>= 1;
    }

    /* use floating add to find out rounding direction */
    if ((ix0 | ix1) != 0) {   /* non-zero remainder: the result is inexact */
        z = 1.0 - tiny; /* raise inexact flag */
        if (z >= 1.0) {
            z = 1.0 + tiny;
            if (q1 == (unsigned int)0xffffffff) {
                q1 = 0;
                q++;
            } else if (z > 1.0) {
                if (q1 == (unsigned int)0xfffffffe)
                    q++;
                q1 += 2;
            } else
                q1 += q1 & 1;
        }
    }
    /* Reassemble the double: halve [q,q1], then add the exponent bias and
       the halved exponent m to the high word. */
    ix0 = (q >> 1) + 0x3fe00000;
    ix1 = q1 >> 1;
    if (q & 1)
        ix1 |= sign;
    ix = ix0 + ((unsigned int)m << 20);
    ix <<= 32;
    ix |= ix1;
    return *(double*)&ix;
#endif
}
4223
/* Copied from musl: src/math/__tan.c */
/*
 * Tangent kernel used by tan().
 *
 * x, y: the argument as a head/tail pair; tan() passes either (x, 0.0)
 *       or the two words filled in by __rem_pio2().
 * odd:  0 returns the approximation w = x + r itself, 1 returns -1 / w
 *       instead (tan() passes n & 1).
 */
static double __tan(double x, double y, int odd)
{
    /* polynomial coefficients: T[0] scales x^3, the others feed r and v below */
    static const double T[] = {
        3.33333333333334091986e-01,
        1.33333333333201242699e-01,
        5.39682539762260521377e-02,
        2.18694882948595424599e-02,
        8.86323982359930005737e-03,
        3.59207910759131235356e-03,
        1.45620945432529025516e-03,
        5.88041240820264096874e-04,
        2.46463134818469906812e-04,
        7.81794442939557092300e-05,
        7.14072491382608190305e-05,
        -1.85586374855275456654e-05,
        2.59073051863633712884e-05,
    };
    /* pi/4 as a leading value plus a small correction term */
    static const double pio4 = 7.85398163397448278999e-01;
    static const double pio4lo = 3.06161699786838301793e-17;

    double z, r, v, w, s, a, w0, a0;
    UINT32 hx;
    int big, sign;

    /* upper 32 bits of x: sign, exponent and the top of the mantissa */
    hx = *(ULONGLONG*)&x >> 32;
    big = (hx & 0x7fffffff) >= 0x3FE59428; /* |x| >= 0.6744 */
    if (big) {
        /* work on the positive argument and replace it by pi/4 - (x + y);
           sign is only assigned and only read when big is set */
        sign = hx >> 31;
        if (sign) {
            x = -x;
            y = -y;
        }
        x = (pio4 - x) + (pio4lo - y);
        y = 0.0;
    }
    z = x * x;
    w = z * z;
    /* two interleaved Horner chains in w = x^4: r takes the odd-indexed
       coefficients, v the even-indexed ones */
    r = T[1] + w * (T[3] + w * (T[5] + w * (T[7] + w * (T[9] + w * T[11]))));
    v = z * (T[2] + w * (T[4] + w * (T[6] + w * (T[8] + w * (T[10] + w * T[12])))));
    s = z * x;
    r = y + z * (s * (r + v) + y) + s * T[0];
    w = x + r;
    if (big) {
        /* s is +1 for odd == 0 and -1 for odd == 1; the sign removed above
           is put back on the result */
        s = 1 - 2 * odd;
        v = s - 2.0 * (x + (r - w * w / (w + s)));
        return sign ? -v : v;
    }
    if (!odd)
        return w;
    /* -1.0/(x+r) has up to 2ulp error, so compute it accurately */
    w0 = w;
    /* clear the low 32 bits of w0 */
    *(LONGLONG*)&w0 = *(LONGLONG*)&w0 & 0xffffffff00000000ULL;
    v = r - (w0 - x);       /* w0+v = r+x */
    a0 = a = -1.0 / w;
    /* clear the low 32 bits of a0 as well */
    *(LONGLONG*)&a0 = *(LONGLONG*)&a0 & 0xffffffff00000000ULL;
    return a0 + a * (1.0 + a0 * w0 + a0 * v);
}
4282
4283 /*********************************************************************
4284  *              tan (MSVCRT.@)
4285  *
4286  * Copied from musl: src/math/tan.c
4287  */
4288 double CDECL tan( double x )
4289 {
4290     double y[2];
4291     UINT32 ix;
4292     unsigned n;
4293
4294     ix = *(ULONGLONG*)&x >> 32;
4295     ix &= 0x7fffffff;
4296
4297     if (ix <= 0x3fe921fb) { /* |x| ~< pi/4 */
4298         if (ix < 0x3e400000) { /* |x| < 2**-27 */
4299             /* raise inexact if x!=0 and underflow if subnormal */
4300             fp_barrier(ix < 0x00100000 ? x / 0x1p120f : x + 0x1p120f);
4301             return x;
4302         }
4303         return __tan(x, 0.0, 0);
4304     }
4305
4306     if (isinf(x))
4307         return math_error(_DOMAIN, "tan", x, 0, x - x);
4308     if (ix >= 0x7ff00000)
4309         return x - x;
4310
4311     n = __rem_pio2(x, y);
4312     return __tan(y[0], y[1], n & 1);
4313 }
4314
4315 /*********************************************************************
4316  *              tanh (MSVCRT.@)
4317  */
4318 double CDECL tanh( double x )
4319 {
4320     UINT64 ui = *(UINT64*)&x;
4321     UINT32 w;
4322     int sign;
4323     double t;
4324
4325     /* x = |x| */
4326     sign = ui >> 63;
4327     ui &= (UINT64)-1 / 2;
4328     x = *(double*)&ui;
4329     w = ui >> 32;
4330
4331     if (w > 0x3fe193ea) {
4332         /* |x| > log(3)/2 ~= 0.5493 or nan */
4333         if (w > 0x40340000) {
4334 #if _MSVCR_VER < 140
4335             if (isnan(x))
4336                 return math_error(_DOMAIN, "tanh", x, 0, x);
4337 #endif
4338             /* |x| > 20 or nan */
4339             /* note: this branch avoids raising overflow */
4340             fp_barrier(x + 0x1p120f);
4341             t = 1 - 0 / x;
4342         } else {
4343             t = __expm1(2 * x);
4344             t = 1 - 2 / (t + 2);
4345         }
4346     } else if (w > 0x3fd058ae) {
4347         /* |x| > log(5/3)/2 ~= 0.2554 */
4348         t = __expm1(2 * x);
4349         t = t / (t + 2);
4350     } else if (w >= 0x00100000) {
4351         /* |x| >= 0x1p-1022, up to 2ulp error in [0.1,0.2554] */
4352         t = __expm1(-2 * x);
4353         t = -t / (t + 2);
4354     } else {
4355         /* |x| is subnormal */
4356         /* note: the branch above would not raise underflow in [0x1p-1023,0x1p-1022) */
4357         fp_barrier((float)x);
4358         t = x;
4359     }
4360     return sign ? -t : t;
4361 }
4362
4363
4364 #if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
4365
/*
 * CREATE_FPU_FUNC1(name, call): emits the i386 function `name`, which takes
 * one argument in st(0) and forwards it to `call` as a double on the stack.
 *
 *  - 68 bytes are reserved: eight 8-byte slots at (%esp) plus an int at
 *    -4(%ebp).
 *  - The argument is popped into slot 0, where `call` finds it.
 *  - Every x87 register that fxam does not report as empty is popped into
 *    the following slots; %ecx counts the slots in use and is parked at
 *    -4(%ebp) during the call.
 *  - After the call the result is popped into slot 0 and the slots are
 *    reloaded from the highest one down to slot 0, so the result is loaded
 *    last.
 *
 * Values are spilled with fstpl, i.e. as 64-bit doubles.
 */
#define CREATE_FPU_FUNC1(name, call) \
    __ASM_GLOBAL_FUNC(name, \
            "pushl   %ebp\n\t" \
            __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \
            __ASM_CFI(".cfi_rel_offset %ebp,0\n\t") \
            "movl    %esp, %ebp\n\t" \
            __ASM_CFI(".cfi_def_cfa_register %ebp\n\t") \
            "subl    $68, %esp\n\t" /* sizeof(double)*8 + sizeof(int) */ \
            "fstpl   (%esp)\n\t"    /* store function argument */ \
            "fwait\n\t" \
            "movl    $1, %ecx\n\t"  /* empty FPU stack */ \
            "1:\n\t" \
            "fxam\n\t"              /* classify st(0) */ \
            "fstsw   %ax\n\t" \
            "and     $0x4500, %ax\n\t" /* keep condition bits C3, C2, C0 */ \
            "cmp     $0x4100, %ax\n\t" /* C3 and C0 set: register is empty */ \
            "je      2f\n\t" \
            "fstpl    (%esp,%ecx,8)\n\t" /* spill st(0) into slot %ecx */ \
            "fwait\n\t" \
            "incl    %ecx\n\t" \
            "jmp     1b\n\t" \
            "2:\n\t" \
            "movl    %ecx, -4(%ebp)\n\t" /* keep the slot count across the call */ \
            "call    " __ASM_NAME( #call ) "\n\t" \
            "movl    -4(%ebp), %ecx\n\t" \
            "fstpl   (%esp)\n\t"    /* save result */ \
            "3:\n\t"                /* restore FPU stack */ \
            "decl    %ecx\n\t" \
            "fldl    (%esp,%ecx,8)\n\t" \
            "cmpl    $0, %ecx\n\t"  /* slot 0 (the result) is reloaded last */ \
            "jne     3b\n\t" \
            "leave\n\t" \
            __ASM_CFI(".cfi_def_cfa %esp,4\n\t") \
            __ASM_CFI(".cfi_same_value %ebp\n\t") \
            "ret")
4401
/*
 * CREATE_FPU_FUNC2(name, call): two-argument variant of CREATE_FPU_FUNC1.
 *
 * st(0) is popped into slot 1 (8(%esp)) and the next register into slot 0
 * ((%esp)), which makes them the second and first stack argument of `call`.
 * Remaining non-empty x87 registers are spilled from slot 2 upwards.
 * After the call the result is popped into slot 1 and the slots are
 * reloaded from the highest one down to slot 1; slot 0 is not reloaded.
 */
#define CREATE_FPU_FUNC2(name, call) \
    __ASM_GLOBAL_FUNC(name, \
            "pushl   %ebp\n\t" \
            __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \
            __ASM_CFI(".cfi_rel_offset %ebp,0\n\t") \
            "movl    %esp, %ebp\n\t" \
            __ASM_CFI(".cfi_def_cfa_register %ebp\n\t") \
            "subl    $68, %esp\n\t" /* sizeof(double)*8 + sizeof(int) */ \
            "fstpl   8(%esp)\n\t"   /* store function argument */ \
            "fwait\n\t" \
            "fstpl   (%esp)\n\t" \
            "fwait\n\t" \
            "movl    $2, %ecx\n\t"  /* empty FPU stack */ \
            "1:\n\t" \
            "fxam\n\t"              /* classify st(0) */ \
            "fstsw   %ax\n\t" \
            "and     $0x4500, %ax\n\t" /* keep condition bits C3, C2, C0 */ \
            "cmp     $0x4100, %ax\n\t" /* C3 and C0 set: register is empty */ \
            "je      2f\n\t" \
            "fstpl    (%esp,%ecx,8)\n\t" /* spill st(0) into slot %ecx */ \
            "fwait\n\t" \
            "incl    %ecx\n\t" \
            "jmp     1b\n\t" \
            "2:\n\t" \
            "movl    %ecx, -4(%ebp)\n\t" /* keep the slot count across the call */ \
            "call    " __ASM_NAME( #call ) "\n\t" \
            "movl    -4(%ebp), %ecx\n\t" \
            "fstpl   8(%esp)\n\t"   /* save result */ \
            "3:\n\t"                /* restore FPU stack */ \
            "decl    %ecx\n\t" \
            "fldl    (%esp,%ecx,8)\n\t" \
            "cmpl    $1, %ecx\n\t"  /* slot 1 (the result) is reloaded last */ \
            "jne     3b\n\t" \
            "leave\n\t" \
            __ASM_CFI(".cfi_def_cfa %esp,4\n\t") \
            __ASM_CFI(".cfi_same_value %ebp\n\t") \
            "ret")
4439
/* _CI* entry points: each one takes its argument(s) from the x87 register
 * stack and forwards them to the function named second, using the
 * one-argument or two-argument wrapper macro as appropriate. */
CREATE_FPU_FUNC1(_CIacos, acos)
CREATE_FPU_FUNC1(_CIasin, asin)
CREATE_FPU_FUNC1(_CIatan, atan)
CREATE_FPU_FUNC2(_CIatan2, atan2)
CREATE_FPU_FUNC1(_CIcos, cos)
CREATE_FPU_FUNC1(_CIcosh, cosh)
CREATE_FPU_FUNC1(_CIexp, exp)
CREATE_FPU_FUNC2(_CIfmod, fmod)
CREATE_FPU_FUNC1(_CIlog, log)
CREATE_FPU_FUNC1(_CIlog10, log10)
CREATE_FPU_FUNC2(_CIpow, pow)
CREATE_FPU_FUNC1(_CIsin, sin)
CREATE_FPU_FUNC1(_CIsinh, sinh)
CREATE_FPU_FUNC1(_CIsqrt, sqrt)
CREATE_FPU_FUNC1(_CItan, tan)
CREATE_FPU_FUNC1(_CItanh, tanh)
4456
/*
 * _ftol: pops st(0) and returns it as a 64-bit integer in %edx:%eax.
 *
 * The x87 control word is saved, a copy with both rounding-control bits
 * (0xc00) set is loaded for the conversion, and the saved control word is
 * reloaded before returning.
 */
__ASM_GLOBAL_FUNC(_ftol,
        "pushl   %ebp\n\t"
        __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t")
        __ASM_CFI(".cfi_rel_offset %ebp,0\n\t")
        "movl    %esp, %ebp\n\t"
        __ASM_CFI(".cfi_def_cfa_register %ebp\n\t")
        "subl    $12, %esp\n\t"     /* sizeof(LONGLONG) + 2*sizeof(WORD) */
        "fnstcw  (%esp)\n\t"        /* original control word at (%esp) */
        "mov     (%esp), %ax\n\t"
        "or      $0xc00, %ax\n\t"   /* set both rounding-control bits */
        "mov     %ax, 2(%esp)\n\t"  /* modified control word at 2(%esp) */
        "fldcw   2(%esp)\n\t"
        "fistpq  4(%esp)\n\t"       /* convert and pop into the 64-bit slot */
        "fldcw   (%esp)\n\t"        /* put the original control word back */
        "movl    4(%esp), %eax\n\t" /* low half of the result */
        "movl    8(%esp), %edx\n\t" /* high half of the result */
        "leave\n\t"
        __ASM_CFI(".cfi_def_cfa %esp,4\n\t")
        __ASM_CFI(".cfi_same_value %ebp\n\t")
        "ret")
4477
4478 #endif /* (defined(__GNUC__) || defined(__clang__)) && defined(__i386__) */
4479
/*********************************************************************
 *              _fpclass (MSVCRT.@)
 *
 * Classifies num from its sign bit, exponent field and fraction field
 * and returns the matching _FPCLASS_* value.
 */
int CDECL _fpclass(double num)
{
    union { double f; UINT64 i; } bits = { num };
    const int negative = bits.i >> 63;
    const int exponent = bits.i >> 52 & 0x7ff;
    const UINT64 fraction = bits.i & 0x000fffffffffffffULL;

    if (exponent == 0x7ff)
    {
        /* all-ones exponent: infinity when the fraction is empty, NaN otherwise */
        if (!fraction)
            return negative ? _FPCLASS_NINF : _FPCLASS_PINF;
        /* the top fraction bit tells quiet from signaling */
        return (fraction >> 51) & 1 ? _FPCLASS_QNAN : _FPCLASS_SNAN;
    }

    if (exponent)
        return negative ? _FPCLASS_NN : _FPCLASS_PN;

    /* zero exponent: denormal when any fraction bit is set, zero otherwise */
    if (fraction)
        return negative ? _FPCLASS_ND : _FPCLASS_PD;
    return negative ? _FPCLASS_NZ : _FPCLASS_PZ;
}
4501
/*********************************************************************
 *              _rotl (MSVCRT.@)
 *
 * Rotates the 32-bit value num left by shift bits; only the low five
 * bits of shift are used.
 */
unsigned int CDECL MSVCRT__rotl(unsigned int num, int shift)
{
    shift &= 31;
    /* the complementary count is masked too: for shift == 0 it would be
     * 32, and shifting a 32-bit value by 32 is undefined */
    return (num << shift) | (num >> ((32 - shift) & 31));
}
4510
/*********************************************************************
 *              _lrotl (MSVCRT.@)
 *
 * Rotates num left by shift bits, treating it as a 32-bit value; only
 * the low five bits of shift are used.
 */
__msvcrt_ulong CDECL MSVCRT__lrotl(__msvcrt_ulong num, int shift)
{
    shift &= 0x1f;
    /* the complementary count is masked too: for shift == 0 it would be
     * 32, and shifting a 32-bit value by 32 is undefined */
    return (num << shift) | (num >> ((32 - shift) & 0x1f));
}
4519
/*********************************************************************
 *              _lrotr (MSVCRT.@)
 *
 * Rotates num right by shift bits, treating it as a 32-bit value; only
 * the low five bits of shift are used.
 */
__msvcrt_ulong CDECL MSVCRT__lrotr(__msvcrt_ulong num, int shift)
{
    shift &= 0x1f;
    /* the complementary count is masked too: for shift == 0 it would be
     * 32, and shifting a 32-bit value by 32 is undefined */
    return (num >> shift) | (num << ((32 - shift) & 0x1f));
}
4528
/*********************************************************************
 *              _rotr (MSVCRT.@)
 *
 * Rotates the 32-bit value num right by shift bits; only the low five
 * bits of shift are used.
 */
unsigned int CDECL MSVCRT__rotr(unsigned int num, int shift)
{
    shift &= 0x1f;
    /* the complementary count is masked too: for shift == 0 it would be
     * 32, and shifting a 32-bit value by 32 is undefined */
    return (num >> shift) | (num << ((32 - shift) & 0x1f));
}
4537
/*********************************************************************
 *              _rotl64 (MSVCRT.@)
 *
 * Rotates the 64-bit value num left by shift bits; only the low six
 * bits of shift are used.
 */
unsigned __int64 CDECL MSVCRT__rotl64(unsigned __int64 num, int shift)
{
    shift &= 63;
    /* the complementary count is masked too: for shift == 0 it would be
     * 64, and shifting a 64-bit value by 64 is undefined */
    return (num << shift) | (num >> ((64 - shift) & 63));
}
4546
/*********************************************************************
 *              _rotr64 (MSVCRT.@)
 *
 * Rotates the 64-bit value num right by shift bits; only the low six
 * bits of shift are used.
 */
unsigned __int64 CDECL MSVCRT__rotr64(unsigned __int64 num, int shift)
{
    shift &= 63;
    /* the complementary count is masked too: for shift == 0 it would be
     * 64, and shifting a 64-bit value by 64 is undefined */
    return (num >> shift) | (num << ((64 - shift) & 63));
}
4555
/*********************************************************************
 *              abs (MSVCRT.@)
 *
 * Returns the absolute value of n. INT_MIN has no positive counterpart
 * and is returned unchanged.
 */
int CDECL abs( int n )
{
    /* negate in unsigned arithmetic: -n would overflow for INT_MIN */
    return n >= 0 ? n : (int)(0u - (unsigned int)n);
}
4563
/*********************************************************************
 *              labs (MSVCRT.@)
 *
 * Returns the absolute value of n. The most negative value has no
 * positive counterpart and is returned unchanged.
 */
__msvcrt_long CDECL labs( __msvcrt_long n )
{
    /* negate in unsigned arithmetic: -n would overflow for the minimum value */
    return n >= 0 ? n : (__msvcrt_long)(0 - (__msvcrt_ulong)n);
}
4571
#if _MSVCR_VER>=100
/*********************************************************************
 *              llabs (MSVCR100.@)
 *
 * Returns the absolute value of n. The most negative value has no
 * positive counterpart and is returned unchanged.
 */
__int64 CDECL llabs( __int64 n )
{
    /* negate in unsigned arithmetic: -n would overflow for the minimum value */
    return n >= 0 ? n : (__int64)(0 - (unsigned __int64)n);
}
#endif
4581
#if _MSVCR_VER>=120
/*********************************************************************
 *              imaxabs (MSVCR120.@)
 *
 * Returns the absolute value of n. The most negative value has no
 * positive counterpart and is returned unchanged.
 */
intmax_t CDECL imaxabs( intmax_t n )
{
    /* negate in unsigned arithmetic: -n would overflow for the minimum value */
    return n >= 0 ? n : (intmax_t)(0 - (uintmax_t)n);
}
#endif
4591
/*********************************************************************
 *              _abs64 (MSVCRT.@)
 *
 * Returns the absolute value of n. The most negative value has no
 * positive counterpart and is returned unchanged.
 */
__int64 CDECL _abs64( __int64 n )
{
    /* negate in unsigned arithmetic: -n would overflow for the minimum value */
    return n >= 0 ? n : (__int64)(0 - (unsigned __int64)n);
}
4599
/* Copied from musl: src/math/ilogb.c */
/* Returns the unbiased binary exponent of x as an int; zero, NaN and
 * infinity map to FP_ILOGB0, FP_ILOGBNAN and INT_MAX respectively. */
static int __ilogb(double x)
{
    union { double f; UINT64 i; } bits = { x };
    UINT64 frac = bits.i << 12;         /* fraction moved to the top bits */
    int exponent = bits.i >> 52 & 0x7ff;

    if (exponent == 0x7ff)
        return frac ? FP_ILOGBNAN : INT_MAX;
    if (exponent)
        return exponent - 0x3ff;
    if (!frac)
        return FP_ILOGB0;

    /* subnormal x: count how far the leading fraction bit is from the top */
    exponent = -0x3ff;
    while (!(frac >> 63))
    {
        exponent--;
        frac <<= 1;
    }
    return exponent;
}
4617
4618 /*********************************************************************
4619  *              _logb (MSVCRT.@)
4620  *
4621  * Copied from musl: src/math/logb.c
4622  */
4623 double CDECL _logb(double x)
4624 {
4625     if (!isfinite(x))
4626         return x * x;
4627     if (x == 0)
4628         return math_error(_SING, "_logb", x, 0, -1 / (x * x));
4629     return __ilogb(x);
4630 }
4631
/* Squares x into a pair: *hi receives the rounded product x * x and *lo a
 * correction term assembled from a head/tail split of x (the split uses
 * the multiplier 2^27 + 1). */
static void sq(double *hi, double *lo, double x)
{
    double head, tail, scaled;

    scaled = x * (0x1p27 + 1);
    head = x - scaled + scaled;
    tail = x - head;
    *hi = x * x;
    *lo = head * head - *hi + 2 * head * tail + tail * tail;
}
4642
4643 /*********************************************************************
4644  *              _hypot (MSVCRT.@)
4645  *
4646  * Copied from musl: src/math/hypot.c
4647  */
4648 double CDECL _hypot(double x, double y)
4649 {
4650     UINT64 ux = *(UINT64*)&x, uy = *(UINT64*)&y, ut;
4651     double hx, lx, hy, ly, z;
4652     int ex, ey;
4653
4654     /* arrange |x| >= |y| */
4655     ux &= -1ULL >> 1;
4656     uy &= -1ULL >> 1;
4657     if (ux < uy) {
4658         ut = ux;
4659         ux = uy;
4660         uy = ut;
4661     }
4662
4663     /* special cases */
4664     ex = ux >> 52;
4665     ey = uy >> 52;
4666     x = *(double*)&ux;
4667     y = *(double*)&uy;
4668     /* note: hypot(inf,nan) == inf */
4669     if (ey == 0x7ff)
4670         return y;
4671     if (ex == 0x7ff || uy == 0)
4672         return x;
4673     /* note: hypot(x,y) ~= x + y*y/x/2 with inexact for small y/x */
4674     /* 64 difference is enough for ld80 double_t */
4675     if (ex - ey > 64)
4676         return x + y;
4677
4678     /* precise sqrt argument in nearest rounding mode without overflow */
4679     /* xh*xh must not overflow and xl*xl must not underflow in sq */
4680     z = 1;
4681     if (ex > 0x3ff + 510) {
4682         z = 0x1p700;
4683         x *= 0x1p-700;
4684         y *= 0x1p-700;
4685     } else if (ey < 0x3ff - 450) {
4686         z = 0x1p-700;
4687         x *= 0x1p700;
4688         y *= 0x1p700;
4689     }
4690     sq(&hx, &lx, x);
4691     sq(&hy, &ly, y);
4692     return z * sqrt(ly + lx + hy + hx);
4693 }
4694
4695 /*********************************************************************
4696  *      _hypotf (MSVCRT.@)
4697  *
4698  * Copied from musl: src/math/hypotf.c
4699  */
4700 float CDECL _hypotf(float x, float y)
4701 {
4702     UINT32 ux = *(UINT32*)&x, uy = *(UINT32*)&y, ut;
4703     float z;
4704
4705     ux &= -1U >> 1;
4706     uy &= -1U >> 1;
4707     if (ux < uy) {
4708         ut = ux;
4709         ux = uy;
4710         uy = ut;
4711     }
4712
4713     x = *(float*)&ux;
4714     y = *(float*)&uy;
4715     if (uy == 0xff << 23)
4716         return y;
4717     if (ux >= 0xff << 23 || uy == 0 || ux - uy >= 25 << 23)
4718         return x + y;
4719
4720     z = 1;
4721     if (ux >= (0x7f + 60) << 23) {
4722         z = 0x1p90f;
4723         x *= 0x1p-90f;
4724         y *= 0x1p-90f;
4725     } else if (uy < (0x7f - 60) << 23) {
4726         z = 0x1p-90f;
4727         x *= 0x1p90f;
4728         y *= 0x1p90f;
4729     }
4730     return z * sqrtf((double)x * x + (double)y * y);
4731 }
4732
/*********************************************************************
 *              ceil (MSVCRT.@)
 *
 * Based on musl: src/math/ceilf.c
 *
 * Rounds x up to an integral value by editing the bit pattern directly.
 */
double CDECL ceil( double x )
{
    union {double f; UINT64 i;} bits = {x};
    int exponent = (bits.i >> 52 & 0x7ff) - 0x3ff;
    int negative = bits.i >> 63 != 0;
    UINT64 frac_mask;

    /* no fraction bits left (this also covers inf and nan) */
    if (exponent >= 52)
        return x;

    if (exponent < 0) {
        /* |x| < 1 */
        if (negative)
            return -0.0;
        if (bits.i << 1)
            return 1.0;
        return bits.f;
    }

    frac_mask = 0x000fffffffffffffULL >> exponent;
    if (!(bits.i & frac_mask))
        return x;
    /* positive values are bumped past the next integer before truncating */
    if (!negative)
        bits.i += frac_mask;
    bits.i &= ~frac_mask;
    return bits.f;
}
4761
/*********************************************************************
 *              floor (MSVCRT.@)
 *
 * Based on musl: src/math/floorf.c
 *
 * Rounds x down to an integral value by editing the bit pattern directly.
 */
double CDECL floor( double x )
{
    union {double f; UINT64 i;} bits = {x};
    int exponent = (int)(bits.i >> 52 & 0x7ff) - 0x3ff;
    int negative = bits.i >> 63 != 0;
    UINT64 frac_mask;

    /* no fraction bits left (this also covers inf and nan) */
    if (exponent >= 52)
        return x;

    if (exponent < 0) {
        /* |x| < 1 */
        if (!negative)
            return 0;
        if (bits.i << 1)
            return -1;
        return bits.f;
    }

    frac_mask = 0x000fffffffffffffULL >> exponent;
    if (!(bits.i & frac_mask))
        return x;
    /* negative values are bumped past the next integer before truncating */
    if (negative)
        bits.i += frac_mask;
    bits.i &= ~frac_mask;
    return bits.f;
}
4790
/*********************************************************************
 *      fma (MSVCRT.@)
 *
 * Copied from musl: src/math/fma.c
 */
/* A double taken apart by normalize() for the integer arithmetic in fma(). */
struct fma_num
{
    UINT64 m;   /* fraction with the implicit bit set, shifted left by one */
    int e;      /* exponent field minus (0x3ff + 52 + 1); normalize() substitutes
                   0x800 for the exponent field of a zero before subtracting */
    int sign;   /* 0x800 when the sign bit is set, 0 otherwise */
};
4802
4803 static struct fma_num normalize(double x)
4804 {
4805     UINT64 ix = *(UINT64*)&x;
4806     int e = ix >> 52;
4807     int sign = e & 0x800;
4808     struct fma_num ret;
4809
4810     e &= 0x7ff;
4811     if (!e) {
4812         x *= 0x1p63;
4813         ix = *(UINT64*)&x;
4814         e = ix >> 52 & 0x7ff;
4815         e = e ? e - 63 : 0x800;
4816     }
4817     ix &= (1ull << 52) - 1;
4818     ix |= 1ull << 52;
4819     ix <<= 1;
4820     e -= 0x3ff + 52 + 1;
4821
4822     ret.m = ix;
4823     ret.e = e;
4824     ret.sign = sign;
4825     return ret;
4826 }
4827
/* Multiplies x by y from 32-bit halves and stores the low and high 64 bits
 * of the product in *lo and *hi. The two cross products are summed in a
 * single 64-bit value without carry handling; fma() only passes the
 * significands produced by normalize(). */
static void mul(UINT64 *hi, UINT64 *lo, UINT64 x, UINT64 y)
{
    const UINT64 x_lo = (UINT32)x, x_hi = x >> 32;
    const UINT64 y_lo = (UINT32)y, y_hi = y >> 32;
    const UINT64 low = x_lo * y_lo;
    const UINT64 cross = x_lo * y_hi + x_hi * y_lo;
    const UINT64 high = x_hi * y_hi;
    const UINT64 sum = low + (cross << 32);

    *lo = sum;
    /* (low > sum) is the carry out of the low word */
    *hi = high + (cross >> 32) + (low > sum);
}
4840
/*
 * Returns x * y + z.
 *
 * Zero, infinite and NaN operands are dealt with up front using ordinary
 * double arithmetic. Otherwise the significands of x and y are multiplied
 * into the 128-bit value rhi:rlo, z is aligned to it and added or
 * subtracted, and the top 63 bits of the result (the last one acting as a
 * sticky bit) are converted to double and scaled with __scalbn().
 * errno is set to EDOM when the early-out result is NaN although none of
 * the operands checked there is.
 */
double CDECL fma( double x, double y, double z )
{
    int e, d, sign, samesign, nonzero;
    UINT64 rhi, rlo, zhi, zlo;
    struct fma_num nx, ny, nz;
    double r;
    INT64 i;

    /* normalize so top 10bits and last bit are 0 */
    nx = normalize(x);
    ny = normalize(y);
    nz = normalize(z);

    /* x or y has an all-ones exponent field or is zero (normalize() gives
       zeros an even larger e) */
    if (nx.e >= 0x7ff - 0x3ff - 52 - 1 || ny.e >= 0x7ff - 0x3ff - 52 - 1) {
        r = x * y + z;
        if (!isnan(x) && !isnan(y) && !isnan(z) && isnan(r)) *_errno() = EDOM;
        return r;
    }
    /* same test for z: a zero z still needs the product, otherwise z
       itself is returned */
    if (nz.e >= 0x7ff - 0x3ff - 52 - 1) {
        if (nz.e > 0x7ff - 0x3ff - 52 - 1) {/* z==0 */
            r = x * y + z;
            if (!isnan(x) && !isnan(y) && isnan(r)) *_errno() = EDOM;
            return r;
        }
        return z;
    }

    /* mul: r = x*y */
    mul(&rhi, &rlo, nx.m, ny.m);
    /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */

    /* align exponents */
    e = nx.e + ny.e;
    d = nz.e - e;
    /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */
    if (d > 0) {
        if (d < 64) {
            zlo = nz.m << d;
            zhi = nz.m >> (64 - d);
        } else {
            zlo = 0;
            zhi = nz.m;
            e = nz.e - 64;
            d -= 64;
            if (d < 64 && d) {
                /* bits shifted out of rlo are folded into its lowest bit */
                rlo = rhi << (64 - d) | rlo >> d | !!(rlo << (64 - d));
                rhi = rhi >> d;
            } else if (d) {
                rlo = 1;
                rhi = 0;
            }
        }
    } else {
        zhi = 0;
        d = -d;
        if (d == 0) {
            zlo = nz.m;
        } else if (d < 64) {
            /* bits shifted out of z are folded into the lowest bit */
            zlo = nz.m >> d | !!(nz.m << (64 - d));
        } else {
            zlo = 1;
        }
    }

    /* add */
    sign = nx.sign ^ ny.sign;
    samesign = !(sign ^ nz.sign);
    nonzero = 1;
    if (samesign) {
        /* r += z */
        rlo += zlo;
        rhi += zhi + (rlo < zlo);
    } else {
        /* r -= z */
        UINT64 t = rlo;
        rlo -= zlo;
        rhi = rhi - zhi - (t < rlo);
        /* a set top bit means the difference went negative: negate the
           128-bit value and flip the result sign */
        if (rhi >> 63) {
            rlo = -rlo;
            rhi = -rhi - !!rlo;
            sign = !sign;
        }
        nonzero = !!rhi;
    }

    /* set rhi to top 63bit of the result (last bit is sticky) */
    if (nonzero) {
        e += 64;
        /* BitScanReverse() is applied to one 32-bit half at a time; d ends
           up as the number of leading zero bits of rhi minus one */
        if (rhi >> 32) {
            BitScanReverse((DWORD*)&d, rhi >> 32);
            d = 31 - d - 1;
        } else {
            BitScanReverse((DWORD*)&d, rhi);
            d = 63 - d - 1;
        }
        /* note: d > 0 */
        rhi = rhi << d | rlo >> (64 - d) | !!(rlo << d);
    } else if (rlo) {
        /* same computation on rlo; d is -1 when its top bit is set */
        if (rlo >> 32) {
            BitScanReverse((DWORD*)&d, rlo >> 32);
            d = 31 - d - 1;
        } else {
            BitScanReverse((DWORD*)&d, rlo);
            d = 63 - d - 1;
        }
        if (d < 0)
            rhi = rlo >> 1 | (rlo & 1);
        else
            rhi = rlo << d;
    } else {
        /* exact +-0 */
        return x * y + z;
    }
    e -= d;

    /* convert to double */
    i = rhi; /* i is in [1<<62,(1<<63)-1] */
    if (sign)
        i = -i;
    r = i; /* |r| is in [0x1p62,0x1p63] */

    if (e < -1022 - 62) {
        /* result is subnormal before rounding */
        if (e == -1022 - 63) {
            double c = 0x1p63;
            if (sign)
                c = -c;
            if (r == c) {
                /* min normal after rounding, underflow depends
                   on arch behaviour which can be imitated by
                   a double to float conversion */
                float fltmin = 0x0.ffffff8p-63 * FLT_MIN * r;
                return DBL_MIN / FLT_MIN * fltmin;
            }
            /* one bit is lost when scaled, add another top bit to
               only round once at conversion if it is inexact */
            if (rhi << 53) {
                double tiny;

                i = rhi >> 1 | (rhi & 1) | 1ull << 62;
                if (sign)
                    i = -i;
                r = i;
                r = 2 * r - c; /* remove top bit */

                /* raise underflow portably, such that it
                   cannot be optimized away */
                tiny = DBL_MIN / FLT_MIN * r;
                r += (double)(tiny * tiny) * (r - r);
            }
        } else {
            /* only round once when scaled */
            d = 10;
            i = (rhi >> d | !!(rhi << (64 - d))) << d;
            if (sign)
                i = -i;
            r = i;
        }
    }
    return __scalbn(r, e);
}
5002
/*********************************************************************
 *      fmaf (MSVCRT.@)
 *
 * Copied from musl: src/math/fmaf.c
 *
 * Evaluates x * y + z in double precision and returns it as a float.
 * The double result is used directly unless it lies exactly halfway
 * between two float values, is inexact, and the rounding mode is
 * round-to-nearest; in that case its low bit is adjusted first.
 * errno is set to EDOM when the result is NaN although no operand is.
 */
float CDECL fmaf( float x, float y, float z )
{
    union { double f; UINT64 i; } u;
    double xy, adjust;
    int e;

    xy = (double)x * y;
    u.f = xy + z;
    e = u.i>>52 & 0x7ff; /* exponent field of the double result */
    /* Common case: The double precision result is fine. */
    if ((u.i & 0x1fffffff) != 0x10000000 || /* not a halfway case */
            e == 0x7ff || /* NaN */
            (u.f - xy == z && u.f - z == xy) || /* exact */
            (_controlfp(0, 0) & _MCW_RC) != _RC_NEAR) /* not round-to-nearest */
    {
        if (!isnan(x) && !isnan(y) && !isnan(z) && isnan(u.f)) *_errno() = EDOM;

        /* underflow may not be raised correctly, example:
           fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) */
        if (e < 0x3ff-126 && e >= 0x3ff-149 && _statusfp() & _SW_INEXACT)
            fp_barrierf((float)u.f * (float)u.f);
        return u.f;
    }

    /*
     * If result is inexact, and exactly halfway between two float values,
     * we need to adjust the low-order bit in the direction of the error.
     */
    /* redo the sum with the rounding mode set to chop; round-to-nearest is
       the mode established by the test above, so that is what gets restored */
    _controlfp(_RC_CHOP, _MCW_RC);
    adjust = fp_barrier(xy + z);
    _controlfp(_RC_NEAR, _MCW_RC);
    if (u.f == adjust)
        u.i++;
    return u.f;
}
5043
/*********************************************************************
 *              fabs (MSVCRT.@)
 *
 * Copied from musl: src/math/fabsf.c
 *
 * Returns x with its sign bit cleared.
 */
double CDECL fabs( double x )
{
    union { double f; UINT64 i; } bits;

    bits.f = x;
    bits.i &= 0x7fffffffffffffffull;
    return bits.f;
}
5055
/*********************************************************************
 *              frexp (MSVCRT.@)
 *
 * Copied from musl: src/math/frexp.c
 *
 * Splits x into a fraction with magnitude in [0.5, 1) and a power of two
 * stored in *e. Zero yields *e = 0; for inf and nan x is returned and *e
 * is not written.
 */
double CDECL frexp( double x, int *e )
{
    union { double f; UINT64 i; } bits;
    int biased, prescale = 0;

    bits.f = x;
    biased = bits.i >> 52 & 0x7ff;

    if (biased == 0x7ff)
        return x;

    if (!biased) {
        if (!x) {
            *e = 0;
            return x;
        }
        /* subnormal: scale up by 2^64 and take that back out of the exponent */
        bits.f = x * 0x1p64;
        biased = bits.i >> 52 & 0x7ff;
        prescale = 64;
    }

    *e = biased - 0x3fe - prescale;
    /* keep sign and fraction, force the exponent field of 0.5 */
    bits.i &= 0x800fffffffffffffull;
    bits.i |= 0x3fe0000000000000ull;
    return bits.f;
}
5081
/*********************************************************************
 *              modf (MSVCRT.@)
 *
 * Copied from musl: src/math/modf.c
 *
 * Stores the integral part of x in *iptr and returns the fractional
 * part; both carry the sign of x.
 */
double CDECL modf( double x, double *iptr )
{
    union {double f; UINT64 i;} bits = {x};
    const UINT64 sign_bit = 1ULL << 63;
    int exponent = (bits.i >> 52 & 0x7ff) - 0x3ff;
    UINT64 frac_mask;

    /* no integral part*/
    if (exponent < 0) {
        bits.i &= sign_bit;
        *iptr = bits.f;
        return x;
    }

    /* no fractional part */
    if (exponent >= 52) {
        *iptr = x;
        if (exponent == 0x400 && bits.i << 12 != 0) /* nan */
            return x;
        bits.i &= sign_bit;
        return bits.f;
    }

    /* bits of the representation that lie below the binary point */
    frac_mask = -1ULL >> 12 >> exponent;
    if ((bits.i & frac_mask) == 0) {
        /* already integral: the fraction is a zero with the sign of x */
        *iptr = x;
        bits.i &= sign_bit;
        return bits.f;
    }
    bits.i &= ~frac_mask;
    *iptr = bits.f;
    return x - bits.f;
}
5119
5120 /**********************************************************************
5121  *              _statusfp2 (MSVCRT.@)
5122  *
5123  * Not exported by native msvcrt, added in msvcr80.
5124  */
5125 #if defined(__i386__) || defined(__x86_64__)
5126 void CDECL _statusfp2( unsigned int *x86_sw, unsigned int *sse2_sw )
5127 {
5128 #if defined(__GNUC__) || defined(__clang__)
5129     unsigned int flags;
5130     unsigned long fpword;
5131
5132     if (x86_sw)
5133     {
5134         __asm__ __volatile__( "fstsw %0" : "=m" (fpword) );
5135         flags = 0;
5136         if (fpword & 0x1)  flags |= _SW_INVALID;
5137         if (fpword & 0x2)  flags |= _SW_DENORMAL;
5138         if (fpword & 0x4)  flags |= _SW_ZERODIVIDE;
5139         if (fpword & 0x8)  flags |= _SW_OVERFLOW;
5140         if (fpword & 0x10) flags |= _SW_UNDERFLOW;
5141         if (fpword & 0x20) flags |= _SW_INEXACT;
5142         *x86_sw = flags;
5143     }
5144
5145     if (!sse2_sw) return;
5146
5147     if (sse2_supported)
5148     {
5149         __asm__ __volatile__( "stmxcsr %0" : "=m" (fpword) );
5150         flags = 0;
5151         if (fpword & 0x1)  flags |= _SW_INVALID;
5152         if (fpword & 0x2)  flags |= _SW_DENORMAL;
5153         if (fpword & 0x4)  flags |= _SW_ZERODIVIDE;
5154         if (fpword & 0x8)  flags |= _SW_OVERFLOW;
5155         if (fpword & 0x10) flags |= _SW_UNDERFLOW;
5156         if (fpword & 0x20) flags |= _SW_INEXACT;
5157         *sse2_sw = flags;
5158     }
5159     else *sse2_sw = 0;
5160 #else
5161     FIXME( "not implemented\n" );
5162 #endif
5163 }
5164 #endif
5165
5166 /**********************************************************************
5167  *              _statusfp (MSVCRT.@)
5168  */
5169 unsigned int CDECL _statusfp(void)
5170 {
5171     unsigned int flags = 0;
5172 #if defined(__i386__) || defined(__x86_64__)
5173     unsigned int x86_sw, sse2_sw;
5174
5175     _statusfp2( &x86_sw, &sse2_sw );
5176     /* FIXME: there's no definition for ambiguous status, just return all status bits for now */
5177     flags = x86_sw | sse2_sw;
5178 #elif defined(__aarch64__)
5179     ULONG_PTR fpsr;
5180
5181     __asm__ __volatile__( "mrs %0, fpsr" : "=r" (fpsr) );
5182     if (fpsr & 0x1)  flags |= _SW_INVALID;
5183     if (fpsr & 0x2)  flags |= _SW_ZERODIVIDE;
5184     if (fpsr & 0x4)  flags |= _SW_OVERFLOW;
5185     if (fpsr & 0x8)  flags |= _SW_UNDERFLOW;
5186     if (fpsr & 0x10) flags |= _SW_INEXACT;
5187     if (fpsr & 0x80) flags |= _SW_DENORMAL;
5188 #else
5189     FIXME( "not implemented\n" );
5190 #endif
5191     return flags;
5192 }
5193
5194 /*********************************************************************
5195  *              _clearfp (MSVCRT.@)
5196  */
5197 unsigned int CDECL _clearfp(void)
5198 {
5199     unsigned int flags = 0;
5200 #if (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__))
5201     unsigned long fpword;
5202
5203     __asm__ __volatile__( "fnstsw %0; fnclex" : "=m" (fpword) );
5204     if (fpword & 0x1)  flags |= _SW_INVALID;
5205     if (fpword & 0x2)  flags |= _SW_DENORMAL;
5206     if (fpword & 0x4)  flags |= _SW_ZERODIVIDE;
5207     if (fpword & 0x8)  flags |= _SW_OVERFLOW;
5208     if (fpword & 0x10) flags |= _SW_UNDERFLOW;
5209     if (fpword & 0x20) flags |= _SW_INEXACT;
5210
5211     if (sse2_supported)
5212     {
5213         __asm__ __volatile__( "stmxcsr %0" : "=m" (fpword) );
5214         if (fpword & 0x1)  flags |= _SW_INVALID;
5215         if (fpword & 0x2)  flags |= _SW_DENORMAL;
5216         if (fpword & 0x4)  flags |= _SW_ZERODIVIDE;
5217         if (fpword & 0x8)  flags |= _SW_OVERFLOW;
5218         if (fpword & 0x10) flags |= _SW_UNDERFLOW;
5219         if (fpword & 0x20) flags |= _SW_INEXACT;
5220         fpword &= ~0x3f;
5221         __asm__ __volatile__( "ldmxcsr %0" : : "m" (fpword) );
5222     }
5223 #elif defined(__aarch64__)
5224     ULONG_PTR fpsr;
5225
5226     __asm__ __volatile__( "mrs %0, fpsr" : "=r" (fpsr) );
5227     if (fpsr & 0x1)  flags |= _SW_INVALID;
5228     if (fpsr & 0x2)  flags |= _SW_ZERODIVIDE;
5229     if (fpsr & 0x4)  flags |= _SW_OVERFLOW;
5230     if (fpsr & 0x8)  flags |= _SW_UNDERFLOW;
5231     if (fpsr & 0x10) flags |= _SW_INEXACT;
5232     if (fpsr & 0x80) flags |= _SW_DENORMAL;
5233     fpsr &= ~0x9f;
5234     __asm__ __volatile__( "msr fpsr, %0" :: "r" (fpsr) );
5235 #else
5236     FIXME( "not implemented\n" );
5237 #endif
5238     return flags;
5239 }
5240
5241 /*********************************************************************
5242  *              __fpecode (MSVCRT.@)
5243  */
5244 int * CDECL __fpecode(void)
5245 {
5246     return &msvcrt_get_thread_data()->fpecode;
5247 }
5248
5249 /*********************************************************************
5250  *              ldexp (MSVCRT.@)
5251  */
5252 double CDECL ldexp(double num, int exp)
5253 {
5254   double z = __scalbn(num, exp);
5255
5256   if (isfinite(num) && !isfinite(z))
5257     return math_error(_OVERFLOW, "ldexp", num, exp, z);
5258   if (num && isfinite(num) && !z)
5259     return math_error(_UNDERFLOW, "ldexp", num, exp, z);
5260   return z;
5261 }
5262
5263 /*********************************************************************
5264  *              _cabs (MSVCRT.@)
5265  */
5266 double CDECL _cabs(struct _complex num)
5267 {
5268   return sqrt(num.x * num.x + num.y * num.y);
5269 }
5270
/*********************************************************************
 *              _chgsign (MSVCRT.@)
 *
 * Returns num with its sign bit inverted.
 */
double CDECL _chgsign(double num)
{
    union { double f; UINT64 i; } bits;

    bits.f = num;
    bits.i ^= 0x8000000000000000ull;
    return bits.f;
}
5280
/*********************************************************************
 *              __control87_2 (MSVCR80.@)
 *
 * Not exported by native msvcrt, added in msvcr80.
 *
 * Reads the x87 control word and the SSE2 MXCSR control bits, converts
 * each of them to _EM_ / _RC_ / _PC_ / _IC_ / _DN_ flags and, when mask is
 * non-zero, replaces the flags selected by mask with those from newval
 * and loads the result back into the hardware.
 *
 * x86_cw  - if non-NULL, receives the (possibly updated) x87 flags
 * sse2_cw - if non-NULL, receives the (possibly updated) SSE2 flags,
 *           or 0 when sse2_supported is not set
 *
 * Returns 1, or 0 (after a FIXME) when built without GCC/clang inline
 * assembly support.
 */
#ifdef __i386__
int CDECL __control87_2( unsigned int newval, unsigned int mask,
                         unsigned int *x86_cw, unsigned int *sse2_cw )
{
#if defined(__GNUC__) || defined(__clang__)
    /* exception mask bits: hardware bit <-> _EM_* flag */
    static const struct { unsigned int hw, flag; } x87_em[] =
    {
        { 0x1,  _EM_INVALID },
        { 0x2,  _EM_DENORMAL },
        { 0x4,  _EM_ZERODIVIDE },
        { 0x8,  _EM_OVERFLOW },
        { 0x10, _EM_UNDERFLOW },
        { 0x20, _EM_INEXACT },
    },
    sse_em[] =
    {
        { 0x80,   _EM_INVALID },
        { 0x100,  _EM_DENORMAL },
        { 0x200,  _EM_ZERODIVIDE },
        { 0x400,  _EM_OVERFLOW },
        { 0x800,  _EM_UNDERFLOW },
        { 0x1000, _EM_INEXACT },
    };
    unsigned long hw;
    unsigned int cw, prev, field, i;

    if (x86_cw)
    {
        __asm__ __volatile__( "fstcw %0" : "=m" (hw) );

        /* hardware word -> flag constants */
        cw = 0;
        for (i = 0; i < sizeof(x87_em) / sizeof(x87_em[0]); i++)
            if (hw & x87_em[i].hw) cw |= x87_em[i].flag;

        field = hw & 0xc00;     /* rounding control */
        if (field == 0xc00)      cw |= _RC_UP|_RC_DOWN;
        else if (field == 0x800) cw |= _RC_UP;
        else if (field == 0x400) cw |= _RC_DOWN;

        field = hw & 0x300;     /* precision control */
        if (field == 0x0)        cw |= _PC_24;
        else if (field == 0x200) cw |= _PC_53;
        else if (field == 0x300) cw |= _PC_64;

        if (hw & 0x1000) cw |= _IC_AFFINE;

        TRACE( "x86 flags=%08x newval=%08x mask=%08x\n", cw, newval, mask );
        if (mask)
        {
            cw = (cw & ~mask) | (newval & mask);

            /* flag constants -> hardware word */
            hw = 0;
            for (i = 0; i < sizeof(x87_em) / sizeof(x87_em[0]); i++)
                if (cw & x87_em[i].flag) hw |= x87_em[i].hw;

            field = cw & _MCW_RC;
            if (field == (_RC_UP|_RC_DOWN)) hw |= 0xc00;
            else if (field == _RC_UP)       hw |= 0x800;
            else if (field == _RC_DOWN)     hw |= 0x400;

            field = cw & _MCW_PC;
            if (field == _PC_64)      hw |= 0x300;
            else if (field == _PC_53) hw |= 0x200;
            else if (field == _PC_24) hw |= 0x0;

            if (cw & _IC_AFFINE) hw |= 0x1000;

            __asm__ __volatile__( "fldcw %0" : : "m" (hw) );
        }
        *x86_cw = cw;
    }

    if (!sse2_cw) return 1;

    if (!sse2_supported)
    {
        *sse2_cw = 0;
        return 1;
    }

    __asm__ __volatile__( "stmxcsr %0" : "=m" (hw) );

    /* hardware word -> flag constants */
    cw = 0;
    for (i = 0; i < sizeof(sse_em) / sizeof(sse_em[0]); i++)
        if (hw & sse_em[i].hw) cw |= sse_em[i].flag;

    field = hw & 0x6000;        /* rounding control */
    if (field == 0x6000)      cw |= _RC_UP|_RC_DOWN;
    else if (field == 0x4000) cw |= _RC_UP;
    else if (field == 0x2000) cw |= _RC_DOWN;

    field = hw & 0x8040;        /* denormal handling */
    if (field == 0x0040)      cw |= _DN_FLUSH_OPERANDS_SAVE_RESULTS;
    else if (field == 0x8000) cw |= _DN_SAVE_OPERANDS_FLUSH_RESULTS;
    else if (field == 0x8040) cw |= _DN_FLUSH;

    TRACE( "sse2 flags=%08x newval=%08x mask=%08x\n", cw, newval, mask );
    if (mask)
    {
        prev = cw;
        /* only these groups are taken from newval for the SSE2 word */
        mask &= _MCW_EM | _MCW_RC | _MCW_DN;
        cw = (cw & ~mask) | (newval & mask);

        /* the register is only rewritten when something changed */
        if (cw != prev)
        {
            hw = 0;
            for (i = 0; i < sizeof(sse_em) / sizeof(sse_em[0]); i++)
                if (cw & sse_em[i].flag) hw |= sse_em[i].hw;

            field = cw & _MCW_RC;
            if (field == (_RC_UP|_RC_DOWN)) hw |= 0x6000;
            else if (field == _RC_UP)       hw |= 0x4000;
            else if (field == _RC_DOWN)     hw |= 0x2000;

            field = cw & _MCW_DN;
            if (field == _DN_FLUSH_OPERANDS_SAVE_RESULTS)      hw |= 0x0040;
            else if (field == _DN_SAVE_OPERANDS_FLUSH_RESULTS) hw |= 0x8000;
            else if (field == _DN_FLUSH)                       hw |= 0x8040;

            __asm__ __volatile__( "ldmxcsr %0" : : "m" (hw) );
        }
    }
    *sse2_cw = cw;
    return 1;
#else
    FIXME( "not implemented\n" );
    return 0;
#endif
}
#endif
5423
5424 /*********************************************************************
5425  *              _control87 (MSVCRT.@)
5426  */
5427 unsigned int CDECL _control87(unsigned int newval, unsigned int mask)
5428 {
5429     unsigned int flags = 0;
5430 #ifdef __i386__
5431     unsigned int sse2_cw;
5432
5433     __control87_2( newval, mask, &flags, &sse2_cw );
5434
5435     if ((flags ^ sse2_cw) & (_MCW_EM | _MCW_RC)) flags |= _EM_AMBIGUOUS;
5436     flags |= sse2_cw;
5437 #elif defined(__x86_64__)
5438     unsigned long fpword;
5439     unsigned int old_flags;
5440
5441     __asm__ __volatile__( "stmxcsr %0" : "=m" (fpword) );
5442     if (fpword & 0x80)   flags |= _EM_INVALID;
5443     if (fpword & 0x100)  flags |= _EM_DENORMAL;
5444     if (fpword & 0x200)  flags |= _EM_ZERODIVIDE;
5445     if (fpword & 0x400)  flags |= _EM_OVERFLOW;
5446     if (fpword & 0x800)  flags |= _EM_UNDERFLOW;
5447     if (fpword & 0x1000) flags |= _EM_INEXACT;
5448     switch (fpword & 0x6000)
5449     {
5450     case 0x6000: flags |= _RC_CHOP; break;
5451     case 0x4000: flags |= _RC_UP; break;
5452     case 0x2000: flags |= _RC_DOWN; break;
5453     }
5454     switch (fpword & 0x8040)
5455     {
5456     case 0x0040: flags |= _DN_FLUSH_OPERANDS_SAVE_RESULTS; break;
5457     case 0x8000: flags |= _DN_SAVE_OPERANDS_FLUSH_RESULTS; break;
5458     case 0x8040: flags |= _DN_FLUSH; break;
5459     }
5460     old_flags = flags;
5461     mask &= _MCW_EM | _MCW_RC | _MCW_DN;
5462     flags = (flags & ~mask) | (newval & mask);
5463     if (flags != old_flags)
5464     {
5465         fpword = 0;
5466         if (flags & _EM_INVALID)    fpword |= 0x80;
5467         if (flags & _EM_DENORMAL)   fpword |= 0x100;
5468         if (flags & _EM_ZERODIVIDE) fpword |= 0x200;
5469         if (flags & _EM_OVERFLOW)   fpword |= 0x400;
5470         if (flags & _EM_UNDERFLOW)  fpword |= 0x800;
5471         if (flags & _EM_INEXACT)    fpword |= 0x1000;
5472         switch (flags & _MCW_RC)
5473         {
5474         case _RC_CHOP: fpword |= 0x6000; break;
5475         case _RC_UP:   fpword |= 0x4000; break;
5476         case _RC_DOWN: fpword |= 0x2000; break;
5477         }
5478         switch (flags & _MCW_DN)
5479         {
5480         case _DN_FLUSH_OPERANDS_SAVE_RESULTS: fpword |= 0x0040; break;
5481         case _DN_SAVE_OPERANDS_FLUSH_RESULTS: fpword |= 0x8000; break;
5482         case _DN_FLUSH:                       fpword |= 0x8040; break;
5483         }
5484         __asm__ __volatile__( "ldmxcsr %0" :: "m" (fpword) );
5485     }
5486 #elif defined(__aarch64__)
5487     ULONG_PTR fpcr;
5488
5489     __asm__ __volatile__( "mrs %0, fpcr" : "=r" (fpcr) );
5490     if (!(fpcr & 0x100))  flags |= _EM_INVALID;
5491     if (!(fpcr & 0x200))  flags |= _EM_ZERODIVIDE;
5492     if (!(fpcr & 0x400))  flags |= _EM_OVERFLOW;
5493     if (!(fpcr & 0x800))  flags |= _EM_UNDERFLOW;
5494     if (!(fpcr & 0x1000)) flags |= _EM_INEXACT;
5495     if (!(fpcr & 0x8000)) flags |= _EM_DENORMAL;
5496     switch (fpcr & 0xc00000)
5497     {
5498     case 0x400000: flags |= _RC_UP; break;
5499     case 0x800000: flags |= _RC_DOWN; break;
5500     case 0xc00000: flags |= _RC_CHOP; break;
5501     }
5502     flags = (flags & ~mask) | (newval & mask);
5503     fpcr &= ~0xc09f00ul;
5504     if (!(flags & _EM_INVALID)) fpcr |= 0x100;
5505     if (!(flags & _EM_ZERODIVIDE)) fpcr |= 0x200;
5506     if (!(flags & _EM_OVERFLOW)) fpcr |= 0x400;
5507     if (!(flags & _EM_UNDERFLOW)) fpcr |= 0x800;
5508     if (!(flags & _EM_INEXACT)) fpcr |= 0x1000;
5509     if (!(flags & _EM_DENORMAL)) fpcr |= 0x8000;
5510     switch (flags & _MCW_RC)
5511     {
5512     case _RC_CHOP: fpcr |= 0xc00000; break;
5513     case _RC_UP:   fpcr |= 0x400000; break;
5514     case _RC_DOWN: fpcr |= 0x800000; break;
5515     }
5516     __asm__ __volatile__( "msr fpcr, %0" :: "r" (fpcr) );
5517 #else
5518     FIXME( "not implemented\n" );
5519 #endif
5520     return flags;
5521 }
5522
5523 /*********************************************************************
5524  *              _controlfp (MSVCRT.@)
5525  */
5526 unsigned int CDECL _controlfp(unsigned int newval, unsigned int mask)
5527 {
5528   return _control87( newval, mask & ~_EM_DENORMAL );
5529 }
5530
5531 /*********************************************************************
5532  *              _set_controlfp (MSVCRT.@)
5533  */
5534 void CDECL _set_controlfp( unsigned int newval, unsigned int mask )
5535 {
5536     _controlfp( newval, mask );
5537 }
5538
5539 /*********************************************************************
5540  *              _controlfp_s (MSVCRT.@)
5541  */
5542 int CDECL _controlfp_s(unsigned int *cur, unsigned int newval, unsigned int mask)
5543 {
5544     static const unsigned int all_flags = (_MCW_EM | _MCW_IC | _MCW_RC |
5545                                            _MCW_PC | _MCW_DN);
5546     unsigned int val;
5547
5548     if (!MSVCRT_CHECK_PMT( !(newval & mask & ~all_flags) ))
5549     {
5550         if (cur) *cur = _controlfp( 0, 0 );  /* retrieve it anyway */
5551         return EINVAL;
5552     }
5553     val = _controlfp( newval, mask );
5554     if (cur) *cur = val;
5555     return 0;
5556 }
5557
5558 #if _MSVCR_VER >= 140
/* Bit patterns used by fenv_encode()/fenv_decode().
 *
 * FENV_X_* values describe the first (x) argument of fenv_encode(),
 * FENV_Y_* values the second (y) one.  The exception and rounding
 * entries pair a low bit that the X and Y variants share with a high
 * bit that tells the two apart; the remaining entries are single bits. */
enum fenv_masks
{
    /* x: exceptions */
    FENV_X_INEXACT    = 0x00010001,
    FENV_X_UNDERFLOW  = 0x00020002,
    FENV_X_OVERFLOW   = 0x00040004,
    FENV_X_ZERODIVIDE = 0x00080008,
    FENV_X_INVALID    = 0x00100010,
    FENV_X_DENORMAL   = 0x00200020,
    /* x: rounding, precision and infinity control */
    FENV_X_DOWN       = 0x00400100,
    FENV_X_UP         = 0x00800200,
    FENV_X_53         = 0x00001000,
    FENV_X_24         = 0x00002000,
    FENV_X_AFFINE     = 0x00004000,

    /* y: exceptions */
    FENV_Y_INEXACT    = 0x01000001,
    FENV_Y_UNDERFLOW  = 0x02000002,
    FENV_Y_OVERFLOW   = 0x04000004,
    FENV_Y_ZERODIVIDE = 0x08000008,
    FENV_Y_INVALID    = 0x10000010,
    FENV_Y_DENORMAL   = 0x20000020,
    /* y: rounding and denormal control */
    FENV_Y_DOWN       = 0x40000100,
    FENV_Y_UP         = 0x80000200,
    FENV_Y_FLUSH      = 0x00000400,
    FENV_Y_FLUSH_SAVE = 0x00000800
};
5583
5584 /* encodes x87/sse control/status word in ulong */
5585 static __msvcrt_ulong fenv_encode(unsigned int x, unsigned int y)
5586 {
5587     __msvcrt_ulong ret = 0;
5588
5589     if (x & _EM_INVALID) ret |= FENV_X_INVALID;
5590     if (x & _EM_DENORMAL) ret |= FENV_X_DENORMAL;
5591     if (x & _EM_ZERODIVIDE) ret |= FENV_X_ZERODIVIDE;
5592     if (x & _EM_OVERFLOW) ret |= FENV_X_OVERFLOW;
5593     if (x & _EM_UNDERFLOW) ret |= FENV_X_UNDERFLOW;
5594     if (x & _EM_INEXACT) ret |= FENV_X_INEXACT;
5595     if (x & _IC_AFFINE) ret |= FENV_X_AFFINE;
5596     if (x & _RC_UP) ret |= FENV_X_UP;
5597     if (x & _RC_DOWN) ret |= FENV_X_DOWN;
5598     if (x & _PC_24) ret |= FENV_X_24;
5599     if (x & _PC_53) ret |= FENV_X_53;
5600     x &= ~(_MCW_EM | _MCW_IC | _MCW_RC | _MCW_PC);
5601
5602     if (y & _EM_INVALID) ret |= FENV_Y_INVALID;
5603     if (y & _EM_DENORMAL) ret |= FENV_Y_DENORMAL;
5604     if (y & _EM_ZERODIVIDE) ret |= FENV_Y_ZERODIVIDE;
5605     if (y & _EM_OVERFLOW) ret |= FENV_Y_OVERFLOW;
5606     if (y & _EM_UNDERFLOW) ret |= FENV_Y_UNDERFLOW;
5607     if (y & _EM_INEXACT) ret |= FENV_Y_INEXACT;
5608     if (y & _RC_UP) ret |= FENV_Y_UP;
5609     if (y & _RC_DOWN) ret |= FENV_Y_DOWN;
5610     if (y & _DN_FLUSH) ret |= FENV_Y_FLUSH;
5611     if (y & _DN_FLUSH_OPERANDS_SAVE_RESULTS) ret |= FENV_Y_FLUSH_SAVE;
5612     y &= ~(_MCW_EM | _MCW_IC | _MCW_RC | _MCW_DN);
5613
5614     if(x || y) FIXME("unsupported flags: %x, %x\n", x, y);
5615     return ret;
5616 }
5617
5618 /* decodes x87/sse control/status word, returns FALSE on error */
5619 #if (defined(__i386__) || defined(__x86_64__))
5620 static BOOL fenv_decode(__msvcrt_ulong enc, unsigned int *x, unsigned int *y)
5621 {
5622     *x = *y = 0;
5623     if ((enc & FENV_X_INVALID) == FENV_X_INVALID) *x |= _EM_INVALID;
5624     if ((enc & FENV_X_DENORMAL) == FENV_X_DENORMAL) *x |= _EM_DENORMAL;
5625     if ((enc & FENV_X_ZERODIVIDE) == FENV_X_ZERODIVIDE) *x |= _EM_ZERODIVIDE;
5626     if ((enc & FENV_X_OVERFLOW) == FENV_X_OVERFLOW) *x |= _EM_OVERFLOW;
5627     if ((enc & FENV_X_UNDERFLOW) == FENV_X_UNDERFLOW) *x |= _EM_UNDERFLOW;
5628     if ((enc & FENV_X_INEXACT) == FENV_X_INEXACT) *x |= _EM_INEXACT;
5629     if ((enc & FENV_X_AFFINE) == FENV_X_AFFINE) *x |= _IC_AFFINE;
5630     if ((enc & FENV_X_UP) == FENV_X_UP) *x |= _RC_UP;
5631     if ((enc & FENV_X_DOWN) == FENV_X_DOWN) *x |= _RC_DOWN;
5632     if ((enc & FENV_X_24) == FENV_X_24) *x |= _PC_24;
5633     if ((enc & FENV_X_53) == FENV_X_53) *x |= _PC_53;
5634
5635     if ((enc & FENV_Y_INVALID) == FENV_Y_INVALID) *y |= _EM_INVALID;
5636     if ((enc & FENV_Y_DENORMAL) == FENV_Y_DENORMAL) *y |= _EM_DENORMAL;
5637     if ((enc & FENV_Y_ZERODIVIDE) == FENV_Y_ZERODIVIDE) *y |= _EM_ZERODIVIDE;
5638     if ((enc & FENV_Y_OVERFLOW) == FENV_Y_OVERFLOW) *y |= _EM_OVERFLOW;
5639     if ((enc & FENV_Y_UNDERFLOW) == FENV_Y_UNDERFLOW) *y |= _EM_UNDERFLOW;
5640     if ((enc & FENV_Y_INEXACT) == FENV_Y_INEXACT) *y |= _EM_INEXACT;
5641     if ((enc & FENV_Y_UP) == FENV_Y_UP) *y |= _RC_UP;
5642     if ((enc & FENV_Y_DOWN) == FENV_Y_DOWN) *y |= _RC_DOWN;
5643     if ((enc & FENV_Y_FLUSH) == FENV_Y_FLUSH) *y |= _DN_FLUSH;
5644     if ((enc & FENV_Y_FLUSH_SAVE) == FENV_Y_FLUSH_SAVE) *y |= _DN_FLUSH_OPERANDS_SAVE_RESULTS;
5645
5646     if (fenv_encode(*x, *y) != enc)
5647     {
5648         WARN("can't decode: %lx\n", enc);
5649         return FALSE;
5650     }
5651     return TRUE;
5652 }
5653 #endif
5654 #elif _MSVCR_VER >= 120
5655 #if (defined(__i386__) || defined(__x86_64__))
5656 static BOOL fenv_decode(__msvcrt_ulong enc, unsigned int *x, unsigned int *y)
5657 {
5658     *x = *y = enc;
5659     return TRUE;
5660 }
5661 #endif
5662 #endif
5663
5664 #if _MSVCR_VER>=120
5665 /*********************************************************************
5666  *              fegetenv (MSVCR120.@)
5667  */
5668 int CDECL fegetenv(fenv_t *env)
5669 {
5670 #if _MSVCR_VER>=140 && defined(__i386__)
5671     unsigned int x87, sse;
5672     __control87_2(0, 0, &x87, &sse);
5673     env->_Fe_ctl = fenv_encode(x87, sse);
5674     _statusfp2(&x87, &sse);
5675     env->_Fe_stat = fenv_encode(x87, sse);
5676 #elif _MSVCR_VER>=140
5677     env->_Fe_ctl = fenv_encode(0, _control87(0, 0));
5678     env->_Fe_stat = fenv_encode(0, _statusfp());
5679 #else
5680     env->_Fe_ctl = _controlfp(0, 0) & (_EM_INEXACT | _EM_UNDERFLOW |
5681             _EM_OVERFLOW | _EM_ZERODIVIDE | _EM_INVALID | _RC_CHOP);
5682     env->_Fe_stat = _statusfp();
5683 #endif
5684     return 0;
5685 }
5686
5687 /*********************************************************************
5688  *              feupdateenv (MSVCR120.@)
5689  */
5690 int CDECL feupdateenv(const fenv_t *env)
5691 {
5692     fenv_t set;
5693     fegetenv(&set);
5694     set._Fe_ctl = env->_Fe_ctl;
5695     set._Fe_stat |= env->_Fe_stat;
5696     return fesetenv(&set);
5697 }
5698
5699 /*********************************************************************
5700  *      fetestexcept (MSVCR120.@)
5701  */
5702 int CDECL fetestexcept(int flags)
5703 {
5704     return _statusfp() & flags;
5705 }
5706
5707 /*********************************************************************
5708  *      fesetexceptflag (MSVCR120.@)
5709  */
5710 int CDECL fesetexceptflag(const fexcept_t *status, int excepts)
5711 {
5712     fenv_t env;
5713
5714     excepts &= FE_ALL_EXCEPT;
5715     if(!excepts)
5716         return 0;
5717
5718     fegetenv(&env);
5719 #if _MSVCR_VER>=140 && (defined(__i386__) || defined(__x86_64__))
5720     env._Fe_stat &= ~fenv_encode(excepts, excepts);
5721     env._Fe_stat |= *status & fenv_encode(excepts, excepts);
5722 #elif _MSVCR_VER>=140
5723     env._Fe_stat &= ~fenv_encode(0, excepts);
5724     env._Fe_stat |= *status & fenv_encode(0, excepts);
5725 #else
5726     env._Fe_stat &= ~excepts;
5727     env._Fe_stat |= *status & excepts;
5728 #endif
5729     return fesetenv(&env);
5730 }
5731
5732 /*********************************************************************
5733  *      feraiseexcept (MSVCR120.@)
5734  */
5735 int CDECL feraiseexcept(int flags)
5736 {
5737     fenv_t env;
5738
5739     flags &= FE_ALL_EXCEPT;
5740     fegetenv(&env);
5741 #if _MSVCR_VER>=140 && defined(__i386__)
5742     env._Fe_stat |= fenv_encode(flags, flags);
5743 #elif _MSVCR_VER>=140
5744     env._Fe_stat |= fenv_encode(0, flags);
5745 #else
5746     env._Fe_stat |= flags;
5747 #endif
5748     return fesetenv(&env);
5749 }
5750
5751 /*********************************************************************
5752  *      feclearexcept (MSVCR120.@)
5753  */
5754 int CDECL feclearexcept(int flags)
5755 {
5756     fenv_t env;
5757
5758     fegetenv(&env);
5759     flags &= FE_ALL_EXCEPT;
5760 #if _MSVCR_VER>=140
5761     env._Fe_stat &= ~fenv_encode(flags, flags);
5762 #else
5763     env._Fe_stat &= ~flags;
5764 #endif
5765     return fesetenv(&env);
5766 }
5767
5768 /*********************************************************************
5769  *      fegetexceptflag (MSVCR120.@)
5770  */
5771 int CDECL fegetexceptflag(fexcept_t *status, int excepts)
5772 {
5773 #if _MSVCR_VER>=140 && defined(__i386__)
5774     unsigned int x87, sse;
5775     _statusfp2(&x87, &sse);
5776     *status = fenv_encode(x87 & excepts, sse & excepts);
5777 #elif _MSVCR_VER>=140
5778     *status = fenv_encode(0, _statusfp() & excepts);
5779 #else
5780     *status = _statusfp() & excepts;
5781 #endif
5782     return 0;
5783 }
5784 #endif
5785
5786 #if _MSVCR_VER>=140
5787 /*********************************************************************
5788  *              __fpe_flt_rounds (UCRTBASE.@)
5789  */
5790 int CDECL __fpe_flt_rounds(void)
5791 {
5792     unsigned int fpc = _controlfp(0, 0) & _RC_CHOP;
5793
5794     TRACE("()\n");
5795
5796     switch(fpc) {
5797         case _RC_CHOP: return 0;
5798         case _RC_NEAR: return 1;
5799         case _RC_UP: return 2;
5800         default: return 3;
5801     }
5802 }
5803 #endif
5804
5805 #if _MSVCR_VER>=120
5806
5807 /*********************************************************************
5808  *              fegetround (MSVCR120.@)
5809  */
5810 int CDECL fegetround(void)
5811 {
5812     return _controlfp(0, 0) & _MCW_RC;
5813 }
5814
5815 /*********************************************************************
5816  *              fesetround (MSVCR120.@)
5817  */
5818 int CDECL fesetround(int round_mode)
5819 {
5820     if (round_mode & (~_MCW_RC))
5821         return 1;
5822     _controlfp(round_mode, _MCW_RC);
5823     return 0;
5824 }
5825
5826 #endif /* _MSVCR_VER>=120 */
5827
5828 /*********************************************************************
5829  *              _copysign (MSVCRT.@)
5830  *
5831  * Copied from musl: src/math/copysign.c
5832  */
5833 double CDECL _copysign( double x, double y )
5834 {
5835     union { double f; UINT64 i; } ux = { x }, uy = { y };
5836     ux.i &= ~0ull >> 1;
5837     ux.i |= uy.i & 1ull << 63;
5838     return ux.f;
5839 }
5840
5841 /*********************************************************************
5842  *              _finite (MSVCRT.@)
5843  */
5844 int CDECL _finite(double num)
5845 {
5846     union { double f; UINT64 i; } u = { num };
5847     return (u.i & ~0ull >> 1) < 0x7ffull << 52;
5848 }
5849
5850 /*********************************************************************
5851  *              _fpreset (MSVCRT.@)
5852  */
5853 void CDECL _fpreset(void)
5854 {
5855 #if (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__))
5856     const unsigned int x86_cw = 0x27f;
5857     __asm__ __volatile__( "fninit; fldcw %0" : : "m" (x86_cw) );
5858     if (sse2_supported)
5859     {
5860         const unsigned long sse2_cw = 0x1f80;
5861         __asm__ __volatile__( "ldmxcsr %0" : : "m" (sse2_cw) );
5862     }
5863 #else
5864     FIXME( "not implemented\n" );
5865 #endif
5866 }
5867
5868 #if _MSVCR_VER>=120
5869 /*********************************************************************
5870  *              fesetenv (MSVCR120.@)
5871  */
5872 int CDECL fesetenv(const fenv_t *env)
5873 {
5874 #if (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__))
5875     unsigned int x87_cw, sse_cw, x87_stat, sse_stat;
5876     struct {
5877         WORD control_word;
5878         WORD unused1;
5879         WORD status_word;
5880         WORD unused2;
5881         WORD tag_word;
5882         WORD unused3;
5883         DWORD instruction_pointer;
5884         WORD code_segment;
5885         WORD unused4;
5886         DWORD operand_addr;
5887         WORD data_segment;
5888         WORD unused5;
5889     } fenv;
5890
5891     TRACE( "(%p)\n", env );
5892
5893     if (!env->_Fe_ctl && !env->_Fe_stat) {
5894         _fpreset();
5895         return 0;
5896     }
5897
5898     if (!fenv_decode(env->_Fe_ctl, &x87_cw, &sse_cw))
5899         return 1;
5900     if (!fenv_decode(env->_Fe_stat, &x87_stat, &sse_stat))
5901         return 1;
5902
5903     __asm__ __volatile__( "fnstenv %0" : "=m" (fenv) );
5904
5905     fenv.control_word &= ~0xc3d;
5906 #if _MSVCR_VER>=140
5907     fenv.control_word &= ~0x1302;
5908 #endif
5909     if (x87_cw & _EM_INVALID) fenv.control_word |= 0x1;
5910     if (x87_cw & _EM_ZERODIVIDE) fenv.control_word |= 0x4;
5911     if (x87_cw & _EM_OVERFLOW) fenv.control_word |= 0x8;
5912     if (x87_cw & _EM_UNDERFLOW) fenv.control_word |= 0x10;
5913     if (x87_cw & _EM_INEXACT) fenv.control_word |= 0x20;
5914     switch (x87_cw & _MCW_RC)
5915     {
5916         case _RC_UP|_RC_DOWN:   fenv.control_word |= 0xc00; break;
5917         case _RC_UP:            fenv.control_word |= 0x800; break;
5918         case _RC_DOWN:          fenv.control_word |= 0x400; break;
5919     }
5920 #if _MSVCR_VER>=140
5921     if (x87_cw & _EM_DENORMAL) fenv.control_word |= 0x2;
5922     switch (x87_cw & _MCW_PC)
5923     {
5924         case _PC_64: fenv.control_word |= 0x300; break;
5925         case _PC_53: fenv.control_word |= 0x200; break;
5926         case _PC_24: fenv.control_word |= 0x0; break;
5927     }
5928     if (x87_cw & _IC_AFFINE) fenv.control_word |= 0x1000;
5929 #endif
5930
5931     fenv.status_word &= ~0x3f;
5932     if (x87_stat & _SW_INVALID) fenv.status_word |= 0x1;
5933     if (x87_stat & _SW_DENORMAL) fenv.status_word |= 0x2;
5934     if (x87_stat & _SW_ZERODIVIDE) fenv.status_word |= 0x4;
5935     if (x87_stat & _SW_OVERFLOW) fenv.status_word |= 0x8;
5936     if (x87_stat & _SW_UNDERFLOW) fenv.status_word |= 0x10;
5937     if (x87_stat & _SW_INEXACT) fenv.status_word |= 0x20;
5938
5939     __asm__ __volatile__( "fldenv %0" : : "m" (fenv) : "st", "st(1)",
5940             "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)" );
5941
5942     if (sse2_supported)
5943     {
5944         DWORD fpword;
5945         __asm__ __volatile__( "stmxcsr %0" : "=m" (fpword) );
5946         fpword &= ~0x7ebf;
5947 #if _MSVCR_VER>=140
5948         fpword &= ~0x8140;
5949 #endif
5950         if (sse_cw & _EM_INVALID) fpword |= 0x80;
5951         if (sse_cw & _EM_ZERODIVIDE) fpword |= 0x200;
5952         if (sse_cw & _EM_OVERFLOW) fpword |= 0x400;
5953         if (sse_cw & _EM_UNDERFLOW) fpword |= 0x800;
5954         if (sse_cw & _EM_INEXACT) fpword |= 0x1000;
5955         switch (sse_cw & _MCW_RC)
5956         {
5957             case _RC_CHOP: fpword |= 0x6000; break;
5958             case _RC_UP:   fpword |= 0x4000; break;
5959             case _RC_DOWN: fpword |= 0x2000; break;
5960         }
5961         if (sse_stat & _SW_INVALID) fpword |= 0x1;
5962         if (sse_stat & _SW_DENORMAL) fpword |= 0x2;
5963         if (sse_stat & _SW_ZERODIVIDE) fpword |= 0x4;
5964         if (sse_stat & _SW_OVERFLOW) fpword |= 0x8;
5965         if (sse_stat & _SW_UNDERFLOW) fpword |= 0x10;
5966         if (sse_stat & _SW_INEXACT) fpword |= 0x20;
5967 #if _MSVCR_VER>=140
5968         if (sse_cw & _EM_DENORMAL) fpword |= 0x100;
5969         switch (sse_cw & _MCW_DN)
5970         {
5971             case _DN_FLUSH_OPERANDS_SAVE_RESULTS: fpword |= 0x0040; break;
5972             case _DN_SAVE_OPERANDS_FLUSH_RESULTS: fpword |= 0x8000; break;
5973             case _DN_FLUSH:                       fpword |= 0x8040; break;
5974         }
5975 #endif
5976         __asm__ __volatile__( "ldmxcsr %0" : : "m" (fpword) );
5977     }
5978
5979     return 0;
5980 #else
5981     FIXME( "not implemented\n" );
5982 #endif
5983     return 1;
5984 }
5985 #endif
5986
5987 /*********************************************************************
5988  *              _isnan (MSVCRT.@)
5989  */
5990 int CDECL _isnan(double num)
5991 {
5992     union { double f; UINT64 i; } u = { num };
5993     return (u.i & ~0ull >> 1) > 0x7ffull << 52;
5994 }
5995
/* Helper of j0_y0_approx(): evaluates 1 + R(z)/S(z) with z = 1/x^2.
 *
 * R and S are polynomials whose coefficient set is picked from the
 * magnitude of x (see the interval noted next to each table).  There is
 * no lower bound check: anything below the third threshold uses the
 * last set, which is meant for |x| >= 2. */
static double pzero(double x)
{
    static const double pR8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
        0.00000000000000000000e+00,
        -7.03124999999900357484e-02,
        -8.08167041275349795626e+00,
        -2.57063105679704847262e+02,
        -2.48521641009428822144e+03,
        -5.25304380490729545272e+03,
    }, pS8[5] = {
        1.16534364619668181717e+02,
        3.83374475364121826715e+03,
        4.05978572648472545552e+04,
        1.16752972564375915681e+05,
        4.76277284146730962675e+04,
    }, pR5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
        -1.14125464691894502584e-11,
        -7.03124940873599280078e-02,
        -4.15961064470587782438e+00,
        -6.76747652265167261021e+01,
        -3.31231299649172967747e+02,
        -3.46433388365604912451e+02,
    }, pS5[5] = {
        6.07539382692300335975e+01,
        1.05125230595704579173e+03,
        5.97897094333855784498e+03,
        9.62544514357774460223e+03,
        2.40605815922939109441e+03,
    }, pR3[6] = {/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */
        -2.54704601771951915620e-09,
        -7.03119616381481654654e-02,
        -2.40903221549529611423e+00,
        -2.19659774734883086467e+01,
        -5.80791704701737572236e+01,
        -3.14479470594888503854e+01,
    }, pS3[5] = {
        3.58560338055209726349e+01,
        3.61513983050303863820e+02,
        1.19360783792111533330e+03,
        1.12799679856907414432e+03,
        1.73580930813335754692e+02,
    }, pR2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
        -8.87534333032526411254e-08,
        -7.03030995483624743247e-02,
        -1.45073846780952986357e+00,
        -7.63569613823527770791e+00,
        -1.11931668860356747786e+01,
        -3.23364579351335335033e+00,
    }, pS2[5] = {
        2.22202997532088808441e+01,
        1.36206794218215208048e+02,
        2.70470278658083486789e+02,
        1.53875394208320329881e+02,
        1.46576176948256193810e+01,
    };

    const double *p, *q;
    double z, r, s;
    unsigned int ix;
    /* read the bits through a union; casting &x to an integer pointer
     * would access the double through an incompatible type */
    union { double f; unsigned long long i; } u = { x };

    /* high 32 bits of x with the sign bit removed */
    ix = u.i >> 32;
    ix &= 0x7fffffff;
    if (ix >= 0x40200000) {
        p = pR8;
        q = pS8;
    } else if (ix >= 0x40122E8B) {
        p = pR5;
        q = pS5;
    } else if (ix >= 0x4006DB6D) {
        p = pR3;
        q = pS3;
    } else /*ix >= 0x40000000*/ {
        p = pR2;
        q = pS2;
    }

    z = 1.0 / (x * x);
    r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
    s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4]))));
    return 1.0 + r / s;
}
6077
/* Helper of j0_y0_approx(): evaluates (-0.125 + R(z)/S(z)) / x with
 * z = 1/x^2.
 *
 * R and S are polynomials whose coefficient set is picked from the
 * magnitude of x (see the interval noted next to each table).  There is
 * no lower bound check: anything below the third threshold uses the
 * last set, which is meant for |x| >= 2. */
static double qzero(double x)
{
    static const double qR8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
        0.00000000000000000000e+00,
        7.32421874999935051953e-02,
        1.17682064682252693899e+01,
        5.57673380256401856059e+02,
        8.85919720756468632317e+03,
        3.70146267776887834771e+04,
    }, qS8[6] = {
        1.63776026895689824414e+02,
        8.09834494656449805916e+03,
        1.42538291419120476348e+05,
        8.03309257119514397345e+05,
        8.40501579819060512818e+05,
        -3.43899293537866615225e+05,
    }, qR5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
        1.84085963594515531381e-11,
        7.32421766612684765896e-02,
        5.83563508962056953777e+00,
        1.35111577286449829671e+02,
        1.02724376596164097464e+03,
        1.98997785864605384631e+03,
    }, qS5[6] = {
        8.27766102236537761883e+01,
        2.07781416421392987104e+03,
        1.88472887785718085070e+04,
        5.67511122894947329769e+04,
        3.59767538425114471465e+04,
        -5.35434275601944773371e+03,
    }, qR3[6] = {/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */
        4.37741014089738620906e-09,
        7.32411180042911447163e-02,
        3.34423137516170720929e+00,
        4.26218440745412650017e+01,
        1.70808091340565596283e+02,
        1.66733948696651168575e+02,
    }, qS3[6] = {
        4.87588729724587182091e+01,
        7.09689221056606015736e+02,
        3.70414822620111362994e+03,
        6.46042516752568917582e+03,
        2.51633368920368957333e+03,
        -1.49247451836156386662e+02,
    }, qR2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
        1.50444444886983272379e-07,
        7.32234265963079278272e-02,
        1.99819174093815998816e+00,
        1.44956029347885735348e+01,
        3.16662317504781540833e+01,
        1.62527075710929267416e+01,
    }, qS2[6] = {
        3.03655848355219184498e+01,
        2.69348118608049844624e+02,
        8.44783757595320139444e+02,
        8.82935845112488550512e+02,
        2.12666388511798828631e+02,
        -5.31095493882666946917e+00,
    };

    const double *p, *q;
    double s, r, z;
    unsigned int ix;
    /* read the bits through a union; casting &x to an integer pointer
     * would access the double through an incompatible type */
    union { double f; unsigned long long i; } u = { x };

    /* high 32 bits of x with the sign bit removed */
    ix = u.i >> 32;
    ix &= 0x7fffffff;
    if (ix >= 0x40200000) {
        p = qR8;
        q = qS8;
    } else if (ix >= 0x40122E8B) {
        p = qR5;
        q = qS5;
    } else if (ix >= 0x4006DB6D) {
        p = qR3;
        q = qS3;
    } else /*ix >= 0x40000000*/ {
        p = qR2;
        q = qS2;
    }

    z = 1.0 / (x * x);
    r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
    s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5])))));
    return (-0.125 + r / s) / x;
}
6163
6164 /* j0 and y0 approximation for |x|>=2 */
6165 static double j0_y0_approx(unsigned int ix, double x, BOOL y0)
6166 {
6167     static const double invsqrtpi = 5.64189583547756279280e-01;
6168
6169     double s, c, ss, cc, z;
6170
6171     s = sin(x);
6172     c = cos(x);
6173     if (y0) c = -c;
6174     cc = s + c;
6175     /* avoid overflow in 2*x, big ulp error when x>=0x1p1023 */
6176     if (ix < 0x7fe00000) {
6177         ss = s - c;
6178         z = -cos(2 * x);
6179         if (s * c < 0) cc = z / ss;
6180         else ss = z / cc;
6181         if (ix < 0x48000000) {
6182             if (y0) ss = -ss;
6183             cc = pzero(x) * cc - qzero(x) * ss;
6184         }
6185     }
6186     return invsqrtpi * cc / sqrt(x);
6187 }
6188
6189 /*********************************************************************
6190  *              _j0 (MSVCRT.@)
6191  *
6192  * Copied from musl: src/math/j0.c
6193  */
6194 double CDECL _j0(double x)
6195 {
6196     static const double R02 =  1.56249999999999947958e-02,
6197             R03 = -1.89979294238854721751e-04,
6198             R04 =  1.82954049532700665670e-06,
6199             R05 = -4.61832688532103189199e-09,
6200             S01 =  1.56191029464890010492e-02,
6201             S02 =  1.16926784663337450260e-04,
6202             S03 =  5.13546550207318111446e-07,
6203             S04 =  1.16614003333790000205e-09;
6204
6205     double z, r, s;
6206     unsigned int ix;
6207
6208     ix = *(ULONGLONG*)&x >> 32;
6209     ix &= 0x7fffffff;
6210
6211     /* j0(+-inf)=0, j0(nan)=nan */
6212     if (ix >= 0x7ff00000)
6213         return math_error(_DOMAIN, "_j0", x, 0, 1 / (x * x));
6214     x = fabs(x);
6215
6216     if (ix >= 0x40000000) {  /* |x| >= 2 */
6217         /* large ulp error near zeros: 2.4, 5.52, 8.6537,.. */
6218         return j0_y0_approx(ix, x, FALSE);
6219     }
6220
6221     if (ix >= 0x3f200000) {  /* |x| >= 2**-13 */
6222         /* up to 4ulp error close to 2 */
6223         z = x * x;
6224         r = z * (R02 + z * (R03 + z * (R04 + z * R05)));
6225         s = 1 + z * (S01 + z * (S02 + z * (S03 + z * S04)));
6226         return (1 + x / 2) * (1 - x / 2) + z * (r / s);
6227     }
6228
6229     /* 1 - x*x/4 */
6230     /* prevent underflow */
6231     /* inexact should be raised when x!=0, this is not done correctly */
6232     if (ix >= 0x38000000)  /* |x| >= 2**-127 */
6233         x = 0.25 * x * x;
6234     return 1 - x;
6235 }
6236
6237 static double pone(double x)
6238 {
6239     static const double pr8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
6240         0.00000000000000000000e+00,
6241         1.17187499999988647970e-01,
6242         1.32394806593073575129e+01,
6243         4.12051854307378562225e+02,
6244         3.87474538913960532227e+03,
6245         7.91447954031891731574e+03,
6246     }, ps8[5] = {
6247         1.14207370375678408436e+02,
6248         3.65093083420853463394e+03,
6249         3.69562060269033463555e+04,
6250         9.76027935934950801311e+04,
6251         3.08042720627888811578e+04,
6252     }, pr5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
6253         1.31990519556243522749e-11,
6254         1.17187493190614097638e-01,
6255         6.80275127868432871736e+00,
6256         1.08308182990189109773e+02,
6257         5.17636139533199752805e+02,
6258         5.28715201363337541807e+02,
6259     }, ps5[5] = {
6260         5.92805987221131331921e+01,
6261         9.91401418733614377743e+02,
6262         5.35326695291487976647e+03,
6263         7.84469031749551231769e+03,
6264         1.50404688810361062679e+03,
6265     }, pr3[6] = {
6266         3.02503916137373618024e-09,
6267         1.17186865567253592491e-01,
6268         3.93297750033315640650e+00,
6269         3.51194035591636932736e+01,
6270         9.10550110750781271918e+01,
6271         4.85590685197364919645e+01,
6272     }, ps3[5] = {
6273         3.47913095001251519989e+01,
6274         3.36762458747825746741e+02,
6275         1.04687139975775130551e+03,
6276         8.90811346398256432622e+02,
6277         1.03787932439639277504e+02,
6278     }, pr2[6] = { /* for x in [2.8570,2]=1/[0.3499,0.5] */
6279         1.07710830106873743082e-07,
6280         1.17176219462683348094e-01,
6281         2.36851496667608785174e+00,
6282         1.22426109148261232917e+01,
6283         1.76939711271687727390e+01,
6284         5.07352312588818499250e+00,
6285     }, ps2[5] = {
6286         2.14364859363821409488e+01,
6287         1.25290227168402751090e+02,
6288         2.32276469057162813669e+02,
6289         1.17679373287147100768e+02,
6290         8.36463893371618283368e+00,
6291     };
6292
6293     const double *p, *q;
6294     double z, r, s;
6295     unsigned int ix;
6296
6297     ix = *(ULONGLONG*)&x >> 32;
6298     ix &= 0x7fffffff;
6299     if (ix >= 0x40200000) {
6300         p = pr8;
6301         q = ps8;
6302     } else if (ix >= 0x40122E8B) {
6303         p = pr5;
6304         q = ps5;
6305     } else if (ix >= 0x4006DB6D) {
6306         p = pr3;
6307         q = ps3;
6308     } else /*ix >= 0x40000000*/ {
6309         p = pr2;
6310         q = ps2;
6311     }
6312     z = 1.0 / (x * x);
6313     r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
6314     s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4]))));
6315     return 1.0 + r / s;
6316 }
6317
6318 static double qone(double x)
6319 {
6320     static const double qr8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
6321         0.00000000000000000000e+00,
6322         -1.02539062499992714161e-01,
6323         -1.62717534544589987888e+01,
6324         -7.59601722513950107896e+02,
6325         -1.18498066702429587167e+04,
6326         -4.84385124285750353010e+04,
6327     }, qs8[6] = {
6328         1.61395369700722909556e+02,
6329         7.82538599923348465381e+03,
6330         1.33875336287249578163e+05,
6331         7.19657723683240939863e+05,
6332         6.66601232617776375264e+05,
6333         -2.94490264303834643215e+05,
6334     }, qr5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
6335         -2.08979931141764104297e-11,
6336         -1.02539050241375426231e-01,
6337         -8.05644828123936029840e+00,
6338         -1.83669607474888380239e+02,
6339         -1.37319376065508163265e+03,
6340         -2.61244440453215656817e+03,
6341     }, qs5[6] = {
6342         8.12765501384335777857e+01,
6343         1.99179873460485964642e+03,
6344         1.74684851924908907677e+04,
6345         4.98514270910352279316e+04,
6346         2.79480751638918118260e+04,
6347         -4.71918354795128470869e+03,
6348     }, qr3[6] = {
6349         -5.07831226461766561369e-09,
6350         -1.02537829820837089745e-01,
6351         -4.61011581139473403113e+00,
6352         -5.78472216562783643212e+01,
6353         -2.28244540737631695038e+02,
6354         -2.19210128478909325622e+02,
6355     }, qs3[6] = {
6356         4.76651550323729509273e+01,
6357         6.73865112676699709482e+02,
6358         3.38015286679526343505e+03,
6359         5.54772909720722782367e+03,
6360         1.90311919338810798763e+03,
6361         -1.35201191444307340817e+02,
6362     }, qr2[6] = { /* for x in [2.8570,2]=1/[0.3499,0.5] */
6363         -1.78381727510958865572e-07,
6364         -1.02517042607985553460e-01,
6365         -2.75220568278187460720e+00,
6366         -1.96636162643703720221e+01,
6367         -4.23253133372830490089e+01,
6368         -2.13719211703704061733e+01,
6369     }, qs2[6] = {
6370         2.95333629060523854548e+01,
6371         2.52981549982190529136e+02,
6372         7.57502834868645436472e+02,
6373         7.39393205320467245656e+02,
6374         1.55949003336666123687e+02,
6375         -4.95949898822628210127e+00,
6376     };
6377
6378     const double *p, *q;
6379     double s, r, z;
6380     unsigned int ix;
6381
6382     ix = *(ULONGLONG*)&x >> 32;
6383     ix &= 0x7fffffff;
6384     if (ix >= 0x40200000) {
6385         p = qr8;
6386         q = qs8;
6387     } else if (ix >= 0x40122E8B) {
6388         p = qr5;
6389         q = qs5;
6390     } else if (ix >= 0x4006DB6D) {
6391         p = qr3;
6392         q = qs3;
6393     } else /*ix >= 0x40000000*/ {
6394         p = qr2;
6395         q = qs2;
6396     }
6397     z = 1.0 / (x * x);
6398     r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
6399     s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5])))));
6400     return (0.375 + r / s) / x;
6401 }
6402
6403 static double j1_y1_approx(unsigned int ix, double x, BOOL y1, int sign)
6404 {
6405     static const double invsqrtpi = 5.64189583547756279280e-01;
6406
6407     double z, s, c, ss, cc;
6408
6409     s = sin(x);
6410     if (y1) s = -s;
6411     c = cos(x);
6412     cc = s - c;
6413     if (ix < 0x7fe00000) {
6414         ss = -s - c;
6415         z = cos(2 * x);
6416         if (s * c > 0) cc = z / ss;
6417         else ss = z / cc;
6418         if (ix < 0x48000000) {
6419             if (y1)
6420                 ss = -ss;
6421             cc = pone(x) * cc - qone(x) * ss;
6422         }
6423     }
6424     if (sign)
6425         cc = -cc;
6426     return invsqrtpi * cc / sqrt(x);
6427 }
6428
6429 /*********************************************************************
6430  *              _j1 (MSVCRT.@)
6431  *
6432  * Copied from musl: src/math/j1.c
6433  */
6434 double CDECL _j1(double x)
6435 {
6436     static const double r00 = -6.25000000000000000000e-02,
6437         r01 =  1.40705666955189706048e-03,
6438         r02 = -1.59955631084035597520e-05,
6439         r03 =  4.96727999609584448412e-08,
6440         s01 =  1.91537599538363460805e-02,
6441         s02 =  1.85946785588630915560e-04,
6442         s03 =  1.17718464042623683263e-06,
6443         s04 =  5.04636257076217042715e-09,
6444         s05 =  1.23542274426137913908e-11;
6445
6446     double z, r, s;
6447     unsigned int ix;
6448     int sign;
6449
6450     ix = *(ULONGLONG*)&x >> 32;
6451     sign = ix >> 31;
6452     ix &= 0x7fffffff;
6453     if (ix >= 0x7ff00000)
6454         return math_error(isnan(x) ? 0 : _DOMAIN, "_j1", x, 0, 1 / (x * x));
6455     if (ix >= 0x40000000)  /* |x| >= 2 */
6456         return j1_y1_approx(ix, fabs(x), FALSE, sign);
6457     if (ix >= 0x38000000) {  /* |x| >= 2**-127 */
6458         z = x * x;
6459         r = z * (r00 + z * (r01 + z * (r02 + z * r03)));
6460         s = 1 + z * (s01 + z * (s02 + z * (s03 + z * (s04 + z * s05))));
6461         z = r / s;
6462     } else {
6463         /* avoid underflow, raise inexact if x!=0 */
6464         z = x;
6465     }
6466     return (0.5 + z) * x;
6467 }
6468
6469 /*********************************************************************
6470  *              _jn (MSVCRT.@)
6471  *
6472  * Copied from musl: src/math/jn.c
6473  */
6474 double CDECL _jn(int n, double x)
6475 {
6476     static const double invsqrtpi = 5.64189583547756279280e-01;
6477
6478     unsigned int ix, lx;
6479     int nm1, i, sign;
6480     double a, b, temp;
6481
6482     ix = *(ULONGLONG*)&x >> 32;
6483     lx = *(ULONGLONG*)&x;
6484     sign = ix >> 31;
6485     ix &= 0x7fffffff;
6486
6487     if ((ix | (lx | -lx) >> 31) > 0x7ff00000) /* nan */
6488         return x;
6489
6490     if (n == 0)
6491         return _j0(x);
6492     if (n < 0) {
6493         nm1 = -(n + 1);
6494         x = -x;
6495         sign ^= 1;
6496     } else {
6497         nm1 = n-1;
6498     }
6499     if (nm1 == 0)
6500         return j1(x);
6501
6502     sign &= n;  /* even n: 0, odd n: signbit(x) */
6503     x = fabs(x);
6504     if ((ix | lx) == 0 || ix == 0x7ff00000)  /* if x is 0 or inf */
6505         b = 0.0;
6506     else if (nm1 < x) {
6507         if (ix >= 0x52d00000) { /* x > 2**302 */
6508             switch(nm1 & 3) {
6509             case 0:
6510                 temp = -cos(x) + sin(x);
6511                 break;
6512             case 1:
6513                 temp = -cos(x) - sin(x);
6514                 break;
6515             case 2:
6516                 temp =  cos(x) - sin(x);
6517                 break;
6518             default:
6519                 temp =  cos(x) + sin(x);
6520                 break;
6521             }
6522             b = invsqrtpi * temp / sqrt(x);
6523         } else {
6524             a = _j0(x);
6525             b = _j1(x);
6526             for (i = 0; i < nm1; ) {
6527                 i++;
6528                 temp = b;
6529                 b = b * (2.0 * i / x) - a; /* avoid underflow */
6530                 a = temp;
6531             }
6532         }
6533     } else {
6534         if (ix < 0x3e100000) { /* x < 2**-29 */
6535             if (nm1 > 32)  /* underflow */
6536                 b = 0.0;
6537             else {
6538                 temp = x * 0.5;
6539                 b = temp;
6540                 a = 1.0;
6541                 for (i = 2; i <= nm1 + 1; i++) {
6542                     a *= (double)i; /* a = n! */
6543                     b *= temp;      /* b = (x/2)^n */
6544                 }
6545                 b = b / a;
6546             }
6547         } else {
6548             double t, q0, q1, w, h, z, tmp, nf;
6549             int k;
6550
6551             nf = nm1 + 1.0;
6552             w = 2 * nf / x;
6553             h = 2 / x;
6554             z = w + h;
6555             q0 = w;
6556             q1 = w * z - 1.0;
6557             k = 1;
6558             while (q1 < 1.0e9) {
6559                 k += 1;
6560                 z += h;
6561                 tmp = z * q1 - q0;
6562                 q0 = q1;
6563                 q1 = tmp;
6564             }
6565             for (t = 0.0, i = k; i >= 0; i--)
6566                 t = 1 / (2 * (i + nf) / x - t);
6567             a = t;
6568             b = 1.0;
6569             tmp = nf * log(fabs(w));
6570             if (tmp < 7.09782712893383973096e+02) {
6571                 for (i = nm1; i > 0; i--) {
6572                     temp = b;
6573                     b = b * (2.0 * i) / x - a;
6574                     a = temp;
6575                 }
6576             } else {
6577                 for (i = nm1; i > 0; i--) {
6578                     temp = b;
6579                     b = b * (2.0 * i) / x - a;
6580                     a = temp;
6581                     /* scale b to avoid spurious overflow */
6582                     if (b > 0x1p500) {
6583                         a /= b;
6584                         t /= b;
6585                         b  = 1.0;
6586                     }
6587                 }
6588             }
6589             z = j0(x);
6590             w = j1(x);
6591             if (fabs(z) >= fabs(w))
6592                 b = t * z / b;
6593             else
6594                 b = t * w / a;
6595         }
6596     }
6597     return sign ? -b : b;
6598 }
6599
6600 /*********************************************************************
6601  *              _y0 (MSVCRT.@)
6602  */
6603 double CDECL _y0(double x)
6604 {
6605     static const double tpi = 6.36619772367581382433e-01,
6606         u00  = -7.38042951086872317523e-02,
6607         u01  =  1.76666452509181115538e-01,
6608         u02  = -1.38185671945596898896e-02,
6609         u03  =  3.47453432093683650238e-04,
6610         u04  = -3.81407053724364161125e-06,
6611         u05  =  1.95590137035022920206e-08,
6612         u06  = -3.98205194132103398453e-11,
6613         v01  =  1.27304834834123699328e-02,
6614         v02  =  7.60068627350353253702e-05,
6615         v03  =  2.59150851840457805467e-07,
6616         v04  =  4.41110311332675467403e-10;
6617
6618     double z, u, v;
6619     unsigned int ix, lx;
6620
6621     ix = *(ULONGLONG*)&x >> 32;
6622     lx = *(ULONGLONG*)&x;
6623
6624     /* y0(nan)=nan, y0(<0)=nan, y0(0)=-inf, y0(inf)=0 */
6625     if ((ix << 1 | lx) == 0)
6626         return math_error(_OVERFLOW, "_y0", x, 0, -INFINITY);
6627     if (isnan(x))
6628         return x;
6629     if (ix >> 31)
6630         return math_error(_DOMAIN, "_y0", x, 0, 0 / (x - x));
6631     if (ix >= 0x7ff00000)
6632         return 1 / x;
6633
6634     if (ix >= 0x40000000) {  /* x >= 2 */
6635         /* large ulp errors near zeros: 3.958, 7.086,.. */
6636         return j0_y0_approx(ix, x, TRUE);
6637     }
6638
6639     if (ix >= 0x3e400000) {  /* x >= 2**-27 */
6640         /* large ulp error near the first zero, x ~= 0.89 */
6641         z = x * x;
6642         u = u00 + z * (u01 + z * (u02 + z * (u03 + z * (u04 + z * (u05 + z * u06)))));
6643         v = 1.0 + z * (v01 + z * (v02 + z * (v03 + z * v04)));
6644         return u / v + tpi * (j0(x) * log(x));
6645     }
6646     return u00 + tpi * log(x);
6647 }
6648
6649 /*********************************************************************
6650  *              _y1 (MSVCRT.@)
6651  */
6652 double CDECL _y1(double x)
6653 {
6654     static const double tpi = 6.36619772367581382433e-01,
6655         u00 =  -1.96057090646238940668e-01,
6656         u01 = 5.04438716639811282616e-02,
6657         u02 = -1.91256895875763547298e-03,
6658         u03 = 2.35252600561610495928e-05,
6659         u04 = -9.19099158039878874504e-08,
6660         v00 = 1.99167318236649903973e-02,
6661         v01 = 2.02552581025135171496e-04,
6662         v02 = 1.35608801097516229404e-06,
6663         v03 = 6.22741452364621501295e-09,
6664         v04 = 1.66559246207992079114e-11;
6665
6666     double z, u, v;
6667     unsigned int ix, lx;
6668
6669     ix = *(ULONGLONG*)&x >> 32;
6670     lx = *(ULONGLONG*)&x;
6671
6672     /* y1(nan)=nan, y1(<0)=nan, y1(0)=-inf, y1(inf)=0 */
6673     if ((ix << 1 | lx) == 0)
6674         return math_error(_OVERFLOW, "_y1", x, 0, -INFINITY);
6675     if (isnan(x))
6676         return x;
6677     if (ix >> 31)
6678         return math_error(_DOMAIN, "_y1", x, 0, 0 / (x - x));
6679     if (ix >= 0x7ff00000)
6680         return 1 / x;
6681
6682     if (ix >= 0x40000000)  /* x >= 2 */
6683         return j1_y1_approx(ix, x, TRUE, 0);
6684     if (ix < 0x3c900000)  /* x < 2**-54 */
6685         return -tpi / x;
6686     z = x * x;
6687     u = u00 + z * (u01 + z * (u02 + z * (u03 + z * u04)));
6688     v = 1 + z * (v00 + z * (v01 + z * (v02 + z * (v03 + z * v04))));
6689     return x * (u / v) + tpi * (j1(x) * log(x) - 1 / x);
6690 }
6691
6692 /*********************************************************************
6693  *              _yn (MSVCRT.@)
6694  *
6695  * Copied from musl: src/math/jn.c
6696  */
6697 double CDECL _yn(int n, double x)
6698 {
6699     static const double invsqrtpi = 5.64189583547756279280e-01;
6700
6701     unsigned int ix, lx, ib;
6702     int nm1, sign, i;
6703     double a, b, temp;
6704
6705     ix = *(ULONGLONG*)&x >> 32;
6706     lx = *(ULONGLONG*)&x;
6707     sign = ix >> 31;
6708     ix &= 0x7fffffff;
6709
6710     if ((ix | (lx | -lx) >> 31) > 0x7ff00000) /* nan */
6711         return x;
6712     if (sign && (ix | lx) != 0) /* x < 0 */
6713         return math_error(_DOMAIN, "_y1", x, 0, 0 / (x - x));
6714     if (ix == 0x7ff00000)
6715         return 0.0;
6716
6717     if (n == 0)
6718         return y0(x);
6719     if (n < 0) {
6720         nm1 = -(n + 1);
6721         sign = n & 1;
6722     } else {
6723         nm1 = n - 1;
6724         sign = 0;
6725     }
6726     if (nm1 == 0)
6727         return sign ? -y1(x) : y1(x);
6728
6729     if (ix >= 0x52d00000) { /* x > 2**302 */
6730         switch(nm1 & 3) {
6731         case 0:
6732             temp = -sin(x) - cos(x);
6733             break;
6734         case 1:
6735             temp = -sin(x) + cos(x);
6736             break;
6737         case 2:
6738             temp = sin(x) + cos(x);
6739             break;
6740         default:
6741             temp = sin(x) - cos(x);
6742             break;
6743         }
6744         b = invsqrtpi * temp / sqrt(x);
6745     } else {
6746         a = y0(x);
6747         b = y1(x);
6748         /* quit if b is -inf */
6749         ib = *(ULONGLONG*)&b >> 32;
6750         for (i = 0; i < nm1 && ib != 0xfff00000;) {
6751             i++;
6752             temp = b;
6753             b = (2.0 * i / x) * b - a;
6754             ib = *(ULONGLONG*)&b >> 32;
6755             a = temp;
6756         }
6757     }
6758     return sign ? -b : b;
6759 }
6760
6761 #if _MSVCR_VER>=120
6762
6763 /*********************************************************************
6764  *              _nearbyint (MSVCR120.@)
6765  *
6766  * Based on musl: src/math/nearbyteint.c
6767  */
6768 double CDECL nearbyint(double x)
6769 {
6770     fenv_t env;
6771
6772     fegetenv(&env);
6773     _control87(_MCW_EM, _MCW_EM);
6774     x = rint(x);
6775     feclearexcept(FE_INEXACT);
6776     feupdateenv(&env);
6777     return x;
6778 }
6779
6780 /*********************************************************************
6781  *              _nearbyintf (MSVCR120.@)
6782  *
6783  * Based on musl: src/math/nearbyteintf.c
6784  */
6785 float CDECL nearbyintf(float x)
6786 {
6787     fenv_t env;
6788
6789     fegetenv(&env);
6790     _control87(_MCW_EM, _MCW_EM);
6791     x = rintf(x);
6792     feclearexcept(FE_INEXACT);
6793     feupdateenv(&env);
6794     return x;
6795 }
6796
6797 /*********************************************************************
6798  *              nexttoward (MSVCR120.@)
6799  */
6800 double CDECL MSVCRT_nexttoward(double num, double next)
6801 {
6802     return _nextafter(num, next);
6803 }
6804
6805 /*********************************************************************
6806  *              nexttowardf (MSVCR120.@)
6807  *
6808  * Copied from musl: src/math/nexttowardf.c
6809  */
6810 float CDECL MSVCRT_nexttowardf(float x, double y)
6811 {
6812     unsigned int ix = *(unsigned int*)&x;
6813     unsigned int e;
6814     float ret;
6815
6816     if (isnan(x) || isnan(y))
6817         return x + y;
6818     if (x == y)
6819         return y;
6820     if (x == 0) {
6821         ix = 1;
6822         if (signbit(y))
6823             ix |= 0x80000000;
6824     } else if (x < y) {
6825         if (signbit(x))
6826             ix--;
6827         else
6828             ix++;
6829     } else {
6830         if (signbit(x))
6831             ix++;
6832         else
6833             ix--;
6834     }
6835     e = ix & 0x7f800000;
6836     /* raise overflow if ix is infinite and x is finite */
6837     if (e == 0x7f800000) {
6838         fp_barrierf(x + x);
6839         *_errno() = ERANGE;
6840     }
6841     ret = *(float*)&ix;
6842     /* raise underflow if ret is subnormal or zero */
6843     if (e == 0) {
6844         fp_barrierf(x * x + ret * ret);
6845         *_errno() = ERANGE;
6846     }
6847     return ret;
6848 }
6849
6850 #endif /* _MSVCR_VER>=120 */
6851
6852 /*********************************************************************
6853  *              _nextafter (MSVCRT.@)
6854  *
6855  * Copied from musl: src/math/nextafter.c
6856  */
6857 double CDECL _nextafter(double x, double y)
6858 {
6859     ULONGLONG llx = *(ULONGLONG*)&x;
6860     ULONGLONG lly = *(ULONGLONG*)&y;
6861     ULONGLONG ax, ay;
6862     int e;
6863
6864     if (isnan(x) || isnan(y))
6865         return x + y;
6866     if (llx == lly) {
6867         if (_fpclass(y) & (_FPCLASS_ND | _FPCLASS_PD | _FPCLASS_NZ | _FPCLASS_PZ ))
6868             *_errno() = ERANGE;
6869         return y;
6870     }
6871     ax = llx & -1ULL / 2;
6872     ay = lly & -1ULL / 2;
6873     if (ax == 0) {
6874         if (ay == 0)
6875             return y;
6876         llx = (lly & 1ULL << 63) | 1;
6877     } else if (ax > ay || ((llx ^ lly) & 1ULL << 63))
6878         llx--;
6879     else
6880         llx++;
6881     e = llx >> 52 & 0x7ff;
6882     /* raise overflow if llx is infinite and x is finite */
6883     if (e == 0x7ff) {
6884         fp_barrier(x + x);
6885         *_errno() = ERANGE;
6886     }
6887     /* raise underflow if llx is subnormal or zero */
6888     y = *(double*)&llx;
6889     if (e == 0) {
6890         fp_barrier(x * x + y * y);
6891         *_errno() = ERANGE;
6892     }
6893     return y;
6894 }
6895
6896 /*********************************************************************
6897  *              _ecvt (MSVCRT.@)
6898  */
6899 char * CDECL _ecvt( double number, int ndigits, int *decpt, int *sign )
6900 {
6901     int prec, len;
6902     thread_data_t *data = msvcrt_get_thread_data();
6903     /* FIXME: check better for overflow (native supports over 300 chars) */
6904     ndigits = min( ndigits, 80 - 8); /* 8 : space for sign, dec point, "e",
6905                                       * 4 for exponent and one for
6906                                       * terminating '\0' */
6907     if (!data->efcvt_buffer)
6908         data->efcvt_buffer = malloc( 80 ); /* ought to be enough */
6909
6910     /* handle cases with zero ndigits or less */
6911     prec = ndigits;
6912     if( prec < 1) prec = 2;
6913     len = _snprintf(data->efcvt_buffer, 80, "%.*le", prec - 1, number);
6914
6915     if (data->efcvt_buffer[0] == '-') {
6916         memmove( data->efcvt_buffer, data->efcvt_buffer + 1, len-- );
6917         *sign = 1;
6918     } else *sign = 0;
6919
6920     /* take the decimal "point away */
6921     if( prec != 1)
6922         memmove( data->efcvt_buffer + 1, data->efcvt_buffer + 2, len - 1 );
6923     /* take the exponential "e" out */
6924     data->efcvt_buffer[ prec] = '\0';
6925     /* read the exponent */
6926     sscanf( data->efcvt_buffer + prec + 1, "%d", decpt);
6927     (*decpt)++;
6928     /* adjust for some border cases */
6929     if( data->efcvt_buffer[0] == '0')/* value is zero */
6930         *decpt = 0;
6931     /* handle cases with zero ndigits or less */
6932     if( ndigits < 1){
6933         if( data->efcvt_buffer[ 0] >= '5')
6934             (*decpt)++;
6935         data->efcvt_buffer[ 0] = '\0';
6936     }
6937     TRACE("out=\"%s\"\n",data->efcvt_buffer);
6938     return data->efcvt_buffer;
6939 }
6940
6941 /*********************************************************************
6942  *              _ecvt_s (MSVCRT.@)
6943  */
6944 int CDECL _ecvt_s( char *buffer, size_t length, double number, int ndigits, int *decpt, int *sign )
6945 {
6946     int prec, len;
6947     char *result;
6948
6949     if (!MSVCRT_CHECK_PMT(buffer != NULL)) return EINVAL;
6950     if (!MSVCRT_CHECK_PMT(decpt != NULL)) return EINVAL;
6951     if (!MSVCRT_CHECK_PMT(sign != NULL)) return EINVAL;
6952     if (!MSVCRT_CHECK_PMT_ERR( length > 2, ERANGE )) return ERANGE;
6953     if (!MSVCRT_CHECK_PMT_ERR(ndigits < (int)length - 1, ERANGE )) return ERANGE;
6954
6955     /* handle cases with zero ndigits or less */
6956     prec = ndigits;
6957     if( prec < 1) prec = 2;
6958     result = malloc(prec + 8);
6959
6960     len = _snprintf(result, prec + 8, "%.*le", prec - 1, number);
6961     if (result[0] == '-') {
6962         memmove( result, result + 1, len-- );
6963         *sign = 1;
6964     } else *sign = 0;
6965
6966     /* take the decimal "point away */
6967     if( prec != 1)
6968         memmove( result + 1, result + 2, len - 1 );
6969     /* take the exponential "e" out */
6970     result[ prec] = '\0';
6971     /* read the exponent */
6972     sscanf( result + prec + 1, "%d", decpt);
6973     (*decpt)++;
6974     /* adjust for some border cases */
6975     if( result[0] == '0')/* value is zero */
6976         *decpt = 0;
6977     /* handle cases with zero ndigits or less */
6978     if( ndigits < 1){
6979         if( result[ 0] >= '5')
6980             (*decpt)++;
6981         result[ 0] = '\0';
6982     }
6983     memcpy( buffer, result, max(ndigits + 1, 1) );
6984     free( result );
6985     return 0;
6986 }
6987
6988 /***********************************************************************
6989  *              _fcvt  (MSVCRT.@)
6990  */
6991 char * CDECL _fcvt( double number, int ndigits, int *decpt, int *sign )
6992 {
6993     thread_data_t *data = msvcrt_get_thread_data();
6994     int stop, dec1, dec2;
6995     char *ptr1, *ptr2, *first;
6996     char buf[80]; /* ought to be enough */
6997     char decimal_separator = get_locinfo()->lconv->decimal_point[0];
6998
6999     if (!data->efcvt_buffer)
7000         data->efcvt_buffer = malloc( 80 ); /* ought to be enough */
7001
7002     stop = _snprintf(buf, 80, "%.*f", ndigits < 0 ? 0 : ndigits, number);
7003     ptr1 = buf;
7004     ptr2 = data->efcvt_buffer;
7005     first = NULL;
7006     dec1 = 0;
7007     dec2 = 0;
7008
7009     if (*ptr1 == '-') {
7010         *sign = 1;
7011         ptr1++;
7012     } else *sign = 0;
7013
7014     /* For numbers below the requested resolution, work out where
7015        the decimal point will be rather than finding it in the string */
7016     if (number < 1.0 && number > 0.0) {
7017         dec2 = log10(number + 1e-10);
7018         if (-dec2 <= ndigits) dec2 = 0;
7019     }
7020
7021     /* If requested digits is zero or less, we will need to truncate
7022      * the returned string */
7023     if (ndigits < 1) {
7024         stop += ndigits;
7025     }
7026
7027     while (*ptr1 == '0') ptr1++; /* Skip leading zeroes */
7028     while (*ptr1 != '\0' && *ptr1 != decimal_separator) {
7029         if (!first) first = ptr2;
7030         if ((ptr1 - buf) < stop) {
7031             *ptr2++ = *ptr1++;
7032         } else {
7033             ptr1++;
7034         }
7035         dec1++;
7036     }
7037
7038     if (ndigits > 0) {
7039         ptr1++;
7040         if (!first) {
7041             while (*ptr1 == '0') { /* Process leading zeroes */
7042                 *ptr2++ = *ptr1++;
7043                 dec1--;
7044             }
7045         }
7046         while (*ptr1 != '\0') {
7047             if (!first) first = ptr2;
7048             *ptr2++ = *ptr1++;
7049         }
7050     }
7051
7052     *ptr2 = '\0';
7053
7054     /* We never found a non-zero digit, then our number is either
7055      * smaller than the requested precision, or 0.0 */
7056     if (!first) {
7057         if (number > 0.0) {
7058             first = ptr2;
7059         } else {
7060             first = data->efcvt_buffer;
7061             dec1 = 0;
7062         }
7063     }
7064
7065     *decpt = dec2 ? dec2 : dec1;
7066     return first;
7067 }
7068
7069 /***********************************************************************
7070  *              _fcvt_s  (MSVCRT.@)
7071  */
7072 int CDECL _fcvt_s(char* outbuffer, size_t size, double number, int ndigits, int *decpt, int *sign)
7073 {
7074     int stop, dec1, dec2;
7075     char *ptr1, *ptr2, *first;
7076     char buf[80]; /* ought to be enough */
7077     char decimal_separator = get_locinfo()->lconv->decimal_point[0];
7078
7079     if (!outbuffer || !decpt || !sign || size == 0)
7080     {
7081         *_errno() = EINVAL;
7082         return EINVAL;
7083     }
7084
7085     stop = _snprintf(buf, 80, "%.*f", ndigits < 0 ? 0 : ndigits, number);
7086     ptr1 = buf;
7087     ptr2 = outbuffer;
7088     first = NULL;
7089     dec1 = 0;
7090     dec2 = 0;
7091
7092     if (*ptr1 == '-') {
7093         *sign = 1;
7094         ptr1++;
7095     } else *sign = 0;
7096
7097     /* For numbers below the requested resolution, work out where
7098        the decimal point will be rather than finding it in the string */
7099     if (number < 1.0 && number > 0.0) {
7100         dec2 = log10(number + 1e-10);
7101         if (-dec2 <= ndigits) dec2 = 0;
7102     }
7103
7104     /* If requested digits is zero or less, we will need to truncate
7105      * the returned string */
7106     if (ndigits < 1) {
7107         stop += ndigits;
7108     }
7109
7110     while (*ptr1 == '0') ptr1++; /* Skip leading zeroes */
7111     while (*ptr1 != '\0' && *ptr1 != decimal_separator) {
7112         if (!first) first = ptr2;
7113         if ((ptr1 - buf) < stop) {
7114             if (size > 1) {
7115                 *ptr2++ = *ptr1++;
7116                 size--;
7117             }
7118         } else {
7119             ptr1++;
7120         }
7121         dec1++;
7122     }
7123
7124     if (ndigits > 0) {
7125         ptr1++;
7126         if (!first) {
7127             while (*ptr1 == '0') { /* Process leading zeroes */
7128                 if (number == 0.0 && size > 1) {
7129                     *ptr2++ = '0';
7130                     size--;
7131                 }
7132                 ptr1++;
7133                 dec1--;
7134             }
7135         }
7136         while (*ptr1 != '\0') {
7137             if (!first) first = ptr2;
7138             if (size > 1) {
7139                 *ptr2++ = *ptr1++;
7140                 size--;
7141             }
7142         }
7143     }
7144
7145     *ptr2 = '\0';
7146
7147     /* We never found a non-zero digit, then our number is either
7148      * smaller than the requested precision, or 0.0 */
7149     if (!first && (number <= 0.0))
7150         dec1 = 0;
7151
7152     *decpt = dec2 ? dec2 : dec1;
7153     return 0;
7154 }
7155
7156 /***********************************************************************
7157  *              _gcvt  (MSVCRT.@)
7158  */
7159 char * CDECL _gcvt( double number, int ndigit, char *buff )
7160 {
7161     if(!buff) {
7162         *_errno() = EINVAL;
7163         return NULL;
7164     }
7165
7166     if(ndigit < 0) {
7167         *_errno() = ERANGE;
7168         return NULL;
7169     }
7170
7171     sprintf(buff, "%.*g", ndigit, number);
7172     return buff;
7173 }
7174
7175 /***********************************************************************
7176  *              _gcvt_s  (MSVCRT.@)
7177  */
7178 int CDECL _gcvt_s(char *buff, size_t size, double number, int digits)
7179 {
7180     int len;
7181
7182     if(!buff) {
7183         *_errno() = EINVAL;
7184         return EINVAL;
7185     }
7186
7187     if( digits<0 || digits>=size) {
7188         if(size)
7189             buff[0] = '\0';
7190
7191         *_errno() = ERANGE;
7192         return ERANGE;
7193     }
7194
7195     len = _scprintf("%.*g", digits, number);
7196     if(len > size) {
7197         buff[0] = '\0';
7198         *_errno() = ERANGE;
7199         return ERANGE;
7200     }
7201
7202     sprintf(buff, "%.*g", digits, number);
7203     return 0;
7204 }
7205
7206 #include <stdlib.h> /* div_t, ldiv_t */
7207
7208 /*********************************************************************
7209  *              div (MSVCRT.@)
7210  * VERSION
7211  *      [i386] Windows binary compatible - returns the struct in eax/edx.
7212  */
7213 #ifdef __i386__
7214 unsigned __int64 CDECL div(int num, int denom)
7215 {
7216     union {
7217         div_t div;
7218         unsigned __int64 uint64;
7219     } ret;
7220
7221     ret.div.quot = num / denom;
7222     ret.div.rem = num % denom;
7223     return ret.uint64;
7224 }
7225 #else
7226 /*********************************************************************
7227  *              div (MSVCRT.@)
7228  * VERSION
7229  *      [!i386] Non-x86 can't run win32 apps so we don't need binary compatibility
7230  */
7231 div_t CDECL div(int num, int denom)
7232 {
7233     div_t ret;
7234
7235     ret.quot = num / denom;
7236     ret.rem = num % denom;
7237     return ret;
7238 }
7239 #endif /* ifdef __i386__ */
7240
7241
7242 /*********************************************************************
7243  *              ldiv (MSVCRT.@)
7244  * VERSION
7245  *      [i386] Windows binary compatible - returns the struct in eax/edx.
7246  */
7247 #ifdef __i386__
7248 unsigned __int64 CDECL ldiv(__msvcrt_long num, __msvcrt_long denom)
7249 {
7250     union {
7251         ldiv_t ldiv;
7252         unsigned __int64 uint64;
7253     } ret;
7254
7255     ret.ldiv.quot = num / denom;
7256     ret.ldiv.rem = num % denom;
7257     return ret.uint64;
7258 }
7259 #else
7260 /*********************************************************************
7261  *              ldiv (MSVCRT.@)
7262  * VERSION
7263  *      [!i386] Non-x86 can't run win32 apps so we don't need binary compatibility
7264  */
7265 ldiv_t CDECL ldiv(__msvcrt_long num, __msvcrt_long denom)
7266 {
7267     ldiv_t ret;
7268
7269     ret.quot = num / denom;
7270     ret.rem = num % denom;
7271     return ret;
7272 }
7273 #endif /* ifdef __i386__ */
7274
7275 #if _MSVCR_VER>=100
7276 /*********************************************************************
7277  *              lldiv (MSVCR100.@)
7278  */
7279 lldiv_t CDECL lldiv(__int64 num, __int64 denom)
7280 {
7281   lldiv_t ret;
7282
7283   ret.quot = num / denom;
7284   ret.rem = num % denom;
7285
7286   return ret;
7287 }
7288 #endif
7289
7290 #ifdef __i386__
7291
7292 /*********************************************************************
7293  *              _adjust_fdiv (MSVCRT.@)
7294  * Used by the MSVC compiler to work around the Pentium FDIV bug.
7295  */
int MSVCRT__adjust_fdiv = 0; /* initialised to 0; nothing in this part of the file changes it */
7297
7298 /***********************************************************************
7299  *              _adj_fdiv_m16i (MSVCRT.@)
7300  *
7301  * NOTE
7302  *    I _think_ this function is intended to work around the Pentium
7303  *    fdiv bug.
7304  */
7305 void __stdcall _adj_fdiv_m16i( short arg )
7306 {
7307   TRACE("(): stub\n");
7308 }
7309
7310 /***********************************************************************
7311  *              _adj_fdiv_m32 (MSVCRT.@)
7312  *
7313  * NOTE
7314  *    I _think_ this function is intended to work around the Pentium
7315  *    fdiv bug.
7316  */
7317 void __stdcall _adj_fdiv_m32( unsigned int arg )
7318 {
7319   TRACE("(): stub\n");
7320 }
7321
7322 /***********************************************************************
7323  *              _adj_fdiv_m32i (MSVCRT.@)
7324  *
7325  * NOTE
7326  *    I _think_ this function is intended to work around the Pentium
7327  *    fdiv bug.
7328  */
7329 void __stdcall _adj_fdiv_m32i( int arg )
7330 {
7331   TRACE("(): stub\n");
7332 }
7333
7334 /***********************************************************************
7335  *              _adj_fdiv_m64 (MSVCRT.@)
7336  *
7337  * NOTE
7338  *    I _think_ this function is intended to work around the Pentium
7339  *    fdiv bug.
7340  */
7341 void __stdcall _adj_fdiv_m64( unsigned __int64 arg )
7342 {
7343   TRACE("(): stub\n");
7344 }
7345
7346 /***********************************************************************
7347  *              _adj_fdiv_r (MSVCRT.@)
7348  * FIXME
7349  *    This function is likely to have the wrong number of arguments.
7350  *
7351  * NOTE
7352  *    I _think_ this function is intended to work around the Pentium
7353  *    fdiv bug.
7354  */
void _adj_fdiv_r(void)
{
    /* Not implemented: the call is only traced. */
    TRACE("(): stub\n");
}
7359
7360 /***********************************************************************
7361  *              _adj_fdivr_m16i (MSVCRT.@)
7362  *
7363  * NOTE
7364  *    I _think_ this function is intended to work around the Pentium
7365  *    fdiv bug.
7366  */
7367 void __stdcall _adj_fdivr_m16i( short arg )
7368 {
7369   TRACE("(): stub\n");
7370 }
7371
7372 /***********************************************************************
7373  *              _adj_fdivr_m32 (MSVCRT.@)
7374  *
7375  * NOTE
7376  *    I _think_ this function is intended to work around the Pentium
7377  *    fdiv bug.
7378  */
7379 void __stdcall _adj_fdivr_m32( unsigned int arg )
7380 {
7381   TRACE("(): stub\n");
7382 }
7383
7384 /***********************************************************************
7385  *              _adj_fdivr_m32i (MSVCRT.@)
7386  *
7387  * NOTE
7388  *    I _think_ this function is intended to work around the Pentium
7389  *    fdiv bug.
7390  */
7391 void __stdcall _adj_fdivr_m32i( int arg )
7392 {
7393   TRACE("(): stub\n");
7394 }
7395
7396 /***********************************************************************
7397  *              _adj_fdivr_m64 (MSVCRT.@)
7398  *
7399  * NOTE
7400  *    I _think_ this function is intended to work around the Pentium
7401  *    fdiv bug.
7402  */
7403 void __stdcall _adj_fdivr_m64( unsigned __int64 arg )
7404 {
7405   TRACE("(): stub\n");
7406 }
7407
7408 /***********************************************************************
7409  *              _adj_fpatan (MSVCRT.@)
7410  * FIXME
7411  *    This function is likely to have the wrong number of arguments.
7412  *
7413  * NOTE
7414  *    I _think_ this function is intended to work around the Pentium
7415  *    fdiv bug.
7416  */
void _adj_fpatan(void)
{
    /* Not implemented: the call is only traced. */
    TRACE("(): stub\n");
}
7421
7422 /***********************************************************************
7423  *              _adj_fprem (MSVCRT.@)
7424  * FIXME
7425  *    This function is likely to have the wrong number of arguments.
7426  *
7427  * NOTE
7428  *    I _think_ this function is intended to work around the Pentium
7429  *    fdiv bug.
7430  */
void _adj_fprem(void)
{
    /* Not implemented: the call is only traced. */
    TRACE("(): stub\n");
}
7435
7436 /***********************************************************************
7437  *              _adj_fprem1 (MSVCRT.@)
7438  * FIXME
7439  *    This function is likely to have the wrong number of arguments.
7440  *
7441  * NOTE
7442  *    I _think_ this function is intended to work around the Pentium
7443  *    fdiv bug.
7444  */
void _adj_fprem1(void)
{
    /* Not implemented: the call is only traced. */
    TRACE("(): stub\n");
}
7449
7450 /***********************************************************************
7451  *              _adj_fptan (MSVCRT.@)
7452  * FIXME
7453  *    This function is likely to have the wrong number of arguments.
7454  *
7455  * NOTE
7456  *    I _think_ this function is intended to work around the Pentium
7457  *    fdiv bug.
7458  */
void _adj_fptan(void)
{
    /* Not implemented: the call is only traced. */
    TRACE("(): stub\n");
}
7463
7464 /***********************************************************************
7465  *              _safe_fdiv (MSVCRT.@)
7466  * FIXME
7467  *    This function is likely to have the wrong number of arguments.
7468  *
7469  * NOTE
7470  *    I _think_ this function is intended to work around the Pentium
7471  *    fdiv bug.
7472  */
void _safe_fdiv(void)
{
    /* Not implemented: the call is only traced. */
    TRACE("(): stub\n");
}
7477
7478 /***********************************************************************
7479  *              _safe_fdivr (MSVCRT.@)
7480  * FIXME
7481  *    This function is likely to have the wrong number of arguments.
7482  *
7483  * NOTE
7484  *    I _think_ this function is intended to work around the Pentium
7485  *    fdiv bug.
7486  */
void _safe_fdivr(void)
{
    /* Not implemented: the call is only traced. */
    TRACE("(): stub\n");
}
7491
7492 /***********************************************************************
7493  *              _safe_fprem (MSVCRT.@)
7494  * FIXME
7495  *    This function is likely to have the wrong number of arguments.
7496  *
7497  * NOTE
7498  *    I _think_ this function is intended to work around the Pentium
7499  *    fdiv bug.
7500  */
void _safe_fprem(void)
{
    /* Not implemented: the call is only traced. */
    TRACE("(): stub\n");
}
7505
7506 /***********************************************************************
7507  *              _safe_fprem1 (MSVCRT.@)
7508  *
7509  * FIXME
7510  *    This function is likely to have the wrong number of arguments.
7511  *
7512  * NOTE
7513  *    I _think_ this function is intended to work around the Pentium
7514  *    fdiv bug.
7515  */
void _safe_fprem1(void)
{
    /* Not implemented: the call is only traced. */
    TRACE("(): stub\n");
}
7520
7521 /***********************************************************************
7522  *              __libm_sse2_acos   (MSVCRT.@)
7523  */
7524 void __cdecl __libm_sse2_acos(void)
7525 {
7526     double d;
7527     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7528     d = acos( d );
7529     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7530 }
7531
7532 /***********************************************************************
7533  *              __libm_sse2_acosf   (MSVCRT.@)
7534  */
7535 void __cdecl __libm_sse2_acosf(void)
7536 {
7537     float f;
7538     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7539     f = acosf( f );
7540     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7541 }
7542
7543 /***********************************************************************
7544  *              __libm_sse2_asin   (MSVCRT.@)
7545  */
7546 void __cdecl __libm_sse2_asin(void)
7547 {
7548     double d;
7549     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7550     d = asin( d );
7551     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7552 }
7553
7554 /***********************************************************************
7555  *              __libm_sse2_asinf   (MSVCRT.@)
7556  */
7557 void __cdecl __libm_sse2_asinf(void)
7558 {
7559     float f;
7560     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7561     f = asinf( f );
7562     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7563 }
7564
7565 /***********************************************************************
7566  *              __libm_sse2_atan   (MSVCRT.@)
7567  */
7568 void __cdecl __libm_sse2_atan(void)
7569 {
7570     double d;
7571     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7572     d = atan( d );
7573     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7574 }
7575
7576 /***********************************************************************
7577  *              __libm_sse2_atan2   (MSVCRT.@)
7578  */
7579 void __cdecl __libm_sse2_atan2(void)
7580 {
7581     double d1, d2;
7582     __asm__ __volatile__( "movq %%xmm0,%0; movq %%xmm1,%1 " : "=m" (d1), "=m" (d2) );
7583     d1 = atan2( d1, d2 );
7584     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d1) );
7585 }
7586
7587 /***********************************************************************
7588  *              __libm_sse2_atanf   (MSVCRT.@)
7589  */
7590 void __cdecl __libm_sse2_atanf(void)
7591 {
7592     float f;
7593     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7594     f = atanf( f );
7595     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7596 }
7597
7598 /***********************************************************************
7599  *              __libm_sse2_cos   (MSVCRT.@)
7600  */
7601 void __cdecl __libm_sse2_cos(void)
7602 {
7603     double d;
7604     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7605     d = cos( d );
7606     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7607 }
7608
7609 /***********************************************************************
7610  *              __libm_sse2_cosf   (MSVCRT.@)
7611  */
7612 void __cdecl __libm_sse2_cosf(void)
7613 {
7614     float f;
7615     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7616     f = cosf( f );
7617     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7618 }
7619
7620 /***********************************************************************
7621  *              __libm_sse2_exp   (MSVCRT.@)
7622  */
7623 void __cdecl __libm_sse2_exp(void)
7624 {
7625     double d;
7626     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7627     d = exp( d );
7628     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7629 }
7630
7631 /***********************************************************************
7632  *              __libm_sse2_expf   (MSVCRT.@)
7633  */
7634 void __cdecl __libm_sse2_expf(void)
7635 {
7636     float f;
7637     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7638     f = expf( f );
7639     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7640 }
7641
7642 /***********************************************************************
7643  *              __libm_sse2_log   (MSVCRT.@)
7644  */
7645 void __cdecl __libm_sse2_log(void)
7646 {
7647     double d;
7648     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7649     d = log( d );
7650     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7651 }
7652
7653 /***********************************************************************
7654  *              __libm_sse2_log10   (MSVCRT.@)
7655  */
7656 void __cdecl __libm_sse2_log10(void)
7657 {
7658     double d;
7659     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7660     d = log10( d );
7661     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7662 }
7663
7664 /***********************************************************************
7665  *              __libm_sse2_log10f   (MSVCRT.@)
7666  */
7667 void __cdecl __libm_sse2_log10f(void)
7668 {
7669     float f;
7670     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7671     f = log10f( f );
7672     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7673 }
7674
7675 /***********************************************************************
7676  *              __libm_sse2_logf   (MSVCRT.@)
7677  */
7678 void __cdecl __libm_sse2_logf(void)
7679 {
7680     float f;
7681     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7682     f = logf( f );
7683     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7684 }
7685
7686 /***********************************************************************
7687  *              __libm_sse2_pow   (MSVCRT.@)
7688  */
7689 void __cdecl __libm_sse2_pow(void)
7690 {
7691     double d1, d2;
7692     __asm__ __volatile__( "movq %%xmm0,%0; movq %%xmm1,%1 " : "=m" (d1), "=m" (d2) );
7693     d1 = pow( d1, d2 );
7694     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d1) );
7695 }
7696
7697 /***********************************************************************
7698  *              __libm_sse2_powf   (MSVCRT.@)
7699  */
7700 void __cdecl __libm_sse2_powf(void)
7701 {
7702     float f1, f2;
7703     __asm__ __volatile__( "movd %%xmm0,%0; movd %%xmm1,%1" : "=g" (f1), "=g" (f2) );
7704     f1 = powf( f1, f2 );
7705     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f1) );
7706 }
7707
7708 /***********************************************************************
7709  *              __libm_sse2_sin   (MSVCRT.@)
7710  */
7711 void __cdecl __libm_sse2_sin(void)
7712 {
7713     double d;
7714     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7715     d = sin( d );
7716     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7717 }
7718
7719 /***********************************************************************
7720  *              __libm_sse2_sinf   (MSVCRT.@)
7721  */
7722 void __cdecl __libm_sse2_sinf(void)
7723 {
7724     float f;
7725     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7726     f = sinf( f );
7727     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7728 }
7729
7730 /***********************************************************************
7731  *              __libm_sse2_tan   (MSVCRT.@)
7732  */
7733 void __cdecl __libm_sse2_tan(void)
7734 {
7735     double d;
7736     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7737     d = tan( d );
7738     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7739 }
7740
7741 /***********************************************************************
7742  *              __libm_sse2_tanf   (MSVCRT.@)
7743  */
7744 void __cdecl __libm_sse2_tanf(void)
7745 {
7746     float f;
7747     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7748     f = tanf( f );
7749     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7750 }
7751
7752 /***********************************************************************
7753  *              __libm_sse2_sqrt_precise   (MSVCR110.@)
7754  */
7755 void __cdecl __libm_sse2_sqrt_precise(void)
7756 {
7757     unsigned int cw;
7758     double d;
7759
7760     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7761     __control87_2(0, 0, NULL, &cw);
7762     if (cw & _MCW_RC)
7763     {
7764         d = sqrt(d);
7765         __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7766         return;
7767     }
7768
7769     if (!sqrt_validate(&d, FALSE))
7770     {
7771         __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7772         return;
7773     }
7774     __asm__ __volatile__( "call " __ASM_NAME( "sse2_sqrt" ) );
7775 }
7776 #endif  /* __i386__ */
7777
7778 /*********************************************************************
7779  *      _fdclass (MSVCR120.@)
7780  *
7781  * Copied from musl: src/math/__fpclassifyf.c
7782  */
7783 short CDECL _fdclass(float x)
7784 {
7785     union { float f; UINT32 i; } u = { x };
7786     int e = u.i >> 23 & 0xff;
7787
7788     if (!e) return u.i << 1 ? FP_SUBNORMAL : FP_ZERO;
7789     if (e == 0xff) return u.i << 9 ? FP_NAN : FP_INFINITE;
7790     return FP_NORMAL;
7791 }
7792
7793 /*********************************************************************
7794  *      _dclass (MSVCR120.@)
7795  *
7796  * Copied from musl: src/math/__fpclassify.c
7797  */
7798 short CDECL _dclass(double x)
7799 {
7800     union { double f; UINT64 i; } u = { x };
7801     int e = u.i >> 52 & 0x7ff;
7802
7803     if (!e) return u.i << 1 ? FP_SUBNORMAL : FP_ZERO;
7804     if (e == 0x7ff) return (u.i << 12) ? FP_NAN : FP_INFINITE;
7805     return FP_NORMAL;
7806 }
7807
7808 #if _MSVCR_VER>=120
7809
7810 /*********************************************************************
7811  *      cbrt (MSVCR120.@)
7812  *
7813  * Copied from musl: src/math/cbrt.c
7814  */
7815 double CDECL cbrt(double x)
7816 {
7817     static const UINT32 B1 = 715094163, B2 = 696219795;
7818     static const double P0 =  1.87595182427177009643,
7819                  P1 = -1.88497979543377169875,
7820                  P2 =  1.621429720105354466140,
7821                  P3 = -0.758397934778766047437,
7822                  P4 =  0.145996192886612446982;
7823
7824     union {double f; UINT64 i;} u = {x};
7825     double r,s,t,w;
7826     UINT32 hx = u.i >> 32 & 0x7fffffff;
7827
7828     if (hx >= 0x7ff00000)  /* cbrt(NaN,INF) is itself */
7829         return x + x;
7830
7831     if (hx < 0x00100000) { /* zero or subnormal? */
7832         u.f = x * 0x1p54;
7833         hx = u.i>>32 & 0x7fffffff;
7834         if (hx == 0)
7835             return x;
7836         hx = hx / 3 + B2;
7837     } else
7838         hx = hx / 3 + B1;
7839     u.i &= 1ULL << 63;
7840     u.i |= (UINT64)hx << 32;
7841     t = u.f;
7842
7843     r = (t * t) * (t / x);
7844     t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4));
7845
7846     u.f = t;
7847     u.i = (u.i + 0x80000000) & 0xffffffffc0000000ULL;
7848     t = u.f;
7849
7850     s = t * t;
7851     r = x / s;
7852     w = t + t;
7853     r = (r - t) / (w + r);
7854     t = t + t * r;
7855     return t;
7856 }
7857
7858 /*********************************************************************
7859  *      cbrtf (MSVCR120.@)
7860  *
7861  * Copied from musl: src/math/cbrtf.c
7862  */
7863 float CDECL cbrtf(float x)
7864 {
7865     static const unsigned B1 = 709958130, B2 = 642849266;
7866
7867     double r,T;
7868     union {float f; UINT32 i;} u = {x};
7869     UINT32 hx = u.i & 0x7fffffff;
7870
7871     if (hx >= 0x7f800000)
7872         return x + x;
7873
7874     if (hx < 0x00800000) {  /* zero or subnormal? */
7875         if (hx == 0)
7876             return x;
7877         u.f = x * 0x1p24f;
7878         hx = u.i & 0x7fffffff;
7879         hx = hx / 3 + B2;
7880     } else
7881         hx = hx / 3 + B1;
7882     u.i &= 0x80000000;
7883     u.i |= hx;
7884
7885     T = u.f;
7886     r = T * T * T;
7887     T = T * (x + x + r) / (x + r + r);
7888
7889     r = T * T * T;
7890     T = T * (x + x + r) / (x + r + r);
7891     return T;
7892 }
7893
7894 /*********************************************************************
7895  *      exp2 (MSVCR120.@)
7896  *
7897  * Copied from musl: src/math/exp2.c
7898  */
7899 double CDECL exp2(double x)
7900 {
7901     static const double C[] = {
7902         0x1.62e42fefa39efp-1,
7903         0x1.ebfbdff82c424p-3,
7904         0x1.c6b08d70cf4b5p-5,
7905         0x1.3b2abd24650ccp-7,
7906         0x1.5d7e09b4e3a84p-10
7907     };
7908
7909     UINT32 abstop;
7910     UINT64 ki, idx, top, sbits;
7911     double kd, r, r2, scale, tail, tmp;
7912
7913     abstop = (*(UINT64*)&x >> 52) & 0x7ff;
7914     if (abstop - 0x3c9 >= 0x408 - 0x3c9) {
7915         if (abstop - 0x3c9 >= 0x80000000) {
7916             /* Avoid spurious underflow for tiny x. */
7917             /* Note: 0 is common input. */
7918             return 1.0 + x;
7919         }
7920         if (abstop >= 409) {
7921             if (*(UINT64*)&x == 0xfff0000000000000ull)
7922                 return 0.0;
7923             if (abstop >= 0x7ff)
7924                 return 1.0 + x;
7925             if (!(*(UINT64*)&x >> 63)) {
7926                 *_errno() = ERANGE;
7927                 return fp_barrier(DBL_MAX) * DBL_MAX;
7928             }
7929             else if (x <= -2147483648.0) {
7930                 fp_barrier(x + 0x1p120f);
7931                 return 0;
7932             }
7933             else if (*(UINT64*)&x >= 0xc090cc0000000000ull) {
7934                 *_errno() = ERANGE;
7935                 fp_barrier(x + 0x1p120f);
7936                 return 0;
7937             }
7938         }
7939         if (2 * *(UINT64*)&x > 2 * 0x408d000000000000ull)
7940             /* Large x is special cased below. */
7941             abstop = 0;
7942     }
7943
7944     /* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */
7945     /* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */
7946     kd = fp_barrier(x + 0x1.8p52 / (1 << 7));
7947     ki = *(UINT64*)&kd; /* k. */
7948     kd -= 0x1.8p52 / (1 << 7); /* k/N for int k. */
7949     r = x - kd;
7950     /* 2^(k/N) ~= scale * (1 + tail). */
7951     idx = 2 * (ki % (1 << 7));
7952     top = ki << (52 - 7);
7953     tail = *(double*)&exp_T[idx];
7954     /* This is only a valid scale when -1023*N < k < 1024*N. */
7955     sbits = exp_T[idx + 1] + top;
7956     /* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */
7957     /* Evaluation is optimized assuming superscalar pipelined execution. */
7958     r2 = r * r;
7959     /* Without fma the worst case error is 0.5/N ulp larger. */
7960     /* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */
7961     tmp = tail + r * C[0] + r2 * (C[1] + r * C[2]) + r2 * r2 * (C[3] + r * C[4]);
7962     if (abstop == 0)
7963     {
7964         /* Handle cases that may overflow or underflow when computing the result that
7965            is scale*(1+TMP) without intermediate rounding. The bit representation of
7966            scale is in SBITS, however it has a computed exponent that may have
7967            overflown into the sign bit so that needs to be adjusted before using it as
7968            a double. (int32_t)KI is the k used in the argument reduction and exponent
7969            adjustment of scale, positive k here means the result may overflow and
7970            negative k means the result may underflow. */
7971         double scale, y;
7972
7973         if ((ki & 0x80000000) == 0) {
7974             /* k > 0, the exponent of scale might have overflowed by 1. */
7975             sbits -= 1ull << 52;
7976             scale = *(double*)&sbits;
7977             y = 2 * (scale + scale * tmp);
7978             return y;
7979         }
7980         /* k < 0, need special care in the subnormal range. */
7981         sbits += 1022ull << 52;
7982         scale = *(double*)&sbits;
7983         y = scale + scale * tmp;
7984         if (y < 1.0) {
7985             /* Round y to the right precision before scaling it into the subnormal
7986                range to avoid double rounding that can cause 0.5+E/2 ulp error where
7987                E is the worst-case ulp error outside the subnormal range. So this
7988                is only useful if the goal is better than 1 ulp worst-case error. */
7989             double hi, lo;
7990             lo = scale - y + scale * tmp;
7991             hi = 1.0 + y;
7992             lo = 1.0 - hi + y + lo;
7993             y = hi + lo - 1.0;
7994             /* Avoid -0.0 with downward rounding. */
7995             if (y == 0.0)
7996                 y = 0.0;
7997             /* The underflow exception needs to be signaled explicitly. */
7998             fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022);
7999         }
8000         y = 0x1p-1022 * y;
8001         return y;
8002     }
8003     scale = *(double*)&sbits;
8004     /* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
8005        is no spurious underflow here even without fma. */
8006     return scale + scale * tmp;
8007 }
8008
8009 /*********************************************************************
8010  *      exp2f (MSVCR120.@)
8011  *
8012  * Copied from musl: src/math/exp2f.c
8013  */
8014 float CDECL exp2f(float x)
8015 {
8016     static const double C[] = {
8017         0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1
8018     };
8019     static const double shift = 0x1.8p+52 / (1 << 5);
8020
8021     double kd, xd, z, r, r2, y, s;
8022     UINT32 abstop;
8023     UINT64 ki, t;
8024
8025     xd = x;
8026     abstop = (*(UINT32*)&x >> 20) & 0x7ff;
8027     if (abstop >= 0x430) {
8028         /* |x| >= 128 or x is nan.  */
8029         if (*(UINT32*)&x == 0xff800000)
8030             return 0.0f;
8031         if (abstop >= 0x7f8)
8032             return x + x;
8033         if (x > 0.0f) {
8034             *_errno() = ERANGE;
8035             return fp_barrierf(x * FLT_MAX);
8036         }
8037         if (x <= -150.0f) {
8038             fp_barrierf(x - 0x1p120);
8039             return 0;
8040         }
8041     }
8042
8043     /* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k, N = 1 << 5. */
8044     kd = xd + shift;
8045     ki = *(UINT64*)&kd;
8046     kd -= shift; /* k/(1<<5) for int k.  */
8047     r = xd - kd;
8048
8049     /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
8050     t = exp2f_T[ki % (1 << 5)];
8051     t += ki << (52 - 5);
8052     s = *(double*)&t;
8053     z = C[0] * r + C[1];
8054     r2 = r * r;
8055     y = C[2] * r + 1;
8056     y = z * r2 + y;
8057     y = y * s;
8058     return y;
8059 }
8060
8061 /*********************************************************************
8062  *      expm1 (MSVCR120.@)
8063  */
8064 double CDECL expm1(double x)
8065 {
8066     return __expm1(x);
8067 }
8068
8069 /*********************************************************************
8070  *      expm1f (MSVCR120.@)
8071  */
8072 float CDECL expm1f(float x)
8073 {
8074     return __expm1f(x);
8075 }
8076
/*********************************************************************
 *      log1p (MSVCR120.@)
 *
 * Copied from musl: src/math/log1p.c
 *
 * Computes log(1 + x).
 *
 * Special cases, as handled below:
 *   x == -1        errno is set to ERANGE, returns -inf
 *   x < -1         errno is set to EDOM, returns NaN.  The test is made
 *                  on the high word, so -inf and NaNs with the sign bit
 *                  set also take this path.
 *   |x| < 2**-53   returns x
 *   +inf, or NaN with the sign bit clear
 *                  returns x unchanged
 */
double CDECL log1p(double x)
{
    /* ln2_hi/ln2_lo: the two parts that multiply k in the final sum;
       Lg1..Lg7: polynomial coefficients used to build R below. */
    static const double ln2_hi = 6.93147180369123816490e-01,
        ln2_lo = 1.90821492927058770002e-10,
        Lg1 = 6.666666666666735130e-01,
        Lg2 = 3.999999999940941908e-01,
        Lg3 = 2.857142874366239149e-01,
        Lg4 = 2.222219843214978396e-01,
        Lg5 = 1.818357216161805012e-01,
        Lg6 = 1.531383769920937332e-01,
        Lg7 = 1.479819860511658591e-01;

    union {double f; UINT64 i;} u = {x};
    double hfsq, f, c, s, z, R, w, t1, t2, dk;
    UINT32 hx, hu;
    int k;

    /* hx: upper 32 bits of x (sign, exponent, top of the mantissa) */
    hx = u.i >> 32;
    /* k != 0 means "1+x still has to be reduced"; cleared below when x
       can be used directly as f */
    k = 1;
    if (hx < 0x3fda827a || hx >> 31) { /* 1+x < sqrt(2)+ */
        if (hx >= 0xbff00000) { /* x <= -1.0 */
            if (x == -1) {
                *_errno() = ERANGE;
                return x / 0.0; /* log1p(-1) = -inf */
            }
            *_errno() = EDOM;
            return (x-x) / 0.0; /* log1p(x<-1) = NaN */
        }
        if (hx << 1 < 0x3ca00000 << 1) { /* |x| < 2**-53 */
            /* The sum is discarded; fp_barrier() is defined elsewhere and
               presumably only forces the addition to be evaluated. */
            fp_barrier(x + 0x1p120f);
            /* underflow if subnormal */
            if ((hx & 0x7ff00000) == 0)
                fp_barrierf(x);
            return x;
        }
        if (hx <= 0xbfd2bec4) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
            /* no reduction needed: use x itself as f */
            k = 0;
            c = 0;
            f = x;
        }
    } else if (hx >= 0x7ff00000)
        return x;
    if (k) {
        u.f = 1 + x;
        hu = u.i >> 32;
        hu += 0x3ff00000 - 0x3fe6a09e;
        /* unbiased exponent of the shifted high word */
        k = (int)(hu >> 20) - 0x3ff;
        /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
        if (k < 54) {
            c = k >= 2 ? 1 - (u.f - x) : x - (u.f - 1);
            c /= u.f;
        } else
            c = 0;
        /* reduce u into [sqrt(2)/2, sqrt(2)] */
        hu = (hu & 0x000fffff) + 0x3fe6a09e;
        /* rebuild u from the new high word and the original low word */
        u.i = (UINT64)hu << 32 | (u.i & 0xffffffff);
        f = u.f - 1;
    }
    hfsq = 0.5 * f * f;
    s = f / (2.0 + f);
    z = s * s;
    w = z * z;
    /* R is evaluated as two interleaved polynomials in w = s**4 */
    t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
    t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
    R = t2 + t1;
    dk = k;
    return s * (hfsq + R) + (dk * ln2_lo + c) - hfsq + f + dk * ln2_hi;
}
8150
/*********************************************************************
 *      log1pf (MSVCR120.@)
 *
 * Copied from musl: src/math/log1pf.c
 *
 * Single-precision log(1 + x).
 *
 * Special cases, as handled below:
 *   x == -1        errno is set to ERANGE, returns -inf
 *   x < -1         errno is set to EDOM, returns NaN.  The test is made
 *                  on the bit pattern, so -inf and NaNs with the sign
 *                  bit set also take this path.
 *   |x| < 2**-24   returns x
 *   +inf, or NaN with the sign bit clear
 *                  returns x unchanged
 */
float CDECL log1pf(float x)
{
    /* ln2_hi/ln2_lo: the two parts that multiply k in the final sum;
       Lg1..Lg4: polynomial coefficients used to build R below. */
    static const float ln2_hi = 6.9313812256e-01,
        ln2_lo = 9.0580006145e-06,
        Lg1 = 0xaaaaaa.0p-24,
        Lg2 = 0xccce13.0p-25,
        Lg3 = 0x91e9ee.0p-25,
        Lg4 = 0xf89e26.0p-26;

    union {float f; UINT32 i;} u = {x};
    float hfsq, f, c, s, z, R, w, t1, t2, dk;
    UINT32 ix, iu;
    int k;

    /* ix: raw bit pattern of x */
    ix = u.i;
    /* k != 0 means "1+x still has to be reduced"; cleared below when x
       can be used directly as f */
    k = 1;
    if (ix < 0x3ed413d0 || ix >> 31) { /* 1+x < sqrt(2)+ */
        if (ix >= 0xbf800000) { /* x <= -1.0 */
            if (x == -1) {
                *_errno() = ERANGE;
                return x / 0.0f; /* log1p(-1)=-inf */
            }
            *_errno() = EDOM;
            return (x - x) / 0.0f; /* log1p(x<-1)=NaN */
        }
        if (ix<<1 < 0x33800000<<1) { /* |x| < 2**-24 */
            /* underflow if subnormal */
            if ((ix & 0x7f800000) == 0)
                fp_barrierf(x * x);
            return x;
        }
        if (ix <= 0xbe95f619) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
            /* no reduction needed: use x itself as f */
            k = 0;
            c = 0;
            f = x;
        }
    } else if (ix >= 0x7f800000)
        return x;
    if (k) {
        u.f = 1 + x;
        iu = u.i;
        iu += 0x3f800000 - 0x3f3504f3;
        /* unbiased exponent of the shifted bit pattern */
        k = (int)(iu >> 23) - 0x7f;
        /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
        if (k < 25) {
            c = k >= 2 ? 1 - (u.f - x) : x - (u.f - 1);
            c /= u.f;
        } else
            c = 0;
        /* reduce u into [sqrt(2)/2, sqrt(2)] */
        iu = (iu & 0x007fffff) + 0x3f3504f3;
        u.i = iu;
        f = u.f - 1;
    }
    s = f / (2.0f + f);
    z = s * s;
    w = z * z;
    /* R is evaluated as two interleaved polynomials in w = s**4 */
    t1= w * (Lg2 + w * Lg4);
    t2= z * (Lg1 + w * Lg3);
    R = t2 + t1;
    hfsq = 0.5f * f * f;
    dk = k;
    return s * (hfsq + R) + (dk * ln2_lo + c) - hfsq + f + dk * ln2_hi;
}
8220
8221 /*********************************************************************
8222  *      log2 (MSVCR120.@)
8223  *
8224  * Copied from musl: src/math/log2.c
8225  */
8226 double CDECL log2(double x)
8227 {
8228     static const double invln2hi = 0x1.7154765200000p+0,
8229         invln2lo = 0x1.705fc2eefa200p-33;
8230     static const double A[] = {
8231         -0x1.71547652b8339p-1,
8232         0x1.ec709dc3a04bep-2,
8233         -0x1.7154764702ffbp-2,
8234         0x1.2776c50034c48p-2,
8235         -0x1.ec7b328ea92bcp-3,
8236         0x1.a6225e117f92ep-3
8237     };
8238     static const double B[] = {
8239         -0x1.71547652b82fep-1,
8240         0x1.ec709dc3a03f7p-2,
8241         -0x1.71547652b7c3fp-2,
8242         0x1.2776c50f05be4p-2,
8243         -0x1.ec709dd768fe5p-3,
8244         0x1.a61761ec4e736p-3,
8245         -0x1.7153fbc64a79bp-3,
8246         0x1.484d154f01b4ap-3,
8247         -0x1.289e4a72c383cp-3,
8248         0x1.0b32f285aee66p-3
8249     };
8250     static const struct {
8251         double invc, logc;
8252     } T[] = {
8253         {0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1},
8254         {0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1},
8255         {0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1},
8256         {0x1.661ec32d06c85p+0, -0x1.efec5360b4000p-2},
8257         {0x1.623fa951198f8p+0, -0x1.dfdd91ab7e000p-2},
8258         {0x1.5e75ba4cf026cp+0, -0x1.cffae0cc79000p-2},
8259         {0x1.5ac055a214fb8p+0, -0x1.c043811fda000p-2},
8260         {0x1.571ed0f166e1ep+0, -0x1.b0b67323ae000p-2},
8261         {0x1.53909590bf835p+0, -0x1.a152f5a2db000p-2},
8262         {0x1.5014fed61adddp+0, -0x1.9217f5af86000p-2},
8263         {0x1.4cab88e487bd0p+0, -0x1.8304db0719000p-2},
8264         {0x1.49539b4334feep+0, -0x1.74189f9a9e000p-2},
8265         {0x1.460cbdfafd569p+0, -0x1.6552bb5199000p-2},
8266         {0x1.42d664ee4b953p+0, -0x1.56b23a29b1000p-2},
8267         {0x1.3fb01111dd8a6p+0, -0x1.483650f5fa000p-2},
8268         {0x1.3c995b70c5836p+0, -0x1.39de937f6a000p-2},
8269         {0x1.3991c4ab6fd4ap+0, -0x1.2baa1538d6000p-2},
8270         {0x1.3698e0ce099b5p+0, -0x1.1d98340ca4000p-2},
8271         {0x1.33ae48213e7b2p+0, -0x1.0fa853a40e000p-2},
8272         {0x1.30d191985bdb1p+0, -0x1.01d9c32e73000p-2},
8273         {0x1.2e025cab271d7p+0, -0x1.e857da2fa6000p-3},
8274         {0x1.2b404cf13cd82p+0, -0x1.cd3c8633d8000p-3},
8275         {0x1.288b02c7ccb50p+0, -0x1.b26034c14a000p-3},
8276         {0x1.25e2263944de5p+0, -0x1.97c1c2f4fe000p-3},
8277         {0x1.234563d8615b1p+0, -0x1.7d6023f800000p-3},
8278         {0x1.20b46e33eaf38p+0, -0x1.633a71a05e000p-3},
8279         {0x1.1e2eefdcda3ddp+0, -0x1.494f5e9570000p-3},
8280         {0x1.1bb4a580b3930p+0, -0x1.2f9e424e0a000p-3},
8281         {0x1.19453847f2200p+0, -0x1.162595afdc000p-3},
8282         {0x1.16e06c0d5d73cp+0, -0x1.f9c9a75bd8000p-4},
8283         {0x1.1485f47b7e4c2p+0, -0x1.c7b575bf9c000p-4},
8284         {0x1.12358ad0085d1p+0, -0x1.960c60ff48000p-4},
8285         {0x1.0fef00f532227p+0, -0x1.64ce247b60000p-4},
8286         {0x1.0db2077d03a8fp+0, -0x1.33f78b2014000p-4},
8287         {0x1.0b7e6d65980d9p+0, -0x1.0387d1a42c000p-4},
8288         {0x1.0953efe7b408dp+0, -0x1.a6f9208b50000p-5},
8289         {0x1.07325cac53b83p+0, -0x1.47a954f770000p-5},
8290         {0x1.05197e40d1b5cp+0, -0x1.d23a8c50c0000p-6},
8291         {0x1.03091c1208ea2p+0, -0x1.16a2629780000p-6},
8292         {0x1.0101025b37e21p+0, -0x1.720f8d8e80000p-8},
8293         {0x1.fc07ef9caa76bp-1, 0x1.6fe53b1500000p-7},
8294         {0x1.f4465d3f6f184p-1, 0x1.11ccce10f8000p-5},
8295         {0x1.ecc079f84107fp-1, 0x1.c4dfc8c8b8000p-5},
8296         {0x1.e573a99975ae8p-1, 0x1.3aa321e574000p-4},
8297         {0x1.de5d6f0bd3de6p-1, 0x1.918a0d08b8000p-4},
8298         {0x1.d77b681ff38b3p-1, 0x1.e72e9da044000p-4},
8299         {0x1.d0cb5724de943p-1, 0x1.1dcd2507f6000p-3},
8300         {0x1.ca4b2dc0e7563p-1, 0x1.476ab03dea000p-3},
8301         {0x1.c3f8ee8d6cb51p-1, 0x1.7074377e22000p-3},
8302         {0x1.bdd2b4f020c4cp-1, 0x1.98ede8ba94000p-3},
8303         {0x1.b7d6c006015cap-1, 0x1.c0db86ad2e000p-3},
8304         {0x1.b20366e2e338fp-1, 0x1.e840aafcee000p-3},
8305         {0x1.ac57026295039p-1, 0x1.0790ab4678000p-2},
8306         {0x1.a6d01bc2731ddp-1, 0x1.1ac056801c000p-2},
8307         {0x1.a16d3bc3ff18bp-1, 0x1.2db11d4fee000p-2},
8308         {0x1.9c2d14967feadp-1, 0x1.406464ec58000p-2},
8309         {0x1.970e4f47c9902p-1, 0x1.52dbe093af000p-2},
8310         {0x1.920fb3982bcf2p-1, 0x1.651902050d000p-2},
8311         {0x1.8d30187f759f1p-1, 0x1.771d2cdeaf000p-2},
8312         {0x1.886e5ebb9f66dp-1, 0x1.88e9c857d9000p-2},
8313         {0x1.83c97b658b994p-1, 0x1.9a80155e16000p-2},
8314         {0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2},
8315         {0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2},
8316         {0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2}
8317     };
8318     static const struct {
8319         double chi, clo;
8320     } T2[] = {
8321         {0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
8322         {0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57},
8323         {0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55},
8324         {0x1.6e00038b95a04p-1, 0x1.8ff8856739326p-55},
8325         {0x1.71fffe09994e3p-1, 0x1.afd40275f82b1p-55},
8326         {0x1.7600015590e1p-1, -0x1.2fd75b4238341p-56},
8327         {0x1.7a00012655bd5p-1, 0x1.808e67c242b76p-56},
8328         {0x1.7e0003259e9a6p-1, -0x1.208e426f622b7p-57},
8329         {0x1.81fffedb4b2d2p-1, -0x1.402461ea5c92fp-55},
8330         {0x1.860002dfafcc3p-1, 0x1.df7f4a2f29a1fp-57},
8331         {0x1.89ffff78c6b5p-1, -0x1.e0453094995fdp-55},
8332         {0x1.8e00039671566p-1, -0x1.a04f3bec77b45p-55},
8333         {0x1.91fffe2bf1745p-1, -0x1.7fa34400e203cp-56},
8334         {0x1.95fffcc5c9fd1p-1, -0x1.6ff8005a0695dp-56},
8335         {0x1.9a0003bba4767p-1, 0x1.0f8c4c4ec7e03p-56},
8336         {0x1.9dfffe7b92da5p-1, 0x1.e7fd9478c4602p-55},
8337         {0x1.a1fffd72efdafp-1, -0x1.a0c554dcdae7ep-57},
8338         {0x1.a5fffde04ff95p-1, 0x1.67da98ce9b26bp-55},
8339         {0x1.a9fffca5e8d2bp-1, -0x1.284c9b54c13dep-55},
8340         {0x1.adfffddad03eap-1, 0x1.812c8ea602e3cp-58},
8341         {0x1.b1ffff10d3d4dp-1, -0x1.efaddad27789cp-55},
8342         {0x1.b5fffce21165ap-1, 0x1.3cb1719c61237p-58},
8343         {0x1.b9fffd950e674p-1, 0x1.3f7d94194cep-56},
8344         {0x1.be000139ca8afp-1, 0x1.50ac4215d9bcp-56},
8345         {0x1.c20005b46df99p-1, 0x1.beea653e9c1c9p-57},
8346         {0x1.c600040b9f7aep-1, -0x1.c079f274a70d6p-56},
8347         {0x1.ca0006255fd8ap-1, -0x1.a0b4076e84c1fp-56},
8348         {0x1.cdfffd94c095dp-1, 0x1.8f933f99ab5d7p-55},
8349         {0x1.d1ffff975d6cfp-1, -0x1.82c08665fe1bep-58},
8350         {0x1.d5fffa2561c93p-1, -0x1.b04289bd295f3p-56},
8351         {0x1.d9fff9d228b0cp-1, 0x1.70251340fa236p-55},
8352         {0x1.de00065bc7e16p-1, -0x1.5011e16a4d80cp-56},
8353         {0x1.e200002f64791p-1, 0x1.9802f09ef62ep-55},
8354         {0x1.e600057d7a6d8p-1, -0x1.e0b75580cf7fap-56},
8355         {0x1.ea00027edc00cp-1, -0x1.c848309459811p-55},
8356         {0x1.ee0006cf5cb7cp-1, -0x1.f8027951576f4p-55},
8357         {0x1.f2000782b7dccp-1, -0x1.f81d97274538fp-55},
8358         {0x1.f6000260c450ap-1, -0x1.071002727ffdcp-59},
8359         {0x1.f9fffe88cd533p-1, -0x1.81bdce1fda8bp-58},
8360         {0x1.fdfffd50f8689p-1, 0x1.7f91acb918e6ep-55},
8361         {0x1.0200004292367p+0, 0x1.b7ff365324681p-54},
8362         {0x1.05fffe3e3d668p+0, 0x1.6fa08ddae957bp-55},
8363         {0x1.0a0000a85a757p+0, -0x1.7e2de80d3fb91p-58},
8364         {0x1.0e0001a5f3fccp+0, -0x1.1823305c5f014p-54},
8365         {0x1.11ffff8afbaf5p+0, -0x1.bfabb6680bac2p-55},
8366         {0x1.15fffe54d91adp+0, -0x1.d7f121737e7efp-54},
8367         {0x1.1a00011ac36e1p+0, 0x1.c000a0516f5ffp-54},
8368         {0x1.1e00019c84248p+0, -0x1.082fbe4da5dap-54},
8369         {0x1.220000ffe5e6ep+0, -0x1.8fdd04c9cfb43p-55},
8370         {0x1.26000269fd891p+0, 0x1.cfe2a7994d182p-55},
8371         {0x1.2a00029a6e6dap+0, -0x1.00273715e8bc5p-56},
8372         {0x1.2dfffe0293e39p+0, 0x1.b7c39dab2a6f9p-54},
8373         {0x1.31ffff7dcf082p+0, 0x1.df1336edc5254p-56},
8374         {0x1.35ffff05a8b6p+0, -0x1.e03564ccd31ebp-54},
8375         {0x1.3a0002e0eaeccp+0, 0x1.5f0e74bd3a477p-56},
8376         {0x1.3e000043bb236p+0, 0x1.c7dcb149d8833p-54},
8377         {0x1.4200002d187ffp+0, 0x1.e08afcf2d3d28p-56},
8378         {0x1.460000d387cb1p+0, 0x1.20837856599a6p-55},
8379         {0x1.4a00004569f89p+0, -0x1.9fa5c904fbcd2p-55},
8380         {0x1.4e000043543f3p+0, -0x1.81125ed175329p-56},
8381         {0x1.51fffcc027f0fp+0, 0x1.883d8847754dcp-54},
8382         {0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55},
8383         {0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55},
8384         {0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54}
8385     };
8386
8387     double z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p, rhi, rlo;
8388     UINT64 ix, iz, tmp;
8389     UINT32 top;
8390     int k, i;
8391
8392     ix = *(UINT64*)&x;
8393     top = ix >> 48;
8394     if (ix - 0x3feea4af00000000ULL < 0x210aa00000000ULL) {
8395         /* Handle close to 1.0 inputs separately.  */
8396         /* Fix sign of zero with downward rounding when x==1.  */
8397         if (ix == 0x3ff0000000000000ULL)
8398             return 0;
8399         r = x - 1.0;
8400         *(UINT64*)&rhi = *(UINT64*)&r & -1ULL << 32;
8401         rlo = r - rhi;
8402         hi = rhi * invln2hi;
8403         lo = rlo * invln2hi + r * invln2lo;
8404         r2 = r * r; /* rounding error: 0x1p-62.  */
8405         r4 = r2 * r2;
8406         /* Worst-case error is less than 0.54 ULP (0.55 ULP without fma).  */
8407         p = r2 * (B[0] + r * B[1]);
8408         y = hi + p;
8409         lo += hi - y + p;
8410         lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) +
8411                 r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
8412         y += lo;
8413         return y;
8414     }
8415     if (top - 0x0010 >= 0x7ff0 - 0x0010) {
8416         /* x < 0x1p-1022 or inf or nan.  */
8417         if (ix * 2 == 0) {
8418             *_errno() = ERANGE;
8419             return -1.0 / x;
8420         }
8421         if (ix == 0x7ff0000000000000ULL) /* log(inf) == inf.  */
8422             return x;
8423         if ((top & 0x7ff0) == 0x7ff0 && (ix & 0xfffffffffffffULL))
8424             return x;
8425         if (top & 0x8000) {
8426             *_errno() = EDOM;
8427             return (x - x) / (x - x);
8428         }
8429         /* x is subnormal, normalize it.  */
8430         x *= 0x1p52;
8431         ix = *(UINT64*)&x;
8432         ix -= 52ULL << 52;
8433     }
8434
8435     /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
8436        The range is split into N subintervals.
8437        The ith subinterval contains z and c is near its center.  */
8438     tmp = ix - 0x3fe6000000000000ULL;
8439     i = (tmp >> (52 - 6)) % (1 << 6);
8440     k = (INT64)tmp >> 52; /* arithmetic shift */
8441     iz = ix - (tmp & 0xfffULL << 52);
8442     invc = T[i].invc;
8443     logc = T[i].logc;
8444     z = *(double*)&iz;
8445     kd = k;
8446
8447     /* log2(x) = log2(z/c) + log2(c) + k.  */
8448     /* r ~= z/c - 1, |r| < 1/(2*N).  */
8449     /* rounding error: 0x1p-55/N + 0x1p-65.  */
8450     r = (z - T2[i].chi - T2[i].clo) * invc;
8451     *(UINT64*)&rhi = *(UINT64*)&r & -1ULL << 32;
8452     rlo = r - rhi;
8453     t1 = rhi * invln2hi;
8454     t2 = rlo * invln2hi + r * invln2lo;
8455
8456     /* hi + lo = r/ln2 + log2(c) + k.  */
8457     t3 = kd + logc;
8458     hi = t3 + t1;
8459     lo = t3 - hi + t1 + t2;
8460
8461     /* log2(r+1) = r/ln2 + r^2*poly(r).  */
8462     /* Evaluation is optimized assuming superscalar pipelined execution.  */
8463     r2 = r * r; /* rounding error: 0x1p-54/N^2.  */
8464     r4 = r2 * r2;
8465     /* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
8466        ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma).  */
8467     p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
8468     y = lo + r2 * p + hi;
8469     return y;
8470 }
8471
8472 /*********************************************************************
8473  *      log2f (MSVCR120.@)
8474  *
8475  * Copied from musl: src/math/log2f.c
8476  */
8477 float CDECL log2f(float x)
8478 {
8479     static const double A[] = {
8480         -0x1.712b6f70a7e4dp-2,
8481         0x1.ecabf496832ep-2,
8482         -0x1.715479ffae3dep-1,
8483         0x1.715475f35c8b8p0
8484     };
8485     static const struct {
8486         double invc, logc;
8487     } T[] = {
8488         { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },
8489         { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 },
8490         { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 },
8491         { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 },
8492         { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 },
8493         { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 },
8494         { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 },
8495         { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 },
8496         { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 },
8497         { 0x1p+0, 0x0p+0 },
8498         { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 },
8499         { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 },
8500         { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 },
8501         { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 },
8502         { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 },
8503         { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 }
8504     };
8505
8506     double z, r, r2, p, y, y0, invc, logc;
8507     UINT32 ix, iz, top, tmp;
8508     int k, i;
8509
8510     ix = *(UINT32*)&x;
8511     /* Fix sign of zero with downward rounding when x==1. */
8512     if (ix == 0x3f800000)
8513         return 0;
8514     if (ix - 0x00800000 >= 0x7f800000 - 0x00800000) {
8515         /* x < 0x1p-126 or inf or nan. */
8516         if (ix * 2 == 0) {
8517             *_errno() = ERANGE;
8518             return -1.0f / x;
8519         }
8520         if (ix == 0x7f800000) /* log2(inf) == inf. */
8521             return x;
8522         if (ix * 2 > 0xff000000)
8523             return x;
8524         if (ix & 0x80000000) {
8525             *_errno() = EDOM;
8526             return (x - x) / (x - x);
8527         }
8528         /* x is subnormal, normalize it. */
8529         x *= 0x1p23f;
8530         ix = *(UINT32*)&x;
8531         ix -= 23 << 23;
8532     }
8533
8534     /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
8535        The range is split into N subintervals.
8536        The ith subinterval contains z and c is near its center. */
8537     tmp = ix - 0x3f330000;
8538     i = (tmp >> (23 - 4)) % (1 << 4);
8539     top = tmp & 0xff800000;
8540     iz = ix - top;
8541     k = (INT32)tmp >> 23; /* arithmetic shift */
8542     invc = T[i].invc;
8543     logc = T[i].logc;
8544     z = *(float*)&iz;
8545
8546     /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
8547     r = z * invc - 1;
8548     y0 = logc + (double)k;
8549
8550     /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
8551     r2 = r * r;
8552     y = A[1] * r + A[2];
8553     y = A[0] * r2 + y;
8554     p = A[3] * r + y0;
8555     y = y * r2 + p;
8556     return y;
8557 }
8558
/*********************************************************************
 *      rint (MSVCR120.@)
 *
 * Exported entry point. The argument is passed unchanged to the
 * internal __rint() helper (defined elsewhere in this file) and that
 * helper's result is returned as is.
 */
double CDECL rint(double x)
{
    return __rint(x);
}
8566
8567 /*********************************************************************
8568  *      rintf (MSVCR120.@)
8569  *
8570  * Copied from musl: src/math/rintf.c
8571  */
8572 float CDECL rintf(float x)
8573 {
8574     static const float toint = 1 / FLT_EPSILON;
8575
8576     unsigned int ix = *(unsigned int*)&x;
8577     int e = ix >> 23 & 0xff;
8578     int s = ix >> 31;
8579     float y;
8580
8581     if (e >= 0x7f + 23)
8582         return x;
8583     if (s)
8584         y = fp_barrierf(x - toint) + toint;
8585     else
8586         y = fp_barrierf(x + toint) - toint;
8587     if (y == 0)
8588         return s ? -0.0f : 0.0f;
8589     return y;
8590 }
8591
8592 /*********************************************************************
8593  *      lrint (MSVCR120.@)
8594  */
8595 __msvcrt_long CDECL lrint(double x)
8596 {
8597     double d;
8598
8599     d = rint(x);
8600     if ((d < 0 && d != (double)(__msvcrt_long)d)
8601             || (d >= 0 && d != (double)(__msvcrt_ulong)d)) {
8602         *_errno() = EDOM;
8603         return 0;
8604     }
8605     return d;
8606 }
8607
8608 /*********************************************************************
8609  *      lrintf (MSVCR120.@)
8610  */
8611 __msvcrt_long CDECL lrintf(float x)
8612 {
8613     float f;
8614
8615     f = rintf(x);
8616     if ((f < 0 && f != (float)(__msvcrt_long)f)
8617             || (f >= 0 && f != (float)(__msvcrt_ulong)f)) {
8618         *_errno() = EDOM;
8619         return 0;
8620     }
8621     return f;
8622 }
8623
8624 /*********************************************************************
8625  *      llrint (MSVCR120.@)
8626  */
8627 __int64 CDECL llrint(double x)
8628 {
8629     double d;
8630
8631     d = rint(x);
8632     if ((d < 0 && d != (double)(__int64)d)
8633             || (d >= 0 && d != (double)(unsigned __int64)d)) {
8634         *_errno() = EDOM;
8635         return 0;
8636     }
8637     return d;
8638 }
8639
8640 /*********************************************************************
8641  *      llrintf (MSVCR120.@)
8642  */
8643 __int64 CDECL llrintf(float x)
8644 {
8645     float f;
8646
8647     f = rintf(x);
8648     if ((f < 0 && f != (float)(__int64)f)
8649             || (f >= 0 && f != (float)(unsigned __int64)f)) {
8650         *_errno() = EDOM;
8651         return 0;
8652     }
8653     return f;
8654 }
8655
/*********************************************************************
 *      round (MSVCR120.@)
 *
 * Exported entry point. The argument is passed unchanged to the
 * internal __round() helper (defined elsewhere in this file) and that
 * helper's result is returned as is.
 */
double CDECL round(double x)
{
    return __round(x);
}
8663
8664 /*********************************************************************
8665  *      roundf (MSVCR120.@)
8666  *
8667  * Copied from musl: src/math/roundf.c
8668  */
8669 float CDECL roundf(float x)
8670 {
8671     static const float toint = 1 / FLT_EPSILON;
8672
8673     unsigned int ix = *(unsigned int*)&x;
8674     int e = ix >> 23 & 0xff;
8675     float y;
8676
8677     if (e >= 0x7f + 23)
8678         return x;
8679     if (ix >> 31)
8680         x = -x;
8681     if (e < 0x7f - 1)
8682         return 0 * *(float*)&ix;
8683     y = fp_barrierf(x + toint) - toint - x;
8684     if (y > 0.5f)
8685         y = y + x - 1;
8686     else if (y <= -0.5f)
8687         y = y + x + 1;
8688     else
8689         y = y + x;
8690     if (ix >> 31)
8691         y = -y;
8692     return y;
8693 }
8694
8695 /*********************************************************************
8696  *      lround (MSVCR120.@)
8697  *
8698  * Copied from musl: src/math/lround.c
8699  */
8700 __msvcrt_long CDECL lround(double x)
8701 {
8702     double d = round(x);
8703     if (d != (double)(__msvcrt_long)d) {
8704         *_errno() = EDOM;
8705         return 0;
8706     }
8707     return d;
8708 }
8709
8710 /*********************************************************************
8711  *      lroundf (MSVCR120.@)
8712  *
8713  * Copied from musl: src/math/lroundf.c
8714  */
8715 __msvcrt_long CDECL lroundf(float x)
8716 {
8717     float f = roundf(x);
8718     if (f != (float)(__msvcrt_long)f) {
8719         *_errno() = EDOM;
8720         return 0;
8721     }
8722     return f;
8723 }
8724
8725 /*********************************************************************
8726  *      llround (MSVCR120.@)
8727  *
8728  * Copied from musl: src/math/llround.c
8729  */
8730 __int64 CDECL llround(double x)
8731 {
8732     double d = round(x);
8733     if (d != (double)(__int64)d) {
8734         *_errno() = EDOM;
8735         return 0;
8736     }
8737     return d;
8738 }
8739
8740 /*********************************************************************
8741  *      llroundf (MSVCR120.@)
8742  *
8743  * Copied from musl: src/math/llroundf.c
8744  */
8745 __int64 CDECL llroundf(float x)
8746 {
8747     float f = roundf(x);
8748     if (f != (float)(__int64)f) {
8749         *_errno() = EDOM;
8750         return 0;
8751     }
8752     return f;
8753 }
8754
8755 /*********************************************************************
8756  *      trunc (MSVCR120.@)
8757  *
8758  * Copied from musl: src/math/trunc.c
8759  */
8760 double CDECL trunc(double x)
8761 {
8762     union {double f; UINT64 i;} u = {x};
8763     int e = (u.i >> 52 & 0x7ff) - 0x3ff + 12;
8764     UINT64 m;
8765
8766     if (e >= 52 + 12)
8767         return x;
8768     if (e < 12)
8769         e = 1;
8770     m = -1ULL >> e;
8771     if ((u.i & m) == 0)
8772         return x;
8773     u.i &= ~m;
8774     return u.f;
8775 }
8776
8777 /*********************************************************************
8778  *      truncf (MSVCR120.@)
8779  *
8780  * Copied from musl: src/math/truncf.c
8781  */
8782 float CDECL truncf(float x)
8783 {
8784     union {float f; UINT32 i;} u = {x};
8785     int e = (u.i >> 23 & 0xff) - 0x7f + 9;
8786     UINT32 m;
8787
8788     if (e >= 23 + 9)
8789         return x;
8790     if (e < 9)
8791         e = 1;
8792     m = -1U >> e;
8793     if ((u.i & m) == 0)
8794         return x;
8795     u.i &= ~m;
8796     return u.f;
8797 }
8798
/*********************************************************************
 *      _dtest (MSVCR120.@)
 *
 * Pointer-taking variant of _dclass(): dereferences x and returns
 * whatever _dclass() returns for the pointed-to double.  x is read
 * without a NULL check.
 */
short CDECL _dtest(double *x)
{
    return _dclass(*x);
}
8806
/*********************************************************************
 *      _fdtest (MSVCR120.@)
 *
 * Pointer-taking variant of _fdclass(): dereferences x and returns
 * whatever _fdclass() returns for the pointed-to float.  x is read
 * without a NULL check.
 */
short CDECL _fdtest(float *x)
{
    return _fdclass(*x);
}
8814
/* Helper for erfc2(), which calls it when |x| < 1.25.
 * Evaluates 1 - erx - P(s)/Q(s) with s = |x| - 1, where P and Q are
 * polynomials in s evaluated by Horner's rule. */
static double erfc1(double x)
{
    static const double erx = 8.45062911510467529297e-01;
    /* numerator coefficients pa0..pa6 */
    static const double pa[] = {
        -2.36211856075265944077e-03,
         4.14856118683748331666e-01,
        -3.72207876035701323847e-01,
         3.18346619901161753674e-01,
        -1.10894694282396677476e-01,
         3.54783043256182359371e-02,
        -2.16637559486879084300e-03
    };
    /* denominator coefficients qa1..qa6; the constant term is 1 */
    static const double qa[] = {
         1.06420880400844228286e-01,
         5.40397917702171048937e-01,
         7.18286544141962662868e-02,
         1.26171219808761642112e-01,
         1.36370839120290507362e-02,
         1.19844998467991074170e-02
    };

    double s = fabs(x) - 1;
    double num = pa[6];
    double den = qa[5];
    int i;

    for (i = 5; i >= 0; i--)
        num = pa[i] + s * num;
    for (i = 4; i >= 0; i--)
        den = qa[i] + s * den;
    den = 1 + s * den;

    return 1 - erx - num / den;
}
8839
8840 static double erfc2(UINT32 ix, double x)
8841 {
8842     static const double ra0  = -9.86494403484714822705e-03,
8843                  ra1  = -6.93858572707181764372e-01,
8844                  ra2  = -1.05586262253232909814e+01,
8845                  ra3  = -6.23753324503260060396e+01,
8846                  ra4  = -1.62396669462573470355e+02,
8847                  ra5  = -1.84605092906711035994e+02,
8848                  ra6  = -8.12874355063065934246e+01,
8849                  ra7  = -9.81432934416914548592e+00,
8850                  sa1  =  1.96512716674392571292e+01,
8851                  sa2  =  1.37657754143519042600e+02,
8852                  sa3  =  4.34565877475229228821e+02,
8853                  sa4  =  6.45387271733267880336e+02,
8854                  sa5  =  4.29008140027567833386e+02,
8855                  sa6  =  1.08635005541779435134e+02,
8856                  sa7  =  6.57024977031928170135e+00,
8857                  sa8  = -6.04244152148580987438e-02,
8858                  rb0  = -9.86494292470009928597e-03,
8859                  rb1  = -7.99283237680523006574e-01,
8860                  rb2  = -1.77579549177547519889e+01,
8861                  rb3  = -1.60636384855821916062e+02,
8862                  rb4  = -6.37566443368389627722e+02,
8863                  rb5  = -1.02509513161107724954e+03,
8864                  rb6  = -4.83519191608651397019e+02,
8865                  sb1  =  3.03380607434824582924e+01,
8866                  sb2  =  3.25792512996573918826e+02,
8867                  sb3  =  1.53672958608443695994e+03,
8868                  sb4  =  3.19985821950859553908e+03,
8869                  sb5  =  2.55305040643316442583e+03,
8870                  sb6  =  4.74528541206955367215e+02,
8871                  sb7  = -2.24409524465858183362e+01;
8872
8873     double s, R, S, z;
8874     UINT64 iz;
8875
8876     if (ix < 0x3ff40000) /* |x| < 1.25 */
8877         return erfc1(x);
8878
8879     x = fabs(x);
8880     s = 1 / (x * x);
8881     if (ix < 0x4006db6d) { /* |x| < 1/.35 ~ 2.85714 */
8882         R = ra0 + s * (ra1 + s * (ra2 + s * (ra3 + s * (ra4 + s *
8883                             (ra5 + s * (ra6 + s * ra7))))));
8884         S = 1.0 + s * (sa1 + s * (sa2 + s * (sa3 + s * (sa4 + s *
8885                             (sa5 + s * (sa6 + s * (sa7 + s * sa8)))))));
8886     } else { /* |x| > 1/.35 */
8887         R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * (rb4 + s *
8888                             (rb5 + s * rb6)))));
8889         S = 1.0 + s * (sb1 + s * (sb2 + s * (sb3 + s * (sb4 + s *
8890                             (sb5 + s * (sb6 + s * sb7))))));
8891     }
8892     z = x;
8893     iz = *(ULONGLONG*)&z;
8894     iz &= 0xffffffff00000000ULL;
8895     z = *(double*)&iz;
8896     return exp(-z * z - 0.5625) * exp((z - x) * (z + x) + R / S) / x;
8897 }
8898
8899 /*********************************************************************
8900  *      erf (MSVCR120.@)
8901  */
8902 double CDECL erf(double x)
8903 {
8904     static const double efx8 =  1.02703333676410069053e+00,
8905                  pp0  =  1.28379167095512558561e-01,
8906                  pp1  = -3.25042107247001499370e-01,
8907                  pp2  = -2.84817495755985104766e-02,
8908                  pp3  = -5.77027029648944159157e-03,
8909                  pp4  = -2.37630166566501626084e-05,
8910                  qq1  =  3.97917223959155352819e-01,
8911                  qq2  =  6.50222499887672944485e-02,
8912                  qq3  =  5.08130628187576562776e-03,
8913                  qq4  =  1.32494738004321644526e-04,
8914                  qq5  = -3.96022827877536812320e-06;
8915
8916     double r, s, z, y;
8917     UINT32 ix;
8918     int sign;
8919
8920     ix = *(UINT64*)&x >> 32;
8921     sign = ix >> 31;
8922     ix &= 0x7fffffff;
8923     if (ix >= 0x7ff00000) {
8924         /* erf(nan)=nan, erf(+-inf)=+-1 */
8925         return 1 - 2 * sign + 1 / x;
8926     }
8927     if (ix < 0x3feb0000) { /* |x| < 0.84375 */
8928         if (ix < 0x3e300000) { /* |x| < 2**-28 */
8929             /* avoid underflow */
8930             return 0.125 * (8 * x + efx8 * x);
8931         }
8932         z = x * x;
8933         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
8934         s = 1.0 + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
8935         y = r / s;
8936         return x + x * y;
8937     }
8938     if (ix < 0x40180000) /* 0.84375 <= |x| < 6 */
8939         y = 1 - erfc2(ix, x);
8940     else
8941         y = 1 - DBL_MIN;
8942     return sign ? -y : y;
8943 }
8944
/* Single-precision helper that evaluates 1 - erx - P(s)/Q(s) with
 * s = |x| - 1, where P and Q are polynomials in s evaluated by
 * Horner's rule. */
static float erfc1f(float x)
{
    static const float erx = 8.4506291151e-01;
    /* numerator coefficients pa0..pa6 */
    static const float pa[] = {
        -2.3621185683e-03,
         4.1485610604e-01,
        -3.7220788002e-01,
         3.1834661961e-01,
        -1.1089469492e-01,
         3.5478305072e-02,
        -2.1663755178e-03
    };
    /* denominator coefficients qa1..qa6; the constant term is 1 */
    static const float qa[] = {
         1.0642088205e-01,
         5.4039794207e-01,
         7.1828655899e-02,
         1.2617121637e-01,
         1.3637083583e-02,
         1.1984500103e-02
    };

    float s = fabsf(x) - 1;
    float num = pa[6];
    float den = qa[5];
    int i;

    for (i = 5; i >= 0; i--)
        num = pa[i] + s * num;
    for (i = 4; i >= 0; i--)
        den = qa[i] + s * den;
    den = 1 + s * den;

    return 1 - erx - num / den;
}
8969
8970 static float erfc2f(UINT32 ix, float x)
8971 {
8972     static const float ra0  = -9.8649440333e-03,
8973                  ra1  = -6.9385856390e-01,
8974                  ra2  = -1.0558626175e+01,
8975                  ra3  = -6.2375331879e+01,
8976                  ra4  = -1.6239666748e+02,
8977                  ra5  = -1.8460508728e+02,
8978                  ra6  = -8.1287437439e+01,
8979                  ra7  = -9.8143291473e+00,
8980                  sa1  =  1.9651271820e+01,
8981                  sa2  =  1.3765776062e+02,
8982                  sa3  =  4.3456588745e+02,
8983                  sa4  =  6.4538726807e+02,
8984                  sa5  =  4.2900814819e+02,
8985                  sa6  =  1.0863500214e+02,
8986                  sa7  =  6.5702495575e+00,
8987                  sa8  = -6.0424413532e-02,
8988                  rb0  = -9.8649431020e-03,
8989                  rb1  = -7.9928326607e-01,
8990                  rb2  = -1.7757955551e+01,
8991                  rb3  = -1.6063638306e+02,
8992                  rb4  = -6.3756646729e+02,
8993                  rb5  = -1.0250950928e+03,
8994                  rb6  = -4.8351919556e+02,
8995                  sb1  =  3.0338060379e+01,
8996                  sb2  =  3.2579251099e+02,
8997                  sb3  =  1.5367296143e+03,
8998                  sb4  =  3.1998581543e+03,
8999                  sb5  =  2.5530502930e+03,
9000                  sb6  =  4.7452853394e+02,
9001                  sb7  = -2.2440952301e+01;
9002
9003     float s, R, S, z;
9004
9005     if (ix < 0x3fa00000) /* |x| < 1.25 */
9006         return erfc1f(x);
9007
9008     x = fabsf(x);
9009     s = 1 / (x * x);
9010     if (ix < 0x4036db6d) { /* |x| < 1/0.35 */
9011         R = ra0 + s * (ra1 + s * (ra2 + s * (ra3 + s * (ra4 + s *
9012                             (ra5 + s * (ra6 + s * ra7))))));
9013         S = 1.0f + s * (sa1 + s * (sa2 + s * (sa3 + s * (sa4 + s *
9014                             (sa5 + s * (sa6 + s * (sa7 + s * sa8)))))));
9015     } else { /* |x| >= 1/0.35 */
9016         R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * (rb4 + s * (rb5 + s * rb6)))));
9017         S = 1.0f + s * (sb1 + s * (sb2 + s * (sb3 + s * (sb4 + s *
9018                             (sb5 + s * (sb6 + s * sb7))))));
9019     }
9020
9021     ix = *(UINT32*)&x & 0xffffe000;
9022     z = *(float*)&ix;
9023     return expf(-z * z - 0.5625f) * expf((z - x) * (z + x) + R / S) / x;
9024 }
9025
9026 /*********************************************************************
9027  *      erff (MSVCR120.@)
9028  *
9029  * Copied from musl: src/math/erff.c
9030  */
9031 float CDECL erff(float x)
9032 {
9033     static const float efx8 =  1.0270333290e+00,
9034                  pp0  =  1.2837916613e-01,
9035                  pp1  = -3.2504209876e-01,
9036                  pp2  = -2.8481749818e-02,
9037                  pp3  = -5.7702702470e-03,
9038                  pp4  = -2.3763017452e-05,
9039                  qq1  =  3.9791721106e-01,
9040                  qq2  =  6.5022252500e-02,
9041                  qq3  =  5.0813062117e-03,
9042                  qq4  =  1.3249473704e-04,
9043                  qq5  = -3.9602282413e-06;
9044
9045     float r, s, z, y;
9046     UINT32 ix;
9047     int sign;
9048
9049     ix = *(UINT32*)&x;
9050     sign = ix >> 31;
9051     ix &= 0x7fffffff;
9052     if (ix >= 0x7f800000) {
9053         /* erf(nan)=nan, erf(+-inf)=+-1 */
9054         return 1 - 2 * sign + 1 / x;
9055     }
9056     if (ix < 0x3f580000) { /* |x| < 0.84375 */
9057         if (ix < 0x31800000) { /* |x| < 2**-28 */
9058             /*avoid underflow */
9059             return 0.125f * (8 * x + efx8 * x);
9060         }
9061         z = x * x;
9062         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
9063         s = 1 + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
9064         y = r / s;
9065         return x + x * y;
9066     }
9067     if (ix < 0x40c00000) /* |x| < 6 */
9068         y = 1 - erfc2f(ix, x);
9069     else
9070         y = 1 - FLT_MIN;
9071     return sign ? -y : y;
9072 }
9073
9074 /*********************************************************************
9075  *      erfc (MSVCR120.@)
9076  *
9077  * Copied from musl: src/math/erf.c
9078  */
9079 double CDECL erfc(double x)
9080 {
9081     static const double pp0  =  1.28379167095512558561e-01,
9082                  pp1  = -3.25042107247001499370e-01,
9083                  pp2  = -2.84817495755985104766e-02,
9084                  pp3  = -5.77027029648944159157e-03,
9085                  pp4  = -2.37630166566501626084e-05,
9086                  qq1  =  3.97917223959155352819e-01,
9087                  qq2  =  6.50222499887672944485e-02,
9088                  qq3  =  5.08130628187576562776e-03,
9089                  qq4  =  1.32494738004321644526e-04,
9090                  qq5  = -3.96022827877536812320e-06;
9091
9092     double r, s, z, y;
9093     UINT32 ix;
9094     int sign;
9095
9096     ix = *(ULONGLONG*)&x >> 32;
9097     sign = ix >> 31;
9098     ix &= 0x7fffffff;
9099     if (ix >= 0x7ff00000) {
9100         /* erfc(nan)=nan, erfc(+-inf)=0,2 */
9101         return 2 * sign + 1 / x;
9102     }
9103     if (ix < 0x3feb0000) { /* |x| < 0.84375 */
9104         if (ix < 0x3c700000) /* |x| < 2**-56 */
9105             return 1.0 - x;
9106         z = x * x;
9107         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
9108         s = 1.0 + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
9109         y = r / s;
9110         if (sign || ix < 0x3fd00000) { /* x < 1/4 */
9111             return 1.0 - (x + x * y);
9112         }
9113         return 0.5 - (x - 0.5 + x * y);
9114     }
9115     if (ix < 0x403c0000) { /* 0.84375 <= |x| < 28 */
9116         return sign ? 2 - erfc2(ix, x) : erfc2(ix, x);
9117     }
9118     if (sign)
9119         return 2 - DBL_MIN;
9120     *_errno() = ERANGE;
9121     return fp_barrier(DBL_MIN) * DBL_MIN;
9122 }
9123
9124 /*********************************************************************
9125  *      erfcf (MSVCR120.@)
9126  *
9127  * Copied from musl: src/math/erff.c
9128  */
9129 float CDECL erfcf(float x)
9130 {
9131     static const float pp0  =  1.2837916613e-01,
9132                  pp1  = -3.2504209876e-01,
9133                  pp2  = -2.8481749818e-02,
9134                  pp3  = -5.7702702470e-03,
9135                  pp4  = -2.3763017452e-05,
9136                  qq1  =  3.9791721106e-01,
9137                  qq2  =  6.5022252500e-02,
9138                  qq3  =  5.0813062117e-03,
9139                  qq4  =  1.3249473704e-04,
9140                  qq5  = -3.9602282413e-06;
9141
9142     float r, s, z, y;
9143     UINT32 ix;
9144     int sign;
9145
9146     ix = *(UINT32*)&x;
9147     sign = ix >> 31;
9148     ix &= 0x7fffffff;
9149     if (ix >= 0x7f800000) {
9150         /* erfc(nan)=nan, erfc(+-inf)=0,2 */
9151         return 2 * sign + 1 / x;
9152     }
9153
9154     if (ix < 0x3f580000) { /* |x| < 0.84375 */
9155         if (ix < 0x23800000) /* |x| < 2**-56 */
9156             return 1.0f - x;
9157         z = x * x;
9158         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
9159         s = 1.0f + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
9160         y = r / s;
9161         if (sign || ix < 0x3e800000) /* x < 1/4 */
9162             return 1.0f - (x + x * y);
9163         return 0.5f - (x - 0.5f + x * y);
9164     }
9165     if (ix < 0x41e00000) { /* |x| < 28 */
9166         return sign ? 2 - erfc2f(ix, x) : erfc2f(ix, x);
9167     }
9168     if (sign)
9169         return 2 - FLT_MIN;
9170     *_errno() = ERANGE;
9171     return FLT_MIN * FLT_MIN;
9172 }
9173
9174 /*********************************************************************
9175  *      fmaxf (MSVCR120.@)
9176  */
9177 float CDECL fmaxf(float x, float y)
9178 {
9179     if(isnan(x))
9180         return y;
9181     if(isnan(y))
9182         return x;
9183     if(x==0 && y==0)
9184         return signbit(x) ? y : x;
9185     return x<y ? y : x;
9186 }
9187
9188 /*********************************************************************
9189  *      fmax (MSVCR120.@)
9190  */
9191 double CDECL fmax(double x, double y)
9192 {
9193     if(isnan(x))
9194         return y;
9195     if(isnan(y))
9196         return x;
9197     if(x==0 && y==0)
9198         return signbit(x) ? y : x;
9199     return x<y ? y : x;
9200 }
9201
9202 /*********************************************************************
9203  *      fdimf (MSVCR120.@)
9204  */
9205 float CDECL fdimf(float x, float y)
9206 {
9207     if(isnan(x))
9208         return x;
9209     if(isnan(y))
9210         return y;
9211     return x>y ? x-y : 0;
9212 }
9213
9214 /*********************************************************************
9215  *      fdim (MSVCR120.@)
9216  */
9217 double CDECL fdim(double x, double y)
9218 {
9219     if(isnan(x))
9220         return x;
9221     if(isnan(y))
9222         return y;
9223     return x>y ? x-y : 0;
9224 }
9225
9226 /*********************************************************************
9227  *      _fdsign (MSVCR120.@)
9228  */
9229 int CDECL _fdsign(float x)
9230 {
9231     union { float f; UINT32 i; } u = { x };
9232     return (u.i >> 16) & 0x8000;
9233 }
9234
9235 /*********************************************************************
9236  *      _dsign (MSVCR120.@)
9237  */
9238 int CDECL _dsign(double x)
9239 {
9240     union { double f; UINT64 i; } u = { x };
9241     return (u.i >> 48) & 0x8000;
9242 }
9243
9244
9245 /*********************************************************************
9246  *      _dpcomp (MSVCR120.@)
9247  */
9248 int CDECL _dpcomp(double x, double y)
9249 {
9250     if(isnan(x) || isnan(y))
9251         return 0;
9252
9253     if(x == y) return 2;
9254     return x < y ? 1 : 4;
9255 }
9256
9257 /*********************************************************************
9258  *      _fdpcomp (MSVCR120.@)
9259  */
9260 int CDECL _fdpcomp(float x, float y)
9261 {
9262     return _dpcomp(x, y);
9263 }
9264
9265 /*********************************************************************
9266  *      fminf (MSVCR120.@)
9267  */
9268 float CDECL fminf(float x, float y)
9269 {
9270     if(isnan(x))
9271         return y;
9272     if(isnan(y))
9273         return x;
9274     if(x==0 && y==0)
9275         return signbit(x) ? x : y;
9276     return x<y ? x : y;
9277 }
9278
9279 /*********************************************************************
9280  *      fmin (MSVCR120.@)
9281  */
9282 double CDECL fmin(double x, double y)
9283 {
9284     if(isnan(x))
9285         return y;
9286     if(isnan(y))
9287         return x;
9288     if(x==0 && y==0)
9289         return signbit(x) ? x : y;
9290     return x<y ? x : y;
9291 }
9292
9293 /*********************************************************************
9294  *      asinh (MSVCR120.@)
9295  *
9296  * Copied from musl: src/math/asinh.c
9297  */
9298 double CDECL asinh(double x)
9299 {
9300     UINT64 ux = *(UINT64*)&x;
9301     int e = ux >> 52 & 0x7ff;
9302     int s = ux >> 63;
9303
9304     /* |x| */
9305     ux &= (UINT64)-1 / 2;
9306     x = *(double*)&ux;
9307
9308     if (e >= 0x3ff + 26) /* |x| >= 0x1p26 or inf or nan */
9309         x = log(x) + 0.693147180559945309417232121458176568;
9310     else if (e >= 0x3ff + 1) /* |x| >= 2 */
9311         x = log(2 * x + 1 / (sqrt(x * x + 1) + x));
9312     else if (e >= 0x3ff - 26) /* |x| >= 0x1p-26 */
9313         x = log1p(x + x * x / (sqrt(x * x + 1) + 1));
9314     else /* |x| < 0x1p-26, raise inexact if x != 0 */
9315         fp_barrier(x + 0x1p120f);
9316     return s ? -x : x;
9317 }
9318
9319 /*********************************************************************
9320  *      asinhf (MSVCR120.@)
9321  *
9322  * Copied from musl: src/math/asinhf.c
9323  */
9324 float CDECL asinhf(float x)
9325 {
9326     UINT32 ux = *(UINT32*)&x;
9327     UINT32 i = ux & 0x7fffffff;
9328     int s = ux >> 31;
9329
9330     /* |x| */
9331     x = *(float*)&i;
9332
9333     if (i >= 0x3f800000 + (12 << 23))/* |x| >= 0x1p12 or inf or nan */
9334         x = logf(x) + 0.693147180559945309417232121458176568f;
9335     else if (i >= 0x3f800000 + (1 << 23)) /* |x| >= 2 */
9336         x = logf(2 * x + 1 / (sqrtf(x * x + 1) + x));
9337     else if (i >= 0x3f800000 - (12 << 23)) /* |x| >= 0x1p-12 */
9338         x = log1pf(x + x * x / (sqrtf(x * x + 1) + 1));
9339     else /* |x| < 0x1p-12, raise inexact if x!=0 */
9340         fp_barrierf(x + 0x1p120f);
9341     return s ? -x : x;
9342 }
9343
9344 /*********************************************************************
9345  *      acosh (MSVCR120.@)
9346  *
9347  * Copied from musl: src/math/acosh.c
9348  */
9349 double CDECL acosh(double x)
9350 {
9351     int e = *(UINT64*)&x >> 52 & 0x7ff;
9352
9353     if (x < 1)
9354     {
9355         *_errno() = EDOM;
9356         feraiseexcept(FE_INVALID);
9357         return NAN;
9358     }
9359
9360     if (e < 0x3ff + 1) /* |x| < 2, up to 2ulp error in [1,1.125] */
9361         return log1p(x - 1 + sqrt((x - 1) * (x - 1) + 2 * (x - 1)));
9362     if (e < 0x3ff + 26) /* |x| < 0x1p26 */
9363         return log(2 * x - 1 / (x + sqrt(x * x - 1)));
9364     /* |x| >= 0x1p26 or nan */
9365     return log(x) + 0.693147180559945309417232121458176568;
9366 }
9367
9368 /*********************************************************************
9369  *      acoshf (MSVCR120.@)
9370  *
9371  * Copied from musl: src/math/acoshf.c
9372  */
9373 float CDECL acoshf(float x)
9374 {
9375     UINT32 a = *(UINT32*)&x & 0x7fffffff;
9376
9377     if (x < 1)
9378     {
9379         *_errno() = EDOM;
9380         feraiseexcept(FE_INVALID);
9381         return NAN;
9382     }
9383
9384     if (a < 0x3f800000 + (1 << 23)) /* |x| < 2, up to 2ulp error in [1,1.125] */
9385         return log1pf(x - 1 + sqrtf((x - 1) * (x - 1) + 2 * (x - 1)));
9386     if (*(UINT32*)&x < 0x3f800000 + (12 << 23)) /* 2 <= x < 0x1p12 */
9387         return logf(2 * x - 1 / (x + sqrtf(x * x - 1)));
9388     /* x >= 0x1p12 or x <= -2 or nan */
9389     return logf(x) + 0.693147180559945309417232121458176568f;
9390 }
9391
9392 /*********************************************************************
9393  *      atanh (MSVCR120.@)
9394  *
9395  * Copied from musl: src/math/atanh.c
9396  */
9397 double CDECL atanh(double x)
9398 {
9399     UINT64 ux = *(UINT64*)&x;
9400     int e = ux >> 52 & 0x7ff;
9401     int s = ux >> 63;
9402
9403     /* |x| */
9404     ux &= (UINT64)-1 / 2;
9405     x = *(double*)&ux;
9406
9407     if (x > 1) {
9408         *_errno() = EDOM;
9409         feraiseexcept(FE_INVALID);
9410         return NAN;
9411     }
9412
9413     if (e < 0x3ff - 1) {
9414         if (e < 0x3ff - 32) {
9415             fp_barrier(x + 0x1p120f);
9416             if (e == 0) /* handle underflow */
9417                 fp_barrier(x * x);
9418         } else { /* |x| < 0.5, up to 1.7ulp error */
9419             x = 0.5 * log1p(2 * x + 2 * x * x / (1 - x));
9420         }
9421     } else { /* avoid overflow */
9422         x = 0.5 * log1p(2 * (x / (1 - x)));
9423         if (isinf(x)) *_errno() = ERANGE;
9424     }
9425     return s ? -x : x;
9426 }
9427
9428 /*********************************************************************
9429  *      atanhf (MSVCR120.@)
9430  *
9431  * Copied from musl: src/math/atanhf.c
9432  */
9433 float CDECL atanhf(float x)
9434 {
9435     UINT32 ux = *(UINT32*)&x;
9436     int s = ux >> 31;
9437
9438     /* |x| */
9439     ux &= 0x7fffffff;
9440     x = *(float*)&ux;
9441
9442     if (x > 1) {
9443         *_errno() = EDOM;
9444         feraiseexcept(FE_INVALID);
9445         return NAN;
9446     }
9447
9448     if (ux < 0x3f800000 - (1 << 23)) {
9449         if (ux < 0x3f800000 - (32 << 23)) {
9450             fp_barrierf(x + 0x1p120f);
9451             if (ux < (1 << 23)) /* handle underflow */
9452                 fp_barrierf(x * x);
9453         } else { /* |x| < 0.5, up to 1.7ulp error */
9454             x = 0.5f * log1pf(2 * x + 2 * x * x / (1 - x));
9455         }
9456     } else { /* avoid overflow */
9457         x = 0.5f * log1pf(2 * (x / (1 - x)));
9458         if (isinf(x)) *_errno() = ERANGE;
9459     }
9460     return s ? -x : x;
9461 }
9462
9463 #endif /* _MSVCR_VER>=120 */
9464
9465 /*********************************************************************
9466  *      _scalb  (MSVCRT.@)
9467  *      scalbn  (MSVCR120.@)
9468  *      scalbln (MSVCR120.@)
9469  */
9470 double CDECL _scalb(double num, __msvcrt_long power)
9471 {
9472   return ldexp(num, power);
9473 }
9474
9475 /*********************************************************************
9476  *      _scalbf  (MSVCRT.@)
9477  *      scalbnf  (MSVCR120.@)
9478  *      scalblnf (MSVCR120.@)
9479  */
9480 float CDECL _scalbf(float num, __msvcrt_long power)
9481 {
9482   return ldexp(num, power);
9483 }
9484
9485 #if _MSVCR_VER>=120
9486
9487 /*********************************************************************
9488  *      remainder (MSVCR120.@)
9489  *
9490  * Copied from musl: src/math/remainder.c
9491  */
9492 double CDECL remainder(double x, double y)
9493 {
9494     int q;
9495 #if _MSVCR_VER == 120 && defined(__x86_64__)
9496     if (isnan(x) || isnan(y)) *_errno() = EDOM;
9497 #endif
9498     return remquo(x, y, &q);
9499 }
9500
9501 /*********************************************************************
9502  *      remainderf (MSVCR120.@)
9503  *
9504  * Copied from musl: src/math/remainderf.c
9505  */
9506 float CDECL remainderf(float x, float y)
9507 {
9508     int q;
9509 #if _MSVCR_VER == 120 && defined(__x86_64__)
9510     if (isnan(x) || isnan(y)) *_errno() = EDOM;
9511 #endif
9512     return remquof(x, y, &q);
9513 }
9514
9515 /*********************************************************************
9516  *      remquo (MSVCR120.@)
9517  *
9518  * Copied from musl: src/math/remquo.c
9519  */
9520 double CDECL remquo(double x, double y, int *quo)
9521 {
9522     UINT64 uxi = *(UINT64*)&x;
9523     UINT64 uyi = *(UINT64*)&y;
9524     int ex = uxi >> 52 & 0x7ff;
9525     int ey = uyi >> 52 & 0x7ff;
9526     int sx = uxi >> 63;
9527     int sy = uyi >> 63;
9528     UINT32 q;
9529     UINT64 i;
9530
9531     *quo = 0;
9532     if (y == 0 || isinf(x)) *_errno() = EDOM;
9533     if (uyi << 1 == 0 || isnan(y) || ex == 0x7ff)
9534         return (x * y) / (x * y);
9535     if (uxi << 1 == 0)
9536         return x;
9537
9538     /* normalize x and y */
9539     if (!ex) {
9540         for (i = uxi << 12; i >> 63 == 0; ex--, i <<= 1);
9541         uxi <<= -ex + 1;
9542     } else {
9543         uxi &= -1ULL >> 12;
9544         uxi |= 1ULL << 52;
9545     }
9546     if (!ey) {
9547         for (i = uyi << 12; i >> 63 == 0; ey--, i <<= 1);
9548         uyi <<= -ey + 1;
9549     } else {
9550         uyi &= -1ULL >> 12;
9551         uyi |= 1ULL << 52;
9552     }
9553
9554     q = 0;
9555     if (ex < ey) {
9556         if (ex+1 == ey)
9557             goto end;
9558         return x;
9559     }
9560
9561     /* x mod y */
9562     for (; ex > ey; ex--) {
9563         i = uxi - uyi;
9564         if (i >> 63 == 0) {
9565             uxi = i;
9566             q++;
9567         }
9568         uxi <<= 1;
9569         q <<= 1;
9570     }
9571     i = uxi - uyi;
9572     if (i >> 63 == 0) {
9573         uxi = i;
9574         q++;
9575     }
9576     if (uxi == 0)
9577         ex = -60;
9578     else
9579         for (; uxi >> 52 == 0; uxi <<= 1, ex--);
9580 end:
9581     /* scale result and decide between |x| and |x|-|y| */
9582     if (ex > 0) {
9583         uxi -= 1ULL << 52;
9584         uxi |= (UINT64)ex << 52;
9585     } else {
9586         uxi >>= -ex + 1;
9587     }
9588     x = *(double*)&uxi;
9589     if (sy)
9590         y = -y;
9591     if (ex == ey || (ex + 1 == ey && (2 * x > y || (2 * x == y && q % 2)))) {
9592         x -= y;
9593         q++;
9594     }
9595     q &= 0x7fffffff;
9596     *quo = sx ^ sy ? -(int)q : (int)q;
9597     return sx ? -x : x;
9598 }
9599
9600 /*********************************************************************
9601  *      remquof (MSVCR120.@)
9602  *
9603  * Copied from musl: src/math/remquof.c
9604  */
9605 float CDECL remquof(float x, float y, int *quo)
9606 {
9607     UINT32 uxi = *(UINT32*)&x;
9608     UINT32 uyi = *(UINT32*)&y;
9609     int ex = uxi >> 23 & 0xff;
9610     int ey = uyi >> 23 & 0xff;
9611     int sx = uxi >> 31;
9612     int sy = uyi>> 31;
9613     UINT32 q, i;
9614
9615     *quo = 0;
9616     if (y == 0 || isinf(x)) *_errno() = EDOM;
9617     if (uyi << 1 == 0 || isnan(y) || ex == 0xff)
9618         return (x * y) / (x * y);
9619     if (uxi << 1 == 0)
9620         return x;
9621
9622     /* normalize x and y */
9623     if (!ex) {
9624         for (i = uxi << 9; i >> 31 == 0; ex--, i <<= 1);
9625         uxi <<= -ex + 1;
9626     } else {
9627         uxi &= -1U >> 9;
9628         uxi |= 1U << 23;
9629     }
9630     if (!ey) {
9631         for (i = uyi << 9; i >> 31 == 0; ey--, i <<= 1);
9632         uyi <<= -ey + 1;
9633     } else {
9634         uyi &= -1U >> 9;
9635         uyi |= 1U << 23;
9636     }
9637
9638     q = 0;
9639     if (ex < ey) {
9640         if (ex + 1 == ey)
9641             goto end;
9642         return x;
9643     }
9644
9645     /* x mod y */
9646     for (; ex > ey; ex--) {
9647         i = uxi - uyi;
9648         if (i >> 31 == 0) {
9649             uxi = i;
9650             q++;
9651         }
9652         uxi <<= 1;
9653         q <<= 1;
9654     }
9655     i = uxi - uyi;
9656     if (i >> 31 == 0) {
9657         uxi = i;
9658         q++;
9659     }
9660     if (uxi == 0)
9661         ex = -30;
9662     else
9663         for (; uxi >> 23 == 0; uxi <<= 1, ex--);
9664 end:
9665     /* scale result and decide between |x| and |x|-|y| */
9666     if (ex > 0) {
9667         uxi -= 1U << 23;
9668         uxi |= (UINT32)ex << 23;
9669     } else {
9670         uxi >>= -ex + 1;
9671     }
9672     x = *(float*)&uxi;
9673     if (sy)
9674         y = -y;
9675     if (ex == ey || (ex + 1 == ey && (2 * x > y || (2 * x == y && q % 2)))) {
9676         x -= y;
9677         q++;
9678     }
9679     q &= 0x7fffffff;
9680     *quo = sx ^ sy ? -(int)q : (int)q;
9681     return sx ? -x : x;
9682 }
9683
9684 /* sin(pi*x) assuming x > 2^-100, if sin(pi*x)==0 the sign is arbitrary */
9685 static double sin_pi(double x)
9686 {
9687     int n;
9688
9689     /* spurious inexact if odd int */
9690     x = 2.0 * (x * 0.5 - floor(x * 0.5)); /* x mod 2.0 */
9691
9692     n = x * 4.0;
9693     n = (n + 1) / 2;
9694     x -= n * 0.5f;
9695     x *= M_PI;
9696
9697     switch (n) {
9698     default: /* case 4: */
9699     case 0: return __sin(x, 0.0, 0);
9700     case 1: return __cos(x, 0.0);
9701     case 2: return __sin(-x, 0.0, 0);
9702     case 3: return -__cos(x, 0.0);
9703     }
9704 }
9705
9706 /*********************************************************************
9707  *      lgamma (MSVCR120.@)
9708  *
9709  * Copied from musl: src/math/lgamma_r.c
9710  */
9711 double CDECL lgamma(double x)
9712 {
9713     static const double pi = 3.14159265358979311600e+00,
9714         a0 = 7.72156649015328655494e-02,
9715         a1 = 3.22467033424113591611e-01,
9716         a2 = 6.73523010531292681824e-02,
9717         a3 = 2.05808084325167332806e-02,
9718         a4 = 7.38555086081402883957e-03,
9719         a5 = 2.89051383673415629091e-03,
9720         a6 = 1.19270763183362067845e-03,
9721         a7 = 5.10069792153511336608e-04,
9722         a8 = 2.20862790713908385557e-04,
9723         a9 = 1.08011567247583939954e-04,
9724         a10 = 2.52144565451257326939e-05,
9725         a11 = 4.48640949618915160150e-05,
9726         tc = 1.46163214496836224576e+00,
9727         tf = -1.21486290535849611461e-01,
9728         tt = -3.63867699703950536541e-18,
9729         t0 = 4.83836122723810047042e-01,
9730         t1 = -1.47587722994593911752e-01,
9731         t2 = 6.46249402391333854778e-02,
9732         t3 = -3.27885410759859649565e-02,
9733         t4 = 1.79706750811820387126e-02,
9734         t5 = -1.03142241298341437450e-02,
9735         t6 = 6.10053870246291332635e-03,
9736         t7 = -3.68452016781138256760e-03,
9737         t8 = 2.25964780900612472250e-03,
9738         t9 = -1.40346469989232843813e-03,
9739         t10 = 8.81081882437654011382e-04,
9740         t11 = -5.38595305356740546715e-04,
9741         t12 = 3.15632070903625950361e-04,
9742         t13 = -3.12754168375120860518e-04,
9743         t14 = 3.35529192635519073543e-04,
9744         u0 = -7.72156649015328655494e-02,
9745         u1 = 6.32827064025093366517e-01,
9746         u2 = 1.45492250137234768737e+00,
9747         u3 = 9.77717527963372745603e-01,
9748         u4 = 2.28963728064692451092e-01,
9749         u5 = 1.33810918536787660377e-02,
9750         v1 = 2.45597793713041134822e+00,
9751         v2 = 2.12848976379893395361e+00,
9752         v3 = 7.69285150456672783825e-01,
9753         v4 = 1.04222645593369134254e-01,
9754         v5 = 3.21709242282423911810e-03,
9755         s0 = -7.72156649015328655494e-02,
9756         s1 = 2.14982415960608852501e-01,
9757         s2 = 3.25778796408930981787e-01,
9758         s3 = 1.46350472652464452805e-01,
9759         s4 = 2.66422703033638609560e-02,
9760         s5 = 1.84028451407337715652e-03,
9761         s6 = 3.19475326584100867617e-05,
9762         r1 = 1.39200533467621045958e+00,
9763         r2 = 7.21935547567138069525e-01,
9764         r3 = 1.71933865632803078993e-01,
9765         r4 = 1.86459191715652901344e-02,
9766         r5 = 7.77942496381893596434e-04,
9767         r6 = 7.32668430744625636189e-06,
9768         w0 = 4.18938533204672725052e-01,
9769         w1 = 8.33333333333329678849e-02,
9770         w2 = -2.77777777728775536470e-03,
9771         w3 = 7.93650558643019558500e-04,
9772         w4 = -5.95187557450339963135e-04,
9773         w5 = 8.36339918996282139126e-04,
9774         w6 = -1.63092934096575273989e-03;
9775
9776     union {double f; UINT64 i;} u = {x};
9777     double t, y, z, nadj, p, p1, p2, p3, q, r, w;
9778     UINT32 ix;
9779     int sign,i;
9780
9781     /* purge off +-inf, NaN, +-0, tiny and negative arguments */
9782     sign = u.i >> 63;
9783     ix = u.i >> 32 & 0x7fffffff;
9784     if (ix >= 0x7ff00000)
9785         return x * x;
9786     if (ix < (0x3ff - 70) << 20) { /* |x|<2**-70, return -log(|x|) */
9787         if(sign)
9788             x = -x;
9789         return -log(x);
9790     }
9791     if (sign) {
9792         x = -x;
9793         t = sin_pi(x);
9794         if (t == 0.0) { /* -integer */
9795             *_errno() = ERANGE;
9796             return 1.0 / (x - x);
9797         }
9798         if (t <= 0.0)
9799             t = -t;
9800         nadj = log(pi / (t * x));
9801     }
9802
9803     /* purge off 1 and 2 */
9804     if ((ix == 0x3ff00000 || ix == 0x40000000) && (UINT32)u.i == 0)
9805         r = 0;
9806     /* for x < 2.0 */
9807     else if (ix < 0x40000000) {
9808         if (ix <= 0x3feccccc) { /* lgamma(x) = lgamma(x+1)-log(x) */
9809             r = -log(x);
9810             if (ix >= 0x3FE76944) {
9811                 y = 1.0 - x;
9812                 i = 0;
9813             } else if (ix >= 0x3FCDA661) {
9814                 y = x - (tc - 1.0);
9815                 i = 1;
9816             } else {
9817                 y = x;
9818                 i = 2;
9819             }
9820         } else {
9821             r = 0.0;
9822             if (ix >= 0x3FFBB4C3) { /* [1.7316,2] */
9823                 y = 2.0 - x;
9824                 i = 0;
9825             } else if(ix >= 0x3FF3B4C4) { /* [1.23,1.73] */
9826                 y = x - tc;
9827                 i = 1;
9828             } else {
9829                 y = x - 1.0;
9830                 i = 2;
9831             }
9832         }
9833         switch (i) {
9834         case 0:
9835             z = y * y;
9836             p1 = a0 + z * (a2 + z * (a4 + z * (a6 + z * (a8 + z * a10))));
9837             p2 = z * (a1 + z * (a3 + z * (a5 + z * (a7 + z * (a9 + z * a11)))));
9838             p = y * p1 + p2;
9839             r += (p - 0.5 * y);
9840             break;
9841         case 1:
9842             z = y * y;
9843             w = z * y;
9844             p1 = t0 + w * (t3 + w * (t6 + w * (t9 + w * t12))); /* parallel comp */
9845             p2 = t1 + w * (t4 + w * (t7 + w * (t10 + w * t13)));
9846             p3 = t2 + w * (t5 + w * (t8 + w * (t11 + w * t14)));
9847             p = z * p1 - (tt - w * (p2 + y * p3));
9848             r += tf + p;
9849             break;
9850         case 2:
9851             p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * u5)))));
9852             p2 = 1.0 + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * v5))));
9853             r += -0.5 * y + p1 / p2;
9854         }
9855     } else if (ix < 0x40200000) { /* x < 8.0 */
9856         i = (int)x;
9857         y = x - (double)i;
9858         p = y * (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6))))));
9859         q = 1.0 + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * r6)))));
9860         r = 0.5 * y + p / q;
9861         z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */
9862         switch (i) {
9863         case 7: z *= y + 6.0; /* fall through */
9864         case 6: z *= y + 5.0; /* fall through */
9865         case 5: z *= y + 4.0; /* fall through */
9866         case 4: z *= y + 3.0; /* fall through */
9867         case 3:
9868             z *= y + 2.0;
9869             r += log(z);
9870             break;
9871         }
9872     } else if (ix < 0x43900000) { /* 8.0 <= x < 2**58 */
9873         t = log(x);
9874         z = 1.0 / x;
9875         y = z * z;
9876         w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * w6)))));
9877         r = (x - 0.5) * (t - 1.0) + w;
9878     } else /* 2**58 <= x <= inf */
9879         r = x * (log(x) - 1.0);
9880     if (sign)
9881         r = nadj - r;
9882     return r;
9883 }
9884
9885 /* sin(pi*x) assuming x > 2^-100, if sin(pi*x)==0 the sign is arbitrary */
9886 static float sinf_pi(float x)
9887 {
9888     double y;
9889     int n;
9890
9891     /* spurious inexact if odd int */
9892     x = 2 * (x * 0.5f - floorf(x * 0.5f)); /* x mod 2.0 */
9893
9894     n = (int)(x * 4);
9895     n = (n + 1) / 2;
9896     y = x - n * 0.5f;
9897     y *= M_PI;
9898     switch (n) {
9899     default: /* case 4: */
9900     case 0: return __sindf(y);
9901     case 1: return __cosdf(y);
9902     case 2: return __sindf(-y);
9903     case 3: return -__cosdf(y);
9904     }
9905 }
9906
/*********************************************************************
 *      lgammaf (MSVCR120.@)
 *
 * Copied from musl: src/math/lgammaf_r.c
 *
 * Single precision logarithm of the gamma function. The value is
 * computed for |x|; for negative x it is then turned into the result
 * through the reflection term nadj = log(pi / (|x| * |sin(pi*|x|)|)).
 *
 * Special cases, as implemented below:
 *   - +-inf and NaN return x * x.
 *   - |x| < 2**-21 (including +-0) returns -logf(|x|).
 *   - negative integers set errno to ERANGE and return 1.0f / (x - x).
 *   - 1 and 2 return 0.
 *
 * Only the magnitude of sinf_pi() is used and there is no output
 * parameter, so the sign of gamma(x) is not reported to the caller.
 */
float CDECL lgammaf(float x)
{
    /* Coefficient sets, named after the branch that consumes them:
     *   a0..a11  polynomial used when i == 0 (x < 2)
     *   tc, tf, tt, t0..t14  shift, offsets and polynomial used when i == 1
     *   u0..u5 / v1..v5  numerator / denominator of the i == 2 rational
     *   s0..s6 / r1..r6  numerator / denominator used for 2 <= x < 8
     *   w0..w6  series in 1/x used for 8 <= x < 2**58
     */
    static const float pi = 3.1415927410e+00,
        a0 = 7.7215664089e-02,
        a1 = 3.2246702909e-01,
        a2 = 6.7352302372e-02,
        a3 = 2.0580807701e-02,
        a4 = 7.3855509982e-03,
        a5 = 2.8905137442e-03,
        a6 = 1.1927076848e-03,
        a7 = 5.1006977446e-04,
        a8 = 2.2086278477e-04,
        a9 = 1.0801156895e-04,
        a10 = 2.5214456400e-05,
        a11 = 4.4864096708e-05,
        tc = 1.4616321325e+00,
        tf = -1.2148628384e-01,
        tt = 6.6971006518e-09,
        t0 = 4.8383611441e-01,
        t1 = -1.4758771658e-01,
        t2 = 6.4624942839e-02,
        t3 = -3.2788541168e-02,
        t4 = 1.7970675603e-02,
        t5 = -1.0314224288e-02,
        t6 = 6.1005386524e-03,
        t7 = -3.6845202558e-03,
        t8 = 2.2596477065e-03,
        t9 = -1.4034647029e-03,
        t10 = 8.8108185446e-04,
        t11 = -5.3859531181e-04,
        t12 = 3.1563205994e-04,
        t13 = -3.1275415677e-04,
        t14 = 3.3552918467e-04,
        u0 = -7.7215664089e-02,
        u1 = 6.3282704353e-01,
        u2 = 1.4549225569e+00,
        u3 = 9.7771751881e-01,
        u4 = 2.2896373272e-01,
        u5 = 1.3381091878e-02,
        v1 = 2.4559779167e+00,
        v2 = 2.1284897327e+00,
        v3 = 7.6928514242e-01,
        v4 = 1.0422264785e-01,
        v5 = 3.2170924824e-03,
        s0 = -7.7215664089e-02,
        s1 = 2.1498242021e-01,
        s2 = 3.2577878237e-01,
        s3 = 1.4635047317e-01,
        s4 = 2.6642270386e-02,
        s5 = 1.8402845599e-03,
        s6 = 3.1947532989e-05,
        r1 = 1.3920053244e+00,
        r2 = 7.2193557024e-01,
        r3 = 1.7193385959e-01,
        r4 = 1.8645919859e-02,
        r5 = 7.7794247773e-04,
        r6 = 7.3266842264e-06,
        w0 = 4.1893854737e-01,
        w1 = 8.3333335817e-02,
        w2 = -2.7777778450e-03,
        w3 = 7.9365057172e-04,
        w4 = -5.9518753551e-04,
        w5 = 8.3633989561e-04,
        w6 = -1.6309292987e-03;

    /* u gives access to the raw IEEE bits of x */
    union {float f; UINT32 i;} u = {x};
    float t, y, z, nadj, p, p1, p2, p3, q, r, w;
    UINT32 ix; /* bit pattern of |x| */
    int i, sign;

    /* purge off +-inf, NaN, +-0, tiny and negative arguments */
    sign = u.i >> 31;
    ix = u.i & 0x7fffffff;
    if (ix >= 0x7f800000)
        return x * x;
    if (ix < 0x35000000) { /* |x| < 2**-21, return -log(|x|) */
        if (sign)
            x = -x;
        return -logf(x);
    }
    if (sign) {
        /* from here on x holds |x|; nadj is the reflection correction
         * that is applied at the very end */
        x = -x;
        t = sinf_pi(x);
        if (t == 0.0f) { /* -integer */
            *_errno() = ERANGE;
            return 1.0f / (x - x);
        }
        /* only |sin(pi*x)| is needed */
        if (t <= 0.0f)
            t = -t;
        nadj = logf(pi / (t * x));
    }

    /* purge off 1 and 2 */
    if (ix == 0x3f800000 || ix == 0x40000000)
        r = 0;
    /* for x < 2.0 */
    else if (ix < 0x40000000) {
        /* pick the sub-interval: i selects the approximation and y is
         * its argument; r starts with the -log(x) shift when x <= 0.9 */
        if (ix <= 0x3f666666) { /* lgamma(x) = lgamma(x+1)-log(x) */
            r = -logf(x);
            if (ix >= 0x3f3b4a20) { /* [0.7316,0.9] */
                y = 1.0f - x;
                i = 0;
            } else if (ix >= 0x3e6d3308) { /* [0.2316,0.7316] */
                y = x - (tc - 1.0f);
                i = 1;
            } else {
                y = x;
                i = 2;
            }
        } else {
            r = 0.0f;
            if (ix >= 0x3fdda618) { /* [1.7316,2] */
                y = 2.0f - x;
                i = 0;
            } else if (ix >= 0x3F9da620) { /* [1.23,1.73] */
                y = x - tc;
                i = 1;
            } else {
                y = x - 1.0f;
                i = 2;
            }
        }
        switch(i) {
        case 0:
            /* even and odd coefficients are summed separately in z = y*y */
            z = y * y;
            p1 = a0 + z * (a2 + z * (a4 + z * (a6 + z * (a8 + z * a10))));
            p2 = z * (a1 + z * (a3 + z * (a5 + z * (a7 + z * (a9 + z * a11)))));
            p = y * p1 + p2;
            r += p - 0.5f * y;
            break;
        case 1:
            /* three interleaved polynomials in w = y**3 */
            z = y * y;
            w = z * y;
            p1 = t0 + w * (t3 + w * (t6 + w * (t9 + w * t12))); /* parallel comp */
            p2 = t1 + w * (t4 + w * (t7 + w * (t10 + w * t13)));
            p3 = t2 + w * (t5 + w * (t8 + w * (t11 + w * t14)));
            p = z * p1 - (tt - w * (p2 + y * p3));
            r += (tf + p);
            break;
        case 2:
            /* rational approximation p1 / p2 */
            p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * u5)))));
            p2 = 1.0f + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * v5))));
            r += -0.5f * y + p1 / p2;
        }
    } else if (ix < 0x41000000) { /* x < 8.0 */
        /* split x into integer part i (2..7) and fraction y, evaluate the
         * rational approximation in y, then add log of the product
         * (y+2)*...*(y+i-1) accumulated in z */
        i = (int)x;
        y = x - (float)i;
        p = y * (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6))))));
        q = 1.0f + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * r6)))));
        r = 0.5f * y + p / q;
        z = 1.0f; /* lgamma(1+s) = log(s) + lgamma(s) */
        switch (i) {
        case 7: z *= y + 6.0f; /* fall through */
        case 6: z *= y + 5.0f; /* fall through */
        case 5: z *= y + 4.0f; /* fall through */
        case 4: z *= y + 3.0f; /* fall through */
        case 3:
            z *= y + 2.0f;
            r += logf(z);
            break;
        }
    } else if (ix < 0x5c800000) { /* 8.0 <= x < 2**58 */
        /* (x - 0.5) * (log(x) - 1) plus a correction series in z = 1/x */
        t = logf(x);
        z = 1.0f / x;
        y = z * z;
        w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * w6)))));
        r = (x - 0.5f) * (t - 1.0f) + w;
    } else /* 2**58 <= x <= inf */
        r = x * (logf(x) - 1.0f);
    /* negative argument: apply the reflection correction computed above */
    if (sign)
        r = nadj - r;
    return r;
}
10085
/* Rational function Snum(x) / Sden(x) used by tgamma() for positive x.
 * Both polynomials have degree 12; the Sden coefficients are those of
 * x * (x + 1) * ... * (x + 11). */
static double tgamma_S(double x)
{
    static const double Snum[] = {
        23531376880.410759688572007674451636754734846804940,
        42919803642.649098768957899047001988850926355848959,
        35711959237.355668049440185451547166705960488635843,
        17921034426.037209699919755754458931112671403265390,
        6039542586.3520280050642916443072979210699388420708,
        1439720407.3117216736632230727949123939715485786772,
        248874557.86205415651146038641322942321632125127801,
        31426415.585400194380614231628318205362874684987640,
        2876370.6289353724412254090516208496135991145378768,
        186056.26539522349504029498971604569928220784236328,
        8071.6720023658162106380029022722506138218516325024,
        210.82427775157934587250973392071336271166969580291,
        2.5066282746310002701649081771338373386264310793408,
    };
    static const double Sden[] = {
        0, 39916800, 120543840, 150917976, 105258076, 45995730, 13339535,
        2637558, 357423, 32670, 1925, 66, 1,
    };

    const int last = ARRAY_SIZE(Snum) - 1;
    double top = 0, bottom = 0;
    int k;

    if (x < 8) {
        /* Horner scheme in x, starting from the highest coefficient */
        k = last;
        while (k >= 0) {
            top = top * x + Snum[k];
            bottom = bottom * x + Sden[k];
            k--;
        }
    } else {
        /* to avoid overflow, large x is handled with a Horner scheme in 1/x;
         * numerator and denominator are scaled alike, the ratio is the same */
        for (k = 0; k <= last; k++) {
            top = top / x + Snum[k];
            bottom = bottom / x + Sden[k];
        }
    }
    return top / bottom;
}
10124
10125 /*********************************************************************
10126  *      tgamma (MSVCR120.@)
10127  *
10128  * Copied from musl: src/math/tgamma.c
10129  */
10130 double CDECL tgamma(double x)
10131 {
10132     static const double gmhalf = 5.524680040776729583740234375;
10133     static const double fact[] = {
10134         1, 1, 2, 6, 24, 120, 720, 5040.0, 40320.0, 362880.0, 3628800.0, 39916800.0,
10135         479001600.0, 6227020800.0, 87178291200.0, 1307674368000.0, 20922789888000.0,
10136         355687428096000.0, 6402373705728000.0, 121645100408832000.0,
10137         2432902008176640000.0, 51090942171709440000.0, 1124000727777607680000.0,
10138     };
10139
10140     union {double f; UINT64 i;} u = {x};
10141     double absx, y, dy, z, r;
10142     UINT32 ix = u.i >> 32 & 0x7fffffff;
10143     int sign = u.i >> 63;
10144
10145     /* special cases */
10146     if (ix >= 0x7ff00000) {
10147         /* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */
10148         if (u.i == 0xfff0000000000000ULL)
10149             *_errno() = EDOM;
10150         return x + INFINITY;
10151     }
10152     if (ix < (0x3ff - 54) << 20) {
10153         /* |x| < 2^-54: tgamma(x) ~ 1/x, +-0 raises div-by-zero */
10154         if (x == 0.0)
10155             *_errno() = ERANGE;
10156         return 1 / x;
10157     }
10158
10159     /* integer arguments */
10160     /* raise inexact when non-integer */
10161     if (x == floor(x)) {
10162         if (sign) {
10163             *_errno() = EDOM;
10164             return 0 / (x - x);
10165         }
10166         if (x <= ARRAY_SIZE(fact))
10167             return fact[(int)x - 1];
10168     }
10169
10170     /* x >= 172: tgamma(x)=inf with overflow */
10171     /* x =< -184: tgamma(x)=+-0 with underflow */
10172     if (ix >= 0x40670000) { /* |x| >= 184 */
10173         *_errno() = ERANGE;
10174         if (sign) {
10175             fp_barrierf(0x1p-126 / x);
10176             return 0;
10177         }
10178         x *= 0x1p1023;
10179         return x;
10180     }
10181
10182     absx = sign ? -x : x;
10183
10184     /* handle the error of x + g - 0.5 */
10185     y = absx + gmhalf;
10186     if (absx > gmhalf) {
10187         dy = y - absx;
10188         dy -= gmhalf;
10189     } else {
10190         dy = y - gmhalf;
10191         dy -= absx;
10192     }
10193
10194     z = absx - 0.5;
10195     r = tgamma_S(absx) * exp(-y);
10196     if (x < 0) {
10197         /* reflection formula for negative x */
10198         /* sinpi(absx) is not 0, integers are already handled */
10199         r = -M_PI / (sin_pi(absx) * absx * r);
10200         dy = -dy;
10201         z = -z;
10202     }
10203     r += dy * (gmhalf + 0.5) * r / y;
10204     z = pow(y, 0.5 * z);
10205     y = r * z * z;
10206     return y;
10207 }
10208
10209 /*********************************************************************
10210  *      tgammaf (MSVCR120.@)
10211  *
10212  * Copied from musl: src/math/tgammaf.c
10213  */
10214 float CDECL tgammaf(float x)
10215 {
10216     return tgamma(x);
10217 }
10218
/*********************************************************************
 *      nan (MSVCR120.@)
 *
 * Returns a NaN; the tag string is not looked at.
 */
double CDECL nan(const char *tagp)
{
    const double result = NAN;

    /* Windows ignores input (MSDN) */
    (void)tagp;
    return result;
}
10227
/*********************************************************************
 *      nanf (MSVCR120.@)
 *
 * Single precision variant: returns a NaN without reading the tag string.
 */
float CDECL nanf(const char *tagp)
{
    const float result = NAN;

    (void)tagp;
    return result;
}
10235
10236 /*********************************************************************
10237  *      _except1 (MSVCR120.@)
10238  *  TODO:
10239  *   - find meaning of ignored cw and operation bits
10240  *   - unk parameter
10241  */
10242 double CDECL _except1(DWORD fpe, _FP_OPERATION_CODE op, double arg, double res, DWORD cw, void *unk)
10243 {
10244     ULONG_PTR exception_arg;
10245     DWORD exception = 0;
10246     DWORD fpword = 0;
10247     WORD operation;
10248     int raise = 0;
10249
10250     TRACE("(%x %x %lf %lf %x %p)\n", fpe, op, arg, res, cw, unk);
10251
10252 #ifdef _WIN64
10253     cw = ((cw >> 7) & 0x3f) | ((cw >> 3) & 0xc00);
10254 #endif
10255     operation = op << 5;
10256     exception_arg = (ULONG_PTR)&operation;
10257
10258     if (fpe & 0x1) { /* overflow */
10259         if ((fpe == 0x1 && (cw & 0x8)) || (fpe==0x11 && (cw & 0x28))) {
10260             /* 32-bit version also sets SW_INEXACT here */
10261             raise |= FE_OVERFLOW;
10262             if (fpe & 0x10) raise |= FE_INEXACT;
10263             res = signbit(res) ? -INFINITY : INFINITY;
10264         } else {
10265             exception = EXCEPTION_FLT_OVERFLOW;
10266         }
10267     } else if (fpe & 0x2) { /* underflow */
10268         if ((fpe == 0x2 && (cw & 0x10)) || (fpe==0x12 && (cw & 0x30))) {
10269             raise |= FE_UNDERFLOW;
10270             if (fpe & 0x10) raise |= FE_INEXACT;
10271             res = signbit(res) ? -0.0 : 0.0;
10272         } else {
10273             exception = EXCEPTION_FLT_UNDERFLOW;
10274         }
10275     } else if (fpe & 0x4) { /* zerodivide */
10276         if ((fpe == 0x4 && (cw & 0x4)) || (fpe==0x14 && (cw & 0x24))) {
10277             raise |= FE_DIVBYZERO;
10278             if (fpe & 0x10) raise |= FE_INEXACT;
10279         } else {
10280             exception = EXCEPTION_FLT_DIVIDE_BY_ZERO;
10281         }
10282     } else if (fpe & 0x8) { /* invalid */
10283         if (fpe == 0x8 && (cw & 0x1)) {
10284             raise |= FE_INVALID;
10285         } else {
10286             exception = EXCEPTION_FLT_INVALID_OPERATION;
10287         }
10288     } else if (fpe & 0x10) { /* inexact */
10289         if (fpe == 0x10 && (cw & 0x20)) {
10290             raise |= FE_INEXACT;
10291         } else {
10292             exception = EXCEPTION_FLT_INEXACT_RESULT;
10293         }
10294     }
10295
10296     if (exception)
10297         raise = 0;
10298     feraiseexcept(raise);
10299     if (exception)
10300         RaiseException(exception, 0, 1, &exception_arg);
10301
10302     if (cw & 0x1) fpword |= _EM_INVALID;
10303     if (cw & 0x2) fpword |= _EM_DENORMAL;
10304     if (cw & 0x4) fpword |= _EM_ZERODIVIDE;
10305     if (cw & 0x8) fpword |= _EM_OVERFLOW;
10306     if (cw & 0x10) fpword |= _EM_UNDERFLOW;
10307     if (cw & 0x20) fpword |= _EM_INEXACT;
10308     switch (cw & 0xc00)
10309     {
10310         case 0xc00: fpword |= _RC_UP|_RC_DOWN; break;
10311         case 0x800: fpword |= _RC_UP; break;
10312         case 0x400: fpword |= _RC_DOWN; break;
10313     }
10314     switch (cw & 0x300)
10315     {
10316         case 0x0:   fpword |= _PC_24; break;
10317         case 0x200: fpword |= _PC_53; break;
10318         case 0x300: fpword |= _PC_64; break;
10319     }
10320     if (cw & 0x1000) fpword |= _IC_AFFINE;
10321     _control87(fpword, 0xffffffff);
10322
10323     return res;
10324 }
10325
10326 _Dcomplex* CDECL _Cbuild(_Dcomplex *ret, double r, double i)
10327 {
10328     ret->_Val[0] = r;
10329     ret->_Val[1] = i;
10330     return ret;
10331 }
10332
10333 double CDECL MSVCR120_creal(_Dcomplex z)
10334 {
10335     return z._Val[0];
10336 }
10337
10338 /*********************************************************************
10339  *      ilogb (MSVCR120.@)
10340  */
10341 int CDECL ilogb(double x)
10342 {
10343     return __ilogb(x);
10344 }
10345
10346 /*********************************************************************
10347  *      ilogbf (MSVCR120.@)
10348  */
10349 int CDECL ilogbf(float x)
10350 {
10351     return __ilogbf(x);
10352 }
10353 #endif /* _MSVCR_VER>=120 */