dlls/msvcrt/math.c

   1 /*
   2  * msvcrt.dll math functions
   3  *
   4  * Copyright 2000 Jon Griffiths
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  19  *
  20  *
  21  * For functions copied from musl libc (http://musl.libc.org/):
  22  * ====================================================
  23  * Copyright 2005-2020 Rich Felker, et al.
  24  *
  25  * Permission is hereby granted, free of charge, to any person obtaining
  26  * a copy of this software and associated documentation files (the
  27  * "Software"), to deal in the Software without restriction, including
  28  * without limitation the rights to use, copy, modify, merge, publish,
  29  * distribute, sublicense, and/or sell copies of the Software, and to
  30  * permit persons to whom the Software is furnished to do so, subject to
  31  * the following conditions:
  32  *
  33  * The above copyright notice and this permission notice shall be
  34  * included in all copies or substantial portions of the Software.
  35  * ====================================================
  36  */
  37
  38 #include <complex.h>
  39 #include <stdio.h>
  40 #include <fenv.h>
  41 #include <fpieee.h>
  42 #include <limits.h>
  43 #include <locale.h>
  44 #include <math.h>
  45
  46 #include "msvcrt.h"
  47 #include "winternl.h"
  48
  49 #include "wine/asm.h"
  50 #include "wine/debug.h"
  51
  52 WINE_DEFAULT_DEBUG_CHANNEL(msvcrt);
  53
  54 #undef div
  55 #undef ldiv
  56
     /* Error categories passed as the 'type' argument of math_error() below;
      * math_error() maps them to errno values. */
  57 #define _DOMAIN         1       /* domain error in argument */
  58 #define _SING           2       /* singularity */
  59 #define _OVERFLOW       3       /* range overflow */
  60 #define _UNDERFLOW      4       /* range underflow */
  61
     /* Signature of a user-supplied math error hook: it receives the
      * exception record and returns an int (non-zero makes math_error()
      * return the record's retval without touching errno). */
  62 typedef int (CDECL *MSVCRT_matherr_func)(struct _exception *);
  63
     /* Hook installed through __setusermatherr(); NULL until one is set. */
  64 static MSVCRT_matherr_func MSVCRT_default_matherr_func = NULL;
  65
     /* sse2_supported is filled in by msvcrt_init_math() from
      * IsProcessorFeaturePresent(); sse2_enabled is the current setting,
      * changeable through _set_SSE2_enable(). */
  66 BOOL sse2_supported;
  67 static BOOL sse2_enabled;
  68
  69 void msvcrt_init_math( void *module )
  70 {
  71     sse2_supported = IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE );
  72 #if _MSVCR_VER <=71
  73     sse2_enabled = FALSE;
  74 #else
  75     sse2_enabled = sse2_supported;
  76 #endif
  77 }
  78
  79 /* Copied from musl: src/internal/libm.h */
  80 static inline float fp_barrierf(float x)
  81 {
  82     volatile float y = x;
  83     return y;
  84 }
  85
  86 static inline double fp_barrier(double x)
  87 {
  88     volatile double y = x;
  89     return y;
  90 }
  91
  92 static inline double CDECL ret_nan( BOOL update_sw )
  93 {
  94     double x = 1.0;
  95     if (!update_sw) return -NAN;
  96     return (x - x) / (x - x);
  97 }
  98
     /*
      * SET_X87_CW(MASK): i386 assembly fragment (a sequence of string
      * literals meant to be pasted into an asm body).
      *
      * It reserves 4 bytes on the stack, stores the current x87 control word
      * at (%esp) and a copy at 2(%esp).  If any bit of MASK is set in the
      * control word, those bits are cleared in the copy and the copy is
      * loaded as the new control word; otherwise nothing is loaded.
      * Afterwards (%esp) holds the original control word and 2(%esp) the
      * one in effect.  %ax is clobbered.  The stack adjustment is undone by
      * RESET_X87_CW, so the two are expected to be used as a pair.
      */
  99 #define SET_X87_CW(MASK) \
 100     "subl $4, %esp\n\t" \
 101     __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \
 102     "fnstcw (%esp)\n\t" \
 103     "movw (%esp), %ax\n\t" \
 104     "movw %ax, 2(%esp)\n\t" \
 105     "testw $" #MASK ", %ax\n\t" \
 106     "jz 1f\n\t" \
 107     "andw $~" #MASK ", %ax\n\t" \
 108     "movw %ax, 2(%esp)\n\t" \
 109     "fldcw 2(%esp)\n\t" \
 110     "1:\n\t"
 111
     /*
      * RESET_X87_CW: i386 assembly fragment that undoes SET_X87_CW.
      *
      * It compares the saved control word at (%esp) with the copy at
      * 2(%esp).  If they differ, the value on top of the x87 stack is stored
      * as a double at 8(%esp), the saved control word is reloaded, and the
      * value is loaded back, followed by fwait.  In either case the 4 bytes
      * reserved by SET_X87_CW are released.  %ax is clobbered.
      * NOTE(review): 8(%esp) is used as scratch space; presumably this is
      * the caller's double argument slot - confirm at the use sites.
      */
 112 #define RESET_X87_CW \
 113     "movw (%esp), %ax\n\t" \
 114     "cmpw %ax, 2(%esp)\n\t" \
 115     "je 1f\n\t" \
 116     "fstpl 8(%esp)\n\t" \
 117     "fldcw (%esp)\n\t" \
 118     "fldl 8(%esp)\n\t" \
 119     "fwait\n\t" \
 120     "1:\n\t" \
 121     "addl $4, %esp\n\t" \
 122     __ASM_CFI(".cfi_adjust_cfa_offset -4\n\t")
 123
 124 /*********************************************************************
 125  *      _matherr (CRTDLL.@)
 126  */
 127 int CDECL _matherr(struct _exception *e)
 128 {
 129     return 0;
 130 }
 131
 132
 133 static double math_error(int type, const char *name, double arg1, double arg2, double retval)
 134 {
 135     struct _exception exception = {type, (char *)name, arg1, arg2, retval};
 136
 137     TRACE("(%d, %s, %g, %g, %g)\n", type, debugstr_a(name), arg1, arg2, retval);
 138
 139     if (MSVCRT_default_matherr_func && MSVCRT_default_matherr_func(&exception))
 140         return exception.retval;
 141
 142     switch (type)
 143     {
 144     case 0:
 145         /* don't set errno */
 146         break;
 147     case _DOMAIN:
 148         *_errno() = EDOM;
 149         break;
 150     case _SING:
 151     case _OVERFLOW:
 152         *_errno() = ERANGE;
 153         break;
 154     case _UNDERFLOW:
 155         /* don't set errno */
 156         break;
 157     default:
 158         ERR("Unhandled math error!\n");
 159     }
 160
 161     return exception.retval;
 162 }
 163
 164 /*********************************************************************
 165  *      __setusermatherr (MSVCRT.@)
 166  */
 167 void CDECL __setusermatherr(MSVCRT_matherr_func func)
 168 {
 169     MSVCRT_default_matherr_func = func;
 170     TRACE("new matherr handler %p\n", func);
 171 }
 172
 173 /*********************************************************************
 174  *      _set_SSE2_enable (MSVCRT.@)
 175  */
 176 int CDECL _set_SSE2_enable(int flag)
 177 {
 178     sse2_enabled = flag && sse2_supported;
 179     return sse2_enabled;
 180 }
 181
 182 #if defined(_WIN64)
 183 # if _MSVCR_VER>=140
 184 /*********************************************************************
 185  *      _get_FMA3_enable (UCRTBASE.@)
 186  */
 187 int CDECL _get_FMA3_enable(void)
 188 {
 189     FIXME("() stub\n");
 190     return 0;
 191 }
 192 # endif
 193
 194 # if _MSVCR_VER>=120
 195 /*********************************************************************
 196  *      _set_FMA3_enable (MSVCR120.@)
 197  */
 198 int CDECL _set_FMA3_enable(int flag)
 199 {
 200     FIXME("(%x) stub\n", flag);
 201     return 0;
 202 }
 203 # endif
 204 #endif
 205
 206 #if !defined(__i386__) || _MSVCR_VER>=120
 207
 208 /*********************************************************************
 209  *      _chgsignf (MSVCRT.@)
 210  */
 211 float CDECL _chgsignf( float num )
 212 {
 213     union { float f; UINT32 i; } u = { num };
 214     u.i ^= 0x80000000;
 215     return u.f;
 216 }
 217
 218 /*********************************************************************
 219  *      _copysignf (MSVCRT.@)
 220  *
 221  * Copied from musl: src/math/copysignf.c
 222  */
 223 float CDECL _copysignf( float x, float y )
 224 {
 225     union { float f; UINT32 i; } ux = { x }, uy = { y };
 226     ux.i &= 0x7fffffff;
 227     ux.i |= uy.i & 0x80000000;
 228     return ux.f;
 229 }
 230
 231 /*********************************************************************
 232  *      _nextafterf (MSVCRT.@)
 233  *
 234  * Copied from musl: src/math/nextafterf.c
 235  */
 236 float CDECL _nextafterf( float x, float y )
 237 {
 238     unsigned int ix = *(unsigned int*)&x;
 239     unsigned int iy = *(unsigned int*)&y;
 240     unsigned int ax, ay, e;
 241
 242     if (isnan(x) || isnan(y))
 243         return x + y;
 244     if (x == y) {
 245         if (_fpclassf(y) & (_FPCLASS_ND | _FPCLASS_PD | _FPCLASS_NZ | _FPCLASS_PZ ))
 246             *_errno() = ERANGE;
 247         return y;
 248     }
 249     ax = ix & 0x7fffffff;
 250     ay = iy & 0x7fffffff;
 251     if (ax == 0) {
 252         if (ay == 0)
 253             return y;
 254         ix = (iy & 0x80000000) | 1;
 255     } else if (ax > ay || ((ix ^ iy) & 0x80000000))
 256         ix--;
 257     else
 258         ix++;
 259     e = ix & 0x7f800000;
 260     /* raise overflow if ix is infinite and x is finite */
 261     if (e == 0x7f800000) {
 262         fp_barrierf(x + x);
 263         *_errno() = ERANGE;
 264     }
 265     /* raise underflow if ix is subnormal or zero */
 266     y = *(float*)&ix;
 267     if (e == 0) {
 268         fp_barrierf(x * x + y * y);
 269         *_errno() = ERANGE;
 270     }
 271     return y;
 272 }
 273
 274 /* Copied from musl: src/math/ilogbf.c */
 275 static int __ilogbf(float x)
 276 {
 277     union { float f; UINT32 i; } u = { x };
 278     int e = u.i >> 23 & 0xff;
 279
 280     if (!e)
 281     {
 282         u.i <<= 9;
 283         if (u.i == 0) return FP_ILOGB0;
 284         /* subnormal x */
 285         for (e = -0x7f; u.i >> 31 == 0; e--, u.i <<= 1);
 286         return e;
 287     }
 288     if (e == 0xff) return u.i << 9 ? FP_ILOGBNAN : INT_MAX;
 289     return e - 0x7f;
 290 }
 291
 292 /*********************************************************************
 293  *      _logbf (MSVCRT.@)
 294  *
 295  * Copied from musl: src/math/logbf.c
 296  */
 297 float CDECL _logbf(float x)
 298 {
 299     if (!isfinite(x))
 300         return x * x;
 301     if (x == 0) {
 302         *_errno() = ERANGE;
 303         return -1 / (x * x);
 304     }
 305     return __ilogbf(x);
 306 }
 307
 308 #endif
 309
 310 /* Copied from musl: src/math/scalbn.c */
 311 static double __scalbn(double x, int n)
 312 {
 313     union {double f; UINT64 i;} u;
 314     double y = x;
 315
 316     if (n > 1023) {
 317         y *= 0x1p1023;
 318         n -= 1023;
 319         if (n > 1023) {
 320             y *= 0x1p1023;
 321             n -= 1023;
 322             if (n > 1023)
 323                 n = 1023;
 324         }
 325     } else if (n < -1022) {
 326         /* make sure final n < -53 to avoid double
 327            rounding in the subnormal range */
 328         y *= 0x1p-1022 * 0x1p53;
 329         n += 1022 - 53;
 330         if (n < -1022) {
 331             y *= 0x1p-1022 * 0x1p53;
 332             n += 1022 - 53;
 333             if (n < -1022)
 334                 n = -1022;
 335         }
 336     }
 337     u.i = (UINT64)(0x3ff + n) << 52;
 338     x = y * u.f;
 339     return x;
 340 }
 341
 342 /* Copied from musl: src/math/__rem_pio2_large.c */
 343 static int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec)
 344 {
 345     static const int init_jk[] = {3, 4};
 346     static const INT32 ipio2[] = {
 347         0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
 348         0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
 349         0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
 350         0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
 351         0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
 352         0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
 353         0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
 354         0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
 355         0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
 356         0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
 357         0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
 358     };
 359     static const double PIo2[] = {
 360         1.57079625129699707031e+00,
 361         7.54978941586159635335e-08,
 362         5.39030252995776476554e-15,
 363         3.28200341580791294123e-22,
 364         1.27065575308067607349e-29,
 365         1.22933308981111328932e-36,
 366         2.73370053816464559624e-44,
 367         2.16741683877804819444e-51,
 368     };
 369
 370     INT32 jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
 371     double z, fw, f[20], fq[20] = {0}, q[20];
 372
 373     /* initialize jk*/
 374     jk = init_jk[prec];
 375     jp = jk;
 376
 377     /* determine jx,jv,q0, note that 3>q0 */
 378     jx = nx - 1;
 379     jv = (e0 - 3) / 24;
 380     if(jv < 0) jv = 0;
 381     q0 = e0 - 24 * (jv + 1);
 382
 383     /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
 384     j = jv - jx;
 385     m = jx + jk;
 386     for (i = 0; i <= m; i++, j++)
 387         f[i] = j < 0 ? 0.0 : (double)ipio2[j];
 388
 389     /* compute q[0],q[1],...q[jk] */
 390     for (i = 0; i <= jk; i++) {
 391         for (j = 0, fw = 0.0; j <= jx; j++)
 392             fw += x[j] * f[jx + i - j];
 393         q[i] = fw;
 394     }
 395
 396     jz = jk;
 397 recompute:
 398     /* distill q[] into iq[] reversingly */
 399     for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) {
 400         fw = (double)(INT32)(0x1p-24 * z);
 401         iq[i] = (INT32)(z - 0x1p24 * fw);
 402         z = q[j - 1] + fw;
 403     }
 404
 405     /* compute n */
 406     z = __scalbn(z, q0); /* actual value of z */
 407     z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */
 408     n = (INT32)z;
 409     z -= (double)n;
 410     ih = 0;
 411     if (q0 > 0) {  /* need iq[jz-1] to determine n */
 412         i = iq[jz - 1] >> (24 - q0);
 413         n += i;
 414         iq[jz - 1] -= i << (24 - q0);
 415         ih = iq[jz - 1] >> (23 - q0);
 416     }
 417     else if (q0 == 0) ih = iq[jz - 1] >> 23;
 418     else if (z >= 0.5) ih = 2;
 419
 420     if (ih > 0) {  /* q > 0.5 */
 421         n += 1;
 422         carry = 0;
 423         for (i = 0; i < jz; i++) {  /* compute 1-q */
 424             j = iq[i];
 425             if (carry == 0) {
 426                 if (j != 0) {
 427                     carry = 1;
 428                     iq[i] = 0x1000000 - j;
 429                 }
 430             } else
 431                 iq[i] = 0xffffff - j;
 432         }
 433         if (q0 > 0) {  /* rare case: chance is 1 in 12 */
 434             switch(q0) {
 435             case 1:
 436                 iq[jz - 1] &= 0x7fffff;
 437                 break;
 438             case 2:
 439                 iq[jz - 1] &= 0x3fffff;
 440                 break;
 441             }
 442         }
 443         if (ih == 2) {
 444             z = 1.0 - z;
 445             if (carry != 0)
 446                 z -= __scalbn(1.0, q0);
 447         }
 448     }
 449
 450     /* check if recomputation is needed */
 451     if (z == 0.0) {
 452         j = 0;
 453         for (i = jz - 1; i >= jk; i--) j |= iq[i];
 454         if (j == 0) {  /* need recomputation */
 455             for (k = 1; iq[jk - k] == 0; k++);  /* k = no. of terms needed */
 456
 457             for (i = jz + 1; i <= jz + k; i++) {  /* add q[jz+1] to q[jz+k] */
 458                 f[jx + i] = (double)ipio2[jv + i];
 459                 for (j = 0, fw = 0.0; j <= jx; j++)
 460                     fw += x[j] * f[jx + i - j];
 461                 q[i] = fw;
 462             }
 463             jz += k;
 464             goto recompute;
 465         }
 466     }
 467
 468     /* chop off zero terms */
 469     if (z == 0.0) {
 470         jz -= 1;
 471         q0 -= 24;
 472         while (iq[jz] == 0) {
 473             jz--;
 474             q0 -= 24;
 475         }
 476     } else { /* break z into 24-bit if necessary */
 477         z = __scalbn(z, -q0);
 478         if (z >= 0x1p24) {
 479             fw = (double)(INT32)(0x1p-24 * z);
 480             iq[jz] = (INT32)(z - 0x1p24 * fw);
 481             jz += 1;
 482             q0 += 24;
 483             iq[jz] = (INT32)fw;
 484         } else
 485             iq[jz] = (INT32)z;
 486     }
 487
 488     /* convert integer "bit" chunk to floating-point value */
 489     fw = __scalbn(1.0, q0);
 490     for (i = jz; i >= 0; i--) {
 491         q[i] = fw * (double)iq[i];
 492         fw *= 0x1p-24;
 493     }
 494
 495     /* compute PIo2[0,...,jp]*q[jz,...,0] */
 496     for(i = jz; i >= 0; i--) {
 497         for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++)
 498             fw += PIo2[k] * q[i + k];
 499         fq[jz - i] = fw;
 500     }
 501
 502     /* compress fq[] into y[] */
 503     switch(prec) {
 504     case 0:
 505         fw = 0.0;
 506         for (i = jz; i >= 0; i--)
 507             fw += fq[i];
 508         y[0] = ih == 0 ? fw : -fw;
 509         break;
 510     case 1:
 511     case 2:
 512         fw = 0.0;
 513         for (i = jz; i >= 0; i--)
 514             fw += fq[i];
 515         fw = (double)fw;
 516         y[0] = ih==0 ? fw : -fw;
 517         fw = fq[0] - fw;
 518         for (i = 1; i <= jz; i++)
 519             fw += fq[i];
 520         y[1] = ih == 0 ? fw : -fw;
 521         break;
 522     case 3:  /* painful */
 523         for (i = jz; i > 0; i--) {
 524             fw = fq[i - 1] + fq[i];
 525             fq[i] += fq[i - 1] - fw;
 526             fq[i - 1] = fw;
 527         }
 528         for (i = jz; i > 1; i--) {
 529             fw = fq[i - 1] + fq[i];
 530             fq[i] += fq[i - 1] - fw;
 531             fq[i - 1] = fw;
 532         }
 533         for (fw = 0.0, i = jz; i >= 2; i--)
 534             fw += fq[i];
 535         if (ih == 0) {
 536             y[0] = fq[0];
 537             y[1] = fq[1];
 538             y[2] = fw;
 539         } else {
 540             y[0] = -fq[0];
 541             y[1] = -fq[1];
 542             y[2] = -fw;
 543         }
 544     }
 545     return n & 7;
 546 }
 547
 548 /* Based on musl implementation: src/math/round.c */
 549 static double __round(double x)
 550 {
 551     ULONGLONG llx = *(ULONGLONG*)&x, tmp;
 552     int e = (llx >> 52 & 0x7ff) - 0x3ff;
 553
 554     if (e >= 52)
 555         return x;
 556     if (e < -1)
 557         return 0 * x;
 558     else if (e == -1)
 559         return signbit(x) ? -1 : 1;
 560
 561     tmp = 0x000fffffffffffffULL >> e;
 562     if (!(llx & tmp))
 563         return x;
 564     llx += 0x0008000000000000ULL >> e;
 565     llx &= ~tmp;
 566     return *(double*)&llx;
 567 }
 568
 569 #if !defined(__i386__) || _MSVCR_VER >= 120
 570 /* Copied from musl: src/math/expm1f.c */
 571 static float __expm1f(float x)
 572 {
 573     static const float ln2_hi = 6.9313812256e-01,
 574         ln2_lo = 9.0580006145e-06,
 575         invln2 = 1.4426950216e+00,
 576         Q1 = -3.3333212137e-2,
 577         Q2 = 1.5807170421e-3;
 578
 579     float y, hi, lo, c, t, e, hxs, hfx, r1, twopk;
 580     union {float f; UINT32 i;} u = {x};
 581     UINT32 hx = u.i & 0x7fffffff;
 582     int k, sign = u.i >> 31;
 583
 584     /* filter out huge and non-finite argument */
 585     if (hx >= 0x4195b844) { /* if |x|>=27*ln2 */
 586         if (hx >= 0x7f800000) /* NaN */
 587             return u.i == 0xff800000 ? -1 : x;
 588         if (sign)
 589             return math_error(_UNDERFLOW, "exp", x, 0, -1);
 590         if (hx > 0x42b17217) /* x > log(FLT_MAX) */
 591             return math_error(_OVERFLOW, "exp", x, 0, fp_barrierf(x * FLT_MAX));
 592     }
 593
 594     /* argument reduction */
 595     if (hx > 0x3eb17218) { /* if |x| > 0.5 ln2 */
 596         if (hx < 0x3F851592) { /* and |x| < 1.5 ln2 */
 597             if (!sign) {
 598                 hi = x - ln2_hi;
 599                 lo = ln2_lo;
 600                 k = 1;
 601             } else {
 602                 hi = x + ln2_hi;
 603                 lo = -ln2_lo;
 604                 k = -1;
 605             }
 606         } else {
 607             k = invln2 * x + (sign ? -0.5f : 0.5f);
 608             t = k;
 609             hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
 610             lo = t * ln2_lo;
 611         }
 612         x = hi - lo;
 613         c = (hi - x) - lo;
 614     } else if (hx < 0x33000000) { /* when |x|<2**-25, return x */
 615         if (hx < 0x00800000)
 616             fp_barrierf(x * x);
 617         return x;
 618     } else
 619         k = 0;
 620
 621     /* x is now in primary range */
 622     hfx = 0.5f * x;
 623     hxs = x * hfx;
 624     r1 = 1.0f + hxs * (Q1 + hxs * Q2);
 625     t = 3.0f - r1 * hfx;
 626     e = hxs * ((r1 - t) / (6.0f - x * t));
 627     if (k == 0) /* c is 0 */
 628         return x - (x * e - hxs);
 629     e = x * (e - c) - c;
 630     e -= hxs;
 631     /* exp(x) ~ 2^k (x_reduced - e + 1) */
 632     if (k == -1)
 633         return 0.5f * (x - e) - 0.5f;
 634     if (k == 1) {
 635         if (x < -0.25f)
 636             return -2.0f * (e - (x + 0.5f));
 637         return 1.0f + 2.0f * (x - e);
 638     }
 639     u.i = (0x7f + k) << 23; /* 2^k */
 640     twopk = u.f;
 641     if (k < 0 || k > 56) { /* suffice to return exp(x)-1 */
 642         y = x - e + 1.0f;
 643         if (k == 128)
 644             y = y * 2.0f * 0x1p127f;
 645         else
 646             y = y * twopk;
 647         return y - 1.0f;
 648     }
 649     u.i = (0x7f-k) << 23; /* 2^-k */
 650     if (k < 23)
 651         y = (x - e + (1 - u.f)) * twopk;
 652     else
 653         y = (x - (e + u.f) + 1) * twopk;
 654     return y;
 655 }
 656
 657 /* Copied from musl: src/math/__sindf.c */
 658 static float __sindf(double x)
 659 {
 660     static const double S1 = -0x1.5555555555555p-3,
 661         S2 = 0x1.1111111111111p-7,
 662         S3 = -0x1.a01a01a01a01ap-13,
 663         S4 = 0x1.71de3a556c734p-19;
 664
 665     double r, s, w, z;
 666
 667     z = x * x;
 668     if (x > -7.8175831586122513e-03 && x < 7.8175831586122513e-03)
 669         return x * (1 + S1 * z);
 670
 671     w = z * z;
 672     r = S3 + z * S4;
 673     s = z * x;
 674     return (x + s * (S1 + z * S2)) + s * w * r;
 675 }
 676
 677 /* Copied from musl: src/math/__cosdf.c */
 678 static float __cosdf(double x)
 679 {
 680     static const double C0 = -0x1.0000000000000p-1,
 681         C1 = 0x1.5555555555555p-5,
 682         C2 = -0x1.6c16c16c16c17p-10,
 683         C3 = 0x1.a01a01a01a01ap-16,
 684         C4 = -0x1.27e4fb7789f5cp-22;
 685     double z;
 686
 687     z = x * x;
 688     if (x > -7.8163146972656250e-03 && x < 7.8163146972656250e-03)
 689         return 1 + C0 * z;
 690     return 1.0 + z * (C0 + z * (C1 + z * (C2 + z * (C3 + z * C4))));
 691 }
 692
     /*
      * Table of 32 constants stored as raw 64-bit patterns; the first entry,
      * 0x3ff0000000000000, is the IEEE-754 double encoding of 1.0.
      * NOTE(review): presumably the lookup table of the musl-derived exp2f
      * implementation - the consumer is outside this part of the file, so
      * confirm how the entries are indexed and decoded there.
      */
 693 static const UINT64 exp2f_T[] = {
 694     0x3ff0000000000000ULL, 0x3fefd9b0d3158574ULL, 0x3fefb5586cf9890fULL, 0x3fef9301d0125b51ULL,
 695     0x3fef72b83c7d517bULL, 0x3fef54873168b9aaULL, 0x3fef387a6e756238ULL, 0x3fef1e9df51fdee1ULL,
 696     0x3fef06fe0a31b715ULL, 0x3feef1a7373aa9cbULL, 0x3feedea64c123422ULL, 0x3feece086061892dULL,
 697     0x3feebfdad5362a27ULL, 0x3feeb42b569d4f82ULL, 0x3feeab07dd485429ULL, 0x3feea47eb03a5585ULL,
 698     0x3feea09e667f3bcdULL, 0x3fee9f75e8ec5f74ULL, 0x3feea11473eb0187ULL, 0x3feea589994cce13ULL,
 699     0x3feeace5422aa0dbULL, 0x3feeb737b0cdc5e5ULL, 0x3feec49182a3f090ULL, 0x3feed503b23e255dULL,
 700     0x3feee89f995ad3adULL, 0x3feeff76f2fb5e47ULL, 0x3fef199bdd85529cULL, 0x3fef3720dcef9069ULL,
 701     0x3fef5818dcfba487ULL, 0x3fef7c97337b9b5fULL, 0x3fefa4afa2a490daULL, 0x3fefd0765b6e4540ULL
 702 };
 703 #endif
 704
 705 #ifndef __i386__
 706
 707 /*********************************************************************
 708  *      _fpclassf (MSVCRT.@)
 709  */
 710 int CDECL _fpclassf( float num )
 711 {
 712     union { float f; UINT32 i; } u = { num };
 713     int e = u.i >> 23 & 0xff;
 714     int s = u.i >> 31;
 715
 716     switch (e)
 717     {
 718     case 0:
 719         if (u.i << 1) return s ? _FPCLASS_ND : _FPCLASS_PD;
 720         return s ? _FPCLASS_NZ : _FPCLASS_PZ;
 721     case 0xff:
 722         if (u.i << 9) return ((u.i >> 22) & 1) ? _FPCLASS_QNAN : _FPCLASS_SNAN;
 723         return s ? _FPCLASS_NINF : _FPCLASS_PINF;
 724     default:
 725         return s ? _FPCLASS_NN : _FPCLASS_PN;
 726     }
 727 }
 728
 729 /*********************************************************************
 730  *      _finitef (MSVCRT.@)
 731  */
 732 int CDECL _finitef( float num )
 733 {
 734     union { float f; UINT32 i; } u = { num };
 735     return (u.i & 0x7fffffff) < 0x7f800000;
 736 }
 737
 738 /*********************************************************************
 739  *      _isnanf (MSVCRT.@)
 740  */
 741 int CDECL _isnanf( float num )
 742 {
 743     union { float f; UINT32 i; } u = { num };
 744     return (u.i & 0x7fffffff) > 0x7f800000;
 745 }
 746
 747 static float asinf_R(float z)
 748 {
 749     /* coefficients for R(x^2) */
 750     static const float p1 = 1.66666672e-01,
 751                  p2 = -5.11644611e-02,
 752                  p3 = -1.21124933e-02,
 753                  p4 = -3.58742251e-03,
 754                  q1 = -7.56982703e-01;
 755
 756     float p, q;
 757     p = z * (p1 + z * (p2 + z * (p3 + z * p4)));
 758     q = 1.0f + z * q1;
 759     return p / q;
 760 }
 761
 762 /*********************************************************************
 763  *      acosf (MSVCRT.@)
 764  *
 765  * Copied from musl: src/math/acosf.c
 766  */
 767 float CDECL acosf( float x )
 768 {
 769     static const double pio2_lo = 6.12323399573676603587e-17;
 770
 771     float z, w, s, c, df;
 772     unsigned int hx, ix;
 773
 774     hx = *(unsigned int*)&x;
 775     ix = hx & 0x7fffffff;
 776     /* |x| >= 1 or nan */
 777     if (ix >= 0x3f800000) {
 778         if (ix == 0x3f800000) {
 779             if (hx >> 31)
 780                 return M_PI;
 781             return 0;
 782         }
 783         if (isnan(x)) return x;
 784         return math_error(_DOMAIN, "acosf", x, 0, 0 / (x - x));
 785     }
 786     /* |x| < 0.5 */
 787     if (ix < 0x3f000000) {
 788         if (ix <= 0x32800000) /* |x| < 2**-26 */
 789             return M_PI_2;
 790         return M_PI_2 - (x - (pio2_lo - x * asinf_R(x * x)));
 791     }
 792     /* x < -0.5 */
 793     if (hx >> 31) {
 794         z = (1 + x) * 0.5f;
 795         s = sqrtf(z);
 796         return M_PI - 2 * (s + ((double)s * asinf_R(z)));
 797     }
 798     /* x > 0.5 */
 799     z = (1 - x) * 0.5f;
 800     s = sqrtf(z);
 801     hx = *(unsigned int*)&s & 0xffff0000;
 802     df = *(float*)&hx;
 803     c = (z - df * df) / (s + df);
 804     w = asinf_R(z) * s + c;
 805     return 2 * (df + w);
 806 }
 807
 808 /*********************************************************************
 809  *      asinf (MSVCRT.@)
 810  *
 811  * Copied from musl: src/math/asinf.c
 812  */
 813 float CDECL asinf( float x )
 814 {
 815     static const double pio2 = 1.570796326794896558e+00;
 816     static const float pio4_hi = 0.785398125648;
 817     static const float pio2_lo = 7.54978941586e-08;
 818
 819     float s, z, f, c;
 820     unsigned int hx, ix;
 821
 822     hx = *(unsigned int*)&x;
 823     ix = hx & 0x7fffffff;
 824     if (ix >= 0x3f800000) {  /* |x| >= 1 */
 825         if (ix == 0x3f800000)  /* |x| == 1 */
 826             return x * pio2 + 7.5231638453e-37;  /* asin(+-1) = +-pi/2 with inexact */
 827         if (isnan(x)) return x;
 828         return math_error(_DOMAIN, "asinf", x, 0, 0 / (x - x));
 829     }
 830     if (ix < 0x3f000000) {  /* |x| < 0.5 */
 831         /* if 0x1p-126 <= |x| < 0x1p-12, avoid raising underflow */
 832         if (ix < 0x39800000 && ix >= 0x00800000)
 833             return x;
 834         return x + x * asinf_R(x * x);
 835     }
 836     /* 1 > |x| >= 0.5 */
 837     z = (1 - fabsf(x)) * 0.5f;
 838     s = sqrtf(z);
 839     /* f+c = sqrt(z) */
 840     *(unsigned int*)&f = *(unsigned int*)&s & 0xffff0000;
 841     c = (z - f * f) / (s + f);
 842     x = pio4_hi - (2 * s * asinf_R(z) - (pio2_lo - 2 * c) - (pio4_hi - 2 * f));
 843     if (hx >> 31)
 844         return -x;
 845     return x;
 846 }
 847
 848 /*********************************************************************
 849  *      atanf (MSVCRT.@)
 850  *
 851  * Copied from musl: src/math/atanf.c
 852  */
 853 float CDECL atanf( float x )
 854 {
 855     static const float atanhi[] = {
 856         4.6364760399e-01,
 857         7.8539812565e-01,
 858         9.8279368877e-01,
 859         1.5707962513e+00,
 860     };
 861     static const float atanlo[] = {
 862         5.0121582440e-09,
 863         3.7748947079e-08,
 864         3.4473217170e-08,
 865         7.5497894159e-08,
 866     };
 867     static const float aT[] = {
 868         3.3333328366e-01,
 869         -1.9999158382e-01,
 870         1.4253635705e-01,
 871         -1.0648017377e-01,
 872         6.1687607318e-02,
 873     };
 874
 875     float w, s1, s2, z;
 876     unsigned int ix, sign;
 877     int id;
 878
 879 #if _MSVCR_VER == 0
 880     if (isnan(x)) return math_error(_DOMAIN, "atanf", x, 0, x);
 881 #endif
 882
 883     ix = *(unsigned int*)&x;
 884     sign = ix >> 31;
 885     ix &= 0x7fffffff;
 886     if (ix >= 0x4c800000) {  /* if |x| >= 2**26 */
 887         if (isnan(x))
 888             return x;
 889         z = atanhi[3] + 7.5231638453e-37;
 890         return sign ? -z : z;
 891     }
 892     if (ix < 0x3ee00000) {   /* |x| < 0.4375 */
 893         if (ix < 0x39800000) {  /* |x| < 2**-12 */
 894             if (ix < 0x00800000)
 895                 /* raise underflow for subnormal x */
 896                 fp_barrierf(x*x);
 897             return x;
 898         }
 899         id = -1;
 900     } else {
 901         x = fabsf(x);
 902         if (ix < 0x3f980000) {  /* |x| < 1.1875 */
 903             if (ix < 0x3f300000) {  /*  7/16 <= |x| < 11/16 */
 904                 id = 0;
 905                 x = (2.0f * x - 1.0f) / (2.0f + x);
 906             } else {                /* 11/16 <= |x| < 19/16 */
 907                 id = 1;
 908                 x = (x - 1.0f) / (x + 1.0f);
 909             }
 910         } else {
 911             if (ix < 0x401c0000) {  /* |x| < 2.4375 */
 912                 id = 2;
 913                 x = (x - 1.5f) / (1.0f + 1.5f * x);
 914             } else {                /* 2.4375 <= |x| < 2**26 */
 915                 id = 3;
 916                 x = -1.0f / x;
 917             }
 918         }
 919     }
 920     /* end of argument reduction */
 921     z = x * x;
 922     w = z * z;
 923     /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
 924     s1 = z * (aT[0] + w * (aT[2] + w * aT[4]));
 925     s2 = w * (aT[1] + w * aT[3]);
 926     if (id < 0)
 927         return x - x * (s1 + s2);
 928     z = atanhi[id] - ((x * (s1 + s2) - atanlo[id]) - x);
 929     return sign ? -z : z;
 930 }
 931
 932 /*********************************************************************
 933  *              atan2f (MSVCRT.@)
 934  *
 935  * Copied from musl: src/math/atan2f.c
 936  */
 937 float CDECL atan2f( float y, float x )
 938 {
 939     static const float pi     = 3.1415927410e+00,
 940                  pi_lo  = -8.7422776573e-08;
 941
 942     float z;
 943     unsigned int m, ix, iy;
 944
 945     if (isnan(x) || isnan(y))
 946         return x + y;
 947     ix = *(unsigned int*)&x;
 948     iy = *(unsigned int*)&y;
 949     if (ix == 0x3f800000)  /* x=1.0 */
 950         return atanf(y);
 951     m = ((iy >> 31) & 1) | ((ix >> 30) & 2);  /* 2*sign(x)+sign(y) */
 952     ix &= 0x7fffffff;
 953     iy &= 0x7fffffff;
 954
 955     /* when y = 0 */
 956     if (iy == 0) {
 957         switch (m) {
 958         case 0:
 959         case 1: return y;   /* atan(+-0,+anything)=+-0 */
 960         case 2: return pi;  /* atan(+0,-anything) = pi */
 961         case 3: return -pi; /* atan(-0,-anything) =-pi */
 962         }
 963     }
 964     /* when x = 0 */
 965     if (ix == 0)
 966         return m & 1 ? -pi / 2 : pi / 2;
 967     /* when x is INF */
 968     if (ix == 0x7f800000) {
 969         if (iy == 0x7f800000) {
 970             switch (m) {
 971             case 0: return pi / 4;      /* atan(+INF,+INF) */
 972             case 1: return -pi / 4;     /* atan(-INF,+INF) */
 973             case 2: return 3 * pi / 4;  /*atan(+INF,-INF)*/
 974             case 3: return -3 * pi / 4; /*atan(-INF,-INF)*/
 975             }
 976         } else {
 977             switch (m) {
 978             case 0: return 0.0f;    /* atan(+...,+INF) */
 979             case 1: return -0.0f;   /* atan(-...,+INF) */
 980             case 2: return pi;      /* atan(+...,-INF) */
 981             case 3: return -pi;     /* atan(-...,-INF) */
 982             }
 983         }
 984     }
 985     /* |y/x| > 0x1p26 */
 986     if (ix + (26 << 23) < iy || iy == 0x7f800000)
 987         return m & 1 ? -pi / 2 : pi / 2;
 988
 989     /* z = atan(|y/x|) with correct underflow */
 990     if ((m & 2) && iy + (26 << 23) < ix)  /*|y/x| < 0x1p-26, x < 0 */
 991         z = 0.0;
 992     else
 993         z = atanf(fabsf(y / x));
 994     switch (m) {
 995     case 0: return z;                /* atan(+,+) */
 996     case 1: return -z;               /* atan(-,+) */
 997     case 2: return pi - (z - pi_lo); /* atan(+,-) */
 998     default: /* case 3 */
 999         return (z - pi_lo) - pi;     /* atan(-,-) */
1000     }
1001 }
1002
1003 /* Copied from musl: src/math/__rem_pio2f.c */
1004 static int __rem_pio2f(float x, double *y)
1005 {
1006     static const double toint = 1.5 / DBL_EPSILON,
1007         pio4 = 0x1.921fb6p-1,
1008         invpio2 = 6.36619772367581382433e-01,
1009         pio2_1 = 1.57079631090164184570e+00,
1010         pio2_1t = 1.58932547735281966916e-08;
1011
1012     union {float f; uint32_t i;} u = {x};
1013     double tx[1], ty[1], fn;
1014     UINT32 ix;
1015     int n, sign, e0;
1016
1017     ix = u.i & 0x7fffffff;
1018     /* 25+53 bit pi is good enough for medium size */
1019     if (ix < 0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */
1020         /* Use a specialized rint() to get fn. */
1021         fn = fp_barrier(x * invpio2 + toint) - toint;
1022         n  = (int)fn;
1023         *y = x - fn * pio2_1 - fn * pio2_1t;
1024         /* Matters with directed rounding. */
1025         if (*y < -pio4) {
1026             n--;
1027             fn--;
1028             *y = x - fn * pio2_1 - fn * pio2_1t;
1029         } else if (*y > pio4) {
1030             n++;
1031             fn++;
1032             *y = x - fn * pio2_1 - fn * pio2_1t;
1033         }
1034         return n;
1035     }
1036     if(ix >= 0x7f800000) { /* x is inf or NaN */
1037         *y = x - x;
1038         return 0;
1039     }
1040     /* scale x into [2^23, 2^24-1] */
1041     sign = u.i >> 31;
1042     e0 = (ix >> 23) - (0x7f + 23); /* e0 = ilogb(|x|)-23, positive */
1043     u.i = ix - (e0 << 23);
1044     tx[0] = u.f;
1045     n = __rem_pio2_large(tx, ty, e0, 1, 0);
1046     if (sign) {
1047         *y = -ty[0];
1048         return -n;
1049     }
1050     *y = ty[0];
1051     return n;
1052 }
1053
1054 /*********************************************************************
1055  *      cosf (MSVCRT.@)
1056  *
1057  * Copied from musl: src/math/cosf.c
1058  */
1059 float CDECL cosf( float x )
1060 {
1061     static const double c1pio2 = 1*M_PI_2,
1062         c2pio2 = 2*M_PI_2,
1063         c3pio2 = 3*M_PI_2,
1064         c4pio2 = 4*M_PI_2;
1065
1066     double y;
1067     UINT32 ix;
1068     unsigned n, sign;
1069
1070     ix = *(UINT32*)&x;
1071     sign = ix >> 31;
1072     ix &= 0x7fffffff;
1073
1074     if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
1075         if (ix < 0x39800000) { /* |x| < 2**-12 */
1076             /* raise inexact if x != 0 */
1077             fp_barrierf(x + 0x1p120f);
1078             return 1.0f;
1079         }
1080         return __cosdf(x);
1081     }
1082     if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
1083         if (ix > 0x4016cbe3) /* |x| ~> 3*pi/4 */
1084             return -__cosdf(sign ? x + c2pio2 : x - c2pio2);
1085         else {
1086             if (sign)
1087                 return __sindf(x + c1pio2);
1088             else
1089                 return __sindf(c1pio2 - x);
1090         }
1091     }
1092     if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
1093         if (ix > 0x40afeddf) /* |x| ~> 7*pi/4 */
1094             return __cosdf(sign ? x + c4pio2 : x - c4pio2);
1095         else {
1096             if (sign)
1097                 return __sindf(-x - c3pio2);
1098             else
1099                 return __sindf(x - c3pio2);
1100         }
1101     }
1102
1103     /* cos(Inf or NaN) is NaN */
1104     if (isinf(x)) return math_error(_DOMAIN, "cosf", x, 0, x - x);
1105     if (ix >= 0x7f800000)
1106         return x - x;
1107
1108     /* general argument reduction needed */
1109     n = __rem_pio2f(x, &y);
1110     switch (n & 3) {
1111     case 0: return __cosdf(y);
1112     case 1: return __sindf(-y);
1113     case 2: return -__cosdf(y);
1114     default: return __sindf(y);
1115     }
1116 }
1117
1118 /* Copied from musl: src/math/__expo2f.c */
1119 static float __expo2f(float x, float sign)
1120 {
1121     static const int k = 235;
1122     static const float kln2 = 0x1.45c778p+7f;
1123     float scale;
1124
1125     *(UINT32*)&scale = (UINT32)(0x7f + k/2) << 23;
1126     return expf(x - kln2) * (sign * scale) * scale;
1127 }
1128
1129 /*********************************************************************
1130  *      coshf (MSVCRT.@)
1131  *
1132  * Copied from musl: src/math/coshf.c
1133  */
1134 float CDECL coshf( float x )
1135 {
1136     UINT32 ui = *(UINT32*)&x;
1137     UINT32 sign = ui & 0x80000000;
1138     float t;
1139
1140     /* |x| */
1141     ui &= 0x7fffffff;
1142     x = *(float*)&ui;
1143
1144     /* |x| < log(2) */
1145     if (ui < 0x3f317217) {
1146         if (ui < 0x3f800000 - (12 << 23)) {
1147             fp_barrierf(x + 0x1p120f);
1148             return 1;
1149         }
1150         t = __expm1f(x);
1151         return 1 + t * t / (2 * (1 + t));
1152     }
1153
1154     /* |x| < log(FLT_MAX) */
1155     if (ui < 0x42b17217) {
1156         t = expf(x);
1157         return 0.5f * (t + 1 / t);
1158     }
1159
1160     /* |x| > log(FLT_MAX) or nan */
1161     if (ui > 0x7f800000)
1162         *(UINT32*)&t = ui | sign | 0x400000;
1163     else
1164         t = __expo2f(x, 1.0f);
1165     return t;
1166 }
1167
1168 /*********************************************************************
1169  *      expf (MSVCRT.@)
1170  */
1171 float CDECL expf( float x )
1172 {
1173     static const double C[] = {
1174         0x1.c6af84b912394p-5 / (1 << 5) / (1 << 5) / (1 << 5),
1175         0x1.ebfce50fac4f3p-3 / (1 << 5) / (1 << 5),
1176         0x1.62e42ff0c52d6p-1 / (1 << 5)
1177     };
1178     static const double invln2n = 0x1.71547652b82fep+0 * (1 << 5);
1179
1180     double kd, z, r, r2, y, s;
1181     UINT32 abstop;
1182     UINT64 ki, t;
1183
1184     abstop = (*(UINT32*)&x >> 20) & 0x7ff;
1185     if (abstop >= 0x42b) {
1186         /* |x| >= 88 or x is nan.  */
1187         if (*(UINT32*)&x == 0xff800000)
1188             return 0.0f;
1189         if (abstop >= 0x7f8)
1190             return x + x;
1191         if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
1192             return math_error(_OVERFLOW, "expf", x, 0, x * FLT_MAX);
1193         if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
1194             return math_error(_UNDERFLOW, "expf", x, 0, fp_barrierf(FLT_MIN) * FLT_MIN);
1195     }
1196
1197     /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k.  */
1198     z = invln2n * x;
1199
1200     /* Round and convert z to int, the result is in [-150*N, 128*N] and
1201        ideally ties-to-even rule is used, otherwise the magnitude of r
1202        can be bigger which gives larger approximation error.  */
1203     kd = __round(z);
1204     ki = (INT64)kd;
1205     r = z - kd;
1206
1207     /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
1208     t = exp2f_T[ki % (1 << 5)];
1209     t += ki << (52 - 5);
1210     s = *(double*)&t;
1211     z = C[0] * r + C[1];
1212     r2 = r * r;
1213     y = C[2] * r + 1;
1214     y = z * r2 + y;
1215     y = y * s;
1216     return y;
1217 }
1218
1219 /*********************************************************************
1220  *      fmodf (MSVCRT.@)
1221  *
1222  * Copied from musl: src/math/fmodf.c
1223  */
1224 float CDECL fmodf( float x, float y )
1225 {
1226     UINT32 xi = *(UINT32*)&x;
1227     UINT32 yi = *(UINT32*)&y;
1228     int ex = xi>>23 & 0xff;
1229     int ey = yi>>23 & 0xff;
1230     UINT32 sx = xi & 0x80000000;
1231     UINT32 i;
1232
1233     if (isinf(x)) return math_error(_DOMAIN, "fmodf", x, y, (x * y) / (x * y));
1234     if (yi << 1 == 0 || isnan(y) || ex == 0xff)
1235         return (x * y) / (x * y);
1236     if (xi << 1 <= yi << 1) {
1237         if (xi << 1 == yi << 1)
1238             return 0 * x;
1239         return x;
1240     }
1241
1242     /* normalize x and y */
1243     if (!ex) {
1244         for (i = xi << 9; i >> 31 == 0; ex--, i <<= 1);
1245         xi <<= -ex + 1;
1246     } else {
1247         xi &= -1U >> 9;
1248         xi |= 1U << 23;
1249     }
1250     if (!ey) {
1251         for (i = yi << 9; i >> 31 == 0; ey--, i <<= 1);
1252         yi <<= -ey + 1;
1253     } else {
1254         yi &= -1U >> 9;
1255         yi |= 1U << 23;
1256     }
1257
1258     /* x mod y */
1259     for (; ex > ey; ex--) {
1260         i = xi - yi;
1261         if (i >> 31 == 0) {
1262             if (i == 0)
1263                 return 0 * x;
1264             xi = i;
1265         }
1266         xi <<= 1;
1267     }
1268     i = xi - yi;
1269     if (i >> 31 == 0) {
1270         if (i == 0)
1271             return 0 * x;
1272         xi = i;
1273     }
1274     for (; xi>>23 == 0; xi <<= 1, ex--);
1275
1276     /* scale result up */
1277     if (ex > 0) {
1278         xi -= 1U << 23;
1279         xi |= (UINT32)ex << 23;
1280     } else {
1281         xi >>= -ex + 1;
1282     }
1283     xi |= sx;
1284     return *(float*)&xi;
1285 }
1286
1287 /*********************************************************************
1288  *      logf (MSVCRT.@)
1289  *
1290  * Copied from musl: src/math/logf.c src/math/logf_data.c
1291  */
1292 float CDECL logf( float x )
1293 {
1294     static const double Ln2 = 0x1.62e42fefa39efp-1;
1295     static const double A[] = {
1296         -0x1.00ea348b88334p-2,
1297         0x1.5575b0be00b6ap-2,
1298         -0x1.ffffef20a4123p-2
1299     };
1300     static const struct {
1301         double invc, logc;
1302     } T[] = {
1303         { 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 },
1304         { 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 },
1305         { 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 },
1306         { 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 },
1307         { 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 },
1308         { 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 },
1309         { 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 },
1310         { 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 },
1311         { 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 },
1312         { 0x1p+0, 0x0p+0 },
1313         { 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 },
1314         { 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 },
1315         { 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 },
1316         { 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 },
1317         { 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 },
1318         { 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 }
1319     };
1320
1321     double z, r, r2, y, y0, invc, logc;
1322     UINT32 ix, iz, tmp;
1323     int k, i;
1324
1325     ix = *(UINT32*)&x;
1326     /* Fix sign of zero with downward rounding when x==1. */
1327     if (ix == 0x3f800000)
1328         return 0;
1329     if (ix - 0x00800000 >= 0x7f800000 - 0x00800000) {
1330         /* x < 0x1p-126 or inf or nan. */
1331         if (ix * 2 == 0)
1332             return math_error(_SING, "logf", x, 0, (ix & 0x80000000 ? 1.0 : -1.0) / x);
1333         if (ix == 0x7f800000) /* log(inf) == inf. */
1334             return x;
1335         if (ix * 2 > 0xff000000)
1336             return x;
1337         if (ix & 0x80000000)
1338             return math_error(_DOMAIN, "logf", x, 0, (x - x) / (x - x));
1339         /* x is subnormal, normalize it. */
1340         x *= 0x1p23f;
1341         ix = *(UINT32*)&x;
1342         ix -= 23 << 23;
1343     }
1344
1345     /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
1346        The range is split into N subintervals.
1347        The ith subinterval contains z and c is near its center. */
1348     tmp = ix - 0x3f330000;
1349     i = (tmp >> (23 - 4)) % (1 << 4);
1350     k = (INT32)tmp >> 23; /* arithmetic shift */
1351     iz = ix - (tmp & (0x1ffu << 23));
1352     invc = T[i].invc;
1353     logc = T[i].logc;
1354     z = *(float*)&iz;
1355
1356     /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
1357     r = z * invc - 1;
1358     y0 = logc + (double)k * Ln2;
1359
1360     /* Pipelined polynomial evaluation to approximate log1p(r). */
1361     r2 = r * r;
1362     y = A[1] * r + A[2];
1363     y = A[0] * r2 + y;
1364     y = y * r2 + (y0 + r);
1365     return y;
1366 }
1367
1368 /*********************************************************************
1369  *      log10f (MSVCRT.@)
1370  */
1371 float CDECL log10f( float x )
1372 {
1373     static const float ivln10hi = 4.3432617188e-01,
1374         ivln10lo = -3.1689971365e-05,
1375         log10_2hi = 3.0102920532e-01,
1376         log10_2lo = 7.9034151668e-07,
1377         Lg1 = 0xaaaaaa.0p-24,
1378         Lg2 = 0xccce13.0p-25,
1379         Lg3 = 0x91e9ee.0p-25,
1380         Lg4 = 0xf89e26.0p-26;
1381
1382     union {float f; UINT32 i;} u = {x};
1383     float hfsq, f, s, z, R, w, t1, t2, dk, hi, lo;
1384     UINT32 ix;
1385     int k;
1386
1387     ix = u.i;
1388     k = 0;
1389     if (ix < 0x00800000 || ix >> 31) { /* x < 2**-126 */
1390         if (ix << 1 == 0)
1391             return math_error(_SING, "log10f", x, 0, -1 / (x * x));
1392         if ((ix & ~(1u << 31)) > 0x7f800000)
1393             return x;
1394         if (ix >> 31)
1395             return math_error(_DOMAIN, "log10f", x, 0, (x - x) / (x - x));
1396         /* subnormal number, scale up x */
1397         k -= 25;
1398         x *= 0x1p25f;
1399         u.f = x;
1400         ix = u.i;
1401     } else if (ix >= 0x7f800000) {
1402         return x;
1403     } else if (ix == 0x3f800000)
1404         return 0;
1405
1406     /* reduce x into [sqrt(2)/2, sqrt(2)] */
1407     ix += 0x3f800000 - 0x3f3504f3;
1408     k += (int)(ix >> 23) - 0x7f;
1409     ix = (ix & 0x007fffff) + 0x3f3504f3;
1410     u.i = ix;
1411     x = u.f;
1412
1413     f = x - 1.0f;
1414     s = f / (2.0f + f);
1415     z = s * s;
1416     w = z * z;
1417     t1= w * (Lg2 + w * Lg4);
1418     t2= z * (Lg1 + w * Lg3);
1419     R = t2 + t1;
1420     hfsq = 0.5f * f * f;
1421
1422     hi = f - hfsq;
1423     u.f = hi;
1424     u.i &= 0xfffff000;
1425     hi = u.f;
1426     lo = f - hi - hfsq + s * (hfsq + R);
1427     dk = k;
1428     return dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi + hi * ivln10hi + dk * log10_2hi;
1429 }
1430
1431 /* Subnormal input is normalized so ix has negative biased exponent.
1432    Output is multiplied by POWF_SCALE (where 1 << 5). */
1433 static double powf_log2(UINT32 ix)
1434 {
1435     static const struct {
1436         double invc, logc;
1437     } T[] = {
1438         { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * (1 << 5) },
1439         { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * (1 << 5) },
1440         { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * (1 << 5) },
1441         { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * (1 << 5) },
1442         { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * (1 << 5) },
1443         { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * (1 << 5) },
1444         { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * (1 << 5) },
1445         { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * (1 << 5) },
1446         { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * (1 << 5) },
1447         { 0x1p+0, 0x0p+0 * (1 << 4) },
1448         { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * (1 << 5) },
1449         { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * (1 << 5) },
1450         { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * (1 << 5) },
1451         { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * (1 << 5) },
1452         { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * (1 << 5) },
1453         { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * (1 << 5) }
1454     };
1455     static const double A[] = {
1456         0x1.27616c9496e0bp-2 * (1 << 5), -0x1.71969a075c67ap-2 * (1 << 5),
1457         0x1.ec70a6ca7baddp-2 * (1 << 5), -0x1.7154748bef6c8p-1 * (1 << 5),
1458         0x1.71547652ab82bp0 * (1 << 5)
1459     };
1460
1461     double z, r, r2, r4, p, q, y, y0, invc, logc;
1462     UINT32 iz, top, tmp;
1463     int k, i;
1464
1465     /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
1466        The range is split into N subintervals.
1467        The ith subinterval contains z and c is near its center. */
1468     tmp = ix - 0x3f330000;
1469     i = (tmp >> (23 - 4)) % (1 << 4);
1470     top = tmp & 0xff800000;
1471     iz = ix - top;
1472     k = (INT32)top >> (23 - 5); /* arithmetic shift */
1473     invc = T[i].invc;
1474     logc = T[i].logc;
1475     z = *(float*)&iz;
1476
1477     /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
1478     r = z * invc - 1;
1479     y0 = logc + (double)k;
1480
1481     /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
1482     r2 = r * r;
1483     y = A[0] * r + A[1];
1484     p = A[2] * r + A[3];
1485     r4 = r2 * r2;
1486     q = A[4] * r + y0;
1487     q = p * r2 + q;
1488     y = y * r4 + q;
1489     return y;
1490 }
1491
1492 /* The output of log2 and thus the input of exp2 is either scaled by N
1493    (in case of fast toint intrinsics) or not. The unscaled xd must be
1494    in [-1021,1023], sign_bias sets the sign of the result. */
1495 static float powf_exp2(double xd, UINT32 sign_bias)
1496 {
1497     static const double C[] = {
1498         0x1.c6af84b912394p-5 / (1 << 5) / (1 << 5) / (1 << 5),
1499         0x1.ebfce50fac4f3p-3 / (1 << 5) / (1 << 5),
1500         0x1.62e42ff0c52d6p-1 / (1 << 5)
1501     };
1502
1503     UINT64 ki, ski, t;
1504     double kd, z, r, r2, y, s;
1505
1506     /* N*x = k + r with r in [-1/2, 1/2] */
1507     kd = __round(xd); /* k */
1508     ki = (INT64)kd;
1509     r = xd - kd;
1510
1511     /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
1512     t = exp2f_T[ki % (1 << 5)];
1513     ski = ki + sign_bias;
1514     t += ski << (52 - 5);
1515     s = *(double*)&t;
1516     z = C[0] * r + C[1];
1517     r2 = r * r;
1518     y = C[2] * r + 1;
1519     y = z * r2 + y;
1520     y = y * s;
1521     return y;
1522 }
1523
1524 /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
1525    the bit representation of a non-zero finite floating-point value. */
1526 static int powf_checkint(UINT32 iy)
1527 {
1528     int e = iy >> 23 & 0xff;
1529     if (e < 0x7f)
1530         return 0;
1531     if (e > 0x7f + 23)
1532         return 2;
1533     if (iy & ((1 << (0x7f + 23 - e)) - 1))
1534         return 0;
1535     if (iy & (1 << (0x7f + 23 - e)))
1536         return 1;
1537     return 2;
1538 }
1539
1540 /*********************************************************************
1541  *      powf (MSVCRT.@)
1542  *
1543  * Copied from musl: src/math/powf.c src/math/powf_data.c
1544  */
1545 float CDECL powf( float x, float y )
1546 {
1547     UINT32 sign_bias = 0;
1548     UINT32 ix, iy;
1549     double logx, ylogx;
1550
1551     ix = *(UINT32*)&x;
1552     iy = *(UINT32*)&y;
1553     if (ix - 0x00800000 >= 0x7f800000 - 0x00800000 ||
1554             2 * iy - 1 >= 2u * 0x7f800000 - 1) {
1555         /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */
1556         if (2 * iy - 1 >= 2u * 0x7f800000 - 1) {
1557             if (2 * iy == 0)
1558                 return 1.0f;
1559             if (ix == 0x3f800000)
1560                 return 1.0f;
1561             if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000)
1562                 return x + y;
1563             if (2 * ix == 2 * 0x3f800000)
1564                 return 1.0f;
1565             if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
1566                 return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
1567             return y * y;
1568         }
1569         if (2 * ix - 1 >= 2u * 0x7f800000 - 1) {
1570             float x2 = x * x;
1571             if (ix & 0x80000000 && powf_checkint(iy) == 1)
1572                 x2 = -x2;
1573             if (iy & 0x80000000 && x2 == 0.0)
1574                 return math_error(_SING, "powf", x, y, 1 / x2);
1575             /* Without the barrier some versions of clang hoist the 1/x2 and
1576                thus division by zero exception can be signaled spuriously. */
1577             return iy & 0x80000000 ? fp_barrierf(1 / x2) : x2;
1578         }
1579         /* x and y are non-zero finite. */
1580         if (ix & 0x80000000) {
1581             /* Finite x < 0. */
1582             int yint = powf_checkint(iy);
1583             if (yint == 0)
1584                 return math_error(_DOMAIN, "powf", x, y, 0 / (x - x));
1585             if (yint == 1)
1586                 sign_bias = 1 << (5 + 11);
1587             ix &= 0x7fffffff;
1588         }
1589         if (ix < 0x00800000) {
1590             /* Normalize subnormal x so exponent becomes negative. */
1591             x *= 0x1p23f;
1592             ix = *(UINT32*)&x;
1593             ix &= 0x7fffffff;
1594             ix -= 23 << 23;
1595         }
1596     }
1597     logx = powf_log2(ix);
1598     ylogx = y * logx; /* cannot overflow, y is single prec. */
1599     if ((*(UINT64*)&ylogx >> 47 & 0xffff) >= 0x40af800000000000llu >> 47) {
1600         /* |y*log(x)| >= 126. */
1601         if (ylogx > 0x1.fffffffd1d571p+6 * (1 << 5))
1602             return math_error(_OVERFLOW, "powf", x, y, (sign_bias ? -1.0 : 1.0) * 0x1p1023);
1603         if (ylogx <= -150.0 * (1 << 5))
1604             return math_error(_UNDERFLOW, "powf", x, y, (sign_bias ? -1.0 : 1.0) / 0x1p1023);
1605     }
1606     return powf_exp2(ylogx, sign_bias);
1607 }
1608
1609 /*********************************************************************
1610  *      sinf (MSVCRT.@)
1611  *
1612  * Copied from musl: src/math/sinf.c
1613  */
1614 float CDECL sinf( float x )
1615 {
1616     static const double s1pio2 = 1*M_PI_2,
1617         s2pio2 = 2*M_PI_2,
1618         s3pio2 = 3*M_PI_2,
1619         s4pio2 = 4*M_PI_2;
1620
1621     double y;
1622     UINT32 ix;
1623     int n, sign;
1624
1625     ix = *(UINT32*)&x;
1626     sign = ix >> 31;
1627     ix &= 0x7fffffff;
1628
1629     if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
1630         if (ix < 0x39800000) { /* |x| < 2**-12 */
1631             /* raise inexact if x!=0 and underflow if subnormal */
1632             fp_barrierf(ix < 0x00800000 ? x / 0x1p120f : x + 0x1p120f);
1633             return x;
1634         }
1635         return __sindf(x);
1636     }
1637     if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
1638         if (ix <= 0x4016cbe3) { /* |x| ~<= 3pi/4 */
1639             if (sign)
1640                 return -__cosdf(x + s1pio2);
1641             else
1642                 return __cosdf(x - s1pio2);
1643         }
1644         return __sindf(sign ? -(x + s2pio2) : -(x - s2pio2));
1645     }
1646     if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
1647         if (ix <= 0x40afeddf) { /* |x| ~<= 7*pi/4 */
1648             if (sign)
1649                 return __cosdf(x + s3pio2);
1650             else
1651                 return -__cosdf(x - s3pio2);
1652         }
1653         return __sindf(sign ? x + s4pio2 : x - s4pio2);
1654     }
1655
1656     /* sin(Inf or NaN) is NaN */
1657     if (isinf(x))
1658         return math_error(_DOMAIN, "sinf", x, 0, x - x);
1659     if (ix >= 0x7f800000)
1660         return x - x;
1661
1662     /* general argument reduction needed */
1663     n = __rem_pio2f(x, &y);
1664     switch (n&3) {
1665     case 0: return __sindf(y);
1666     case 1: return __cosdf(y);
1667     case 2: return __sindf(-y);
1668     default: return -__cosdf(y);
1669     }
1670 }
1671
1672 /*********************************************************************
1673  *      sinhf (MSVCRT.@)
1674  */
1675 float CDECL sinhf( float x )
1676 {
1677     UINT32 ui = *(UINT32*)&x;
1678     float t, h, absx;
1679
1680     h = 0.5;
1681     if (ui >> 31)
1682         h = -h;
1683     /* |x| */
1684     ui &= 0x7fffffff;
1685     absx = *(float*)&ui;
1686
1687     /* |x| < log(FLT_MAX) */
1688     if (ui < 0x42b17217) {
1689         t = __expm1f(absx);
1690         if (ui < 0x3f800000) {
1691             if (ui < 0x3f800000 - (12 << 23))
1692                 return x;
1693             return h * (2 * t - t * t / (t + 1));
1694         }
1695         return h * (t + t / (t + 1));
1696     }
1697
1698     /* |x| > logf(FLT_MAX) or nan */
1699     if (ui > 0x7f800000)
1700         *(DWORD*)&t = *(DWORD*)&x | 0x400000;
1701     else
1702         t = __expo2f(absx, 2 * h);
1703     return t;
1704 }
1705
1706 static BOOL sqrtf_validate( float *x )
1707 {
1708     short c = _fdclass(*x);
1709
1710     if (c == FP_ZERO) return FALSE;
1711     if (c == FP_NAN) return FALSE;
1712     if (signbit(*x))
1713     {
1714         *x = math_error(_DOMAIN, "sqrtf", *x, 0, ret_nan(TRUE));
1715         return FALSE;
1716     }
1717     if (c == FP_INFINITE) return FALSE;
1718     return TRUE;
1719 }
1720
1721 #if defined(__x86_64__) || defined(__i386__)
1722 float CDECL sse2_sqrtf(float);
1723 __ASM_GLOBAL_FUNC( sse2_sqrtf,
1724         "sqrtss %xmm0, %xmm0\n\t"
1725         "ret" )
1726 #endif
1727
1728 /*********************************************************************
1729  *      sqrtf (MSVCRT.@)
1730  *
1731  * Copied from musl: src/math/sqrtf.c
1732  */
1733 float CDECL sqrtf( float x )
1734 {
1735 #ifdef __x86_64__
1736     if (!sqrtf_validate(&x))
1737         return x;
1738
1739     return sse2_sqrtf(x);
1740 #else
1741     static const float tiny = 1.0e-30;
1742
1743     float z;
1744     int ix,s,q,m,t,i;
1745     unsigned int r;
1746
1747     ix = *(int*)&x;
1748
1749     if (!sqrtf_validate(&x))
1750         return x;
1751
1752     /* normalize x */
1753     m = ix >> 23;
1754     if (m == 0) {  /* subnormal x */
1755         for (i = 0; (ix & 0x00800000) == 0; i++)
1756             ix <<= 1;
1757         m -= i - 1;
1758     }
1759     m -= 127;  /* unbias exponent */
1760     ix = (ix & 0x007fffff) | 0x00800000;
1761     if (m & 1)  /* odd m, double x to make it even */
1762         ix += ix;
1763     m >>= 1;  /* m = [m/2] */
1764
1765     /* generate sqrt(x) bit by bit */
1766     ix += ix;
1767     q = s = 0;       /* q = sqrt(x) */
1768     r = 0x01000000;  /* r = moving bit from right to left */
1769
1770     while (r != 0) {
1771         t = s + r;
1772         if (t <= ix) {
1773             s = t + r;
1774             ix -= t;
1775             q += r;
1776         }
1777         ix += ix;
1778         r >>= 1;
1779     }
1780
1781     /* use floating add to find out rounding direction */
1782     if (ix != 0) {
1783         z = 1.0f - tiny; /* raise inexact flag */
1784         if (z >= 1.0f) {
1785             z = 1.0f + tiny;
1786             if (z > 1.0f)
1787                 q += 2;
1788             else
1789                 q += q & 1;
1790         }
1791     }
1792     ix = (q >> 1) + 0x3f000000;
1793     r = ix + ((unsigned int)m << 23);
1794     z = *(float*)&r;
1795     return z;
1796 #endif
1797 }
1798
/* Copied from musl: src/math/__tandf.c
 *
 * Polynomial kernel used by tanf(): evaluates an odd polynomial r in
 * x and returns r when odd is zero, -1/r otherwise. */
static float __tandf(double x, int odd)
{
    static const double T[] = {
        0x15554d3418c99f.0p-54,
        0x1112fd38999f72.0p-55,
        0x1b54c91d865afe.0p-57,
        0x191df3908c33ce.0p-58,
        0x185dadfcecf44e.0p-61,
        0x1362b9bf971bcd.0p-59,
    };

    double sq, high, quad, cube, mid, low;

    /* the three coefficient pairs are evaluated independently and
       combined at the end */
    sq = x * x;
    high = T[4] + sq * T[5];
    mid = T[2] + sq * T[3];
    quad = sq * sq;
    cube = sq * x;
    low = T[0] + sq * T[1];
    high = (x + cube * low) + (cube * quad) * (mid + quad * high);

    if (odd)
        return -1.0 / high;
    return high;
}
1822
1823 /*********************************************************************
1824  *      tanf (MSVCRT.@)
1825  *
1826  * Copied from musl: src/math/tanf.c
1827  */
1828 float CDECL tanf( float x )
1829 {
1830     static const double t1pio2 = 1*M_PI_2,
1831         t2pio2 = 2*M_PI_2,
1832         t3pio2 = 3*M_PI_2,
1833         t4pio2 = 4*M_PI_2;
1834
1835     double y;
1836     UINT32 ix;
1837     unsigned n, sign;
1838
1839     ix = *(UINT32*)&x;
1840     sign = ix >> 31;
1841     ix &= 0x7fffffff;
1842
1843     if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
1844         if (ix < 0x39800000) { /* |x| < 2**-12 */
1845             /* raise inexact if x!=0 and underflow if subnormal */
1846             fp_barrierf(ix < 0x00800000 ? x / 0x1p120f : x + 0x1p120f);
1847             return x;
1848         }
1849         return __tandf(x, 0);
1850     }
1851     if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
1852         if (ix <= 0x4016cbe3) /* |x| ~<= 3pi/4 */
1853             return __tandf((sign ? x + t1pio2 : x - t1pio2), 1);
1854         else
1855             return __tandf((sign ? x + t2pio2 : x - t2pio2), 0);
1856     }
1857     if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
1858         if (ix <= 0x40afeddf) /* |x| ~<= 7*pi/4 */
1859             return __tandf((sign ? x + t3pio2 : x - t3pio2), 1);
1860         else
1861             return __tandf((sign ? x + t4pio2 : x - t4pio2), 0);
1862     }
1863
1864     /* tan(Inf or NaN) is NaN */
1865     if (isinf(x))
1866         return math_error(_DOMAIN, "tanf", x, 0, x - x);
1867     if (ix >= 0x7f800000)
1868         return x - x;
1869
1870     /* argument reduction */
1871     n = __rem_pio2f(x, &y);
1872     return __tandf(y, n & 1);
1873 }
1874
1875 /*********************************************************************
1876  *      tanhf (MSVCRT.@)
1877  */
1878 float CDECL tanhf( float x )
1879 {
1880     UINT32 ui = *(UINT32*)&x;
1881     UINT32 sign = ui & 0x80000000;
1882     float t;
1883
1884     /* x = |x| */
1885     ui &= 0x7fffffff;
1886     x = *(float*)&ui;
1887
1888     if (ui > 0x3f0c9f54) {
1889         /* |x| > log(3)/2 ~= 0.5493 or nan */
1890         if (ui > 0x41200000) {
1891             if (ui > 0x7f800000) {
1892                 *(UINT32*)&x = ui | sign | 0x400000;
1893 #if _MSVCR_VER < 140
1894                 return math_error(_DOMAIN, "tanhf", x, 0, x);
1895 #else
1896                 return x;
1897 #endif
1898             }
1899             /* |x| > 10 */
1900             fp_barrierf(x + 0x1p120f);
1901             t = 1 + 0 / x;
1902         } else {
1903             t = __expm1f(2 * x);
1904             t = 1 - 2 / (t + 2);
1905         }
1906     } else if (ui > 0x3e82c578) {
1907         /* |x| > log(5/3)/2 ~= 0.2554 */
1908         t = __expm1f(2 * x);
1909         t = t / (t + 2);
1910     } else if (ui >= 0x00800000) {
1911         /* |x| >= 0x1p-126 */
1912         t = __expm1f(-2 * x);
1913         t = -t / (t + 2);
1914     } else {
1915         /* |x| is subnormal */
1916         fp_barrierf(x * x);
1917         t = x;
1918     }
1919     return sign ? -t : t;
1920 }
1921
1922 /*********************************************************************
1923  *      ceilf (MSVCRT.@)
1924  *
1925  * Copied from musl: src/math/ceilf.c
1926  */
1927 float CDECL ceilf( float x )
1928 {
1929     union {float f; UINT32 i;} u = {x};
1930     int e = (int)(u.i >> 23 & 0xff) - 0x7f;
1931     UINT32 m;
1932
1933     if (e >= 23)
1934         return x;
1935     if (e >= 0) {
1936         m = 0x007fffff >> e;
1937         if ((u.i & m) == 0)
1938             return x;
1939         if (u.i >> 31 == 0)
1940             u.i += m;
1941         u.i &= ~m;
1942     } else {
1943         if (u.i >> 31)
1944             return -0.0;
1945         else if (u.i << 1)
1946             return 1.0;
1947     }
1948     return u.f;
1949 }
1950
/*********************************************************************
 *      floorf (MSVCRT.@)
 *
 * Round x towards -infinity to an integral value by editing the bits
 * of its IEEE single precision representation.
 *
 * Copied from musl: src/math/floorf.c
 */
float CDECL floorf( float x )
{
    union {float f; UINT32 i;} bits = {x};
    const int exponent = (int)(bits.i >> 23 & 0xff) - 0x7f; /* unbiased */
    const int negative = bits.i >> 31;
    UINT32 frac_mask;

    /* |x| >= 2^23, infinity or NaN: there are no fraction bits to drop */
    if (exponent >= 23)
        return x;

    /* |x| < 1: the answer is 0, -1 or x itself when x is -0.0 */
    if (exponent < 0) {
        if (!negative)
            return 0;
        if (bits.i << 1)
            return -1;
        return bits.f;
    }

    /* mantissa bits that encode the fractional part */
    frac_mask = 0x007fffff >> exponent;
    if (!(bits.i & frac_mask))
        return x; /* already integral */

    /* negative values are pushed past the next integer before truncating */
    if (negative)
        bits.i += frac_mask;
    bits.i &= ~frac_mask;
    return bits.f;
}
1979
/*********************************************************************
 *      frexpf (MSVCRT.@)
 *
 * Split x into a fraction with magnitude in [0.5, 1) and a power of two
 * such that x == fraction * 2^(*e).
 *
 * PARAMS
 *  x [I] value to decompose
 *  e [O] receives the binary exponent; set to 0 when x is zero and left
 *        untouched when x is infinite or NaN
 *
 * RETURNS
 *  The fraction, carrying the sign of x. Zero, infinities and NaNs are
 *  returned unchanged.
 *
 * Copied from musl: src/math/frexpf.c
 */
float CDECL frexpf( float x, int *e )
{
    /* A union is used for the bit access, like the neighbouring float
     * helpers do, instead of casting the address of x to an integer
     * pointer (which breaks the C effective type rules). */
    union {float f; UINT32 i;} u = {x};
    int ee = u.i >> 23 & 0xff;

    if (!ee) {
        if (x) {
            /* subnormal: scale into the normal range, then undo the scaling */
            x = frexpf(x * 0x1p64, e);
            *e -= 64;
        } else *e = 0;
        return x;
    } else if (ee == 0xff) {
        /* infinity or NaN: *e is deliberately not written */
        return x;
    }

    *e = ee - 0x7e;
    /* keep sign and mantissa, force the biased exponent of 0.5 (0x7e) */
    u.i &= 0x807fffff;
    u.i |= 0x3f000000;
    return u.f;
}
2005
/*********************************************************************
 *      modff (MSVCRT.@)
 *
 * Split x into integral and fractional parts, both carrying the sign
 * of x. The integral part is stored in *iptr, the fraction is returned.
 *
 * Copied from musl: src/math/modff.c
 */
float CDECL modff( float x, float *iptr )
{
    union {float f; UINT32 i;} bits = {x};
    const UINT32 sign_bit = bits.i & 0x80000000;
    const int exponent = (bits.i >> 23 & 0xff) - 0x7f; /* unbiased */
    UINT32 frac_mask;

    /* |x| < 1: the integral part is a zero with the sign of x */
    if (exponent < 0) {
        bits.i = sign_bit;
        *iptr = bits.f;
        return x;
    }

    /* |x| >= 2^23, infinity or NaN: there is no fractional part */
    if (exponent >= 23) {
        *iptr = x;
        if (exponent == 0x80 && bits.i << 9 != 0) /* nan */
            return x;
        bits.i = sign_bit;
        return bits.f;
    }

    /* mantissa bits that encode the fractional part */
    frac_mask = 0x007fffff >> exponent;
    if (bits.i & frac_mask) {
        bits.i &= ~frac_mask;
        *iptr = bits.f;
        return x - bits.f;
    }

    /* x is integral: the fraction is a signed zero */
    *iptr = x;
    bits.i = sign_bit;
    return bits.f;
}
2043
2044 #endif
2045
2046 #if !defined(__i386__) && !defined(__x86_64__) && (_MSVCR_VER == 0 || _MSVCR_VER >= 110)
2047
/*********************************************************************
 *      fabsf (MSVCRT.@)
 *
 * Return |x| by clearing the sign bit of the single precision encoding.
 *
 * Copied from musl: src/math/fabsf.c
 */
float CDECL fabsf( float x )
{
    union { float f; UINT32 i; } bits;

    bits.f = x;
    bits.i &= ~0x80000000u; /* drop the sign bit only */
    return bits.f;
}
2059
2060 #endif
2061
2062 /*********************************************************************
2063  *              acos (MSVCRT.@)
2064  *
2065  * Copied from musl: src/math/acos.c
2066  */
/* Rational approximation helper for acos(): returns P(z) / Q(z), where P has
 * no constant term and Q has constant term 1. */
static double acos_R(double z)
{
    /* P[i] multiplies z^(i+1) in the numerator */
    static const double P[] = {
         1.66666666666666657415e-01,
        -3.25565818622400915405e-01,
         2.01212532134862925881e-01,
        -4.00555345006794114027e-02,
         7.91534994289814532176e-04,
         3.47933107596021167570e-05,
    };
    /* Q[i] multiplies z^(i+1) in the denominator */
    static const double Q[] = {
        -2.40339491173441421878e+00,
         2.02094576023350569471e+00,
        -6.88283971605453293030e-01,
         7.70381505559019352791e-02,
    };

    const double num = z * (P[0] + z * (P[1] + z * (P[2] + z * (P[3] + z * (P[4] + z * P[5])))));
    const double den = 1.0 + z * (Q[0] + z * (Q[1] + z * (Q[2] + z * Q[3])));

    return num / den;
}
2085
2086 double CDECL acos( double x )
2087 {
2088     static const double pio2_hi = 1.57079632679489655800e+00,
2089                  pio2_lo = 6.12323399573676603587e-17;
2090
2091     double z, w, s, c, df;
2092     unsigned int hx, ix;
2093     ULONGLONG llx;
2094
2095     hx = *(ULONGLONG*)&x >> 32;
2096     ix = hx & 0x7fffffff;
2097     /* |x| >= 1 or nan */
2098     if (ix >= 0x3ff00000) {
2099         unsigned int lx;
2100
2101         lx = *(ULONGLONG*)&x;
2102         if (((ix - 0x3ff00000) | lx) == 0) {
2103             /* acos(1)=0, acos(-1)=pi */
2104             if (hx >> 31)
2105                 return 2 * pio2_hi + 7.5231638452626401e-37;
2106             return 0;
2107         }
2108         if (isnan(x)) return x;
2109         return math_error(_DOMAIN, "acos", x, 0, 0 / (x - x));
2110     }
2111     /* |x| < 0.5 */
2112     if (ix < 0x3fe00000) {
2113         if (ix <= 0x3c600000)  /* |x| < 2**-57 */
2114             return pio2_hi + 7.5231638452626401e-37;
2115         return pio2_hi - (x - (pio2_lo - x * acos_R(x * x)));
2116     }
2117     /* x < -0.5 */
2118     if (hx >> 31) {
2119         z = (1.0 + x) * 0.5;
2120         s = sqrt(z);
2121         w = acos_R(z) * s - pio2_lo;
2122         return 2 * (pio2_hi - (s + w));
2123     }
2124     /* x > 0.5 */
2125     z = (1.0 - x) * 0.5;
2126     s = sqrt(z);
2127     df = s;
2128     llx = (*(ULONGLONG*)&df >> 32) << 32;
2129     df = *(double*)&llx;
2130     c = (z - df * df) / (s + df);
2131     w = acos_R(z) * s + c;
2132     return 2 * (df + w);
2133 }
2134
2135 /*********************************************************************
2136  *              asin (MSVCRT.@)
2137  *
2138  * Copied from musl: src/math/asin.c
2139  */
/* Rational approximation helper for asin(): returns P(z) / Q(z), where P has
 * no constant term and Q has constant term 1. asin() evaluates it at z = x^2
 * for small |x|. */
static double asin_R(double z)
{
    /* coefficients for R(x^2): P[i] multiplies z^(i+1) in the numerator */
    static const double P[] = {
         1.66666666666666657415e-01,
        -3.25565818622400915405e-01,
         2.01212532134862925881e-01,
        -4.00555345006794114027e-02,
         7.91534994289814532176e-04,
         3.47933107596021167570e-05,
    };
    /* Q[i] multiplies z^(i+1) in the denominator */
    static const double Q[] = {
        -2.40339491173441421878e+00,
         2.02094576023350569471e+00,
        -6.88283971605453293030e-01,
         7.70381505559019352791e-02,
    };

    const double num = z * (P[0] + z * (P[1] + z * (P[2] + z * (P[3] + z * (P[4] + z * P[5])))));
    const double den = 1.0 + z * (Q[0] + z * (Q[1] + z * (Q[2] + z * Q[3])));

    return num / den;
}
2159
#ifdef __i386__
/* i386-only assembly fallback used by asin() (see the x87_asin(x) call there).
 *
 * The stub loads the double argument from 4(%esp) and runs the x87 sequence
 * fld/fld1/fsubp/fld1/fadd/fmulp/fsqrt/fpatan between SET_X87_CW(~0x37f) and
 * RESET_X87_CW.
 * NOTE(review): this appears to evaluate atan(x / sqrt((1 - x) * (1 + x))),
 * i.e. asin(x), with the x87 control word temporarily replaced by the two
 * macros; both macros are defined outside this section - confirm there.
 */
double CDECL x87_asin(double);
__ASM_GLOBAL_FUNC( x87_asin,
        "fldl 4(%esp)\n\t"
        SET_X87_CW(~0x37f)
        "fld %st\n\t"
        "fld1\n\t"
        "fsubp\n\t"
        "fld1\n\t"
        "fadd %st(2)\n\t"
        "fmulp\n\t"
        "fsqrt\n\t"
        "fpatan\n\t"
        RESET_X87_CW
        "ret" )
#endif
2176
2177 double CDECL asin( double x )
2178 {
2179     static const double pio2_hi = 1.57079632679489655800e+00,
2180                  pio2_lo = 6.12323399573676603587e-17;
2181
2182     double z, r, s;
2183     unsigned int hx, ix;
2184     ULONGLONG llx;
2185 #ifdef __i386__
2186     unsigned int x87_cw, sse2_cw;
2187 #endif
2188
2189     hx = *(ULONGLONG*)&x >> 32;
2190     ix = hx & 0x7fffffff;
2191     /* |x| >= 1 or nan */
2192     if (ix >= 0x3ff00000) {
2193         unsigned int lx;
2194         lx = *(ULONGLONG*)&x;
2195         if (((ix - 0x3ff00000) | lx) == 0)
2196             /* asin(1) = +-pi/2 with inexact */
2197             return x * pio2_hi + 7.5231638452626401e-37;
2198         if (isnan(x))
2199         {
2200 #ifdef __i386__
2201             return math_error(_DOMAIN, "asin", x, 0, x);
2202 #else
2203             return x;
2204 #endif
2205         }
2206         return math_error(_DOMAIN, "asin", x, 0, 0 / (x - x));
2207     }
2208
2209 #ifdef __i386__
2210     __control87_2(0, 0, &x87_cw, &sse2_cw);
2211     if (!sse2_enabled || (x87_cw & _MCW_EM) != _MCW_EM
2212             || (sse2_cw & (_MCW_EM | _MCW_RC)) != _MCW_EM)
2213         return x87_asin(x);
2214 #endif
2215
2216     /* |x| < 0.5 */
2217     if (ix < 0x3fe00000) {
2218         /* if 0x1p-1022 <= |x| < 0x1p-26, avoid raising underflow */
2219         if (ix < 0x3e500000 && ix >= 0x00100000)
2220             return x;
2221         return x + x * asin_R(x * x);
2222     }
2223     /* 1 > |x| >= 0.5 */
2224     z = (1 - fabs(x)) * 0.5;
2225     s = sqrt(z);
2226     r = asin_R(z);
2227     if (ix >= 0x3fef3333) {  /* if |x| > 0.975 */
2228         x = pio2_hi - (2 * (s + s * r) - pio2_lo);
2229     } else {
2230         double f, c;
2231         /* f+c = sqrt(z) */
2232         f = s;
2233         llx = (*(ULONGLONG*)&f >> 32) << 32;
2234         f = *(double*)&llx;
2235         c = (z - f * f) / (s + f);
2236         x = 0.5 * pio2_hi - (2 * s * r - (pio2_lo - 2 * c) - (0.5 * pio2_hi - 2 * f));
2237     }
2238     if (hx >> 31)
2239         return -x;
2240     return x;
2241 }
2242
2243 /*********************************************************************
2244  *              atan (MSVCRT.@)
2245  *
2246  * Copied from musl: src/math/atan.c
2247  */
2248 double CDECL atan( double x )
2249 {
2250     static const double atanhi[] = {
2251         4.63647609000806093515e-01,
2252         7.85398163397448278999e-01,
2253         9.82793723247329054082e-01,
2254         1.57079632679489655800e+00,
2255     };
2256     static const double atanlo[] = {
2257         2.26987774529616870924e-17,
2258         3.06161699786838301793e-17,
2259         1.39033110312309984516e-17,
2260         6.12323399573676603587e-17,
2261     };
2262     static const double aT[] = {
2263         3.33333333333329318027e-01,
2264         -1.99999999998764832476e-01,
2265         1.42857142725034663711e-01,
2266         -1.11111104054623557880e-01,
2267         9.09088713343650656196e-02,
2268         -7.69187620504482999495e-02,
2269         6.66107313738753120669e-02,
2270         -5.83357013379057348645e-02,
2271         4.97687799461593236017e-02,
2272         -3.65315727442169155270e-02,
2273         1.62858201153657823623e-02,
2274     };
2275
2276     double w, s1, s2, z;
2277     unsigned int ix, sign;
2278     int id;
2279
2280 #if _MSVCR_VER == 0
2281     if (isnan(x)) return math_error(_DOMAIN, "atan", x, 0, x);
2282 #endif
2283
2284     ix = *(ULONGLONG*)&x >> 32;
2285     sign = ix >> 31;
2286     ix &= 0x7fffffff;
2287     if (ix >= 0x44100000) {   /* if |x| >= 2^66 */
2288         if (isnan(x))
2289             return x;
2290         z = atanhi[3] + 7.5231638452626401e-37;
2291         return sign ? -z : z;
2292     }
2293     if (ix < 0x3fdc0000) {    /* |x| < 0.4375 */
2294         if (ix < 0x3e400000) {  /* |x| < 2^-27 */
2295             if (ix < 0x00100000)
2296                 /* raise underflow for subnormal x */
2297                 fp_barrierf((float)x);
2298             return x;
2299         }
2300         id = -1;
2301     } else {
2302         x = fabs(x);
2303         if (ix < 0x3ff30000) {  /* |x| < 1.1875 */
2304             if (ix < 0x3fe60000) {  /*  7/16 <= |x| < 11/16 */
2305                 id = 0;
2306                 x = (2.0 * x - 1.0) / (2.0 + x);
2307             } else {                /* 11/16 <= |x| < 19/16 */
2308                 id = 1;
2309                 x = (x - 1.0) / (x + 1.0);
2310             }
2311         } else {
2312             if (ix < 0x40038000) {  /* |x| < 2.4375 */
2313                 id = 2;
2314                 x = (x - 1.5) / (1.0 + 1.5 * x);
2315             } else {                /* 2.4375 <= |x| < 2^66 */
2316                 id = 3;
2317                 x = -1.0 / x;
2318             }
2319         }
2320     }
2321     /* end of argument reduction */
2322     z = x * x;
2323     w = z * z;
2324     /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
2325     s1 = z * (aT[0] + w * (aT[2] + w * (aT[4] + w * (aT[6] + w * (aT[8] + w * aT[10])))));
2326     s2 = w * (aT[1] + w * (aT[3] + w * (aT[5] + w * (aT[7] + w * aT[9]))));
2327     if (id < 0)
2328         return x - x * (s1 + s2);
2329     z = atanhi[id] - (x * (s1 + s2) - atanlo[id] - x);
2330     return sign ? -z : z;
2331 }
2332
2333 /*********************************************************************
2334  *              atan2 (MSVCRT.@)
2335  *
2336  * Copied from musl: src/math/atan2.c
2337  */
2338 double CDECL atan2( double y, double x )
2339 {
2340     static const double pi     = 3.1415926535897931160E+00,
2341                  pi_lo  = 1.2246467991473531772E-16;
2342
2343     double z;
2344     unsigned int m, lx, ly, ix, iy;
2345
2346     if (isnan(x) || isnan(y))
2347         return x+y;
2348     ix = *(ULONGLONG*)&x >> 32;
2349     lx = *(ULONGLONG*)&x;
2350     iy = *(ULONGLONG*)&y >> 32;
2351     ly = *(ULONGLONG*)&y;
2352     if (((ix - 0x3ff00000) | lx) == 0)  /* x = 1.0 */
2353         return atan(y);
2354     m = ((iy >> 31) & 1) | ((ix >> 30) & 2);  /* 2*sign(x)+sign(y) */
2355     ix = ix & 0x7fffffff;
2356     iy = iy & 0x7fffffff;
2357
2358     /* when y = 0 */
2359     if ((iy | ly) == 0) {
2360         switch(m) {
2361         case 0:
2362         case 1: return y;   /* atan(+-0,+anything)=+-0 */
2363         case 2: return pi;  /* atan(+0,-anything) = pi */
2364         case 3: return -pi; /* atan(-0,-anything) =-pi */
2365         }
2366     }
2367     /* when x = 0 */
2368     if ((ix | lx) == 0)
2369         return m & 1 ? -pi / 2 : pi / 2;
2370     /* when x is INF */
2371     if (ix == 0x7ff00000) {
2372         if (iy == 0x7ff00000) {
2373             switch(m) {
2374             case 0: return pi / 4;      /* atan(+INF,+INF) */
2375             case 1: return -pi / 4;     /* atan(-INF,+INF) */
2376             case 2: return 3 * pi / 4;  /* atan(+INF,-INF) */
2377             case 3: return -3 * pi / 4; /* atan(-INF,-INF) */
2378             }
2379         } else {
2380             switch(m) {
2381             case 0: return 0.0;  /* atan(+...,+INF) */
2382             case 1: return -0.0; /* atan(-...,+INF) */
2383             case 2: return pi;   /* atan(+...,-INF) */
2384             case 3: return -pi;  /* atan(-...,-INF) */
2385             }
2386         }
2387     }
2388     /* |y/x| > 0x1p64 */
2389     if (ix + (64 << 20) < iy || iy == 0x7ff00000)
2390         return m & 1 ? -pi / 2 : pi / 2;
2391
2392     /* z = atan(|y/x|) without spurious underflow */
2393     if ((m & 2) && iy + (64 << 20) < ix)  /* |y/x| < 0x1p-64, x<0 */
2394         z = 0;
2395     else
2396         z = atan(fabs(y / x));
2397     switch (m) {
2398     case 0: return z;                /* atan(+,+) */
2399     case 1: return -z;               /* atan(-,+) */
2400     case 2: return pi - (z - pi_lo); /* atan(+,-) */
2401     default: /* case 3 */
2402         return (z - pi_lo) - pi;     /* atan(-,-) */
2403     }
2404 }
2405
2406 /* Copied from musl: src/math/rint.c */
2407 static double __rint(double x)
2408 {
2409     static const double toint = 1 / DBL_EPSILON;
2410
2411     ULONGLONG llx = *(ULONGLONG*)&x;
2412     int e = llx >> 52 & 0x7ff;
2413     int s = llx >> 63;
2414     unsigned cw;
2415     double y;
2416
2417     if (e >= 0x3ff+52)
2418         return x;
2419     cw = _controlfp(0, 0);
2420     if ((cw & _MCW_PC) != _PC_53)
2421         _controlfp(_PC_53, _MCW_PC);
2422     if (s)
2423         y = fp_barrier(x - toint) + toint;
2424     else
2425         y = fp_barrier(x + toint) - toint;
2426     if ((cw & _MCW_PC) != _PC_53)
2427         _controlfp(cw, _MCW_PC);
2428     if (y == 0)
2429         return s ? -0.0 : 0;
2430     return y;
2431 }
2432
/* Copied from musl: src/math/__rem_pio2.c
 *
 * Argument reduction for the trigonometric functions: writes the remainder
 * of x with respect to a multiple of pi/2 as the pair y[0] + y[1] (head and
 * tail) and returns the multiple n that was removed. Callers in this file
 * only look at n & 3.
 *
 *  x  value to reduce
 *  y  receives the remainder as two doubles; y[0] is the leading part
 *
 * Four ranges are handled:
 *  - |x| up to about 9pi/4: the multiple (+-1 .. +-4) is chosen by comparing
 *    the high word of x against constants, and pi/2 is subtracted in two
 *    pieces (pio2_1 + pio2_1t);
 *  - |x| below about 2^20*(pi/2), and the near-multiple cases redirected
 *    here through "goto medium": n = rint(x * invpio2), with up to three
 *    refinement rounds using successively smaller pieces of pi/2;
 *  - infinity and NaN: y[0] = y[1] = x - x (NaN), n = 0;
 *  - everything else: |x| is cut into up to three 24-bit chunks and handed to
 *    __rem_pio2_large().
 */
static int __rem_pio2(double x, double *y)
{
    static const double pio4    = 0x1.921fb54442d18p-1,
                 invpio2 = 6.36619772367581382433e-01,
                 pio2_1  = 1.57079632673412561417e+00,
                 pio2_1t = 6.07710050650619224932e-11,
                 pio2_2  = 6.07710050630396597660e-11,
                 pio2_2t = 2.02226624879595063154e-21,
                 pio2_3  = 2.02226624871116645580e-21,
                 pio2_3t = 8.47842766036889956997e-32;

    union {double f; UINT64 i;} u = {x};
    double z, w, t, r, fn, tx[3], ty[2];
    UINT32 ix;
    int sign, n, ex, ey, i;

    sign = u.i >> 63;
    /* high word of |x|; all range checks below compare against it */
    ix = u.i >> 32 & 0x7fffffff;
    if (ix <= 0x400f6a7a) { /* |x| ~<= 5pi/4 */
        if ((ix & 0xfffff) == 0x921fb) /* |x| ~= pi/2 or 2pi/2 */
            goto medium; /* cancellation -- use medium case */
        if (ix <= 0x4002d97c) { /* |x| ~<= 3pi/4 */
            if (!sign) {
                z = x - pio2_1; /* one round good to 85 bits */
                y[0] = z - pio2_1t;
                y[1] = (z - y[0]) - pio2_1t;
                return 1;
            } else {
                z = x + pio2_1;
                y[0] = z + pio2_1t;
                y[1] = (z - y[0]) + pio2_1t;
                return -1;
            }
        } else {
            if (!sign) {
                z = x - 2 * pio2_1;
                y[0] = z - 2 * pio2_1t;
                y[1] = (z - y[0]) - 2 * pio2_1t;
                return 2;
            } else {
                z = x + 2 * pio2_1;
                y[0] = z + 2 * pio2_1t;
                y[1] = (z - y[0]) + 2 * pio2_1t;
                return -2;
            }
        }
    }
    if (ix <= 0x401c463b) { /* |x| ~<= 9pi/4 */
        if (ix <= 0x4015fdbc) { /* |x| ~<= 7pi/4 */
            if (ix == 0x4012d97c) /* |x| ~= 3pi/2 */
                goto medium;
            if (!sign) {
                z = x - 3 * pio2_1;
                y[0] = z - 3 * pio2_1t;
                y[1] = (z - y[0]) - 3 * pio2_1t;
                return 3;
            } else {
                z = x + 3 * pio2_1;
                y[0] = z + 3 * pio2_1t;
                y[1] = (z - y[0]) + 3 * pio2_1t;
                return -3;
            }
        } else {
            if (ix == 0x401921fb) /* |x| ~= 4pi/2 */
                goto medium;
            if (!sign) {
                z = x - 4 * pio2_1;
                y[0] = z - 4 * pio2_1t;
                y[1] = (z - y[0]) - 4 * pio2_1t;
                return 4;
            } else {
                z = x + 4 * pio2_1;
                y[0] = z + 4 * pio2_1t;
                y[1] = (z - y[0]) + 4 * pio2_1t;
                return -4;
            }
        }
    }
    if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */
medium:
        /* fn = nearest integral multiple count of pi/2 contained in x */
        fn = __rint(x * invpio2);
        n = (INT32)fn;
        r = x - fn * pio2_1;
        w = fn * pio2_1t; /* 1st round, good to 85 bits */
        /* Matters with directed rounding. */
        if (r - w < -pio4) {
            n--;
            fn--;
            r = x - fn * pio2_1;
            w = fn * pio2_1t;
        } else if (r - w > pio4) {
            n++;
            fn++;
            r = x - fn * pio2_1;
            w = fn * pio2_1t;
        }
        y[0] = r - w;
        u.f = y[0];
        /* compare the exponent of the remainder (ey) with that of x (ex):
         * a large gap means many leading bits cancelled */
        ey = u.i >> 52 & 0x7ff;
        ex = ix >> 20;
        if (ex - ey > 16) { /* 2nd round, good to 118 bits */
            t = r;
            w = fn * pio2_2;
            r = t - w;
            w = fn * pio2_2t - ((t - r) - w);
            y[0] = r - w;
            u.f = y[0];
            ey = u.i >> 52 & 0x7ff;
            if (ex - ey > 49) { /* 3rd round, good to 151 bits, covers all cases */
                t = r;
                w = fn * pio2_3;
                r = t - w;
                w = fn * pio2_3t - ((t - r) - w);
                y[0] = r - w;
            }
        }
        /* tail: what was lost when rounding r - w into y[0] */
        y[1] = (r - y[0]) - w;
        return n;
    }
    /*
     * all other (large) arguments
     */
    if (ix >= 0x7ff00000) {  /* x is inf or NaN */
        y[0] = y[1] = x - x;
        return 0;
    }
    /* set z = scalbn(|x|,-ilogb(x)+23) */
    u.f = x;
    u.i &= (UINT64)-1 >> 12;
    u.i |= (UINT64)(0x3ff + 23) << 52;
    z = u.f;
    /* peel off two 24-bit integer chunks; the rest goes into tx[2] */
    for (i = 0; i < 2; i++) {
        tx[i] = (double)(INT32)z;
        z = (z - tx[i]) * 0x1p24;
    }
    tx[i] = z;
    /* skip zero terms, first term is non-zero */
    while (tx[i] == 0.0)
        i--;
    n = __rem_pio2_large(tx, ty, (int)(ix >> 20) - (0x3ff + 23), i + 1, 1);
    /* the reduction was done on |x|; mirror the result for negative x */
    if (sign) {
        y[0] = -ty[0];
        y[1] = -ty[1];
        return -n;
    }
    y[0] = ty[0];
    y[1] = ty[1];
    return n;
}
2583
/* Copied from musl: src/math/__sin.c
 *
 * Polynomial sine kernel for an already reduced argument.
 *
 *  x   leading part of the argument
 *  y   tail of the argument; only used when iy is non-zero
 *  iy  0 when y is to be ignored (treated as zero), non-zero otherwise
 */
static double __sin(double x, double y, int iy)
{
    /* S[i] multiplies x^(2i+3) */
    static const double S[] = {
        -1.66666666666666324348e-01,
         8.33333333332248946124e-03,
        -1.98412698298579493134e-04,
         2.75573137070700676789e-06,
        -2.50507602534068634195e-08,
         1.58969099521155010221e-10,
    };

    const double x2 = x * x;
    const double x4 = x2 * x2;
    const double tail = S[1] + x2 * (S[2] + x2 * S[3]) + x2 * x4 * (S[4] + x2 * S[5]);
    const double x3 = x2 * x;

    if (iy != 0)
        return x - ((x2 * (0.5 * y - x3 * tail) - y) - x3 * S[0]);
    return x + x3 * (S[0] + x2 * tail);
}
2605
/* Copied from musl: src/math/__cos.c
 *
 * Polynomial cosine kernel for an already reduced argument.
 *
 *  x  leading part of the argument
 *  y  tail of the argument (pass 0 when there is none)
 */
static double __cos(double x, double y)
{
    /* C[i] multiplies x^(2i+4) */
    static const double C[] = {
         4.16666666666666019037e-02,
        -1.38888888888741095749e-03,
         2.48015872894767294178e-05,
        -2.75573143513906633035e-07,
         2.08757232129817482790e-09,
        -1.13596475577881948265e-11,
    };

    const double x2 = x * x;
    const double x4 = x2 * x2;
    const double poly = x2 * (C[0] + x2 * (C[1] + x2 * C[2])) + x4 * x4 * (C[3] + x2 * (C[4] + x2 * C[5]));
    const double half_x2 = 0.5 * x2;
    const double lead = 1.0 - half_x2;

    /* (1.0 - lead) - half_x2 recovers the rounding error made in lead */
    return lead + (((1.0 - lead) - half_x2) + (x2 * poly - x * y));
}
2624
2625 /*********************************************************************
2626  *              cos (MSVCRT.@)
2627  *
2628  * Copied from musl: src/math/cos.c
2629  */
2630 double CDECL cos( double x )
2631 {
2632     double y[2];
2633     UINT32 ix;
2634     unsigned n;
2635
2636     ix = *(ULONGLONG*)&x >> 32;
2637     ix &= 0x7fffffff;
2638
2639     /* |x| ~< pi/4 */
2640     if (ix <= 0x3fe921fb) {
2641         if (ix < 0x3e46a09e) { /* |x| < 2**-27 * sqrt(2) */
2642             /* raise inexact if x!=0 */
2643             fp_barrier(x + 0x1p120f);
2644             return 1.0;
2645         }
2646         return __cos(x, 0);
2647     }
2648
2649     /* cos(Inf or NaN) is NaN */
2650     if (isinf(x)) return math_error(_DOMAIN, "cos", x, 0, x - x);
2651     if (ix >= 0x7ff00000)
2652         return x - x;
2653
2654     /* argument reduction */
2655     n = __rem_pio2(x, y);
2656     switch (n & 3) {
2657     case 0: return __cos(y[0], y[1]);
2658     case 1: return -__sin(y[0], y[1], 1);
2659     case 2: return -__cos(y[0], y[1]);
2660     default: return __sin(y[0], y[1], 1);
2661     }
2662 }
2663
/* Copied from musl: src/math/expm1.c
 *
 * Computes exp(x) - 1.
 *
 * The argument is first reduced to x = k*ln2 + r with |r| <= 0.5*ln2, a
 * rational approximation is evaluated on r, and the result is rebuilt by
 * scaling with 2^k.
 *
 * Special cases visible below:
 *  - NaN is returned unchanged, +inf gives +inf, -inf gives -1;
 *  - large negative finite x (|x| >= 56*ln2) goes through
 *    math_error(_UNDERFLOW, ...) with result -1;
 *  - x above o_threshold goes through math_error(_OVERFLOW, ...);
 *  - |x| < 2**-54 returns x itself.
 * Note that both math_error() calls report the function name "exp".
 */
static double CDECL __expm1(double x)
{
    static const double o_threshold = 7.09782712893383973096e+02,
        ln2_hi = 6.93147180369123816490e-01,
        ln2_lo = 1.90821492927058770002e-10,
        invln2 = 1.44269504088896338700e+00,
        Q1 = -3.33333333333331316428e-02,
        Q2 = 1.58730158725481460165e-03,
        Q3 = -7.93650757867487942473e-05,
        Q4 = 4.00821782732936239552e-06,
        Q5 = -2.01099218183624371326e-07;

    double y, hi, lo, c, t, e, hxs, hfx, r1, twopk;
    union {double f; UINT64 i;} u = {x};
    /* high word of |x|, used for all magnitude checks */
    UINT32 hx = u.i >> 32 & 0x7fffffff;
    int k, sign = u.i >> 63;

    /* filter out huge and non-finite argument */
    if (hx >= 0x4043687A) { /* if |x|>=56*ln2 */
        if (isnan(x))
            return x;
        if (isinf(x))
            return sign ? -1 : x;
        if (sign)
            return math_error(_UNDERFLOW, "exp", x, 0, -1);
        if (x > o_threshold)
            return math_error(_OVERFLOW, "exp", x, 0, x * 0x1p1023);
    }

    /* argument reduction */
    if (hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */
        if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
            if (!sign) {
                hi = x - ln2_hi;
                lo = ln2_lo;
                k = 1;
            } else {
                hi = x + ln2_hi;
                lo = -ln2_lo;
                k = -1;
            }
        } else {
            /* k = x/ln2 rounded to nearest (truncation after +-0.5) */
            k = invln2 * x + (sign ? -0.5 : 0.5);
            t = k;
            hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
            lo = t * ln2_lo;
        }
        x = hi - lo;
        /* c is the rounding error of the subtraction above */
        c = (hi - x) - lo;
    } else if (hx < 0x3c900000) { /* |x| < 2**-54, return x */
        /* evaluated only for its floating point side effects */
        fp_barrier(x + 0x1p120f);
        if (hx < 0x00100000)
            fp_barrier((float)x);
        return x;
    } else
        k = 0;

    /* x is now in primary range */
    hfx = 0.5 * x;
    hxs = x * hfx;
    r1 = 1.0 + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5))));
    t = 3.0 - r1 * hfx;
    e = hxs * ((r1 - t) / (6.0 - x * t));
    if (k == 0) /* c is 0 */
        return x - (x * e - hxs);
    e = x * (e - c) - c;
    e -= hxs;
    /* exp(x) ~ 2^k (x_reduced - e + 1) */
    if (k == -1)
        return 0.5 * (x - e) - 0.5;
    if (k == 1) {
        if (x < -0.25)
            return -2.0 * (e - (x + 0.5));
        return 1.0 + 2.0 * (x - e);
    }
    u.i = (UINT64)(0x3ff + k) << 52; /* 2^k */
    twopk = u.f;
    if (k < 0 || k > 56) { /* suffice to return exp(x)-1 */
        y = x - e + 1.0;
        /* for k == 1024 the scaling is done as 2.0 * 2^1023 instead of twopk */
        if (k == 1024)
            y = y * 2.0 * 0x1p1023;
        else
            y = y * twopk;
        return y - 1.0;
    }
    u.i = (UINT64)(0x3ff - k) << 52; /* 2^-k */
    /* fold the "- 1" in as 2^-k before scaling; the grouping depends on k */
    if (k < 20)
        y = (x - e + (1 - u.f)) * twopk;
    else
        y = (x - (e + u.f) + 1) * twopk;
    return y;
}
2757
2758 static double __expo2(double x, double sign)
2759 {
2760     static const int k = 2043;
2761     static const double kln2 = 0x1.62066151add8bp+10;
2762     double scale;
2763
2764     *(UINT64*)&scale = (UINT64)(0x3ff + k / 2) << 52;
2765     return exp(x - kln2) * (sign * scale) * scale;
2766 }
2767
2768 /*********************************************************************
2769  *              cosh (MSVCRT.@)
2770  *
2771  * Copied from musl: src/math/cosh.c
2772  */
2773 double CDECL cosh( double x )
2774 {
2775     UINT64 ux = *(UINT64*)&x;
2776     UINT64 sign = ux & 0x8000000000000000ULL;
2777     UINT32 w;
2778     double t;
2779
2780     /* |x| */
2781     ux &= (uint64_t)-1 / 2;
2782     x = *(double*)&ux;
2783     w = ux >> 32;
2784
2785     /* |x| < log(2) */
2786     if (w < 0x3fe62e42) {
2787         if (w < 0x3ff00000 - (26 << 20)) {
2788             fp_barrier(x + 0x1p120f);
2789             return 1;
2790         }
2791         t = __expm1(x);
2792         return 1 + t * t / (2 * (1 + t));
2793     }
2794
2795     /* |x| < log(DBL_MAX) */
2796     if (w < 0x40862e42) {
2797         t = exp(x);
2798         /* note: if x>log(0x1p26) then the 1/t is not needed */
2799         return 0.5 * (t + 1 / t);
2800     }
2801
2802     /* |x| > log(DBL_MAX) or nan */
2803     /* note: the result is stored to handle overflow */
2804     if (ux > 0x7ff0000000000000ULL)
2805         *(UINT64*)&t = ux | sign | 0x0008000000000000ULL;
2806     else
2807         t = __expo2(x, 1.0);
2808     return t;
2809 }
2810
2811 /* Copied from musl: src/math/exp_data.c */
2812 static const UINT64 exp_T[] = {
2813     0x0ULL, 0x3ff0000000000000ULL,
2814     0x3c9b3b4f1a88bf6eULL, 0x3feff63da9fb3335ULL,
2815     0xbc7160139cd8dc5dULL, 0x3fefec9a3e778061ULL,
2816     0xbc905e7a108766d1ULL, 0x3fefe315e86e7f85ULL,
2817     0x3c8cd2523567f613ULL, 0x3fefd9b0d3158574ULL,
2818     0xbc8bce8023f98efaULL, 0x3fefd06b29ddf6deULL,
2819     0x3c60f74e61e6c861ULL, 0x3fefc74518759bc8ULL,
2820     0x3c90a3e45b33d399ULL, 0x3fefbe3ecac6f383ULL,
2821     0x3c979aa65d837b6dULL, 0x3fefb5586cf9890fULL,
2822     0x3c8eb51a92fdeffcULL, 0x3fefac922b7247f7ULL,
2823     0x3c3ebe3d702f9cd1ULL, 0x3fefa3ec32d3d1a2ULL,
2824     0xbc6a033489906e0bULL, 0x3fef9b66affed31bULL,
2825     0xbc9556522a2fbd0eULL, 0x3fef9301d0125b51ULL,
2826     0xbc5080ef8c4eea55ULL, 0x3fef8abdc06c31ccULL,
2827     0xbc91c923b9d5f416ULL, 0x3fef829aaea92de0ULL,
2828     0x3c80d3e3e95c55afULL, 0x3fef7a98c8a58e51ULL,
2829     0xbc801b15eaa59348ULL, 0x3fef72b83c7d517bULL,
2830     0xbc8f1ff055de323dULL, 0x3fef6af9388c8deaULL,
2831     0x3c8b898c3f1353bfULL, 0x3fef635beb6fcb75ULL,
2832     0xbc96d99c7611eb26ULL, 0x3fef5be084045cd4ULL,
2833     0x3c9aecf73e3a2f60ULL, 0x3fef54873168b9aaULL,
2834     0xbc8fe782cb86389dULL, 0x3fef4d5022fcd91dULL,
2835     0x3c8a6f4144a6c38dULL, 0x3fef463b88628cd6ULL,
2836     0x3c807a05b0e4047dULL, 0x3fef3f49917ddc96ULL,
2837     0x3c968efde3a8a894ULL, 0x3fef387a6e756238ULL,
2838     0x3c875e18f274487dULL, 0x3fef31ce4fb2a63fULL,
2839     0x3c80472b981fe7f2ULL, 0x3fef2b4565e27cddULL,
2840     0xbc96b87b3f71085eULL, 0x3fef24dfe1f56381ULL,
2841     0x3c82f7e16d09ab31ULL, 0x3fef1e9df51fdee1ULL,
2842     0xbc3d219b1a6fbffaULL, 0x3fef187fd0dad990ULL,
2843     0x3c8b3782720c0ab4ULL, 0x3fef1285a6e4030bULL,
2844     0x3c6e149289cecb8fULL, 0x3fef0cafa93e2f56ULL,
2845     0x3c834d754db0abb6ULL, 0x3fef06fe0a31b715ULL,
2846     0x3c864201e2ac744cULL, 0x3fef0170fc4cd831ULL,
2847     0x3c8fdd395dd3f84aULL, 0x3feefc08b26416ffULL,
2848     0xbc86a3803b8e5b04ULL, 0x3feef6c55f929ff1ULL,
2849     0xbc924aedcc4b5068ULL, 0x3feef1a7373aa9cbULL,
2850     0xbc9907f81b512d8eULL, 0x3feeecae6d05d866ULL,
2851     0xbc71d1e83e9436d2ULL, 0x3feee7db34e59ff7ULL,
2852     0xbc991919b3ce1b15ULL, 0x3feee32dc313a8e5ULL,
2853     0x3c859f48a72a4c6dULL, 0x3feedea64c123422ULL,
2854     0xbc9312607a28698aULL, 0x3feeda4504ac801cULL,
2855     0xbc58a78f4817895bULL, 0x3feed60a21f72e2aULL,
2856     0xbc7c2c9b67499a1bULL, 0x3feed1f5d950a897ULL,
2857     0x3c4363ed60c2ac11ULL, 0x3feece086061892dULL,
2858     0x3c9666093b0664efULL, 0x3feeca41ed1d0057ULL,
2859     0x3c6ecce1daa10379ULL, 0x3feec6a2b5c13cd0ULL,
2860     0x3c93ff8e3f0f1230ULL, 0x3feec32af0d7d3deULL,
2861     0x3c7690cebb7aafb0ULL, 0x3feebfdad5362a27ULL,
2862     0x3c931dbdeb54e077ULL, 0x3feebcb299fddd0dULL,
2863     0xbc8f94340071a38eULL, 0x3feeb9b2769d2ca7ULL,
2864     0xbc87deccdc93a349ULL, 0x3feeb6daa2cf6642ULL,
2865     0xbc78dec6bd0f385fULL, 0x3feeb42b569d4f82ULL,
2866     0xbc861246ec7b5cf6ULL, 0x3feeb1a4ca5d920fULL,
2867     0x3c93350518fdd78eULL, 0x3feeaf4736b527daULL,
2868     0x3c7b98b72f8a9b05ULL, 0x3feead12d497c7fdULL,
2869     0x3c9063e1e21c5409ULL, 0x3feeab07dd485429ULL,
2870     0x3c34c7855019c6eaULL, 0x3feea9268a5946b7ULL,
2871     0x3c9432e62b64c035ULL, 0x3feea76f15ad2148ULL,
2872     0xbc8ce44a6199769fULL, 0x3feea5e1b976dc09ULL,
2873     0xbc8c33c53bef4da8ULL, 0x3feea47eb03a5585ULL,
2874     0xbc845378892be9aeULL, 0x3feea34634ccc320ULL,
2875     0xbc93cedd78565858ULL, 0x3feea23882552225ULL,
2876     0x3c5710aa807e1964ULL, 0x3feea155d44ca973ULL,
2877     0xbc93b3efbf5e2228ULL, 0x3feea09e667f3bcdULL,
2878     0xbc6a12ad8734b982ULL, 0x3feea012750bdabfULL,
2879     0xbc6367efb86da9eeULL, 0x3fee9fb23c651a2fULL,
2880     0xbc80dc3d54e08851ULL, 0x3fee9f7df9519484ULL,
2881     0xbc781f647e5a3ecfULL, 0x3fee9f75e8ec5f74ULL,
2882     0xbc86ee4ac08b7db0ULL, 0x3fee9f9a48a58174ULL,
2883     0xbc8619321e55e68aULL, 0x3fee9feb564267c9ULL,
2884     0x3c909ccb5e09d4d3ULL, 0x3feea0694fde5d3fULL,
2885     0xbc7b32dcb94da51dULL, 0x3feea11473eb0187ULL,
2886     0x3c94ecfd5467c06bULL, 0x3feea1ed0130c132ULL,
2887     0x3c65ebe1abd66c55ULL, 0x3feea2f336cf4e62ULL,
2888     0xbc88a1c52fb3cf42ULL, 0x3feea427543e1a12ULL,
2889     0xbc9369b6f13b3734ULL, 0x3feea589994cce13ULL,
2890     0xbc805e843a19ff1eULL, 0x3feea71a4623c7adULL,
2891     0xbc94d450d872576eULL, 0x3feea8d99b4492edULL,
2892     0x3c90ad675b0e8a00ULL, 0x3feeaac7d98a6699ULL,
2893     0x3c8db72fc1f0eab4ULL, 0x3feeace5422aa0dbULL,
2894     0xbc65b6609cc5e7ffULL, 0x3feeaf3216b5448cULL,
2895     0x3c7bf68359f35f44ULL, 0x3feeb1ae99157736ULL,
2896     0xbc93091fa71e3d83ULL, 0x3feeb45b0b91ffc6ULL,
2897     0xbc5da9b88b6c1e29ULL, 0x3feeb737b0cdc5e5ULL,
2898     0xbc6c23f97c90b959ULL, 0x3feeba44cbc8520fULL,
2899     0xbc92434322f4f9aaULL, 0x3feebd829fde4e50ULL,
2900     0xbc85ca6cd7668e4bULL, 0x3feec0f170ca07baULL,
2901     0x3c71affc2b91ce27ULL, 0x3feec49182a3f090ULL,
2902     0x3c6dd235e10a73bbULL, 0x3feec86319e32323ULL,
2903     0xbc87c50422622263ULL, 0x3feecc667b5de565ULL,
2904     0x3c8b1c86e3e231d5ULL, 0x3feed09bec4a2d33ULL,
2905     0xbc91bbd1d3bcbb15ULL, 0x3feed503b23e255dULL,
2906     0x3c90cc319cee31d2ULL, 0x3feed99e1330b358ULL,
2907     0x3c8469846e735ab3ULL, 0x3feede6b5579fdbfULL,
2908     0xbc82dfcd978e9db4ULL, 0x3feee36bbfd3f37aULL,
2909     0x3c8c1a7792cb3387ULL, 0x3feee89f995ad3adULL,
2910     0xbc907b8f4ad1d9faULL, 0x3feeee07298db666ULL,
2911     0xbc55c3d956dcaebaULL, 0x3feef3a2b84f15fbULL,
2912     0xbc90a40e3da6f640ULL, 0x3feef9728de5593aULL,
2913     0xbc68d6f438ad9334ULL, 0x3feeff76f2fb5e47ULL,
2914     0xbc91eee26b588a35ULL, 0x3fef05b030a1064aULL,
2915     0x3c74ffd70a5fddcdULL, 0x3fef0c1e904bc1d2ULL,
2916     0xbc91bdfbfa9298acULL, 0x3fef12c25bd71e09ULL,
2917     0x3c736eae30af0cb3ULL, 0x3fef199bdd85529cULL,
2918     0x3c8ee3325c9ffd94ULL, 0x3fef20ab5fffd07aULL,
2919     0x3c84e08fd10959acULL, 0x3fef27f12e57d14bULL,
2920     0x3c63cdaf384e1a67ULL, 0x3fef2f6d9406e7b5ULL,
2921     0x3c676b2c6c921968ULL, 0x3fef3720dcef9069ULL,
2922     0xbc808a1883ccb5d2ULL, 0x3fef3f0b555dc3faULL,
2923     0xbc8fad5d3ffffa6fULL, 0x3fef472d4a07897cULL,
2924     0xbc900dae3875a949ULL, 0x3fef4f87080d89f2ULL,
2925     0x3c74a385a63d07a7ULL, 0x3fef5818dcfba487ULL,
2926     0xbc82919e2040220fULL, 0x3fef60e316c98398ULL,
2927     0x3c8e5a50d5c192acULL, 0x3fef69e603db3285ULL,
2928     0x3c843a59ac016b4bULL, 0x3fef7321f301b460ULL,
2929     0xbc82d52107b43e1fULL, 0x3fef7c97337b9b5fULL,
2930     0xbc892ab93b470dc9ULL, 0x3fef864614f5a129ULL,
2931     0x3c74b604603a88d3ULL, 0x3fef902ee78b3ff6ULL,
2932     0x3c83c5ec519d7271ULL, 0x3fef9a51fbc74c83ULL,
2933     0xbc8ff7128fd391f0ULL, 0x3fefa4afa2a490daULL,
2934     0xbc8dae98e223747dULL, 0x3fefaf482d8e67f1ULL,
2935     0x3c8ec3bc41aa2008ULL, 0x3fefba1bee615a27ULL,
2936     0x3c842b94c3a9eb32ULL, 0x3fefc52b376bba97ULL,
2937     0x3c8a64a931d185eeULL, 0x3fefd0765b6e4540ULL,
2938     0xbc8e37bae43be3edULL, 0x3fefdbfdad9cbe14ULL,
2939     0x3c77893b4d91cd9dULL, 0x3fefe7c1819e90d8ULL,
2940     0x3c5305c14160cc89ULL, 0x3feff3c22b8f71f1ULL
2941 };
2942
2943 /*********************************************************************
2944  *              exp (MSVCRT.@)
2945  *
2946  * Copied from musl: src/math/exp.c
2947  */
2948 double CDECL exp( double x )
2949 {
2950     static const double C[] = {
2951         0x1.ffffffffffdbdp-2,
2952         0x1.555555555543cp-3,
2953         0x1.55555cf172b91p-5,
2954         0x1.1111167a4d017p-7
2955     };
2956     static const double invln2N = 0x1.71547652b82fep0 * (1 << 7),
2957         negln2hiN = -0x1.62e42fefa0000p-8,
2958         negln2loN = -0x1.cf79abc9e3b3ap-47;
2959
2960     UINT32 abstop;
2961     UINT64 ki, idx, top, sbits;
2962     double kd, z, r, r2, scale, tail, tmp;
2963
2964     abstop = (*(UINT64*)&x >> 52) & 0x7ff;
2965     if (abstop -  0x3c9 >= 0x408 - 0x3c9) {
2966         if (abstop - 0x3c9 >= 0x80000000)
2967             /* Avoid spurious underflow for tiny x. */
2968             /* Note: 0 is common input. */
2969             return 1.0 + x;
2970         if (abstop >= 0x409) {
2971             if (*(UINT64*)&x == 0xfff0000000000000ULL)
2972                 return 0.0;
2973 #if _MSVCR_VER == 0
2974             if (*(UINT64*)&x > 0x7ff0000000000000ULL)
2975                 return math_error(_DOMAIN, "exp", x, 0, 1.0 + x);
2976 #endif
2977             if (abstop >= 0x7ff)
2978                 return 1.0 + x;
2979             if (*(UINT64*)&x >> 63)
2980                 return math_error(_UNDERFLOW, "exp", x, 0, fp_barrier(DBL_MIN) * DBL_MIN);
2981             else
2982                 return math_error(_OVERFLOW, "exp", x, 0, fp_barrier(DBL_MAX) * DBL_MAX);
2983         }
2984         /* Large x is special cased below. */
2985         abstop = 0;
2986     }
2987
2988     /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
2989     /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
2990     z = invln2N * x;
2991     kd = __round(z);
2992     ki = (INT64)kd;
2993
2994     r = x + kd * negln2hiN + kd * negln2loN;
2995     /* 2^(k/N) ~= scale * (1 + tail). */
2996     idx = 2 * (ki % (1 << 7));
2997     top = ki << (52 - 7);
2998     tail = *(double*)&exp_T[idx];
2999     /* This is only a valid scale when -1023*N < k < 1024*N. */
3000     sbits = exp_T[idx + 1] + top;
3001     /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
3002     /* Evaluation is optimized assuming superscalar pipelined execution. */
3003     r2 = r * r;
3004     /* Without fma the worst case error is 0.25/N ulp larger. */
3005     /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
3006     tmp = tail + r + r2 * (C[0] + r * C[1]) + r2 * r2 * (C[2] + r * C[3]);
3007     if (abstop == 0) {
3008         /* Handle cases that may overflow or underflow when computing the result that
3009            is scale*(1+TMP) without intermediate rounding. The bit representation of
3010            scale is in SBITS, however it has a computed exponent that may have
3011            overflown into the sign bit so that needs to be adjusted before using it as
3012            a double. (int32_t)KI is the k used in the argument reduction and exponent
3013            adjustment of scale, positive k here means the result may overflow and
3014            negative k means the result may underflow. */
3015         double scale, y;
3016
3017         if ((ki & 0x80000000) == 0) {
3018             /* k > 0, the exponent of scale might have overflowed by <= 460. */
3019             sbits -= 1009ull << 52;
3020             scale = *(double*)&sbits;
3021             y = 0x1p1009 * (scale + scale * tmp);
3022             if (isinf(y))
3023                 return math_error(_OVERFLOW, "exp", x, 0, y);
3024             return y;
3025         }
3026         /* k < 0, need special care in the subnormal range. */
3027         sbits += 1022ull << 52;
3028         scale = *(double*)&sbits;
3029         y = scale + scale * tmp;
3030         if (y < 1.0) {
3031             /* Round y to the right precision before scaling it into the subnormal
3032                range to avoid double rounding that can cause 0.5+E/2 ulp error where
3033                E is the worst-case ulp error outside the subnormal range. So this
3034                is only useful if the goal is better than 1 ulp worst-case error. */
3035             double hi, lo;
3036             lo = scale - y + scale * tmp;
3037             hi = 1.0 + y;
3038             lo = 1.0 - hi + y + lo;
3039             y = hi + lo - 1.0;
3040             /* Avoid -0.0 with downward rounding. */
3041             if (y == 0.0)
3042                 y = 0.0;
3043             /* The underflow exception needs to be signaled explicitly. */
3044             fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022);
3045             y = 0x1p-1022 * y;
3046             return math_error(_UNDERFLOW, "exp", x, 0, y);
3047         }
3048         y = 0x1p-1022 * y;
3049         return y;
3050     }
3051     scale = *(double*)&sbits;
3052     /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
3053        is no spurious underflow here even without fma. */
3054     return scale + scale * tmp;
3055 }
3056
3057 /*********************************************************************
3058  *              fmod (MSVCRT.@)
3059  *
3060  * Copied from musl: src/math/fmod.c
3061  */
3062 double CDECL fmod( double x, double y )
3063 {
3064     UINT64 xi = *(UINT64*)&x;
3065     UINT64 yi = *(UINT64*)&y;
3066     int ex = xi >> 52 & 0x7ff;
3067     int ey = yi >> 52 & 0x7ff;
3068     int sx = xi >> 63;
3069     UINT64 i;
3070
3071     if (isinf(x)) return math_error(_DOMAIN, "fmod", x, y, (x * y) / (x * y));
3072     if (yi << 1 == 0 || isnan(y) || ex == 0x7ff)
3073         return (x * y) / (x * y);
3074     if (xi << 1 <= yi << 1) {
3075         if (xi << 1 == yi << 1)
3076             return 0 * x;
3077         return x;
3078     }
3079
3080     /* normalize x and y */
3081     if (!ex) {
3082         for (i = xi << 12; i >> 63 == 0; ex--, i <<= 1);
3083         xi <<= -ex + 1;
3084     } else {
3085         xi &= -1ULL >> 12;
3086         xi |= 1ULL << 52;
3087     }
3088     if (!ey) {
3089         for (i = yi << 12; i >> 63 == 0; ey--, i <<= 1);
3090         yi <<= -ey + 1;
3091     } else {
3092         yi &= -1ULL >> 12;
3093         yi |= 1ULL << 52;
3094     }
3095
3096     /* x mod y */
3097     for (; ex > ey; ex--) {
3098         i = xi - yi;
3099         if (i >> 63 == 0) {
3100             if (i == 0)
3101                 return 0 * x;
3102             xi = i;
3103         }
3104         xi <<= 1;
3105     }
3106     i = xi - yi;
3107     if (i >> 63 == 0) {
3108         if (i == 0)
3109             return 0 * x;
3110         xi = i;
3111     }
3112     for (; xi >> 52 == 0; xi <<= 1, ex--);
3113
3114     /* scale result */
3115     if (ex > 0) {
3116         xi -= 1ULL << 52;
3117         xi |= (UINT64)ex << 52;
3118     } else {
3119         xi >>= -ex + 1;
3120     }
3121     xi |= (UINT64)sx << 63;
3122     return *(double*)&xi;
3123 }
3124
3125 /*********************************************************************
3126  *              log (MSVCRT.@)
3127  *
3128  * Copied from musl: src/math/log.c src/math/log_data.c
3129  */
3130 double CDECL log( double x )
3131 {
3132     static const double Ln2hi = 0x1.62e42fefa3800p-1,
3133         Ln2lo = 0x1.ef35793c76730p-45;
3134     static const double A[] = {
3135         -0x1.0000000000001p-1,
3136         0x1.555555551305bp-2,
3137         -0x1.fffffffeb459p-3,
3138         0x1.999b324f10111p-3,
3139         -0x1.55575e506c89fp-3
3140     };
3141     static const double B[] = {
3142         -0x1p-1,
3143         0x1.5555555555577p-2,
3144         -0x1.ffffffffffdcbp-3,
3145         0x1.999999995dd0cp-3,
3146         -0x1.55555556745a7p-3,
3147         0x1.24924a344de3p-3,
3148         -0x1.fffffa4423d65p-4,
3149         0x1.c7184282ad6cap-4,
3150         -0x1.999eb43b068ffp-4,
3151         0x1.78182f7afd085p-4,
3152         -0x1.5521375d145cdp-4
3153     };
3154     static const struct {
3155         double invc, logc;
3156     } T[] = {
3157         {0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
3158         {0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
3159         {0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
3160         {0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2},
3161         {0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2},
3162         {0x1.69147332f0cbap+0, -0x1.602d076180000p-2},
3163         {0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2},
3164         {0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2},
3165         {0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2},
3166         {0x1.614b36b9ddc14p+0, -0x1.49da7fda85000p-2},
3167         {0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2},
3168         {0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2},
3169         {0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2},
3170         {0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2},
3171         {0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2},
3172         {0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2},
3173         {0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2},
3174         {0x1.52aff42064583p+0, -0x1.1e9e129279000p-2},
3175         {0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2},
3176         {0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2},
3177         {0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2},
3178         {0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2},
3179         {0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2},
3180         {0x1.4880524d48434p+0, -0x1.feb224586f000p-3},
3181         {0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3},
3182         {0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3},
3183         {0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3},
3184         {0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3},
3185         {0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3},
3186         {0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3},
3187         {0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3},
3188         {0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3},
3189         {0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3},
3190         {0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3},
3191         {0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3},
3192         {0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3},
3193         {0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3},
3194         {0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3},
3195         {0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3},
3196         {0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3},
3197         {0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3},
3198         {0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3},
3199         {0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3},
3200         {0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3},
3201         {0x1.293726014b530p+0, -0x1.31b996b490000p-3},
3202         {0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3},
3203         {0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3},
3204         {0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3},
3205         {0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3},
3206         {0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3},
3207         {0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4},
3208         {0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4},
3209         {0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4},
3210         {0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4},
3211         {0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4},
3212         {0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4},
3213         {0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4},
3214         {0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4},
3215         {0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4},
3216         {0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4},
3217         {0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4},
3218         {0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4},
3219         {0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4},
3220         {0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4},
3221         {0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5},
3222         {0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5},
3223         {0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5},
3224         {0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5},
3225         {0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5},
3226         {0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5},
3227         {0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5},
3228         {0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5},
3229         {0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6},
3230         {0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6},
3231         {0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6},
3232         {0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6},
3233         {0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7},
3234         {0x1.02865137932a9p+0, -0x1.419355daa0000p-7},
3235         {0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8},
3236         {0x1.008040614b195p+0, -0x1.0040979240000p-9},
3237         {0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9},
3238         {0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7},
3239         {0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6},
3240         {0x1.f25f63ceeadcdp-1, 0x1.b9fc114890000p-6},
3241         {0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5},
3242         {0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5},
3243         {0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5},
3244         {0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5},
3245         {0x1.e01e009609a56p-1, 0x1.07598e598c000p-4},
3246         {0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4},
3247         {0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4},
3248         {0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4},
3249         {0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4},
3250         {0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4},
3251         {0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4},
3252         {0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4},
3253         {0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4},
3254         {0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3},
3255         {0x1.bf583eeece73fp-1, 0x1.147858292b000p-3},
3256         {0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3},
3257         {0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3},
3258         {0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3},
3259         {0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3},
3260         {0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3},
3261         {0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3},
3262         {0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3},
3263         {0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3},
3264         {0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3},
3265         {0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3},
3266         {0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3},
3267         {0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3},
3268         {0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3},
3269         {0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3},
3270         {0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3},
3271         {0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3},
3272         {0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3},
3273         {0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2},
3274         {0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2},
3275         {0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2},
3276         {0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2},
3277         {0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2},
3278         {0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2},
3279         {0x1.8060195f40260p-1, 0x1.2595fd7636800p-2},
3280         {0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2},
3281         {0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2},
3282         {0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
3283         {0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
3284         {0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2}
3285     };
3286     static const struct {
3287         double chi, clo;
3288     } T2[] = {
3289         {0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
3290         {0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
3291         {0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
3292         {0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57},
3293         {0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56},
3294         {0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55},
3295         {0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55},
3296         {0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56},
3297         {0x1.710000e86978p-1, 0x1.bff6671097952p-56},
3298         {0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55},
3299         {0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57},
3300         {0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57},
3301         {0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55},
3302         {0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56},
3303         {0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55},
3304         {0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55},
3305         {0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55},
3306         {0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55},
3307         {0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55},
3308         {0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55},
3309         {0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55},
3310         {0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56},
3311         {0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55},
3312         {0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55},
3313         {0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55},
3314         {0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56},
3315         {0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55},
3316         {0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56},
3317         {0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55},
3318         {0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55},
3319         {0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60},
3320         {0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55},
3321         {0x1.a10001145b006p-1, 0x1.4ff489958da56p-56},
3322         {0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55},
3323         {0x1.a500010971d79p-1, 0x1.8fecadd78793p-55},
3324         {0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55},
3325         {0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55},
3326         {0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57},
3327         {0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55},
3328         {0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57},
3329         {0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58},
3330         {0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56},
3331         {0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56},
3332         {0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55},
3333         {0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56},
3334         {0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57},
3335         {0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57},
3336         {0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55},
3337         {0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55},
3338         {0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57},
3339         {0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55},
3340         {0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55},
3341         {0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56},
3342         {0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57},
3343         {0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55},
3344         {0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55},
3345         {0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56},
3346         {0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55},
3347         {0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58},
3348         {0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56},
3349         {0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56},
3350         {0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55},
3351         {0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55},
3352         {0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57},
3353         {0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56},
3354         {0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56},
3355         {0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56},
3356         {0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58},
3357         {0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55},
3358         {0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56},
3359         {0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58},
3360         {0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55},
3361         {0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59},
3362         {0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55},
3363         {0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55},
3364         {0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57},
3365         {0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56},
3366         {0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57},
3367         {0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56},
3368         {0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57},
3369         {0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55},
3370         {0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54},
3371         {0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54},
3372         {0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55},
3373         {0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57},
3374         {0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54},
3375         {0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55},
3376         {0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56},
3377         {0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55},
3378         {0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54},
3379         {0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54},
3380         {0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55},
3381         {0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54},
3382         {0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54},
3383         {0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57},
3384         {0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54},
3385         {0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54},
3386         {0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54},
3387         {0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56},
3388         {0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56},
3389         {0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56},
3390         {0x1.2b00014556313p+0, -0x1.2808233f21f02p-54},
3391         {0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55},
3392         {0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55},
3393         {0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55},
3394         {0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54},
3395         {0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54},
3396         {0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55},
3397         {0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54},
3398         {0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55},
3399         {0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56},
3400         {0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54},
3401         {0x1.410001532aff4p+0, 0x1.7f8375f198524p-57},
3402         {0x1.4300017478b29p+0, 0x1.301e672dc5143p-55},
3403         {0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55},
3404         {0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54},
3405         {0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54},
3406         {0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54},
3407         {0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54},
3408         {0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54},
3409         {0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57},
3410         {0x1.530001605277ap+0, -0x1.6bfcece233209p-54},
3411         {0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55},
3412         {0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54},
3413         {0x1.5900017e61012p+0, 0x1.87ec581afef9p-55},
3414         {0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
3415         {0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
3416         {0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54}
3417     };
3418
3419     double w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
3420     UINT64 ix, iz, tmp;
3421     UINT32 top;
3422     int k, i;
3423
3424     ix = *(UINT64*)&x;
3425     top = ix >> 48;
3426     if (ix - 0x3fee000000000000ULL < 0x3090000000000ULL) {
3427         double rhi, rlo;
3428
3429         /* Handle close to 1.0 inputs separately. */
3430         /* Fix sign of zero with downward rounding when x==1. */
3431         if (ix == 0x3ff0000000000000ULL)
3432             return 0;
3433         r = x - 1.0;
3434         r2 = r * r;
3435         r3 = r * r2;
3436         y = r3 * (B[1] + r * B[2] + r2 * B[3] + r3 * (B[4] + r * B[5] + r2 * B[6] +
3437                     r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
3438         /* Worst-case error is around 0.507 ULP. */
3439         w = r * 0x1p27;
3440         rhi = r + w - w;
3441         rlo = r - rhi;
3442         w = rhi * rhi * B[0]; /* B[0] == -0.5. */
3443         hi = r + w;
3444         lo = r - hi + w;
3445         lo += B[0] * rlo * (rhi + r);
3446         y += lo;
3447         y += hi;
3448         return y;
3449     }
3450     if (top - 0x0010 >= 0x7ff0 - 0x0010) {
3451         /* x < 0x1p-1022 or inf or nan. */
3452         if (ix * 2 == 0)
3453             return math_error(_SING, "log", x, 0, (top & 0x8000 ? 1.0 : -1.0) / x);
3454         if (ix == 0x7ff0000000000000ULL) /* log(inf) == inf. */
3455             return x;
3456         if ((top & 0x7ff0) == 0x7ff0 && (ix & 0xfffffffffffffULL))
3457             return x;
3458         if (top & 0x8000)
3459             return math_error(_DOMAIN, "log", x, 0, (x - x) / (x - x));
3460         /* x is subnormal, normalize it. */
3461         x *= 0x1p52;
3462         ix = *(UINT64*)&x;
3463         ix -= 52ULL << 52;
3464     }
3465
3466     /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
3467        The range is split into N subintervals.
3468        The ith subinterval contains z and c is near its center. */
3469     tmp = ix - 0x3fe6000000000000ULL;
3470     i = (tmp >> (52 - 7)) % (1 << 7);
3471     k = (INT64)tmp >> 52; /* arithmetic shift */
3472     iz = ix - (tmp & 0xfffULL << 52);
3473     invc = T[i].invc;
3474     logc = T[i].logc;
3475     z = *(double*)&iz;
3476
3477     /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
3478     /* r ~= z/c - 1, |r| < 1/(2*N). */
3479     r = (z - T2[i].chi - T2[i].clo) * invc;
3480     kd = (double)k;
3481
3482     /* hi + lo = r + log(c) + k*Ln2. */
3483     w = kd * Ln2hi + logc;
3484     hi = w + r;
3485     lo = w - hi + r + kd * Ln2lo;
3486
3487     /* log(x) = lo + (log1p(r) - r) + hi. */
3488     r2 = r * r; /* rounding error: 0x1p-54/N^2. */
3489     /* Worst case error if |y| > 0x1p-5:
3490        0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
3491        Worst case error if |y| > 0x1p-4:
3492        0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
3493     y = lo + r2 * A[0] +
3494         r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
3495     return y;
3496 }
3497
3498 /*********************************************************************
3499  *              log10 (MSVCRT.@)
3500  */
3501 double CDECL log10( double x )
3502 {
3503     static const double ivln10hi = 4.34294481878168880939e-01,
3504         ivln10lo = 2.50829467116452752298e-11,
3505         log10_2hi = 3.01029995663611771306e-01,
3506         log10_2lo = 3.69423907715893078616e-13,
3507         Lg1 = 6.666666666666735130e-01,
3508         Lg2 = 3.999999999940941908e-01,
3509         Lg3 = 2.857142874366239149e-01,
3510         Lg4 = 2.222219843214978396e-01,
3511         Lg5 = 1.818357216161805012e-01,
3512         Lg6 = 1.531383769920937332e-01,
3513         Lg7 = 1.479819860511658591e-01;
3514
3515     union {double f; UINT64 i;} u = {x};
3516     double hfsq, f, s, z, R, w, t1, t2, dk, y, hi, lo, val_hi, val_lo;
3517     UINT32 hx;
3518     int k;
3519
3520     hx = u.i >> 32;
3521     k = 0;
3522     if (hx < 0x00100000 || hx >> 31) {
3523         if (u.i << 1 == 0)
3524             return math_error(_SING, "log10", x, 0, -1 / (x * x));
3525         if ((u.i & ~(1ULL << 63)) > 0x7ff0000000000000ULL)
3526             return x;
3527         if (hx >> 31)
3528             return math_error(_DOMAIN, "log10", x, 0, (x - x) / (x - x));
3529         /* subnormal number, scale x up */
3530         k -= 54;
3531         x *= 0x1p54;
3532         u.f = x;
3533         hx = u.i >> 32;
3534     } else if (hx >= 0x7ff00000) {
3535         return x;
3536     } else if (hx == 0x3ff00000 && u.i<<32 == 0)
3537         return 0;
3538
3539     /* reduce x into [sqrt(2)/2, sqrt(2)] */
3540     hx += 0x3ff00000 - 0x3fe6a09e;
3541     k += (int)(hx >> 20) - 0x3ff;
3542     hx = (hx & 0x000fffff) + 0x3fe6a09e;
3543     u.i = (UINT64)hx << 32 | (u.i & 0xffffffff);
3544     x = u.f;
3545
3546     f = x - 1.0;
3547     hfsq = 0.5 * f * f;
3548     s = f / (2.0 + f);
3549     z = s * s;
3550     w = z * z;
3551     t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
3552     t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
3553     R = t2 + t1;
3554
3555     /* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */
3556     hi = f - hfsq;
3557     u.f = hi;
3558     u.i &= (UINT64)-1 << 32;
3559     hi = u.f;
3560     lo = f - hi - hfsq + s * (hfsq + R);
3561
3562     /* val_hi+val_lo ~ log10(1+f) + k*log10(2) */
3563     val_hi = hi * ivln10hi;
3564     dk = k;
3565     y = dk * log10_2hi;
3566     val_lo = dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi;
3567
3568     /*
3569      * Extra precision in for adding y is not strictly needed
3570      * since there is no very large cancellation near x = sqrt(2) or
3571      * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs
3572      * with some parallelism and it reduces the error for many args.
3573      */
3574     w = y + val_hi;
3575     val_lo += (y - w) + val_hi;
3576     val_hi = w;
3577
3578     return val_lo + val_hi;
3579 }
3580
/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
   additional 15 bits precision. IX is the bit representation of x, but
   normalized in the subnormal range using the sign bit for the exponent.

   Parameters:
     ix   - bit pattern of the (already normalized) argument, see above.
     tail - out: receives the low-order correction term TAIL.
   Returns the rounded logarithm y. */
static double pow_log(UINT64 ix, double *tail)
{
    /* Lookup table with 1 << 7 == 128 entries, one per subinterval.
       invc is used as 1/c and logc + logctail as log(c) split into a
       high and a low part (see the formula comments below). */
    static const struct {
        double invc, logc, logctail;
    } T[] = {
        {0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48},
        {0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46},
        {0x1.6600000000000p+0, -0x1.5767717455800p-2, -0x1.362a4d5b6506dp-45},
        {0x1.6400000000000p+0, -0x1.51aad872df800p-2, -0x1.684e49eb067d5p-49},
        {0x1.6200000000000p+0, -0x1.4be5f95777800p-2, -0x1.41b6993293ee0p-47},
        {0x1.6000000000000p+0, -0x1.4618bc21c6000p-2, 0x1.3d82f484c84ccp-46},
        {0x1.5e00000000000p+0, -0x1.404308686a800p-2, 0x1.c42f3ed820b3ap-50},
        {0x1.5c00000000000p+0, -0x1.3a64c55694800p-2, 0x1.0b1c686519460p-45},
        {0x1.5a00000000000p+0, -0x1.347dd9a988000p-2, 0x1.5594dd4c58092p-45},
        {0x1.5800000000000p+0, -0x1.2e8e2bae12000p-2, 0x1.67b1e99b72bd8p-45},
        {0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46},
        {0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46},
        {0x1.5400000000000p+0, -0x1.22941fbcf7800p-2, -0x1.65a242853da76p-46},
        {0x1.5200000000000p+0, -0x1.1c898c1699800p-2, -0x1.fafbc68e75404p-46},
        {0x1.5000000000000p+0, -0x1.1675cababa800p-2, 0x1.f1fc63382a8f0p-46},
        {0x1.4e00000000000p+0, -0x1.1058bf9ae4800p-2, -0x1.6a8c4fd055a66p-45},
        {0x1.4c00000000000p+0, -0x1.0a324e2739000p-2, -0x1.c6bee7ef4030ep-47},
        {0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48},
        {0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48},
        {0x1.4800000000000p+0, -0x1.fb9186d5e4000p-3, 0x1.d572aab993c87p-47},
        {0x1.4600000000000p+0, -0x1.ef0adcbdc6000p-3, 0x1.b26b79c86af24p-45},
        {0x1.4400000000000p+0, -0x1.e27076e2af000p-3, -0x1.72f4f543fff10p-46},
        {0x1.4200000000000p+0, -0x1.d5c216b4fc000p-3, 0x1.1ba91bbca681bp-45},
        {0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45},
        {0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45},
        {0x1.3e00000000000p+0, -0x1.bc286742d9000p-3, 0x1.94eb0318bb78fp-46},
        {0x1.3c00000000000p+0, -0x1.af3c94e80c000p-3, 0x1.a4e633fcd9066p-52},
        {0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45},
        {0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45},
        {0x1.3800000000000p+0, -0x1.9525a9cf45000p-3, -0x1.ad1d904c1d4e3p-45},
        {0x1.3600000000000p+0, -0x1.87fa06520d000p-3, 0x1.bbdbf7fdbfa09p-45},
        {0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45},
        {0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45},
        {0x1.3200000000000p+0, -0x1.6d60fe719d000p-3, -0x1.0e46aa3b2e266p-46},
        {0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46},
        {0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46},
        {0x1.2e00000000000p+0, -0x1.526e5e3a1b000p-3, -0x1.0de8b90075b8fp-45},
        {0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46},
        {0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46},
        {0x1.2a00000000000p+0, -0x1.371fc201e9000p-3, 0x1.178864d27543ap-48},
        {0x1.2800000000000p+0, -0x1.29552f81ff000p-3, -0x1.48d301771c408p-45},
        {0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45},
        {0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45},
        {0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47},
        {0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47},
        {0x1.2200000000000p+0, -0x1.fec9131dbe000p-4, -0x1.575545ca333f2p-45},
        {0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45},
        {0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45},
        {0x1.1e00000000000p+0, -0x1.c5e548f5bc000p-4, -0x1.d0c57585fbe06p-46},
        {0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45},
        {0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45},
        {0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46},
        {0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46},
        {0x1.1800000000000p+0, -0x1.6f0d28ae56000p-4, -0x1.69737c93373dap-45},
        {0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46},
        {0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46},
        {0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45},
        {0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45},
        {0x1.1200000000000p+0, -0x1.16536eea38000p-4, 0x1.47c5e768fa309p-46},
        {0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45},
        {0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45},
        {0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46},
        {0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46},
        {0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45},
        {0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45},
        {0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48},
        {0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48},
        {0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45},
        {0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45},
        {0x1.0600000000000p+0, -0x1.7b91b07d58000p-6, -0x1.88d5493faa639p-45},
        {0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50},
        {0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50},
        {0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46},
        {0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46},
        {0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0},
        {0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0},
        {0x1.fc00000000000p-1, 0x1.0101575890000p-7, -0x1.0c76b999d2be8p-46},
        {0x1.f800000000000p-1, 0x1.0205658938000p-6, -0x1.3dc5b06e2f7d2p-45},
        {0x1.f400000000000p-1, 0x1.8492528c90000p-6, -0x1.aa0ba325a0c34p-45},
        {0x1.f000000000000p-1, 0x1.0415d89e74000p-5, 0x1.111c05cf1d753p-47},
        {0x1.ec00000000000p-1, 0x1.466aed42e0000p-5, -0x1.c167375bdfd28p-45},
        {0x1.e800000000000p-1, 0x1.894aa149fc000p-5, -0x1.97995d05a267dp-46},
        {0x1.e400000000000p-1, 0x1.ccb73cdddc000p-5, -0x1.a68f247d82807p-46},
        {0x1.e200000000000p-1, 0x1.eea31c006c000p-5, -0x1.e113e4fc93b7bp-47},
        {0x1.de00000000000p-1, 0x1.1973bd1466000p-4, -0x1.5325d560d9e9bp-45},
        {0x1.da00000000000p-1, 0x1.3bdf5a7d1e000p-4, 0x1.cc85ea5db4ed7p-45},
        {0x1.d600000000000p-1, 0x1.5e95a4d97a000p-4, -0x1.c69063c5d1d1ep-45},
        {0x1.d400000000000p-1, 0x1.700d30aeac000p-4, 0x1.c1e8da99ded32p-49},
        {0x1.d000000000000p-1, 0x1.9335e5d594000p-4, 0x1.3115c3abd47dap-45},
        {0x1.cc00000000000p-1, 0x1.b6ac88dad6000p-4, -0x1.390802bf768e5p-46},
        {0x1.ca00000000000p-1, 0x1.c885801bc4000p-4, 0x1.646d1c65aacd3p-45},
        {0x1.c600000000000p-1, 0x1.ec739830a2000p-4, -0x1.dc068afe645e0p-45},
        {0x1.c400000000000p-1, 0x1.fe89139dbe000p-4, -0x1.534d64fa10afdp-45},
        {0x1.c000000000000p-1, 0x1.1178e8227e000p-3, 0x1.1ef78ce2d07f2p-45},
        {0x1.be00000000000p-1, 0x1.1aa2b7e23f000p-3, 0x1.ca78e44389934p-45},
        {0x1.ba00000000000p-1, 0x1.2d1610c868000p-3, 0x1.39d6ccb81b4a1p-47},
        {0x1.b800000000000p-1, 0x1.365fcb0159000p-3, 0x1.62fa8234b7289p-51},
        {0x1.b400000000000p-1, 0x1.4913d8333b000p-3, 0x1.5837954fdb678p-45},
        {0x1.b200000000000p-1, 0x1.527e5e4a1b000p-3, 0x1.633e8e5697dc7p-45},
        {0x1.ae00000000000p-1, 0x1.6574ebe8c1000p-3, 0x1.9cf8b2c3c2e78p-46},
        {0x1.ac00000000000p-1, 0x1.6f0128b757000p-3, -0x1.5118de59c21e1p-45},
        {0x1.aa00000000000p-1, 0x1.7898d85445000p-3, -0x1.c661070914305p-46},
        {0x1.a600000000000p-1, 0x1.8beafeb390000p-3, -0x1.73d54aae92cd1p-47},
        {0x1.a400000000000p-1, 0x1.95a5adcf70000p-3, 0x1.7f22858a0ff6fp-47},
        {0x1.a000000000000p-1, 0x1.a93ed3c8ae000p-3, -0x1.8724350562169p-45},
        {0x1.9e00000000000p-1, 0x1.b31d8575bd000p-3, -0x1.c358d4eace1aap-47},
        {0x1.9c00000000000p-1, 0x1.bd087383be000p-3, -0x1.d4bc4595412b6p-45},
        {0x1.9a00000000000p-1, 0x1.c6ffbc6f01000p-3, -0x1.1ec72c5962bd2p-48},
        {0x1.9600000000000p-1, 0x1.db13db0d49000p-3, -0x1.aff2af715b035p-45},
        {0x1.9400000000000p-1, 0x1.e530effe71000p-3, 0x1.212276041f430p-51},
        {0x1.9200000000000p-1, 0x1.ef5ade4dd0000p-3, -0x1.a211565bb8e11p-51},
        {0x1.9000000000000p-1, 0x1.f991c6cb3b000p-3, 0x1.bcbecca0cdf30p-46},
        {0x1.8c00000000000p-1, 0x1.07138604d5800p-2, 0x1.89cdb16ed4e91p-48},
        {0x1.8a00000000000p-1, 0x1.0c42d67616000p-2, 0x1.7188b163ceae9p-45},
        {0x1.8800000000000p-1, 0x1.1178e8227e800p-2, -0x1.c210e63a5f01cp-45},
        {0x1.8600000000000p-1, 0x1.16b5ccbacf800p-2, 0x1.b9acdf7a51681p-45},
        {0x1.8400000000000p-1, 0x1.1bf99635a6800p-2, 0x1.ca6ed5147bdb7p-45},
        {0x1.8200000000000p-1, 0x1.214456d0eb800p-2, 0x1.a87deba46baeap-47},
        {0x1.7e00000000000p-1, 0x1.2bef07cdc9000p-2, 0x1.a9cfa4a5004f4p-45},
        {0x1.7c00000000000p-1, 0x1.314f1e1d36000p-2, -0x1.8e27ad3213cb8p-45},
        {0x1.7a00000000000p-1, 0x1.36b6776be1000p-2, 0x1.16ecdb0f177c8p-46},
        {0x1.7800000000000p-1, 0x1.3c25277333000p-2, 0x1.83b54b606bd5cp-46},
        {0x1.7600000000000p-1, 0x1.419b423d5e800p-2, 0x1.8e436ec90e09dp-47},
        {0x1.7400000000000p-1, 0x1.4718dc271c800p-2, -0x1.f27ce0967d675p-45},
        {0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45},
        {0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45},
        {0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46},
        {0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47},
    };
    /* Polynomial coefficients for the log1p(r) approximation. The literal
       scale factors (-2, 4, -8) compensate for the evaluation below being
       carried out in powers of ar = A[0]*r = -r/2 instead of r. */
    static const double A[] = {
        -0x1p-1,
        0x1.555555555556p-2 * -2,
        -0x1.0000000000006p-2 * -2,
        0x1.999999959554ep-3 * 4,
        -0x1.555555529a47ap-3 * 4,
        0x1.2495b9b4845e9p-3 * -8,
        -0x1.0002b8b263fc3p-3 * -8
    };
    /* ln(2) split into a high part and a low correction part. */
    static const double ln2hi = 0x1.62e42fefa3800p-1,
        ln2lo = 0x1.ef35793c76730p-45;

    double z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
    double zhi, zlo, rhi, rlo, ar, ar2, ar3, lo3, lo4, arhi, arhi2;
    UINT64 iz, tmp;
    int k, i;

    /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
       The range is split into N subintervals.
       The ith subinterval contains z and c is near its center. */
    tmp = ix - 0x3fe6955500000000ULL; /* subtract OFF */
    i = (tmp >> (52 - 7)) % (1 << 7); /* top 7 mantissa bits -> table index */
    k = (INT64)tmp >> 52; /* arithmetic shift */
    iz = ix - (tmp & 0xfffULL << 52); /* remove the 2^k factor from ix */
    z = *(double*)&iz;
    kd = k;

    /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
    invc = T[i].invc;
    logc = T[i].logc;
    logctail = T[i].logctail;

    /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
     |z/c - 1| < 1/N, so r = z/c - 1 is exactly representable. */
    /* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */
    iz = (iz + (1ULL << 31)) & (-1ULL << 32); /* round z to its upper 32 bits */
    zhi = *(double*)&iz;
    zlo = z - zhi;
    rhi = zhi * invc - 1.0;
    rlo = zlo * invc;
    r = rhi + rlo;

    /* k*Ln2 + log(c) + r. */
    t1 = kd * ln2hi + logc;
    t2 = t1 + r;
    lo1 = kd * ln2lo + logctail;
    lo2 = t1 - t2 + r; /* rounding error of t1 + r */

    /* Evaluation is optimized assuming superscalar pipelined execution. */
    ar = A[0] * r; /* A[0] = -0.5. */
    ar2 = r * ar;
    ar3 = r * ar2;
    /* k*Ln2 + log(c) + r + A[0]*r*r. */
    arhi = A[0] * rhi;
    arhi2 = rhi * arhi;
    hi = t2 + arhi2;
    lo3 = rlo * (ar + arhi);
    lo4 = t2 - hi + arhi2; /* rounding error of t2 + arhi2 */
    /* p = log1p(r) - r - A[0]*r*r. */
    p = (ar3 * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
    /* Collect all low-order terms, then renormalize into result + tail. */
    lo = lo1 + lo2 + lo3 + lo4 + p;
    y = hi + lo;
    *tail = hi - y + lo;
    return y;
}
3783
/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
   The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1.

   Parameters:
     argx, argy - the original pow() arguments; they are only forwarded to
                  math_error() when an overflow/underflow is reported.
     x, xtail   - high and low part of the exponent to evaluate.
     sign_bias  - 0 for a positive result, non-zero for a negative one.
   Returns the (signed) exponential, or the value produced by math_error()
   on the overflow/underflow paths. */
static double pow_exp(double argx, double argy, double x, double xtail, UINT32 sign_bias)
{
    /* Polynomial coefficients used for exp(r) - 1 - r below. */
    static const double C[] = {
        0x1.ffffffffffdbdp-2,
        0x1.555555555543cp-3,
        0x1.55555cf172b91p-5,
        0x1.1111167a4d017p-7
    };
    /* N = 1 << 7 = 128: N/ln2, and -ln2/N split into high and low parts. */
    static const double invln2N = 0x1.71547652b82fep0 * (1 << 7),
        negln2hiN = -0x1.62e42fefa0000p-8,
        negln2loN = -0x1.cf79abc9e3b3ap-47;

    UINT32 abstop;
    UINT64 ki, idx, top, sbits;
    double kd, z, r, r2, scale, tail, tmp;

    /* Biased exponent field of x (sign bit masked off). */
    abstop = (*(UINT64*)&x >> 52) & 0x7ff;
    /* Unsigned compare: true when the exponent field is below 0x3c9 (the
       subtraction wraps) or at least 0x408. */
    if (abstop - 0x3c9 >= 0x408 - 0x3c9) {
        if (abstop - 0x3c9 >= 0x80000000) {
            /* Avoid spurious underflow for tiny x. */
            /* Note: 0 is common input. */
            double one = 1.0 + x;
            return sign_bias ? -one : one;
        }
        if (abstop >= 0x409) {
            /* Note: inf and nan are already handled. */
            if (*(UINT64*)&x >> 63)
                return math_error(_UNDERFLOW, "pow", argx, argy, (sign_bias ? -DBL_MIN : DBL_MIN) * DBL_MIN);
            return math_error(_OVERFLOW, "pow", argx, argy, (sign_bias ? -DBL_MAX : DBL_MAX) * DBL_MAX);
        }
        /* Large x is special cased below. */
        abstop = 0;
    }

    /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
    /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
    z = invln2N * x;
    kd = __round(z);
    ki = (INT64)kd;
    r = x + kd * negln2hiN + kd * negln2loN;
    /* The code assumes 2^-200 < |xtail| < 2^-8/N. */
    r += xtail;
    /* 2^(k/N) ~= scale * (1 + tail). */
    idx = 2 * (ki % (1 << 7)); /* exp_T is read as (tail, scale bits) pairs */
    /* Shift k (plus the sign bias) up into the exponent/sign bits of a double. */
    top = (ki + sign_bias) << (52 - 7);
    tail = *(double*)&exp_T[idx];
    /* This is only a valid scale when -1023*N < k < 1024*N. */
    sbits = exp_T[idx + 1] + top;
    /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
    /* Evaluation is optimized assuming superscalar pipelined execution. */
    r2 = r * r;
    /* Without fma the worst case error is 0.25/N ulp larger. */
    /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
    tmp = tail + r + r2 * (C[0] + r * C[1]) + r2 * r2 * (C[2] + r * C[3]);
    if (abstop == 0) {
        /* Handle cases that may overflow or underflow when computing the result that
           is scale*(1+TMP) without intermediate rounding. The bit representation of
           scale is in SBITS, however it has a computed exponent that may have
           overflown into the sign bit so that needs to be adjusted before using it as
           a double. (int32_t)KI is the k used in the argument reduction and exponent
           adjustment of scale, positive k here means the result may overflow and
           negative k means the result may underflow. */
        /* Note: these intentionally shadow the function-level 'scale'. */
        double scale, y;

        if ((ki & 0x80000000) == 0) {
            /* k > 0, the exponent of scale might have overflowed by <= 460. */
            sbits -= 1009ull << 52;
            scale = *(double*)&sbits;
            /* Undo the 2^-1009 pre-scaling; this multiply may overflow to inf. */
            y = 0x1p1009 * (scale + scale * tmp);
            if (isinf(y))
                return math_error(_OVERFLOW, "pow", argx, argy, y);
            return y;
        }
        /* k < 0, need special care in the subnormal range. */
        sbits += 1022ull << 52;
        /* Note: sbits is signed scale. */
        scale = *(double*)&sbits;
        y = scale + scale * tmp;
        if (fabs(y) < 1.0) {
            /* Round y to the right precision before scaling it into the subnormal
               range to avoid double rounding that can cause 0.5+E/2 ulp error where
               E is the worst-case ulp error outside the subnormal range. So this
               is only useful if the goal is better than 1 ulp worst-case error. */
            double hi, lo, one = 1.0;
            if (y < 0.0)
                one = -1.0;
            lo = scale - y + scale * tmp;
            hi = one + y;
            lo = one - hi + y + lo;
            y = hi + lo - one;
            /* Fix the sign of 0. */
            if (y == 0.0) {
                sbits &= 0x8000000000000000ULL;
                y = *(double*)&sbits;
            }
            /* The underflow exception needs to be signaled explicitly. */
            fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022);
            y = 0x1p-1022 * y;
            return math_error(_UNDERFLOW, "pow", argx, argy, y);
        }
        /* Undo the 2^1022 pre-scaling applied to sbits above. */
        y = 0x1p-1022 * y;
        return y;
    }
    scale = *(double*)&sbits;
    /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
       is no spurious underflow here even without fma. */
    return scale + scale * tmp;
}
3894
/* Classifies a non-zero finite double, given as its raw bit pattern, as
   non-integer (0), odd integer (1) or even integer (2). The sign bit of
   the pattern is ignored. */
static inline int pow_checkint(UINT64 iy)
{
    const int exponent = (iy >> 52) & 0x7ff;
    int frac_bits;
    UINT64 unit;

    /* Magnitude below 1: cannot be an integer (zero is excluded by contract). */
    if (exponent < 0x3ff)
        return 0;
    /* Magnitude of at least 2^53: every representable value is even. */
    if (exponent > 0x3ff + 52)
        return 2;

    /* Number of low bits in the pattern that hold the fractional part. */
    frac_bits = 0x3ff + 52 - exponent;
    /* Bit carrying the weight 1 (the "ones" place). */
    unit = 1ULL << frac_bits;

    if (iy & (unit - 1))
        return 0;
    return (iy & unit) ? 1 : 2;
}
3910
3911 /*********************************************************************
3912  *              pow (MSVCRT.@)
3913  *
3914  * Copied from musl: src/math/pow.c
3915  */
3916 double CDECL pow( double x, double y )
3917 {
3918     UINT32 sign_bias = 0;
3919     UINT64 ix, iy;
3920     UINT32 topx, topy;
3921     double lo, hi, ehi, elo, yhi, ylo, lhi, llo;
3922
3923     ix = *(UINT64*)&x;
3924     iy = *(UINT64*)&y;
3925     topx = ix >> 52;
3926     topy = iy >> 52;
3927     if (topx - 0x001 >= 0x7ff - 0x001 ||
3928             (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
3929         /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
3930            and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
3931         /* Special cases: (x < 0x1p-126 or inf or nan) or
3932            (|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
3933         if (2 * iy - 1 >= 2 * 0x7ff0000000000000ULL - 1) {
3934             if (2 * iy == 0)
3935                 return 1.0;
3936             if (ix == 0x3ff0000000000000ULL)
3937                 return 1.0;
3938             if (2 * ix > 2 * 0x7ff0000000000000ULL ||
3939                     2 * iy > 2 * 0x7ff0000000000000ULL)
3940                 return x + y;
3941             if (2 * ix == 2 * 0x3ff0000000000000ULL)
3942                 return 1.0;
3943             if ((2 * ix < 2 * 0x3ff0000000000000ULL) == !(iy >> 63))
3944                 return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
3945             return y * y;
3946         }
3947         if (2 * ix - 1 >= 2 * 0x7ff0000000000000ULL - 1) {
3948             double x2 = x * x;
3949             if (ix >> 63 && pow_checkint(iy) == 1)
3950                 x2 = -x2;
3951             if (iy & 0x8000000000000000ULL && x2 == 0.0)
3952                 return math_error(_SING, "pow", x, y, 1 / x2);
3953             /* Without the barrier some versions of clang hoist the 1/x2 and
3954                thus division by zero exception can be signaled spuriously. */
3955             return iy >> 63 ? fp_barrier(1 / x2) : x2;
3956         }
3957         /* Here x and y are non-zero finite. */
3958         if (ix >> 63) {
3959             /* Finite x < 0. */
3960             int yint = pow_checkint(iy);
3961             if (yint == 0)
3962                 return math_error(_DOMAIN, "pow", x, y, 0 / (x - x));
3963             if (yint == 1)
3964                 sign_bias = 0x800 << 7;
3965             ix &= 0x7fffffffffffffff;
3966             topx &= 0x7ff;
3967         }
3968         if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
3969             /* Note: sign_bias == 0 here because y is not odd. */
3970             if (ix == 0x3ff0000000000000ULL)
3971                 return 1.0;
3972             if ((topy & 0x7ff) < 0x3be) {
3973                 /* |y| < 2^-65, x^y ~= 1 + y*log(x). */
3974                 return ix > 0x3ff0000000000000ULL ? 1.0 + y : 1.0 - y;
3975             }
3976             if ((ix > 0x3ff0000000000000ULL) == (topy < 0x800))
3977                 return math_error(_OVERFLOW, "pow", x, y, fp_barrier(DBL_MAX) * DBL_MAX);
3978             return math_error(_UNDERFLOW, "pow", x, y, fp_barrier(DBL_MIN) * DBL_MIN);
3979         }
3980         if (topx == 0) {
3981             /* Normalize subnormal x so exponent becomes negative. */
3982             x *= 0x1p52;
3983             ix = *(UINT64*)&x;
3984             ix &= 0x7fffffffffffffff;
3985             ix -= 52ULL << 52;
3986         }
3987     }
3988
3989     hi = pow_log(ix, &lo);
3990     iy &= -1ULL << 27;
3991     yhi = *(double*)&iy;
3992     ylo = y - yhi;
3993     *(UINT64*)&lhi = *(UINT64*)&hi & -1ULL << 27;
3994     llo = fp_barrier(hi - lhi + lo);
3995     ehi = yhi * lhi;
3996     elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */
3997     return pow_exp(x, y, ehi, elo, sign_bias);
3998 }
3999
4000 /*********************************************************************
4001  *              sin (MSVCRT.@)
4002  *
4003  * Copied from musl: src/math/sin.c
4004  */
4005 double CDECL sin( double x )
4006 {
4007     double y[2];
4008     UINT32 ix;
4009     unsigned n;
4010
4011     ix = *(ULONGLONG*)&x >> 32;
4012     ix &= 0x7fffffff;
4013
4014     /* |x| ~< pi/4 */
4015     if (ix <= 0x3fe921fb) {
4016         if (ix < 0x3e500000) { /* |x| < 2**-26 */
4017             /* raise inexact if x != 0 and underflow if subnormal*/
4018             fp_barrier(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f);
4019             return x;
4020         }
4021         return __sin(x, 0.0, 0);
4022     }
4023
4024     /* sin(Inf or NaN) is NaN */
4025     if (isinf(x))
4026         return math_error(_DOMAIN, "sin", x, 0, x - x);
4027     if (ix >= 0x7ff00000)
4028         return x - x;
4029
4030     /* argument reduction needed */
4031     n = __rem_pio2(x, y);
4032     switch (n&3) {
4033     case 0: return  __sin(y[0], y[1], 1);
4034     case 1: return  __cos(y[0], y[1]);
4035     case 2: return -__sin(y[0], y[1], 1);
4036     default: return -__cos(y[0], y[1]);
4037     }
4038 }
4039
4040 /*********************************************************************
4041  *              sinh (MSVCRT.@)
4042  */
4043 double CDECL sinh( double x )
4044 {
4045     UINT64 ux = *(UINT64*)&x;
4046     UINT64 sign = ux & 0x8000000000000000ULL;
4047     UINT32 w;
4048     double t, h, absx;
4049
4050     h = 0.5;
4051     if (ux >> 63)
4052         h = -h;
4053     /* |x| */
4054     ux &= (UINT64)-1 / 2;
4055     absx = *(double*)&ux;
4056     w = ux >> 32;
4057
4058     /* |x| < log(DBL_MAX) */
4059     if (w < 0x40862e42) {
4060         t = __expm1(absx);
4061         if (w < 0x3ff00000) {
4062             if (w < 0x3ff00000 - (26 << 20))
4063                 return x;
4064             return h * (2 * t - t * t / (t + 1));
4065         }
4066         return h * (t + t / (t + 1));
4067     }
4068
4069     /* |x| > log(DBL_MAX) or nan */
4070     /* note: the result is stored to handle overflow */
4071     if (ux > 0x7ff0000000000000ULL)
4072         *(UINT64*)&t = ux | sign | 0x0008000000000000ULL;
4073     else
4074         t = __expo2(absx, 2 * h);
4075     return t;
4076 }
4077
4078 static BOOL sqrt_validate( double *x, BOOL update_sw )
4079 {
4080     short c = _dclass(*x);
4081
4082     if (c == FP_ZERO) return FALSE;
4083     if (c == FP_NAN)
4084     {
4085 #ifdef __i386__
4086         if (update_sw)
4087             *x = math_error(_DOMAIN, "sqrt", *x, 0, *x);
4088 #else
4089         /* set signaling bit */
4090         *(ULONGLONG*)x |= 0x8000000000000ULL;
4091 #endif
4092         return FALSE;
4093     }
4094     if (signbit(*x))
4095     {
4096         *x = math_error(_DOMAIN, "sqrt", *x, 0, ret_nan(update_sw));
4097         return FALSE;
4098     }
4099     if (c == FP_INFINITE) return FALSE;
4100     return TRUE;
4101 }
4102
#if defined(__x86_64__) || defined(__i386__)
/* Raw SSE2 square root stub: the body is just `sqrtsd %xmm0, %xmm0`
 * followed by `ret`, so the operand is taken from and the result left in
 * %xmm0. No argument validation is done here.
 * NOTE(review): this matches how x86_64 passes/returns a double; how an
 * i386 caller would get its argument into %xmm0 is not visible here. */
double CDECL sse2_sqrt(double);
__ASM_GLOBAL_FUNC( sse2_sqrt,
        "sqrtsd %xmm0, %xmm0\n\t"
        "ret" )
#endif

#ifdef __i386__
/* Raw x87 square root stub: loads the double argument from the stack
 * (4(%esp)), executes fsqrt and returns with the result on the x87 stack.
 * The fsqrt is bracketed by SET_X87_CW(0xc00) / RESET_X87_CW, macros that
 * are defined elsewhere.
 * NOTE(review): presumably they temporarily change the x87 control word
 * bits selected by mask 0xc00 and restore it afterwards - confirm against
 * the macro definitions. No argument validation is done here. */
double CDECL x87_sqrt(double);
__ASM_GLOBAL_FUNC( x87_sqrt,
        "fldl 4(%esp)\n\t"
        SET_X87_CW(0xc00)
        "fsqrt\n\t"
        RESET_X87_CW
        "ret" )
#endif
4119
4120 /*********************************************************************
4121  *              sqrt (MSVCRT.@)
4122  *
4123  * Copied from musl: src/math/sqrt.c
4124  */
4125 double CDECL sqrt( double x )
4126 {
4127 #ifdef __x86_64__
4128     if (!sqrt_validate(&x, TRUE))
4129         return x;
4130
4131     return sse2_sqrt(x);
4132 #elif defined( __i386__ )
4133     if (!sqrt_validate(&x, TRUE))
4134         return x;
4135
4136     return x87_sqrt(x);
4137 #else
4138     static const double tiny = 1.0e-300;
4139
4140     double z;
4141     int sign = 0x80000000;
4142     int ix0,s0,q,m,t,i;
4143     unsigned int r,t1,s1,ix1,q1;
4144     ULONGLONG ix;
4145
4146     if (!sqrt_validate(&x, TRUE))
4147         return x;
4148
4149     ix = *(ULONGLONG*)&x;
4150     ix0 = ix >> 32;
4151     ix1 = ix;
4152
4153     /* normalize x */
4154     m = ix0 >> 20;
4155     if (m == 0) {  /* subnormal x */
4156         while (ix0 == 0) {
4157             m -= 21;
4158             ix0 |= (ix1 >> 11);
4159             ix1 <<= 21;
4160         }
4161         for (i=0; (ix0 & 0x00100000) == 0; i++)
4162             ix0 <<= 1;
4163         m -= i - 1;
4164         ix0 |= ix1 >> (32 - i);
4165         ix1 <<= i;
4166     }
4167     m -= 1023;    /* unbias exponent */
4168     ix0 = (ix0 & 0x000fffff) | 0x00100000;
4169     if (m & 1) {  /* odd m, double x to make it even */
4170         ix0 += ix0 + ((ix1 & sign) >> 31);
4171         ix1 += ix1;
4172     }
4173     m >>= 1;      /* m = [m/2] */
4174
4175     /* generate sqrt(x) bit by bit */
4176     ix0 += ix0 + ((ix1 & sign) >> 31);
4177     ix1 += ix1;
4178     q = q1 = s0 = s1 = 0;  /* [q,q1] = sqrt(x) */
4179     r = 0x00200000;        /* r = moving bit from right to left */
4180
4181     while (r != 0) {
4182         t = s0 + r;
4183         if (t <= ix0) {
4184             s0   = t + r;
4185             ix0 -= t;
4186             q   += r;
4187         }
4188         ix0 += ix0 + ((ix1 & sign) >> 31);
4189         ix1 += ix1;
4190         r >>= 1;
4191     }
4192
4193     r = sign;
4194     while (r != 0) {
4195         t1 = s1 + r;
4196         t  = s0;
4197         if (t < ix0 || (t == ix0 && t1 <= ix1)) {
4198             s1 = t1 + r;
4199             if ((t1&sign) == sign && (s1 & sign) == 0)
4200                 s0++;
4201             ix0 -= t;
4202             if (ix1 < t1)
4203                 ix0--;
4204             ix1 -= t1;
4205             q1 += r;
4206         }
4207         ix0 += ix0 + ((ix1 & sign) >> 31);
4208         ix1 += ix1;
4209         r >>= 1;
4210     }
4211
4212     /* use floating add to find out rounding direction */
4213     if ((ix0 | ix1) != 0) {
4214         z = 1.0 - tiny; /* raise inexact flag */
4215         if (z >= 1.0) {
4216             z = 1.0 + tiny;
4217             if (q1 == (unsigned int)0xffffffff) {
4218                 q1 = 0;
4219                 q++;
4220             } else if (z > 1.0) {
4221                 if (q1 == (unsigned int)0xfffffffe)
4222                     q++;
4223                 q1 += 2;
4224             } else
4225                 q1 += q1 & 1;
4226         }
4227     }
4228     ix0 = (q >> 1) + 0x3fe00000;
4229     ix1 = q1 >> 1;
4230     if (q & 1)
4231         ix1 |= sign;
4232     ix = ix0 + ((unsigned int)m << 20);
4233     ix <<= 32;
4234     ix |= ix1;
4235     return *(double*)&ix;
4236 #endif
4237 }
4238
/* Copied from musl: src/math/__tan.c
 *
 * Tangent kernel for a reduced argument given as the sum x + y (y being
 * the low-order tail). With odd == 0 the result is tan(x + y), with
 * odd != 0 it is -1/tan(x + y).
 */
static double __tan(double x, double y, int odd)
{
    static const double T[] = {
        3.33333333333334091986e-01,
        1.33333333333201242699e-01,
        5.39682539762260521377e-02,
        2.18694882948595424599e-02,
        8.86323982359930005737e-03,
        3.59207910759131235356e-03,
        1.45620945432529025516e-03,
        5.88041240820264096874e-04,
        2.46463134818469906812e-04,
        7.81794442939557092300e-05,
        7.14072491382608190305e-05,
        -1.85586374855275456654e-05,
        2.59073051863633712884e-05,
    };
    static const double pio4 = 7.85398163397448278999e-01;
    static const double pio4lo = 3.06161699786838301793e-17;

    double sq, quad, odd_sum, even_sum, cube, corr, total;
    double unit, res, total_hi, recip, recip_hi;
    UINT32 hx;
    int big, negative = 0, idx;

    hx = *(ULONGLONG*)&x >> 32;
    big = (hx & 0x7fffffff) >= 0x3FE59428; /* |x| >= 0.6744 */
    if (big) {
        /* work on pi/4 - |x| instead, remembering the sign */
        negative = hx >> 31;
        if (negative) {
            x = -x;
            y = -y;
        }
        x = (pio4 - x) + (pio4lo - y);
        y = 0.0;
    }

    sq = x * x;
    quad = sq * sq;

    /* Horner evaluation in x^4 of the odd-indexed coefficients
       T[1], T[3], ..., T[11], innermost term first */
    odd_sum = T[11];
    for (idx = 9; idx >= 1; idx -= 2)
        odd_sum = T[idx] + quad * odd_sum;

    /* same for the even-indexed coefficients T[2], T[4], ..., T[12] */
    even_sum = T[12];
    for (idx = 10; idx >= 2; idx -= 2)
        even_sum = T[idx] + quad * even_sum;
    even_sum = sq * even_sum;

    cube = sq * x;
    corr = y + sq * (cube * (odd_sum + even_sum) + y) + cube * T[0];
    total = x + corr;

    if (big) {
        unit = 1 - 2 * odd;
        res = unit - 2.0 * (x + (corr - total * total / (total + unit)));
        return negative ? -res : res;
    }
    if (!odd)
        return total;

    /* -1.0/(x+r) has up to 2ulp error, so compute it accurately */
    total_hi = total;
    *(LONGLONG*)&total_hi = *(LONGLONG*)&total_hi & 0xffffffff00000000ULL;
    res = corr - (total_hi - x);       /* total_hi + res = corr + x */
    recip_hi = recip = -1.0 / total;
    *(LONGLONG*)&recip_hi = *(LONGLONG*)&recip_hi & 0xffffffff00000000ULL;
    return recip_hi + recip * (1.0 + recip_hi * total_hi + recip_hi * res);
}
4297
4298 /*********************************************************************
4299  *              tan (MSVCRT.@)
4300  *
4301  * Copied from musl: src/math/tan.c
4302  */
4303 double CDECL tan( double x )
4304 {
4305     double y[2];
4306     UINT32 ix;
4307     unsigned n;
4308
4309     ix = *(ULONGLONG*)&x >> 32;
4310     ix &= 0x7fffffff;
4311
4312     if (ix <= 0x3fe921fb) { /* |x| ~< pi/4 */
4313         if (ix < 0x3e400000) { /* |x| < 2**-27 */
4314             /* raise inexact if x!=0 and underflow if subnormal */
4315             fp_barrier(ix < 0x00100000 ? x / 0x1p120f : x + 0x1p120f);
4316             return x;
4317         }
4318         return __tan(x, 0.0, 0);
4319     }
4320
4321     if (isinf(x))
4322         return math_error(_DOMAIN, "tan", x, 0, x - x);
4323     if (ix >= 0x7ff00000)
4324         return x - x;
4325
4326     n = __rem_pio2(x, y);
4327     return __tan(y[0], y[1], n & 1);
4328 }
4329
4330 /*********************************************************************
4331  *              tanh (MSVCRT.@)
4332  */
4333 double CDECL tanh( double x )
4334 {
4335     UINT64 ui = *(UINT64*)&x;
4336     UINT64 sign = ui & 0x8000000000000000ULL;
4337     UINT32 w;
4338     double t;
4339
4340     /* x = |x| */
4341     ui &= (UINT64)-1 / 2;
4342     x = *(double*)&ui;
4343     w = ui >> 32;
4344
4345     if (w > 0x3fe193ea) {
4346         /* |x| > log(3)/2 ~= 0.5493 or nan */
4347         if (w > 0x40340000) {
4348             if (ui > 0x7ff0000000000000ULL) {
4349                 *(UINT64*)&x = ui | sign | 0x0008000000000000ULL;
4350 #if _MSVCR_VER < 140
4351                 return math_error(_DOMAIN, "tanh", x, 0, x);
4352 #else
4353                 return x;
4354 #endif
4355             }
4356             /* |x| > 20 */
4357             /* note: this branch avoids raising overflow */
4358             fp_barrier(x + 0x1p120f);
4359             t = 1 - 0 / x;
4360         } else {
4361             t = __expm1(2 * x);
4362             t = 1 - 2 / (t + 2);
4363         }
4364     } else if (w > 0x3fd058ae) {
4365         /* |x| > log(5/3)/2 ~= 0.2554 */
4366         t = __expm1(2 * x);
4367         t = t / (t + 2);
4368     } else if (w >= 0x00100000) {
4369         /* |x| >= 0x1p-1022, up to 2ulp error in [0.1,0.2554] */
4370         t = __expm1(-2 * x);
4371         t = -t / (t + 2);
4372     } else {
4373         /* |x| is subnormal */
4374         /* note: the branch above would not raise underflow in [0x1p-1023,0x1p-1022) */
4375         fp_barrier((float)x);
4376         t = x;
4377     }
4378     return sign ? -t : t;
4379 }
4380
4381
4382 #if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
4383
/*
 * Generate an assembly thunk `name` that forwards one x87 stack operand
 * to the function `call`.
 *
 * The thunk pops the top of the x87 stack into slot 0 of its frame (the
 * double argument of `call`), then spills every remaining x87 register
 * into the following 8-byte slots until fxam reports an empty register.
 * The number of slots used is kept at -4(%ebp) across the call.
 * Afterwards the result is popped into slot 0 and all slots are loaded
 * back from the highest index down to 0, so the result is loaded last.
 */
#define CREATE_FPU_FUNC1(name, call) \
    __ASM_GLOBAL_FUNC(name, \
            "pushl   %ebp\n\t" \
            __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \
            __ASM_CFI(".cfi_rel_offset %ebp,0\n\t") \
            "movl    %esp, %ebp\n\t" \
            __ASM_CFI(".cfi_def_cfa_register %ebp\n\t") \
            "subl    $68, %esp\n\t" /* sizeof(double)*8 + sizeof(int) */ \
            "fstpl   (%esp)\n\t"    /* store function argument */ \
            "fwait\n\t" \
            "movl    $1, %ecx\n\t"  /* empty FPU stack */ \
            "1:\n\t" \
            "fxam\n\t"              /* classify st(0) */ \
            "fstsw   %ax\n\t" \
            "and     $0x4500, %ax\n\t" /* keep condition bits C3, C2, C0 */ \
            "cmp     $0x4100, %ax\n\t" /* C3 and C0 set: register is empty */ \
            "je      2f\n\t" \
            "fstpl    (%esp,%ecx,8)\n\t" /* spill into slot %ecx */ \
            "fwait\n\t" \
            "incl    %ecx\n\t" \
            "jmp     1b\n\t" \
            "2:\n\t" \
            "movl    %ecx, -4(%ebp)\n\t" /* remember the slot count */ \
            "call    " __ASM_NAME( #call ) "\n\t" \
            "movl    -4(%ebp), %ecx\n\t" \
            "fstpl   (%esp)\n\t"    /* save result */ \
            "3:\n\t"                /* restore FPU stack */ \
            "decl    %ecx\n\t" \
            "fldl    (%esp,%ecx,8)\n\t" \
            "cmpl    $0, %ecx\n\t" \
            "jne     3b\n\t" \
            "leave\n\t" \
            __ASM_CFI(".cfi_def_cfa %esp,4\n\t") \
            __ASM_CFI(".cfi_same_value %ebp\n\t") \
            "ret")
4419
/*
 * Generate an assembly thunk `name` that forwards two x87 stack operands
 * to the function `call`.
 *
 * The top of the x87 stack is popped into slot 1 (second double argument)
 * and the next register into slot 0 (first double argument).  Remaining
 * x87 registers are spilled into slots 2 and up until fxam reports an
 * empty register; the slot count is kept at -4(%ebp) across the call.
 * Afterwards the result is popped into slot 1 and the slots are loaded
 * back from the highest index down to 1, so the result is loaded last.
 */
#define CREATE_FPU_FUNC2(name, call) \
    __ASM_GLOBAL_FUNC(name, \
            "pushl   %ebp\n\t" \
            __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \
            __ASM_CFI(".cfi_rel_offset %ebp,0\n\t") \
            "movl    %esp, %ebp\n\t" \
            __ASM_CFI(".cfi_def_cfa_register %ebp\n\t") \
            "subl    $68, %esp\n\t" /* sizeof(double)*8 + sizeof(int) */ \
            "fstpl   8(%esp)\n\t"   /* store function argument */ \
            "fwait\n\t" \
            "fstpl   (%esp)\n\t" \
            "fwait\n\t" \
            "movl    $2, %ecx\n\t"  /* empty FPU stack */ \
            "1:\n\t" \
            "fxam\n\t"              /* classify st(0) */ \
            "fstsw   %ax\n\t" \
            "and     $0x4500, %ax\n\t" /* keep condition bits C3, C2, C0 */ \
            "cmp     $0x4100, %ax\n\t" /* C3 and C0 set: register is empty */ \
            "je      2f\n\t" \
            "fstpl    (%esp,%ecx,8)\n\t" /* spill into slot %ecx */ \
            "fwait\n\t" \
            "incl    %ecx\n\t" \
            "jmp     1b\n\t" \
            "2:\n\t" \
            "movl    %ecx, -4(%ebp)\n\t" /* remember the slot count */ \
            "call    " __ASM_NAME( #call ) "\n\t" \
            "movl    -4(%ebp), %ecx\n\t" \
            "fstpl   8(%esp)\n\t"   /* save result */ \
            "3:\n\t"                /* restore FPU stack */ \
            "decl    %ecx\n\t" \
            "fldl    (%esp,%ecx,8)\n\t" \
            "cmpl    $1, %ecx\n\t" \
            "jne     3b\n\t" \
            "leave\n\t" \
            __ASM_CFI(".cfi_def_cfa %esp,4\n\t") \
            __ASM_CFI(".cfi_same_value %ebp\n\t") \
            "ret")
4457
/* Instantiate the _CI* thunks: each one takes its operand(s) from the
 * x87 stack and forwards them to the function named second. */
CREATE_FPU_FUNC1(_CIacos, acos)
CREATE_FPU_FUNC1(_CIasin, asin)
CREATE_FPU_FUNC1(_CIatan, atan)
CREATE_FPU_FUNC2(_CIatan2, atan2)
CREATE_FPU_FUNC1(_CIcos, cos)
CREATE_FPU_FUNC1(_CIcosh, cosh)
CREATE_FPU_FUNC1(_CIexp, exp)
CREATE_FPU_FUNC2(_CIfmod, fmod)
CREATE_FPU_FUNC1(_CIlog, log)
CREATE_FPU_FUNC1(_CIlog10, log10)
CREATE_FPU_FUNC2(_CIpow, pow)
CREATE_FPU_FUNC1(_CIsin, sin)
CREATE_FPU_FUNC1(_CIsinh, sinh)
CREATE_FPU_FUNC1(_CIsqrt, sqrt)
CREATE_FPU_FUNC1(_CItan, tan)
CREATE_FPU_FUNC1(_CItanh, tanh)
4474
/*
 * _ftol: pop the top of the x87 stack as a 64-bit integer and return it
 * in %edx:%eax.  The control word is saved, reloaded with both
 * rounding-control bits (0xc00) set for the conversion, and restored
 * before returning.
 */
__ASM_GLOBAL_FUNC(_ftol,
        "pushl   %ebp\n\t"
        __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t")
        __ASM_CFI(".cfi_rel_offset %ebp,0\n\t")
        "movl    %esp, %ebp\n\t"
        __ASM_CFI(".cfi_def_cfa_register %ebp\n\t")
        "subl    $12, %esp\n\t"     /* sizeof(LONGLONG) + 2*sizeof(WORD) */
        "fnstcw  (%esp)\n\t"        /* original control word at 0(%esp) */
        "mov     (%esp), %ax\n\t"
        "or      $0xc00, %ax\n\t"   /* set both rounding-control bits */
        "mov     %ax, 2(%esp)\n\t"  /* modified control word at 2(%esp) */
        "fldcw   2(%esp)\n\t"
        "fistpq  4(%esp)\n\t"       /* 64-bit integer result at 4(%esp) */
        "fldcw   (%esp)\n\t"        /* restore the original control word */
        "movl    4(%esp), %eax\n\t"
        "movl    8(%esp), %edx\n\t"
        "leave\n\t"
        __ASM_CFI(".cfi_def_cfa %esp,4\n\t")
        __ASM_CFI(".cfi_same_value %ebp\n\t")
        "ret")
4495
4496 #endif /* (defined(__GNUC__) || defined(__clang__)) && defined(__i386__) */
4497
4498 /*********************************************************************
4499  *              _fpclass (MSVCRT.@)
4500  */
4501 int CDECL _fpclass(double num)
4502 {
4503     union { double f; UINT64 i; } u = { num };
4504     int e = u.i >> 52 & 0x7ff;
4505     int s = u.i >> 63;
4506
4507     switch (e)
4508     {
4509     case 0:
4510         if (u.i << 1) return s ? _FPCLASS_ND : _FPCLASS_PD;
4511         return s ? _FPCLASS_NZ : _FPCLASS_PZ;
4512     case 0x7ff:
4513         if (u.i << 12) return ((u.i >> 51) & 1) ? _FPCLASS_QNAN : _FPCLASS_SNAN;
4514         return s ? _FPCLASS_NINF : _FPCLASS_PINF;
4515     default:
4516         return s ? _FPCLASS_NN : _FPCLASS_PN;
4517     }
4518 }
4519
4520 /*********************************************************************
4521  *              _rotl (MSVCRT.@)
4522  */
4523 unsigned int CDECL MSVCRT__rotl(unsigned int num, int shift)
4524 {
4525   shift &= 31;
4526   return (num << shift) | (num >> (32-shift));
4527 }
4528
4529 /*********************************************************************
4530  *              _lrotl (MSVCRT.@)
4531  */
4532 __msvcrt_ulong CDECL MSVCRT__lrotl(__msvcrt_ulong num, int shift)
4533 {
4534   shift &= 0x1f;
4535   return (num << shift) | (num >> (32-shift));
4536 }
4537
4538 /*********************************************************************
4539  *              _lrotr (MSVCRT.@)
4540  */
4541 __msvcrt_ulong CDECL MSVCRT__lrotr(__msvcrt_ulong num, int shift)
4542 {
4543   shift &= 0x1f;
4544   return (num >> shift) | (num << (32-shift));
4545 }
4546
4547 /*********************************************************************
4548  *              _rotr (MSVCRT.@)
4549  */
4550 unsigned int CDECL MSVCRT__rotr(unsigned int num, int shift)
4551 {
4552     shift &= 0x1f;
4553     return (num >> shift) | (num << (32-shift));
4554 }
4555
4556 /*********************************************************************
4557  *              _rotl64 (MSVCRT.@)
4558  */
4559 unsigned __int64 CDECL MSVCRT__rotl64(unsigned __int64 num, int shift)
4560 {
4561   shift &= 63;
4562   return (num << shift) | (num >> (64-shift));
4563 }
4564
4565 /*********************************************************************
4566  *              _rotr64 (MSVCRT.@)
4567  */
4568 unsigned __int64 CDECL MSVCRT__rotr64(unsigned __int64 num, int shift)
4569 {
4570     shift &= 63;
4571     return (num >> shift) | (num << (64-shift));
4572 }
4573
4574 /*********************************************************************
4575  *              abs (MSVCRT.@)
4576  */
4577 int CDECL abs( int n )
4578 {
4579     return n >= 0 ? n : -n;
4580 }
4581
4582 /*********************************************************************
4583  *              labs (MSVCRT.@)
4584  */
4585 __msvcrt_long CDECL labs( __msvcrt_long n )
4586 {
4587     return n >= 0 ? n : -n;
4588 }
4589
#if _MSVCR_VER>=100
/*********************************************************************
 *              llabs (MSVCR100.@)
 *
 * Returns the absolute value of n.  For the most negative value, whose
 * magnitude is not representable, the value wraps back to itself.
 */
__int64 CDECL llabs( __int64 n )
{
    /* Negate in unsigned arithmetic: "-n" is a signed overflow (undefined
     * behaviour) for the minimum value, the unsigned negation is not. */
    return n >= 0 ? n : (__int64)((unsigned __int64)0 - (unsigned __int64)n);
}
#endif
4599
#if _MSVCR_VER>=120
/*********************************************************************
 *              imaxabs (MSVCR120.@)
 *
 * Returns the absolute value of n.  For the most negative value, whose
 * magnitude is not representable, the value wraps back to itself.
 */
intmax_t CDECL imaxabs( intmax_t n )
{
    /* Negate in unsigned arithmetic: "-n" is a signed overflow (undefined
     * behaviour) for the minimum value, the unsigned negation is not. */
    return n >= 0 ? n : (intmax_t)((uintmax_t)0 - (uintmax_t)n);
}
#endif
4609
4610 /*********************************************************************
4611  *              _abs64 (MSVCRT.@)
4612  */
4613 __int64 CDECL _abs64( __int64 n )
4614 {
4615     return n >= 0 ? n : -n;
4616 }
4617
4618 /* Copied from musl: src/math/ilogb.c */
4619 static int __ilogb(double x)
4620 {
4621     union { double f; UINT64 i; } u = { x };
4622     int e = u.i >> 52 & 0x7ff;
4623
4624     if (!e)
4625     {
4626         u.i <<= 12;
4627         if (u.i == 0) return FP_ILOGB0;
4628         /* subnormal x */
4629         for (e = -0x3ff; u.i >> 63 == 0; e--, u.i <<= 1);
4630         return e;
4631     }
4632     if (e == 0x7ff) return u.i << 12 ? FP_ILOGBNAN : INT_MAX;
4633     return e - 0x3ff;
4634 }
4635
4636 /*********************************************************************
4637  *              _logb (MSVCRT.@)
4638  *
4639  * Copied from musl: src/math/logb.c
4640  */
4641 double CDECL _logb(double x)
4642 {
4643     if (!isfinite(x))
4644         return x * x;
4645     if (x == 0)
4646         return math_error(_SING, "_logb", x, 0, -1 / (x * x));
4647     return __ilogb(x);
4648 }
4649
/*
 * Helper for _hypot(): squares x in two parts.
 *
 * *hi receives x * x; *lo receives the correction computed from the
 * split of x into a high part and a low part (head + tail == x).
 */
static void sq(double *hi, double *lo, double x)
{
    /* multiplying by 2^27 + 1 and cancelling keeps only the upper bits */
    const double scaled = x * (0x1p27 + 1);
    const double head = x - scaled + scaled;
    const double tail = x - head;

    *hi = x * x;
    *lo = head * head - *hi + 2 * head * tail + tail * tail;
}
4660
4661 /*********************************************************************
4662  *              _hypot (MSVCRT.@)
4663  *
4664  * Copied from musl: src/math/hypot.c
4665  */
4666 double CDECL _hypot(double x, double y)
4667 {
4668     UINT64 ux = *(UINT64*)&x, uy = *(UINT64*)&y, ut;
4669     double hx, lx, hy, ly, z;
4670     int ex, ey;
4671
4672     /* arrange |x| >= |y| */
4673     ux &= -1ULL >> 1;
4674     uy &= -1ULL >> 1;
4675     if (ux < uy) {
4676         ut = ux;
4677         ux = uy;
4678         uy = ut;
4679     }
4680
4681     /* special cases */
4682     ex = ux >> 52;
4683     ey = uy >> 52;
4684     x = *(double*)&ux;
4685     y = *(double*)&uy;
4686     /* note: hypot(inf,nan) == inf */
4687     if (ey == 0x7ff)
4688         return y;
4689     if (ex == 0x7ff || uy == 0)
4690         return x;
4691     /* note: hypot(x,y) ~= x + y*y/x/2 with inexact for small y/x */
4692     /* 64 difference is enough for ld80 double_t */
4693     if (ex - ey > 64)
4694         return x + y;
4695
4696     /* precise sqrt argument in nearest rounding mode without overflow */
4697     /* xh*xh must not overflow and xl*xl must not underflow in sq */
4698     z = 1;
4699     if (ex > 0x3ff + 510) {
4700         z = 0x1p700;
4701         x *= 0x1p-700;
4702         y *= 0x1p-700;
4703     } else if (ey < 0x3ff - 450) {
4704         z = 0x1p-700;
4705         x *= 0x1p700;
4706         y *= 0x1p700;
4707     }
4708     sq(&hx, &lx, x);
4709     sq(&hy, &ly, y);
4710     return z * sqrt(ly + lx + hy + hx);
4711 }
4712
4713 /*********************************************************************
4714  *      _hypotf (MSVCRT.@)
4715  *
4716  * Copied from musl: src/math/hypotf.c
4717  */
4718 float CDECL _hypotf(float x, float y)
4719 {
4720     UINT32 ux = *(UINT32*)&x, uy = *(UINT32*)&y, ut;
4721     float z;
4722
4723     ux &= -1U >> 1;
4724     uy &= -1U >> 1;
4725     if (ux < uy) {
4726         ut = ux;
4727         ux = uy;
4728         uy = ut;
4729     }
4730
4731     x = *(float*)&ux;
4732     y = *(float*)&uy;
4733     if (uy == 0xff << 23)
4734         return y;
4735     if (ux >= 0xff << 23 || uy == 0 || ux - uy >= 25 << 23)
4736         return x + y;
4737
4738     z = 1;
4739     if (ux >= (0x7f + 60) << 23) {
4740         z = 0x1p90f;
4741         x *= 0x1p-90f;
4742         y *= 0x1p-90f;
4743     } else if (uy < (0x7f - 60) << 23) {
4744         z = 0x1p-90f;
4745         x *= 0x1p90f;
4746         y *= 0x1p90f;
4747     }
4748     return z * sqrtf((double)x * x + (double)y * y);
4749 }
4750
4751 /*********************************************************************
4752  *              ceil (MSVCRT.@)
4753  *
4754  * Based on musl: src/math/ceilf.c
4755  */
4756 double CDECL ceil( double x )
4757 {
4758     union {double f; UINT64 i;} u = {x};
4759     int e = (u.i >> 52 & 0x7ff) - 0x3ff;
4760     UINT64 m;
4761
4762     if (e >= 52)
4763         return x;
4764     if (e >= 0) {
4765         m = 0x000fffffffffffffULL >> e;
4766         if ((u.i & m) == 0)
4767             return x;
4768         if (u.i >> 63 == 0)
4769             u.i += m;
4770         u.i &= ~m;
4771     } else {
4772         if (u.i >> 63)
4773             return -0.0;
4774         else if (u.i << 1)
4775             return 1.0;
4776     }
4777     return u.f;
4778 }
4779
4780 /*********************************************************************
4781  *              floor (MSVCRT.@)
4782  *
4783  * Based on musl: src/math/floorf.c
4784  */
4785 double CDECL floor( double x )
4786 {
4787     union {double f; UINT64 i;} u = {x};
4788     int e = (int)(u.i >> 52 & 0x7ff) - 0x3ff;
4789     UINT64 m;
4790
4791     if (e >= 52)
4792         return x;
4793     if (e >= 0) {
4794         m = 0x000fffffffffffffULL >> e;
4795         if ((u.i & m) == 0)
4796             return x;
4797         if (u.i >> 63)
4798             u.i += m;
4799         u.i &= ~m;
4800     } else {
4801         if (u.i >> 63 == 0)
4802             return 0;
4803         else if (u.i << 1)
4804             return -1;
4805     }
4806     return u.f;
4807 }
4808
4809 /*********************************************************************
4810  *      fma (MSVCRT.@)
4811  *
4812  * Copied from musl: src/math/fma.c
4813  */
/* A double taken apart by normalize() for use in fma(). */
struct fma_num
{
    UINT64 m;   /* mantissa with the implicit bit set, shifted left by one */
    int e;      /* exponent, rebased by normalize() */
    int sign;   /* nonzero (0x800) when the sign bit of the input was set */
};
4820
4821 static struct fma_num normalize(double x)
4822 {
4823     UINT64 ix = *(UINT64*)&x;
4824     int e = ix >> 52;
4825     int sign = e & 0x800;
4826     struct fma_num ret;
4827
4828     e &= 0x7ff;
4829     if (!e) {
4830         x *= 0x1p63;
4831         ix = *(UINT64*)&x;
4832         e = ix >> 52 & 0x7ff;
4833         e = e ? e - 63 : 0x800;
4834     }
4835     ix &= (1ull << 52) - 1;
4836     ix |= 1ull << 52;
4837     ix <<= 1;
4838     e -= 0x3ff + 52 + 1;
4839
4840     ret.m = ix;
4841     ret.e = e;
4842     ret.sign = sign;
4843     return ret;
4844 }
4845
4846 static void mul(UINT64 *hi, UINT64 *lo, UINT64 x, UINT64 y)
4847 {
4848     UINT64 t1, t2, t3;
4849     UINT64 xlo = (UINT32)x, xhi = x >> 32;
4850     UINT64 ylo = (UINT32)y, yhi = y >> 32;
4851
4852     t1 = xlo * ylo;
4853     t2 = xlo * yhi + xhi * ylo;
4854     t3 = xhi * yhi;
4855     *lo = t1 + (t2 << 32);
4856     *hi = t3 + (t2 >> 32) + (t1 > *lo);
4857 }
4858
/*
 * Computes x * y + z.
 *
 * The operands are taken apart with normalize(), the product of the two
 * mantissas is formed as a 128-bit integer with mul(), z is aligned to
 * it and added or subtracted in integer arithmetic, and the result is
 * converted back to double and scaled with __scalbn().
 *
 * When x or y is zero, infinite or NaN, or z is zero, the result is
 * computed as plain x * y + z; errno is set to EDOM if that yields NaN
 * although none of the tested inputs was NaN.  An infinite or NaN z is
 * returned as is.
 */
double CDECL fma( double x, double y, double z )
{
    int e, d, sign, samesign, nonzero;
    UINT64 rhi, rlo, zhi, zlo;
    struct fma_num nx, ny, nz;
    double r;
    INT64 i;

    /* normalize so top 10bits and last bit are 0 */
    nx = normalize(x);
    ny = normalize(y);
    nz = normalize(z);

    /* x or y is zero, inf or nan: normalize() gives them an exponent at
     * or above this limit */
    if (nx.e >= 0x7ff - 0x3ff - 52 - 1 || ny.e >= 0x7ff - 0x3ff - 52 - 1) {
        r = x * y + z;
        if (!isnan(x) && !isnan(y) && !isnan(z) && isnan(r)) *_errno() = EDOM;
        return r;
    }
    if (nz.e >= 0x7ff - 0x3ff - 52 - 1) {
        if (nz.e > 0x7ff - 0x3ff - 52 - 1) {/* z==0 */
            r = x * y + z;
            if (!isnan(x) && !isnan(y) && isnan(r)) *_errno() = EDOM;
            return r;
        }
        /* z is inf or nan */
        return z;
    }

    /* mul: r = x*y */
    mul(&rhi, &rlo, nx.m, ny.m);
    /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */

    /* align exponents */
    e = nx.e + ny.e;
    d = nz.e - e;
    /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */
    if (d > 0) {
        if (d < 64) {
            zlo = nz.m << d;
            zhi = nz.m >> (64 - d);
        } else {
            zlo = 0;
            zhi = nz.m;
            e = nz.e - 64;
            d -= 64;
            if (d < 64 && d) {
                /* bits shifted out of rlo are kept as a sticky low bit */
                rlo = rhi << (64 - d) | rlo >> d | !!(rlo << (64 - d));
                rhi = rhi >> d;
            } else if (d) {
                /* product is entirely below z: only a sticky bit remains */
                rlo = 1;
                rhi = 0;
            }
        }
    } else {
        zhi = 0;
        d = -d;
        if (d == 0) {
            zlo = nz.m;
        } else if (d < 64) {
            /* bits shifted out of z are kept as a sticky low bit */
            zlo = nz.m >> d | !!(nz.m << (64 - d));
        } else {
            zlo = 1;
        }
    }

    /* add */
    sign = nx.sign ^ ny.sign;
    samesign = !(sign ^ nz.sign);
    nonzero = 1;
    if (samesign) {
        /* r += z */
        rlo += zlo;
        rhi += zhi + (rlo < zlo);
    } else {
        /* r -= z */
        UINT64 t = rlo;
        rlo -= zlo;
        rhi = rhi - zhi - (t < rlo);
        if (rhi >> 63) {
            /* difference went negative: negate it and flip the sign */
            rlo = -rlo;
            rhi = -rhi - !!rlo;
            sign = !sign;
        }
        nonzero = !!rhi;
    }

    /* set rhi to top 63bit of the result (last bit is sticky) */
    if (nonzero) {
        e += 64;
        /* d becomes (number of leading zero bits of rhi) - 1 */
        if (rhi >> 32) {
            BitScanReverse((DWORD*)&d, rhi >> 32);
            d = 31 - d - 1;
        } else {
            BitScanReverse((DWORD*)&d, rhi);
            d = 63 - d - 1;
        }
        /* note: d > 0 */
        rhi = rhi << d | rlo >> (64 - d) | !!(rlo << d);
    } else if (rlo) {
        /* d becomes (number of leading zero bits of rlo) - 1 */
        if (rlo >> 32) {
            BitScanReverse((DWORD*)&d, rlo >> 32);
            d = 31 - d - 1;
        } else {
            BitScanReverse((DWORD*)&d, rlo);
            d = 63 - d - 1;
        }
        if (d < 0)
            rhi = rlo >> 1 | (rlo & 1);
        else
            rhi = rlo << d;
    } else {
        /* exact +-0 */
        return x * y + z;
    }
    e -= d;

    /* convert to double */
    i = rhi; /* i is in [1<<62,(1<<63)-1] */
    if (sign)
        i = -i;
    r = i; /* |r| is in [0x1p62,0x1p63] */

    if (e < -1022 - 62) {
        /* result is subnormal before rounding */
        if (e == -1022 - 63) {
            double c = 0x1p63;
            if (sign)
                c = -c;
            if (r == c) {
                /* min normal after rounding, underflow depends
                   on arch behaviour which can be imitated by
                   a double to float conversion */
                float fltmin = 0x0.ffffff8p-63 * FLT_MIN * r;
                return DBL_MIN / FLT_MIN * fltmin;
            }
            /* one bit is lost when scaled, add another top bit to
               only round once at conversion if it is inexact */
            if (rhi << 53) {
                double tiny;

                i = rhi >> 1 | (rhi & 1) | 1ull << 62;
                if (sign)
                    i = -i;
                r = i;
                r = 2 * r - c; /* remove top bit */

                /* raise underflow portably, such that it
                   cannot be optimized away */
                tiny = DBL_MIN / FLT_MIN * r;
                r += (double)(tiny * tiny) * (r - r);
            }
        } else {
            /* only round once when scaled */
            d = 10;
            i = (rhi >> d | !!(rhi << (64 - d))) << d;
            if (sign)
                i = -i;
            r = i;
        }
    }
    return __scalbn(r, e);
}
5020
5021 /*********************************************************************
5022  *      fmaf (MSVCRT.@)
5023  *
5024  * Copied from musl: src/math/fmaf.c
5025  */
5026 float CDECL fmaf( float x, float y, float z )
5027 {
5028     union { double f; UINT64 i; } u;
5029     double xy, adjust;
5030     int e;
5031
5032     xy = (double)x * y;
5033     u.f = xy + z;
5034     e = u.i>>52 & 0x7ff;
5035     /* Common case: The double precision result is fine. */
5036     if ((u.i & 0x1fffffff) != 0x10000000 || /* not a halfway case */
5037             e == 0x7ff || /* NaN */
5038             (u.f - xy == z && u.f - z == xy) || /* exact */
5039             (_controlfp(0, 0) & _MCW_RC) != _RC_NEAR) /* not round-to-nearest */
5040     {
5041         if (!isnan(x) && !isnan(y) && !isnan(z) && isnan(u.f)) *_errno() = EDOM;
5042
5043         /* underflow may not be raised correctly, example:
5044            fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) */
5045         if (e < 0x3ff-126 && e >= 0x3ff-149 && _statusfp() & _SW_INEXACT)
5046             fp_barrierf((float)u.f * (float)u.f);
5047         return u.f;
5048     }
5049
5050     /*
5051      * If result is inexact, and exactly halfway between two float values,
5052      * we need to adjust the low-order bit in the direction of the error.
5053      */
5054     _controlfp(_RC_CHOP, _MCW_RC);
5055     adjust = fp_barrier(xy + z);
5056     _controlfp(_RC_NEAR, _MCW_RC);
5057     if (u.f == adjust)
5058         u.i++;
5059     return u.f;
5060 }
5061
5062 /*********************************************************************
5063  *              fabs (MSVCRT.@)
5064  *
5065  * Copied from musl: src/math/fabsf.c
5066  */
5067 double CDECL fabs( double x )
5068 {
5069     union { double f; UINT64 i; } u = { x };
5070     u.i &= ~0ull >> 1;
5071     return u.f;
5072 }
5073
5074 /*********************************************************************
5075  *              frexp (MSVCRT.@)
5076  *
5077  * Copied from musl: src/math/frexp.c
5078  */
5079 double CDECL frexp( double x, int *e )
5080 {
5081     UINT64 ux = *(UINT64*)&x;
5082     int ee = ux >> 52 & 0x7ff;
5083
5084     if (!ee) {
5085         if (x) {
5086             x = frexp(x * 0x1p64, e);
5087             *e -= 64;
5088         } else *e = 0;
5089         return x;
5090     } else if (ee == 0x7ff) {
5091         return x;
5092     }
5093
5094     *e = ee - 0x3fe;
5095     ux &= 0x800fffffffffffffull;
5096     ux |= 0x3fe0000000000000ull;
5097     return *(double*)&ux;
5098 }
5099
5100 /*********************************************************************
5101  *              modf (MSVCRT.@)
5102  *
5103  * Copied from musl: src/math/modf.c
5104  */
5105 double CDECL modf( double x, double *iptr )
5106 {
5107     union {double f; UINT64 i;} u = {x};
5108     UINT64 mask;
5109     int e = (u.i >> 52 & 0x7ff) - 0x3ff;
5110
5111     /* no fractional part */
5112     if (e >= 52) {
5113         *iptr = x;
5114         if (e == 0x400 && u.i << 12 != 0) /* nan */
5115             return x;
5116         u.i &= 1ULL << 63;
5117         return u.f;
5118     }
5119
5120     /* no integral part*/
5121     if (e < 0) {
5122         u.i &= 1ULL << 63;
5123         *iptr = u.f;
5124         return x;
5125     }
5126
5127     mask = -1ULL >> 12 >> e;
5128     if ((u.i & mask) == 0) {
5129         *iptr = x;
5130         u.i &= 1ULL << 63;
5131         return u.f;
5132     }
5133     u.i &= ~mask;
5134     *iptr = u.f;
5135     return x - u.f;
5136 }
5137
5138 #if defined(__i386__) || defined(__x86_64__)
5139 static BOOL _setfp_sse( unsigned int *cw, unsigned int cw_mask,
5140         unsigned int *sw, unsigned int sw_mask )
5141 {
5142 #if defined(__GNUC__) || defined(__clang__)
5143     unsigned long old_fpword, fpword;
5144     unsigned int flags;
5145
5146     __asm__ __volatile__( "stmxcsr %0" : "=m" (fpword) );
5147     old_fpword = fpword;
5148
5149     cw_mask &= _MCW_EM | _MCW_RC | _MCW_DN;
5150     sw_mask &= _MCW_EM;
5151
5152     if (sw)
5153     {
5154         flags = 0;
5155         if (fpword & 0x1) flags |= _SW_INVALID;
5156         if (fpword & 0x2) flags |= _SW_DENORMAL;
5157         if (fpword & 0x4) flags |= _SW_ZERODIVIDE;
5158         if (fpword & 0x8) flags |= _SW_OVERFLOW;
5159         if (fpword & 0x10) flags |= _SW_UNDERFLOW;
5160         if (fpword & 0x20) flags |= _SW_INEXACT;
5161
5162         *sw = (flags & ~sw_mask) | (*sw & sw_mask);
5163         TRACE("sse2 update sw %08x to %08x\n", flags, *sw);
5164         fpword &= ~0x3f;
5165         if (*sw & _SW_INVALID) fpword |= 0x1;
5166         if (*sw & _SW_DENORMAL) fpword |= 0x2;
5167         if (*sw & _SW_ZERODIVIDE) fpword |= 0x4;
5168         if (*sw & _SW_OVERFLOW) fpword |= 0x8;
5169         if (*sw & _SW_UNDERFLOW) fpword |= 0x10;
5170         if (*sw & _SW_INEXACT) fpword |= 0x20;
5171         *sw = flags;
5172     }
5173
5174     if (cw)
5175     {
5176         flags = 0;
5177         if (fpword & 0x80) flags |= _EM_INVALID;
5178         if (fpword & 0x100) flags |= _EM_DENORMAL;
5179         if (fpword & 0x200) flags |= _EM_ZERODIVIDE;
5180         if (fpword & 0x400) flags |= _EM_OVERFLOW;
5181         if (fpword & 0x800) flags |= _EM_UNDERFLOW;
5182         if (fpword & 0x1000) flags |= _EM_INEXACT;
5183         switch (fpword & 0x6000)
5184         {
5185         case 0x6000: flags |= _RC_UP|_RC_DOWN; break;
5186         case 0x4000: flags |= _RC_UP; break;
5187         case 0x2000: flags |= _RC_DOWN; break;
5188         }
5189         switch (fpword & 0x8040)
5190         {
5191         case 0x0040: flags |= _DN_FLUSH_OPERANDS_SAVE_RESULTS; break;
5192         case 0x8000: flags |= _DN_SAVE_OPERANDS_FLUSH_RESULTS; break;
5193         case 0x8040: flags |= _DN_FLUSH; break;
5194         }
5195
5196         *cw = (flags & ~cw_mask) | (*cw & cw_mask);
5197         TRACE("sse2 update cw %08x to %08x\n", flags, *cw);
5198         fpword &= ~0xffc0;
5199         if (*cw & _EM_INVALID) fpword |= 0x80;
5200         if (*cw & _EM_DENORMAL) fpword |= 0x100;
5201         if (*cw & _EM_ZERODIVIDE) fpword |= 0x200;
5202         if (*cw & _EM_OVERFLOW) fpword |= 0x400;
5203         if (*cw & _EM_UNDERFLOW) fpword |= 0x800;
5204         if (*cw & _EM_INEXACT) fpword |= 0x1000;
5205         switch (*cw & _MCW_RC)
5206         {
5207         case _RC_UP|_RC_DOWN: fpword |= 0x6000; break;
5208         case _RC_UP: fpword |= 0x4000; break;
5209         case _RC_DOWN: fpword |= 0x2000; break;
5210         }
5211         switch (*cw & _MCW_DN)
5212         {
5213         case _DN_FLUSH_OPERANDS_SAVE_RESULTS: fpword |= 0x0040; break;
5214         case _DN_SAVE_OPERANDS_FLUSH_RESULTS: fpword |= 0x8000; break;
5215         case _DN_FLUSH: fpword |= 0x8040; break;
5216         }
5217
5218         /* clear status word if anything changes */
5219         if (fpword != old_fpword && !sw)
5220         {
5221             TRACE("sse2 clear status word\n");
5222             fpword &= ~0x3f;
5223         }
5224     }
5225
5226     if (fpword != old_fpword)
5227         __asm__ __volatile__( "ldmxcsr %0" : : "m" (fpword) );
5228     return TRUE;
5229 #else
5230     FIXME("not implemented\n");
5231     if (cw) *cw = 0;
5232     if (sw) *sw = 0;
5233     return FALSE;
5234 #endif
5235 }
5236 #endif
5237
5238 /**********************************************************************
5239  *              _statusfp2 (MSVCRT.@)
5240  *
5241  * Not exported by native msvcrt, added in msvcr80.
5242  */
5243 #if defined(__i386__) || defined(__x86_64__)
5244 void CDECL _statusfp2( unsigned int *x86_sw, unsigned int *sse2_sw )
5245 {
5246 #if defined(__GNUC__) || defined(__clang__)
5247     unsigned int flags;
5248     unsigned long fpword;
5249
5250     if (x86_sw)
5251     {
5252         __asm__ __volatile__( "fstsw %0" : "=m" (fpword) );
5253         flags = 0;
5254         if (fpword & 0x1)  flags |= _SW_INVALID;
5255         if (fpword & 0x2)  flags |= _SW_DENORMAL;
5256         if (fpword & 0x4)  flags |= _SW_ZERODIVIDE;
5257         if (fpword & 0x8)  flags |= _SW_OVERFLOW;
5258         if (fpword & 0x10) flags |= _SW_UNDERFLOW;
5259         if (fpword & 0x20) flags |= _SW_INEXACT;
5260         *x86_sw = flags;
5261     }
5262
5263     if (!sse2_sw) return;
5264
5265     if (sse2_supported)
5266         _setfp_sse(NULL, 0, sse2_sw, 0);
5267     else *sse2_sw = 0;
5268 #else
5269     FIXME( "not implemented\n" );
5270 #endif
5271 }
5272 #endif
5273
5274 /**********************************************************************
5275  *              _statusfp (MSVCRT.@)
5276  */
5277 unsigned int CDECL _statusfp(void)
5278 {
5279     unsigned int flags = 0;
5280 #if defined(__i386__) || defined(__x86_64__)
5281     unsigned int x86_sw, sse2_sw;
5282
5283     _statusfp2( &x86_sw, &sse2_sw );
5284     /* FIXME: there's no definition for ambiguous status, just return all status bits for now */
5285     flags = x86_sw | sse2_sw;
5286 #elif defined(__aarch64__)
5287     ULONG_PTR fpsr;
5288
5289     __asm__ __volatile__( "mrs %0, fpsr" : "=r" (fpsr) );
5290     if (fpsr & 0x1)  flags |= _SW_INVALID;
5291     if (fpsr & 0x2)  flags |= _SW_ZERODIVIDE;
5292     if (fpsr & 0x4)  flags |= _SW_OVERFLOW;
5293     if (fpsr & 0x8)  flags |= _SW_UNDERFLOW;
5294     if (fpsr & 0x10) flags |= _SW_INEXACT;
5295     if (fpsr & 0x80) flags |= _SW_DENORMAL;
5296 #elif defined(__arm__) && !defined(__SOFTFP__)
5297     DWORD fpscr;
5298
5299     __asm__ __volatile__( "vmrs %0, fpscr" : "=r" (fpscr) );
5300     if (fpscr & 0x1)  flags |= _SW_INVALID;
5301     if (fpscr & 0x2)  flags |= _SW_ZERODIVIDE;
5302     if (fpscr & 0x4)  flags |= _SW_OVERFLOW;
5303     if (fpscr & 0x8)  flags |= _SW_UNDERFLOW;
5304     if (fpscr & 0x10) flags |= _SW_INEXACT;
5305     if (fpscr & 0x80) flags |= _SW_DENORMAL;
5306 #else
5307     FIXME( "not implemented\n" );
5308 #endif
5309     return flags;
5310 }
5311
5312 /*********************************************************************
5313  *              _clearfp (MSVCRT.@)
5314  */
5315 unsigned int CDECL _clearfp(void)
5316 {
5317     unsigned int flags = 0;
5318 #if (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__))
5319     unsigned long fpword;
5320
5321     __asm__ __volatile__( "fnstsw %0; fnclex" : "=m" (fpword) );
5322     if (fpword & 0x1)  flags |= _SW_INVALID;
5323     if (fpword & 0x2)  flags |= _SW_DENORMAL;
5324     if (fpword & 0x4)  flags |= _SW_ZERODIVIDE;
5325     if (fpword & 0x8)  flags |= _SW_OVERFLOW;
5326     if (fpword & 0x10) flags |= _SW_UNDERFLOW;
5327     if (fpword & 0x20) flags |= _SW_INEXACT;
5328
5329     if (sse2_supported)
5330     {
5331         unsigned int sse_sw = 0;
5332
5333         _setfp_sse(NULL, 0, &sse_sw, _MCW_EM);
5334         flags |= sse_sw;
5335     }
5336 #elif defined(__aarch64__)
5337     ULONG_PTR fpsr;
5338
5339     __asm__ __volatile__( "mrs %0, fpsr" : "=r" (fpsr) );
5340     if (fpsr & 0x1)  flags |= _SW_INVALID;
5341     if (fpsr & 0x2)  flags |= _SW_ZERODIVIDE;
5342     if (fpsr & 0x4)  flags |= _SW_OVERFLOW;
5343     if (fpsr & 0x8)  flags |= _SW_UNDERFLOW;
5344     if (fpsr & 0x10) flags |= _SW_INEXACT;
5345     if (fpsr & 0x80) flags |= _SW_DENORMAL;
5346     fpsr &= ~0x9f;
5347     __asm__ __volatile__( "msr fpsr, %0" :: "r" (fpsr) );
5348 #elif defined(__arm__) && !defined(__SOFTFP__)
5349     DWORD fpscr;
5350
5351     __asm__ __volatile__( "vmrs %0, fpscr" : "=r" (fpscr) );
5352     if (fpscr & 0x1)  flags |= _SW_INVALID;
5353     if (fpscr & 0x2)  flags |= _SW_ZERODIVIDE;
5354     if (fpscr & 0x4)  flags |= _SW_OVERFLOW;
5355     if (fpscr & 0x8)  flags |= _SW_UNDERFLOW;
5356     if (fpscr & 0x10) flags |= _SW_INEXACT;
5357     if (fpscr & 0x80) flags |= _SW_DENORMAL;
5358     fpscr &= ~0x9f;
5359     __asm__ __volatile__( "vmsr fpscr, %0" :: "r" (fpscr) );
5360 #else
5361     FIXME( "not implemented\n" );
5362 #endif
5363     return flags;
5364 }
5365
5366 /*********************************************************************
5367  *              __fpecode (MSVCRT.@)
5368  */
5369 int * CDECL __fpecode(void)
5370 {
5371     return &msvcrt_get_thread_data()->fpecode;
5372 }
5373
5374 /*********************************************************************
5375  *              ldexp (MSVCRT.@)
5376  */
5377 double CDECL ldexp(double num, int exp)
5378 {
5379   double z = __scalbn(num, exp);
5380
5381   if (isfinite(num) && !isfinite(z))
5382     return math_error(_OVERFLOW, "ldexp", num, exp, z);
5383   if (num && isfinite(num) && !z)
5384     return math_error(_UNDERFLOW, "ldexp", num, exp, z);
5385   return z;
5386 }
5387
5388 /*********************************************************************
5389  *              _cabs (MSVCRT.@)
5390  */
5391 double CDECL _cabs(struct _complex num)
5392 {
5393   return sqrt(num.x * num.x + num.y * num.y);
5394 }
5395
/*********************************************************************
 *              _chgsign (MSVCRT.@)
 *
 * Returns num with its sign bit inverted. Works on the raw bit pattern,
 * so zeros, infinities and NaNs get their sign flipped too.
 */
double CDECL _chgsign(double num)
{
    union { double f; UINT64 i; } bits;

    bits.f = num;
    bits.i ^= (UINT64)1 << 63; /* bit 63 is the sign bit */
    return bits.f;
}
5405
/*********************************************************************
 *              __control87_2 (MSVCR80.@)
 *
 * Not exported by native msvcrt, added in msvcr80.
 *
 * Reads and optionally modifies the x87 and SSE2 control words
 * independently, using the _EM_*, _RC_*, _PC_* and _IC_* constants.
 *
 * PARAMS
 *  newval   [I] new control bits
 *  mask     [I] selects which bits of newval are applied; 0 only queries
 *  x86_cw   [O] receives the resulting x87 control flags; NULL skips the
 *               x87 unit entirely
 *  sse2_cw  [O] receives the resulting SSE2 control flags (0 when SSE2
 *               is not supported); NULL skips the SSE2 unit
 *
 * RETURNS
 *  1 on success, 0 on failure.
 */
#ifdef __i386__
int CDECL __control87_2( unsigned int newval, unsigned int mask,
                         unsigned int *x86_cw, unsigned int *sse2_cw )
{
#if defined(__GNUC__) || defined(__clang__)
    unsigned long fpword;
    unsigned int flags;

    if (x86_cw)
    {
        __asm__ __volatile__( "fstcw %0" : "=m" (fpword) );

        /* Convert into mask constants */
        flags = 0;
        if (fpword & 0x1)  flags |= _EM_INVALID;
        if (fpword & 0x2)  flags |= _EM_DENORMAL;
        if (fpword & 0x4)  flags |= _EM_ZERODIVIDE;
        if (fpword & 0x8)  flags |= _EM_OVERFLOW;
        if (fpword & 0x10) flags |= _EM_UNDERFLOW;
        if (fpword & 0x20) flags |= _EM_INEXACT;
        switch (fpword & 0xc00)
        {
        case 0xc00: flags |= _RC_UP|_RC_DOWN; break;
        case 0x800: flags |= _RC_UP; break;
        case 0x400: flags |= _RC_DOWN; break;
        }
        switch (fpword & 0x300)
        {
        case 0x0:   flags |= _PC_24; break;
        case 0x200: flags |= _PC_53; break;
        case 0x300: flags |= _PC_64; break;
        }
        if (fpword & 0x1000) flags |= _IC_AFFINE;

        TRACE( "x86 flags=%08x newval=%08x mask=%08x\n", flags, newval, mask );
        if (mask)
        {
            flags = (flags & ~mask) | (newval & mask);

            /* Convert (masked) value back to fp word */
            fpword = 0;
            if (flags & _EM_INVALID)    fpword |= 0x1;
            if (flags & _EM_DENORMAL)   fpword |= 0x2;
            if (flags & _EM_ZERODIVIDE) fpword |= 0x4;
            if (flags & _EM_OVERFLOW)   fpword |= 0x8;
            if (flags & _EM_UNDERFLOW)  fpword |= 0x10;
            if (flags & _EM_INEXACT)    fpword |= 0x20;
            switch (flags & _MCW_RC)
            {
            case _RC_UP|_RC_DOWN:   fpword |= 0xc00; break;
            case _RC_UP:            fpword |= 0x800; break;
            case _RC_DOWN:          fpword |= 0x400; break;
            }
            switch (flags & _MCW_PC)
            {
            case _PC_64: fpword |= 0x300; break;
            case _PC_53: fpword |= 0x200; break;
            case _PC_24: fpword |= 0x0; break;
            }
            if (flags & _IC_AFFINE) fpword |= 0x1000;

            __asm__ __volatile__( "fldcw %0" : : "m" (fpword) );
        }
        *x86_cw = flags;
    }

    if (!sse2_cw) return 1;

    if (sse2_supported)
    {
        /* _setfp_sse() takes the new value through the same pointer it
         * uses for the result */
        *sse2_cw = newval;
        if (!_setfp_sse(sse2_cw, mask, NULL, 0))
            return 0;
    }
    else *sse2_cw = 0;

    return 1;
#else
    FIXME( "not implemented\n" );
    /* Do not leave the outputs undefined: _control87() combines both
     * values without checking the return code. */
    if (x86_cw) *x86_cw = 0;
    if (sse2_cw) *sse2_cw = 0;
    return 0;
#endif
}
#endif
5494
5495 /*********************************************************************
5496  *              _control87 (MSVCRT.@)
5497  */
5498 unsigned int CDECL _control87(unsigned int newval, unsigned int mask)
5499 {
5500     unsigned int flags = 0;
5501 #ifdef __i386__
5502     unsigned int sse2_cw;
5503
5504     __control87_2( newval, mask, &flags, &sse2_cw );
5505
5506     if ((flags ^ sse2_cw) & (_MCW_EM | _MCW_RC)) flags |= _EM_AMBIGUOUS;
5507     flags |= sse2_cw;
5508 #elif defined(__x86_64__)
5509     flags = newval;
5510     _setfp_sse(&flags, mask, NULL, 0);
5511 #elif defined(__aarch64__)
5512     ULONG_PTR fpcr;
5513
5514     __asm__ __volatile__( "mrs %0, fpcr" : "=r" (fpcr) );
5515     if (!(fpcr & 0x100))  flags |= _EM_INVALID;
5516     if (!(fpcr & 0x200))  flags |= _EM_ZERODIVIDE;
5517     if (!(fpcr & 0x400))  flags |= _EM_OVERFLOW;
5518     if (!(fpcr & 0x800))  flags |= _EM_UNDERFLOW;
5519     if (!(fpcr & 0x1000)) flags |= _EM_INEXACT;
5520     if (!(fpcr & 0x8000)) flags |= _EM_DENORMAL;
5521     switch (fpcr & 0xc00000)
5522     {
5523     case 0x400000: flags |= _RC_UP; break;
5524     case 0x800000: flags |= _RC_DOWN; break;
5525     case 0xc00000: flags |= _RC_CHOP; break;
5526     }
5527     flags = (flags & ~mask) | (newval & mask);
5528     fpcr &= ~0xc09f00ul;
5529     if (!(flags & _EM_INVALID)) fpcr |= 0x100;
5530     if (!(flags & _EM_ZERODIVIDE)) fpcr |= 0x200;
5531     if (!(flags & _EM_OVERFLOW)) fpcr |= 0x400;
5532     if (!(flags & _EM_UNDERFLOW)) fpcr |= 0x800;
5533     if (!(flags & _EM_INEXACT)) fpcr |= 0x1000;
5534     if (!(flags & _EM_DENORMAL)) fpcr |= 0x8000;
5535     switch (flags & _MCW_RC)
5536     {
5537     case _RC_CHOP: fpcr |= 0xc00000; break;
5538     case _RC_UP:   fpcr |= 0x400000; break;
5539     case _RC_DOWN: fpcr |= 0x800000; break;
5540     }
5541     __asm__ __volatile__( "msr fpcr, %0" :: "r" (fpcr) );
5542 #elif defined(__arm__) && !defined(__SOFTFP__)
5543     DWORD fpscr;
5544
5545     __asm__ __volatile__( "vmrs %0, fpscr" : "=r" (fpscr) );
5546     if (!(fpscr & 0x100))  flags |= _EM_INVALID;
5547     if (!(fpscr & 0x200))  flags |= _EM_ZERODIVIDE;
5548     if (!(fpscr & 0x400))  flags |= _EM_OVERFLOW;
5549     if (!(fpscr & 0x800))  flags |= _EM_UNDERFLOW;
5550     if (!(fpscr & 0x1000)) flags |= _EM_INEXACT;
5551     if (!(fpscr & 0x8000)) flags |= _EM_DENORMAL;
5552     switch (fpscr & 0xc00000)
5553     {
5554     case 0x400000: flags |= _RC_UP; break;
5555     case 0x800000: flags |= _RC_DOWN; break;
5556     case 0xc00000: flags |= _RC_CHOP; break;
5557     }
5558     flags = (flags & ~mask) | (newval & mask);
5559     fpscr &= ~0xc09f00ul;
5560     if (!(flags & _EM_INVALID))    fpscr |= 0x100;
5561     if (!(flags & _EM_ZERODIVIDE)) fpscr |= 0x200;
5562     if (!(flags & _EM_OVERFLOW))   fpscr |= 0x400;
5563     if (!(flags & _EM_UNDERFLOW))  fpscr |= 0x800;
5564     if (!(flags & _EM_INEXACT))    fpscr |= 0x1000;
5565     if (!(flags & _EM_DENORMAL))   fpscr |= 0x8000;
5566     switch (flags & _MCW_RC)
5567     {
5568     case _RC_CHOP: fpscr |= 0xc00000; break;
5569     case _RC_UP:   fpscr |= 0x400000; break;
5570     case _RC_DOWN: fpscr |= 0x800000; break;
5571     }
5572     __asm__ __volatile__( "vmsr fpscr, %0" :: "r" (fpscr) );
5573 #else
5574     FIXME( "not implemented\n" );
5575 #endif
5576     return flags;
5577 }
5578
5579 /*********************************************************************
5580  *              _controlfp (MSVCRT.@)
5581  */
5582 unsigned int CDECL _controlfp(unsigned int newval, unsigned int mask)
5583 {
5584   return _control87( newval, mask & ~_EM_DENORMAL );
5585 }
5586
5587 /*********************************************************************
5588  *              _set_controlfp (MSVCRT.@)
5589  */
5590 void CDECL _set_controlfp( unsigned int newval, unsigned int mask )
5591 {
5592     _controlfp( newval, mask );
5593 }
5594
5595 /*********************************************************************
5596  *              _controlfp_s (MSVCRT.@)
5597  */
5598 int CDECL _controlfp_s(unsigned int *cur, unsigned int newval, unsigned int mask)
5599 {
5600     static const unsigned int all_flags = (_MCW_EM | _MCW_IC | _MCW_RC |
5601                                            _MCW_PC | _MCW_DN);
5602     unsigned int val;
5603
5604     if (!MSVCRT_CHECK_PMT( !(newval & mask & ~all_flags) ))
5605     {
5606         if (cur) *cur = _controlfp( 0, 0 );  /* retrieve it anyway */
5607         return EINVAL;
5608     }
5609     val = _controlfp( newval, mask );
5610     if (cur) *cur = val;
5611     return 0;
5612 }
5613
5614 #if _MSVCR_VER >= 140 && (defined(__i386__) || defined(__x86_64__))
/* Bit patterns used by fenv_encode()/fenv_decode().
 * FENV_X_* values are produced from the first (x87) argument of
 * fenv_encode() and FENV_Y_* values from the second (SSE) argument.
 * Each value is written as "unit-specific high bits | low bits"; the
 * low bits of an exception or rounding flag are the same for X and Y. */
enum fenv_masks
{
    FENV_X_INVALID    = 0x00100000 | 0x00000010,
    FENV_X_DENORMAL   = 0x00200000 | 0x00000020,
    FENV_X_ZERODIVIDE = 0x00080000 | 0x00000008,
    FENV_X_OVERFLOW   = 0x00040000 | 0x00000004,
    FENV_X_UNDERFLOW  = 0x00020000 | 0x00000002,
    FENV_X_INEXACT    = 0x00010000 | 0x00000001,
    FENV_X_AFFINE     = 0x00004000,
    FENV_X_UP         = 0x00800000 | 0x00000200,
    FENV_X_DOWN       = 0x00400000 | 0x00000100,
    FENV_X_24         = 0x00002000,
    FENV_X_53         = 0x00001000,
    FENV_Y_INVALID    = 0x10000000 | 0x00000010,
    FENV_Y_DENORMAL   = 0x20000000 | 0x00000020,
    FENV_Y_ZERODIVIDE = 0x08000000 | 0x00000008,
    FENV_Y_OVERFLOW   = 0x04000000 | 0x00000004,
    FENV_Y_UNDERFLOW  = 0x02000000 | 0x00000002,
    FENV_Y_INEXACT    = 0x01000000 | 0x00000001,
    FENV_Y_UP         = 0x80000000 | 0x00000200,
    FENV_Y_DOWN       = 0x40000000 | 0x00000100,
    FENV_Y_FLUSH      = 0x00000400,
    FENV_Y_FLUSH_SAVE = 0x00000800
};
5639
5640 /* encodes x87/sse control/status word in ulong */
5641 static __msvcrt_ulong fenv_encode(unsigned int x, unsigned int y)
5642 {
5643     __msvcrt_ulong ret = 0;
5644
5645 #ifdef __i386__
5646     if (x & _EM_INVALID) ret |= FENV_X_INVALID;
5647     if (x & _EM_DENORMAL) ret |= FENV_X_DENORMAL;
5648     if (x & _EM_ZERODIVIDE) ret |= FENV_X_ZERODIVIDE;
5649     if (x & _EM_OVERFLOW) ret |= FENV_X_OVERFLOW;
5650     if (x & _EM_UNDERFLOW) ret |= FENV_X_UNDERFLOW;
5651     if (x & _EM_INEXACT) ret |= FENV_X_INEXACT;
5652     if (x & _IC_AFFINE) ret |= FENV_X_AFFINE;
5653     if (x & _RC_UP) ret |= FENV_X_UP;
5654     if (x & _RC_DOWN) ret |= FENV_X_DOWN;
5655     if (x & _PC_24) ret |= FENV_X_24;
5656     if (x & _PC_53) ret |= FENV_X_53;
5657 #endif
5658     x &= ~(_MCW_EM | _MCW_IC | _MCW_RC | _MCW_PC);
5659
5660     if (y & _EM_INVALID) ret |= FENV_Y_INVALID;
5661     if (y & _EM_DENORMAL) ret |= FENV_Y_DENORMAL;
5662     if (y & _EM_ZERODIVIDE) ret |= FENV_Y_ZERODIVIDE;
5663     if (y & _EM_OVERFLOW) ret |= FENV_Y_OVERFLOW;
5664     if (y & _EM_UNDERFLOW) ret |= FENV_Y_UNDERFLOW;
5665     if (y & _EM_INEXACT) ret |= FENV_Y_INEXACT;
5666     if (y & _RC_UP) ret |= FENV_Y_UP;
5667     if (y & _RC_DOWN) ret |= FENV_Y_DOWN;
5668     if (y & _DN_FLUSH) ret |= FENV_Y_FLUSH;
5669     if (y & _DN_FLUSH_OPERANDS_SAVE_RESULTS) ret |= FENV_Y_FLUSH_SAVE;
5670     y &= ~(_MCW_EM | _MCW_IC | _MCW_RC | _MCW_DN);
5671
5672     if(x || y) FIXME("unsupported flags: %x, %x\n", x, y);
5673     return ret;
5674 }
5675
5676 /* decodes x87/sse control/status word, returns FALSE on error */
5677 static BOOL fenv_decode(__msvcrt_ulong enc, unsigned int *x, unsigned int *y)
5678 {
5679     *x = *y = 0;
5680     if ((enc & FENV_X_INVALID) == FENV_X_INVALID) *x |= _EM_INVALID;
5681     if ((enc & FENV_X_DENORMAL) == FENV_X_DENORMAL) *x |= _EM_DENORMAL;
5682     if ((enc & FENV_X_ZERODIVIDE) == FENV_X_ZERODIVIDE) *x |= _EM_ZERODIVIDE;
5683     if ((enc & FENV_X_OVERFLOW) == FENV_X_OVERFLOW) *x |= _EM_OVERFLOW;
5684     if ((enc & FENV_X_UNDERFLOW) == FENV_X_UNDERFLOW) *x |= _EM_UNDERFLOW;
5685     if ((enc & FENV_X_INEXACT) == FENV_X_INEXACT) *x |= _EM_INEXACT;
5686     if ((enc & FENV_X_AFFINE) == FENV_X_AFFINE) *x |= _IC_AFFINE;
5687     if ((enc & FENV_X_UP) == FENV_X_UP) *x |= _RC_UP;
5688     if ((enc & FENV_X_DOWN) == FENV_X_DOWN) *x |= _RC_DOWN;
5689     if ((enc & FENV_X_24) == FENV_X_24) *x |= _PC_24;
5690     if ((enc & FENV_X_53) == FENV_X_53) *x |= _PC_53;
5691
5692     if ((enc & FENV_Y_INVALID) == FENV_Y_INVALID) *y |= _EM_INVALID;
5693     if ((enc & FENV_Y_DENORMAL) == FENV_Y_DENORMAL) *y |= _EM_DENORMAL;
5694     if ((enc & FENV_Y_ZERODIVIDE) == FENV_Y_ZERODIVIDE) *y |= _EM_ZERODIVIDE;
5695     if ((enc & FENV_Y_OVERFLOW) == FENV_Y_OVERFLOW) *y |= _EM_OVERFLOW;
5696     if ((enc & FENV_Y_UNDERFLOW) == FENV_Y_UNDERFLOW) *y |= _EM_UNDERFLOW;
5697     if ((enc & FENV_Y_INEXACT) == FENV_Y_INEXACT) *y |= _EM_INEXACT;
5698     if ((enc & FENV_Y_UP) == FENV_Y_UP) *y |= _RC_UP;
5699     if ((enc & FENV_Y_DOWN) == FENV_Y_DOWN) *y |= _RC_DOWN;
5700     if ((enc & FENV_Y_FLUSH) == FENV_Y_FLUSH) *y |= _DN_FLUSH;
5701     if ((enc & FENV_Y_FLUSH_SAVE) == FENV_Y_FLUSH_SAVE) *y |= _DN_FLUSH_OPERANDS_SAVE_RESULTS;
5702
5703     if (fenv_encode(*x, *y) != enc)
5704     {
5705         WARN("can't decode: %lx\n", enc);
5706         return FALSE;
5707     }
5708     return TRUE;
5709 }
5710 #elif _MSVCR_VER >= 120
5711 static __msvcrt_ulong fenv_encode(unsigned int x, unsigned int y)
5712 {
5713     if (y & _EM_DENORMAL)
5714         y = (y & ~_EM_DENORMAL) | 0x20;
5715
5716     return x | y;
5717 }
5718
5719 #if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) || (defined(__arm__) && !defined(__SOFTFP__))
5720 static BOOL fenv_decode(__msvcrt_ulong enc, unsigned int *x, unsigned int *y)
5721 {
5722     if (enc & 0x20)
5723         enc = (enc & ~0x20) | _EM_DENORMAL;
5724
5725     *x = *y = enc;
5726     return TRUE;
5727 }
5728 #endif
5729 #endif
5730
5731 #if _MSVCR_VER>=120
5732 /*********************************************************************
5733  *              fegetenv (MSVCR120.@)
5734  */
5735 int CDECL fegetenv(fenv_t *env)
5736 {
5737 #if _MSVCR_VER>=140 && defined(__i386__)
5738     unsigned int x87, sse;
5739     __control87_2(0, 0, &x87, &sse);
5740     env->_Fe_ctl = fenv_encode(x87, sse);
5741     _statusfp2(&x87, &sse);
5742     env->_Fe_stat = fenv_encode(x87, sse);
5743 #elif _MSVCR_VER>=140
5744     env->_Fe_ctl = fenv_encode(0, _control87(0, 0));
5745     env->_Fe_stat = fenv_encode(0, _statusfp());
5746 #else
5747     env->_Fe_ctl = _controlfp(0, 0) & (_EM_INEXACT | _EM_UNDERFLOW |
5748             _EM_OVERFLOW | _EM_ZERODIVIDE | _EM_INVALID | _RC_CHOP);
5749     env->_Fe_stat = _statusfp();
5750 #endif
5751     return 0;
5752 }
5753
5754 /*********************************************************************
5755  *              feupdateenv (MSVCR120.@)
5756  */
5757 int CDECL feupdateenv(const fenv_t *env)
5758 {
5759     fenv_t set;
5760     fegetenv(&set);
5761     set._Fe_ctl = env->_Fe_ctl;
5762     set._Fe_stat |= env->_Fe_stat;
5763     return fesetenv(&set);
5764 }
5765
5766 /*********************************************************************
5767  *      fetestexcept (MSVCR120.@)
5768  */
5769 int CDECL fetestexcept(int flags)
5770 {
5771     return _statusfp() & flags;
5772 }
5773
5774 /*********************************************************************
5775  *      fesetexceptflag (MSVCR120.@)
5776  */
5777 int CDECL fesetexceptflag(const fexcept_t *status, int excepts)
5778 {
5779     fenv_t env;
5780
5781     excepts &= FE_ALL_EXCEPT;
5782     if(!excepts)
5783         return 0;
5784
5785     fegetenv(&env);
5786     env._Fe_stat &= ~fenv_encode(excepts, excepts);
5787     env._Fe_stat |= *status & fenv_encode(excepts, excepts);
5788     return fesetenv(&env);
5789 }
5790
5791 /*********************************************************************
5792  *      feraiseexcept (MSVCR120.@)
5793  */
5794 int CDECL feraiseexcept(int flags)
5795 {
5796     fenv_t env;
5797
5798     flags &= FE_ALL_EXCEPT;
5799     fegetenv(&env);
5800     env._Fe_stat |= fenv_encode(flags, flags);
5801     return fesetenv(&env);
5802 }
5803
5804 /*********************************************************************
5805  *      feclearexcept (MSVCR120.@)
5806  */
5807 int CDECL feclearexcept(int flags)
5808 {
5809     fenv_t env;
5810
5811     fegetenv(&env);
5812     flags &= FE_ALL_EXCEPT;
5813     env._Fe_stat &= ~fenv_encode(flags, flags);
5814     return fesetenv(&env);
5815 }
5816
5817 /*********************************************************************
5818  *      fegetexceptflag (MSVCR120.@)
5819  */
5820 int CDECL fegetexceptflag(fexcept_t *status, int excepts)
5821 {
5822 #if _MSVCR_VER>=140 && defined(__i386__)
5823     unsigned int x87, sse;
5824     _statusfp2(&x87, &sse);
5825     *status = fenv_encode(x87 & excepts, sse & excepts);
5826 #else
5827     *status = fenv_encode(0, _statusfp() & excepts);
5828 #endif
5829     return 0;
5830 }
5831 #endif
5832
5833 #if _MSVCR_VER>=140
5834 /*********************************************************************
5835  *              __fpe_flt_rounds (UCRTBASE.@)
5836  */
5837 int CDECL __fpe_flt_rounds(void)
5838 {
5839     unsigned int fpc = _controlfp(0, 0) & _RC_CHOP;
5840
5841     TRACE("()\n");
5842
5843     switch(fpc) {
5844         case _RC_CHOP: return 0;
5845         case _RC_NEAR: return 1;
5846         case _RC_UP: return 2;
5847         default: return 3;
5848     }
5849 }
5850 #endif
5851
5852 #if _MSVCR_VER>=120
5853
5854 /*********************************************************************
5855  *              fegetround (MSVCR120.@)
5856  */
5857 int CDECL fegetround(void)
5858 {
5859     return _controlfp(0, 0) & _MCW_RC;
5860 }
5861
5862 /*********************************************************************
5863  *              fesetround (MSVCR120.@)
5864  */
5865 int CDECL fesetround(int round_mode)
5866 {
5867     if (round_mode & (~_MCW_RC))
5868         return 1;
5869     _controlfp(round_mode, _MCW_RC);
5870     return 0;
5871 }
5872
5873 #endif /* _MSVCR_VER>=120 */
5874
/*********************************************************************
 *              _copysign (MSVCRT.@)
 *
 * Copied from musl: src/math/copysign.c
 *
 * Returns a value with the magnitude bits of x and the sign bit of y.
 */
double CDECL _copysign( double x, double y )
{
    const UINT64 sign_bit = (UINT64)1 << 63;
    union { double f; UINT64 i; } mag, sgn;

    mag.f = x;
    sgn.f = y;
    mag.i = (mag.i & ~sign_bit) | (sgn.i & sign_bit);
    return mag.f;
}
5887
/*********************************************************************
 *              _finite (MSVCRT.@)
 *
 * Returns non-zero when num is neither an infinity nor a NaN. The test
 * compares the bit pattern without the sign bit against the pattern of
 * +infinity.
 */
int CDECL _finite(double num)
{
    union { double f; UINT64 i; } bits;
    UINT64 magnitude;

    bits.f = num;
    magnitude = bits.i & 0x7fffffffffffffffull; /* drop the sign bit */
    return magnitude < 0x7ff0000000000000ull;   /* below +inf */
}
5896
5897 /*********************************************************************
5898  *              _fpreset (MSVCRT.@)
5899  */
5900 void CDECL _fpreset(void)
5901 {
5902 #if (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__))
5903     const unsigned int x86_cw = 0x27f;
5904     __asm__ __volatile__( "fninit; fldcw %0" : : "m" (x86_cw) );
5905     if (sse2_supported)
5906     {
5907         unsigned int cw = _MCW_EM, sw = 0;
5908         _setfp_sse(&cw, ~0, &sw, ~0);
5909     }
5910 #else
5911     FIXME( "not implemented\n" );
5912 #endif
5913 }
5914
5915 #if _MSVCR_VER>=120
/*********************************************************************
 *              fesetenv (MSVCR120.@)
 *
 * Installs the floating-point environment described by env: the control
 * word from env->_Fe_ctl and the status flags from env->_Fe_stat.
 *
 * An environment with both fields zero is handled by calling _fpreset().
 *
 * RETURNS
 *  0 on success. 1 when a field cannot be decoded, when _setfp_sse()
 *  fails, or when the platform has no implementation.
 */
int CDECL fesetenv(const fenv_t *env)
{
#if (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__))
    unsigned int x87_cw, sse_cw, x87_stat, sse_stat;
#ifdef __i386__
    /* buffer that fnstenv/fldenv below store to and load from */
    struct {
        WORD control_word;
        WORD unused1;
        WORD status_word;
        WORD unused2;
        WORD tag_word;
        WORD unused3;
        DWORD instruction_pointer;
        WORD code_segment;
        WORD unused4;
        DWORD operand_addr;
        WORD data_segment;
        WORD unused5;
    } fenv;
#endif

    TRACE( "(%p)\n", env );

    if (!env->_Fe_ctl && !env->_Fe_stat) {
        _fpreset();
        return 0;
    }

    if (!fenv_decode(env->_Fe_ctl, &x87_cw, &sse_cw))
        return 1;
    if (!fenv_decode(env->_Fe_stat, &x87_stat, &sse_stat))
        return 1;

#ifdef __i386__
    /* read the current x87 environment, patch it, then load it back */
    __asm__ __volatile__( "fnstenv %0" : "=m" (fenv) );

    /* clear the bits that are rebuilt below: exception masks other than
     * denormal (0x3d) and rounding control (0xc00) */
    fenv.control_word &= ~0xc3d;
#if _MSVCR_VER>=140
    /* additionally rebuilt from x87_cw: denormal mask (0x2), precision
     * control (0x300) and the 0x1000 bit set for _IC_AFFINE */
    fenv.control_word &= ~0x1302;
#endif
    if (x87_cw & _EM_INVALID) fenv.control_word |= 0x1;
    if (x87_cw & _EM_ZERODIVIDE) fenv.control_word |= 0x4;
    if (x87_cw & _EM_OVERFLOW) fenv.control_word |= 0x8;
    if (x87_cw & _EM_UNDERFLOW) fenv.control_word |= 0x10;
    if (x87_cw & _EM_INEXACT) fenv.control_word |= 0x20;
    switch (x87_cw & _MCW_RC)
    {
        case _RC_UP|_RC_DOWN:   fenv.control_word |= 0xc00; break;
        case _RC_UP:            fenv.control_word |= 0x800; break;
        case _RC_DOWN:          fenv.control_word |= 0x400; break;
    }
#if _MSVCR_VER>=140
    if (x87_cw & _EM_DENORMAL) fenv.control_word |= 0x2;
    switch (x87_cw & _MCW_PC)
    {
        case _PC_64: fenv.control_word |= 0x300; break;
        case _PC_53: fenv.control_word |= 0x200; break;
        case _PC_24: fenv.control_word |= 0x0; break;
    }
    if (x87_cw & _IC_AFFINE) fenv.control_word |= 0x1000;
#endif

    /* replace the six exception flags of the status word */
    fenv.status_word &= ~0x3f;
    if (x87_stat & _SW_INVALID) fenv.status_word |= 0x1;
    if (x87_stat & _SW_DENORMAL) fenv.status_word |= 0x2;
    if (x87_stat & _SW_ZERODIVIDE) fenv.status_word |= 0x4;
    if (x87_stat & _SW_OVERFLOW) fenv.status_word |= 0x8;
    if (x87_stat & _SW_UNDERFLOW) fenv.status_word |= 0x10;
    if (x87_stat & _SW_INEXACT) fenv.status_word |= 0x20;

    __asm__ __volatile__( "fldenv %0" : : "m" (fenv) : "st", "st(1)",
            "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)" );
#endif

    if (sse2_supported)
    {
        /* before msvcr140 only the exception masks (without denormal)
         * and the rounding mode are passed as the control mask */
        if(!_setfp_sse(&sse_cw, _MSVCR_VER>=140 ? ~0 :
                    ~_EM_DENORMAL & (_MCW_EM | _MCW_RC), &sse_stat, _MCW_EM))
            return 1;
    }

    return 0;
#elif defined(__aarch64__)
    ULONG_PTR fpsr;
    unsigned int tmp, fp_cw, fp_stat;

    if (!env->_Fe_ctl && !env->_Fe_stat) {
        _fpreset();
        return 0;
    }

    /* only the second decoded value is used here; tmp is discarded */
    if (!fenv_decode(env->_Fe_ctl, &tmp, &fp_cw))
        return 1;
    if (!fenv_decode(env->_Fe_stat, &tmp, &fp_stat))
        return 1;

    /* all exceptions are masked while the status flags are rewritten
     * (presumably so that setting a flag cannot trap -- TODO confirm);
     * the requested control word is installed afterwards */
    _control87(_MCW_EM, _MCW_EM);
    __asm__ __volatile__( "mrs %0, fpsr" : "=r" (fpsr) );
    fpsr &= ~0x9f;
    if (fp_stat & _SW_INVALID)    fpsr |= 0x1;
    if (fp_stat & _SW_ZERODIVIDE) fpsr |= 0x2;
    if (fp_stat & _SW_OVERFLOW)   fpsr |= 0x4;
    if (fp_stat & _SW_UNDERFLOW)  fpsr |= 0x8;
    if (fp_stat & _SW_INEXACT)    fpsr |= 0x10;
    if (fp_stat & _SW_DENORMAL)   fpsr |= 0x80;
    __asm__ __volatile__( "msr fpsr, %0" :: "r" (fpsr) );
    _control87(fp_cw, 0xffffffff);
    return 0;
#elif defined(__arm__) && !defined(__SOFTFP__)
    DWORD fpscr;
    unsigned int tmp, fp_cw, fp_stat;

    if (!env->_Fe_ctl && !env->_Fe_stat) {
        _fpreset();
        return 0;
    }

    /* only the second decoded value is used here; tmp is discarded */
    if (!fenv_decode(env->_Fe_ctl, &tmp, &fp_cw))
        return 1;
    if (!fenv_decode(env->_Fe_stat, &tmp, &fp_stat))
        return 1;

    /* all exceptions are masked while the status flags are rewritten
     * (presumably so that setting a flag cannot trap -- TODO confirm);
     * the requested control word is installed afterwards */
    _control87(_MCW_EM, _MCW_EM);
    __asm__ __volatile__( "vmrs %0, fpscr" : "=r" (fpscr) );
    fpscr &= ~0x9f;
    if (fp_stat & _SW_INVALID)    fpscr |= 0x1;
    if (fp_stat & _SW_ZERODIVIDE) fpscr |= 0x2;
    if (fp_stat & _SW_OVERFLOW)   fpscr |= 0x4;
    if (fp_stat & _SW_UNDERFLOW)  fpscr |= 0x8;
    if (fp_stat & _SW_INEXACT)    fpscr |= 0x10;
    if (fp_stat & _SW_DENORMAL)   fpscr |= 0x80;
    __asm__ __volatile__( "vmsr fpscr, %0" :: "r" (fpscr) );
    _control87(fp_cw, 0xffffffff);
    return 0;
#else
    FIXME( "not implemented\n" );
#endif
    /* reached only when no implementation was compiled in above */
    return 1;
}
6058 #endif
6059
/*********************************************************************
 *              _isnan (MSVCRT.@)
 *
 * Returns non-zero when num is a NaN. The test compares the bit pattern
 * without the sign bit against the pattern of +infinity.
 */
int CDECL _isnan(double num)
{
    union { double f; UINT64 i; } bits;
    UINT64 magnitude;

    bits.f = num;
    magnitude = bits.i & 0x7fffffffffffffffull; /* drop the sign bit */
    return magnitude > 0x7ff0000000000000ull;   /* above +inf */
}
6068
/* Evaluates 1 + R(z)/S(z) with z = 1/x^2, where R and S are polynomials
 * whose coefficients depend on the magnitude of x (see the interval
 * noted next to each table). The sign of x is ignored. The tables start
 * at |x| = 2; smaller magnitudes fall through to the last table. */
static double pzero(double x)
{
    static const double pR8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
        0.00000000000000000000e+00,
        -7.03124999999900357484e-02,
        -8.08167041275349795626e+00,
        -2.57063105679704847262e+02,
        -2.48521641009428822144e+03,
        -5.25304380490729545272e+03,
    }, pS8[5] = {
        1.16534364619668181717e+02,
        3.83374475364121826715e+03,
        4.05978572648472545552e+04,
        1.16752972564375915681e+05,
        4.76277284146730962675e+04,
    }, pR5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
        -1.14125464691894502584e-11,
        -7.03124940873599280078e-02,
        -4.15961064470587782438e+00,
        -6.76747652265167261021e+01,
        -3.31231299649172967747e+02,
        -3.46433388365604912451e+02,
    }, pS5[5] = {
        6.07539382692300335975e+01,
        1.05125230595704579173e+03,
        5.97897094333855784498e+03,
        9.62544514357774460223e+03,
        2.40605815922939109441e+03,
    }, pR3[6] = {/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */
        -2.54704601771951915620e-09,
        -7.03119616381481654654e-02,
        -2.40903221549529611423e+00,
        -2.19659774734883086467e+01,
        -5.80791704701737572236e+01,
        -3.14479470594888503854e+01,
    }, pS3[5] = {
        3.58560338055209726349e+01,
        3.61513983050303863820e+02,
        1.19360783792111533330e+03,
        1.12799679856907414432e+03,
        1.73580930813335754692e+02,
    }, pR2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
        -8.87534333032526411254e-08,
        -7.03030995483624743247e-02,
        -1.45073846780952986357e+00,
        -7.63569613823527770791e+00,
        -1.11931668860356747786e+01,
        -3.23364579351335335033e+00,
    }, pS2[5] = {
        2.22202997532088808441e+01,
        1.36206794218215208048e+02,
        2.70470278658083486789e+02,
        1.53875394208320329881e+02,
        1.46576176948256193810e+01,
    };

    const double *p, *q;
    double z, r, s;
    /* read the bits through a union: dereferencing a ULONGLONG pointer
     * to a double violates the strict aliasing rules */
    union { double f; ULONGLONG i; } u = { x };
    UINT32 ix;

    /* high 32 bits of x without the sign bit: exponent and top of mantissa */
    ix = u.i >> 32;
    ix &= 0x7fffffff;
    if (ix >= 0x40200000) {
        p = pR8;
        q = pS8;
    } else if (ix >= 0x40122E8B) {
        p = pR5;
        q = pS5;
    } else if (ix >= 0x4006DB6D) {
        p = pR3;
        q = pS3;
    } else /*ix >= 0x40000000*/ {
        p = pR2;
        q = pS2;
    }

    z = 1.0 / (x * x);
    r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
    s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4]))));
    return 1.0 + r / s;
}
6150
6151 static double qzero(double x)
6152 {
6153     static const double qR8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
6154         0.00000000000000000000e+00,
6155         7.32421874999935051953e-02,
6156         1.17682064682252693899e+01,
6157         5.57673380256401856059e+02,
6158         8.85919720756468632317e+03,
6159         3.70146267776887834771e+04,
6160     }, qS8[6] = {
6161         1.63776026895689824414e+02,
6162         8.09834494656449805916e+03,
6163         1.42538291419120476348e+05,
6164         8.03309257119514397345e+05,
6165         8.40501579819060512818e+05,
6166         -3.43899293537866615225e+05,
6167     }, qR5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
6168         1.84085963594515531381e-11,
6169         7.32421766612684765896e-02,
6170         5.83563508962056953777e+00,
6171         1.35111577286449829671e+02,
6172         1.02724376596164097464e+03,
6173         1.98997785864605384631e+03,
6174     }, qS5[6] = {
6175         8.27766102236537761883e+01,
6176         2.07781416421392987104e+03,
6177         1.88472887785718085070e+04,
6178         5.67511122894947329769e+04,
6179         3.59767538425114471465e+04,
6180         -5.35434275601944773371e+03,
6181     }, qR3[6] = {/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */
6182         4.37741014089738620906e-09,
6183         7.32411180042911447163e-02,
6184         3.34423137516170720929e+00,
6185         4.26218440745412650017e+01,
6186         1.70808091340565596283e+02,
6187         1.66733948696651168575e+02,
6188     }, qS3[6] = {
6189         4.87588729724587182091e+01,
6190         7.09689221056606015736e+02,
6191         3.70414822620111362994e+03,
6192         6.46042516752568917582e+03,
6193         2.51633368920368957333e+03,
6194         -1.49247451836156386662e+02,
6195     }, qR2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
6196         1.50444444886983272379e-07,
6197         7.32234265963079278272e-02,
6198         1.99819174093815998816e+00,
6199         1.44956029347885735348e+01,
6200         3.16662317504781540833e+01,
6201         1.62527075710929267416e+01,
6202     }, qS2[6] = {
6203         3.03655848355219184498e+01,
6204         2.69348118608049844624e+02,
6205         8.44783757595320139444e+02,
6206         8.82935845112488550512e+02,
6207         2.12666388511798828631e+02,
6208         -5.31095493882666946917e+00,
6209     };
6210
6211     const double *p, *q;
6212     double s, r, z;
6213     unsigned int ix;
6214
6215     ix = *(ULONGLONG*)&x >> 32;
6216     ix &= 0x7fffffff;
6217     if (ix >= 0x40200000) {
6218         p = qR8;
6219         q = qS8;
6220     } else if (ix >= 0x40122E8B) {
6221         p = qR5;
6222         q = qS5;
6223     } else if (ix >= 0x4006DB6D) {
6224         p = qR3;
6225         q = qS3;
6226     } else /*ix >= 0x40000000*/ {
6227         p = qR2;
6228         q = qS2;
6229     }
6230
6231     z = 1.0 / (x * x);
6232     r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
6233     s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5])))));
6234     return (-0.125 + r / s) / x;
6235 }
6236
6237 /* j0 and y0 approximation for |x|>=2 */
6238 static double j0_y0_approx(unsigned int ix, double x, BOOL y0)
6239 {
6240     static const double invsqrtpi = 5.64189583547756279280e-01;
6241
6242     double s, c, ss, cc, z;
6243
6244     s = sin(x);
6245     c = cos(x);
6246     if (y0) c = -c;
6247     cc = s + c;
6248     /* avoid overflow in 2*x, big ulp error when x>=0x1p1023 */
6249     if (ix < 0x7fe00000) {
6250         ss = s - c;
6251         z = -cos(2 * x);
6252         if (s * c < 0) cc = z / ss;
6253         else ss = z / cc;
6254         if (ix < 0x48000000) {
6255             if (y0) ss = -ss;
6256             cc = pzero(x) * cc - qzero(x) * ss;
6257         }
6258     }
6259     return invsqrtpi * cc / sqrt(x);
6260 }
6261
6262 /*********************************************************************
6263  *              _j0 (MSVCRT.@)
6264  *
6265  * Copied from musl: src/math/j0.c
6266  */
6267 double CDECL _j0(double x)
6268 {
6269     static const double R02 =  1.56249999999999947958e-02,
6270             R03 = -1.89979294238854721751e-04,
6271             R04 =  1.82954049532700665670e-06,
6272             R05 = -4.61832688532103189199e-09,
6273             S01 =  1.56191029464890010492e-02,
6274             S02 =  1.16926784663337450260e-04,
6275             S03 =  5.13546550207318111446e-07,
6276             S04 =  1.16614003333790000205e-09;
6277
6278     double z, r, s;
6279     unsigned int ix;
6280
6281     ix = *(ULONGLONG*)&x >> 32;
6282     ix &= 0x7fffffff;
6283
6284     /* j0(+-inf)=0, j0(nan)=nan */
6285     if (ix >= 0x7ff00000)
6286         return math_error(_DOMAIN, "_j0", x, 0, 1 / (x * x));
6287     x = fabs(x);
6288
6289     if (ix >= 0x40000000) {  /* |x| >= 2 */
6290         /* large ulp error near zeros: 2.4, 5.52, 8.6537,.. */
6291         return j0_y0_approx(ix, x, FALSE);
6292     }
6293
6294     if (ix >= 0x3f200000) {  /* |x| >= 2**-13 */
6295         /* up to 4ulp error close to 2 */
6296         z = x * x;
6297         r = z * (R02 + z * (R03 + z * (R04 + z * R05)));
6298         s = 1 + z * (S01 + z * (S02 + z * (S03 + z * S04)));
6299         return (1 + x / 2) * (1 - x / 2) + z * (r / s);
6300     }
6301
6302     /* 1 - x*x/4 */
6303     /* prevent underflow */
6304     /* inexact should be raised when x!=0, this is not done correctly */
6305     if (ix >= 0x38000000)  /* |x| >= 2**-127 */
6306         x = 0.25 * x * x;
6307     return 1 - x;
6308 }
6309
6310 static double pone(double x)
6311 {
6312     static const double pr8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
6313         0.00000000000000000000e+00,
6314         1.17187499999988647970e-01,
6315         1.32394806593073575129e+01,
6316         4.12051854307378562225e+02,
6317         3.87474538913960532227e+03,
6318         7.91447954031891731574e+03,
6319     }, ps8[5] = {
6320         1.14207370375678408436e+02,
6321         3.65093083420853463394e+03,
6322         3.69562060269033463555e+04,
6323         9.76027935934950801311e+04,
6324         3.08042720627888811578e+04,
6325     }, pr5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
6326         1.31990519556243522749e-11,
6327         1.17187493190614097638e-01,
6328         6.80275127868432871736e+00,
6329         1.08308182990189109773e+02,
6330         5.17636139533199752805e+02,
6331         5.28715201363337541807e+02,
6332     }, ps5[5] = {
6333         5.92805987221131331921e+01,
6334         9.91401418733614377743e+02,
6335         5.35326695291487976647e+03,
6336         7.84469031749551231769e+03,
6337         1.50404688810361062679e+03,
6338     }, pr3[6] = {
6339         3.02503916137373618024e-09,
6340         1.17186865567253592491e-01,
6341         3.93297750033315640650e+00,
6342         3.51194035591636932736e+01,
6343         9.10550110750781271918e+01,
6344         4.85590685197364919645e+01,
6345     }, ps3[5] = {
6346         3.47913095001251519989e+01,
6347         3.36762458747825746741e+02,
6348         1.04687139975775130551e+03,
6349         8.90811346398256432622e+02,
6350         1.03787932439639277504e+02,
6351     }, pr2[6] = { /* for x in [2.8570,2]=1/[0.3499,0.5] */
6352         1.07710830106873743082e-07,
6353         1.17176219462683348094e-01,
6354         2.36851496667608785174e+00,
6355         1.22426109148261232917e+01,
6356         1.76939711271687727390e+01,
6357         5.07352312588818499250e+00,
6358     }, ps2[5] = {
6359         2.14364859363821409488e+01,
6360         1.25290227168402751090e+02,
6361         2.32276469057162813669e+02,
6362         1.17679373287147100768e+02,
6363         8.36463893371618283368e+00,
6364     };
6365
6366     const double *p, *q;
6367     double z, r, s;
6368     unsigned int ix;
6369
6370     ix = *(ULONGLONG*)&x >> 32;
6371     ix &= 0x7fffffff;
6372     if (ix >= 0x40200000) {
6373         p = pr8;
6374         q = ps8;
6375     } else if (ix >= 0x40122E8B) {
6376         p = pr5;
6377         q = ps5;
6378     } else if (ix >= 0x4006DB6D) {
6379         p = pr3;
6380         q = ps3;
6381     } else /*ix >= 0x40000000*/ {
6382         p = pr2;
6383         q = ps2;
6384     }
6385     z = 1.0 / (x * x);
6386     r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
6387     s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4]))));
6388     return 1.0 + r / s;
6389 }
6390
6391 static double qone(double x)
6392 {
6393     static const double qr8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
6394         0.00000000000000000000e+00,
6395         -1.02539062499992714161e-01,
6396         -1.62717534544589987888e+01,
6397         -7.59601722513950107896e+02,
6398         -1.18498066702429587167e+04,
6399         -4.84385124285750353010e+04,
6400     }, qs8[6] = {
6401         1.61395369700722909556e+02,
6402         7.82538599923348465381e+03,
6403         1.33875336287249578163e+05,
6404         7.19657723683240939863e+05,
6405         6.66601232617776375264e+05,
6406         -2.94490264303834643215e+05,
6407     }, qr5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
6408         -2.08979931141764104297e-11,
6409         -1.02539050241375426231e-01,
6410         -8.05644828123936029840e+00,
6411         -1.83669607474888380239e+02,
6412         -1.37319376065508163265e+03,
6413         -2.61244440453215656817e+03,
6414     }, qs5[6] = {
6415         8.12765501384335777857e+01,
6416         1.99179873460485964642e+03,
6417         1.74684851924908907677e+04,
6418         4.98514270910352279316e+04,
6419         2.79480751638918118260e+04,
6420         -4.71918354795128470869e+03,
6421     }, qr3[6] = {
6422         -5.07831226461766561369e-09,
6423         -1.02537829820837089745e-01,
6424         -4.61011581139473403113e+00,
6425         -5.78472216562783643212e+01,
6426         -2.28244540737631695038e+02,
6427         -2.19210128478909325622e+02,
6428     }, qs3[6] = {
6429         4.76651550323729509273e+01,
6430         6.73865112676699709482e+02,
6431         3.38015286679526343505e+03,
6432         5.54772909720722782367e+03,
6433         1.90311919338810798763e+03,
6434         -1.35201191444307340817e+02,
6435     }, qr2[6] = { /* for x in [2.8570,2]=1/[0.3499,0.5] */
6436         -1.78381727510958865572e-07,
6437         -1.02517042607985553460e-01,
6438         -2.75220568278187460720e+00,
6439         -1.96636162643703720221e+01,
6440         -4.23253133372830490089e+01,
6441         -2.13719211703704061733e+01,
6442     }, qs2[6] = {
6443         2.95333629060523854548e+01,
6444         2.52981549982190529136e+02,
6445         7.57502834868645436472e+02,
6446         7.39393205320467245656e+02,
6447         1.55949003336666123687e+02,
6448         -4.95949898822628210127e+00,
6449     };
6450
6451     const double *p, *q;
6452     double s, r, z;
6453     unsigned int ix;
6454
6455     ix = *(ULONGLONG*)&x >> 32;
6456     ix &= 0x7fffffff;
6457     if (ix >= 0x40200000) {
6458         p = qr8;
6459         q = qs8;
6460     } else if (ix >= 0x40122E8B) {
6461         p = qr5;
6462         q = qs5;
6463     } else if (ix >= 0x4006DB6D) {
6464         p = qr3;
6465         q = qs3;
6466     } else /*ix >= 0x40000000*/ {
6467         p = qr2;
6468         q = qs2;
6469     }
6470     z = 1.0 / (x * x);
6471     r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
6472     s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5])))));
6473     return (0.375 + r / s) / x;
6474 }
6475
6476 static double j1_y1_approx(unsigned int ix, double x, BOOL y1, int sign)
6477 {
6478     static const double invsqrtpi = 5.64189583547756279280e-01;
6479
6480     double z, s, c, ss, cc;
6481
6482     s = sin(x);
6483     if (y1) s = -s;
6484     c = cos(x);
6485     cc = s - c;
6486     if (ix < 0x7fe00000) {
6487         ss = -s - c;
6488         z = cos(2 * x);
6489         if (s * c > 0) cc = z / ss;
6490         else ss = z / cc;
6491         if (ix < 0x48000000) {
6492             if (y1)
6493                 ss = -ss;
6494             cc = pone(x) * cc - qone(x) * ss;
6495         }
6496     }
6497     if (sign)
6498         cc = -cc;
6499     return invsqrtpi * cc / sqrt(x);
6500 }
6501
6502 /*********************************************************************
6503  *              _j1 (MSVCRT.@)
6504  *
6505  * Copied from musl: src/math/j1.c
6506  */
6507 double CDECL _j1(double x)
6508 {
6509     static const double r00 = -6.25000000000000000000e-02,
6510         r01 =  1.40705666955189706048e-03,
6511         r02 = -1.59955631084035597520e-05,
6512         r03 =  4.96727999609584448412e-08,
6513         s01 =  1.91537599538363460805e-02,
6514         s02 =  1.85946785588630915560e-04,
6515         s03 =  1.17718464042623683263e-06,
6516         s04 =  5.04636257076217042715e-09,
6517         s05 =  1.23542274426137913908e-11;
6518
6519     double z, r, s;
6520     unsigned int ix;
6521     int sign;
6522
6523     ix = *(ULONGLONG*)&x >> 32;
6524     sign = ix >> 31;
6525     ix &= 0x7fffffff;
6526     if (ix >= 0x7ff00000)
6527         return math_error(isnan(x) ? 0 : _DOMAIN, "_j1", x, 0, 1 / (x * x));
6528     if (ix >= 0x40000000)  /* |x| >= 2 */
6529         return j1_y1_approx(ix, fabs(x), FALSE, sign);
6530     if (ix >= 0x38000000) {  /* |x| >= 2**-127 */
6531         z = x * x;
6532         r = z * (r00 + z * (r01 + z * (r02 + z * r03)));
6533         s = 1 + z * (s01 + z * (s02 + z * (s03 + z * (s04 + z * s05))));
6534         z = r / s;
6535     } else {
6536         /* avoid underflow, raise inexact if x!=0 */
6537         z = x;
6538     }
6539     return (0.5 + z) * x;
6540 }
6541
/*********************************************************************
 *              _jn (MSVCRT.@)
 *
 * Copied from musl: src/math/jn.c
 *
 * Bessel function of the first kind of integer order n.
 *
 * NaN is returned unchanged.  Orders 0 and +-1 are delegated to the
 * order-zero and order-one functions.  For the other orders the result
 * is 0 when x is zero or infinite; otherwise one of three strategies is
 * used depending on how x compares to the order: an asymptotic formula
 * or forward recurrence when x exceeds n-1, a leading power-series term
 * for tiny x, and a continued fraction followed by backward recurrence
 * in the remaining case.  The final sign is flipped for odd orders when
 * the (order-adjusted) argument is negative.
 */
double CDECL _jn(int n, double x)
{
    static const double invsqrtpi = 5.64189583547756279280e-01;

    unsigned int ix, lx;
    int nm1, i, sign;
    double a, b, temp;

    /* ix: high word (sign stripped below), lx: low word, sign: sign bit of x */
    ix = *(ULONGLONG*)&x >> 32;
    lx = *(ULONGLONG*)&x;
    sign = ix >> 31;
    ix &= 0x7fffffff;

    if ((ix | (lx | -lx) >> 31) > 0x7ff00000) /* nan */
        return x;

    if (n == 0)
        return _j0(x);
    /* negative orders are folded onto positive ones by negating x;
     * nm1 holds |n| - 1 */
    if (n < 0) {
        nm1 = -(n + 1);
        x = -x;
        sign ^= 1;
    } else {
        nm1 = n-1;
    }
    if (nm1 == 0)
        return j1(x);

    sign &= n;  /* even n: 0, odd n: signbit(x) */
    x = fabs(x);
    if ((ix | lx) == 0 || ix == 0x7ff00000)  /* if x is 0 or inf */
        b = 0.0;
    else if (nm1 < x) {
        if (ix >= 0x52d00000) { /* x > 2**302 */
            /* asymptotic form: the sin/cos combination depends on the
             * order modulo 4 */
            switch(nm1 & 3) {
            case 0:
                temp = -cos(x) + sin(x);
                break;
            case 1:
                temp = -cos(x) - sin(x);
                break;
            case 2:
                temp =  cos(x) - sin(x);
                break;
            default:
                temp =  cos(x) + sin(x);
                break;
            }
            b = invsqrtpi * temp / sqrt(x);
        } else {
            /* forward recurrence starting from orders 0 and 1 */
            a = _j0(x);
            b = _j1(x);
            for (i = 0; i < nm1; ) {
                i++;
                temp = b;
                b = b * (2.0 * i / x) - a; /* avoid underflow */
                a = temp;
            }
        }
    } else {
        if (ix < 0x3e100000) { /* x < 2**-29 */
            /* only the leading term (x/2)^n / n! is kept */
            if (nm1 > 32)  /* underflow */
                b = 0.0;
            else {
                temp = x * 0.5;
                b = temp;
                a = 1.0;
                for (i = 2; i <= nm1 + 1; i++) {
                    a *= (double)i; /* a = n! */
                    b *= temp;      /* b = (x/2)^n */
                }
                b = b / a;
            }
        } else {
            double t, q0, q1, w, h, z, tmp, nf;
            int k;

            /* nf is the order as a double */
            nf = nm1 + 1.0;
            w = 2 * nf / x;
            h = 2 / x;
            z = w + h;
            q0 = w;
            q1 = w * z - 1.0;
            k = 1;
            /* find how many continued-fraction terms are needed: iterate
             * until q1 reaches 1e9 */
            while (q1 < 1.0e9) {
                k += 1;
                z += h;
                tmp = z * q1 - q0;
                q0 = q1;
                q1 = tmp;
            }
            /* evaluate the continued fraction from the innermost term out */
            for (t = 0.0, i = k; i >= 0; i--)
                t = 1 / (2 * (i + nf) / x - t);
            a = t;
            b = 1.0;
            /* 7.0978...e+02 is about log(DBL_MAX): below it the backward
             * recurrence is run without rescaling */
            tmp = nf * log(fabs(w));
            if (tmp < 7.09782712893383973096e+02) {
                for (i = nm1; i > 0; i--) {
                    temp = b;
                    b = b * (2.0 * i) / x - a;
                    a = temp;
                }
            } else {
                for (i = nm1; i > 0; i--) {
                    temp = b;
                    b = b * (2.0 * i) / x - a;
                    a = temp;
                    /* scale b to avoid spurious overflow */
                    if (b > 0x1p500) {
                        a /= b;
                        t /= b;
                        b  = 1.0;
                    }
                }
            }
            /* normalise with whichever of j0(x), j1(x) is larger in
             * magnitude */
            z = j0(x);
            w = j1(x);
            if (fabs(z) >= fabs(w))
                b = t * z / b;
            else
                b = t * w / a;
        }
    }
    return sign ? -b : b;
}
6672
6673 /*********************************************************************
6674  *              _y0 (MSVCRT.@)
6675  */
6676 double CDECL _y0(double x)
6677 {
6678     static const double tpi = 6.36619772367581382433e-01,
6679         u00  = -7.38042951086872317523e-02,
6680         u01  =  1.76666452509181115538e-01,
6681         u02  = -1.38185671945596898896e-02,
6682         u03  =  3.47453432093683650238e-04,
6683         u04  = -3.81407053724364161125e-06,
6684         u05  =  1.95590137035022920206e-08,
6685         u06  = -3.98205194132103398453e-11,
6686         v01  =  1.27304834834123699328e-02,
6687         v02  =  7.60068627350353253702e-05,
6688         v03  =  2.59150851840457805467e-07,
6689         v04  =  4.41110311332675467403e-10;
6690
6691     double z, u, v;
6692     unsigned int ix, lx;
6693
6694     ix = *(ULONGLONG*)&x >> 32;
6695     lx = *(ULONGLONG*)&x;
6696
6697     /* y0(nan)=nan, y0(<0)=nan, y0(0)=-inf, y0(inf)=0 */
6698     if ((ix << 1 | lx) == 0)
6699         return math_error(_OVERFLOW, "_y0", x, 0, -INFINITY);
6700     if (isnan(x))
6701         return x;
6702     if (ix >> 31)
6703         return math_error(_DOMAIN, "_y0", x, 0, 0 / (x - x));
6704     if (ix >= 0x7ff00000)
6705         return 1 / x;
6706
6707     if (ix >= 0x40000000) {  /* x >= 2 */
6708         /* large ulp errors near zeros: 3.958, 7.086,.. */
6709         return j0_y0_approx(ix, x, TRUE);
6710     }
6711
6712     if (ix >= 0x3e400000) {  /* x >= 2**-27 */
6713         /* large ulp error near the first zero, x ~= 0.89 */
6714         z = x * x;
6715         u = u00 + z * (u01 + z * (u02 + z * (u03 + z * (u04 + z * (u05 + z * u06)))));
6716         v = 1.0 + z * (v01 + z * (v02 + z * (v03 + z * v04)));
6717         return u / v + tpi * (j0(x) * log(x));
6718     }
6719     return u00 + tpi * log(x);
6720 }
6721
6722 /*********************************************************************
6723  *              _y1 (MSVCRT.@)
6724  */
6725 double CDECL _y1(double x)
6726 {
6727     static const double tpi = 6.36619772367581382433e-01,
6728         u00 =  -1.96057090646238940668e-01,
6729         u01 = 5.04438716639811282616e-02,
6730         u02 = -1.91256895875763547298e-03,
6731         u03 = 2.35252600561610495928e-05,
6732         u04 = -9.19099158039878874504e-08,
6733         v00 = 1.99167318236649903973e-02,
6734         v01 = 2.02552581025135171496e-04,
6735         v02 = 1.35608801097516229404e-06,
6736         v03 = 6.22741452364621501295e-09,
6737         v04 = 1.66559246207992079114e-11;
6738
6739     double z, u, v;
6740     unsigned int ix, lx;
6741
6742     ix = *(ULONGLONG*)&x >> 32;
6743     lx = *(ULONGLONG*)&x;
6744
6745     /* y1(nan)=nan, y1(<0)=nan, y1(0)=-inf, y1(inf)=0 */
6746     if ((ix << 1 | lx) == 0)
6747         return math_error(_OVERFLOW, "_y1", x, 0, -INFINITY);
6748     if (isnan(x))
6749         return x;
6750     if (ix >> 31)
6751         return math_error(_DOMAIN, "_y1", x, 0, 0 / (x - x));
6752     if (ix >= 0x7ff00000)
6753         return 1 / x;
6754
6755     if (ix >= 0x40000000)  /* x >= 2 */
6756         return j1_y1_approx(ix, x, TRUE, 0);
6757     if (ix < 0x3c900000)  /* x < 2**-54 */
6758         return -tpi / x;
6759     z = x * x;
6760     u = u00 + z * (u01 + z * (u02 + z * (u03 + z * u04)));
6761     v = 1 + z * (v00 + z * (v01 + z * (v02 + z * (v03 + z * v04))));
6762     return x * (u / v) + tpi * (j1(x) * log(x) - 1 / x);
6763 }
6764
6765 /*********************************************************************
6766  *              _yn (MSVCRT.@)
6767  *
6768  * Copied from musl: src/math/jn.c
6769  */
6770 double CDECL _yn(int n, double x)
6771 {
6772     static const double invsqrtpi = 5.64189583547756279280e-01;
6773
6774     unsigned int ix, lx, ib;
6775     int nm1, sign, i;
6776     double a, b, temp;
6777
6778     ix = *(ULONGLONG*)&x >> 32;
6779     lx = *(ULONGLONG*)&x;
6780     sign = ix >> 31;
6781     ix &= 0x7fffffff;
6782
6783     if ((ix | (lx | -lx) >> 31) > 0x7ff00000) /* nan */
6784         return x;
6785     if (sign && (ix | lx) != 0) /* x < 0 */
6786         return math_error(_DOMAIN, "_y1", x, 0, 0 / (x - x));
6787     if (ix == 0x7ff00000)
6788         return 0.0;
6789
6790     if (n == 0)
6791         return y0(x);
6792     if (n < 0) {
6793         nm1 = -(n + 1);
6794         sign = n & 1;
6795     } else {
6796         nm1 = n - 1;
6797         sign = 0;
6798     }
6799     if (nm1 == 0)
6800         return sign ? -y1(x) : y1(x);
6801
6802     if (ix >= 0x52d00000) { /* x > 2**302 */
6803         switch(nm1 & 3) {
6804         case 0:
6805             temp = -sin(x) - cos(x);
6806             break;
6807         case 1:
6808             temp = -sin(x) + cos(x);
6809             break;
6810         case 2:
6811             temp = sin(x) + cos(x);
6812             break;
6813         default:
6814             temp = sin(x) - cos(x);
6815             break;
6816         }
6817         b = invsqrtpi * temp / sqrt(x);
6818     } else {
6819         a = y0(x);
6820         b = y1(x);
6821         /* quit if b is -inf */
6822         ib = *(ULONGLONG*)&b >> 32;
6823         for (i = 0; i < nm1 && ib != 0xfff00000;) {
6824             i++;
6825             temp = b;
6826             b = (2.0 * i / x) * b - a;
6827             ib = *(ULONGLONG*)&b >> 32;
6828             a = temp;
6829         }
6830     }
6831     return sign ? -b : b;
6832 }
6833
6834 #if _MSVCR_VER>=120
6835
6836 /*********************************************************************
6837  *              _nearbyint (MSVCR120.@)
6838  *
6839  * Based on musl: src/math/nearbyteint.c
6840  */
6841 double CDECL nearbyint(double x)
6842 {
6843     fenv_t env;
6844
6845     fegetenv(&env);
6846     _control87(_MCW_EM, _MCW_EM);
6847     x = rint(x);
6848     feclearexcept(FE_INEXACT);
6849     feupdateenv(&env);
6850     return x;
6851 }
6852
6853 /*********************************************************************
6854  *              _nearbyintf (MSVCR120.@)
6855  *
6856  * Based on musl: src/math/nearbyteintf.c
6857  */
6858 float CDECL nearbyintf(float x)
6859 {
6860     fenv_t env;
6861
6862     fegetenv(&env);
6863     _control87(_MCW_EM, _MCW_EM);
6864     x = rintf(x);
6865     feclearexcept(FE_INEXACT);
6866     feupdateenv(&env);
6867     return x;
6868 }
6869
/*********************************************************************
 *              nexttoward (MSVCR120.@)
 *
 * Thin wrapper: the direction argument is declared as a double here, so
 * both arguments are forwarded unchanged to _nextafter() and its result
 * is returned.
 */
double CDECL MSVCRT_nexttoward(double num, double next)
{
    return _nextafter(num, next);
}
6877
6878 /*********************************************************************
6879  *              nexttowardf (MSVCR120.@)
6880  *
6881  * Copied from musl: src/math/nexttowardf.c
6882  */
6883 float CDECL MSVCRT_nexttowardf(float x, double y)
6884 {
6885     unsigned int ix = *(unsigned int*)&x;
6886     unsigned int e;
6887     float ret;
6888
6889     if (isnan(x) || isnan(y))
6890         return x + y;
6891     if (x == y)
6892         return y;
6893     if (x == 0) {
6894         ix = 1;
6895         if (signbit(y))
6896             ix |= 0x80000000;
6897     } else if (x < y) {
6898         if (signbit(x))
6899             ix--;
6900         else
6901             ix++;
6902     } else {
6903         if (signbit(x))
6904             ix++;
6905         else
6906             ix--;
6907     }
6908     e = ix & 0x7f800000;
6909     /* raise overflow if ix is infinite and x is finite */
6910     if (e == 0x7f800000) {
6911         fp_barrierf(x + x);
6912         *_errno() = ERANGE;
6913     }
6914     ret = *(float*)&ix;
6915     /* raise underflow if ret is subnormal or zero */
6916     if (e == 0) {
6917         fp_barrierf(x * x + ret * ret);
6918         *_errno() = ERANGE;
6919     }
6920     return ret;
6921 }
6922
6923 #endif /* _MSVCR_VER>=120 */
6924
6925 /*********************************************************************
6926  *              _nextafter (MSVCRT.@)
6927  *
6928  * Copied from musl: src/math/nextafter.c
6929  */
6930 double CDECL _nextafter(double x, double y)
6931 {
6932     ULONGLONG llx = *(ULONGLONG*)&x;
6933     ULONGLONG lly = *(ULONGLONG*)&y;
6934     ULONGLONG ax, ay;
6935     int e;
6936
6937     if (isnan(x) || isnan(y))
6938         return x + y;
6939     if (llx == lly) {
6940         if (_fpclass(y) & (_FPCLASS_ND | _FPCLASS_PD | _FPCLASS_NZ | _FPCLASS_PZ ))
6941             *_errno() = ERANGE;
6942         return y;
6943     }
6944     ax = llx & -1ULL / 2;
6945     ay = lly & -1ULL / 2;
6946     if (ax == 0) {
6947         if (ay == 0)
6948             return y;
6949         llx = (lly & 1ULL << 63) | 1;
6950     } else if (ax > ay || ((llx ^ lly) & 1ULL << 63))
6951         llx--;
6952     else
6953         llx++;
6954     e = llx >> 52 & 0x7ff;
6955     /* raise overflow if llx is infinite and x is finite */
6956     if (e == 0x7ff) {
6957         fp_barrier(x + x);
6958         *_errno() = ERANGE;
6959     }
6960     /* raise underflow if llx is subnormal or zero */
6961     y = *(double*)&llx;
6962     if (e == 0) {
6963         fp_barrier(x * x + y * y);
6964         *_errno() = ERANGE;
6965     }
6966     return y;
6967 }
6968
6969 /*********************************************************************
6970  *              _ecvt (MSVCRT.@)
6971  */
6972 char * CDECL _ecvt( double number, int ndigits, int *decpt, int *sign )
6973 {
6974     int prec, len;
6975     thread_data_t *data = msvcrt_get_thread_data();
6976     /* FIXME: check better for overflow (native supports over 300 chars) */
6977     ndigits = min( ndigits, 80 - 8); /* 8 : space for sign, dec point, "e",
6978                                       * 4 for exponent and one for
6979                                       * terminating '\0' */
6980     if (!data->efcvt_buffer)
6981         data->efcvt_buffer = malloc( 80 ); /* ought to be enough */
6982
6983     /* handle cases with zero ndigits or less */
6984     prec = ndigits;
6985     if( prec < 1) prec = 2;
6986     len = _snprintf(data->efcvt_buffer, 80, "%.*le", prec - 1, number);
6987
6988     if (data->efcvt_buffer[0] == '-') {
6989         memmove( data->efcvt_buffer, data->efcvt_buffer + 1, len-- );
6990         *sign = 1;
6991     } else *sign = 0;
6992
6993     /* take the decimal "point away */
6994     if( prec != 1)
6995         memmove( data->efcvt_buffer + 1, data->efcvt_buffer + 2, len - 1 );
6996     /* take the exponential "e" out */
6997     data->efcvt_buffer[ prec] = '\0';
6998     /* read the exponent */
6999     sscanf( data->efcvt_buffer + prec + 1, "%d", decpt);
7000     (*decpt)++;
7001     /* adjust for some border cases */
7002     if( data->efcvt_buffer[0] == '0')/* value is zero */
7003         *decpt = 0;
7004     /* handle cases with zero ndigits or less */
7005     if( ndigits < 1){
7006         if( data->efcvt_buffer[ 0] >= '5')
7007             (*decpt)++;
7008         data->efcvt_buffer[ 0] = '\0';
7009     }
7010     TRACE("out=\"%s\"\n",data->efcvt_buffer);
7011     return data->efcvt_buffer;
7012 }
7013
7014 /*********************************************************************
7015  *              _ecvt_s (MSVCRT.@)
7016  */
7017 int CDECL _ecvt_s( char *buffer, size_t length, double number, int ndigits, int *decpt, int *sign )
7018 {
7019     int prec, len;
7020     char *result;
7021
7022     if (!MSVCRT_CHECK_PMT(buffer != NULL)) return EINVAL;
7023     if (!MSVCRT_CHECK_PMT(decpt != NULL)) return EINVAL;
7024     if (!MSVCRT_CHECK_PMT(sign != NULL)) return EINVAL;
7025     if (!MSVCRT_CHECK_PMT_ERR( length > 2, ERANGE )) return ERANGE;
7026     if (!MSVCRT_CHECK_PMT_ERR(ndigits < (int)length - 1, ERANGE )) return ERANGE;
7027
7028     /* handle cases with zero ndigits or less */
7029     prec = ndigits;
7030     if( prec < 1) prec = 2;
7031     result = malloc(prec + 8);
7032
7033     len = _snprintf(result, prec + 8, "%.*le", prec - 1, number);
7034     if (result[0] == '-') {
7035         memmove( result, result + 1, len-- );
7036         *sign = 1;
7037     } else *sign = 0;
7038
7039     /* take the decimal "point away */
7040     if( prec != 1)
7041         memmove( result + 1, result + 2, len - 1 );
7042     /* take the exponential "e" out */
7043     result[ prec] = '\0';
7044     /* read the exponent */
7045     sscanf( result + prec + 1, "%d", decpt);
7046     (*decpt)++;
7047     /* adjust for some border cases */
7048     if( result[0] == '0')/* value is zero */
7049         *decpt = 0;
7050     /* handle cases with zero ndigits or less */
7051     if( ndigits < 1){
7052         if( result[ 0] >= '5')
7053             (*decpt)++;
7054         result[ 0] = '\0';
7055     }
7056     memcpy( buffer, result, max(ndigits + 1, 1) );
7057     free( result );
7058     return 0;
7059 }
7060
7061 /***********************************************************************
7062  *              _fcvt  (MSVCRT.@)
7063  */
7064 char * CDECL _fcvt( double number, int ndigits, int *decpt, int *sign )
7065 {
7066     thread_data_t *data = msvcrt_get_thread_data();
7067     int stop, dec1, dec2;
7068     char *ptr1, *ptr2, *first;
7069     char buf[80]; /* ought to be enough */
7070     char decimal_separator = get_locinfo()->lconv->decimal_point[0];
7071
7072     if (!data->efcvt_buffer)
7073         data->efcvt_buffer = malloc( 80 ); /* ought to be enough */
7074
7075     stop = _snprintf(buf, 80, "%.*f", ndigits < 0 ? 0 : ndigits, number);
7076     ptr1 = buf;
7077     ptr2 = data->efcvt_buffer;
7078     first = NULL;
7079     dec1 = 0;
7080     dec2 = 0;
7081
7082     if (*ptr1 == '-') {
7083         *sign = 1;
7084         ptr1++;
7085     } else *sign = 0;
7086
7087     /* For numbers below the requested resolution, work out where
7088        the decimal point will be rather than finding it in the string */
7089     if (number < 1.0 && number > 0.0) {
7090         dec2 = log10(number + 1e-10);
7091         if (-dec2 <= ndigits) dec2 = 0;
7092     }
7093
7094     /* If requested digits is zero or less, we will need to truncate
7095      * the returned string */
7096     if (ndigits < 1) {
7097         stop += ndigits;
7098     }
7099
7100     while (*ptr1 == '0') ptr1++; /* Skip leading zeroes */
7101     while (*ptr1 != '\0' && *ptr1 != decimal_separator) {
7102         if (!first) first = ptr2;
7103         if ((ptr1 - buf) < stop) {
7104             *ptr2++ = *ptr1++;
7105         } else {
7106             ptr1++;
7107         }
7108         dec1++;
7109     }
7110
7111     if (ndigits > 0) {
7112         ptr1++;
7113         if (!first) {
7114             while (*ptr1 == '0') { /* Process leading zeroes */
7115                 *ptr2++ = *ptr1++;
7116                 dec1--;
7117             }
7118         }
7119         while (*ptr1 != '\0') {
7120             if (!first) first = ptr2;
7121             *ptr2++ = *ptr1++;
7122         }
7123     }
7124
7125     *ptr2 = '\0';
7126
7127     /* We never found a non-zero digit, then our number is either
7128      * smaller than the requested precision, or 0.0 */
7129     if (!first) {
7130         if (number > 0.0) {
7131             first = ptr2;
7132         } else {
7133             first = data->efcvt_buffer;
7134             dec1 = 0;
7135         }
7136     }
7137
7138     *decpt = dec2 ? dec2 : dec1;
7139     return first;
7140 }
7141
7142 /***********************************************************************
7143  *              _fcvt_s  (MSVCRT.@)
7144  */
7145 int CDECL _fcvt_s(char* outbuffer, size_t size, double number, int ndigits, int *decpt, int *sign)
7146 {
7147     int stop, dec1, dec2;
7148     char *ptr1, *ptr2, *first;
7149     char buf[80]; /* ought to be enough */
7150     char decimal_separator = get_locinfo()->lconv->decimal_point[0];
7151
7152     if (!outbuffer || !decpt || !sign || size == 0)
7153     {
7154         *_errno() = EINVAL;
7155         return EINVAL;
7156     }
7157
7158     stop = _snprintf(buf, 80, "%.*f", ndigits < 0 ? 0 : ndigits, number);
7159     ptr1 = buf;
7160     ptr2 = outbuffer;
7161     first = NULL;
7162     dec1 = 0;
7163     dec2 = 0;
7164
7165     if (*ptr1 == '-') {
7166         *sign = 1;
7167         ptr1++;
7168     } else *sign = 0;
7169
7170     /* For numbers below the requested resolution, work out where
7171        the decimal point will be rather than finding it in the string */
7172     if (number < 1.0 && number > 0.0) {
7173         dec2 = log10(number + 1e-10);
7174         if (-dec2 <= ndigits) dec2 = 0;
7175     }
7176
7177     /* If requested digits is zero or less, we will need to truncate
7178      * the returned string */
7179     if (ndigits < 1) {
7180         stop += ndigits;
7181     }
7182
7183     while (*ptr1 == '0') ptr1++; /* Skip leading zeroes */
7184     while (*ptr1 != '\0' && *ptr1 != decimal_separator) {
7185         if (!first) first = ptr2;
7186         if ((ptr1 - buf) < stop) {
7187             if (size > 1) {
7188                 *ptr2++ = *ptr1++;
7189                 size--;
7190             }
7191         } else {
7192             ptr1++;
7193         }
7194         dec1++;
7195     }
7196
7197     if (ndigits > 0) {
7198         ptr1++;
7199         if (!first) {
7200             while (*ptr1 == '0') { /* Process leading zeroes */
7201                 if (number == 0.0 && size > 1) {
7202                     *ptr2++ = '0';
7203                     size--;
7204                 }
7205                 ptr1++;
7206                 dec1--;
7207             }
7208         }
7209         while (*ptr1 != '\0') {
7210             if (!first) first = ptr2;
7211             if (size > 1) {
7212                 *ptr2++ = *ptr1++;
7213                 size--;
7214             }
7215         }
7216     }
7217
7218     *ptr2 = '\0';
7219
7220     /* We never found a non-zero digit, then our number is either
7221      * smaller than the requested precision, or 0.0 */
7222     if (!first && (number <= 0.0))
7223         dec1 = 0;
7224
7225     *decpt = dec2 ? dec2 : dec1;
7226     return 0;
7227 }
7228
7229 /***********************************************************************
7230  *              _gcvt  (MSVCRT.@)
7231  */
7232 char * CDECL _gcvt( double number, int ndigit, char *buff )
7233 {
7234     if(!buff) {
7235         *_errno() = EINVAL;
7236         return NULL;
7237     }
7238
7239     if(ndigit < 0) {
7240         *_errno() = ERANGE;
7241         return NULL;
7242     }
7243
7244     sprintf(buff, "%.*g", ndigit, number);
7245     return buff;
7246 }
7247
7248 /***********************************************************************
7249  *              _gcvt_s  (MSVCRT.@)
7250  */
7251 int CDECL _gcvt_s(char *buff, size_t size, double number, int digits)
7252 {
7253     int len;
7254
7255     if(!buff) {
7256         *_errno() = EINVAL;
7257         return EINVAL;
7258     }
7259
7260     if( digits<0 || digits>=size) {
7261         if(size)
7262             buff[0] = '\0';
7263
7264         *_errno() = ERANGE;
7265         return ERANGE;
7266     }
7267
7268     len = _scprintf("%.*g", digits, number);
7269     if(len > size) {
7270         buff[0] = '\0';
7271         *_errno() = ERANGE;
7272         return ERANGE;
7273     }
7274
7275     sprintf(buff, "%.*g", digits, number);
7276     return 0;
7277 }
7278
7279 #include <stdlib.h> /* div_t, ldiv_t */
7280
7281 /*********************************************************************
7282  *              div (MSVCRT.@)
7283  * VERSION
7284  *      [i386] Windows binary compatible - returns the struct in eax/edx.
7285  */
7286 #ifdef __i386__
7287 unsigned __int64 CDECL div(int num, int denom)
7288 {
7289     union {
7290         div_t div;
7291         unsigned __int64 uint64;
7292     } ret;
7293
7294     ret.div.quot = num / denom;
7295     ret.div.rem = num % denom;
7296     return ret.uint64;
7297 }
7298 #else
7299 /*********************************************************************
7300  *              div (MSVCRT.@)
7301  * VERSION
7302  *      [!i386] Non-x86 can't run win32 apps so we don't need binary compatibility
7303  */
7304 div_t CDECL div(int num, int denom)
7305 {
7306     div_t ret;
7307
7308     ret.quot = num / denom;
7309     ret.rem = num % denom;
7310     return ret;
7311 }
7312 #endif /* ifdef __i386__ */
7313
7314
7315 /*********************************************************************
7316  *              ldiv (MSVCRT.@)
7317  * VERSION
7318  *      [i386] Windows binary compatible - returns the struct in eax/edx.
7319  */
7320 #ifdef __i386__
7321 unsigned __int64 CDECL ldiv(__msvcrt_long num, __msvcrt_long denom)
7322 {
7323     union {
7324         ldiv_t ldiv;
7325         unsigned __int64 uint64;
7326     } ret;
7327
7328     ret.ldiv.quot = num / denom;
7329     ret.ldiv.rem = num % denom;
7330     return ret.uint64;
7331 }
7332 #else
7333 /*********************************************************************
7334  *              ldiv (MSVCRT.@)
7335  * VERSION
7336  *      [!i386] Non-x86 can't run win32 apps so we don't need binary compatibility
7337  */
7338 ldiv_t CDECL ldiv(__msvcrt_long num, __msvcrt_long denom)
7339 {
7340     ldiv_t ret;
7341
7342     ret.quot = num / denom;
7343     ret.rem = num % denom;
7344     return ret;
7345 }
7346 #endif /* ifdef __i386__ */
7347
#if _MSVCR_VER>=100
/*********************************************************************
 *              lldiv (MSVCR100.@)
 *
 * Computes the quotient and remainder of num / denom for 64-bit operands.
 */
lldiv_t CDECL lldiv(__int64 num, __int64 denom)
{
  lldiv_t result = { .quot = num / denom, .rem = num % denom };

  return result;
}
#endif
7362
7363 #ifdef __i386__
7364
/*********************************************************************
 *              _adjust_fdiv (MSVCRT.@)
 * Used by the MSVC compiler to work around the Pentium FDIV bug.
 *
 * Defined as 0 here; none of the helpers in this section modify it.
 */
int MSVCRT__adjust_fdiv = 0;
7370
7371 /***********************************************************************
7372  *              _adj_fdiv_m16i (MSVCRT.@)
7373  *
7374  * NOTE
7375  *    I _think_ this function is intended to work around the Pentium
7376  *    fdiv bug.
7377  */
7378 void __stdcall _adj_fdiv_m16i( short arg )
7379 {
7380   TRACE("(): stub\n");
7381 }
7382
7383 /***********************************************************************
7384  *              _adj_fdiv_m32 (MSVCRT.@)
7385  *
7386  * NOTE
7387  *    I _think_ this function is intended to work around the Pentium
7388  *    fdiv bug.
7389  */
7390 void __stdcall _adj_fdiv_m32( unsigned int arg )
7391 {
7392   TRACE("(): stub\n");
7393 }
7394
7395 /***********************************************************************
7396  *              _adj_fdiv_m32i (MSVCRT.@)
7397  *
7398  * NOTE
7399  *    I _think_ this function is intended to work around the Pentium
7400  *    fdiv bug.
7401  */
7402 void __stdcall _adj_fdiv_m32i( int arg )
7403 {
7404   TRACE("(): stub\n");
7405 }
7406
7407 /***********************************************************************
7408  *              _adj_fdiv_m64 (MSVCRT.@)
7409  *
7410  * NOTE
7411  *    I _think_ this function is intended to work around the Pentium
7412  *    fdiv bug.
7413  */
7414 void __stdcall _adj_fdiv_m64( unsigned __int64 arg )
7415 {
7416   TRACE("(): stub\n");
7417 }
7418
/***********************************************************************
 *              _adj_fdiv_r (MSVCRT.@)
 *
 * Stub without arguments.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    Presumably one of the compiler helpers for the Pentium FDIV bug
 *    workaround (unverified).
 */
void _adj_fdiv_r(void)
{
  /* nothing is adjusted, the call is only traced */
  TRACE("(): stub\n");
}
7432
7433 /***********************************************************************
7434  *              _adj_fdivr_m16i (MSVCRT.@)
7435  *
7436  * NOTE
7437  *    I _think_ this function is intended to work around the Pentium
7438  *    fdiv bug.
7439  */
7440 void __stdcall _adj_fdivr_m16i( short arg )
7441 {
7442   TRACE("(): stub\n");
7443 }
7444
7445 /***********************************************************************
7446  *              _adj_fdivr_m32 (MSVCRT.@)
7447  *
7448  * NOTE
7449  *    I _think_ this function is intended to work around the Pentium
7450  *    fdiv bug.
7451  */
7452 void __stdcall _adj_fdivr_m32( unsigned int arg )
7453 {
7454   TRACE("(): stub\n");
7455 }
7456
7457 /***********************************************************************
7458  *              _adj_fdivr_m32i (MSVCRT.@)
7459  *
7460  * NOTE
7461  *    I _think_ this function is intended to work around the Pentium
7462  *    fdiv bug.
7463  */
7464 void __stdcall _adj_fdivr_m32i( int arg )
7465 {
7466   TRACE("(): stub\n");
7467 }
7468
7469 /***********************************************************************
7470  *              _adj_fdivr_m64 (MSVCRT.@)
7471  *
7472  * NOTE
7473  *    I _think_ this function is intended to work around the Pentium
7474  *    fdiv bug.
7475  */
7476 void __stdcall _adj_fdivr_m64( unsigned __int64 arg )
7477 {
7478   TRACE("(): stub\n");
7479 }
7480
/***********************************************************************
 *              _adj_fpatan (MSVCRT.@)
 *
 * Stub without arguments.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    Presumably one of the compiler helpers for the Pentium FDIV bug
 *    workaround (unverified).
 */
void _adj_fpatan(void)
{
  /* nothing is adjusted, the call is only traced */
  TRACE("(): stub\n");
}
7494
/***********************************************************************
 *              _adj_fprem (MSVCRT.@)
 *
 * Stub without arguments.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    Presumably one of the compiler helpers for the Pentium FDIV bug
 *    workaround (unverified).
 */
void _adj_fprem(void)
{
  /* nothing is adjusted, the call is only traced */
  TRACE("(): stub\n");
}
7508
/***********************************************************************
 *              _adj_fprem1 (MSVCRT.@)
 *
 * Stub without arguments.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    Presumably one of the compiler helpers for the Pentium FDIV bug
 *    workaround (unverified).
 */
void _adj_fprem1(void)
{
  /* nothing is adjusted, the call is only traced */
  TRACE("(): stub\n");
}
7522
/***********************************************************************
 *              _adj_fptan (MSVCRT.@)
 *
 * Stub without arguments.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    Presumably one of the compiler helpers for the Pentium FDIV bug
 *    workaround (unverified).
 */
void _adj_fptan(void)
{
  /* nothing is adjusted, the call is only traced */
  TRACE("(): stub\n");
}
7536
/***********************************************************************
 *              _safe_fdiv (MSVCRT.@)
 *
 * Stub without arguments.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    Presumably one of the compiler helpers for the Pentium FDIV bug
 *    workaround (unverified).
 */
void _safe_fdiv(void)
{
  /* no division is performed, the call is only traced */
  TRACE("(): stub\n");
}
7550
/***********************************************************************
 *              _safe_fdivr (MSVCRT.@)
 *
 * Stub without arguments.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    Presumably one of the compiler helpers for the Pentium FDIV bug
 *    workaround (unverified).
 */
void _safe_fdivr(void)
{
  /* no division is performed, the call is only traced */
  TRACE("(): stub\n");
}
7564
/***********************************************************************
 *              _safe_fprem (MSVCRT.@)
 *
 * Stub without arguments.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    Presumably one of the compiler helpers for the Pentium FDIV bug
 *    workaround (unverified).
 */
void _safe_fprem(void)
{
  /* no remainder is computed, the call is only traced */
  TRACE("(): stub\n");
}
7578
/***********************************************************************
 *              _safe_fprem1 (MSVCRT.@)
 *
 * Stub without arguments.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    Presumably one of the compiler helpers for the Pentium FDIV bug
 *    workaround (unverified).
 */
void _safe_fprem1(void)
{
  /* no remainder is computed, the call is only traced */
  TRACE("(): stub\n");
}
7593
7594 /***********************************************************************
7595  *              __libm_sse2_acos   (MSVCRT.@)
7596  */
7597 void __cdecl __libm_sse2_acos(void)
7598 {
7599     double d;
7600     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7601     d = acos( d );
7602     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7603 }
7604
7605 /***********************************************************************
7606  *              __libm_sse2_acosf   (MSVCRT.@)
7607  */
7608 void __cdecl __libm_sse2_acosf(void)
7609 {
7610     float f;
7611     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7612     f = acosf( f );
7613     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7614 }
7615
7616 /***********************************************************************
7617  *              __libm_sse2_asin   (MSVCRT.@)
7618  */
7619 void __cdecl __libm_sse2_asin(void)
7620 {
7621     double d;
7622     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7623     d = asin( d );
7624     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7625 }
7626
7627 /***********************************************************************
7628  *              __libm_sse2_asinf   (MSVCRT.@)
7629  */
7630 void __cdecl __libm_sse2_asinf(void)
7631 {
7632     float f;
7633     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7634     f = asinf( f );
7635     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7636 }
7637
7638 /***********************************************************************
7639  *              __libm_sse2_atan   (MSVCRT.@)
7640  */
7641 void __cdecl __libm_sse2_atan(void)
7642 {
7643     double d;
7644     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7645     d = atan( d );
7646     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7647 }
7648
7649 /***********************************************************************
7650  *              __libm_sse2_atan2   (MSVCRT.@)
7651  */
7652 void __cdecl __libm_sse2_atan2(void)
7653 {
7654     double d1, d2;
7655     __asm__ __volatile__( "movq %%xmm0,%0; movq %%xmm1,%1 " : "=m" (d1), "=m" (d2) );
7656     d1 = atan2( d1, d2 );
7657     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d1) );
7658 }
7659
7660 /***********************************************************************
7661  *              __libm_sse2_atanf   (MSVCRT.@)
7662  */
7663 void __cdecl __libm_sse2_atanf(void)
7664 {
7665     float f;
7666     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7667     f = atanf( f );
7668     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7669 }
7670
7671 /***********************************************************************
7672  *              __libm_sse2_cos   (MSVCRT.@)
7673  */
7674 void __cdecl __libm_sse2_cos(void)
7675 {
7676     double d;
7677     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7678     d = cos( d );
7679     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7680 }
7681
7682 /***********************************************************************
7683  *              __libm_sse2_cosf   (MSVCRT.@)
7684  */
7685 void __cdecl __libm_sse2_cosf(void)
7686 {
7687     float f;
7688     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7689     f = cosf( f );
7690     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7691 }
7692
7693 /***********************************************************************
7694  *              __libm_sse2_exp   (MSVCRT.@)
7695  */
7696 void __cdecl __libm_sse2_exp(void)
7697 {
7698     double d;
7699     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7700     d = exp( d );
7701     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7702 }
7703
7704 /***********************************************************************
7705  *              __libm_sse2_expf   (MSVCRT.@)
7706  */
7707 void __cdecl __libm_sse2_expf(void)
7708 {
7709     float f;
7710     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7711     f = expf( f );
7712     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7713 }
7714
7715 /***********************************************************************
7716  *              __libm_sse2_log   (MSVCRT.@)
7717  */
7718 void __cdecl __libm_sse2_log(void)
7719 {
7720     double d;
7721     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7722     d = log( d );
7723     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7724 }
7725
7726 /***********************************************************************
7727  *              __libm_sse2_log10   (MSVCRT.@)
7728  */
7729 void __cdecl __libm_sse2_log10(void)
7730 {
7731     double d;
7732     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7733     d = log10( d );
7734     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7735 }
7736
7737 /***********************************************************************
7738  *              __libm_sse2_log10f   (MSVCRT.@)
7739  */
7740 void __cdecl __libm_sse2_log10f(void)
7741 {
7742     float f;
7743     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7744     f = log10f( f );
7745     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7746 }
7747
7748 /***********************************************************************
7749  *              __libm_sse2_logf   (MSVCRT.@)
7750  */
7751 void __cdecl __libm_sse2_logf(void)
7752 {
7753     float f;
7754     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7755     f = logf( f );
7756     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7757 }
7758
7759 /***********************************************************************
7760  *              __libm_sse2_pow   (MSVCRT.@)
7761  */
7762 void __cdecl __libm_sse2_pow(void)
7763 {
7764     double d1, d2;
7765     __asm__ __volatile__( "movq %%xmm0,%0; movq %%xmm1,%1 " : "=m" (d1), "=m" (d2) );
7766     d1 = pow( d1, d2 );
7767     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d1) );
7768 }
7769
7770 /***********************************************************************
7771  *              __libm_sse2_powf   (MSVCRT.@)
7772  */
7773 void __cdecl __libm_sse2_powf(void)
7774 {
7775     float f1, f2;
7776     __asm__ __volatile__( "movd %%xmm0,%0; movd %%xmm1,%1" : "=g" (f1), "=g" (f2) );
7777     f1 = powf( f1, f2 );
7778     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f1) );
7779 }
7780
7781 /***********************************************************************
7782  *              __libm_sse2_sin   (MSVCRT.@)
7783  */
7784 void __cdecl __libm_sse2_sin(void)
7785 {
7786     double d;
7787     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7788     d = sin( d );
7789     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7790 }
7791
7792 /***********************************************************************
7793  *              __libm_sse2_sinf   (MSVCRT.@)
7794  */
7795 void __cdecl __libm_sse2_sinf(void)
7796 {
7797     float f;
7798     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7799     f = sinf( f );
7800     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7801 }
7802
7803 /***********************************************************************
7804  *              __libm_sse2_tan   (MSVCRT.@)
7805  */
7806 void __cdecl __libm_sse2_tan(void)
7807 {
7808     double d;
7809     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7810     d = tan( d );
7811     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7812 }
7813
7814 /***********************************************************************
7815  *              __libm_sse2_tanf   (MSVCRT.@)
7816  */
7817 void __cdecl __libm_sse2_tanf(void)
7818 {
7819     float f;
7820     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7821     f = tanf( f );
7822     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7823 }
7824
/***********************************************************************
 *              __libm_sse2_sqrt_precise   (MSVCR110.@)
 *
 * Square root helper that takes its double argument in %xmm0 and leaves
 * the result in %xmm0.
 */
void __cdecl __libm_sse2_sqrt_precise(void)
{
    unsigned int cw;
    double d;

    /* fetch the argument from %xmm0 */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    /* read a control word into cw; value and mask are both 0.
     * NOTE(review): presumably cw is the SSE2 control word - confirm
     * against __control87_2 */
    __control87_2(0, 0, NULL, &cw);
    if (cw & _MCW_RC)
    {
        /* some rounding-control bit is set: go through the regular sqrt() */
        d = sqrt(d);
        __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
        return;
    }

    /* sqrt_validate() may rewrite d; when it returns 0 that value is the
     * result and no square root is computed */
    if (!sqrt_validate(&d, FALSE))
    {
        __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
        return;
    }
    /* NOTE(review): sse2_sqrt presumably expects its operand in %xmm0,
     * which is not reloaded from d here - confirm it still holds the input */
    __asm__ __volatile__( "call " __ASM_NAME( "sse2_sqrt" ) );
}
7849 #endif  /* __i386__ */
7850
7851 /*********************************************************************
7852  *      _fdclass (MSVCR120.@)
7853  *
7854  * Copied from musl: src/math/__fpclassifyf.c
7855  */
7856 short CDECL _fdclass(float x)
7857 {
7858     union { float f; UINT32 i; } u = { x };
7859     int e = u.i >> 23 & 0xff;
7860
7861     if (!e) return u.i << 1 ? FP_SUBNORMAL : FP_ZERO;
7862     if (e == 0xff) return u.i << 9 ? FP_NAN : FP_INFINITE;
7863     return FP_NORMAL;
7864 }
7865
7866 /*********************************************************************
7867  *      _dclass (MSVCR120.@)
7868  *
7869  * Copied from musl: src/math/__fpclassify.c
7870  */
7871 short CDECL _dclass(double x)
7872 {
7873     union { double f; UINT64 i; } u = { x };
7874     int e = u.i >> 52 & 0x7ff;
7875
7876     if (!e) return u.i << 1 ? FP_SUBNORMAL : FP_ZERO;
7877     if (e == 0x7ff) return (u.i << 12) ? FP_NAN : FP_INFINITE;
7878     return FP_NORMAL;
7879 }
7880
7881 #if _MSVCR_VER>=120
7882
7883 /*********************************************************************
7884  *      cbrt (MSVCR120.@)
7885  *
7886  * Copied from musl: src/math/cbrt.c
7887  */
7888 double CDECL cbrt(double x)
7889 {
7890     static const UINT32 B1 = 715094163, B2 = 696219795;
7891     static const double P0 =  1.87595182427177009643,
7892                  P1 = -1.88497979543377169875,
7893                  P2 =  1.621429720105354466140,
7894                  P3 = -0.758397934778766047437,
7895                  P4 =  0.145996192886612446982;
7896
7897     union {double f; UINT64 i;} u = {x};
7898     double r,s,t,w;
7899     UINT32 hx = u.i >> 32 & 0x7fffffff;
7900
7901     if (hx >= 0x7ff00000)  /* cbrt(NaN,INF) is itself */
7902         return x + x;
7903
7904     if (hx < 0x00100000) { /* zero or subnormal? */
7905         u.f = x * 0x1p54;
7906         hx = u.i>>32 & 0x7fffffff;
7907         if (hx == 0)
7908             return x;
7909         hx = hx / 3 + B2;
7910     } else
7911         hx = hx / 3 + B1;
7912     u.i &= 1ULL << 63;
7913     u.i |= (UINT64)hx << 32;
7914     t = u.f;
7915
7916     r = (t * t) * (t / x);
7917     t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4));
7918
7919     u.f = t;
7920     u.i = (u.i + 0x80000000) & 0xffffffffc0000000ULL;
7921     t = u.f;
7922
7923     s = t * t;
7924     r = x / s;
7925     w = t + t;
7926     r = (r - t) / (w + r);
7927     t = t + t * r;
7928     return t;
7929 }
7930
7931 /*********************************************************************
7932  *      cbrtf (MSVCR120.@)
7933  *
7934  * Copied from musl: src/math/cbrtf.c
7935  */
7936 float CDECL cbrtf(float x)
7937 {
7938     static const unsigned B1 = 709958130, B2 = 642849266;
7939
7940     double r,T;
7941     union {float f; UINT32 i;} u = {x};
7942     UINT32 hx = u.i & 0x7fffffff;
7943
7944     if (hx >= 0x7f800000)
7945         return x + x;
7946
7947     if (hx < 0x00800000) {  /* zero or subnormal? */
7948         if (hx == 0)
7949             return x;
7950         u.f = x * 0x1p24f;
7951         hx = u.i & 0x7fffffff;
7952         hx = hx / 3 + B2;
7953     } else
7954         hx = hx / 3 + B1;
7955     u.i &= 0x80000000;
7956     u.i |= hx;
7957
7958     T = u.f;
7959     r = T * T * T;
7960     T = T * (x + x + r) / (x + r + r);
7961
7962     r = T * T * T;
7963     T = T * (x + x + r) / (x + r + r);
7964     return T;
7965 }
7966
7967 /*********************************************************************
7968  *      exp2 (MSVCR120.@)
7969  *
7970  * Copied from musl: src/math/exp2.c
7971  */
7972 double CDECL exp2(double x)
7973 {
7974     static const double C[] = {
7975         0x1.62e42fefa39efp-1,
7976         0x1.ebfbdff82c424p-3,
7977         0x1.c6b08d70cf4b5p-5,
7978         0x1.3b2abd24650ccp-7,
7979         0x1.5d7e09b4e3a84p-10
7980     };
7981
7982     UINT32 abstop;
7983     UINT64 ki, idx, top, sbits;
7984     double kd, r, r2, scale, tail, tmp;
7985
7986     abstop = (*(UINT64*)&x >> 52) & 0x7ff;
7987     if (abstop - 0x3c9 >= 0x408 - 0x3c9) {
7988         if (abstop - 0x3c9 >= 0x80000000) {
7989             /* Avoid spurious underflow for tiny x. */
7990             /* Note: 0 is common input. */
7991             return 1.0 + x;
7992         }
7993         if (abstop >= 409) {
7994             if (*(UINT64*)&x == 0xfff0000000000000ull)
7995                 return 0.0;
7996             if (abstop >= 0x7ff)
7997                 return 1.0 + x;
7998             if (!(*(UINT64*)&x >> 63)) {
7999                 *_errno() = ERANGE;
8000                 return fp_barrier(DBL_MAX) * DBL_MAX;
8001             }
8002             else if (x <= -2147483648.0) {
8003                 fp_barrier(x + 0x1p120f);
8004                 return 0;
8005             }
8006             else if (*(UINT64*)&x >= 0xc090cc0000000000ull) {
8007                 *_errno() = ERANGE;
8008                 fp_barrier(x + 0x1p120f);
8009                 return 0;
8010             }
8011         }
8012         if (2 * *(UINT64*)&x > 2 * 0x408d000000000000ull)
8013             /* Large x is special cased below. */
8014             abstop = 0;
8015     }
8016
8017     /* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */
8018     /* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */
8019     kd = fp_barrier(x + 0x1.8p52 / (1 << 7));
8020     ki = *(UINT64*)&kd; /* k. */
8021     kd -= 0x1.8p52 / (1 << 7); /* k/N for int k. */
8022     r = x - kd;
8023     /* 2^(k/N) ~= scale * (1 + tail). */
8024     idx = 2 * (ki % (1 << 7));
8025     top = ki << (52 - 7);
8026     tail = *(double*)&exp_T[idx];
8027     /* This is only a valid scale when -1023*N < k < 1024*N. */
8028     sbits = exp_T[idx + 1] + top;
8029     /* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */
8030     /* Evaluation is optimized assuming superscalar pipelined execution. */
8031     r2 = r * r;
8032     /* Without fma the worst case error is 0.5/N ulp larger. */
8033     /* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */
8034     tmp = tail + r * C[0] + r2 * (C[1] + r * C[2]) + r2 * r2 * (C[3] + r * C[4]);
8035     if (abstop == 0)
8036     {
8037         /* Handle cases that may overflow or underflow when computing the result that
8038            is scale*(1+TMP) without intermediate rounding. The bit representation of
8039            scale is in SBITS, however it has a computed exponent that may have
8040            overflown into the sign bit so that needs to be adjusted before using it as
8041            a double. (int32_t)KI is the k used in the argument reduction and exponent
8042            adjustment of scale, positive k here means the result may overflow and
8043            negative k means the result may underflow. */
8044         double scale, y;
8045
8046         if ((ki & 0x80000000) == 0) {
8047             /* k > 0, the exponent of scale might have overflowed by 1. */
8048             sbits -= 1ull << 52;
8049             scale = *(double*)&sbits;
8050             y = 2 * (scale + scale * tmp);
8051             return y;
8052         }
8053         /* k < 0, need special care in the subnormal range. */
8054         sbits += 1022ull << 52;
8055         scale = *(double*)&sbits;
8056         y = scale + scale * tmp;
8057         if (y < 1.0) {
8058             /* Round y to the right precision before scaling it into the subnormal
8059                range to avoid double rounding that can cause 0.5+E/2 ulp error where
8060                E is the worst-case ulp error outside the subnormal range. So this
8061                is only useful if the goal is better than 1 ulp worst-case error. */
8062             double hi, lo;
8063             lo = scale - y + scale * tmp;
8064             hi = 1.0 + y;
8065             lo = 1.0 - hi + y + lo;
8066             y = hi + lo - 1.0;
8067             /* Avoid -0.0 with downward rounding. */
8068             if (y == 0.0)
8069                 y = 0.0;
8070             /* The underflow exception needs to be signaled explicitly. */
8071             fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022);
8072         }
8073         y = 0x1p-1022 * y;
8074         return y;
8075     }
8076     scale = *(double*)&sbits;
8077     /* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
8078        is no spurious underflow here even without fma. */
8079     return scale + scale * tmp;
8080 }
8081
8082 /*********************************************************************
8083  *      exp2f (MSVCR120.@)
8084  *
8085  * Copied from musl: src/math/exp2f.c
8086  */
8087 float CDECL exp2f(float x)
8088 {
8089     static const double C[] = {
8090         0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1
8091     };
8092     static const double shift = 0x1.8p+52 / (1 << 5);
8093
8094     double kd, xd, z, r, r2, y, s;
8095     UINT32 abstop;
8096     UINT64 ki, t;
8097
8098     xd = x;
8099     abstop = (*(UINT32*)&x >> 20) & 0x7ff;
8100     if (abstop >= 0x430) {
8101         /* |x| >= 128 or x is nan.  */
8102         if (*(UINT32*)&x == 0xff800000)
8103             return 0.0f;
8104         if (abstop >= 0x7f8)
8105             return x + x;
8106         if (x > 0.0f) {
8107             *_errno() = ERANGE;
8108             return fp_barrierf(x * FLT_MAX);
8109         }
8110         if (x <= -150.0f) {
8111             fp_barrierf(x - 0x1p120);
8112             return 0;
8113         }
8114     }
8115
8116     /* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k, N = 1 << 5. */
8117     kd = xd + shift;
8118     ki = *(UINT64*)&kd;
8119     kd -= shift; /* k/(1<<5) for int k.  */
8120     r = xd - kd;
8121
8122     /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
8123     t = exp2f_T[ki % (1 << 5)];
8124     t += ki << (52 - 5);
8125     s = *(double*)&t;
8126     z = C[0] * r + C[1];
8127     r2 = r * r;
8128     y = C[2] * r + 1;
8129     y = z * r2 + y;
8130     y = y * s;
8131     return y;
8132 }
8133
8134 /*********************************************************************
8135  *      expm1 (MSVCR120.@)
8136  */
8137 double CDECL expm1(double x)
8138 {
8139     return __expm1(x);
8140 }
8141
8142 /*********************************************************************
8143  *      expm1f (MSVCR120.@)
8144  */
8145 float CDECL expm1f(float x)
8146 {
8147     return __expm1f(x);
8148 }
8149
8150 /*********************************************************************
8151  *      log1p (MSVCR120.@)
8152  *
8153  * Copied from musl: src/math/log1p.c
8154  */
8155 double CDECL log1p(double x)
8156 {
8157     static const double ln2_hi = 6.93147180369123816490e-01,
8158         ln2_lo = 1.90821492927058770002e-10,
8159         Lg1 = 6.666666666666735130e-01,
8160         Lg2 = 3.999999999940941908e-01,
8161         Lg3 = 2.857142874366239149e-01,
8162         Lg4 = 2.222219843214978396e-01,
8163         Lg5 = 1.818357216161805012e-01,
8164         Lg6 = 1.531383769920937332e-01,
8165         Lg7 = 1.479819860511658591e-01;
8166
8167     union {double f; UINT64 i;} u = {x};
8168     double hfsq, f, c, s, z, R, w, t1, t2, dk;
8169     UINT32 hx, hu;
8170     int k;
8171
8172     hx = u.i >> 32;
8173     k = 1;
8174     if (hx < 0x3fda827a || hx >> 31) { /* 1+x < sqrt(2)+ */
8175         if (hx >= 0xbff00000) { /* x <= -1.0 */
8176             if (x == -1) {
8177                 *_errno() = ERANGE;
8178                 return x / 0.0; /* og1p(-1) = -inf */
8179             }
8180             *_errno() = EDOM;
8181             return (x-x) / 0.0; /* log1p(x<-1) = NaN */
8182         }
8183         if (hx << 1 < 0x3ca00000 << 1) { /* |x| < 2**-53 */
8184             fp_barrier(x + 0x1p120f);
8185             /* underflow if subnormal */
8186             if ((hx & 0x7ff00000) == 0)
8187                 fp_barrierf(x);
8188             return x;
8189         }
8190         if (hx <= 0xbfd2bec4) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
8191             k = 0;
8192             c = 0;
8193             f = x;
8194         }
8195     } else if (hx >= 0x7ff00000)
8196         return x;
8197     if (k) {
8198         u.f = 1 + x;
8199         hu = u.i >> 32;
8200         hu += 0x3ff00000 - 0x3fe6a09e;
8201         k = (int)(hu >> 20) - 0x3ff;
8202         /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
8203         if (k < 54) {
8204             c = k >= 2 ? 1 - (u.f - x) : x - (u.f - 1);
8205             c /= u.f;
8206         } else
8207             c = 0;
8208         /* reduce u into [sqrt(2)/2, sqrt(2)] */
8209         hu = (hu & 0x000fffff) + 0x3fe6a09e;
8210         u.i = (UINT64)hu << 32 | (u.i & 0xffffffff);
8211         f = u.f - 1;
8212     }
8213     hfsq = 0.5 * f * f;
8214     s = f / (2.0 + f);
8215     z = s * s;
8216     w = z * z;
8217     t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
8218     t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
8219     R = t2 + t1;
8220     dk = k;
8221     return s * (hfsq + R) + (dk * ln2_lo + c) - hfsq + f + dk * ln2_hi;
8222 }
8223
8224 /*********************************************************************
8225  *      log1pf (MSVCR120.@)
8226  *
8227  * Copied from musl: src/math/log1pf.c
8228  */
8229 float CDECL log1pf(float x)
8230 {
8231     static const float ln2_hi = 6.9313812256e-01,
8232         ln2_lo = 9.0580006145e-06,
8233         Lg1 = 0xaaaaaa.0p-24,
8234         Lg2 = 0xccce13.0p-25,
8235         Lg3 = 0x91e9ee.0p-25,
8236         Lg4 = 0xf89e26.0p-26;
8237
8238     union {float f; UINT32 i;} u = {x};
8239     float hfsq, f, c, s, z, R, w, t1, t2, dk;
8240     UINT32 ix, iu;
8241     int k;
8242
8243     ix = u.i;
8244     k = 1;
8245     if (ix < 0x3ed413d0 || ix >> 31) { /* 1+x < sqrt(2)+ */
8246         if (ix >= 0xbf800000) { /* x <= -1.0 */
8247             if (x == -1) {
8248                 *_errno() = ERANGE;
8249                 return x / 0.0f; /* log1p(-1)=+inf */
8250             }
8251             *_errno() = EDOM;
8252             return (x - x) / 0.0f; /* log1p(x<-1)=NaN */
8253         }
8254         if (ix<<1 < 0x33800000<<1) { /* |x| < 2**-24 */
8255             /* underflow if subnormal */
8256             if ((ix & 0x7f800000) == 0)
8257                 fp_barrierf(x * x);
8258             return x;
8259         }
8260         if (ix <= 0xbe95f619) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
8261             k = 0;
8262             c = 0;
8263             f = x;
8264         }
8265     } else if (ix >= 0x7f800000)
8266         return x;
8267     if (k) {
8268         u.f = 1 + x;
8269         iu = u.i;
8270         iu += 0x3f800000 - 0x3f3504f3;
8271         k = (int)(iu >> 23) - 0x7f;
8272         /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
8273         if (k < 25) {
8274             c = k >= 2 ? 1 - (u.f - x) : x - (u.f - 1);
8275             c /= u.f;
8276         } else
8277             c = 0;
8278         /* reduce u into [sqrt(2)/2, sqrt(2)] */
8279         iu = (iu & 0x007fffff) + 0x3f3504f3;
8280         u.i = iu;
8281         f = u.f - 1;
8282     }
8283     s = f / (2.0f + f);
8284     z = s * s;
8285     w = z * z;
8286     t1= w * (Lg2 + w * Lg4);
8287     t2= z * (Lg1 + w * Lg3);
8288     R = t2 + t1;
8289     hfsq = 0.5f * f * f;
8290     dk = k;
8291     return s * (hfsq + R) + (dk * ln2_lo + c) - hfsq + f + dk * ln2_hi;
8292 }
8293
8294 /*********************************************************************
8295  *      log2 (MSVCR120.@)
8296  *
8297  * Copied from musl: src/math/log2.c
8298  */
8299 double CDECL log2(double x)
8300 {
8301     static const double invln2hi = 0x1.7154765200000p+0,
8302         invln2lo = 0x1.705fc2eefa200p-33;
8303     static const double A[] = {
8304         -0x1.71547652b8339p-1,
8305         0x1.ec709dc3a04bep-2,
8306         -0x1.7154764702ffbp-2,
8307         0x1.2776c50034c48p-2,
8308         -0x1.ec7b328ea92bcp-3,
8309         0x1.a6225e117f92ep-3
8310     };
8311     static const double B[] = {
8312         -0x1.71547652b82fep-1,
8313         0x1.ec709dc3a03f7p-2,
8314         -0x1.71547652b7c3fp-2,
8315         0x1.2776c50f05be4p-2,
8316         -0x1.ec709dd768fe5p-3,
8317         0x1.a61761ec4e736p-3,
8318         -0x1.7153fbc64a79bp-3,
8319         0x1.484d154f01b4ap-3,
8320         -0x1.289e4a72c383cp-3,
8321         0x1.0b32f285aee66p-3
8322     };
8323     static const struct {
8324         double invc, logc;
8325     } T[] = {
8326         {0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1},
8327         {0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1},
8328         {0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1},
8329         {0x1.661ec32d06c85p+0, -0x1.efec5360b4000p-2},
8330         {0x1.623fa951198f8p+0, -0x1.dfdd91ab7e000p-2},
8331         {0x1.5e75ba4cf026cp+0, -0x1.cffae0cc79000p-2},
8332         {0x1.5ac055a214fb8p+0, -0x1.c043811fda000p-2},
8333         {0x1.571ed0f166e1ep+0, -0x1.b0b67323ae000p-2},
8334         {0x1.53909590bf835p+0, -0x1.a152f5a2db000p-2},
8335         {0x1.5014fed61adddp+0, -0x1.9217f5af86000p-2},
8336         {0x1.4cab88e487bd0p+0, -0x1.8304db0719000p-2},
8337         {0x1.49539b4334feep+0, -0x1.74189f9a9e000p-2},
8338         {0x1.460cbdfafd569p+0, -0x1.6552bb5199000p-2},
8339         {0x1.42d664ee4b953p+0, -0x1.56b23a29b1000p-2},
8340         {0x1.3fb01111dd8a6p+0, -0x1.483650f5fa000p-2},
8341         {0x1.3c995b70c5836p+0, -0x1.39de937f6a000p-2},
8342         {0x1.3991c4ab6fd4ap+0, -0x1.2baa1538d6000p-2},
8343         {0x1.3698e0ce099b5p+0, -0x1.1d98340ca4000p-2},
8344         {0x1.33ae48213e7b2p+0, -0x1.0fa853a40e000p-2},
8345         {0x1.30d191985bdb1p+0, -0x1.01d9c32e73000p-2},
8346         {0x1.2e025cab271d7p+0, -0x1.e857da2fa6000p-3},
8347         {0x1.2b404cf13cd82p+0, -0x1.cd3c8633d8000p-3},
8348         {0x1.288b02c7ccb50p+0, -0x1.b26034c14a000p-3},
8349         {0x1.25e2263944de5p+0, -0x1.97c1c2f4fe000p-3},
8350         {0x1.234563d8615b1p+0, -0x1.7d6023f800000p-3},
8351         {0x1.20b46e33eaf38p+0, -0x1.633a71a05e000p-3},
8352         {0x1.1e2eefdcda3ddp+0, -0x1.494f5e9570000p-3},
8353         {0x1.1bb4a580b3930p+0, -0x1.2f9e424e0a000p-3},
8354         {0x1.19453847f2200p+0, -0x1.162595afdc000p-3},
8355         {0x1.16e06c0d5d73cp+0, -0x1.f9c9a75bd8000p-4},
8356         {0x1.1485f47b7e4c2p+0, -0x1.c7b575bf9c000p-4},
8357         {0x1.12358ad0085d1p+0, -0x1.960c60ff48000p-4},
8358         {0x1.0fef00f532227p+0, -0x1.64ce247b60000p-4},
8359         {0x1.0db2077d03a8fp+0, -0x1.33f78b2014000p-4},
8360         {0x1.0b7e6d65980d9p+0, -0x1.0387d1a42c000p-4},
8361         {0x1.0953efe7b408dp+0, -0x1.a6f9208b50000p-5},
8362         {0x1.07325cac53b83p+0, -0x1.47a954f770000p-5},
8363         {0x1.05197e40d1b5cp+0, -0x1.d23a8c50c0000p-6},
8364         {0x1.03091c1208ea2p+0, -0x1.16a2629780000p-6},
8365         {0x1.0101025b37e21p+0, -0x1.720f8d8e80000p-8},
8366         {0x1.fc07ef9caa76bp-1, 0x1.6fe53b1500000p-7},
8367         {0x1.f4465d3f6f184p-1, 0x1.11ccce10f8000p-5},
8368         {0x1.ecc079f84107fp-1, 0x1.c4dfc8c8b8000p-5},
8369         {0x1.e573a99975ae8p-1, 0x1.3aa321e574000p-4},
8370         {0x1.de5d6f0bd3de6p-1, 0x1.918a0d08b8000p-4},
8371         {0x1.d77b681ff38b3p-1, 0x1.e72e9da044000p-4},
8372         {0x1.d0cb5724de943p-1, 0x1.1dcd2507f6000p-3},
8373         {0x1.ca4b2dc0e7563p-1, 0x1.476ab03dea000p-3},
8374         {0x1.c3f8ee8d6cb51p-1, 0x1.7074377e22000p-3},
8375         {0x1.bdd2b4f020c4cp-1, 0x1.98ede8ba94000p-3},
8376         {0x1.b7d6c006015cap-1, 0x1.c0db86ad2e000p-3},
8377         {0x1.b20366e2e338fp-1, 0x1.e840aafcee000p-3},
8378         {0x1.ac57026295039p-1, 0x1.0790ab4678000p-2},
8379         {0x1.a6d01bc2731ddp-1, 0x1.1ac056801c000p-2},
8380         {0x1.a16d3bc3ff18bp-1, 0x1.2db11d4fee000p-2},
8381         {0x1.9c2d14967feadp-1, 0x1.406464ec58000p-2},
8382         {0x1.970e4f47c9902p-1, 0x1.52dbe093af000p-2},
8383         {0x1.920fb3982bcf2p-1, 0x1.651902050d000p-2},
8384         {0x1.8d30187f759f1p-1, 0x1.771d2cdeaf000p-2},
8385         {0x1.886e5ebb9f66dp-1, 0x1.88e9c857d9000p-2},
8386         {0x1.83c97b658b994p-1, 0x1.9a80155e16000p-2},
8387         {0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2},
8388         {0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2},
8389         {0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2}
8390     };
8391     static const struct {
8392         double chi, clo;
8393     } T2[] = {
8394         {0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
8395         {0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57},
8396         {0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55},
8397         {0x1.6e00038b95a04p-1, 0x1.8ff8856739326p-55},
8398         {0x1.71fffe09994e3p-1, 0x1.afd40275f82b1p-55},
8399         {0x1.7600015590e1p-1, -0x1.2fd75b4238341p-56},
8400         {0x1.7a00012655bd5p-1, 0x1.808e67c242b76p-56},
8401         {0x1.7e0003259e9a6p-1, -0x1.208e426f622b7p-57},
8402         {0x1.81fffedb4b2d2p-1, -0x1.402461ea5c92fp-55},
8403         {0x1.860002dfafcc3p-1, 0x1.df7f4a2f29a1fp-57},
8404         {0x1.89ffff78c6b5p-1, -0x1.e0453094995fdp-55},
8405         {0x1.8e00039671566p-1, -0x1.a04f3bec77b45p-55},
8406         {0x1.91fffe2bf1745p-1, -0x1.7fa34400e203cp-56},
8407         {0x1.95fffcc5c9fd1p-1, -0x1.6ff8005a0695dp-56},
8408         {0x1.9a0003bba4767p-1, 0x1.0f8c4c4ec7e03p-56},
8409         {0x1.9dfffe7b92da5p-1, 0x1.e7fd9478c4602p-55},
8410         {0x1.a1fffd72efdafp-1, -0x1.a0c554dcdae7ep-57},
8411         {0x1.a5fffde04ff95p-1, 0x1.67da98ce9b26bp-55},
8412         {0x1.a9fffca5e8d2bp-1, -0x1.284c9b54c13dep-55},
8413         {0x1.adfffddad03eap-1, 0x1.812c8ea602e3cp-58},
8414         {0x1.b1ffff10d3d4dp-1, -0x1.efaddad27789cp-55},
8415         {0x1.b5fffce21165ap-1, 0x1.3cb1719c61237p-58},
8416         {0x1.b9fffd950e674p-1, 0x1.3f7d94194cep-56},
8417         {0x1.be000139ca8afp-1, 0x1.50ac4215d9bcp-56},
8418         {0x1.c20005b46df99p-1, 0x1.beea653e9c1c9p-57},
8419         {0x1.c600040b9f7aep-1, -0x1.c079f274a70d6p-56},
8420         {0x1.ca0006255fd8ap-1, -0x1.a0b4076e84c1fp-56},
8421         {0x1.cdfffd94c095dp-1, 0x1.8f933f99ab5d7p-55},
8422         {0x1.d1ffff975d6cfp-1, -0x1.82c08665fe1bep-58},
8423         {0x1.d5fffa2561c93p-1, -0x1.b04289bd295f3p-56},
8424         {0x1.d9fff9d228b0cp-1, 0x1.70251340fa236p-55},
8425         {0x1.de00065bc7e16p-1, -0x1.5011e16a4d80cp-56},
8426         {0x1.e200002f64791p-1, 0x1.9802f09ef62ep-55},
8427         {0x1.e600057d7a6d8p-1, -0x1.e0b75580cf7fap-56},
8428         {0x1.ea00027edc00cp-1, -0x1.c848309459811p-55},
8429         {0x1.ee0006cf5cb7cp-1, -0x1.f8027951576f4p-55},
8430         {0x1.f2000782b7dccp-1, -0x1.f81d97274538fp-55},
8431         {0x1.f6000260c450ap-1, -0x1.071002727ffdcp-59},
8432         {0x1.f9fffe88cd533p-1, -0x1.81bdce1fda8bp-58},
8433         {0x1.fdfffd50f8689p-1, 0x1.7f91acb918e6ep-55},
8434         {0x1.0200004292367p+0, 0x1.b7ff365324681p-54},
8435         {0x1.05fffe3e3d668p+0, 0x1.6fa08ddae957bp-55},
8436         {0x1.0a0000a85a757p+0, -0x1.7e2de80d3fb91p-58},
8437         {0x1.0e0001a5f3fccp+0, -0x1.1823305c5f014p-54},
8438         {0x1.11ffff8afbaf5p+0, -0x1.bfabb6680bac2p-55},
8439         {0x1.15fffe54d91adp+0, -0x1.d7f121737e7efp-54},
8440         {0x1.1a00011ac36e1p+0, 0x1.c000a0516f5ffp-54},
8441         {0x1.1e00019c84248p+0, -0x1.082fbe4da5dap-54},
8442         {0x1.220000ffe5e6ep+0, -0x1.8fdd04c9cfb43p-55},
8443         {0x1.26000269fd891p+0, 0x1.cfe2a7994d182p-55},
8444         {0x1.2a00029a6e6dap+0, -0x1.00273715e8bc5p-56},
8445         {0x1.2dfffe0293e39p+0, 0x1.b7c39dab2a6f9p-54},
8446         {0x1.31ffff7dcf082p+0, 0x1.df1336edc5254p-56},
8447         {0x1.35ffff05a8b6p+0, -0x1.e03564ccd31ebp-54},
8448         {0x1.3a0002e0eaeccp+0, 0x1.5f0e74bd3a477p-56},
8449         {0x1.3e000043bb236p+0, 0x1.c7dcb149d8833p-54},
8450         {0x1.4200002d187ffp+0, 0x1.e08afcf2d3d28p-56},
8451         {0x1.460000d387cb1p+0, 0x1.20837856599a6p-55},
8452         {0x1.4a00004569f89p+0, -0x1.9fa5c904fbcd2p-55},
8453         {0x1.4e000043543f3p+0, -0x1.81125ed175329p-56},
8454         {0x1.51fffcc027f0fp+0, 0x1.883d8847754dcp-54},
8455         {0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55},
8456         {0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55},
8457         {0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54}
8458     };
8459
8460     double z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p, rhi, rlo;
8461     UINT64 ix, iz, tmp;
8462     UINT32 top;
8463     int k, i;
8464
8465     ix = *(UINT64*)&x;
8466     top = ix >> 48;
8467     if (ix - 0x3feea4af00000000ULL < 0x210aa00000000ULL) {
8468         /* Handle close to 1.0 inputs separately.  */
8469         /* Fix sign of zero with downward rounding when x==1.  */
8470         if (ix == 0x3ff0000000000000ULL)
8471             return 0;
8472         r = x - 1.0;
8473         *(UINT64*)&rhi = *(UINT64*)&r & -1ULL << 32;
8474         rlo = r - rhi;
8475         hi = rhi * invln2hi;
8476         lo = rlo * invln2hi + r * invln2lo;
8477         r2 = r * r; /* rounding error: 0x1p-62.  */
8478         r4 = r2 * r2;
8479         /* Worst-case error is less than 0.54 ULP (0.55 ULP without fma).  */
8480         p = r2 * (B[0] + r * B[1]);
8481         y = hi + p;
8482         lo += hi - y + p;
8483         lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) +
8484                 r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
8485         y += lo;
8486         return y;
8487     }
8488     if (top - 0x0010 >= 0x7ff0 - 0x0010) {
8489         /* x < 0x1p-1022 or inf or nan.  */
8490         if (ix * 2 == 0) {
8491             *_errno() = ERANGE;
8492             return -1.0 / x;
8493         }
8494         if (ix == 0x7ff0000000000000ULL) /* log(inf) == inf.  */
8495             return x;
8496         if ((top & 0x7ff0) == 0x7ff0 && (ix & 0xfffffffffffffULL))
8497             return x;
8498         if (top & 0x8000) {
8499             *_errno() = EDOM;
8500             return (x - x) / (x - x);
8501         }
8502         /* x is subnormal, normalize it.  */
8503         x *= 0x1p52;
8504         ix = *(UINT64*)&x;
8505         ix -= 52ULL << 52;
8506     }
8507
8508     /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
8509        The range is split into N subintervals.
8510        The ith subinterval contains z and c is near its center.  */
8511     tmp = ix - 0x3fe6000000000000ULL;
8512     i = (tmp >> (52 - 6)) % (1 << 6);
8513     k = (INT64)tmp >> 52; /* arithmetic shift */
8514     iz = ix - (tmp & 0xfffULL << 52);
8515     invc = T[i].invc;
8516     logc = T[i].logc;
8517     z = *(double*)&iz;
8518     kd = k;
8519
8520     /* log2(x) = log2(z/c) + log2(c) + k.  */
8521     /* r ~= z/c - 1, |r| < 1/(2*N).  */
8522     /* rounding error: 0x1p-55/N + 0x1p-65.  */
8523     r = (z - T2[i].chi - T2[i].clo) * invc;
8524     *(UINT64*)&rhi = *(UINT64*)&r & -1ULL << 32;
8525     rlo = r - rhi;
8526     t1 = rhi * invln2hi;
8527     t2 = rlo * invln2hi + r * invln2lo;
8528
8529     /* hi + lo = r/ln2 + log2(c) + k.  */
8530     t3 = kd + logc;
8531     hi = t3 + t1;
8532     lo = t3 - hi + t1 + t2;
8533
8534     /* log2(r+1) = r/ln2 + r^2*poly(r).  */
8535     /* Evaluation is optimized assuming superscalar pipelined execution.  */
8536     r2 = r * r; /* rounding error: 0x1p-54/N^2.  */
8537     r4 = r2 * r2;
8538     /* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
8539        ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma).  */
8540     p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
8541     y = lo + r2 * p + hi;
8542     return y;
8543 }
8544
8545 /*********************************************************************
8546  *      log2f (MSVCR120.@)
8547  *
8548  * Copied from musl: src/math/log2f.c
8549  */
8550 float CDECL log2f(float x)
8551 {
8552     static const double A[] = {
8553         -0x1.712b6f70a7e4dp-2,
8554         0x1.ecabf496832ep-2,
8555         -0x1.715479ffae3dep-1,
8556         0x1.715475f35c8b8p0
8557     };
8558     static const struct {
8559         double invc, logc;
8560     } T[] = {
8561         { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },
8562         { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 },
8563         { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 },
8564         { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 },
8565         { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 },
8566         { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 },
8567         { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 },
8568         { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 },
8569         { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 },
8570         { 0x1p+0, 0x0p+0 },
8571         { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 },
8572         { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 },
8573         { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 },
8574         { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 },
8575         { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 },
8576         { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 }
8577     };
8578
8579     double z, r, r2, p, y, y0, invc, logc;
8580     UINT32 ix, iz, top, tmp;
8581     int k, i;
8582
8583     ix = *(UINT32*)&x;
8584     /* Fix sign of zero with downward rounding when x==1. */
8585     if (ix == 0x3f800000)
8586         return 0;
8587     if (ix - 0x00800000 >= 0x7f800000 - 0x00800000) {
8588         /* x < 0x1p-126 or inf or nan. */
8589         if (ix * 2 == 0) {
8590             *_errno() = ERANGE;
8591             return -1.0f / x;
8592         }
8593         if (ix == 0x7f800000) /* log2(inf) == inf. */
8594             return x;
8595         if (ix * 2 > 0xff000000)
8596             return x;
8597         if (ix & 0x80000000) {
8598             *_errno() = EDOM;
8599             return (x - x) / (x - x);
8600         }
8601         /* x is subnormal, normalize it. */
8602         x *= 0x1p23f;
8603         ix = *(UINT32*)&x;
8604         ix -= 23 << 23;
8605     }
8606
8607     /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
8608        The range is split into N subintervals.
8609        The ith subinterval contains z and c is near its center. */
8610     tmp = ix - 0x3f330000;
8611     i = (tmp >> (23 - 4)) % (1 << 4);
8612     top = tmp & 0xff800000;
8613     iz = ix - top;
8614     k = (INT32)tmp >> 23; /* arithmetic shift */
8615     invc = T[i].invc;
8616     logc = T[i].logc;
8617     z = *(float*)&iz;
8618
8619     /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
8620     r = z * invc - 1;
8621     y0 = logc + (double)k;
8622
8623     /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
8624     r2 = r * r;
8625     y = A[1] * r + A[2];
8626     y = A[0] * r2 + y;
8627     p = A[3] * r + y0;
8628     y = y * r2 + p;
8629     return y;
8630 }
8631
8632 /*********************************************************************
8633  *      rint (MSVCR120.@)
8634  */
8635 double CDECL rint(double x)
8636 {
8637     return __rint(x);
8638 }
8639
8640 /*********************************************************************
8641  *      rintf (MSVCR120.@)
8642  *
8643  * Copied from musl: src/math/rintf.c
8644  */
8645 float CDECL rintf(float x)
8646 {
8647     static const float toint = 1 / FLT_EPSILON;
8648
8649     unsigned int ix = *(unsigned int*)&x;
8650     int e = ix >> 23 & 0xff;
8651     int s = ix >> 31;
8652     float y;
8653
8654     if (e >= 0x7f + 23)
8655         return x;
8656     if (s)
8657         y = fp_barrierf(x - toint) + toint;
8658     else
8659         y = fp_barrierf(x + toint) - toint;
8660     if (y == 0)
8661         return s ? -0.0f : 0.0f;
8662     return y;
8663 }
8664
8665 /*********************************************************************
8666  *      lrint (MSVCR120.@)
8667  */
8668 __msvcrt_long CDECL lrint(double x)
8669 {
8670     double d;
8671
8672     d = rint(x);
8673     if ((d < 0 && d != (double)(__msvcrt_long)d)
8674             || (d >= 0 && d != (double)(__msvcrt_ulong)d)) {
8675         *_errno() = EDOM;
8676         return 0;
8677     }
8678     return d;
8679 }
8680
8681 /*********************************************************************
8682  *      lrintf (MSVCR120.@)
8683  */
8684 __msvcrt_long CDECL lrintf(float x)
8685 {
8686     float f;
8687
8688     f = rintf(x);
8689     if ((f < 0 && f != (float)(__msvcrt_long)f)
8690             || (f >= 0 && f != (float)(__msvcrt_ulong)f)) {
8691         *_errno() = EDOM;
8692         return 0;
8693     }
8694     return f;
8695 }
8696
8697 /*********************************************************************
8698  *      llrint (MSVCR120.@)
8699  */
8700 __int64 CDECL llrint(double x)
8701 {
8702     double d;
8703
8704     d = rint(x);
8705     if ((d < 0 && d != (double)(__int64)d)
8706             || (d >= 0 && d != (double)(unsigned __int64)d)) {
8707         *_errno() = EDOM;
8708         return 0;
8709     }
8710     return d;
8711 }
8712
8713 /*********************************************************************
8714  *      llrintf (MSVCR120.@)
8715  */
8716 __int64 CDECL llrintf(float x)
8717 {
8718     float f;
8719
8720     f = rintf(x);
8721     if ((f < 0 && f != (float)(__int64)f)
8722             || (f >= 0 && f != (float)(unsigned __int64)f)) {
8723         *_errno() = EDOM;
8724         return 0;
8725     }
8726     return f;
8727 }
8728
8729 /*********************************************************************
8730  *      round (MSVCR120.@)
8731  */
8732 double CDECL round(double x)
8733 {
8734     return __round(x);
8735 }
8736
8737 /*********************************************************************
8738  *      roundf (MSVCR120.@)
8739  *
8740  * Copied from musl: src/math/roundf.c
8741  */
8742 float CDECL roundf(float x)
8743 {
8744     static const float toint = 1 / FLT_EPSILON;
8745
8746     unsigned int ix = *(unsigned int*)&x;
8747     int e = ix >> 23 & 0xff;
8748     float y;
8749
8750     if (e >= 0x7f + 23)
8751         return x;
8752     if (ix >> 31)
8753         x = -x;
8754     if (e < 0x7f - 1)
8755         return 0 * *(float*)&ix;
8756     y = fp_barrierf(x + toint) - toint - x;
8757     if (y > 0.5f)
8758         y = y + x - 1;
8759     else if (y <= -0.5f)
8760         y = y + x + 1;
8761     else
8762         y = y + x;
8763     if (ix >> 31)
8764         y = -y;
8765     return y;
8766 }
8767
8768 /*********************************************************************
8769  *      lround (MSVCR120.@)
8770  *
8771  * Copied from musl: src/math/lround.c
8772  */
8773 __msvcrt_long CDECL lround(double x)
8774 {
8775     double d = round(x);
8776     if (d != (double)(__msvcrt_long)d) {
8777         *_errno() = EDOM;
8778         return 0;
8779     }
8780     return d;
8781 }
8782
8783 /*********************************************************************
8784  *      lroundf (MSVCR120.@)
8785  *
8786  * Copied from musl: src/math/lroundf.c
8787  */
8788 __msvcrt_long CDECL lroundf(float x)
8789 {
8790     float f = roundf(x);
8791     if (f != (float)(__msvcrt_long)f) {
8792         *_errno() = EDOM;
8793         return 0;
8794     }
8795     return f;
8796 }
8797
8798 /*********************************************************************
8799  *      llround (MSVCR120.@)
8800  *
8801  * Copied from musl: src/math/llround.c
8802  */
8803 __int64 CDECL llround(double x)
8804 {
8805     double d = round(x);
8806     if (d != (double)(__int64)d) {
8807         *_errno() = EDOM;
8808         return 0;
8809     }
8810     return d;
8811 }
8812
8813 /*********************************************************************
8814  *      llroundf (MSVCR120.@)
8815  *
8816  * Copied from musl: src/math/llroundf.c
8817  */
8818 __int64 CDECL llroundf(float x)
8819 {
8820     float f = roundf(x);
8821     if (f != (float)(__int64)f) {
8822         *_errno() = EDOM;
8823         return 0;
8824     }
8825     return f;
8826 }
8827
8828 /*********************************************************************
8829  *      trunc (MSVCR120.@)
8830  *
8831  * Copied from musl: src/math/trunc.c
8832  */
8833 double CDECL trunc(double x)
8834 {
8835     union {double f; UINT64 i;} u = {x};
8836     int e = (u.i >> 52 & 0x7ff) - 0x3ff + 12;
8837     UINT64 m;
8838
8839     if (e >= 52 + 12)
8840         return x;
8841     if (e < 12)
8842         e = 1;
8843     m = -1ULL >> e;
8844     if ((u.i & m) == 0)
8845         return x;
8846     u.i &= ~m;
8847     return u.f;
8848 }
8849
8850 /*********************************************************************
8851  *      truncf (MSVCR120.@)
8852  *
8853  * Copied from musl: src/math/truncf.c
8854  */
8855 float CDECL truncf(float x)
8856 {
8857     union {float f; UINT32 i;} u = {x};
8858     int e = (u.i >> 23 & 0xff) - 0x7f + 9;
8859     UINT32 m;
8860
8861     if (e >= 23 + 9)
8862         return x;
8863     if (e < 9)
8864         e = 1;
8865     m = -1U >> e;
8866     if ((u.i & m) == 0)
8867         return x;
8868     u.i &= ~m;
8869     return u.f;
8870 }
8871
8872 /*********************************************************************
8873  *      _dtest (MSVCR120.@)
8874  */
8875 short CDECL _dtest(double *x)
8876 {
8877     return _dclass(*x);
8878 }
8879
8880 /*********************************************************************
8881  *      _fdtest (MSVCR120.@)
8882  */
8883 short CDECL _fdtest(float *x)
8884 {
8885     return _fdclass(*x);
8886 }
8887
/* Helper for erfc2(): evaluates 1 - erx - P(s)/Q(s) with s = |x| - 1,
 * where P and Q are degree-6 polynomials.  erfc2() calls it for
 * |x| < 1.25.  Both polynomials are evaluated with Horner's scheme,
 * highest coefficient first. */
static double erfc1(double x)
{
    static const double erx = 8.45062911510467529297e-01;
    /* numerator coefficients pa0..pa6 */
    static const double pa[] = {
        -2.36211856075265944077e-03,
         4.14856118683748331666e-01,
        -3.72207876035701323847e-01,
         3.18346619901161753674e-01,
        -1.10894694282396677476e-01,
         3.54783043256182359371e-02,
        -2.16637559486879084300e-03
    };
    /* denominator coefficients: constant term 1, then qa1..qa6 */
    static const double qa[] = {
         1.0,
         1.06420880400844228286e-01,
         5.40397917702171048937e-01,
         7.18286544141962662868e-02,
         1.26171219808761642112e-01,
         1.36370839120290507362e-02,
         1.19844998467991074170e-02
    };

    double s, num, den;
    int i;

    s = fabs(x) - 1;
    num = pa[6];
    for (i = 5; i >= 0; i--)
        num = pa[i] + s * num;
    den = qa[6];
    for (i = 5; i >= 0; i--)
        den = qa[i] + s * den;
    return 1 - erx - num / den;
}
8912
8913 static double erfc2(UINT32 ix, double x)
8914 {
8915     static const double ra0  = -9.86494403484714822705e-03,
8916                  ra1  = -6.93858572707181764372e-01,
8917                  ra2  = -1.05586262253232909814e+01,
8918                  ra3  = -6.23753324503260060396e+01,
8919                  ra4  = -1.62396669462573470355e+02,
8920                  ra5  = -1.84605092906711035994e+02,
8921                  ra6  = -8.12874355063065934246e+01,
8922                  ra7  = -9.81432934416914548592e+00,
8923                  sa1  =  1.96512716674392571292e+01,
8924                  sa2  =  1.37657754143519042600e+02,
8925                  sa3  =  4.34565877475229228821e+02,
8926                  sa4  =  6.45387271733267880336e+02,
8927                  sa5  =  4.29008140027567833386e+02,
8928                  sa6  =  1.08635005541779435134e+02,
8929                  sa7  =  6.57024977031928170135e+00,
8930                  sa8  = -6.04244152148580987438e-02,
8931                  rb0  = -9.86494292470009928597e-03,
8932                  rb1  = -7.99283237680523006574e-01,
8933                  rb2  = -1.77579549177547519889e+01,
8934                  rb3  = -1.60636384855821916062e+02,
8935                  rb4  = -6.37566443368389627722e+02,
8936                  rb5  = -1.02509513161107724954e+03,
8937                  rb6  = -4.83519191608651397019e+02,
8938                  sb1  =  3.03380607434824582924e+01,
8939                  sb2  =  3.25792512996573918826e+02,
8940                  sb3  =  1.53672958608443695994e+03,
8941                  sb4  =  3.19985821950859553908e+03,
8942                  sb5  =  2.55305040643316442583e+03,
8943                  sb6  =  4.74528541206955367215e+02,
8944                  sb7  = -2.24409524465858183362e+01;
8945
8946     double s, R, S, z;
8947     UINT64 iz;
8948
8949     if (ix < 0x3ff40000) /* |x| < 1.25 */
8950         return erfc1(x);
8951
8952     x = fabs(x);
8953     s = 1 / (x * x);
8954     if (ix < 0x4006db6d) { /* |x| < 1/.35 ~ 2.85714 */
8955         R = ra0 + s * (ra1 + s * (ra2 + s * (ra3 + s * (ra4 + s *
8956                             (ra5 + s * (ra6 + s * ra7))))));
8957         S = 1.0 + s * (sa1 + s * (sa2 + s * (sa3 + s * (sa4 + s *
8958                             (sa5 + s * (sa6 + s * (sa7 + s * sa8)))))));
8959     } else { /* |x| > 1/.35 */
8960         R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * (rb4 + s *
8961                             (rb5 + s * rb6)))));
8962         S = 1.0 + s * (sb1 + s * (sb2 + s * (sb3 + s * (sb4 + s *
8963                             (sb5 + s * (sb6 + s * sb7))))));
8964     }
8965     z = x;
8966     iz = *(ULONGLONG*)&z;
8967     iz &= 0xffffffff00000000ULL;
8968     z = *(double*)&iz;
8969     return exp(-z * z - 0.5625) * exp((z - x) * (z + x) + R / S) / x;
8970 }
8971
8972 /*********************************************************************
8973  *      erf (MSVCR120.@)
8974  */
8975 double CDECL erf(double x)
8976 {
8977     static const double efx8 =  1.02703333676410069053e+00,
8978                  pp0  =  1.28379167095512558561e-01,
8979                  pp1  = -3.25042107247001499370e-01,
8980                  pp2  = -2.84817495755985104766e-02,
8981                  pp3  = -5.77027029648944159157e-03,
8982                  pp4  = -2.37630166566501626084e-05,
8983                  qq1  =  3.97917223959155352819e-01,
8984                  qq2  =  6.50222499887672944485e-02,
8985                  qq3  =  5.08130628187576562776e-03,
8986                  qq4  =  1.32494738004321644526e-04,
8987                  qq5  = -3.96022827877536812320e-06;
8988
8989     double r, s, z, y;
8990     UINT32 ix;
8991     int sign;
8992
8993     ix = *(UINT64*)&x >> 32;
8994     sign = ix >> 31;
8995     ix &= 0x7fffffff;
8996     if (ix >= 0x7ff00000) {
8997         /* erf(nan)=nan, erf(+-inf)=+-1 */
8998         return 1 - 2 * sign + 1 / x;
8999     }
9000     if (ix < 0x3feb0000) { /* |x| < 0.84375 */
9001         if (ix < 0x3e300000) { /* |x| < 2**-28 */
9002             /* avoid underflow */
9003             return 0.125 * (8 * x + efx8 * x);
9004         }
9005         z = x * x;
9006         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
9007         s = 1.0 + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
9008         y = r / s;
9009         return x + x * y;
9010     }
9011     if (ix < 0x40180000) /* 0.84375 <= |x| < 6 */
9012         y = 1 - erfc2(ix, x);
9013     else
9014         y = 1 - DBL_MIN;
9015     return sign ? -y : y;
9016 }
9017
/* Rational approximation used by erfc2f() for |x| < 1.25:
 * evaluates 1 - erx - P(s)/Q(s) with s = |x| - 1. */
static float erfc1f(float x)
{
    static const float erx  =  8.4506291151e-01,
                 pa0  = -2.3621185683e-03,
                 pa1  =  4.1485610604e-01,
                 pa2  = -3.7220788002e-01,
                 pa3  =  3.1834661961e-01,
                 pa4  = -1.1089469492e-01,
                 pa5  =  3.5478305072e-02,
                 pa6  = -2.1663755178e-03,
                 qa1  =  1.0642088205e-01,
                 qa2  =  5.4039794207e-01,
                 qa3  =  7.1828655899e-02,
                 qa4  =  1.2617121637e-01,
                 qa5  =  1.3637083583e-02,
                 qa6  =  1.1984500103e-02;

    float off, num, den;

    /* distance of |x| from 1, the centre of the approximation */
    off = fabsf(x) - 1;

    num = pa0 + off * (pa1 + off * (pa2 + off * (pa3 + off * (pa4 + off * (pa5 + off * pa6)))));
    den = 1 + off * (qa1 + off * (qa2 + off * (qa3 + off * (qa4 + off * (qa5 + off * qa6)))));

    return 1 - erx - num / den;
}
9042
9043 static float erfc2f(UINT32 ix, float x)
9044 {
9045     static const float ra0  = -9.8649440333e-03,
9046                  ra1  = -6.9385856390e-01,
9047                  ra2  = -1.0558626175e+01,
9048                  ra3  = -6.2375331879e+01,
9049                  ra4  = -1.6239666748e+02,
9050                  ra5  = -1.8460508728e+02,
9051                  ra6  = -8.1287437439e+01,
9052                  ra7  = -9.8143291473e+00,
9053                  sa1  =  1.9651271820e+01,
9054                  sa2  =  1.3765776062e+02,
9055                  sa3  =  4.3456588745e+02,
9056                  sa4  =  6.4538726807e+02,
9057                  sa5  =  4.2900814819e+02,
9058                  sa6  =  1.0863500214e+02,
9059                  sa7  =  6.5702495575e+00,
9060                  sa8  = -6.0424413532e-02,
9061                  rb0  = -9.8649431020e-03,
9062                  rb1  = -7.9928326607e-01,
9063                  rb2  = -1.7757955551e+01,
9064                  rb3  = -1.6063638306e+02,
9065                  rb4  = -6.3756646729e+02,
9066                  rb5  = -1.0250950928e+03,
9067                  rb6  = -4.8351919556e+02,
9068                  sb1  =  3.0338060379e+01,
9069                  sb2  =  3.2579251099e+02,
9070                  sb3  =  1.5367296143e+03,
9071                  sb4  =  3.1998581543e+03,
9072                  sb5  =  2.5530502930e+03,
9073                  sb6  =  4.7452853394e+02,
9074                  sb7  = -2.2440952301e+01;
9075
9076     float s, R, S, z;
9077
9078     if (ix < 0x3fa00000) /* |x| < 1.25 */
9079         return erfc1f(x);
9080
9081     x = fabsf(x);
9082     s = 1 / (x * x);
9083     if (ix < 0x4036db6d) { /* |x| < 1/0.35 */
9084         R = ra0 + s * (ra1 + s * (ra2 + s * (ra3 + s * (ra4 + s *
9085                             (ra5 + s * (ra6 + s * ra7))))));
9086         S = 1.0f + s * (sa1 + s * (sa2 + s * (sa3 + s * (sa4 + s *
9087                             (sa5 + s * (sa6 + s * (sa7 + s * sa8)))))));
9088     } else { /* |x| >= 1/0.35 */
9089         R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * (rb4 + s * (rb5 + s * rb6)))));
9090         S = 1.0f + s * (sb1 + s * (sb2 + s * (sb3 + s * (sb4 + s *
9091                             (sb5 + s * (sb6 + s * sb7))))));
9092     }
9093
9094     ix = *(UINT32*)&x & 0xffffe000;
9095     z = *(float*)&ix;
9096     return expf(-z * z - 0.5625f) * expf((z - x) * (z + x) + R / S) / x;
9097 }
9098
9099 /*********************************************************************
9100  *      erff (MSVCR120.@)
9101  *
9102  * Copied from musl: src/math/erff.c
9103  */
9104 float CDECL erff(float x)
9105 {
9106     static const float efx8 =  1.0270333290e+00,
9107                  pp0  =  1.2837916613e-01,
9108                  pp1  = -3.2504209876e-01,
9109                  pp2  = -2.8481749818e-02,
9110                  pp3  = -5.7702702470e-03,
9111                  pp4  = -2.3763017452e-05,
9112                  qq1  =  3.9791721106e-01,
9113                  qq2  =  6.5022252500e-02,
9114                  qq3  =  5.0813062117e-03,
9115                  qq4  =  1.3249473704e-04,
9116                  qq5  = -3.9602282413e-06;
9117
9118     float r, s, z, y;
9119     UINT32 ix;
9120     int sign;
9121
9122     ix = *(UINT32*)&x;
9123     sign = ix >> 31;
9124     ix &= 0x7fffffff;
9125     if (ix >= 0x7f800000) {
9126         /* erf(nan)=nan, erf(+-inf)=+-1 */
9127         return 1 - 2 * sign + 1 / x;
9128     }
9129     if (ix < 0x3f580000) { /* |x| < 0.84375 */
9130         if (ix < 0x31800000) { /* |x| < 2**-28 */
9131             /*avoid underflow */
9132             return 0.125f * (8 * x + efx8 * x);
9133         }
9134         z = x * x;
9135         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
9136         s = 1 + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
9137         y = r / s;
9138         return x + x * y;
9139     }
9140     if (ix < 0x40c00000) /* |x| < 6 */
9141         y = 1 - erfc2f(ix, x);
9142     else
9143         y = 1 - FLT_MIN;
9144     return sign ? -y : y;
9145 }
9146
9147 /*********************************************************************
9148  *      erfc (MSVCR120.@)
9149  *
9150  * Copied from musl: src/math/erf.c
9151  */
9152 double CDECL erfc(double x)
9153 {
9154     static const double pp0  =  1.28379167095512558561e-01,
9155                  pp1  = -3.25042107247001499370e-01,
9156                  pp2  = -2.84817495755985104766e-02,
9157                  pp3  = -5.77027029648944159157e-03,
9158                  pp4  = -2.37630166566501626084e-05,
9159                  qq1  =  3.97917223959155352819e-01,
9160                  qq2  =  6.50222499887672944485e-02,
9161                  qq3  =  5.08130628187576562776e-03,
9162                  qq4  =  1.32494738004321644526e-04,
9163                  qq5  = -3.96022827877536812320e-06;
9164
9165     double r, s, z, y;
9166     UINT32 ix;
9167     int sign;
9168
9169     ix = *(ULONGLONG*)&x >> 32;
9170     sign = ix >> 31;
9171     ix &= 0x7fffffff;
9172     if (ix >= 0x7ff00000) {
9173         /* erfc(nan)=nan, erfc(+-inf)=0,2 */
9174         return 2 * sign + 1 / x;
9175     }
9176     if (ix < 0x3feb0000) { /* |x| < 0.84375 */
9177         if (ix < 0x3c700000) /* |x| < 2**-56 */
9178             return 1.0 - x;
9179         z = x * x;
9180         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
9181         s = 1.0 + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
9182         y = r / s;
9183         if (sign || ix < 0x3fd00000) { /* x < 1/4 */
9184             return 1.0 - (x + x * y);
9185         }
9186         return 0.5 - (x - 0.5 + x * y);
9187     }
9188     if (ix < 0x403c0000) { /* 0.84375 <= |x| < 28 */
9189         return sign ? 2 - erfc2(ix, x) : erfc2(ix, x);
9190     }
9191     if (sign)
9192         return 2 - DBL_MIN;
9193     *_errno() = ERANGE;
9194     return fp_barrier(DBL_MIN) * DBL_MIN;
9195 }
9196
9197 /*********************************************************************
9198  *      erfcf (MSVCR120.@)
9199  *
9200  * Copied from musl: src/math/erff.c
9201  */
9202 float CDECL erfcf(float x)
9203 {
9204     static const float pp0  =  1.2837916613e-01,
9205                  pp1  = -3.2504209876e-01,
9206                  pp2  = -2.8481749818e-02,
9207                  pp3  = -5.7702702470e-03,
9208                  pp4  = -2.3763017452e-05,
9209                  qq1  =  3.9791721106e-01,
9210                  qq2  =  6.5022252500e-02,
9211                  qq3  =  5.0813062117e-03,
9212                  qq4  =  1.3249473704e-04,
9213                  qq5  = -3.9602282413e-06;
9214
9215     float r, s, z, y;
9216     UINT32 ix;
9217     int sign;
9218
9219     ix = *(UINT32*)&x;
9220     sign = ix >> 31;
9221     ix &= 0x7fffffff;
9222     if (ix >= 0x7f800000) {
9223         /* erfc(nan)=nan, erfc(+-inf)=0,2 */
9224         return 2 * sign + 1 / x;
9225     }
9226
9227     if (ix < 0x3f580000) { /* |x| < 0.84375 */
9228         if (ix < 0x23800000) /* |x| < 2**-56 */
9229             return 1.0f - x;
9230         z = x * x;
9231         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
9232         s = 1.0f + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
9233         y = r / s;
9234         if (sign || ix < 0x3e800000) /* x < 1/4 */
9235             return 1.0f - (x + x * y);
9236         return 0.5f - (x - 0.5f + x * y);
9237     }
9238     if (ix < 0x41e00000) { /* |x| < 28 */
9239         return sign ? 2 - erfc2f(ix, x) : erfc2f(ix, x);
9240     }
9241     if (sign)
9242         return 2 - FLT_MIN;
9243     *_errno() = ERANGE;
9244     return FLT_MIN * FLT_MIN;
9245 }
9246
9247 /*********************************************************************
9248  *      fmaxf (MSVCR120.@)
9249  */
9250 float CDECL fmaxf(float x, float y)
9251 {
9252     if(isnan(x))
9253         return y;
9254     if(isnan(y))
9255         return x;
9256     if(x==0 && y==0)
9257         return signbit(x) ? y : x;
9258     return x<y ? y : x;
9259 }
9260
9261 /*********************************************************************
9262  *      fmax (MSVCR120.@)
9263  */
9264 double CDECL fmax(double x, double y)
9265 {
9266     if(isnan(x))
9267         return y;
9268     if(isnan(y))
9269         return x;
9270     if(x==0 && y==0)
9271         return signbit(x) ? y : x;
9272     return x<y ? y : x;
9273 }
9274
9275 /*********************************************************************
9276  *      fdimf (MSVCR120.@)
9277  */
9278 float CDECL fdimf(float x, float y)
9279 {
9280     if(isnan(x))
9281         return x;
9282     if(isnan(y))
9283         return y;
9284     return x>y ? x-y : 0;
9285 }
9286
9287 /*********************************************************************
9288  *      fdim (MSVCR120.@)
9289  */
9290 double CDECL fdim(double x, double y)
9291 {
9292     if(isnan(x))
9293         return x;
9294     if(isnan(y))
9295         return y;
9296     return x>y ? x-y : 0;
9297 }
9298
9299 /*********************************************************************
9300  *      _fdsign (MSVCR120.@)
9301  */
9302 int CDECL _fdsign(float x)
9303 {
9304     union { float f; UINT32 i; } u = { x };
9305     return (u.i >> 16) & 0x8000;
9306 }
9307
9308 /*********************************************************************
9309  *      _dsign (MSVCR120.@)
9310  */
9311 int CDECL _dsign(double x)
9312 {
9313     union { double f; UINT64 i; } u = { x };
9314     return (u.i >> 48) & 0x8000;
9315 }
9316
9317
9318 /*********************************************************************
9319  *      _dpcomp (MSVCR120.@)
9320  */
9321 int CDECL _dpcomp(double x, double y)
9322 {
9323     if(isnan(x) || isnan(y))
9324         return 0;
9325
9326     if(x == y) return 2;
9327     return x < y ? 1 : 4;
9328 }
9329
9330 /*********************************************************************
9331  *      _fdpcomp (MSVCR120.@)
9332  */
9333 int CDECL _fdpcomp(float x, float y)
9334 {
9335     return _dpcomp(x, y);
9336 }
9337
9338 /*********************************************************************
9339  *      fminf (MSVCR120.@)
9340  */
9341 float CDECL fminf(float x, float y)
9342 {
9343     if(isnan(x))
9344         return y;
9345     if(isnan(y))
9346         return x;
9347     if(x==0 && y==0)
9348         return signbit(x) ? x : y;
9349     return x<y ? x : y;
9350 }
9351
9352 /*********************************************************************
9353  *      fmin (MSVCR120.@)
9354  */
9355 double CDECL fmin(double x, double y)
9356 {
9357     if(isnan(x))
9358         return y;
9359     if(isnan(y))
9360         return x;
9361     if(x==0 && y==0)
9362         return signbit(x) ? x : y;
9363     return x<y ? x : y;
9364 }
9365
9366 /*********************************************************************
9367  *      asinh (MSVCR120.@)
9368  *
9369  * Copied from musl: src/math/asinh.c
9370  */
9371 double CDECL asinh(double x)
9372 {
9373     UINT64 ux = *(UINT64*)&x;
9374     int e = ux >> 52 & 0x7ff;
9375     int s = ux >> 63;
9376
9377     /* |x| */
9378     ux &= (UINT64)-1 / 2;
9379     x = *(double*)&ux;
9380
9381     if (e >= 0x3ff + 26) /* |x| >= 0x1p26 or inf or nan */
9382         x = log(x) + 0.693147180559945309417232121458176568;
9383     else if (e >= 0x3ff + 1) /* |x| >= 2 */
9384         x = log(2 * x + 1 / (sqrt(x * x + 1) + x));
9385     else if (e >= 0x3ff - 26) /* |x| >= 0x1p-26 */
9386         x = log1p(x + x * x / (sqrt(x * x + 1) + 1));
9387     else /* |x| < 0x1p-26, raise inexact if x != 0 */
9388         fp_barrier(x + 0x1p120f);
9389     return s ? -x : x;
9390 }
9391
9392 /*********************************************************************
9393  *      asinhf (MSVCR120.@)
9394  *
9395  * Copied from musl: src/math/asinhf.c
9396  */
9397 float CDECL asinhf(float x)
9398 {
9399     UINT32 ux = *(UINT32*)&x;
9400     UINT32 i = ux & 0x7fffffff;
9401     int s = ux >> 31;
9402
9403     /* |x| */
9404     x = *(float*)&i;
9405
9406     if (i >= 0x3f800000 + (12 << 23))/* |x| >= 0x1p12 or inf or nan */
9407         x = logf(x) + 0.693147180559945309417232121458176568f;
9408     else if (i >= 0x3f800000 + (1 << 23)) /* |x| >= 2 */
9409         x = logf(2 * x + 1 / (sqrtf(x * x + 1) + x));
9410     else if (i >= 0x3f800000 - (12 << 23)) /* |x| >= 0x1p-12 */
9411         x = log1pf(x + x * x / (sqrtf(x * x + 1) + 1));
9412     else /* |x| < 0x1p-12, raise inexact if x!=0 */
9413         fp_barrierf(x + 0x1p120f);
9414     return s ? -x : x;
9415 }
9416
9417 /*********************************************************************
9418  *      acosh (MSVCR120.@)
9419  *
9420  * Copied from musl: src/math/acosh.c
9421  */
9422 double CDECL acosh(double x)
9423 {
9424     int e = *(UINT64*)&x >> 52 & 0x7ff;
9425
9426     if (x < 1)
9427     {
9428         *_errno() = EDOM;
9429         feraiseexcept(FE_INVALID);
9430         return NAN;
9431     }
9432
9433     if (e < 0x3ff + 1) /* |x| < 2, up to 2ulp error in [1,1.125] */
9434         return log1p(x - 1 + sqrt((x - 1) * (x - 1) + 2 * (x - 1)));
9435     if (e < 0x3ff + 26) /* |x| < 0x1p26 */
9436         return log(2 * x - 1 / (x + sqrt(x * x - 1)));
9437     /* |x| >= 0x1p26 or nan */
9438     return log(x) + 0.693147180559945309417232121458176568;
9439 }
9440
9441 /*********************************************************************
9442  *      acoshf (MSVCR120.@)
9443  *
9444  * Copied from musl: src/math/acoshf.c
9445  */
9446 float CDECL acoshf(float x)
9447 {
9448     UINT32 a = *(UINT32*)&x & 0x7fffffff;
9449
9450     if (x < 1)
9451     {
9452         *_errno() = EDOM;
9453         feraiseexcept(FE_INVALID);
9454         return NAN;
9455     }
9456
9457     if (a < 0x3f800000 + (1 << 23)) /* |x| < 2, up to 2ulp error in [1,1.125] */
9458         return log1pf(x - 1 + sqrtf((x - 1) * (x - 1) + 2 * (x - 1)));
9459     if (*(UINT32*)&x < 0x3f800000 + (12 << 23)) /* 2 <= x < 0x1p12 */
9460         return logf(2 * x - 1 / (x + sqrtf(x * x - 1)));
9461     /* x >= 0x1p12 or x <= -2 or nan */
9462     return logf(x) + 0.693147180559945309417232121458176568f;
9463 }
9464
9465 /*********************************************************************
9466  *      atanh (MSVCR120.@)
9467  *
9468  * Copied from musl: src/math/atanh.c
9469  */
9470 double CDECL atanh(double x)
9471 {
9472     UINT64 ux = *(UINT64*)&x;
9473     int e = ux >> 52 & 0x7ff;
9474     int s = ux >> 63;
9475
9476     /* |x| */
9477     ux &= (UINT64)-1 / 2;
9478     x = *(double*)&ux;
9479
9480     if (x > 1) {
9481         *_errno() = EDOM;
9482         feraiseexcept(FE_INVALID);
9483         return NAN;
9484     }
9485
9486     if (e < 0x3ff - 1) {
9487         if (e < 0x3ff - 32) {
9488             fp_barrier(x + 0x1p120f);
9489             if (e == 0) /* handle underflow */
9490                 fp_barrier(x * x);
9491         } else { /* |x| < 0.5, up to 1.7ulp error */
9492             x = 0.5 * log1p(2 * x + 2 * x * x / (1 - x));
9493         }
9494     } else { /* avoid overflow */
9495         x = 0.5 * log1p(2 * (x / (1 - x)));
9496         if (isinf(x)) *_errno() = ERANGE;
9497     }
9498     return s ? -x : x;
9499 }
9500
9501 /*********************************************************************
9502  *      atanhf (MSVCR120.@)
9503  *
9504  * Copied from musl: src/math/atanhf.c
9505  */
9506 float CDECL atanhf(float x)
9507 {
9508     UINT32 ux = *(UINT32*)&x;
9509     int s = ux >> 31;
9510
9511     /* |x| */
9512     ux &= 0x7fffffff;
9513     x = *(float*)&ux;
9514
9515     if (x > 1) {
9516         *_errno() = EDOM;
9517         feraiseexcept(FE_INVALID);
9518         return NAN;
9519     }
9520
9521     if (ux < 0x3f800000 - (1 << 23)) {
9522         if (ux < 0x3f800000 - (32 << 23)) {
9523             fp_barrierf(x + 0x1p120f);
9524             if (ux < (1 << 23)) /* handle underflow */
9525                 fp_barrierf(x * x);
9526         } else { /* |x| < 0.5, up to 1.7ulp error */
9527             x = 0.5f * log1pf(2 * x + 2 * x * x / (1 - x));
9528         }
9529     } else { /* avoid overflow */
9530         x = 0.5f * log1pf(2 * (x / (1 - x)));
9531         if (isinf(x)) *_errno() = ERANGE;
9532     }
9533     return s ? -x : x;
9534 }
9535
9536 #endif /* _MSVCR_VER>=120 */
9537
9538 /*********************************************************************
9539  *      _scalb  (MSVCRT.@)
9540  *      scalbn  (MSVCR120.@)
9541  *      scalbln (MSVCR120.@)
9542  */
9543 double CDECL _scalb(double num, __msvcrt_long power)
9544 {
9545   return ldexp(num, power);
9546 }
9547
9548 /*********************************************************************
9549  *      _scalbf  (MSVCRT.@)
9550  *      scalbnf  (MSVCR120.@)
9551  *      scalblnf (MSVCR120.@)
9552  */
9553 float CDECL _scalbf(float num, __msvcrt_long power)
9554 {
9555   return ldexp(num, power);
9556 }
9557
9558 #if _MSVCR_VER>=120
9559
9560 /*********************************************************************
9561  *      remainder (MSVCR120.@)
9562  *
9563  * Copied from musl: src/math/remainder.c
9564  */
9565 double CDECL remainder(double x, double y)
9566 {
9567     int q;
9568 #if _MSVCR_VER == 120 && defined(__x86_64__)
9569     if (isnan(x) || isnan(y)) *_errno() = EDOM;
9570 #endif
9571     return remquo(x, y, &q);
9572 }
9573
9574 /*********************************************************************
9575  *      remainderf (MSVCR120.@)
9576  *
9577  * Copied from musl: src/math/remainderf.c
9578  */
9579 float CDECL remainderf(float x, float y)
9580 {
9581     int q;
9582 #if _MSVCR_VER == 120 && defined(__x86_64__)
9583     if (isnan(x) || isnan(y)) *_errno() = EDOM;
9584 #endif
9585     return remquof(x, y, &q);
9586 }
9587
9588 /*********************************************************************
9589  *      remquo (MSVCR120.@)
9590  *
9591  * Copied from musl: src/math/remquo.c
9592  */
9593 double CDECL remquo(double x, double y, int *quo)
9594 {
9595     UINT64 uxi = *(UINT64*)&x;
9596     UINT64 uyi = *(UINT64*)&y;
9597     int ex = uxi >> 52 & 0x7ff;
9598     int ey = uyi >> 52 & 0x7ff;
9599     int sx = uxi >> 63;
9600     int sy = uyi >> 63;
9601     UINT32 q;
9602     UINT64 i;
9603
9604     *quo = 0;
9605     if (y == 0 || isinf(x)) *_errno() = EDOM;
9606     if (uyi << 1 == 0 || isnan(y) || ex == 0x7ff)
9607         return (x * y) / (x * y);
9608     if (uxi << 1 == 0)
9609         return x;
9610
9611     /* normalize x and y */
9612     if (!ex) {
9613         for (i = uxi << 12; i >> 63 == 0; ex--, i <<= 1);
9614         uxi <<= -ex + 1;
9615     } else {
9616         uxi &= -1ULL >> 12;
9617         uxi |= 1ULL << 52;
9618     }
9619     if (!ey) {
9620         for (i = uyi << 12; i >> 63 == 0; ey--, i <<= 1);
9621         uyi <<= -ey + 1;
9622     } else {
9623         uyi &= -1ULL >> 12;
9624         uyi |= 1ULL << 52;
9625     }
9626
9627     q = 0;
9628     if (ex < ey) {
9629         if (ex+1 == ey)
9630             goto end;
9631         return x;
9632     }
9633
9634     /* x mod y */
9635     for (; ex > ey; ex--) {
9636         i = uxi - uyi;
9637         if (i >> 63 == 0) {
9638             uxi = i;
9639             q++;
9640         }
9641         uxi <<= 1;
9642         q <<= 1;
9643     }
9644     i = uxi - uyi;
9645     if (i >> 63 == 0) {
9646         uxi = i;
9647         q++;
9648     }
9649     if (uxi == 0)
9650         ex = -60;
9651     else
9652         for (; uxi >> 52 == 0; uxi <<= 1, ex--);
9653 end:
9654     /* scale result and decide between |x| and |x|-|y| */
9655     if (ex > 0) {
9656         uxi -= 1ULL << 52;
9657         uxi |= (UINT64)ex << 52;
9658     } else {
9659         uxi >>= -ex + 1;
9660     }
9661     x = *(double*)&uxi;
9662     if (sy)
9663         y = -y;
9664     if (ex == ey || (ex + 1 == ey && (2 * x > y || (2 * x == y && q % 2)))) {
9665         x -= y;
9666         q++;
9667     }
9668     q &= 0x7fffffff;
9669     *quo = sx ^ sy ? -(int)q : (int)q;
9670     return sx ? -x : x;
9671 }
9672
9673 /*********************************************************************
9674  *      remquof (MSVCR120.@)
9675  *
9676  * Copied from musl: src/math/remquof.c
9677  */
9678 float CDECL remquof(float x, float y, int *quo)
9679 {
9680     UINT32 uxi = *(UINT32*)&x;
9681     UINT32 uyi = *(UINT32*)&y;
9682     int ex = uxi >> 23 & 0xff;
9683     int ey = uyi >> 23 & 0xff;
9684     int sx = uxi >> 31;
9685     int sy = uyi>> 31;
9686     UINT32 q, i;
9687
9688     *quo = 0;
9689     if (y == 0 || isinf(x)) *_errno() = EDOM;
9690     if (uyi << 1 == 0 || isnan(y) || ex == 0xff)
9691         return (x * y) / (x * y);
9692     if (uxi << 1 == 0)
9693         return x;
9694
9695     /* normalize x and y */
9696     if (!ex) {
9697         for (i = uxi << 9; i >> 31 == 0; ex--, i <<= 1);
9698         uxi <<= -ex + 1;
9699     } else {
9700         uxi &= -1U >> 9;
9701         uxi |= 1U << 23;
9702     }
9703     if (!ey) {
9704         for (i = uyi << 9; i >> 31 == 0; ey--, i <<= 1);
9705         uyi <<= -ey + 1;
9706     } else {
9707         uyi &= -1U >> 9;
9708         uyi |= 1U << 23;
9709     }
9710
9711     q = 0;
9712     if (ex < ey) {
9713         if (ex + 1 == ey)
9714             goto end;
9715         return x;
9716     }
9717
9718     /* x mod y */
9719     for (; ex > ey; ex--) {
9720         i = uxi - uyi;
9721         if (i >> 31 == 0) {
9722             uxi = i;
9723             q++;
9724         }
9725         uxi <<= 1;
9726         q <<= 1;
9727     }
9728     i = uxi - uyi;
9729     if (i >> 31 == 0) {
9730         uxi = i;
9731         q++;
9732     }
9733     if (uxi == 0)
9734         ex = -30;
9735     else
9736         for (; uxi >> 23 == 0; uxi <<= 1, ex--);
9737 end:
9738     /* scale result and decide between |x| and |x|-|y| */
9739     if (ex > 0) {
9740         uxi -= 1U << 23;
9741         uxi |= (UINT32)ex << 23;
9742     } else {
9743         uxi >>= -ex + 1;
9744     }
9745     x = *(float*)&uxi;
9746     if (sy)
9747         y = -y;
9748     if (ex == ey || (ex + 1 == ey && (2 * x > y || (2 * x == y && q % 2)))) {
9749         x -= y;
9750         q++;
9751     }
9752     q &= 0x7fffffff;
9753     *quo = sx ^ sy ? -(int)q : (int)q;
9754     return sx ? -x : x;
9755 }
9756
9757 /* sin(pi*x) assuming x > 2^-100, if sin(pi*x)==0 the sign is arbitrary */
9758 static double sin_pi(double x)
9759 {
9760     int n;
9761
9762     /* spurious inexact if odd int */
9763     x = 2.0 * (x * 0.5 - floor(x * 0.5)); /* x mod 2.0 */
9764
9765     n = x * 4.0;
9766     n = (n + 1) / 2;
9767     x -= n * 0.5f;
9768     x *= M_PI;
9769
9770     switch (n) {
9771     default: /* case 4: */
9772     case 0: return __sin(x, 0.0, 0);
9773     case 1: return __cos(x, 0.0);
9774     case 2: return __sin(-x, 0.0, 0);
9775     case 3: return -__cos(x, 0.0);
9776     }
9777 }
9778
/*********************************************************************
 *      lgamma (MSVCR120.@)
 *
 * Copied from musl: src/math/lgamma_r.c
 *
 * Logarithm of the absolute value of the gamma function.
 *
 * Special cases visible below:
 *  - inf and NaN return x * x;
 *  - |x| < 2**-70 returns -log(|x|);
 *  - negative integers set errno to ERANGE and return 1.0 / (x - x).
 * Other negative arguments are evaluated on |x| and corrected with
 * nadj = log(pi / (|sin(pi*x)| * |x|)), the result being nadj - r.
 * Positive arguments are split into ranges (x < 2, x < 8, x < 2**58
 * and above), each with its own approximation.
 */
double CDECL lgamma(double x)
{
    /* coefficient tables for the approximations used in each range */
    static const double pi = 3.14159265358979311600e+00,
        a0 = 7.72156649015328655494e-02,
        a1 = 3.22467033424113591611e-01,
        a2 = 6.73523010531292681824e-02,
        a3 = 2.05808084325167332806e-02,
        a4 = 7.38555086081402883957e-03,
        a5 = 2.89051383673415629091e-03,
        a6 = 1.19270763183362067845e-03,
        a7 = 5.10069792153511336608e-04,
        a8 = 2.20862790713908385557e-04,
        a9 = 1.08011567247583939954e-04,
        a10 = 2.52144565451257326939e-05,
        a11 = 4.48640949618915160150e-05,
        tc = 1.46163214496836224576e+00,
        tf = -1.21486290535849611461e-01,
        tt = -3.63867699703950536541e-18,
        t0 = 4.83836122723810047042e-01,
        t1 = -1.47587722994593911752e-01,
        t2 = 6.46249402391333854778e-02,
        t3 = -3.27885410759859649565e-02,
        t4 = 1.79706750811820387126e-02,
        t5 = -1.03142241298341437450e-02,
        t6 = 6.10053870246291332635e-03,
        t7 = -3.68452016781138256760e-03,
        t8 = 2.25964780900612472250e-03,
        t9 = -1.40346469989232843813e-03,
        t10 = 8.81081882437654011382e-04,
        t11 = -5.38595305356740546715e-04,
        t12 = 3.15632070903625950361e-04,
        t13 = -3.12754168375120860518e-04,
        t14 = 3.35529192635519073543e-04,
        u0 = -7.72156649015328655494e-02,
        u1 = 6.32827064025093366517e-01,
        u2 = 1.45492250137234768737e+00,
        u3 = 9.77717527963372745603e-01,
        u4 = 2.28963728064692451092e-01,
        u5 = 1.33810918536787660377e-02,
        v1 = 2.45597793713041134822e+00,
        v2 = 2.12848976379893395361e+00,
        v3 = 7.69285150456672783825e-01,
        v4 = 1.04222645593369134254e-01,
        v5 = 3.21709242282423911810e-03,
        s0 = -7.72156649015328655494e-02,
        s1 = 2.14982415960608852501e-01,
        s2 = 3.25778796408930981787e-01,
        s3 = 1.46350472652464452805e-01,
        s4 = 2.66422703033638609560e-02,
        s5 = 1.84028451407337715652e-03,
        s6 = 3.19475326584100867617e-05,
        r1 = 1.39200533467621045958e+00,
        r2 = 7.21935547567138069525e-01,
        r3 = 1.71933865632803078993e-01,
        r4 = 1.86459191715652901344e-02,
        r5 = 7.77942496381893596434e-04,
        r6 = 7.32668430744625636189e-06,
        w0 = 4.18938533204672725052e-01,
        w1 = 8.33333333333329678849e-02,
        w2 = -2.77777777728775536470e-03,
        w3 = 7.93650558643019558500e-04,
        w4 = -5.95187557450339963135e-04,
        w5 = 8.36339918996282139126e-04,
        w6 = -1.63092934096575273989e-03;

    /* u gives access to the raw bits of the original argument */
    union {double f; UINT64 i;} u = {x};
    double t, y, z, nadj, p, p1, p2, p3, q, r, w;
    UINT32 ix;
    int sign,i;

    /* purge off +-inf, NaN, +-0, tiny and negative arguments */
    sign = u.i >> 63;
    /* ix: high word of |x| (exponent and top mantissa bits) */
    ix = u.i >> 32 & 0x7fffffff;
    if (ix >= 0x7ff00000)
        return x * x;
    if (ix < (0x3ff - 70) << 20) { /* |x|<2**-70, return -log(|x|) */
        if(sign)
            x = -x;
        return -log(x);
    }
    if (sign) {
        /* from here on x holds |x|; nadj is the correction term that
         * is combined with r at the very end */
        x = -x;
        t = sin_pi(x);
        if (t == 0.0) { /* -integer */
            *_errno() = ERANGE;
            return 1.0 / (x - x);
        }
        /* only the magnitude of sin(pi*x) is used */
        if (t <= 0.0)
            t = -t;
        nadj = log(pi / (t * x));
    }

    /* purge off 1 and 2 */
    if ((ix == 0x3ff00000 || ix == 0x40000000) && (UINT32)u.i == 0)
        r = 0;
    /* for x < 2.0 */
    else if (ix < 0x40000000) {
        /* choose the sub-interval: i selects the polynomial evaluated
         * in the switch below and y is its argument */
        if (ix <= 0x3feccccc) { /* lgamma(x) = lgamma(x+1)-log(x) */
            r = -log(x);
            if (ix >= 0x3FE76944) {
                y = 1.0 - x;
                i = 0;
            } else if (ix >= 0x3FCDA661) {
                y = x - (tc - 1.0);
                i = 1;
            } else {
                y = x;
                i = 2;
            }
        } else {
            r = 0.0;
            if (ix >= 0x3FFBB4C3) { /* [1.7316,2] */
                y = 2.0 - x;
                i = 0;
            } else if(ix >= 0x3FF3B4C4) { /* [1.23,1.73] */
                y = x - tc;
                i = 1;
            } else {
                y = x - 1.0;
                i = 2;
            }
        }
        switch (i) {
        case 0:
            /* polynomial in y with the a* coefficients, even and odd
             * terms evaluated separately */
            z = y * y;
            p1 = a0 + z * (a2 + z * (a4 + z * (a6 + z * (a8 + z * a10))));
            p2 = z * (a1 + z * (a3 + z * (a5 + z * (a7 + z * (a9 + z * a11)))));
            p = y * p1 + p2;
            r += (p - 0.5 * y);
            break;
        case 1:
            /* polynomial in y with the t* coefficients, split into
             * three sub-polynomials in w = y**3 */
            z = y * y;
            w = z * y;
            p1 = t0 + w * (t3 + w * (t6 + w * (t9 + w * t12))); /* parallel comp */
            p2 = t1 + w * (t4 + w * (t7 + w * (t10 + w * t13)));
            p3 = t2 + w * (t5 + w * (t8 + w * (t11 + w * t14)));
            p = z * p1 - (tt - w * (p2 + y * p3));
            r += tf + p;
            break;
        case 2:
            /* rational approximation with the u* and v* coefficients */
            p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * u5)))));
            p2 = 1.0 + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * v5))));
            r += -0.5 * y + p1 / p2;
        }
    } else if (ix < 0x40200000) { /* x < 8.0 */
        /* i is the integer part of x, y the fractional part */
        i = (int)x;
        y = x - (double)i;
        p = y * (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6))))));
        q = 1.0 + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * r6)))));
        r = 0.5 * y + p / q;
        z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */
        /* accumulate the product (y+2)...(y+i-1) in z; for i == 2
         * no case matches and r is left unchanged */
        switch (i) {
        case 7: z *= y + 6.0; /* fall through */
        case 6: z *= y + 5.0; /* fall through */
        case 5: z *= y + 4.0; /* fall through */
        case 4: z *= y + 3.0; /* fall through */
        case 3:
            z *= y + 2.0;
            r += log(z);
            break;
        }
    } else if (ix < 0x43900000) { /* 8.0 <= x < 2**58 */
        /* (x - 0.5) * (log(x) - 1) plus a series in 1/x (w* coefficients) */
        t = log(x);
        z = 1.0 / x;
        y = z * z;
        w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * w6)))));
        r = (x - 0.5) * (t - 1.0) + w;
    } else /* 2**58 <= x <= inf */
        r = x * (log(x) - 1.0);
    /* negative argument: combine with the correction computed above */
    if (sign)
        r = nadj - r;
    return r;
}
9957
9958 /* sin(pi*x) assuming x > 2^-100, if sin(pi*x)==0 the sign is arbitrary */
9959 static float sinf_pi(float x)
9960 {
9961     double y;
9962     int n;
9963
9964     /* spurious inexact if odd int */
9965     x = 2 * (x * 0.5f - floorf(x * 0.5f)); /* x mod 2.0 */
9966
9967     n = (int)(x * 4);
9968     n = (n + 1) / 2;
9969     y = x - n * 0.5f;
9970     y *= M_PI;
9971     switch (n) {
9972     default: /* case 4: */
9973     case 0: return __sindf(y);
9974     case 1: return __cosdf(y);
9975     case 2: return __sindf(-y);
9976     case 3: return -__cosdf(y);
9977     }
9978 }
9979
9980 /*********************************************************************
9981  *      lgammaf (MSVCR120.@)
9982  *
9983  * Copied from musl: src/math/lgammaf_r.c
9984  */
9985 float CDECL lgammaf(float x)
9986 {
9987     static const float pi = 3.1415927410e+00,
9988         a0 = 7.7215664089e-02,
9989         a1 = 3.2246702909e-01,
9990         a2 = 6.7352302372e-02,
9991         a3 = 2.0580807701e-02,
9992         a4 = 7.3855509982e-03,
9993         a5 = 2.8905137442e-03,
9994         a6 = 1.1927076848e-03,
9995         a7 = 5.1006977446e-04,
9996         a8 = 2.2086278477e-04,
9997         a9 = 1.0801156895e-04,
9998         a10 = 2.5214456400e-05,
9999         a11 = 4.4864096708e-05,
10000         tc = 1.4616321325e+00,
10001         tf = -1.2148628384e-01,
10002         tt = 6.6971006518e-09,
10003         t0 = 4.8383611441e-01,
10004         t1 = -1.4758771658e-01,
10005         t2 = 6.4624942839e-02,
10006         t3 = -3.2788541168e-02,
10007         t4 = 1.7970675603e-02,
10008         t5 = -1.0314224288e-02,
10009         t6 = 6.1005386524e-03,
10010         t7 = -3.6845202558e-03,
10011         t8 = 2.2596477065e-03,
10012         t9 = -1.4034647029e-03,
10013         t10 = 8.8108185446e-04,
10014         t11 = -5.3859531181e-04,
10015         t12 = 3.1563205994e-04,
10016         t13 = -3.1275415677e-04,
10017         t14 = 3.3552918467e-04,
10018         u0 = -7.7215664089e-02,
10019         u1 = 6.3282704353e-01,
10020         u2 = 1.4549225569e+00,
10021         u3 = 9.7771751881e-01,
10022         u4 = 2.2896373272e-01,
10023         u5 = 1.3381091878e-02,
10024         v1 = 2.4559779167e+00,
10025         v2 = 2.1284897327e+00,
10026         v3 = 7.6928514242e-01,
10027         v4 = 1.0422264785e-01,
10028         v5 = 3.2170924824e-03,
10029         s0 = -7.7215664089e-02,
10030         s1 = 2.1498242021e-01,
10031         s2 = 3.2577878237e-01,
10032         s3 = 1.4635047317e-01,
10033         s4 = 2.6642270386e-02,
10034         s5 = 1.8402845599e-03,
10035         s6 = 3.1947532989e-05,
10036         r1 = 1.3920053244e+00,
10037         r2 = 7.2193557024e-01,
10038         r3 = 1.7193385959e-01,
10039         r4 = 1.8645919859e-02,
10040         r5 = 7.7794247773e-04,
10041         r6 = 7.3266842264e-06,
10042         w0 = 4.1893854737e-01,
10043         w1 = 8.3333335817e-02,
10044         w2 = -2.7777778450e-03,
10045         w3 = 7.9365057172e-04,
10046         w4 = -5.9518753551e-04,
10047         w5 = 8.3633989561e-04,
10048         w6 = -1.6309292987e-03;
10049
10050     union {float f; UINT32 i;} u = {x};
10051     float t, y, z, nadj, p, p1, p2, p3, q, r, w;
10052     UINT32 ix;
10053     int i, sign;
10054
10055     /* purge off +-inf, NaN, +-0, tiny and negative arguments */
10056     sign = u.i >> 31;
10057     ix = u.i & 0x7fffffff;
10058     if (ix >= 0x7f800000)
10059         return x * x;
10060     if (ix < 0x35000000) { /* |x| < 2**-21, return -log(|x|) */
10061         if (sign)
10062             x = -x;
10063         return -logf(x);
10064     }
10065     if (sign) {
10066         x = -x;
10067         t = sinf_pi(x);
10068         if (t == 0.0f) { /* -integer */
10069             *_errno() = ERANGE;
10070             return 1.0f / (x - x);
10071         }
10072         if (t <= 0.0f)
10073             t = -t;
10074         nadj = logf(pi / (t * x));
10075     }
10076
10077     /* purge off 1 and 2 */
10078     if (ix == 0x3f800000 || ix == 0x40000000)
10079         r = 0;
10080     /* for x < 2.0 */
10081     else if (ix < 0x40000000) {
10082         if (ix <= 0x3f666666) { /* lgamma(x) = lgamma(x+1)-log(x) */
10083             r = -logf(x);
10084             if (ix >= 0x3f3b4a20) {
10085                 y = 1.0f - x;
10086                 i = 0;
10087             } else if (ix >= 0x3e6d3308) {
10088                 y = x - (tc - 1.0f);
10089                 i = 1;
10090             } else {
10091                 y = x;
10092                 i = 2;
10093             }
10094         } else {
10095             r = 0.0f;
10096             if (ix >= 0x3fdda618) { /* [1.7316,2] */
10097                 y = 2.0f - x;
10098                 i = 0;
10099             } else if (ix >= 0x3F9da620) { /* [1.23,1.73] */
10100                 y = x - tc;
10101                 i = 1;
10102             } else {
10103                 y = x - 1.0f;
10104                 i = 2;
10105             }
10106         }
10107         switch(i) {
10108         case 0:
10109             z = y * y;
10110             p1 = a0 + z * (a2 + z * (a4 + z * (a6 + z * (a8 + z * a10))));
10111             p2 = z * (a1 + z * (a3 + z * (a5 + z * (a7 + z * (a9 + z * a11)))));
10112             p = y * p1 + p2;
10113             r += p - 0.5f * y;
10114             break;
10115         case 1:
10116             z = y * y;
10117             w = z * y;
10118             p1 = t0 + w * (t3 + w * (t6 + w * (t9 + w * t12))); /* parallel comp */
10119             p2 = t1 + w * (t4 + w * (t7 + w * (t10 + w * t13)));
10120             p3 = t2 + w * (t5 + w * (t8 + w * (t11 + w * t14)));
10121             p = z * p1 - (tt - w * (p2 + y * p3));
10122             r += (tf + p);
10123             break;
10124         case 2:
10125             p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * u5)))));
10126             p2 = 1.0f + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * v5))));
10127             r += -0.5f * y + p1 / p2;
10128         }
10129     } else if (ix < 0x41000000) { /* x < 8.0 */
10130         i = (int)x;
10131         y = x - (float)i;
10132         p = y * (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6))))));
10133         q = 1.0f + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * r6)))));
10134         r = 0.5f * y + p / q;
10135         z = 1.0f; /* lgamma(1+s) = log(s) + lgamma(s) */
10136         switch (i) {
10137         case 7: z *= y + 6.0f; /* fall through */
10138         case 6: z *= y + 5.0f; /* fall through */
10139         case 5: z *= y + 4.0f; /* fall through */
10140         case 4: z *= y + 3.0f; /* fall through */
10141         case 3:
10142             z *= y + 2.0f;
10143             r += logf(z);
10144             break;
10145         }
10146     } else if (ix < 0x5c800000) { /* 8.0 <= x < 2**58 */
10147         t = logf(x);
10148         z = 1.0f / x;
10149         y = z * z;
10150         w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * w6)))));
10151         r = (x - 0.5f) * (t - 1.0f) + w;
10152     } else /* 2**58 <= x <= inf */
10153         r = x * (logf(x) - 1.0f);
10154     if (sign)
10155         r = nadj - r;
10156     return r;
10157 }
10158
/* Rational function S(x) = num(x)/den(x) used by tgamma(), which passes |x|. */
static double tgamma_S(double x)
{
    static const double Snum[] = {
        23531376880.410759688572007674451636754734846804940,
        42919803642.649098768957899047001988850926355848959,
        35711959237.355668049440185451547166705960488635843,
        17921034426.037209699919755754458931112671403265390,
        6039542586.3520280050642916443072979210699388420708,
        1439720407.3117216736632230727949123939715485786772,
        248874557.86205415651146038641322942321632125127801,
        31426415.585400194380614231628318205362874684987640,
        2876370.6289353724412254090516208496135991145378768,
        186056.26539522349504029498971604569928220784236328,
        8071.6720023658162106380029022722506138218516325024,
        210.82427775157934587250973392071336271166969580291,
        2.5066282746310002701649081771338373386264310793408,
    };
    static const double Sden[] = {
        0, 39916800, 120543840, 150917976, 105258076, 45995730, 13339535,
        2637558, 357423, 32670, 1925, 66, 1,
    };
    enum { terms = sizeof(Snum) / sizeof(Snum[0]) };

    double num = 0, den = 0;
    int k;

    if (x < 8) {
        /* Horner's scheme in x, highest coefficient first */
        k = terms;
        while (k-- > 0) {
            num = num * x + Snum[k];
            den = den * x + Sden[k];
        }
    } else {
        /* large x is handled in powers of 1/x to avoid overflow */
        for (k = 0; k < terms; k++) {
            num = num / x + Snum[k];
            den = den / x + Sden[k];
        }
    }
    return num / den;
}
10197
10198 /*********************************************************************
10199  *      tgamma (MSVCR120.@)
10200  *
10201  * Copied from musl: src/math/tgamma.c
10202  */
10203 double CDECL tgamma(double x)
10204 {
10205     static const double gmhalf = 5.524680040776729583740234375;
10206     static const double fact[] = {
10207         1, 1, 2, 6, 24, 120, 720, 5040.0, 40320.0, 362880.0, 3628800.0, 39916800.0,
10208         479001600.0, 6227020800.0, 87178291200.0, 1307674368000.0, 20922789888000.0,
10209         355687428096000.0, 6402373705728000.0, 121645100408832000.0,
10210         2432902008176640000.0, 51090942171709440000.0, 1124000727777607680000.0,
10211     };
10212
10213     union {double f; UINT64 i;} u = {x};
10214     double absx, y, dy, z, r;
10215     UINT32 ix = u.i >> 32 & 0x7fffffff;
10216     int sign = u.i >> 63;
10217
10218     /* special cases */
10219     if (ix >= 0x7ff00000) {
10220         /* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */
10221         if (u.i == 0xfff0000000000000ULL)
10222             *_errno() = EDOM;
10223         return x + INFINITY;
10224     }
10225     if (ix < (0x3ff - 54) << 20) {
10226         /* |x| < 2^-54: tgamma(x) ~ 1/x, +-0 raises div-by-zero */
10227         if (x == 0.0)
10228             *_errno() = ERANGE;
10229         return 1 / x;
10230     }
10231
10232     /* integer arguments */
10233     /* raise inexact when non-integer */
10234     if (x == floor(x)) {
10235         if (sign) {
10236             *_errno() = EDOM;
10237             return 0 / (x - x);
10238         }
10239         if (x <= ARRAY_SIZE(fact))
10240             return fact[(int)x - 1];
10241     }
10242
10243     /* x >= 172: tgamma(x)=inf with overflow */
10244     /* x =< -184: tgamma(x)=+-0 with underflow */
10245     if (ix >= 0x40670000) { /* |x| >= 184 */
10246         *_errno() = ERANGE;
10247         if (sign) {
10248             fp_barrierf(0x1p-126 / x);
10249             return 0;
10250         }
10251         x *= 0x1p1023;
10252         return x;
10253     }
10254
10255     absx = sign ? -x : x;
10256
10257     /* handle the error of x + g - 0.5 */
10258     y = absx + gmhalf;
10259     if (absx > gmhalf) {
10260         dy = y - absx;
10261         dy -= gmhalf;
10262     } else {
10263         dy = y - gmhalf;
10264         dy -= absx;
10265     }
10266
10267     z = absx - 0.5;
10268     r = tgamma_S(absx) * exp(-y);
10269     if (x < 0) {
10270         /* reflection formula for negative x */
10271         /* sinpi(absx) is not 0, integers are already handled */
10272         r = -M_PI / (sin_pi(absx) * absx * r);
10273         dy = -dy;
10274         z = -z;
10275     }
10276     r += dy * (gmhalf + 0.5) * r / y;
10277     z = pow(y, 0.5 * z);
10278     y = r * z * z;
10279     return y;
10280 }
10281
10282 /*********************************************************************
10283  *      tgammaf (MSVCR120.@)
10284  *
10285  * Copied from musl: src/math/tgammaf.c
10286  */
10287 float CDECL tgammaf(float x)
10288 {
10289     return tgamma(x);
10290 }
10291
10292 /*********************************************************************
10293  *      nan (MSVCR120.@)
10294  */
10295 double CDECL nan(const char *tagp)
10296 {
10297     /* Windows ignores input (MSDN) */
10298     return NAN;
10299 }
10300
10301 /*********************************************************************
10302  *      nanf (MSVCR120.@)
10303  */
10304 float CDECL nanf(const char *tagp)
10305 {
10306     return NAN;
10307 }
10308
10309 /*********************************************************************
10310  *      _except1 (MSVCR120.@)
10311  *  TODO:
10312  *   - find meaning of ignored cw and operation bits
10313  *   - unk parameter
10314  */
10315 double CDECL _except1(DWORD fpe, _FP_OPERATION_CODE op, double arg, double res, DWORD cw, void *unk)
10316 {
10317     ULONG_PTR exception_arg;
10318     DWORD exception = 0;
10319     DWORD fpword = 0;
10320     WORD operation;
10321     int raise = 0;
10322
10323     TRACE("(%x %x %lf %lf %x %p)\n", fpe, op, arg, res, cw, unk);
10324
10325 #ifdef _WIN64
10326     cw = ((cw >> 7) & 0x3f) | ((cw >> 3) & 0xc00);
10327 #endif
10328     operation = op << 5;
10329     exception_arg = (ULONG_PTR)&operation;
10330
10331     if (fpe & 0x1) { /* overflow */
10332         if ((fpe == 0x1 && (cw & 0x8)) || (fpe==0x11 && (cw & 0x28))) {
10333             /* 32-bit version also sets SW_INEXACT here */
10334             raise |= FE_OVERFLOW;
10335             if (fpe & 0x10) raise |= FE_INEXACT;
10336             res = signbit(res) ? -INFINITY : INFINITY;
10337         } else {
10338             exception = EXCEPTION_FLT_OVERFLOW;
10339         }
10340     } else if (fpe & 0x2) { /* underflow */
10341         if ((fpe == 0x2 && (cw & 0x10)) || (fpe==0x12 && (cw & 0x30))) {
10342             raise |= FE_UNDERFLOW;
10343             if (fpe & 0x10) raise |= FE_INEXACT;
10344             res = signbit(res) ? -0.0 : 0.0;
10345         } else {
10346             exception = EXCEPTION_FLT_UNDERFLOW;
10347         }
10348     } else if (fpe & 0x4) { /* zerodivide */
10349         if ((fpe == 0x4 && (cw & 0x4)) || (fpe==0x14 && (cw & 0x24))) {
10350             raise |= FE_DIVBYZERO;
10351             if (fpe & 0x10) raise |= FE_INEXACT;
10352         } else {
10353             exception = EXCEPTION_FLT_DIVIDE_BY_ZERO;
10354         }
10355     } else if (fpe & 0x8) { /* invalid */
10356         if (fpe == 0x8 && (cw & 0x1)) {
10357             raise |= FE_INVALID;
10358         } else {
10359             exception = EXCEPTION_FLT_INVALID_OPERATION;
10360         }
10361     } else if (fpe & 0x10) { /* inexact */
10362         if (fpe == 0x10 && (cw & 0x20)) {
10363             raise |= FE_INEXACT;
10364         } else {
10365             exception = EXCEPTION_FLT_INEXACT_RESULT;
10366         }
10367     }
10368
10369     if (exception)
10370         raise = 0;
10371     feraiseexcept(raise);
10372     if (exception)
10373         RaiseException(exception, 0, 1, &exception_arg);
10374
10375     if (cw & 0x1) fpword |= _EM_INVALID;
10376     if (cw & 0x2) fpword |= _EM_DENORMAL;
10377     if (cw & 0x4) fpword |= _EM_ZERODIVIDE;
10378     if (cw & 0x8) fpword |= _EM_OVERFLOW;
10379     if (cw & 0x10) fpword |= _EM_UNDERFLOW;
10380     if (cw & 0x20) fpword |= _EM_INEXACT;
10381     switch (cw & 0xc00)
10382     {
10383         case 0xc00: fpword |= _RC_UP|_RC_DOWN; break;
10384         case 0x800: fpword |= _RC_UP; break;
10385         case 0x400: fpword |= _RC_DOWN; break;
10386     }
10387     switch (cw & 0x300)
10388     {
10389         case 0x0:   fpword |= _PC_24; break;
10390         case 0x200: fpword |= _PC_53; break;
10391         case 0x300: fpword |= _PC_64; break;
10392     }
10393     if (cw & 0x1000) fpword |= _IC_AFFINE;
10394     _control87(fpword, 0xffffffff);
10395
10396     return res;
10397 }
10398
10399 _Dcomplex* CDECL _Cbuild(_Dcomplex *ret, double r, double i)
10400 {
10401     ret->_Val[0] = r;
10402     ret->_Val[1] = i;
10403     return ret;
10404 }
10405
10406 double CDECL MSVCR120_creal(_Dcomplex z)
10407 {
10408     return z._Val[0];
10409 }
10410
10411 /*********************************************************************
10412  *      ilogb (MSVCR120.@)
10413  */
10414 int CDECL ilogb(double x)
10415 {
10416     return __ilogb(x);
10417 }
10418
10419 /*********************************************************************
10420  *      ilogbf (MSVCR120.@)
10421  */
10422 int CDECL ilogbf(float x)
10423 {
10424     return __ilogbf(x);
10425 }
10426 #endif /* _MSVCR_VER>=120 */