dlls/msvcrt/math.c

   1 /*
   2  * msvcrt.dll math functions
   3  *
   4  * Copyright 2000 Jon Griffiths
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  19  *
  20  *
  21  * For functions copied from musl libc (http://musl.libc.org/):
  22  * ====================================================
  23  * Copyright 2005-2020 Rich Felker, et al.
  24  *
  25  * Permission is hereby granted, free of charge, to any person obtaining
  26  * a copy of this software and associated documentation files (the
  27  * "Software"), to deal in the Software without restriction, including
  28  * without limitation the rights to use, copy, modify, merge, publish,
  29  * distribute, sublicense, and/or sell copies of the Software, and to
  30  * permit persons to whom the Software is furnished to do so, subject to
  31  * the following conditions:
  32  *
  33  * The above copyright notice and this permission notice shall be
  34  * included in all copies or substantial portions of the Software.
  35  * ====================================================
  36  */
  37
  38 #include <complex.h>
  39 #include <stdio.h>
  40 #include <fenv.h>
  41 #include <fpieee.h>
  42 #include <limits.h>
  43 #include <locale.h>
  44 #include <math.h>
  45
  46 #include "msvcrt.h"
  47 #include "winternl.h"
  48
  49 #include "wine/asm.h"
  50 #include "wine/debug.h"
  51
/* Declare "msvcrt" as the default debug channel for the TRACE/ERR/FIXME
 * calls made in this file. */
WINE_DEFAULT_DEBUG_CHANNEL(msvcrt);

/* NOTE(review): presumably div/ldiv can be defined as macros by an included
 * header and are removed here so the names can be used for real functions
 * later in the file - confirm. */
#undef div
#undef ldiv

/* Error categories passed as the 'type' argument of math_error(). */
#define _DOMAIN         1       /* domain error in argument */
#define _SING           2       /* singularity */
#define _OVERFLOW       3       /* range overflow */
#define _UNDERFLOW      4       /* range underflow */

/* Signature of a math error handler as installed by __setusermatherr(). */
typedef int (CDECL *MSVCRT_matherr_func)(struct _exception *);

/* Handler installed by __setusermatherr(); NULL until one is registered.
 * math_error() calls it first and only applies its own errno handling when
 * no handler is set or the handler returns 0. */
static MSVCRT_matherr_func MSVCRT_default_matherr_func = NULL;

/* Result of IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE),
 * filled in by msvcrt_init_math(). */
BOOL sse2_supported;
/* Current SSE2 policy: initialised by msvcrt_init_math() and changed by
 * _set_SSE2_enable(). */
static BOOL sse2_enabled;
  68
  69 void msvcrt_init_math( void *module )
  70 {
  71     sse2_supported = IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE );
  72 #if _MSVCR_VER <=71
  73     sse2_enabled = FALSE;
  74 #else
  75     sse2_enabled = sse2_supported;
  76 #endif
  77 }
  78
  79 /* Copied from musl: src/internal/libm.h */
  80 static inline float fp_barrierf(float x)
  81 {
  82     volatile float y = x;
  83     return y;
  84 }
  85
  86 static inline double fp_barrier(double x)
  87 {
  88     volatile double y = x;
  89     return y;
  90 }
  91
  92 static inline double CDECL ret_nan( BOOL update_sw )
  93 {
  94     double x = 1.0;
  95     if (!update_sw) return -NAN;
  96     return (x - x) / (x - x);
  97 }
  98
/* i386 inline-assembly fragment (string pieces to be pasted into an asm
 * function body) that clears the MASK bits of the x87 control word.
 *
 * Steps, as written below:
 *   - reserve 4 bytes on the stack (with matching CFI adjustment);
 *   - store the current control word at (%esp) and a copy at 2(%esp);
 *   - if any MASK bit is set, clear those bits in the copy at 2(%esp) and
 *     load that copy as the new control word;
 *   - otherwise jump straight to local label 1 leaving the control word alone.
 *
 * It clobbers %ax and leaves %esp lowered by 4; RESET_X87_CW releases those
 * 4 bytes again, so the two macros are meant to be used as a pair. */
#define SET_X87_CW(MASK) \
    "subl $4, %esp\n\t" \
    __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \
    "fnstcw (%esp)\n\t" \
    "movw (%esp), %ax\n\t" \
    "movw %ax, 2(%esp)\n\t" \
    "testw $" #MASK ", %ax\n\t" \
    "jz 1f\n\t" \
    "andw $~" #MASK ", %ax\n\t" \
    "movw %ax, 2(%esp)\n\t" \
    "fldcw 2(%esp)\n\t" \
    "1:\n\t"
 111
/* i386 inline-assembly fragment that undoes SET_X87_CW.
 *
 * It compares the control word saved at (%esp) with the working copy at
 * 2(%esp).  Only when they differ does it:
 *   - pop st(0) to 8(%esp) as a 64-bit value,
 *   - reload the saved control word from (%esp),
 *   - push the value back from 8(%esp), followed by fwait.
 * In both cases it then releases the 4 bytes reserved by SET_X87_CW (with
 * matching CFI adjustment).  Clobbers %ax.
 *
 * NOTE(review): 8(%esp) lies above the 4-byte scratch slot, so the spill
 * presumably reuses the caller-provided argument area - confirm at each
 * use site. */
#define RESET_X87_CW \
    "movw (%esp), %ax\n\t" \
    "cmpw %ax, 2(%esp)\n\t" \
    "je 1f\n\t" \
    "fstpl 8(%esp)\n\t" \
    "fldcw (%esp)\n\t" \
    "fldl 8(%esp)\n\t" \
    "fwait\n\t" \
    "1:\n\t" \
    "addl $4, %esp\n\t" \
    __ASM_CFI(".cfi_adjust_cfa_offset -4\n\t")
 123
 124 /*********************************************************************
 125  *      _matherr (CRTDLL.@)
 126  */
 127 int CDECL _matherr(struct _exception *e)
 128 {
 129     return 0;
 130 }
 131
 132
 133 static double math_error(int type, const char *name, double arg1, double arg2, double retval)
 134 {
 135     struct _exception exception = {type, (char *)name, arg1, arg2, retval};
 136
 137     TRACE("(%d, %s, %g, %g, %g)\n", type, debugstr_a(name), arg1, arg2, retval);
 138
 139     if (MSVCRT_default_matherr_func && MSVCRT_default_matherr_func(&exception))
 140         return exception.retval;
 141
 142     switch (type)
 143     {
 144     case 0:
 145         /* don't set errno */
 146         break;
 147     case _DOMAIN:
 148         *_errno() = EDOM;
 149         break;
 150     case _SING:
 151     case _OVERFLOW:
 152         *_errno() = ERANGE;
 153         break;
 154     case _UNDERFLOW:
 155         /* don't set errno */
 156         break;
 157     default:
 158         ERR("Unhandled math error!\n");
 159     }
 160
 161     return exception.retval;
 162 }
 163
 164 /*********************************************************************
 165  *      __setusermatherr (MSVCRT.@)
 166  */
 167 void CDECL __setusermatherr(MSVCRT_matherr_func func)
 168 {
 169     MSVCRT_default_matherr_func = func;
 170     TRACE("new matherr handler %p\n", func);
 171 }
 172
 173 /*********************************************************************
 174  *      _set_SSE2_enable (MSVCRT.@)
 175  */
 176 int CDECL _set_SSE2_enable(int flag)
 177 {
 178     sse2_enabled = flag && sse2_supported;
 179     return sse2_enabled;
 180 }
 181
 182 #if defined(_WIN64)
 183 # if _MSVCR_VER>=140
 184 /*********************************************************************
 185  *      _get_FMA3_enable (UCRTBASE.@)
 186  */
 187 int CDECL _get_FMA3_enable(void)
 188 {
 189     FIXME("() stub\n");
 190     return 0;
 191 }
 192 # endif
 193
 194 # if _MSVCR_VER>=120
 195 /*********************************************************************
 196  *      _set_FMA3_enable (MSVCR120.@)
 197  */
 198 int CDECL _set_FMA3_enable(int flag)
 199 {
 200     FIXME("(%x) stub\n", flag);
 201     return 0;
 202 }
 203 # endif
 204 #endif
 205
 206 #if !defined(__i386__) || _MSVCR_VER>=120
 207
 208 /*********************************************************************
 209  *      _chgsignf (MSVCRT.@)
 210  */
 211 float CDECL _chgsignf( float num )
 212 {
 213     union { float f; UINT32 i; } u = { num };
 214     u.i ^= 0x80000000;
 215     return u.f;
 216 }
 217
 218 /*********************************************************************
 219  *      _copysignf (MSVCRT.@)
 220  *
 221  * Copied from musl: src/math/copysignf.c
 222  */
 223 float CDECL _copysignf( float x, float y )
 224 {
 225     union { float f; UINT32 i; } ux = { x }, uy = { y };
 226     ux.i &= 0x7fffffff;
 227     ux.i |= uy.i & 0x80000000;
 228     return ux.f;
 229 }
 230
 231 /*********************************************************************
 232  *      _nextafterf (MSVCRT.@)
 233  *
 234  * Copied from musl: src/math/nextafterf.c
 235  */
 236 float CDECL _nextafterf( float x, float y )
 237 {
 238     unsigned int ix = *(unsigned int*)&x;
 239     unsigned int iy = *(unsigned int*)&y;
 240     unsigned int ax, ay, e;
 241
 242     if (isnan(x) || isnan(y))
 243         return x + y;
 244     if (x == y) {
 245         if (_fpclassf(y) & (_FPCLASS_ND | _FPCLASS_PD | _FPCLASS_NZ | _FPCLASS_PZ ))
 246             *_errno() = ERANGE;
 247         return y;
 248     }
 249     ax = ix & 0x7fffffff;
 250     ay = iy & 0x7fffffff;
 251     if (ax == 0) {
 252         if (ay == 0)
 253             return y;
 254         ix = (iy & 0x80000000) | 1;
 255     } else if (ax > ay || ((ix ^ iy) & 0x80000000))
 256         ix--;
 257     else
 258         ix++;
 259     e = ix & 0x7f800000;
 260     /* raise overflow if ix is infinite and x is finite */
 261     if (e == 0x7f800000) {
 262         fp_barrierf(x + x);
 263         *_errno() = ERANGE;
 264     }
 265     /* raise underflow if ix is subnormal or zero */
 266     y = *(float*)&ix;
 267     if (e == 0) {
 268         fp_barrierf(x * x + y * y);
 269         *_errno() = ERANGE;
 270     }
 271     return y;
 272 }
 273
 274 /* Copied from musl: src/math/ilogbf.c */
 275 static int __ilogbf(float x)
 276 {
 277     union { float f; UINT32 i; } u = { x };
 278     int e = u.i >> 23 & 0xff;
 279
 280     if (!e)
 281     {
 282         u.i <<= 9;
 283         if (u.i == 0) return FP_ILOGB0;
 284         /* subnormal x */
 285         for (e = -0x7f; u.i >> 31 == 0; e--, u.i <<= 1);
 286         return e;
 287     }
 288     if (e == 0xff) return u.i << 9 ? FP_ILOGBNAN : INT_MAX;
 289     return e - 0x7f;
 290 }
 291
 292 /*********************************************************************
 293  *      _logbf (MSVCRT.@)
 294  *
 295  * Copied from musl: src/math/logbf.c
 296  */
 297 float CDECL _logbf(float x)
 298 {
 299     if (!isfinite(x))
 300         return x * x;
 301     if (x == 0) {
 302         *_errno() = ERANGE;
 303         return -1 / (x * x);
 304     }
 305     return __ilogbf(x);
 306 }
 307
 308 #endif
 309
 310 /* Copied from musl: src/math/scalbn.c */
 311 static double __scalbn(double x, int n)
 312 {
 313     union {double f; UINT64 i;} u;
 314     double y = x;
 315
 316     if (n > 1023) {
 317         y *= 0x1p1023;
 318         n -= 1023;
 319         if (n > 1023) {
 320             y *= 0x1p1023;
 321             n -= 1023;
 322             if (n > 1023)
 323                 n = 1023;
 324         }
 325     } else if (n < -1022) {
 326         /* make sure final n < -53 to avoid double
 327            rounding in the subnormal range */
 328         y *= 0x1p-1022 * 0x1p53;
 329         n += 1022 - 53;
 330         if (n < -1022) {
 331             y *= 0x1p-1022 * 0x1p53;
 332             n += 1022 - 53;
 333             if (n < -1022)
 334                 n = -1022;
 335         }
 336     }
 337     u.i = (UINT64)(0x3ff + n) << 52;
 338     x = y * u.f;
 339     return x;
 340 }
 341
 342 /* Copied from musl: src/math/__rem_pio2_large.c */
 343 static int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec)
 344 {
 345     static const int init_jk[] = {3, 4};
 346     static const INT32 ipio2[] = {
 347         0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
 348         0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
 349         0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
 350         0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
 351         0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
 352         0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
 353         0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
 354         0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
 355         0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
 356         0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
 357         0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
 358     };
 359     static const double PIo2[] = {
 360         1.57079625129699707031e+00,
 361         7.54978941586159635335e-08,
 362         5.39030252995776476554e-15,
 363         3.28200341580791294123e-22,
 364         1.27065575308067607349e-29,
 365         1.22933308981111328932e-36,
 366         2.73370053816464559624e-44,
 367         2.16741683877804819444e-51,
 368     };
 369
 370     INT32 jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
 371     double z, fw, f[20], fq[20] = {0}, q[20];
 372
 373     /* initialize jk*/
 374     jk = init_jk[prec];
 375     jp = jk;
 376
 377     /* determine jx,jv,q0, note that 3>q0 */
 378     jx = nx - 1;
 379     jv = (e0 - 3) / 24;
 380     if(jv < 0) jv = 0;
 381     q0 = e0 - 24 * (jv + 1);
 382
 383     /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
 384     j = jv - jx;
 385     m = jx + jk;
 386     for (i = 0; i <= m; i++, j++)
 387         f[i] = j < 0 ? 0.0 : (double)ipio2[j];
 388
 389     /* compute q[0],q[1],...q[jk] */
 390     for (i = 0; i <= jk; i++) {
 391         for (j = 0, fw = 0.0; j <= jx; j++)
 392             fw += x[j] * f[jx + i - j];
 393         q[i] = fw;
 394     }
 395
 396     jz = jk;
 397 recompute:
 398     /* distill q[] into iq[] reversingly */
 399     for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) {
 400         fw = (double)(INT32)(0x1p-24 * z);
 401         iq[i] = (INT32)(z - 0x1p24 * fw);
 402         z = q[j - 1] + fw;
 403     }
 404
 405     /* compute n */
 406     z = __scalbn(z, q0); /* actual value of z */
 407     z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */
 408     n = (INT32)z;
 409     z -= (double)n;
 410     ih = 0;
 411     if (q0 > 0) {  /* need iq[jz-1] to determine n */
 412         i = iq[jz - 1] >> (24 - q0);
 413         n += i;
 414         iq[jz - 1] -= i << (24 - q0);
 415         ih = iq[jz - 1] >> (23 - q0);
 416     }
 417     else if (q0 == 0) ih = iq[jz - 1] >> 23;
 418     else if (z >= 0.5) ih = 2;
 419
 420     if (ih > 0) {  /* q > 0.5 */
 421         n += 1;
 422         carry = 0;
 423         for (i = 0; i < jz; i++) {  /* compute 1-q */
 424             j = iq[i];
 425             if (carry == 0) {
 426                 if (j != 0) {
 427                     carry = 1;
 428                     iq[i] = 0x1000000 - j;
 429                 }
 430             } else
 431                 iq[i] = 0xffffff - j;
 432         }
 433         if (q0 > 0) {  /* rare case: chance is 1 in 12 */
 434             switch(q0) {
 435             case 1:
 436                 iq[jz - 1] &= 0x7fffff;
 437                 break;
 438             case 2:
 439                 iq[jz - 1] &= 0x3fffff;
 440                 break;
 441             }
 442         }
 443         if (ih == 2) {
 444             z = 1.0 - z;
 445             if (carry != 0)
 446                 z -= __scalbn(1.0, q0);
 447         }
 448     }
 449
 450     /* check if recomputation is needed */
 451     if (z == 0.0) {
 452         j = 0;
 453         for (i = jz - 1; i >= jk; i--) j |= iq[i];
 454         if (j == 0) {  /* need recomputation */
 455             for (k = 1; iq[jk - k] == 0; k++);  /* k = no. of terms needed */
 456
 457             for (i = jz + 1; i <= jz + k; i++) {  /* add q[jz+1] to q[jz+k] */
 458                 f[jx + i] = (double)ipio2[jv + i];
 459                 for (j = 0, fw = 0.0; j <= jx; j++)
 460                     fw += x[j] * f[jx + i - j];
 461                 q[i] = fw;
 462             }
 463             jz += k;
 464             goto recompute;
 465         }
 466     }
 467
 468     /* chop off zero terms */
 469     if (z == 0.0) {
 470         jz -= 1;
 471         q0 -= 24;
 472         while (iq[jz] == 0) {
 473             jz--;
 474             q0 -= 24;
 475         }
 476     } else { /* break z into 24-bit if necessary */
 477         z = __scalbn(z, -q0);
 478         if (z >= 0x1p24) {
 479             fw = (double)(INT32)(0x1p-24 * z);
 480             iq[jz] = (INT32)(z - 0x1p24 * fw);
 481             jz += 1;
 482             q0 += 24;
 483             iq[jz] = (INT32)fw;
 484         } else
 485             iq[jz] = (INT32)z;
 486     }
 487
 488     /* convert integer "bit" chunk to floating-point value */
 489     fw = __scalbn(1.0, q0);
 490     for (i = jz; i >= 0; i--) {
 491         q[i] = fw * (double)iq[i];
 492         fw *= 0x1p-24;
 493     }
 494
 495     /* compute PIo2[0,...,jp]*q[jz,...,0] */
 496     for(i = jz; i >= 0; i--) {
 497         for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++)
 498             fw += PIo2[k] * q[i + k];
 499         fq[jz - i] = fw;
 500     }
 501
 502     /* compress fq[] into y[] */
 503     switch(prec) {
 504     case 0:
 505         fw = 0.0;
 506         for (i = jz; i >= 0; i--)
 507             fw += fq[i];
 508         y[0] = ih == 0 ? fw : -fw;
 509         break;
 510     case 1:
 511     case 2:
 512         fw = 0.0;
 513         for (i = jz; i >= 0; i--)
 514             fw += fq[i];
 515         fw = (double)fw;
 516         y[0] = ih==0 ? fw : -fw;
 517         fw = fq[0] - fw;
 518         for (i = 1; i <= jz; i++)
 519             fw += fq[i];
 520         y[1] = ih == 0 ? fw : -fw;
 521         break;
 522     case 3:  /* painful */
 523         for (i = jz; i > 0; i--) {
 524             fw = fq[i - 1] + fq[i];
 525             fq[i] += fq[i - 1] - fw;
 526             fq[i - 1] = fw;
 527         }
 528         for (i = jz; i > 1; i--) {
 529             fw = fq[i - 1] + fq[i];
 530             fq[i] += fq[i - 1] - fw;
 531             fq[i - 1] = fw;
 532         }
 533         for (fw = 0.0, i = jz; i >= 2; i--)
 534             fw += fq[i];
 535         if (ih == 0) {
 536             y[0] = fq[0];
 537             y[1] = fq[1];
 538             y[2] = fw;
 539         } else {
 540             y[0] = -fq[0];
 541             y[1] = -fq[1];
 542             y[2] = -fw;
 543         }
 544     }
 545     return n & 7;
 546 }
 547
 548 /* Based on musl implementation: src/math/round.c */
 549 static double __round(double x)
 550 {
 551     ULONGLONG llx = *(ULONGLONG*)&x, tmp;
 552     int e = (llx >> 52 & 0x7ff) - 0x3ff;
 553
 554     if (e >= 52)
 555         return x;
 556     if (e < -1)
 557         return 0 * x;
 558     else if (e == -1)
 559         return signbit(x) ? -1 : 1;
 560
 561     tmp = 0x000fffffffffffffULL >> e;
 562     if (!(llx & tmp))
 563         return x;
 564     llx += 0x0008000000000000ULL >> e;
 565     llx &= ~tmp;
 566     return *(double*)&llx;
 567 }
 568
 569 #if !defined(__i386__) || _MSVCR_VER >= 120
 570 /* Copied from musl: src/math/expm1f.c */
 571 static float __expm1f(float x)
 572 {
 573     static const float ln2_hi = 6.9313812256e-01,
 574         ln2_lo = 9.0580006145e-06,
 575         invln2 = 1.4426950216e+00,
 576         Q1 = -3.3333212137e-2,
 577         Q2 = 1.5807170421e-3;
 578
 579     float y, hi, lo, c, t, e, hxs, hfx, r1, twopk;
 580     union {float f; UINT32 i;} u = {x};
 581     UINT32 hx = u.i & 0x7fffffff;
 582     int k, sign = u.i >> 31;
 583
 584     /* filter out huge and non-finite argument */
 585     if (hx >= 0x4195b844) { /* if |x|>=27*ln2 */
 586         if (hx >= 0x7f800000) /* NaN */
 587             return u.i == 0xff800000 ? -1 : x;
 588         if (sign)
 589             return math_error(_UNDERFLOW, "exp", x, 0, -1);
 590         if (hx > 0x42b17217) /* x > log(FLT_MAX) */
 591             return math_error(_OVERFLOW, "exp", x, 0, fp_barrierf(x * FLT_MAX));
 592     }
 593
 594     /* argument reduction */
 595     if (hx > 0x3eb17218) { /* if |x| > 0.5 ln2 */
 596         if (hx < 0x3F851592) { /* and |x| < 1.5 ln2 */
 597             if (!sign) {
 598                 hi = x - ln2_hi;
 599                 lo = ln2_lo;
 600                 k = 1;
 601             } else {
 602                 hi = x + ln2_hi;
 603                 lo = -ln2_lo;
 604                 k = -1;
 605             }
 606         } else {
 607             k = invln2 * x + (sign ? -0.5f : 0.5f);
 608             t = k;
 609             hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
 610             lo = t * ln2_lo;
 611         }
 612         x = hi - lo;
 613         c = (hi - x) - lo;
 614     } else if (hx < 0x33000000) { /* when |x|<2**-25, return x */
 615         if (hx < 0x00800000)
 616             fp_barrierf(x * x);
 617         return x;
 618     } else
 619         k = 0;
 620
 621     /* x is now in primary range */
 622     hfx = 0.5f * x;
 623     hxs = x * hfx;
 624     r1 = 1.0f + hxs * (Q1 + hxs * Q2);
 625     t = 3.0f - r1 * hfx;
 626     e = hxs * ((r1 - t) / (6.0f - x * t));
 627     if (k == 0) /* c is 0 */
 628         return x - (x * e - hxs);
 629     e = x * (e - c) - c;
 630     e -= hxs;
 631     /* exp(x) ~ 2^k (x_reduced - e + 1) */
 632     if (k == -1)
 633         return 0.5f * (x - e) - 0.5f;
 634     if (k == 1) {
 635         if (x < -0.25f)
 636             return -2.0f * (e - (x + 0.5f));
 637         return 1.0f + 2.0f * (x - e);
 638     }
 639     u.i = (0x7f + k) << 23; /* 2^k */
 640     twopk = u.f;
 641     if (k < 0 || k > 56) { /* suffice to return exp(x)-1 */
 642         y = x - e + 1.0f;
 643         if (k == 128)
 644             y = y * 2.0f * 0x1p127f;
 645         else
 646             y = y * twopk;
 647         return y - 1.0f;
 648     }
 649     u.i = (0x7f-k) << 23; /* 2^-k */
 650     if (k < 23)
 651         y = (x - e + (1 - u.f)) * twopk;
 652     else
 653         y = (x - (e + u.f) + 1) * twopk;
 654     return y;
 655 }
 656
 657 /* Copied from musl: src/math/__sindf.c */
 658 static float __sindf(double x)
 659 {
 660     static const double S1 = -0x1.5555555555555p-3,
 661         S2 = 0x1.1111111111111p-7,
 662         S3 = -0x1.a01a01a01a01ap-13,
 663         S4 = 0x1.71de3a556c734p-19;
 664
 665     double r, s, w, z;
 666
 667     z = x * x;
 668     if (x > -7.8175831586122513e-03 && x < 7.8175831586122513e-03)
 669         return x * (1 + S1 * z);
 670
 671     w = z * z;
 672     r = S3 + z * S4;
 673     s = z * x;
 674     return (x + s * (S1 + z * S2)) + s * w * r;
 675 }
 676
 677 /* Copied from musl: src/math/__cosdf.c */
 678 static float __cosdf(double x)
 679 {
 680     static const double C0 = -0x1.0000000000000p-1,
 681         C1 = 0x1.5555555555555p-5,
 682         C2 = -0x1.6c16c16c16c17p-10,
 683         C3 = 0x1.a01a01a01a01ap-16,
 684         C4 = -0x1.27e4fb7789f5cp-22;
 685     double z;
 686
 687     z = x * x;
 688     if (x > -7.8163146972656250e-03 && x < 7.8163146972656250e-03)
 689         return 1 + C0 * z;
 690     return 1.0 + z * (C0 + z * (C1 + z * (C2 + z * (C3 + z * C4))));
 691 }
 692
/* Lookup table of 32 64-bit patterns; the first entry, 0x3ff0000000000000,
 * is the IEEE-754 double encoding of 1.0.
 * NOTE(review): presumably this is the 2^(i/32) table of musl's exp2f/expf
 * implementation (with the index pre-subtracted from the exponent field)
 * and is consumed by code later in the file - confirm against the users. */
static const UINT64 exp2f_T[] = {
    0x3ff0000000000000ULL, 0x3fefd9b0d3158574ULL, 0x3fefb5586cf9890fULL, 0x3fef9301d0125b51ULL,
    0x3fef72b83c7d517bULL, 0x3fef54873168b9aaULL, 0x3fef387a6e756238ULL, 0x3fef1e9df51fdee1ULL,
    0x3fef06fe0a31b715ULL, 0x3feef1a7373aa9cbULL, 0x3feedea64c123422ULL, 0x3feece086061892dULL,
    0x3feebfdad5362a27ULL, 0x3feeb42b569d4f82ULL, 0x3feeab07dd485429ULL, 0x3feea47eb03a5585ULL,
    0x3feea09e667f3bcdULL, 0x3fee9f75e8ec5f74ULL, 0x3feea11473eb0187ULL, 0x3feea589994cce13ULL,
    0x3feeace5422aa0dbULL, 0x3feeb737b0cdc5e5ULL, 0x3feec49182a3f090ULL, 0x3feed503b23e255dULL,
    0x3feee89f995ad3adULL, 0x3feeff76f2fb5e47ULL, 0x3fef199bdd85529cULL, 0x3fef3720dcef9069ULL,
    0x3fef5818dcfba487ULL, 0x3fef7c97337b9b5fULL, 0x3fefa4afa2a490daULL, 0x3fefd0765b6e4540ULL
};
 703 #endif
 704
 705 #ifndef __i386__
 706
 707 /*********************************************************************
 708  *      _fpclassf (MSVCRT.@)
 709  */
 710 int CDECL _fpclassf( float num )
 711 {
 712     union { float f; UINT32 i; } u = { num };
 713     int e = u.i >> 23 & 0xff;
 714     int s = u.i >> 31;
 715
 716     switch (e)
 717     {
 718     case 0:
 719         if (u.i << 1) return s ? _FPCLASS_ND : _FPCLASS_PD;
 720         return s ? _FPCLASS_NZ : _FPCLASS_PZ;
 721     case 0xff:
 722         if (u.i << 9) return ((u.i >> 22) & 1) ? _FPCLASS_QNAN : _FPCLASS_SNAN;
 723         return s ? _FPCLASS_NINF : _FPCLASS_PINF;
 724     default:
 725         return s ? _FPCLASS_NN : _FPCLASS_PN;
 726     }
 727 }
 728
 729 /*********************************************************************
 730  *      _finitef (MSVCRT.@)
 731  */
 732 int CDECL _finitef( float num )
 733 {
 734     union { float f; UINT32 i; } u = { num };
 735     return (u.i & 0x7fffffff) < 0x7f800000;
 736 }
 737
 738 /*********************************************************************
 739  *      _isnanf (MSVCRT.@)
 740  */
 741 int CDECL _isnanf( float num )
 742 {
 743     union { float f; UINT32 i; } u = { num };
 744     return (u.i & 0x7fffffff) > 0x7f800000;
 745 }
 746
 747 static float asinf_R(float z)
 748 {
 749     /* coefficients for R(x^2) */
 750     static const float p1 = 1.66666672e-01,
 751                  p2 = -5.11644611e-02,
 752                  p3 = -1.21124933e-02,
 753                  p4 = -3.58742251e-03,
 754                  q1 = -7.56982703e-01;
 755
 756     float p, q;
 757     p = z * (p1 + z * (p2 + z * (p3 + z * p4)));
 758     q = 1.0f + z * q1;
 759     return p / q;
 760 }
 761
 762 /*********************************************************************
 763  *      acosf (MSVCRT.@)
 764  *
 765  * Copied from musl: src/math/acosf.c
 766  */
 767 float CDECL acosf( float x )
 768 {
 769     static const double pio2_lo = 6.12323399573676603587e-17;
 770
 771     float z, w, s, c, df;
 772     unsigned int hx, ix;
 773
 774     hx = *(unsigned int*)&x;
 775     ix = hx & 0x7fffffff;
 776     /* |x| >= 1 or nan */
 777     if (ix >= 0x3f800000) {
 778         if (ix == 0x3f800000) {
 779             if (hx >> 31)
 780                 return M_PI;
 781             return 0;
 782         }
 783         if (isnan(x)) return x;
 784         return math_error(_DOMAIN, "acosf", x, 0, 0 / (x - x));
 785     }
 786     /* |x| < 0.5 */
 787     if (ix < 0x3f000000) {
 788         if (ix <= 0x32800000) /* |x| < 2**-26 */
 789             return M_PI_2;
 790         return M_PI_2 - (x - (pio2_lo - x * asinf_R(x * x)));
 791     }
 792     /* x < -0.5 */
 793     if (hx >> 31) {
 794         z = (1 + x) * 0.5f;
 795         s = sqrtf(z);
 796         return M_PI - 2 * (s + ((double)s * asinf_R(z)));
 797     }
 798     /* x > 0.5 */
 799     z = (1 - x) * 0.5f;
 800     s = sqrtf(z);
 801     hx = *(unsigned int*)&s & 0xffff0000;
 802     df = *(float*)&hx;
 803     c = (z - df * df) / (s + df);
 804     w = asinf_R(z) * s + c;
 805     return 2 * (df + w);
 806 }
 807
 808 /*********************************************************************
 809  *      asinf (MSVCRT.@)
 810  *
 811  * Copied from musl: src/math/asinf.c
 812  */
 813 float CDECL asinf( float x )
 814 {
 815     static const double pio2 = 1.570796326794896558e+00;
 816     static const float pio4_hi = 0.785398125648;
 817     static const float pio2_lo = 7.54978941586e-08;
 818
 819     float s, z, f, c;
 820     unsigned int hx, ix;
 821
 822     hx = *(unsigned int*)&x;
 823     ix = hx & 0x7fffffff;
 824     if (ix >= 0x3f800000) {  /* |x| >= 1 */
 825         if (ix == 0x3f800000)  /* |x| == 1 */
 826             return x * pio2 + 7.5231638453e-37;  /* asin(+-1) = +-pi/2 with inexact */
 827         if (isnan(x)) return x;
 828         return math_error(_DOMAIN, "asinf", x, 0, 0 / (x - x));
 829     }
 830     if (ix < 0x3f000000) {  /* |x| < 0.5 */
 831         /* if 0x1p-126 <= |x| < 0x1p-12, avoid raising underflow */
 832         if (ix < 0x39800000 && ix >= 0x00800000)
 833             return x;
 834         return x + x * asinf_R(x * x);
 835     }
 836     /* 1 > |x| >= 0.5 */
 837     z = (1 - fabsf(x)) * 0.5f;
 838     s = sqrtf(z);
 839     /* f+c = sqrt(z) */
 840     *(unsigned int*)&f = *(unsigned int*)&s & 0xffff0000;
 841     c = (z - f * f) / (s + f);
 842     x = pio4_hi - (2 * s * asinf_R(z) - (pio2_lo - 2 * c) - (pio4_hi - 2 * f));
 843     if (hx >> 31)
 844         return -x;
 845     return x;
 846 }
 847
 848 /*********************************************************************
 849  *      atanf (MSVCRT.@)
 850  *
 851  * Copied from musl: src/math/atanf.c
 852  */
 853 float CDECL atanf( float x )
 854 {
 855     static const float atanhi[] = {
 856         4.6364760399e-01,
 857         7.8539812565e-01,
 858         9.8279368877e-01,
 859         1.5707962513e+00,
 860     };
 861     static const float atanlo[] = {
 862         5.0121582440e-09,
 863         3.7748947079e-08,
 864         3.4473217170e-08,
 865         7.5497894159e-08,
 866     };
 867     static const float aT[] = {
 868         3.3333328366e-01,
 869         -1.9999158382e-01,
 870         1.4253635705e-01,
 871         -1.0648017377e-01,
 872         6.1687607318e-02,
 873     };
 874
 875     float w, s1, s2, z;
 876     unsigned int ix, sign;
 877     int id;
 878
 879 #if _MSVCR_VER == 0
 880     if (isnan(x)) return math_error(_DOMAIN, "atanf", x, 0, x);
 881 #endif
 882
 883     ix = *(unsigned int*)&x;
 884     sign = ix >> 31;
 885     ix &= 0x7fffffff;
 886     if (ix >= 0x4c800000) {  /* if |x| >= 2**26 */
 887         if (isnan(x))
 888             return x;
 889         z = atanhi[3] + 7.5231638453e-37;
 890         return sign ? -z : z;
 891     }
 892     if (ix < 0x3ee00000) {   /* |x| < 0.4375 */
 893         if (ix < 0x39800000) {  /* |x| < 2**-12 */
 894             if (ix < 0x00800000)
 895                 /* raise underflow for subnormal x */
 896                 fp_barrierf(x*x);
 897             return x;
 898         }
 899         id = -1;
 900     } else {
 901         x = fabsf(x);
 902         if (ix < 0x3f980000) {  /* |x| < 1.1875 */
 903             if (ix < 0x3f300000) {  /*  7/16 <= |x| < 11/16 */
 904                 id = 0;
 905                 x = (2.0f * x - 1.0f) / (2.0f + x);
 906             } else {                /* 11/16 <= |x| < 19/16 */
 907                 id = 1;
 908                 x = (x - 1.0f) / (x + 1.0f);
 909             }
 910         } else {
 911             if (ix < 0x401c0000) {  /* |x| < 2.4375 */
 912                 id = 2;
 913                 x = (x - 1.5f) / (1.0f + 1.5f * x);
 914             } else {                /* 2.4375 <= |x| < 2**26 */
 915                 id = 3;
 916                 x = -1.0f / x;
 917             }
 918         }
 919     }
 920     /* end of argument reduction */
 921     z = x * x;
 922     w = z * z;
 923     /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
 924     s1 = z * (aT[0] + w * (aT[2] + w * aT[4]));
 925     s2 = w * (aT[1] + w * aT[3]);
 926     if (id < 0)
 927         return x - x * (s1 + s2);
 928     z = atanhi[id] - ((x * (s1 + s2) - atanlo[id]) - x);
 929     return sign ? -z : z;
 930 }
 931
 932 /*********************************************************************
 933  *              atan2f (MSVCRT.@)
 934  *
 935  * Copied from musl: src/math/atan2f.c
 936  */
 937 float CDECL atan2f( float y, float x )
 938 {
 939     static const float pi     = 3.1415927410e+00,
 940                  pi_lo  = -8.7422776573e-08;
 941
 942     float z;
 943     unsigned int m, ix, iy;
 944
 945     if (isnan(x) || isnan(y))
 946         return x + y;
 947     ix = *(unsigned int*)&x;
 948     iy = *(unsigned int*)&y;
 949     if (ix == 0x3f800000)  /* x=1.0 */
 950         return atanf(y);
 951     m = ((iy >> 31) & 1) | ((ix >> 30) & 2);  /* 2*sign(x)+sign(y) */
 952     ix &= 0x7fffffff;
 953     iy &= 0x7fffffff;
 954
 955     /* when y = 0 */
 956     if (iy == 0) {
 957         switch (m) {
 958         case 0:
 959         case 1: return y;   /* atan(+-0,+anything)=+-0 */
 960         case 2: return pi;  /* atan(+0,-anything) = pi */
 961         case 3: return -pi; /* atan(-0,-anything) =-pi */
 962         }
 963     }
 964     /* when x = 0 */
 965     if (ix == 0)
 966         return m & 1 ? -pi / 2 : pi / 2;
 967     /* when x is INF */
 968     if (ix == 0x7f800000) {
 969         if (iy == 0x7f800000) {
 970             switch (m) {
 971             case 0: return pi / 4;      /* atan(+INF,+INF) */
 972             case 1: return -pi / 4;     /* atan(-INF,+INF) */
 973             case 2: return 3 * pi / 4;  /*atan(+INF,-INF)*/
 974             case 3: return -3 * pi / 4; /*atan(-INF,-INF)*/
 975             }
 976         } else {
 977             switch (m) {
 978             case 0: return 0.0f;    /* atan(+...,+INF) */
 979             case 1: return -0.0f;   /* atan(-...,+INF) */
 980             case 2: return pi;      /* atan(+...,-INF) */
 981             case 3: return -pi;     /* atan(-...,-INF) */
 982             }
 983         }
 984     }
 985     /* |y/x| > 0x1p26 */
 986     if (ix + (26 << 23) < iy || iy == 0x7f800000)
 987         return m & 1 ? -pi / 2 : pi / 2;
 988
 989     /* z = atan(|y/x|) with correct underflow */
 990     if ((m & 2) && iy + (26 << 23) < ix)  /*|y/x| < 0x1p-26, x < 0 */
 991         z = 0.0;
 992     else
 993         z = atanf(fabsf(y / x));
 994     switch (m) {
 995     case 0: return z;                /* atan(+,+) */
 996     case 1: return -z;               /* atan(-,+) */
 997     case 2: return pi - (z - pi_lo); /* atan(+,-) */
 998     default: /* case 3 */
 999         return (z - pi_lo) - pi;     /* atan(-,-) */
1000     }
1001 }
1002
1003 /* Copied from musl: src/math/__rem_pio2f.c */
1004 static int __rem_pio2f(float x, double *y)
1005 {
1006     static const double toint = 1.5 / DBL_EPSILON,
1007         pio4 = 0x1.921fb6p-1,
1008         invpio2 = 6.36619772367581382433e-01,
1009         pio2_1 = 1.57079631090164184570e+00,
1010         pio2_1t = 1.58932547735281966916e-08;
1011
1012     union {float f; uint32_t i;} u = {x};
1013     double tx[1], ty[1], fn;
1014     UINT32 ix;
1015     int n, sign, e0;
1016
1017     ix = u.i & 0x7fffffff;
1018     /* 25+53 bit pi is good enough for medium size */
1019     if (ix < 0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */
1020         /* Use a specialized rint() to get fn. */
1021         fn = fp_barrier(x * invpio2 + toint) - toint;
1022         n  = (int)fn;
1023         *y = x - fn * pio2_1 - fn * pio2_1t;
1024         /* Matters with directed rounding. */
1025         if (*y < -pio4) {
1026             n--;
1027             fn--;
1028             *y = x - fn * pio2_1 - fn * pio2_1t;
1029         } else if (*y > pio4) {
1030             n++;
1031             fn++;
1032             *y = x - fn * pio2_1 - fn * pio2_1t;
1033         }
1034         return n;
1035     }
1036     if(ix >= 0x7f800000) { /* x is inf or NaN */
1037         *y = x - x;
1038         return 0;
1039     }
1040     /* scale x into [2^23, 2^24-1] */
1041     sign = u.i >> 31;
1042     e0 = (ix >> 23) - (0x7f + 23); /* e0 = ilogb(|x|)-23, positive */
1043     u.i = ix - (e0 << 23);
1044     tx[0] = u.f;
1045     n = __rem_pio2_large(tx, ty, e0, 1, 0);
1046     if (sign) {
1047         *y = -ty[0];
1048         return -n;
1049     }
1050     *y = ty[0];
1051     return n;
1052 }
1053
1054 /*********************************************************************
1055  *      cosf (MSVCRT.@)
1056  *
1057  * Copied from musl: src/math/cosf.c
1058  */
1059 float CDECL cosf( float x )
1060 {
1061     static const double c1pio2 = 1*M_PI_2,
1062         c2pio2 = 2*M_PI_2,
1063         c3pio2 = 3*M_PI_2,
1064         c4pio2 = 4*M_PI_2;
1065
1066     double y;
1067     UINT32 ix;
1068     unsigned n, sign;
1069
1070     ix = *(UINT32*)&x;
1071     sign = ix >> 31;
1072     ix &= 0x7fffffff;
1073
1074     if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
1075         if (ix < 0x39800000) { /* |x| < 2**-12 */
1076             /* raise inexact if x != 0 */
1077             fp_barrierf(x + 0x1p120f);
1078             return 1.0f;
1079         }
1080         return __cosdf(x);
1081     }
1082     if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
1083         if (ix > 0x4016cbe3) /* |x| ~> 3*pi/4 */
1084             return -__cosdf(sign ? x + c2pio2 : x - c2pio2);
1085         else {
1086             if (sign)
1087                 return __sindf(x + c1pio2);
1088             else
1089                 return __sindf(c1pio2 - x);
1090         }
1091     }
1092     if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
1093         if (ix > 0x40afeddf) /* |x| ~> 7*pi/4 */
1094             return __cosdf(sign ? x + c4pio2 : x - c4pio2);
1095         else {
1096             if (sign)
1097                 return __sindf(-x - c3pio2);
1098             else
1099                 return __sindf(x - c3pio2);
1100         }
1101     }
1102
1103     /* cos(Inf or NaN) is NaN */
1104     if (isinf(x)) return math_error(_DOMAIN, "cosf", x, 0, x - x);
1105     if (ix >= 0x7f800000)
1106         return x - x;
1107
1108     /* general argument reduction needed */
1109     n = __rem_pio2f(x, &y);
1110     switch (n & 3) {
1111     case 0: return __cosdf(y);
1112     case 1: return __sindf(-y);
1113     case 2: return -__cosdf(y);
1114     default: return __sindf(y);
1115     }
1116 }
1117
1118 /* Copied from musl: src/math/__expo2f.c */
1119 static float __expo2f(float x, float sign)
1120 {
1121     static const int k = 235;
1122     static const float kln2 = 0x1.45c778p+7f;
1123     float scale;
1124
1125     *(UINT32*)&scale = (UINT32)(0x7f + k/2) << 23;
1126     return expf(x - kln2) * (sign * scale) * scale;
1127 }
1128
1129 /*********************************************************************
1130  *      coshf (MSVCRT.@)
1131  *
1132  * Copied from musl: src/math/coshf.c
1133  */
1134 float CDECL coshf( float x )
1135 {
1136     UINT32 ui = *(UINT32*)&x;
1137     UINT32 sign = ui & 0x80000000;
1138     float t;
1139
1140     /* |x| */
1141     ui &= 0x7fffffff;
1142     x = *(float*)&ui;
1143
1144     /* |x| < log(2) */
1145     if (ui < 0x3f317217) {
1146         if (ui < 0x3f800000 - (12 << 23)) {
1147             fp_barrierf(x + 0x1p120f);
1148             return 1;
1149         }
1150         t = __expm1f(x);
1151         return 1 + t * t / (2 * (1 + t));
1152     }
1153
1154     /* |x| < log(FLT_MAX) */
1155     if (ui < 0x42b17217) {
1156         t = expf(x);
1157         return 0.5f * (t + 1 / t);
1158     }
1159
1160     /* |x| > log(FLT_MAX) or nan */
1161     if (ui > 0x7f800000)
1162         *(UINT32*)&t = ui | sign | 0x400000;
1163     else
1164         t = __expo2f(x, 1.0f);
1165     return t;
1166 }
1167
1168 /*********************************************************************
1169  *      expf (MSVCRT.@)
1170  */
1171 float CDECL expf( float x )
1172 {
1173     static const double C[] = {
1174         0x1.c6af84b912394p-5 / (1 << 5) / (1 << 5) / (1 << 5),
1175         0x1.ebfce50fac4f3p-3 / (1 << 5) / (1 << 5),
1176         0x1.62e42ff0c52d6p-1 / (1 << 5)
1177     };
1178     static const double invln2n = 0x1.71547652b82fep+0 * (1 << 5);
1179
1180     double kd, z, r, r2, y, s;
1181     UINT32 abstop;
1182     UINT64 ki, t;
1183
1184     abstop = (*(UINT32*)&x >> 20) & 0x7ff;
1185     if (abstop >= 0x42b) {
1186         /* |x| >= 88 or x is nan.  */
1187         if (*(UINT32*)&x == 0xff800000)
1188             return 0.0f;
1189         if (abstop >= 0x7f8)
1190             return x + x;
1191         if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
1192             return math_error(_OVERFLOW, "expf", x, 0, x * FLT_MAX);
1193         if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
1194             return math_error(_UNDERFLOW, "expf", x, 0, fp_barrierf(FLT_MIN) * FLT_MIN);
1195     }
1196
1197     /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k.  */
1198     z = invln2n * x;
1199
1200     /* Round and convert z to int, the result is in [-150*N, 128*N] and
1201        ideally ties-to-even rule is used, otherwise the magnitude of r
1202        can be bigger which gives larger approximation error.  */
1203     kd = __round(z);
1204     ki = (INT64)kd;
1205     r = z - kd;
1206
1207     /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
1208     t = exp2f_T[ki % (1 << 5)];
1209     t += ki << (52 - 5);
1210     s = *(double*)&t;
1211     z = C[0] * r + C[1];
1212     r2 = r * r;
1213     y = C[2] * r + 1;
1214     y = z * r2 + y;
1215     y = y * s;
1216     return y;
1217 }
1218
1219 /*********************************************************************
1220  *      fmodf (MSVCRT.@)
1221  *
1222  * Copied from musl: src/math/fmodf.c
1223  */
1224 float CDECL fmodf( float x, float y )
1225 {
1226     UINT32 xi = *(UINT32*)&x;
1227     UINT32 yi = *(UINT32*)&y;
1228     int ex = xi>>23 & 0xff;
1229     int ey = yi>>23 & 0xff;
1230     UINT32 sx = xi & 0x80000000;
1231     UINT32 i;
1232
1233     if (isinf(x)) return math_error(_DOMAIN, "fmodf", x, y, (x * y) / (x * y));
1234     if (yi << 1 == 0 || isnan(y) || ex == 0xff)
1235         return (x * y) / (x * y);
1236     if (xi << 1 <= yi << 1) {
1237         if (xi << 1 == yi << 1)
1238             return 0 * x;
1239         return x;
1240     }
1241
1242     /* normalize x and y */
1243     if (!ex) {
1244         for (i = xi << 9; i >> 31 == 0; ex--, i <<= 1);
1245         xi <<= -ex + 1;
1246     } else {
1247         xi &= -1U >> 9;
1248         xi |= 1U << 23;
1249     }
1250     if (!ey) {
1251         for (i = yi << 9; i >> 31 == 0; ey--, i <<= 1);
1252         yi <<= -ey + 1;
1253     } else {
1254         yi &= -1U >> 9;
1255         yi |= 1U << 23;
1256     }
1257
1258     /* x mod y */
1259     for (; ex > ey; ex--) {
1260         i = xi - yi;
1261         if (i >> 31 == 0) {
1262             if (i == 0)
1263                 return 0 * x;
1264             xi = i;
1265         }
1266         xi <<= 1;
1267     }
1268     i = xi - yi;
1269     if (i >> 31 == 0) {
1270         if (i == 0)
1271             return 0 * x;
1272         xi = i;
1273     }
1274     for (; xi>>23 == 0; xi <<= 1, ex--);
1275
1276     /* scale result up */
1277     if (ex > 0) {
1278         xi -= 1U << 23;
1279         xi |= (UINT32)ex << 23;
1280     } else {
1281         xi >>= -ex + 1;
1282     }
1283     xi |= sx;
1284     return *(float*)&xi;
1285 }
1286
1287 /*********************************************************************
1288  *      logf (MSVCRT.@)
1289  *
1290  * Copied from musl: src/math/logf.c src/math/logf_data.c
1291  */
1292 float CDECL logf( float x )
1293 {
1294     static const double Ln2 = 0x1.62e42fefa39efp-1;
1295     static const double A[] = {
1296         -0x1.00ea348b88334p-2,
1297         0x1.5575b0be00b6ap-2,
1298         -0x1.ffffef20a4123p-2
1299     };
1300     static const struct {
1301         double invc, logc;
1302     } T[] = {
1303         { 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 },
1304         { 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 },
1305         { 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 },
1306         { 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 },
1307         { 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 },
1308         { 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 },
1309         { 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 },
1310         { 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 },
1311         { 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 },
1312         { 0x1p+0, 0x0p+0 },
1313         { 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 },
1314         { 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 },
1315         { 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 },
1316         { 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 },
1317         { 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 },
1318         { 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 }
1319     };
1320
1321     double z, r, r2, y, y0, invc, logc;
1322     UINT32 ix, iz, tmp;
1323     int k, i;
1324
1325     ix = *(UINT32*)&x;
1326     /* Fix sign of zero with downward rounding when x==1. */
1327     if (ix == 0x3f800000)
1328         return 0;
1329     if (ix - 0x00800000 >= 0x7f800000 - 0x00800000) {
1330         /* x < 0x1p-126 or inf or nan. */
1331         if (ix * 2 == 0)
1332             return math_error(_SING, "logf", x, 0, (ix & 0x80000000 ? 1.0 : -1.0) / x);
1333         if (ix == 0x7f800000) /* log(inf) == inf. */
1334             return x;
1335         if (ix * 2 > 0xff000000)
1336             return x;
1337         if (ix & 0x80000000)
1338             return math_error(_DOMAIN, "logf", x, 0, (x - x) / (x - x));
1339         /* x is subnormal, normalize it. */
1340         x *= 0x1p23f;
1341         ix = *(UINT32*)&x;
1342         ix -= 23 << 23;
1343     }
1344
1345     /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
1346        The range is split into N subintervals.
1347        The ith subinterval contains z and c is near its center. */
1348     tmp = ix - 0x3f330000;
1349     i = (tmp >> (23 - 4)) % (1 << 4);
1350     k = (INT32)tmp >> 23; /* arithmetic shift */
1351     iz = ix - (tmp & (0x1ffu << 23));
1352     invc = T[i].invc;
1353     logc = T[i].logc;
1354     z = *(float*)&iz;
1355
1356     /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
1357     r = z * invc - 1;
1358     y0 = logc + (double)k * Ln2;
1359
1360     /* Pipelined polynomial evaluation to approximate log1p(r). */
1361     r2 = r * r;
1362     y = A[1] * r + A[2];
1363     y = A[0] * r2 + y;
1364     y = y * r2 + (y0 + r);
1365     return y;
1366 }
1367
1368 /*********************************************************************
1369  *      log10f (MSVCRT.@)
1370  */
1371 float CDECL log10f( float x )
1372 {
1373     static const float ivln10hi = 4.3432617188e-01,
1374         ivln10lo = -3.1689971365e-05,
1375         log10_2hi = 3.0102920532e-01,
1376         log10_2lo = 7.9034151668e-07,
1377         Lg1 = 0xaaaaaa.0p-24,
1378         Lg2 = 0xccce13.0p-25,
1379         Lg3 = 0x91e9ee.0p-25,
1380         Lg4 = 0xf89e26.0p-26;
1381
1382     union {float f; UINT32 i;} u = {x};
1383     float hfsq, f, s, z, R, w, t1, t2, dk, hi, lo;
1384     UINT32 ix;
1385     int k;
1386
1387     ix = u.i;
1388     k = 0;
1389     if (ix < 0x00800000 || ix >> 31) { /* x < 2**-126 */
1390         if (ix << 1 == 0)
1391             return math_error(_SING, "log10f", x, 0, -1 / (x * x));
1392         if ((ix & ~(1u << 31)) > 0x7f800000)
1393             return x;
1394         if (ix >> 31)
1395             return math_error(_DOMAIN, "log10f", x, 0, (x - x) / (x - x));
1396         /* subnormal number, scale up x */
1397         k -= 25;
1398         x *= 0x1p25f;
1399         u.f = x;
1400         ix = u.i;
1401     } else if (ix >= 0x7f800000) {
1402         return x;
1403     } else if (ix == 0x3f800000)
1404         return 0;
1405
1406     /* reduce x into [sqrt(2)/2, sqrt(2)] */
1407     ix += 0x3f800000 - 0x3f3504f3;
1408     k += (int)(ix >> 23) - 0x7f;
1409     ix = (ix & 0x007fffff) + 0x3f3504f3;
1410     u.i = ix;
1411     x = u.f;
1412
1413     f = x - 1.0f;
1414     s = f / (2.0f + f);
1415     z = s * s;
1416     w = z * z;
1417     t1= w * (Lg2 + w * Lg4);
1418     t2= z * (Lg1 + w * Lg3);
1419     R = t2 + t1;
1420     hfsq = 0.5f * f * f;
1421
1422     hi = f - hfsq;
1423     u.f = hi;
1424     u.i &= 0xfffff000;
1425     hi = u.f;
1426     lo = f - hi - hfsq + s * (hfsq + R);
1427     dk = k;
1428     return dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi + hi * ivln10hi + dk * log10_2hi;
1429 }
1430
1431 /* Subnormal input is normalized so ix has negative biased exponent.
1432    Output is multiplied by POWF_SCALE (where 1 << 5). */
1433 static double powf_log2(UINT32 ix)
1434 {
1435     static const struct {
1436         double invc, logc;
1437     } T[] = {
1438         { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * (1 << 5) },
1439         { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * (1 << 5) },
1440         { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * (1 << 5) },
1441         { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * (1 << 5) },
1442         { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * (1 << 5) },
1443         { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * (1 << 5) },
1444         { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * (1 << 5) },
1445         { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * (1 << 5) },
1446         { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * (1 << 5) },
1447         { 0x1p+0, 0x0p+0 * (1 << 4) },
1448         { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * (1 << 5) },
1449         { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * (1 << 5) },
1450         { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * (1 << 5) },
1451         { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * (1 << 5) },
1452         { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * (1 << 5) },
1453         { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * (1 << 5) }
1454     };
1455     static const double A[] = {
1456         0x1.27616c9496e0bp-2 * (1 << 5), -0x1.71969a075c67ap-2 * (1 << 5),
1457         0x1.ec70a6ca7baddp-2 * (1 << 5), -0x1.7154748bef6c8p-1 * (1 << 5),
1458         0x1.71547652ab82bp0 * (1 << 5)
1459     };
1460
1461     double z, r, r2, r4, p, q, y, y0, invc, logc;
1462     UINT32 iz, top, tmp;
1463     int k, i;
1464
1465     /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
1466        The range is split into N subintervals.
1467        The ith subinterval contains z and c is near its center. */
1468     tmp = ix - 0x3f330000;
1469     i = (tmp >> (23 - 4)) % (1 << 4);
1470     top = tmp & 0xff800000;
1471     iz = ix - top;
1472     k = (INT32)top >> (23 - 5); /* arithmetic shift */
1473     invc = T[i].invc;
1474     logc = T[i].logc;
1475     z = *(float*)&iz;
1476
1477     /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
1478     r = z * invc - 1;
1479     y0 = logc + (double)k;
1480
1481     /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
1482     r2 = r * r;
1483     y = A[0] * r + A[1];
1484     p = A[2] * r + A[3];
1485     r4 = r2 * r2;
1486     q = A[4] * r + y0;
1487     q = p * r2 + q;
1488     y = y * r4 + q;
1489     return y;
1490 }
1491
1492 /* The output of log2 and thus the input of exp2 is either scaled by N
1493    (in case of fast toint intrinsics) or not. The unscaled xd must be
1494    in [-1021,1023], sign_bias sets the sign of the result. */
1495 static float powf_exp2(double xd, UINT32 sign_bias)
1496 {
1497     static const double C[] = {
1498         0x1.c6af84b912394p-5 / (1 << 5) / (1 << 5) / (1 << 5),
1499         0x1.ebfce50fac4f3p-3 / (1 << 5) / (1 << 5),
1500         0x1.62e42ff0c52d6p-1 / (1 << 5)
1501     };
1502
1503     UINT64 ki, ski, t;
1504     double kd, z, r, r2, y, s;
1505
1506     /* N*x = k + r with r in [-1/2, 1/2] */
1507     kd = __round(xd); /* k */
1508     ki = (INT64)kd;
1509     r = xd - kd;
1510
1511     /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
1512     t = exp2f_T[ki % (1 << 5)];
1513     ski = ki + sign_bias;
1514     t += ski << (52 - 5);
1515     s = *(double*)&t;
1516     z = C[0] * r + C[1];
1517     r2 = r * r;
1518     y = C[2] * r + 1;
1519     y = z * r2 + y;
1520     y = y * s;
1521     return y;
1522 }
1523
1524 /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
1525    the bit representation of a non-zero finite floating-point value. */
1526 static int powf_checkint(UINT32 iy)
1527 {
1528     int e = iy >> 23 & 0xff;
1529     if (e < 0x7f)
1530         return 0;
1531     if (e > 0x7f + 23)
1532         return 2;
1533     if (iy & ((1 << (0x7f + 23 - e)) - 1))
1534         return 0;
1535     if (iy & (1 << (0x7f + 23 - e)))
1536         return 1;
1537     return 2;
1538 }
1539
1540 /*********************************************************************
1541  *      powf (MSVCRT.@)
1542  *
1543  * Copied from musl: src/math/powf.c src/math/powf_data.c
1544  */
1545 float CDECL powf( float x, float y )
1546 {
1547     UINT32 sign_bias = 0;
1548     UINT32 ix, iy;
1549     double logx, ylogx;
1550
1551     ix = *(UINT32*)&x;
1552     iy = *(UINT32*)&y;
1553     if (ix - 0x00800000 >= 0x7f800000 - 0x00800000 ||
1554             2 * iy - 1 >= 2u * 0x7f800000 - 1) {
1555         /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */
1556         if (2 * iy - 1 >= 2u * 0x7f800000 - 1) {
1557             if (2 * iy == 0)
1558                 return 1.0f;
1559             if (ix == 0x3f800000)
1560                 return 1.0f;
1561             if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000)
1562                 return x + y;
1563             if (2 * ix == 2 * 0x3f800000)
1564                 return 1.0f;
1565             if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
1566                 return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
1567             return y * y;
1568         }
1569         if (2 * ix - 1 >= 2u * 0x7f800000 - 1) {
1570             float x2 = x * x;
1571             if (ix & 0x80000000 && powf_checkint(iy) == 1)
1572                 x2 = -x2;
1573             if (iy & 0x80000000 && x2 == 0.0)
1574                 return math_error(_SING, "powf", x, y, 1 / x2);
1575             /* Without the barrier some versions of clang hoist the 1/x2 and
1576                thus division by zero exception can be signaled spuriously. */
1577             return iy & 0x80000000 ? fp_barrierf(1 / x2) : x2;
1578         }
1579         /* x and y are non-zero finite. */
1580         if (ix & 0x80000000) {
1581             /* Finite x < 0. */
1582             int yint = powf_checkint(iy);
1583             if (yint == 0)
1584                 return math_error(_DOMAIN, "powf", x, y, 0 / (x - x));
1585             if (yint == 1)
1586                 sign_bias = 1 << (5 + 11);
1587             ix &= 0x7fffffff;
1588         }
1589         if (ix < 0x00800000) {
1590             /* Normalize subnormal x so exponent becomes negative. */
1591             x *= 0x1p23f;
1592             ix = *(UINT32*)&x;
1593             ix &= 0x7fffffff;
1594             ix -= 23 << 23;
1595         }
1596     }
1597     logx = powf_log2(ix);
1598     ylogx = y * logx; /* cannot overflow, y is single prec. */
1599     if ((*(UINT64*)&ylogx >> 47 & 0xffff) >= 0x40af800000000000llu >> 47) {
1600         /* |y*log(x)| >= 126. */
1601         if (ylogx > 0x1.fffffffd1d571p+6 * (1 << 5))
1602             return math_error(_OVERFLOW, "powf", x, y, (sign_bias ? -1.0 : 1.0) * 0x1p1023);
1603         if (ylogx <= -150.0 * (1 << 5))
1604             return math_error(_UNDERFLOW, "powf", x, y, (sign_bias ? -1.0 : 1.0) / 0x1p1023);
1605     }
1606     return powf_exp2(ylogx, sign_bias);
1607 }
1608
1609 /*********************************************************************
1610  *      sinf (MSVCRT.@)
1611  *
1612  * Copied from musl: src/math/sinf.c
1613  */
1614 float CDECL sinf( float x )
1615 {
1616     static const double s1pio2 = 1*M_PI_2,
1617         s2pio2 = 2*M_PI_2,
1618         s3pio2 = 3*M_PI_2,
1619         s4pio2 = 4*M_PI_2;
1620
1621     double y;
1622     UINT32 ix;
1623     int n, sign;
1624
1625     ix = *(UINT32*)&x;
1626     sign = ix >> 31;
1627     ix &= 0x7fffffff;
1628
1629     if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
1630         if (ix < 0x39800000) { /* |x| < 2**-12 */
1631             /* raise inexact if x!=0 and underflow if subnormal */
1632             fp_barrierf(ix < 0x00800000 ? x / 0x1p120f : x + 0x1p120f);
1633             return x;
1634         }
1635         return __sindf(x);
1636     }
1637     if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
1638         if (ix <= 0x4016cbe3) { /* |x| ~<= 3pi/4 */
1639             if (sign)
1640                 return -__cosdf(x + s1pio2);
1641             else
1642                 return __cosdf(x - s1pio2);
1643         }
1644         return __sindf(sign ? -(x + s2pio2) : -(x - s2pio2));
1645     }
1646     if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
1647         if (ix <= 0x40afeddf) { /* |x| ~<= 7*pi/4 */
1648             if (sign)
1649                 return __cosdf(x + s3pio2);
1650             else
1651                 return -__cosdf(x - s3pio2);
1652         }
1653         return __sindf(sign ? x + s4pio2 : x - s4pio2);
1654     }
1655
1656     /* sin(Inf or NaN) is NaN */
1657     if (isinf(x))
1658         return math_error(_DOMAIN, "sinf", x, 0, x - x);
1659     if (ix >= 0x7f800000)
1660         return x - x;
1661
1662     /* general argument reduction needed */
1663     n = __rem_pio2f(x, &y);
1664     switch (n&3) {
1665     case 0: return __sindf(y);
1666     case 1: return __cosdf(y);
1667     case 2: return __sindf(-y);
1668     default: return -__cosdf(y);
1669     }
1670 }
1671
1672 /*********************************************************************
1673  *      sinhf (MSVCRT.@)
1674  */
1675 float CDECL sinhf( float x )
1676 {
1677     UINT32 ui = *(UINT32*)&x;
1678     float t, h, absx;
1679
1680     h = 0.5;
1681     if (ui >> 31)
1682         h = -h;
1683     /* |x| */
1684     ui &= 0x7fffffff;
1685     absx = *(float*)&ui;
1686
1687     /* |x| < log(FLT_MAX) */
1688     if (ui < 0x42b17217) {
1689         t = __expm1f(absx);
1690         if (ui < 0x3f800000) {
1691             if (ui < 0x3f800000 - (12 << 23))
1692                 return x;
1693             return h * (2 * t - t * t / (t + 1));
1694         }
1695         return h * (t + t / (t + 1));
1696     }
1697
1698     /* |x| > logf(FLT_MAX) or nan */
1699     if (ui > 0x7f800000)
1700         *(DWORD*)&t = *(DWORD*)&x | 0x400000;
1701     else
1702         t = __expo2f(absx, 2 * h);
1703     return t;
1704 }
1705
1706 static BOOL sqrtf_validate( float *x )
1707 {
1708     short c = _fdclass(*x);
1709
1710     if (c == FP_ZERO) return FALSE;
1711     if (c == FP_NAN) return FALSE;
1712     if (signbit(*x))
1713     {
1714         *x = math_error(_DOMAIN, "sqrtf", *x, 0, ret_nan(TRUE));
1715         return FALSE;
1716     }
1717     if (c == FP_INFINITE) return FALSE;
1718     return TRUE;
1719 }
1720
/* Assembly helper that executes "sqrtss %xmm0, %xmm0" and returns.
 * It is defined for both x86 and x86-64; within this part of the file it
 * is only called from the x86-64 branch of sqrtf(), after
 * sqrtf_validate() has filtered out the special cases.
 * NOTE(review): callers on i386, if any, are outside this section. */
#if defined(__x86_64__) || defined(__i386__)
float CDECL sse2_sqrtf(float);
__ASM_GLOBAL_FUNC( sse2_sqrtf,
        "sqrtss %xmm0, %xmm0\n\t"
        "ret" )
#endif
1727
1728 /*********************************************************************
1729  *      sqrtf (MSVCRT.@)
1730  *
1731  * Copied from musl: src/math/sqrtf.c
1732  */
1733 float CDECL sqrtf( float x )
1734 {
1735 #ifdef __x86_64__
1736     if (!sqrtf_validate(&x))
1737         return x;
1738
1739     return sse2_sqrtf(x);
1740 #else
1741     static const float tiny = 1.0e-30;
1742
1743     float z;
1744     int ix,s,q,m,t,i;
1745     unsigned int r;
1746
1747     ix = *(int*)&x;
1748
1749     if (!sqrtf_validate(&x))
1750         return x;
1751
1752     /* normalize x */
1753     m = ix >> 23;
1754     if (m == 0) {  /* subnormal x */
1755         for (i = 0; (ix & 0x00800000) == 0; i++)
1756             ix <<= 1;
1757         m -= i - 1;
1758     }
1759     m -= 127;  /* unbias exponent */
1760     ix = (ix & 0x007fffff) | 0x00800000;
1761     if (m & 1)  /* odd m, double x to make it even */
1762         ix += ix;
1763     m >>= 1;  /* m = [m/2] */
1764
1765     /* generate sqrt(x) bit by bit */
1766     ix += ix;
1767     q = s = 0;       /* q = sqrt(x) */
1768     r = 0x01000000;  /* r = moving bit from right to left */
1769
1770     while (r != 0) {
1771         t = s + r;
1772         if (t <= ix) {
1773             s = t + r;
1774             ix -= t;
1775             q += r;
1776         }
1777         ix += ix;
1778         r >>= 1;
1779     }
1780
1781     /* use floating add to find out rounding direction */
1782     if (ix != 0) {
1783         z = 1.0f - tiny; /* raise inexact flag */
1784         if (z >= 1.0f) {
1785             z = 1.0f + tiny;
1786             if (z > 1.0f)
1787                 q += 2;
1788             else
1789                 q += q & 1;
1790         }
1791     }
1792     ix = (q >> 1) + 0x3f000000;
1793     r = ix + ((unsigned int)m << 23);
1794     z = *(float*)&r;
1795     return z;
1796 #endif
1797 }
1798
/* Copied from musl: src/math/__tandf.c
 *
 * Polynomial kernel shared by the tanf() code paths.  Evaluates
 *   p(x) = x + T[0]*x^3 + T[1]*x^5 + ... + T[5]*x^13
 * in double precision and returns p(x) when odd is zero, or -1/p(x)
 * when odd is non-zero.  tanf() calls it with arguments it has already
 * reduced. */
static float __tandf(double x, int odd)
{
    /* T[i] is the coefficient of x^(2*i + 3) */
    static const double T[] = {
        0x15554d3418c99f.0p-54,
        0x1112fd38999f72.0p-55,
        0x1b54c91d865afe.0p-57,
        0x191df3908c33ce.0p-58,
        0x185dadfcecf44e.0p-61,
        0x1362b9bf971bcd.0p-59,
    };

    const double x2 = x * x;
    const double x4 = x2 * x2;
    const double x3 = x2 * x;
    /* the three coefficient pairs are evaluated independently */
    const double high = T[4] + x2 * T[5];
    const double middle = T[2] + x2 * T[3];
    const double low = T[0] + x2 * T[1];
    const double tangent = (x + x3 * low) + (x3 * x4) * (middle + x4 * high);

    if (odd)
        return -1.0 / tangent;
    return tangent;
}
1822
1823 /*********************************************************************
1824  *      tanf (MSVCRT.@)
1825  *
1826  * Copied from musl: src/math/tanf.c
1827  */
1828 float CDECL tanf( float x )
1829 {
1830     static const double t1pio2 = 1*M_PI_2,
1831         t2pio2 = 2*M_PI_2,
1832         t3pio2 = 3*M_PI_2,
1833         t4pio2 = 4*M_PI_2;
1834
1835     double y;
1836     UINT32 ix;
1837     unsigned n, sign;
1838
1839     ix = *(UINT32*)&x;
1840     sign = ix >> 31;
1841     ix &= 0x7fffffff;
1842
1843     if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
1844         if (ix < 0x39800000) { /* |x| < 2**-12 */
1845             /* raise inexact if x!=0 and underflow if subnormal */
1846             fp_barrierf(ix < 0x00800000 ? x / 0x1p120f : x + 0x1p120f);
1847             return x;
1848         }
1849         return __tandf(x, 0);
1850     }
1851     if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
1852         if (ix <= 0x4016cbe3) /* |x| ~<= 3pi/4 */
1853             return __tandf((sign ? x + t1pio2 : x - t1pio2), 1);
1854         else
1855             return __tandf((sign ? x + t2pio2 : x - t2pio2), 0);
1856     }
1857     if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
1858         if (ix <= 0x40afeddf) /* |x| ~<= 7*pi/4 */
1859             return __tandf((sign ? x + t3pio2 : x - t3pio2), 1);
1860         else
1861             return __tandf((sign ? x + t4pio2 : x - t4pio2), 0);
1862     }
1863
1864     /* tan(Inf or NaN) is NaN */
1865     if (isinf(x))
1866         return math_error(_DOMAIN, "tanf", x, 0, x - x);
1867     if (ix >= 0x7f800000)
1868         return x - x;
1869
1870     /* argument reduction */
1871     n = __rem_pio2f(x, &y);
1872     return __tandf(y, n & 1);
1873 }
1874
1875 /*********************************************************************
1876  *      tanhf (MSVCRT.@)
1877  */
1878 float CDECL tanhf( float x )
1879 {
1880     UINT32 ui = *(UINT32*)&x;
1881     UINT32 sign = ui & 0x80000000;
1882     float t;
1883
1884     /* x = |x| */
1885     ui &= 0x7fffffff;
1886     x = *(float*)&ui;
1887
1888     if (ui > 0x3f0c9f54) {
1889         /* |x| > log(3)/2 ~= 0.5493 or nan */
1890         if (ui > 0x41200000) {
1891             if (ui > 0x7f800000) {
1892                 *(UINT32*)&x = ui | sign | 0x400000;
1893 #if _MSVCR_VER < 140
1894                 return math_error(_DOMAIN, "tanhf", x, 0, x);
1895 #else
1896                 return x;
1897 #endif
1898             }
1899             /* |x| > 10 */
1900             fp_barrierf(x + 0x1p120f);
1901             t = 1 + 0 / x;
1902         } else {
1903             t = __expm1f(2 * x);
1904             t = 1 - 2 / (t + 2);
1905         }
1906     } else if (ui > 0x3e82c578) {
1907         /* |x| > log(5/3)/2 ~= 0.2554 */
1908         t = __expm1f(2 * x);
1909         t = t / (t + 2);
1910     } else if (ui >= 0x00800000) {
1911         /* |x| >= 0x1p-126 */
1912         t = __expm1f(-2 * x);
1913         t = -t / (t + 2);
1914     } else {
1915         /* |x| is subnormal */
1916         fp_barrierf(x * x);
1917         t = x;
1918     }
1919     return sign ? -t : t;
1920 }
1921
1922 /*********************************************************************
1923  *      ceilf (MSVCRT.@)
1924  *
1925  * Copied from musl: src/math/ceilf.c
1926  */
1927 float CDECL ceilf( float x )
1928 {
1929     union {float f; UINT32 i;} u = {x};
1930     int e = (int)(u.i >> 23 & 0xff) - 0x7f;
1931     UINT32 m;
1932
1933     if (e >= 23)
1934         return x;
1935     if (e >= 0) {
1936         m = 0x007fffff >> e;
1937         if ((u.i & m) == 0)
1938             return x;
1939         if (u.i >> 31 == 0)
1940             u.i += m;
1941         u.i &= ~m;
1942     } else {
1943         if (u.i >> 31)
1944             return -0.0;
1945         else if (u.i << 1)
1946             return 1.0;
1947     }
1948     return u.f;
1949 }
1950
/*********************************************************************
 *      floorf (MSVCRT.@)
 *
 * Return the largest integral value that is not greater than x.
 * Values with |x| >= 2^23, infinities and NaNs are returned unchanged.
 *
 * Copied from musl: src/math/floorf.c
 */
float CDECL floorf( float x )
{
    union {float f; UINT32 i;} bits = {x};
    const int negative = bits.i >> 31;
    const int exponent = (int)((bits.i >> 23) & 0xff) - 0x7f;
    UINT32 frac_mask;

    /* no fractional bits are stored once the exponent reaches 23 */
    if (exponent >= 23)
        return x;

    if (exponent < 0) {
        /* |x| < 1: the answer is +0, -1, or x itself when x is -0 */
        if (!negative)
            return 0;
        if (bits.i << 1)
            return -1;
        return bits.f;
    }

    /* mantissa bits that encode the fractional part */
    frac_mask = 0x007fffff >> exponent;
    if (!(bits.i & frac_mask))
        return x;

    /* negative values round away from zero: carry into the integral bits */
    if (negative)
        bits.i += frac_mask;
    bits.i &= ~frac_mask;
    return bits.f;
}
1979
/*********************************************************************
 *      frexpf (MSVCRT.@)
 *
 * Split x into a mantissa and a power of two so that x = m * 2^(*e).
 * For finite non-zero x the returned mantissa has magnitude in [0.5, 1).
 * For zero, the value is returned unchanged and *e is set to 0.
 * For infinities and NaNs, x is returned and *e is left untouched.
 *
 * The bit pattern is accessed through a union, like the sibling
 * ceilf/floorf/modff helpers, instead of casting the address of a float
 * to an integer pointer (which violates the strict aliasing rules).
 *
 * Copied from musl: src/math/frexpf.c
 */
float CDECL frexpf( float x, int *e )
{
    union {float f; UINT32 i;} u = {x};
    int ee = u.i >> 23 & 0xff;

    if (!ee) {
        if (x) {
            /* subnormal: scale into the normal range, then undo the scaling
             * in the reported exponent */
            x = frexpf(x * 0x1p64, e);
            *e -= 64;
        } else *e = 0;
        return x;
    } else if (ee == 0xff) {
        /* inf or nan */
        return x;
    }

    *e = ee - 0x7e;
    /* keep sign and mantissa, force the biased exponent of 0.5 */
    u.i &= 0x807ffffful;
    u.i |= 0x3f000000ul;
    return u.f;
}
2005
/*********************************************************************
 *      modff (MSVCRT.@)
 *
 * Split x into integral and fractional parts, both carrying the sign
 * of x.  The integral part is stored in *iptr and the fractional part
 * is returned.  For a NaN both results are x itself.
 *
 * Copied from musl: src/math/modff.c
 */
float CDECL modff( float x, float *iptr )
{
    union {float f; UINT32 i;} bits = {x};
    const UINT32 sign_bit = bits.i & 0x80000000;
    const int exponent = (int)((bits.i >> 23) & 0xff) - 0x7f;
    UINT32 frac_mask;

    if (exponent < 0) {
        /* |x| < 1: the integral part is a zero with the sign of x */
        bits.i = sign_bit;
        *iptr = bits.f;
        return x;
    }

    if (exponent >= 23) {
        /* |x| >= 2^23, inf or nan: nothing is stored below the unit bit */
        *iptr = x;
        if (exponent == 0x80 && (bits.i << 9) != 0)
            return x; /* nan */
        bits.i = sign_bit;
        return bits.f;
    }

    /* mantissa bits that encode the fractional part */
    frac_mask = 0x007fffff >> exponent;
    if ((bits.i & frac_mask) == 0) {
        /* x is already integral: fractional part is a signed zero */
        *iptr = x;
        bits.i = sign_bit;
        return bits.f;
    }

    bits.i &= ~frac_mask;
    *iptr = bits.f;
    return x - bits.f;
}
2043
2044 #endif
2045
2046 #if !defined(__i386__) && !defined(__x86_64__) && (_MSVCR_VER == 0 || _MSVCR_VER >= 110)
2047
/*********************************************************************
 *      fabsf (MSVCRT.@)
 *
 * Return the absolute value of x by clearing the sign bit of its
 * single precision representation.
 *
 * Copied from musl: src/math/fabsf.c
 */
float CDECL fabsf( float x )
{
    union { float f; UINT32 i; } bits;

    bits.f = x;
    bits.i &= ~0x80000000u; /* drop the sign bit, keep everything else */
    return bits.f;
}
2059
2060 #endif
2061
2062 /*********************************************************************
2063  *              acos (MSVCRT.@)
2064  *
2065  * Copied from musl: src/math/acos.c
2066  */
/* Rational approximation P(z)/Q(z) used by acos(); callers pass either
 * x*x or a halved distance from 1, i.e. values of z in [0, 0.25]. */
static double acos_R(double z)
{
    /* numerator coefficients, lowest order first */
    static const double P[] = {
         1.66666666666666657415e-01,
        -3.25565818622400915405e-01,
         2.01212532134862925881e-01,
        -4.00555345006794114027e-02,
         7.91534994289814532176e-04,
         3.47933107596021167570e-05,
    };
    /* denominator coefficients for z^1..z^4 (the constant term is 1) */
    static const double Q[] = {
        -2.40339491173441421878e+00,
         2.02094576023350569471e+00,
        -6.88283971605453293030e-01,
         7.70381505559019352791e-02,
    };
    double num, den;

    num = z * (P[0] + z * (P[1] + z * (P[2] + z * (P[3] + z * (P[4] + z * P[5])))));
    den = 1.0 + z * (Q[0] + z * (Q[1] + z * (Q[2] + z * Q[3])));
    return num/den;
}
2085
2086 double CDECL acos( double x )
2087 {
2088     static const double pio2_hi = 1.57079632679489655800e+00,
2089                  pio2_lo = 6.12323399573676603587e-17;
2090
2091     double z, w, s, c, df;
2092     unsigned int hx, ix;
2093     ULONGLONG llx;
2094
2095     hx = *(ULONGLONG*)&x >> 32;
2096     ix = hx & 0x7fffffff;
2097     /* |x| >= 1 or nan */
2098     if (ix >= 0x3ff00000) {
2099         unsigned int lx;
2100
2101         lx = *(ULONGLONG*)&x;
2102         if (((ix - 0x3ff00000) | lx) == 0) {
2103             /* acos(1)=0, acos(-1)=pi */
2104             if (hx >> 31)
2105                 return 2 * pio2_hi + 7.5231638452626401e-37;
2106             return 0;
2107         }
2108         if (isnan(x)) return x;
2109         return math_error(_DOMAIN, "acos", x, 0, 0 / (x - x));
2110     }
2111     /* |x| < 0.5 */
2112     if (ix < 0x3fe00000) {
2113         if (ix <= 0x3c600000)  /* |x| < 2**-57 */
2114             return pio2_hi + 7.5231638452626401e-37;
2115         return pio2_hi - (x - (pio2_lo - x * acos_R(x * x)));
2116     }
2117     /* x < -0.5 */
2118     if (hx >> 31) {
2119         z = (1.0 + x) * 0.5;
2120         s = sqrt(z);
2121         w = acos_R(z) * s - pio2_lo;
2122         return 2 * (pio2_hi - (s + w));
2123     }
2124     /* x > 0.5 */
2125     z = (1.0 - x) * 0.5;
2126     s = sqrt(z);
2127     df = s;
2128     llx = (*(ULONGLONG*)&df >> 32) << 32;
2129     df = *(double*)&llx;
2130     c = (z - df * df) / (s + df);
2131     w = acos_R(z) * s + c;
2132     return 2 * (df + w);
2133 }
2134
2135 /*********************************************************************
2136  *              asin (MSVCRT.@)
2137  *
2138  * Copied from musl: src/math/asin.c
2139  */
/* Rational approximation P(z)/Q(z) used by asin(), which evaluates
 * x + x * asin_R(x * x) for small |x|. */
static double asin_R(double z)
{
    /* numerator coefficients, lowest order first */
    static const double P[] = {
         1.66666666666666657415e-01,
        -3.25565818622400915405e-01,
         2.01212532134862925881e-01,
        -4.00555345006794114027e-02,
         7.91534994289814532176e-04,
         3.47933107596021167570e-05,
    };
    /* denominator coefficients for z^1..z^4 (the constant term is 1) */
    static const double Q[] = {
        -2.40339491173441421878e+00,
         2.02094576023350569471e+00,
        -6.88283971605453293030e-01,
         7.70381505559019352791e-02,
    };
    double num, den;

    num = z * (P[0] + z * (P[1] + z * (P[2] + z * (P[3] + z * (P[4] + z * P[5])))));
    den = 1.0 + z * (Q[0] + z * (Q[1] + z * (Q[2] + z * Q[3])));
    return num / den;
}
2159
#ifdef __i386__
/* x87 implementation of asin, only built on i386; asin() falls back to it
 * when its control-word check fails.
 *
 * The sequence loads x, duplicates it, combines the copy with 1 by
 * subtraction, multiplies that by (1 + x), takes the square root and ends
 * with fpatan on x and that root.  This appears to evaluate
 * atan2(x, sqrt((1 - x) * (1 + x))) -- NOTE(review): the operand order of
 * the implicit fsubp depends on the assembler; confirm.
 *
 * SET_X87_CW and RESET_X87_CW are macros defined outside this block;
 * presumably they install a temporary x87 control word around the
 * computation and restore the previous one -- TODO confirm. */
double CDECL x87_asin(double);
__ASM_GLOBAL_FUNC( x87_asin,
        "fldl 4(%esp)\n\t"
        SET_X87_CW(~0x37f)
        "fld %st\n\t"
        "fld1\n\t"
        "fsubp\n\t"
        "fld1\n\t"
        "fadd %st(2)\n\t"
        "fmulp\n\t"
        "fsqrt\n\t"
        "fpatan\n\t"
        RESET_X87_CW
        "ret" )
#endif
2176
/* Arc sine of x.
 *
 * - x == +-1 returns +-pi/2.
 * - NaN input is returned as is, except on i386 where it is routed through
 *   math_error(_DOMAIN, "asin", ...).
 * - Any other |x| >= 1 is reported through math_error(_DOMAIN, "asin", ...)
 *   with 0 / (x - x) as the result value.
 * - On i386 the computation is delegated to x87_asin() unless SSE2 is
 *   enabled and the control words read by __control87_2() pass the check
 *   below.
 * - Otherwise the musl polynomial/rational evaluation is used. */
double CDECL asin( double x )
{
    static const double pio2_hi = 1.57079632679489655800e+00,
                 pio2_lo = 6.12323399573676603587e-17;

    double z, r, s;
    unsigned int hx, ix;
    ULONGLONG llx;
#ifdef __i386__
    unsigned int x87_cw, sse2_cw;
#endif

    /* hx: high 32 bits of x (sign, exponent, top of mantissa); ix: same without sign */
    hx = *(ULONGLONG*)&x >> 32;
    ix = hx & 0x7fffffff;
    /* |x| >= 1 or nan */
    if (ix >= 0x3ff00000) {
        unsigned int lx;
        /* low 32 bits of x; zero together with ix == 0x3ff00000 means |x| == 1 */
        lx = *(ULONGLONG*)&x;
        if (((ix - 0x3ff00000) | lx) == 0)
            /* asin(1) = +-pi/2 with inexact */
            return x * pio2_hi + 7.5231638452626401e-37;
        if (isnan(x))
        {
#ifdef __i386__
            return math_error(_DOMAIN, "asin", x, 0, x);
#else
            return x;
#endif
        }
        return math_error(_DOMAIN, "asin", x, 0, 0 / (x - x));
    }

#ifdef __i386__
    /* Use the x87 routine when SSE2 is unavailable, when the x87 control word
     * does not have all _MCW_EM bits set, or when the SSE2 control word has
     * any _MCW_RC bit set or not all _MCW_EM bits set.
     * NOTE(review): presumably this keeps results consistent with non-default
     * exception mask / rounding settings -- confirm. */
    __control87_2(0, 0, &x87_cw, &sse2_cw);
    if (!sse2_enabled || (x87_cw & _MCW_EM) != _MCW_EM
            || (sse2_cw & (_MCW_EM | _MCW_RC)) != _MCW_EM)
        return x87_asin(x);
#endif

    /* |x| < 0.5 */
    if (ix < 0x3fe00000) {
        /* if 0x1p-1022 <= |x| < 0x1p-26, avoid raising underflow */
        if (ix < 0x3e500000 && ix >= 0x00100000)
            return x;
        return x + x * asin_R(x * x);
    }
    /* 1 > |x| >= 0.5 */
    z = (1 - fabs(x)) * 0.5;
    s = sqrt(z);
    r = asin_R(z);
    if (ix >= 0x3fef3333) {  /* if |x| > 0.975 */
        x = pio2_hi - (2 * (s + s * r) - pio2_lo);
    } else {
        double f, c;
        /* f+c = sqrt(z) */
        f = s;
        /* clear the low 32 bits of f so that f * f below loses fewer bits */
        llx = (*(ULONGLONG*)&f >> 32) << 32;
        f = *(double*)&llx;
        c = (z - f * f) / (s + f);
        x = 0.5 * pio2_hi - (2 * s * r - (pio2_lo - 2 * c) - (0.5 * pio2_hi - 2 * f));
    }
    /* the computation above used |x|; restore the sign of the argument */
    if (hx >> 31)
        return -x;
    return x;
}
2242
2243 /*********************************************************************
2244  *              atan (MSVCRT.@)
2245  *
2246  * Copied from musl: src/math/atan.c
2247  */
2248 double CDECL atan( double x )
2249 {
2250     static const double atanhi[] = {
2251         4.63647609000806093515e-01,
2252         7.85398163397448278999e-01,
2253         9.82793723247329054082e-01,
2254         1.57079632679489655800e+00,
2255     };
2256     static const double atanlo[] = {
2257         2.26987774529616870924e-17,
2258         3.06161699786838301793e-17,
2259         1.39033110312309984516e-17,
2260         6.12323399573676603587e-17,
2261     };
2262     static const double aT[] = {
2263         3.33333333333329318027e-01,
2264         -1.99999999998764832476e-01,
2265         1.42857142725034663711e-01,
2266         -1.11111104054623557880e-01,
2267         9.09088713343650656196e-02,
2268         -7.69187620504482999495e-02,
2269         6.66107313738753120669e-02,
2270         -5.83357013379057348645e-02,
2271         4.97687799461593236017e-02,
2272         -3.65315727442169155270e-02,
2273         1.62858201153657823623e-02,
2274     };
2275
2276     double w, s1, s2, z;
2277     unsigned int ix, sign;
2278     int id;
2279
2280 #if _MSVCR_VER == 0
2281     if (isnan(x)) return math_error(_DOMAIN, "atan", x, 0, x);
2282 #endif
2283
2284     ix = *(ULONGLONG*)&x >> 32;
2285     sign = ix >> 31;
2286     ix &= 0x7fffffff;
2287     if (ix >= 0x44100000) {   /* if |x| >= 2^66 */
2288         if (isnan(x))
2289             return x;
2290         z = atanhi[3] + 7.5231638452626401e-37;
2291         return sign ? -z : z;
2292     }
2293     if (ix < 0x3fdc0000) {    /* |x| < 0.4375 */
2294         if (ix < 0x3e400000) {  /* |x| < 2^-27 */
2295             if (ix < 0x00100000)
2296                 /* raise underflow for subnormal x */
2297                 fp_barrierf((float)x);
2298             return x;
2299         }
2300         id = -1;
2301     } else {
2302         x = fabs(x);
2303         if (ix < 0x3ff30000) {  /* |x| < 1.1875 */
2304             if (ix < 0x3fe60000) {  /*  7/16 <= |x| < 11/16 */
2305                 id = 0;
2306                 x = (2.0 * x - 1.0) / (2.0 + x);
2307             } else {                /* 11/16 <= |x| < 19/16 */
2308                 id = 1;
2309                 x = (x - 1.0) / (x + 1.0);
2310             }
2311         } else {
2312             if (ix < 0x40038000) {  /* |x| < 2.4375 */
2313                 id = 2;
2314                 x = (x - 1.5) / (1.0 + 1.5 * x);
2315             } else {                /* 2.4375 <= |x| < 2^66 */
2316                 id = 3;
2317                 x = -1.0 / x;
2318             }
2319         }
2320     }
2321     /* end of argument reduction */
2322     z = x * x;
2323     w = z * z;
2324     /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
2325     s1 = z * (aT[0] + w * (aT[2] + w * (aT[4] + w * (aT[6] + w * (aT[8] + w * aT[10])))));
2326     s2 = w * (aT[1] + w * (aT[3] + w * (aT[5] + w * (aT[7] + w * aT[9]))));
2327     if (id < 0)
2328         return x - x * (s1 + s2);
2329     z = atanhi[id] - (x * (s1 + s2) - atanlo[id] - x);
2330     return sign ? -z : z;
2331 }
2332
2333 /*********************************************************************
2334  *              atan2 (MSVCRT.@)
2335  *
2336  * Copied from musl: src/math/atan2.c
2337  */
2338 double CDECL atan2( double y, double x )
2339 {
2340     static const double pi     = 3.1415926535897931160E+00,
2341                  pi_lo  = 1.2246467991473531772E-16;
2342
2343     double z;
2344     unsigned int m, lx, ly, ix, iy;
2345
2346     if (isnan(x) || isnan(y))
2347         return x+y;
2348     ix = *(ULONGLONG*)&x >> 32;
2349     lx = *(ULONGLONG*)&x;
2350     iy = *(ULONGLONG*)&y >> 32;
2351     ly = *(ULONGLONG*)&y;
2352     if (((ix - 0x3ff00000) | lx) == 0)  /* x = 1.0 */
2353         return atan(y);
2354     m = ((iy >> 31) & 1) | ((ix >> 30) & 2);  /* 2*sign(x)+sign(y) */
2355     ix = ix & 0x7fffffff;
2356     iy = iy & 0x7fffffff;
2357
2358     /* when y = 0 */
2359     if ((iy | ly) == 0) {
2360         switch(m) {
2361         case 0:
2362         case 1: return y;   /* atan(+-0,+anything)=+-0 */
2363         case 2: return pi;  /* atan(+0,-anything) = pi */
2364         case 3: return -pi; /* atan(-0,-anything) =-pi */
2365         }
2366     }
2367     /* when x = 0 */
2368     if ((ix | lx) == 0)
2369         return m & 1 ? -pi / 2 : pi / 2;
2370     /* when x is INF */
2371     if (ix == 0x7ff00000) {
2372         if (iy == 0x7ff00000) {
2373             switch(m) {
2374             case 0: return pi / 4;      /* atan(+INF,+INF) */
2375             case 1: return -pi / 4;     /* atan(-INF,+INF) */
2376             case 2: return 3 * pi / 4;  /* atan(+INF,-INF) */
2377             case 3: return -3 * pi / 4; /* atan(-INF,-INF) */
2378             }
2379         } else {
2380             switch(m) {
2381             case 0: return 0.0;  /* atan(+...,+INF) */
2382             case 1: return -0.0; /* atan(-...,+INF) */
2383             case 2: return pi;   /* atan(+...,-INF) */
2384             case 3: return -pi;  /* atan(-...,-INF) */
2385             }
2386         }
2387     }
2388     /* |y/x| > 0x1p64 */
2389     if (ix + (64 << 20) < iy || iy == 0x7ff00000)
2390         return m & 1 ? -pi / 2 : pi / 2;
2391
2392     /* z = atan(|y/x|) without spurious underflow */
2393     if ((m & 2) && iy + (64 << 20) < ix)  /* |y/x| < 0x1p-64, x<0 */
2394         z = 0;
2395     else
2396         z = atan(fabs(y / x));
2397     switch (m) {
2398     case 0: return z;                /* atan(+,+) */
2399     case 1: return -z;               /* atan(-,+) */
2400     case 2: return pi - (z - pi_lo); /* atan(+,-) */
2401     default: /* case 3 */
2402         return (z - pi_lo) - pi;     /* atan(-,-) */
2403     }
2404 }
2405
2406 /* Copied from musl: src/math/rint.c */
2407 static double __rint(double x)
2408 {
2409     static const double toint = 1 / DBL_EPSILON;
2410
2411     ULONGLONG llx = *(ULONGLONG*)&x;
2412     int e = llx >> 52 & 0x7ff;
2413     int s = llx >> 63;
2414     unsigned cw;
2415     double y;
2416
2417     if (e >= 0x3ff+52)
2418         return x;
2419     cw = _controlfp(0, 0);
2420     if ((cw & _MCW_PC) != _PC_53)
2421         _controlfp(_PC_53, _MCW_PC);
2422     if (s)
2423         y = fp_barrier(x - toint) + toint;
2424     else
2425         y = fp_barrier(x + toint) - toint;
2426     if ((cw & _MCW_PC) != _PC_53)
2427         _controlfp(cw, _MCW_PC);
2428     if (y == 0)
2429         return s ? -0.0 : 0;
2430     return y;
2431 }
2432
/* Copied from musl: src/math/__rem_pio2.c
 *
 * Reduce x modulo pi/2.
 *
 * The remainder is stored as a pair of doubles, y[0] and y[1], and the
 * function returns the multiple n of pi/2 that was subtracted (negative for
 * negative x).  cos() in this file uses n & 3 to pick the quadrant.
 *
 * Four ranges are handled:
 *   - |x| up to about 9pi/4: one to four multiples of pi/2 are subtracted
 *     directly, unless x is close to a multiple of pi/2 (goto medium);
 *   - |x| below about 2^20*(pi/2): n is obtained with __rint() and pi/2 is
 *     subtracted in up to three steps using the pio2_N / pio2_Nt constants;
 *   - inf or NaN: both outputs are set to x - x and 0 is returned;
 *   - everything else: |x| is split into 24-bit pieces and handed to
 *     __rem_pio2_large(). */
static int __rem_pio2(double x, double *y)
{
    static const double pio4    = 0x1.921fb54442d18p-1,
                 invpio2 = 6.36619772367581382433e-01,
                 pio2_1  = 1.57079632673412561417e+00,
                 pio2_1t = 6.07710050650619224932e-11,
                 pio2_2  = 6.07710050630396597660e-11,
                 pio2_2t = 2.02226624879595063154e-21,
                 pio2_3  = 2.02226624871116645580e-21,
                 pio2_3t = 8.47842766036889956997e-32;

    union {double f; UINT64 i;} u = {x};
    double z, w, t, r, fn, tx[3], ty[2];
    UINT32 ix;
    int sign, n, ex, ey, i;

    /* ix: high 32 bits of |x| */
    sign = u.i >> 63;
    ix = u.i >> 32 & 0x7fffffff;
    if (ix <= 0x400f6a7a) { /* |x| ~<= 5pi/4 */
        if ((ix & 0xfffff) == 0x921fb) /* |x| ~= pi/2 or 2pi/2 */
            goto medium; /* cancellation -- use medium case */
        if (ix <= 0x4002d97c) { /* |x| ~<= 3pi/4 */
            if (!sign) {
                z = x - pio2_1; /* one round good to 85 bits */
                y[0] = z - pio2_1t;
                y[1] = (z - y[0]) - pio2_1t;
                return 1;
            } else {
                z = x + pio2_1;
                y[0] = z + pio2_1t;
                y[1] = (z - y[0]) + pio2_1t;
                return -1;
            }
        } else {
            if (!sign) {
                z = x - 2 * pio2_1;
                y[0] = z - 2 * pio2_1t;
                y[1] = (z - y[0]) - 2 * pio2_1t;
                return 2;
            } else {
                z = x + 2 * pio2_1;
                y[0] = z + 2 * pio2_1t;
                y[1] = (z - y[0]) + 2 * pio2_1t;
                return -2;
            }
        }
    }
    if (ix <= 0x401c463b) { /* |x| ~<= 9pi/4 */
        if (ix <= 0x4015fdbc) { /* |x| ~<= 7pi/4 */
            if (ix == 0x4012d97c) /* |x| ~= 3pi/2 */
                goto medium;
            if (!sign) {
                z = x - 3 * pio2_1;
                y[0] = z - 3 * pio2_1t;
                y[1] = (z - y[0]) - 3 * pio2_1t;
                return 3;
            } else {
                z = x + 3 * pio2_1;
                y[0] = z + 3 * pio2_1t;
                y[1] = (z - y[0]) + 3 * pio2_1t;
                return -3;
            }
        } else {
            if (ix == 0x401921fb) /* |x| ~= 4pi/2 */
                goto medium;
            if (!sign) {
                z = x - 4 * pio2_1;
                y[0] = z - 4 * pio2_1t;
                y[1] = (z - y[0]) - 4 * pio2_1t;
                return 4;
            } else {
                z = x + 4 * pio2_1;
                y[0] = z + 4 * pio2_1t;
                y[1] = (z - y[0]) + 4 * pio2_1t;
                return -4;
            }
        }
    }
    if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */
medium:
        /* fn = nearest integral value to x/(pi/2) */
        fn = __rint(x * invpio2);
        n = (INT32)fn;
        r = x - fn * pio2_1;
        w = fn * pio2_1t; /* 1st round, good to 85 bits */
        /* Matters with directed rounding. */
        if (r - w < -pio4) {
            n--;
            fn--;
            r = x - fn * pio2_1;
            w = fn * pio2_1t;
        } else if (r - w > pio4) {
            n++;
            fn++;
            r = x - fn * pio2_1;
            w = fn * pio2_1t;
        }
        y[0] = r - w;
        u.f = y[0];
        /* compare the exponent of the remainder (ey) with that of x (ex) to
         * see how many bits were cancelled by the subtraction */
        ey = u.i >> 52 & 0x7ff;
        ex = ix >> 20;
        if (ex - ey > 16) { /* 2nd round, good to 118 bits */
            t = r;
            w = fn * pio2_2;
            r = t - w;
            w = fn * pio2_2t - ((t - r) - w);
            y[0] = r - w;
            u.f = y[0];
            ey = u.i >> 52 & 0x7ff;
            if (ex - ey > 49) { /* 3rd round, good to 151 bits, covers all cases */
                t = r;
                w = fn * pio2_3;
                r = t - w;
                w = fn * pio2_3t - ((t - r) - w);
                y[0] = r - w;
            }
        }
        /* tail: what was lost when rounding r - w into y[0] */
        y[1] = (r - y[0]) - w;
        return n;
    }
    /*
     * all other (large) arguments
     */
    if (ix >= 0x7ff00000) {  /* x is inf or NaN */
        y[0] = y[1] = x - x;
        return 0;
    }
    /* set z = scalbn(|x|,-ilogb(x)+23) */
    u.f = x;
    u.i &= (UINT64)-1 >> 12;
    u.i |= (UINT64)(0x3ff + 23) << 52;
    z = u.f;
    /* split z into three pieces: two integer parts and the remaining tail */
    for (i = 0; i < 2; i++) {
        tx[i] = (double)(INT32)z;
        z = (z - tx[i]) * 0x1p24;
    }
    tx[i] = z;
    /* skip zero terms, first term is non-zero */
    while (tx[i] == 0.0)
        i--;
    n = __rem_pio2_large(tx, ty, (int)(ix >> 20) - (0x3ff + 23), i + 1, 1);
    /* the reduction above worked on |x|; mirror the result for negative x */
    if (sign) {
        y[0] = -ty[0];
        y[1] = -ty[1];
        return -n;
    }
    y[0] = ty[0];
    y[1] = ty[1];
    return n;
}
2583
/* Copied from musl: src/math/__sin.c
 *
 * Polynomial sine kernel for a reduced argument.  x is the head of the
 * argument and y its tail; y is only taken into account when iy is
 * non-zero. */
static double __sin(double x, double y, int iy)
{
    /* S[0]..S[5] are the coefficients of x^3, x^5, ..., x^13 */
    static const double S[] = {
        -1.66666666666666324348e-01,
         8.33333333332248946124e-03,
        -1.98412698298579493134e-04,
         2.75573137070700676789e-06,
        -2.50507602534068634195e-08,
         1.58969099521155010221e-10,
    };

    double x2, x3, x4, poly;

    x2 = x * x;
    x4 = x2 * x2;
    poly = S[1] + x2 * (S[2] + x2 * S[3]) + x2 * x4 * (S[4] + x2 * S[5]);
    x3 = x2 * x;

    if (iy != 0)
        return x - ((x2 * (0.5 * y - x3 * poly) - y) - x3 * S[0]);
    return x + x3 * (S[0] + x2 * poly);
}
2605
/* Copied from musl: src/math/__cos.c
 *
 * Polynomial cosine kernel for a reduced argument.  x is the head of the
 * argument and y its tail. */
static double __cos(double x, double y)
{
    /* C[0]..C[5] are the coefficients of x^4, x^6, ..., x^14 */
    static const double C[] = {
         4.16666666666666019037e-02,
        -1.38888888888741095749e-03,
         2.48015872894767294178e-05,
        -2.75573143513906633035e-07,
         2.08757232129817482790e-09,
        -1.13596475577881948265e-11,
    };
    double half_x2, x2, x4, poly, head;

    x2 = x * x;
    x4 = x2 * x2;
    poly = x2 * (C[0] + x2 * (C[1] + x2 * C[2])) + x4 * x4 * (C[3] + x2 * (C[4] + x2 * C[5]));
    half_x2 = 0.5 * x2;
    /* head = 1 - x^2/2; the rounding error of that subtraction is added back below */
    head = 1.0 - half_x2;
    return head + (((1.0 - head) - half_x2) + (x2 * poly - x * y));
}
2624
2625 /*********************************************************************
2626  *              cos (MSVCRT.@)
2627  *
2628  * Copied from musl: src/math/cos.c
2629  */
2630 double CDECL cos( double x )
2631 {
2632     double y[2];
2633     UINT32 ix;
2634     unsigned n;
2635
2636     ix = *(ULONGLONG*)&x >> 32;
2637     ix &= 0x7fffffff;
2638
2639     /* |x| ~< pi/4 */
2640     if (ix <= 0x3fe921fb) {
2641         if (ix < 0x3e46a09e) { /* |x| < 2**-27 * sqrt(2) */
2642             /* raise inexact if x!=0 */
2643             fp_barrier(x + 0x1p120f);
2644             return 1.0;
2645         }
2646         return __cos(x, 0);
2647     }
2648
2649     /* cos(Inf or NaN) is NaN */
2650     if (isinf(x)) return math_error(_DOMAIN, "cos", x, 0, x - x);
2651     if (ix >= 0x7ff00000)
2652         return x - x;
2653
2654     /* argument reduction */
2655     n = __rem_pio2(x, y);
2656     switch (n & 3) {
2657     case 0: return __cos(y[0], y[1]);
2658     case 1: return -__sin(y[0], y[1], 1);
2659     case 2: return -__cos(y[0], y[1]);
2660     default: return __sin(y[0], y[1], 1);
2661     }
2662 }
2663
/* Copied from musl: src/math/expm1.c
 *
 * Compute exp(x) - 1.
 *
 * Special cases handled up front for |x| >= 56*ln2:
 *   - NaN is returned as is;
 *   - +inf returns x, -inf returns -1;
 *   - large negative x reports _UNDERFLOW through math_error() with -1 as
 *     the result;
 *   - x above o_threshold reports _OVERFLOW through math_error().
 * Note that both math_error() calls pass "exp" as the function name.
 *
 * Otherwise x is reduced to hi - lo with x = k*ln2 + (hi - lo), a rational
 * approximation is evaluated on the reduced value, and the result is scaled
 * by 2^k. */
static double CDECL __expm1(double x)
{
    static const double o_threshold = 7.09782712893383973096e+02,
        ln2_hi = 6.93147180369123816490e-01,
        ln2_lo = 1.90821492927058770002e-10,
        invln2 = 1.44269504088896338700e+00,
        Q1 = -3.33333333333331316428e-02,
        Q2 = 1.58730158725481460165e-03,
        Q3 = -7.93650757867487942473e-05,
        Q4 = 4.00821782732936239552e-06,
        Q5 = -2.01099218183624371326e-07;

    double y, hi, lo, c, t, e, hxs, hfx, r1, twopk;
    union {double f; UINT64 i;} u = {x};
    /* hx: high 32 bits of |x| */
    UINT32 hx = u.i >> 32 & 0x7fffffff;
    int k, sign = u.i >> 63;

    /* filter out huge and non-finite argument */
    if (hx >= 0x4043687A) { /* if |x|>=56*ln2 */
        if (isnan(x))
            return x;
        if (isinf(x))
            return sign ? -1 : x;
        if (sign)
            return math_error(_UNDERFLOW, "exp", x, 0, -1);
        if (x > o_threshold)
            return math_error(_OVERFLOW, "exp", x, 0, x * 0x1p1023);
    }

    /* argument reduction */
    if (hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */
        if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
            if (!sign) {
                hi = x - ln2_hi;
                lo = ln2_lo;
                k = 1;
            } else {
                hi = x + ln2_hi;
                lo = -ln2_lo;
                k = -1;
            }
        } else {
            /* k = x/ln2 rounded to nearest (truncation after adding +-0.5) */
            k = invln2 * x + (sign ? -0.5 : 0.5);
            t = k;
            hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
            lo = t * ln2_lo;
        }
        x = hi - lo;
        /* c: rounding error of hi - lo, applied as a correction further down */
        c = (hi - x) - lo;
    } else if (hx < 0x3c900000) { /* |x| < 2**-54, return x */
        fp_barrier(x + 0x1p120f);
        if (hx < 0x00100000)
            fp_barrier((float)x);
        return x;
    } else
        k = 0;

    /* x is now in primary range */
    hfx = 0.5 * x;
    hxs = x * hfx;
    r1 = 1.0 + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5))));
    t = 3.0 - r1 * hfx;
    e = hxs * ((r1 - t) / (6.0 - x * t));
    if (k == 0) /* c is 0 */
        return x - (x * e - hxs);
    e = x * (e - c) - c;
    e -= hxs;
    /* exp(x) ~ 2^k (x_reduced - e + 1) */
    if (k == -1)
        return 0.5 * (x - e) - 0.5;
    if (k == 1) {
        if (x < -0.25)
            return -2.0 * (e - (x + 0.5));
        return 1.0 + 2.0 * (x - e);
    }
    u.i = (UINT64)(0x3ff + k) << 52; /* 2^k */
    twopk = u.f;
    if (k < 0 || k > 56) { /* suffice to return exp(x)-1 */
        y = x - e + 1.0;
        /* 2^1024 is not representable, so scale in two steps */
        if (k == 1024)
            y = y * 2.0 * 0x1p1023;
        else
            y = y * twopk;
        return y - 1.0;
    }
    u.i = (UINT64)(0x3ff - k) << 52; /* 2^-k */
    if (k < 20)
        y = (x - e + (1 - u.f)) * twopk;
    else
        y = (x - (e + u.f) + 1) * twopk;
    return y;
}
2757
2758 static double __expo2(double x, double sign)
2759 {
2760     static const int k = 2043;
2761     static const double kln2 = 0x1.62066151add8bp+10;
2762     double scale;
2763
2764     *(UINT64*)&scale = (UINT64)(0x3ff + k / 2) << 52;
2765     return exp(x - kln2) * (sign * scale) * scale;
2766 }
2767
2768 /*********************************************************************
2769  *              cosh (MSVCRT.@)
2770  *
2771  * Copied from musl: src/math/cosh.c
2772  */
2773 double CDECL cosh( double x )
2774 {
2775     UINT64 ux = *(UINT64*)&x;
2776     UINT64 sign = ux & 0x8000000000000000ULL;
2777     UINT32 w;
2778     double t;
2779
2780     /* |x| */
2781     ux &= (uint64_t)-1 / 2;
2782     x = *(double*)&ux;
2783     w = ux >> 32;
2784
2785     /* |x| < log(2) */
2786     if (w < 0x3fe62e42) {
2787         if (w < 0x3ff00000 - (26 << 20)) {
2788             fp_barrier(x + 0x1p120f);
2789             return 1;
2790         }
2791         t = __expm1(x);
2792         return 1 + t * t / (2 * (1 + t));
2793     }
2794
2795     /* |x| < log(DBL_MAX) */
2796     if (w < 0x40862e42) {
2797         t = exp(x);
2798         /* note: if x>log(0x1p26) then the 1/t is not needed */
2799         return 0.5 * (t + 1 / t);
2800     }
2801
2802     /* |x| > log(DBL_MAX) or nan */
2803     /* note: the result is stored to handle overflow */
2804     if (ux > 0x7ff0000000000000ULL)
2805         *(UINT64*)&t = ux | sign | 0x0008000000000000ULL;
2806     else
2807         t = __expo2(x, 1.0);
2808     return t;
2809 }
2810
2811 /* Copied from musl: src/math/exp_data.c */
2812 static const UINT64 exp_T[] = {
2813     0x0ULL, 0x3ff0000000000000ULL,
2814     0x3c9b3b4f1a88bf6eULL, 0x3feff63da9fb3335ULL,
2815     0xbc7160139cd8dc5dULL, 0x3fefec9a3e778061ULL,
2816     0xbc905e7a108766d1ULL, 0x3fefe315e86e7f85ULL,
2817     0x3c8cd2523567f613ULL, 0x3fefd9b0d3158574ULL,
2818     0xbc8bce8023f98efaULL, 0x3fefd06b29ddf6deULL,
2819     0x3c60f74e61e6c861ULL, 0x3fefc74518759bc8ULL,
2820     0x3c90a3e45b33d399ULL, 0x3fefbe3ecac6f383ULL,
2821     0x3c979aa65d837b6dULL, 0x3fefb5586cf9890fULL,
2822     0x3c8eb51a92fdeffcULL, 0x3fefac922b7247f7ULL,
2823     0x3c3ebe3d702f9cd1ULL, 0x3fefa3ec32d3d1a2ULL,
2824     0xbc6a033489906e0bULL, 0x3fef9b66affed31bULL,
2825     0xbc9556522a2fbd0eULL, 0x3fef9301d0125b51ULL,
2826     0xbc5080ef8c4eea55ULL, 0x3fef8abdc06c31ccULL,
2827     0xbc91c923b9d5f416ULL, 0x3fef829aaea92de0ULL,
2828     0x3c80d3e3e95c55afULL, 0x3fef7a98c8a58e51ULL,
2829     0xbc801b15eaa59348ULL, 0x3fef72b83c7d517bULL,
2830     0xbc8f1ff055de323dULL, 0x3fef6af9388c8deaULL,
2831     0x3c8b898c3f1353bfULL, 0x3fef635beb6fcb75ULL,
2832     0xbc96d99c7611eb26ULL, 0x3fef5be084045cd4ULL,
2833     0x3c9aecf73e3a2f60ULL, 0x3fef54873168b9aaULL,
2834     0xbc8fe782cb86389dULL, 0x3fef4d5022fcd91dULL,
2835     0x3c8a6f4144a6c38dULL, 0x3fef463b88628cd6ULL,
2836     0x3c807a05b0e4047dULL, 0x3fef3f49917ddc96ULL,
2837     0x3c968efde3a8a894ULL, 0x3fef387a6e756238ULL,
2838     0x3c875e18f274487dULL, 0x3fef31ce4fb2a63fULL,
2839     0x3c80472b981fe7f2ULL, 0x3fef2b4565e27cddULL,
2840     0xbc96b87b3f71085eULL, 0x3fef24dfe1f56381ULL,
2841     0x3c82f7e16d09ab31ULL, 0x3fef1e9df51fdee1ULL,
2842     0xbc3d219b1a6fbffaULL, 0x3fef187fd0dad990ULL,
2843     0x3c8b3782720c0ab4ULL, 0x3fef1285a6e4030bULL,
2844     0x3c6e149289cecb8fULL, 0x3fef0cafa93e2f56ULL,
2845     0x3c834d754db0abb6ULL, 0x3fef06fe0a31b715ULL,
2846     0x3c864201e2ac744cULL, 0x3fef0170fc4cd831ULL,
2847     0x3c8fdd395dd3f84aULL, 0x3feefc08b26416ffULL,
2848     0xbc86a3803b8e5b04ULL, 0x3feef6c55f929ff1ULL,
2849     0xbc924aedcc4b5068ULL, 0x3feef1a7373aa9cbULL,
2850     0xbc9907f81b512d8eULL, 0x3feeecae6d05d866ULL,
2851     0xbc71d1e83e9436d2ULL, 0x3feee7db34e59ff7ULL,
2852     0xbc991919b3ce1b15ULL, 0x3feee32dc313a8e5ULL,
2853     0x3c859f48a72a4c6dULL, 0x3feedea64c123422ULL,
2854     0xbc9312607a28698aULL, 0x3feeda4504ac801cULL,
2855     0xbc58a78f4817895bULL, 0x3feed60a21f72e2aULL,
2856     0xbc7c2c9b67499a1bULL, 0x3feed1f5d950a897ULL,
2857     0x3c4363ed60c2ac11ULL, 0x3feece086061892dULL,
2858     0x3c9666093b0664efULL, 0x3feeca41ed1d0057ULL,
2859     0x3c6ecce1daa10379ULL, 0x3feec6a2b5c13cd0ULL,
2860     0x3c93ff8e3f0f1230ULL, 0x3feec32af0d7d3deULL,
2861     0x3c7690cebb7aafb0ULL, 0x3feebfdad5362a27ULL,
2862     0x3c931dbdeb54e077ULL, 0x3feebcb299fddd0dULL,
2863     0xbc8f94340071a38eULL, 0x3feeb9b2769d2ca7ULL,
2864     0xbc87deccdc93a349ULL, 0x3feeb6daa2cf6642ULL,
2865     0xbc78dec6bd0f385fULL, 0x3feeb42b569d4f82ULL,
2866     0xbc861246ec7b5cf6ULL, 0x3feeb1a4ca5d920fULL,
2867     0x3c93350518fdd78eULL, 0x3feeaf4736b527daULL,
2868     0x3c7b98b72f8a9b05ULL, 0x3feead12d497c7fdULL,
2869     0x3c9063e1e21c5409ULL, 0x3feeab07dd485429ULL,
2870     0x3c34c7855019c6eaULL, 0x3feea9268a5946b7ULL,
2871     0x3c9432e62b64c035ULL, 0x3feea76f15ad2148ULL,
2872     0xbc8ce44a6199769fULL, 0x3feea5e1b976dc09ULL,
2873     0xbc8c33c53bef4da8ULL, 0x3feea47eb03a5585ULL,
2874     0xbc845378892be9aeULL, 0x3feea34634ccc320ULL,
2875     0xbc93cedd78565858ULL, 0x3feea23882552225ULL,
2876     0x3c5710aa807e1964ULL, 0x3feea155d44ca973ULL,
2877     0xbc93b3efbf5e2228ULL, 0x3feea09e667f3bcdULL,
2878     0xbc6a12ad8734b982ULL, 0x3feea012750bdabfULL,
2879     0xbc6367efb86da9eeULL, 0x3fee9fb23c651a2fULL,
2880     0xbc80dc3d54e08851ULL, 0x3fee9f7df9519484ULL,
2881     0xbc781f647e5a3ecfULL, 0x3fee9f75e8ec5f74ULL,
2882     0xbc86ee4ac08b7db0ULL, 0x3fee9f9a48a58174ULL,
2883     0xbc8619321e55e68aULL, 0x3fee9feb564267c9ULL,
2884     0x3c909ccb5e09d4d3ULL, 0x3feea0694fde5d3fULL,
2885     0xbc7b32dcb94da51dULL, 0x3feea11473eb0187ULL,
2886     0x3c94ecfd5467c06bULL, 0x3feea1ed0130c132ULL,
2887     0x3c65ebe1abd66c55ULL, 0x3feea2f336cf4e62ULL,
2888     0xbc88a1c52fb3cf42ULL, 0x3feea427543e1a12ULL,
2889     0xbc9369b6f13b3734ULL, 0x3feea589994cce13ULL,
2890     0xbc805e843a19ff1eULL, 0x3feea71a4623c7adULL,
2891     0xbc94d450d872576eULL, 0x3feea8d99b4492edULL,
2892     0x3c90ad675b0e8a00ULL, 0x3feeaac7d98a6699ULL,
2893     0x3c8db72fc1f0eab4ULL, 0x3feeace5422aa0dbULL,
2894     0xbc65b6609cc5e7ffULL, 0x3feeaf3216b5448cULL,
2895     0x3c7bf68359f35f44ULL, 0x3feeb1ae99157736ULL,
2896     0xbc93091fa71e3d83ULL, 0x3feeb45b0b91ffc6ULL,
2897     0xbc5da9b88b6c1e29ULL, 0x3feeb737b0cdc5e5ULL,
2898     0xbc6c23f97c90b959ULL, 0x3feeba44cbc8520fULL,
2899     0xbc92434322f4f9aaULL, 0x3feebd829fde4e50ULL,
2900     0xbc85ca6cd7668e4bULL, 0x3feec0f170ca07baULL,
2901     0x3c71affc2b91ce27ULL, 0x3feec49182a3f090ULL,
2902     0x3c6dd235e10a73bbULL, 0x3feec86319e32323ULL,
2903     0xbc87c50422622263ULL, 0x3feecc667b5de565ULL,
2904     0x3c8b1c86e3e231d5ULL, 0x3feed09bec4a2d33ULL,
2905     0xbc91bbd1d3bcbb15ULL, 0x3feed503b23e255dULL,
2906     0x3c90cc319cee31d2ULL, 0x3feed99e1330b358ULL,
2907     0x3c8469846e735ab3ULL, 0x3feede6b5579fdbfULL,
2908     0xbc82dfcd978e9db4ULL, 0x3feee36bbfd3f37aULL,
2909     0x3c8c1a7792cb3387ULL, 0x3feee89f995ad3adULL,
2910     0xbc907b8f4ad1d9faULL, 0x3feeee07298db666ULL,
2911     0xbc55c3d956dcaebaULL, 0x3feef3a2b84f15fbULL,
2912     0xbc90a40e3da6f640ULL, 0x3feef9728de5593aULL,
2913     0xbc68d6f438ad9334ULL, 0x3feeff76f2fb5e47ULL,
2914     0xbc91eee26b588a35ULL, 0x3fef05b030a1064aULL,
2915     0x3c74ffd70a5fddcdULL, 0x3fef0c1e904bc1d2ULL,
2916     0xbc91bdfbfa9298acULL, 0x3fef12c25bd71e09ULL,
2917     0x3c736eae30af0cb3ULL, 0x3fef199bdd85529cULL,
2918     0x3c8ee3325c9ffd94ULL, 0x3fef20ab5fffd07aULL,
2919     0x3c84e08fd10959acULL, 0x3fef27f12e57d14bULL,
2920     0x3c63cdaf384e1a67ULL, 0x3fef2f6d9406e7b5ULL,
2921     0x3c676b2c6c921968ULL, 0x3fef3720dcef9069ULL,
2922     0xbc808a1883ccb5d2ULL, 0x3fef3f0b555dc3faULL,
2923     0xbc8fad5d3ffffa6fULL, 0x3fef472d4a07897cULL,
2924     0xbc900dae3875a949ULL, 0x3fef4f87080d89f2ULL,
2925     0x3c74a385a63d07a7ULL, 0x3fef5818dcfba487ULL,
2926     0xbc82919e2040220fULL, 0x3fef60e316c98398ULL,
2927     0x3c8e5a50d5c192acULL, 0x3fef69e603db3285ULL,
2928     0x3c843a59ac016b4bULL, 0x3fef7321f301b460ULL,
2929     0xbc82d52107b43e1fULL, 0x3fef7c97337b9b5fULL,
2930     0xbc892ab93b470dc9ULL, 0x3fef864614f5a129ULL,
2931     0x3c74b604603a88d3ULL, 0x3fef902ee78b3ff6ULL,
2932     0x3c83c5ec519d7271ULL, 0x3fef9a51fbc74c83ULL,
2933     0xbc8ff7128fd391f0ULL, 0x3fefa4afa2a490daULL,
2934     0xbc8dae98e223747dULL, 0x3fefaf482d8e67f1ULL,
2935     0x3c8ec3bc41aa2008ULL, 0x3fefba1bee615a27ULL,
2936     0x3c842b94c3a9eb32ULL, 0x3fefc52b376bba97ULL,
2937     0x3c8a64a931d185eeULL, 0x3fefd0765b6e4540ULL,
2938     0xbc8e37bae43be3edULL, 0x3fefdbfdad9cbe14ULL,
2939     0x3c77893b4d91cd9dULL, 0x3fefe7c1819e90d8ULL,
2940     0x3c5305c14160cc89ULL, 0x3feff3c22b8f71f1ULL
2941 };
2942
2943 /*********************************************************************
2944  *              exp (MSVCRT.@)
2945  *
2946  * Copied from musl: src/math/exp.c
2947  */
2948 double CDECL exp( double x )
2949 {
2950     static const double C[] = {
2951         0x1.ffffffffffdbdp-2,
2952         0x1.555555555543cp-3,
2953         0x1.55555cf172b91p-5,
2954         0x1.1111167a4d017p-7
2955     };
2956     static const double invln2N = 0x1.71547652b82fep0 * (1 << 7),
2957         negln2hiN = -0x1.62e42fefa0000p-8,
2958         negln2loN = -0x1.cf79abc9e3b3ap-47;
2959
2960     UINT32 abstop;
2961     UINT64 ki, idx, top, sbits;
2962     double kd, z, r, r2, scale, tail, tmp;
2963
2964     abstop = (*(UINT64*)&x >> 52) & 0x7ff;
2965     if (abstop -  0x3c9 >= 0x408 - 0x3c9) {
2966         if (abstop - 0x3c9 >= 0x80000000)
2967             /* Avoid spurious underflow for tiny x. */
2968             /* Note: 0 is common input. */
2969             return 1.0 + x;
2970         if (abstop >= 0x409) {
2971             if (*(UINT64*)&x == 0xfff0000000000000ULL)
2972                 return 0.0;
2973 #if _MSVCR_VER == 0
2974             if (*(UINT64*)&x > 0x7ff0000000000000ULL)
2975                 return math_error(_DOMAIN, "exp", x, 0, 1.0 + x);
2976 #endif
2977             if (abstop >= 0x7ff)
2978                 return 1.0 + x;
2979             if (*(UINT64*)&x >> 63)
2980                 return math_error(_UNDERFLOW, "exp", x, 0, fp_barrier(DBL_MIN) * DBL_MIN);
2981             else
2982                 return math_error(_OVERFLOW, "exp", x, 0, fp_barrier(DBL_MAX) * DBL_MAX);
2983         }
2984         /* Large x is special cased below. */
2985         abstop = 0;
2986     }
2987
2988     /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
2989     /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
2990     z = invln2N * x;
2991     kd = __round(z);
2992     ki = (INT64)kd;
2993
2994     r = x + kd * negln2hiN + kd * negln2loN;
2995     /* 2^(k/N) ~= scale * (1 + tail). */
2996     idx = 2 * (ki % (1 << 7));
2997     top = ki << (52 - 7);
2998     tail = *(double*)&exp_T[idx];
2999     /* This is only a valid scale when -1023*N < k < 1024*N. */
3000     sbits = exp_T[idx + 1] + top;
3001     /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
3002     /* Evaluation is optimized assuming superscalar pipelined execution. */
3003     r2 = r * r;
3004     /* Without fma the worst case error is 0.25/N ulp larger. */
3005     /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
3006     tmp = tail + r + r2 * (C[0] + r * C[1]) + r2 * r2 * (C[2] + r * C[3]);
3007     if (abstop == 0) {
3008         /* Handle cases that may overflow or underflow when computing the result that
3009            is scale*(1+TMP) without intermediate rounding. The bit representation of
3010            scale is in SBITS, however it has a computed exponent that may have
3011            overflown into the sign bit so that needs to be adjusted before using it as
3012            a double. (int32_t)KI is the k used in the argument reduction and exponent
3013            adjustment of scale, positive k here means the result may overflow and
3014            negative k means the result may underflow. */
3015         double scale, y;
3016
3017         if ((ki & 0x80000000) == 0) {
3018             /* k > 0, the exponent of scale might have overflowed by <= 460. */
3019             sbits -= 1009ull << 52;
3020             scale = *(double*)&sbits;
3021             y = 0x1p1009 * (scale + scale * tmp);
3022             if (isinf(y))
3023                 return math_error(_OVERFLOW, "exp", x, 0, y);
3024             return y;
3025         }
3026         /* k < 0, need special care in the subnormal range. */
3027         sbits += 1022ull << 52;
3028         scale = *(double*)&sbits;
3029         y = scale + scale * tmp;
3030         if (y < 1.0) {
3031             /* Round y to the right precision before scaling it into the subnormal
3032                range to avoid double rounding that can cause 0.5+E/2 ulp error where
3033                E is the worst-case ulp error outside the subnormal range. So this
3034                is only useful if the goal is better than 1 ulp worst-case error. */
3035             double hi, lo;
3036             lo = scale - y + scale * tmp;
3037             hi = 1.0 + y;
3038             lo = 1.0 - hi + y + lo;
3039             y = hi + lo - 1.0;
3040             /* Avoid -0.0 with downward rounding. */
3041             if (y == 0.0)
3042                 y = 0.0;
3043             /* The underflow exception needs to be signaled explicitly. */
3044             fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022);
3045             y = 0x1p-1022 * y;
3046             return math_error(_UNDERFLOW, "exp", x, 0, y);
3047         }
3048         y = 0x1p-1022 * y;
3049         return y;
3050     }
3051     scale = *(double*)&sbits;
3052     /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
3053        is no spurious underflow here even without fma. */
3054     return scale + scale * tmp;
3055 }
3056
3057 /*********************************************************************
3058  *              fmod (MSVCRT.@)
3059  *
3060  * Copied from musl: src/math/fmod.c
3061  */
3062 double CDECL fmod( double x, double y )
3063 {
3064     UINT64 xi = *(UINT64*)&x;
3065     UINT64 yi = *(UINT64*)&y;
3066     int ex = xi >> 52 & 0x7ff;
3067     int ey = yi >> 52 & 0x7ff;
3068     int sx = xi >> 63;
3069     UINT64 i;
3070
3071     if (isinf(x)) return math_error(_DOMAIN, "fmod", x, y, (x * y) / (x * y));
3072     if (yi << 1 == 0 || isnan(y) || ex == 0x7ff)
3073         return (x * y) / (x * y);
3074     if (xi << 1 <= yi << 1) {
3075         if (xi << 1 == yi << 1)
3076             return 0 * x;
3077         return x;
3078     }
3079
3080     /* normalize x and y */
3081     if (!ex) {
3082         for (i = xi << 12; i >> 63 == 0; ex--, i <<= 1);
3083         xi <<= -ex + 1;
3084     } else {
3085         xi &= -1ULL >> 12;
3086         xi |= 1ULL << 52;
3087     }
3088     if (!ey) {
3089         for (i = yi << 12; i >> 63 == 0; ey--, i <<= 1);
3090         yi <<= -ey + 1;
3091     } else {
3092         yi &= -1ULL >> 12;
3093         yi |= 1ULL << 52;
3094     }
3095
3096     /* x mod y */
3097     for (; ex > ey; ex--) {
3098         i = xi - yi;
3099         if (i >> 63 == 0) {
3100             if (i == 0)
3101                 return 0 * x;
3102             xi = i;
3103         }
3104         xi <<= 1;
3105     }
3106     i = xi - yi;
3107     if (i >> 63 == 0) {
3108         if (i == 0)
3109             return 0 * x;
3110         xi = i;
3111     }
3112     for (; xi >> 52 == 0; xi <<= 1, ex--);
3113
3114     /* scale result */
3115     if (ex > 0) {
3116         xi -= 1ULL << 52;
3117         xi |= (UINT64)ex << 52;
3118     } else {
3119         xi >>= -ex + 1;
3120     }
3121     xi |= (UINT64)sx << 63;
3122     return *(double*)&xi;
3123 }
3124
/*********************************************************************
 *              log (MSVCRT.@)
 *
 * Copied from musl: src/math/log.c src/math/log_data.c
 *
 * Natural logarithm of x.
 *
 * Inputs whose bit pattern lies in
 * [0x3fee000000000000, 0x3ff1090000000000), i.e. close to 1.0, are
 * evaluated with the polynomial B[] in r = x - 1.  Every other positive
 * finite input is decomposed as x = 2^k * z and evaluated as
 * log1p(z/c - 1) + log(c) + k*ln2 using the tables T[] and T2[] and the
 * polynomial A[].
 *
 * Special inputs:
 *   - +0 and -0: passed to math_error() as _SING with -inf as the value.
 *   - +inf and NaN (either sign): returned unchanged.
 *   - any other input with the sign bit set (including -inf): passed to
 *     math_error() as _DOMAIN with (x - x) / (x - x) as the value.
 *   - subnormal: multiplied by 2^52 first; the exponent is corrected
 *     directly in the bit representation.
 */
double CDECL log( double x )
{
    /* ln(2) split into two parts; used as kd * Ln2hi and kd * Ln2lo below. */
    static const double Ln2hi = 0x1.62e42fefa3800p-1,
        Ln2lo = 0x1.ef35793c76730p-45;
    /* Polynomial coefficients for the table-driven path. */
    static const double A[] = {
        -0x1.0000000000001p-1,
        0x1.555555551305bp-2,
        -0x1.fffffffeb459p-3,
        0x1.999b324f10111p-3,
        -0x1.55575e506c89fp-3
    };
    /* Polynomial coefficients for the path handling x close to 1.0. */
    static const double B[] = {
        -0x1p-1,
        0x1.5555555555577p-2,
        -0x1.ffffffffffdcbp-3,
        0x1.999999995dd0cp-3,
        -0x1.55555556745a7p-3,
        0x1.24924a344de3p-3,
        -0x1.fffffa4423d65p-4,
        0x1.c7184282ad6cap-4,
        -0x1.999eb43b068ffp-4,
        0x1.78182f7afd085p-4,
        -0x1.5521375d145cdp-4
    };
    /* Per-subinterval (invc, logc) pairs, indexed by i computed below. */
    static const struct {
        double invc, logc;
    } T[] = {
        {0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
        {0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
        {0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
        {0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2},
        {0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2},
        {0x1.69147332f0cbap+0, -0x1.602d076180000p-2},
        {0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2},
        {0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2},
        {0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2},
        {0x1.614b36b9ddc14p+0, -0x1.49da7fda85000p-2},
        {0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2},
        {0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2},
        {0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2},
        {0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2},
        {0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2},
        {0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2},
        {0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2},
        {0x1.52aff42064583p+0, -0x1.1e9e129279000p-2},
        {0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2},
        {0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2},
        {0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2},
        {0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2},
        {0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2},
        {0x1.4880524d48434p+0, -0x1.feb224586f000p-3},
        {0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3},
        {0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3},
        {0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3},
        {0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3},
        {0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3},
        {0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3},
        {0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3},
        {0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3},
        {0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3},
        {0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3},
        {0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3},
        {0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3},
        {0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3},
        {0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3},
        {0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3},
        {0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3},
        {0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3},
        {0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3},
        {0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3},
        {0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3},
        {0x1.293726014b530p+0, -0x1.31b996b490000p-3},
        {0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3},
        {0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3},
        {0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3},
        {0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3},
        {0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3},
        {0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4},
        {0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4},
        {0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4},
        {0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4},
        {0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4},
        {0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4},
        {0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4},
        {0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4},
        {0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4},
        {0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4},
        {0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4},
        {0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4},
        {0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4},
        {0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4},
        {0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5},
        {0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5},
        {0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5},
        {0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5},
        {0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5},
        {0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5},
        {0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5},
        {0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5},
        {0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6},
        {0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6},
        {0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6},
        {0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6},
        {0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7},
        {0x1.02865137932a9p+0, -0x1.419355daa0000p-7},
        {0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8},
        {0x1.008040614b195p+0, -0x1.0040979240000p-9},
        {0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9},
        {0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7},
        {0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6},
        {0x1.f25f63ceeadcdp-1, 0x1.b9fc114890000p-6},
        {0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5},
        {0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5},
        {0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5},
        {0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5},
        {0x1.e01e009609a56p-1, 0x1.07598e598c000p-4},
        {0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4},
        {0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4},
        {0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4},
        {0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4},
        {0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4},
        {0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4},
        {0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4},
        {0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4},
        {0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3},
        {0x1.bf583eeece73fp-1, 0x1.147858292b000p-3},
        {0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3},
        {0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3},
        {0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3},
        {0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3},
        {0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3},
        {0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3},
        {0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3},
        {0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3},
        {0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3},
        {0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3},
        {0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3},
        {0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3},
        {0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3},
        {0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3},
        {0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3},
        {0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3},
        {0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3},
        {0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2},
        {0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2},
        {0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2},
        {0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2},
        {0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2},
        {0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2},
        {0x1.8060195f40260p-1, 0x1.2595fd7636800p-2},
        {0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2},
        {0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2},
        {0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
        {0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
        {0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2}
    };
    /* Per-subinterval (chi, clo) pairs; chi and clo are both subtracted
       from z before the multiplication by invc.
       NOTE(review): presumably the high and low parts of the subinterval
       centre c -- confirm against musl's log_data.c. */
    static const struct {
        double chi, clo;
    } T2[] = {
        {0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
        {0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
        {0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
        {0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57},
        {0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56},
        {0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55},
        {0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55},
        {0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56},
        {0x1.710000e86978p-1, 0x1.bff6671097952p-56},
        {0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55},
        {0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57},
        {0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57},
        {0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55},
        {0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56},
        {0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55},
        {0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55},
        {0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55},
        {0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55},
        {0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55},
        {0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55},
        {0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55},
        {0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56},
        {0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55},
        {0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55},
        {0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55},
        {0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56},
        {0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55},
        {0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56},
        {0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55},
        {0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55},
        {0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60},
        {0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55},
        {0x1.a10001145b006p-1, 0x1.4ff489958da56p-56},
        {0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55},
        {0x1.a500010971d79p-1, 0x1.8fecadd78793p-55},
        {0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55},
        {0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55},
        {0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57},
        {0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55},
        {0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57},
        {0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58},
        {0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56},
        {0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56},
        {0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55},
        {0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56},
        {0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57},
        {0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57},
        {0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55},
        {0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55},
        {0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57},
        {0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55},
        {0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55},
        {0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56},
        {0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57},
        {0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55},
        {0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55},
        {0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56},
        {0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55},
        {0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58},
        {0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56},
        {0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56},
        {0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55},
        {0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55},
        {0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57},
        {0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56},
        {0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56},
        {0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56},
        {0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58},
        {0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55},
        {0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56},
        {0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58},
        {0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55},
        {0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59},
        {0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55},
        {0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55},
        {0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57},
        {0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56},
        {0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57},
        {0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56},
        {0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57},
        {0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55},
        {0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54},
        {0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54},
        {0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55},
        {0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57},
        {0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54},
        {0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55},
        {0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56},
        {0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55},
        {0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54},
        {0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54},
        {0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55},
        {0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54},
        {0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54},
        {0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57},
        {0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54},
        {0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54},
        {0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54},
        {0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56},
        {0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56},
        {0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56},
        {0x1.2b00014556313p+0, -0x1.2808233f21f02p-54},
        {0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55},
        {0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55},
        {0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55},
        {0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54},
        {0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54},
        {0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55},
        {0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54},
        {0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55},
        {0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56},
        {0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54},
        {0x1.410001532aff4p+0, 0x1.7f8375f198524p-57},
        {0x1.4300017478b29p+0, 0x1.301e672dc5143p-55},
        {0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55},
        {0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54},
        {0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54},
        {0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54},
        {0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54},
        {0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54},
        {0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57},
        {0x1.530001605277ap+0, -0x1.6bfcece233209p-54},
        {0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55},
        {0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54},
        {0x1.5900017e61012p+0, 0x1.87ec581afef9p-55},
        {0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
        {0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
        {0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54}
    };

    double w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
    UINT64 ix, iz, tmp;
    UINT32 top;
    int k, i;

    ix = *(UINT64*)&x;
    /* Top 16 bits of x: sign, 11 exponent bits, 4 leading mantissa bits. */
    top = ix >> 48;
    /* One unsigned range check: true exactly when
       0x3fee000000000000 <= ix < 0x3ff1090000000000. */
    if (ix - 0x3fee000000000000ULL < 0x3090000000000ULL) {
        double rhi, rlo;

        /* Handle close to 1.0 inputs separately. */
        /* Fix sign of zero with downward rounding when x==1. */
        if (ix == 0x3ff0000000000000ULL)
            return 0;
        r = x - 1.0;
        r2 = r * r;
        r3 = r * r2;
        y = r3 * (B[1] + r * B[2] + r2 * B[3] + r3 * (B[4] + r * B[5] + r2 * B[6] +
                    r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
        /* Worst-case error is around 0.507 ULP. */
        /* Split r into rhi + rlo; the r * 2^27 add/subtract strips the low
           bits of r from rhi (presumably so that rhi * rhi rounds cleanly). */
        w = r * 0x1p27;
        rhi = r + w - w;
        rlo = r - rhi;
        w = rhi * rhi * B[0]; /* B[0] == -0.5. */
        hi = r + w;
        lo = r - hi + w;
        lo += B[0] * rlo * (rhi + r);
        y += lo;
        y += hi;
        return y;
    }
    /* Unsigned wrap-around: true when top < 0x0010 or top >= 0x7ff0, which
       also covers every value with the sign bit set. */
    if (top - 0x0010 >= 0x7ff0 - 0x0010) {
        /* x < 0x1p-1022 or inf or nan. */
        if (ix * 2 == 0)
            /* +/-0: the sign of the numerator is chosen so the quotient is -inf. */
            return math_error(_SING, "log", x, 0, (top & 0x8000 ? 1.0 : -1.0) / x);
        if (ix == 0x7ff0000000000000ULL) /* log(inf) == inf. */
            return x;
        /* NaN of either sign is returned as is. */
        if ((top & 0x7ff0) == 0x7ff0 && (ix & 0xfffffffffffffULL))
            return x;
        if (top & 0x8000)
            return math_error(_DOMAIN, "log", x, 0, (x - x) / (x - x));
        /* x is subnormal, normalize it. */
        x *= 0x1p52;
        ix = *(UINT64*)&x;
        /* Undo the 2^52 scaling in the exponent field. */
        ix -= 52ULL << 52;
    }

    /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
       The range is split into N subintervals.
       The ith subinterval contains z and c is near its center. */
    tmp = ix - 0x3fe6000000000000ULL;
    /* Table index: the 7 bits just below the exponent field of tmp. */
    i = (tmp >> (52 - 7)) % (1 << 7);
    k = (INT64)tmp >> 52; /* arithmetic shift */
    /* Remove k from the exponent of ix to obtain the bits of z. */
    iz = ix - (tmp & 0xfffULL << 52);
    invc = T[i].invc;
    logc = T[i].logc;
    z = *(double*)&iz;

    /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
    /* r ~= z/c - 1, |r| < 1/(2*N). */
    r = (z - T2[i].chi - T2[i].clo) * invc;
    kd = (double)k;

    /* hi + lo = r + log(c) + k*Ln2. */
    w = kd * Ln2hi + logc;
    hi = w + r;
    lo = w - hi + r + kd * Ln2lo;

    /* log(x) = lo + (log1p(r) - r) + hi. */
    r2 = r * r; /* rounding error: 0x1p-54/N^2. */
    /* Worst case error if |y| > 0x1p-5:
       0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
       Worst case error if |y| > 0x1p-4:
       0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
    y = lo + r2 * A[0] +
        r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
    return y;
}
3497
3498 /*********************************************************************
3499  *              log10 (MSVCRT.@)
3500  */
3501 double CDECL log10( double x )
3502 {
3503     static const double ivln10hi = 4.34294481878168880939e-01,
3504         ivln10lo = 2.50829467116452752298e-11,
3505         log10_2hi = 3.01029995663611771306e-01,
3506         log10_2lo = 3.69423907715893078616e-13,
3507         Lg1 = 6.666666666666735130e-01,
3508         Lg2 = 3.999999999940941908e-01,
3509         Lg3 = 2.857142874366239149e-01,
3510         Lg4 = 2.222219843214978396e-01,
3511         Lg5 = 1.818357216161805012e-01,
3512         Lg6 = 1.531383769920937332e-01,
3513         Lg7 = 1.479819860511658591e-01;
3514
3515     union {double f; UINT64 i;} u = {x};
3516     double hfsq, f, s, z, R, w, t1, t2, dk, y, hi, lo, val_hi, val_lo;
3517     UINT32 hx;
3518     int k;
3519
3520     hx = u.i >> 32;
3521     k = 0;
3522     if (hx < 0x00100000 || hx >> 31) {
3523         if (u.i << 1 == 0)
3524             return math_error(_SING, "log10", x, 0, -1 / (x * x));
3525         if ((u.i & ~(1ULL << 63)) > 0x7ff0000000000000ULL)
3526             return x;
3527         if (hx >> 31)
3528             return math_error(_DOMAIN, "log10", x, 0, (x - x) / (x - x));
3529         /* subnormal number, scale x up */
3530         k -= 54;
3531         x *= 0x1p54;
3532         u.f = x;
3533         hx = u.i >> 32;
3534     } else if (hx >= 0x7ff00000) {
3535         return x;
3536     } else if (hx == 0x3ff00000 && u.i<<32 == 0)
3537         return 0;
3538
3539     /* reduce x into [sqrt(2)/2, sqrt(2)] */
3540     hx += 0x3ff00000 - 0x3fe6a09e;
3541     k += (int)(hx >> 20) - 0x3ff;
3542     hx = (hx & 0x000fffff) + 0x3fe6a09e;
3543     u.i = (UINT64)hx << 32 | (u.i & 0xffffffff);
3544     x = u.f;
3545
3546     f = x - 1.0;
3547     hfsq = 0.5 * f * f;
3548     s = f / (2.0 + f);
3549     z = s * s;
3550     w = z * z;
3551     t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
3552     t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
3553     R = t2 + t1;
3554
3555     /* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */
3556     hi = f - hfsq;
3557     u.f = hi;
3558     u.i &= (UINT64)-1 << 32;
3559     hi = u.f;
3560     lo = f - hi - hfsq + s * (hfsq + R);
3561
3562     /* val_hi+val_lo ~ log10(1+f) + k*log10(2) */
3563     val_hi = hi * ivln10hi;
3564     dk = k;
3565     y = dk * log10_2hi;
3566     val_lo = dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi;
3567
3568     /*
3569      * Extra precision in for adding y is not strictly needed
3570      * since there is no very large cancellation near x = sqrt(2) or
3571      * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs
3572      * with some parallelism and it reduces the error for many args.
3573      */
3574     w = y + val_hi;
3575     val_lo += (y - w) + val_hi;
3576     val_hi = w;
3577
3578     return val_lo + val_hi;
3579 }
3580
/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
   additional 15 bits precision. IX is the bit representation of x, but
   normalized in the subnormal range using the sign bit for the exponent.

   ix:   bit pattern of the argument, as described above.
   tail: out parameter, receives TAIL.
   Returns y.

   Outline: x is written as 2^k * z, the table T supplies 1/c, log(c) and a
   low-order correction of log(c) for a c close to z, and the polynomial A
   evaluates the remaining log1p(z/c - 1) term. */
static double pow_log(UINT64 ix, double *tail)
{
    /* Per-subinterval data: invc is 1/c, logc is log(c) and logctail is
       added to the low-order sum lo1 below. Indexed by the 7-bit value i. */
    static const struct {
        double invc, logc, logctail;
    } T[] = {
        {0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48},
        {0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46},
        {0x1.6600000000000p+0, -0x1.5767717455800p-2, -0x1.362a4d5b6506dp-45},
        {0x1.6400000000000p+0, -0x1.51aad872df800p-2, -0x1.684e49eb067d5p-49},
        {0x1.6200000000000p+0, -0x1.4be5f95777800p-2, -0x1.41b6993293ee0p-47},
        {0x1.6000000000000p+0, -0x1.4618bc21c6000p-2, 0x1.3d82f484c84ccp-46},
        {0x1.5e00000000000p+0, -0x1.404308686a800p-2, 0x1.c42f3ed820b3ap-50},
        {0x1.5c00000000000p+0, -0x1.3a64c55694800p-2, 0x1.0b1c686519460p-45},
        {0x1.5a00000000000p+0, -0x1.347dd9a988000p-2, 0x1.5594dd4c58092p-45},
        {0x1.5800000000000p+0, -0x1.2e8e2bae12000p-2, 0x1.67b1e99b72bd8p-45},
        {0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46},
        {0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46},
        {0x1.5400000000000p+0, -0x1.22941fbcf7800p-2, -0x1.65a242853da76p-46},
        {0x1.5200000000000p+0, -0x1.1c898c1699800p-2, -0x1.fafbc68e75404p-46},
        {0x1.5000000000000p+0, -0x1.1675cababa800p-2, 0x1.f1fc63382a8f0p-46},
        {0x1.4e00000000000p+0, -0x1.1058bf9ae4800p-2, -0x1.6a8c4fd055a66p-45},
        {0x1.4c00000000000p+0, -0x1.0a324e2739000p-2, -0x1.c6bee7ef4030ep-47},
        {0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48},
        {0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48},
        {0x1.4800000000000p+0, -0x1.fb9186d5e4000p-3, 0x1.d572aab993c87p-47},
        {0x1.4600000000000p+0, -0x1.ef0adcbdc6000p-3, 0x1.b26b79c86af24p-45},
        {0x1.4400000000000p+0, -0x1.e27076e2af000p-3, -0x1.72f4f543fff10p-46},
        {0x1.4200000000000p+0, -0x1.d5c216b4fc000p-3, 0x1.1ba91bbca681bp-45},
        {0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45},
        {0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45},
        {0x1.3e00000000000p+0, -0x1.bc286742d9000p-3, 0x1.94eb0318bb78fp-46},
        {0x1.3c00000000000p+0, -0x1.af3c94e80c000p-3, 0x1.a4e633fcd9066p-52},
        {0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45},
        {0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45},
        {0x1.3800000000000p+0, -0x1.9525a9cf45000p-3, -0x1.ad1d904c1d4e3p-45},
        {0x1.3600000000000p+0, -0x1.87fa06520d000p-3, 0x1.bbdbf7fdbfa09p-45},
        {0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45},
        {0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45},
        {0x1.3200000000000p+0, -0x1.6d60fe719d000p-3, -0x1.0e46aa3b2e266p-46},
        {0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46},
        {0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46},
        {0x1.2e00000000000p+0, -0x1.526e5e3a1b000p-3, -0x1.0de8b90075b8fp-45},
        {0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46},
        {0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46},
        {0x1.2a00000000000p+0, -0x1.371fc201e9000p-3, 0x1.178864d27543ap-48},
        {0x1.2800000000000p+0, -0x1.29552f81ff000p-3, -0x1.48d301771c408p-45},
        {0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45},
        {0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45},
        {0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47},
        {0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47},
        {0x1.2200000000000p+0, -0x1.fec9131dbe000p-4, -0x1.575545ca333f2p-45},
        {0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45},
        {0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45},
        {0x1.1e00000000000p+0, -0x1.c5e548f5bc000p-4, -0x1.d0c57585fbe06p-46},
        {0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45},
        {0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45},
        {0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46},
        {0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46},
        {0x1.1800000000000p+0, -0x1.6f0d28ae56000p-4, -0x1.69737c93373dap-45},
        {0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46},
        {0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46},
        {0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45},
        {0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45},
        {0x1.1200000000000p+0, -0x1.16536eea38000p-4, 0x1.47c5e768fa309p-46},
        {0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45},
        {0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45},
        {0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46},
        {0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46},
        {0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45},
        {0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45},
        {0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48},
        {0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48},
        {0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45},
        {0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45},
        {0x1.0600000000000p+0, -0x1.7b91b07d58000p-6, -0x1.88d5493faa639p-45},
        {0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50},
        {0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50},
        {0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46},
        {0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46},
        {0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0},
        {0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0},
        {0x1.fc00000000000p-1, 0x1.0101575890000p-7, -0x1.0c76b999d2be8p-46},
        {0x1.f800000000000p-1, 0x1.0205658938000p-6, -0x1.3dc5b06e2f7d2p-45},
        {0x1.f400000000000p-1, 0x1.8492528c90000p-6, -0x1.aa0ba325a0c34p-45},
        {0x1.f000000000000p-1, 0x1.0415d89e74000p-5, 0x1.111c05cf1d753p-47},
        {0x1.ec00000000000p-1, 0x1.466aed42e0000p-5, -0x1.c167375bdfd28p-45},
        {0x1.e800000000000p-1, 0x1.894aa149fc000p-5, -0x1.97995d05a267dp-46},
        {0x1.e400000000000p-1, 0x1.ccb73cdddc000p-5, -0x1.a68f247d82807p-46},
        {0x1.e200000000000p-1, 0x1.eea31c006c000p-5, -0x1.e113e4fc93b7bp-47},
        {0x1.de00000000000p-1, 0x1.1973bd1466000p-4, -0x1.5325d560d9e9bp-45},
        {0x1.da00000000000p-1, 0x1.3bdf5a7d1e000p-4, 0x1.cc85ea5db4ed7p-45},
        {0x1.d600000000000p-1, 0x1.5e95a4d97a000p-4, -0x1.c69063c5d1d1ep-45},
        {0x1.d400000000000p-1, 0x1.700d30aeac000p-4, 0x1.c1e8da99ded32p-49},
        {0x1.d000000000000p-1, 0x1.9335e5d594000p-4, 0x1.3115c3abd47dap-45},
        {0x1.cc00000000000p-1, 0x1.b6ac88dad6000p-4, -0x1.390802bf768e5p-46},
        {0x1.ca00000000000p-1, 0x1.c885801bc4000p-4, 0x1.646d1c65aacd3p-45},
        {0x1.c600000000000p-1, 0x1.ec739830a2000p-4, -0x1.dc068afe645e0p-45},
        {0x1.c400000000000p-1, 0x1.fe89139dbe000p-4, -0x1.534d64fa10afdp-45},
        {0x1.c000000000000p-1, 0x1.1178e8227e000p-3, 0x1.1ef78ce2d07f2p-45},
        {0x1.be00000000000p-1, 0x1.1aa2b7e23f000p-3, 0x1.ca78e44389934p-45},
        {0x1.ba00000000000p-1, 0x1.2d1610c868000p-3, 0x1.39d6ccb81b4a1p-47},
        {0x1.b800000000000p-1, 0x1.365fcb0159000p-3, 0x1.62fa8234b7289p-51},
        {0x1.b400000000000p-1, 0x1.4913d8333b000p-3, 0x1.5837954fdb678p-45},
        {0x1.b200000000000p-1, 0x1.527e5e4a1b000p-3, 0x1.633e8e5697dc7p-45},
        {0x1.ae00000000000p-1, 0x1.6574ebe8c1000p-3, 0x1.9cf8b2c3c2e78p-46},
        {0x1.ac00000000000p-1, 0x1.6f0128b757000p-3, -0x1.5118de59c21e1p-45},
        {0x1.aa00000000000p-1, 0x1.7898d85445000p-3, -0x1.c661070914305p-46},
        {0x1.a600000000000p-1, 0x1.8beafeb390000p-3, -0x1.73d54aae92cd1p-47},
        {0x1.a400000000000p-1, 0x1.95a5adcf70000p-3, 0x1.7f22858a0ff6fp-47},
        {0x1.a000000000000p-1, 0x1.a93ed3c8ae000p-3, -0x1.8724350562169p-45},
        {0x1.9e00000000000p-1, 0x1.b31d8575bd000p-3, -0x1.c358d4eace1aap-47},
        {0x1.9c00000000000p-1, 0x1.bd087383be000p-3, -0x1.d4bc4595412b6p-45},
        {0x1.9a00000000000p-1, 0x1.c6ffbc6f01000p-3, -0x1.1ec72c5962bd2p-48},
        {0x1.9600000000000p-1, 0x1.db13db0d49000p-3, -0x1.aff2af715b035p-45},
        {0x1.9400000000000p-1, 0x1.e530effe71000p-3, 0x1.212276041f430p-51},
        {0x1.9200000000000p-1, 0x1.ef5ade4dd0000p-3, -0x1.a211565bb8e11p-51},
        {0x1.9000000000000p-1, 0x1.f991c6cb3b000p-3, 0x1.bcbecca0cdf30p-46},
        {0x1.8c00000000000p-1, 0x1.07138604d5800p-2, 0x1.89cdb16ed4e91p-48},
        {0x1.8a00000000000p-1, 0x1.0c42d67616000p-2, 0x1.7188b163ceae9p-45},
        {0x1.8800000000000p-1, 0x1.1178e8227e800p-2, -0x1.c210e63a5f01cp-45},
        {0x1.8600000000000p-1, 0x1.16b5ccbacf800p-2, 0x1.b9acdf7a51681p-45},
        {0x1.8400000000000p-1, 0x1.1bf99635a6800p-2, 0x1.ca6ed5147bdb7p-45},
        {0x1.8200000000000p-1, 0x1.214456d0eb800p-2, 0x1.a87deba46baeap-47},
        {0x1.7e00000000000p-1, 0x1.2bef07cdc9000p-2, 0x1.a9cfa4a5004f4p-45},
        {0x1.7c00000000000p-1, 0x1.314f1e1d36000p-2, -0x1.8e27ad3213cb8p-45},
        {0x1.7a00000000000p-1, 0x1.36b6776be1000p-2, 0x1.16ecdb0f177c8p-46},
        {0x1.7800000000000p-1, 0x1.3c25277333000p-2, 0x1.83b54b606bd5cp-46},
        {0x1.7600000000000p-1, 0x1.419b423d5e800p-2, 0x1.8e436ec90e09dp-47},
        {0x1.7400000000000p-1, 0x1.4718dc271c800p-2, -0x1.f27ce0967d675p-45},
        {0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45},
        {0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45},
        {0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46},
        {0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47},
    };
    /* Polynomial coefficients. A[0] is -0.5; A[1]..A[6] carry factors of
       -2, 4 and -8 because the polynomial below is evaluated in terms of
       ar2 = -0.5*r*r and ar3 = -0.5*r*r*r instead of plain powers of r, and
       each such factor cancels one multiplication by -0.5. */
    static const double A[] = {
        -0x1p-1,
        0x1.555555555556p-2 * -2,
        -0x1.0000000000006p-2 * -2,
        0x1.999999959554ep-3 * 4,
        -0x1.555555529a47ap-3 * 4,
        0x1.2495b9b4845e9p-3 * -8,
        -0x1.0002b8b263fc3p-3 * -8
    };
    /* High and low parts of the constant multiplied by k below. */
    static const double ln2hi = 0x1.62e42fefa3800p-1,
        ln2lo = 0x1.ef35793c76730p-45;

    double z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
    double zhi, zlo, rhi, rlo, ar, ar2, ar3, lo3, lo4, arhi, arhi2;
    UINT64 iz, tmp;
    int k, i;

    /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
       The range is split into N subintervals.
       The ith subinterval contains z and c is near its center. */
    tmp = ix - 0x3fe6955500000000ULL;
    /* Top 7 mantissa bits of the offset value select the table entry. */
    i = (tmp >> (52 - 7)) % (1 << 7);
    k = (INT64)tmp >> 52; /* arithmetic shift */
    /* Remove k from the exponent field of ix to obtain the bits of z. */
    iz = ix - (tmp & 0xfffULL << 52);
    z = *(double*)&iz;
    kd = k;

    /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
    invc = T[i].invc;
    logc = T[i].logc;
    logctail = T[i].logctail;

    /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
     |z/c - 1| < 1/N, so r = z/c - 1 is exactly representable. */
    /* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */
    /* zhi is z rounded to its upper 32 bits (the low 32 bits are cleared
       after adding half of the discarded range). */
    iz = (iz + (1ULL << 31)) & (-1ULL << 32);
    zhi = *(double*)&iz;
    zlo = z - zhi;
    rhi = zhi * invc - 1.0;
    rlo = zlo * invc;
    r = rhi + rlo;

    /* k*Ln2 + log(c) + r. */
    t1 = kd * ln2hi + logc;
    t2 = t1 + r;
    lo1 = kd * ln2lo + logctail;
    /* lo2 recovers what was rounded away when forming t2. */
    lo2 = t1 - t2 + r;

    /* Evaluation is optimized assuming superscalar pipelined execution. */
    ar = A[0] * r; /* A[0] = -0.5. */
    ar2 = r * ar;
    ar3 = r * ar2;
    /* k*Ln2 + log(c) + r + A[0]*r*r. */
    arhi = A[0] * rhi;
    arhi2 = rhi * arhi;
    hi = t2 + arhi2;
    lo3 = rlo * (ar + arhi);
    /* lo4 recovers what was rounded away when forming hi. */
    lo4 = t2 - hi + arhi2;
    /* p = log1p(r) - r - A[0]*r*r. */
    p = (ar3 * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
    lo = lo1 + lo2 + lo3 + lo4 + p;
    y = hi + lo;
    /* The part of hi + lo that did not fit into y. */
    *tail = hi - y + lo;
    return y;
}
3783
/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
   The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1.

   argx, argy: only forwarded to math_error() when an overflow or underflow
               is reported; they take no part in the computation.
   x, xtail:   high and low parts of the exponent.
   sign_bias:  non-zero requests a negated result; it is also added to ki
               before the shift that builds the scale bits ("top" below).

   N is 1 << 7 throughout (see invln2N and the index computation). */
static double pow_exp(double argx, double argy, double x, double xtail, UINT32 sign_bias)
{
    /* Coefficients of the polynomial in r used to form tmp below. */
    static const double C[] = {
        0x1.ffffffffffdbdp-2,
        0x1.555555555543cp-3,
        0x1.55555cf172b91p-5,
        0x1.1111167a4d017p-7
    };
    static const double invln2N = 0x1.71547652b82fep0 * (1 << 7),
        negln2hiN = -0x1.62e42fefa0000p-8,
        negln2loN = -0x1.cf79abc9e3b3ap-47;

    UINT32 abstop;
    UINT64 ki, idx, top, sbits;
    double kd, z, r, r2, scale, tail, tmp;

    /* Biased exponent field of x with the sign bit stripped. */
    abstop = (*(UINT64*)&x >> 52) & 0x7ff;
    /* Unsigned wrap-around makes this a single range test: it is true when
       abstop < 0x3c9 or abstop >= 0x408. */
    if (abstop - 0x3c9 >= 0x408 - 0x3c9) {
        /* Only a wrapped subtraction (abstop < 0x3c9) can be this large. */
        if (abstop - 0x3c9 >= 0x80000000) {
            /* Avoid spurious underflow for tiny x. */
            /* Note: 0 is common input. */
            double one = 1.0 + x;
            return sign_bias ? -one : one;
        }
        if (abstop >= 0x409) {
            /* Note: inf and nan are already handled. */
            if (*(UINT64*)&x >> 63)
                return math_error(_UNDERFLOW, "pow", argx, argy, (sign_bias ? -DBL_MIN : DBL_MIN) * DBL_MIN);
            return math_error(_OVERFLOW, "pow", argx, argy, (sign_bias ? -DBL_MAX : DBL_MAX) * DBL_MAX);
        }
        /* Large x is special cased below. */
        abstop = 0;
    }

    /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
    /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
    z = invln2N * x;
    kd = __round(z);
    ki = (INT64)kd;
    r = x + kd * negln2hiN + kd * negln2loN;
    /* The code assumes 2^-200 < |xtail| < 2^-8/N. */
    r += xtail;
    /* 2^(k/N) ~= scale * (1 + tail). */
    /* exp_T (defined elsewhere in this file) is read as pairs: entry idx
       holds the bits of tail, entry idx + 1 the bits added to top. */
    idx = 2 * (ki % (1 << 7));
    top = (ki + sign_bias) << (52 - 7);
    tail = *(double*)&exp_T[idx];
    /* This is only a valid scale when -1023*N < k < 1024*N. */
    sbits = exp_T[idx + 1] + top;
    /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
    /* Evaluation is optimized assuming superscalar pipelined execution. */
    r2 = r * r;
    /* Without fma the worst case error is 0.25/N ulp larger. */
    /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
    tmp = tail + r + r2 * (C[0] + r * C[1]) + r2 * r2 * (C[2] + r * C[3]);
    if (abstop == 0) {
        /* Handle cases that may overflow or underflow when computing the result that
           is scale*(1+TMP) without intermediate rounding. The bit representation of
           scale is in SBITS, however it has a computed exponent that may have
           overflowed into the sign bit so that needs to be adjusted before using it as
           a double. (int32_t)KI is the k used in the argument reduction and exponent
           adjustment of scale, positive k here means the result may overflow and
           negative k means the result may underflow. */
        /* Note: this scale deliberately shadows the function-level one. */
        double scale, y;

        if ((ki & 0x80000000) == 0) {
            /* k > 0, the exponent of scale might have overflowed by <= 460. */
            sbits -= 1009ull << 52;
            scale = *(double*)&sbits;
            y = 0x1p1009 * (scale + scale * tmp);
            if (isinf(y))
                return math_error(_OVERFLOW, "pow", argx, argy, y);
            return y;
        }
        /* k < 0, need special care in the subnormal range. */
        sbits += 1022ull << 52;
        /* Note: sbits is signed scale. */
        scale = *(double*)&sbits;
        y = scale + scale * tmp;
        if (fabs(y) < 1.0) {
            /* Round y to the right precision before scaling it into the subnormal
               range to avoid double rounding that can cause 0.5+E/2 ulp error where
               E is the worst-case ulp error outside the subnormal range. So this
               is only useful if the goal is better than 1 ulp worst-case error. */
            double hi, lo, one = 1.0;
            if (y < 0.0)
                one = -1.0;
            lo = scale - y + scale * tmp;
            hi = one + y;
            lo = one - hi + y + lo;
            y = hi + lo - one;
            /* Fix the sign of 0. */
            if (y == 0.0) {
                sbits &= 0x8000000000000000ULL;
                y = *(double*)&sbits;
            }
            /* The underflow exception needs to be signaled explicitly. */
            fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022);
            y = 0x1p-1022 * y;
            return math_error(_UNDERFLOW, "pow", argx, argy, y);
        }
        /* Undo the 2^1022 bias that was added to sbits above. */
        y = 0x1p-1022 * y;
        return y;
    }
    scale = *(double*)&sbits;
    /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
       is no spurious underflow here even without fma. */
    return scale + scale * tmp;
}
3894
3895 /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
3896    the bit representation of a non-zero finite floating-point value. */
3897 static inline int pow_checkint(UINT64 iy)
3898 {
3899     int e = iy >> 52 & 0x7ff;
3900     if (e < 0x3ff)
3901         return 0;
3902     if (e > 0x3ff + 52)
3903         return 2;
3904     if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
3905         return 0;
3906     if (iy & (1ULL << (0x3ff + 52 - e)))
3907         return 1;
3908     return 2;
3909 }
3910
3911 /*********************************************************************
3912  *              pow (MSVCRT.@)
3913  *
3914  * Copied from musl: src/math/pow.c
3915  */
3916 double CDECL pow( double x, double y )
3917 {
3918     UINT32 sign_bias = 0;
3919     UINT64 ix, iy;
3920     UINT32 topx, topy;
3921     double lo, hi, ehi, elo, yhi, ylo, lhi, llo;
3922
3923     ix = *(UINT64*)&x;
3924     iy = *(UINT64*)&y;
3925     topx = ix >> 52;
3926     topy = iy >> 52;
3927     if (topx - 0x001 >= 0x7ff - 0x001 ||
3928             (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
3929         /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
3930            and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
3931         /* Special cases: (x < 0x1p-126 or inf or nan) or
3932            (|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
3933         if (2 * iy - 1 >= 2 * 0x7ff0000000000000ULL - 1) {
3934             if (2 * iy == 0)
3935                 return 1.0;
3936             if (ix == 0x3ff0000000000000ULL)
3937                 return 1.0;
3938             if (2 * ix > 2 * 0x7ff0000000000000ULL ||
3939                     2 * iy > 2 * 0x7ff0000000000000ULL)
3940                 return x + y;
3941             if (2 * ix == 2 * 0x3ff0000000000000ULL)
3942                 return 1.0;
3943             if ((2 * ix < 2 * 0x3ff0000000000000ULL) == !(iy >> 63))
3944                 return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
3945             return y * y;
3946         }
3947         if (2 * ix - 1 >= 2 * 0x7ff0000000000000ULL - 1) {
3948             double x2 = x * x;
3949             if (ix >> 63 && pow_checkint(iy) == 1)
3950                 x2 = -x2;
3951             if (iy & 0x8000000000000000ULL && x2 == 0.0)
3952                 return math_error(_SING, "pow", x, y, 1 / x2);
3953             /* Without the barrier some versions of clang hoist the 1/x2 and
3954                thus division by zero exception can be signaled spuriously. */
3955             return iy >> 63 ? fp_barrier(1 / x2) : x2;
3956         }
3957         /* Here x and y are non-zero finite. */
3958         if (ix >> 63) {
3959             /* Finite x < 0. */
3960             int yint = pow_checkint(iy);
3961             if (yint == 0)
3962                 return math_error(_DOMAIN, "pow", x, y, 0 / (x - x));
3963             if (yint == 1)
3964                 sign_bias = 0x800 << 7;
3965             ix &= 0x7fffffffffffffff;
3966             topx &= 0x7ff;
3967         }
3968         if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
3969             /* Note: sign_bias == 0 here because y is not odd. */
3970             if (ix == 0x3ff0000000000000ULL)
3971                 return 1.0;
3972             if ((topy & 0x7ff) < 0x3be) {
3973                 /* |y| < 2^-65, x^y ~= 1 + y*log(x). */
3974                 return ix > 0x3ff0000000000000ULL ? 1.0 + y : 1.0 - y;
3975             }
3976             if ((ix > 0x3ff0000000000000ULL) == (topy < 0x800))
3977                 return math_error(_OVERFLOW, "pow", x, y, fp_barrier(DBL_MAX) * DBL_MAX);
3978             return math_error(_UNDERFLOW, "pow", x, y, fp_barrier(DBL_MIN) * DBL_MIN);
3979         }
3980         if (topx == 0) {
3981             /* Normalize subnormal x so exponent becomes negative. */
3982             x *= 0x1p52;
3983             ix = *(UINT64*)&x;
3984             ix &= 0x7fffffffffffffff;
3985             ix -= 52ULL << 52;
3986         }
3987     }
3988
3989     hi = pow_log(ix, &lo);
3990     iy &= -1ULL << 27;
3991     yhi = *(double*)&iy;
3992     ylo = y - yhi;
3993     *(UINT64*)&lhi = *(UINT64*)&hi & -1ULL << 27;
3994     llo = fp_barrier(hi - lhi + lo);
3995     ehi = yhi * lhi;
3996     elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */
3997     return pow_exp(x, y, ehi, elo, sign_bias);
3998 }
3999
4000 /*********************************************************************
4001  *              sin (MSVCRT.@)
4002  *
4003  * Copied from musl: src/math/sin.c
4004  */
4005 double CDECL sin( double x )
4006 {
4007     double y[2];
4008     UINT32 ix;
4009     unsigned n;
4010
4011     ix = *(ULONGLONG*)&x >> 32;
4012     ix &= 0x7fffffff;
4013
4014     /* |x| ~< pi/4 */
4015     if (ix <= 0x3fe921fb) {
4016         if (ix < 0x3e500000) { /* |x| < 2**-26 */
4017             /* raise inexact if x != 0 and underflow if subnormal*/
4018             fp_barrier(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f);
4019             return x;
4020         }
4021         return __sin(x, 0.0, 0);
4022     }
4023
4024     /* sin(Inf or NaN) is NaN */
4025     if (isinf(x))
4026         return math_error(_DOMAIN, "sin", x, 0, x - x);
4027     if (ix >= 0x7ff00000)
4028         return x - x;
4029
4030     /* argument reduction needed */
4031     n = __rem_pio2(x, y);
4032     switch (n&3) {
4033     case 0: return  __sin(y[0], y[1], 1);
4034     case 1: return  __cos(y[0], y[1]);
4035     case 2: return -__sin(y[0], y[1], 1);
4036     default: return -__cos(y[0], y[1]);
4037     }
4038 }
4039
4040 /*********************************************************************
4041  *              sinh (MSVCRT.@)
4042  */
4043 double CDECL sinh( double x )
4044 {
4045     UINT64 ux = *(UINT64*)&x;
4046     UINT64 sign = ux & 0x8000000000000000ULL;
4047     UINT32 w;
4048     double t, h, absx;
4049
4050     h = 0.5;
4051     if (ux >> 63)
4052         h = -h;
4053     /* |x| */
4054     ux &= (UINT64)-1 / 2;
4055     absx = *(double*)&ux;
4056     w = ux >> 32;
4057
4058     /* |x| < log(DBL_MAX) */
4059     if (w < 0x40862e42) {
4060         t = __expm1(absx);
4061         if (w < 0x3ff00000) {
4062             if (w < 0x3ff00000 - (26 << 20))
4063                 return x;
4064             return h * (2 * t - t * t / (t + 1));
4065         }
4066         return h * (t + t / (t + 1));
4067     }
4068
4069     /* |x| > log(DBL_MAX) or nan */
4070     /* note: the result is stored to handle overflow */
4071     if (ux > 0x7ff0000000000000ULL)
4072         *(UINT64*)&t = ux | sign | 0x0008000000000000ULL;
4073     else
4074         t = __expo2(absx, 2 * h);
4075     return t;
4076 }
4077
4078 static BOOL sqrt_validate( double *x, BOOL update_sw )
4079 {
4080     short c = _dclass(*x);
4081
4082     if (c == FP_ZERO) return FALSE;
4083     if (c == FP_NAN)
4084     {
4085 #ifdef __i386__
4086         if (update_sw)
4087             *x = math_error(_DOMAIN, "sqrt", *x, 0, *x);
4088 #else
4089         /* set signaling bit */
4090         *(ULONGLONG*)x |= 0x8000000000000ULL;
4091 #endif
4092         return FALSE;
4093     }
4094     if (signbit(*x))
4095     {
4096         *x = math_error(_DOMAIN, "sqrt", *x, 0, ret_nan(update_sw));
4097         return FALSE;
4098     }
4099     if (c == FP_INFINITE) return FALSE;
4100     return TRUE;
4101 }
4102
#if defined(__x86_64__) || defined(__i386__)
/* Assembly helper consisting of a single SSE2 instruction: sqrtsd replaces
   the low double of %xmm0 with its square root, then the function returns.
   The argument is therefore taken from, and the result left in, %xmm0.
   NOTE(review): the CDECL prototype only describes that register usage on
   targets where doubles are passed and returned in %xmm0 - confirm for
   any i386 caller. */
double CDECL sse2_sqrt(double);
__ASM_GLOBAL_FUNC( sse2_sqrt,
        "sqrtsd %xmm0, %xmm0\n\t"
        "ret" )
#endif
4109
#ifdef __i386__
/* Assembly helper for i386: loads the double stored at 4(%esp) onto the
   x87 stack, executes fsqrt between SET_X87_CW(0xc00) and RESET_X87_CW,
   and returns with the result on top of the x87 stack.
   NOTE(review): SET_X87_CW / RESET_X87_CW are defined elsewhere;
   presumably they temporarily change the x87 control word bits selected
   by 0xc00 and restore it afterwards - confirm against the macros. */
double CDECL x87_sqrt(double);
__ASM_GLOBAL_FUNC( x87_sqrt,
        "fldl 4(%esp)\n\t"
        SET_X87_CW(0xc00)
        "fsqrt\n\t"
        RESET_X87_CW
        "ret" )
#endif
4119
4120 /*********************************************************************
4121  *              sqrt (MSVCRT.@)
4122  *
4123  * Copied from musl: src/math/sqrt.c
4124  */
4125 double CDECL sqrt( double x )
4126 {
4127 #ifdef __x86_64__
4128     if (!sqrt_validate(&x, TRUE))
4129         return x;
4130
4131     return sse2_sqrt(x);
4132 #elif defined( __i386__ )
4133     if (!sqrt_validate(&x, TRUE))
4134         return x;
4135
4136     return x87_sqrt(x);
4137 #else
4138     static const double tiny = 1.0e-300;
4139
4140     double z;
4141     int sign = 0x80000000;
4142     int ix0,s0,q,m,t,i;
4143     unsigned int r,t1,s1,ix1,q1;
4144     ULONGLONG ix;
4145
4146     if (!sqrt_validate(&x, TRUE))
4147         return x;
4148
4149     ix = *(ULONGLONG*)&x;
4150     ix0 = ix >> 32;
4151     ix1 = ix;
4152
4153     /* normalize x */
4154     m = ix0 >> 20;
4155     if (m == 0) {  /* subnormal x */
4156         while (ix0 == 0) {
4157             m -= 21;
4158             ix0 |= (ix1 >> 11);
4159             ix1 <<= 21;
4160         }
4161         for (i=0; (ix0 & 0x00100000) == 0; i++)
4162             ix0 <<= 1;
4163         m -= i - 1;
4164         ix0 |= ix1 >> (32 - i);
4165         ix1 <<= i;
4166     }
4167     m -= 1023;    /* unbias exponent */
4168     ix0 = (ix0 & 0x000fffff) | 0x00100000;
4169     if (m & 1) {  /* odd m, double x to make it even */
4170         ix0 += ix0 + ((ix1 & sign) >> 31);
4171         ix1 += ix1;
4172     }
4173     m >>= 1;      /* m = [m/2] */
4174
4175     /* generate sqrt(x) bit by bit */
4176     ix0 += ix0 + ((ix1 & sign) >> 31);
4177     ix1 += ix1;
4178     q = q1 = s0 = s1 = 0;  /* [q,q1] = sqrt(x) */
4179     r = 0x00200000;        /* r = moving bit from right to left */
4180
4181     while (r != 0) {
4182         t = s0 + r;
4183         if (t <= ix0) {
4184             s0   = t + r;
4185             ix0 -= t;
4186             q   += r;
4187         }
4188         ix0 += ix0 + ((ix1 & sign) >> 31);
4189         ix1 += ix1;
4190         r >>= 1;
4191     }
4192
4193     r = sign;
4194     while (r != 0) {
4195         t1 = s1 + r;
4196         t  = s0;
4197         if (t < ix0 || (t == ix0 && t1 <= ix1)) {
4198             s1 = t1 + r;
4199             if ((t1&sign) == sign && (s1 & sign) == 0)
4200                 s0++;
4201             ix0 -= t;
4202             if (ix1 < t1)
4203                 ix0--;
4204             ix1 -= t1;
4205             q1 += r;
4206         }
4207         ix0 += ix0 + ((ix1 & sign) >> 31);
4208         ix1 += ix1;
4209         r >>= 1;
4210     }
4211
4212     /* use floating add to find out rounding direction */
4213     if ((ix0 | ix1) != 0) {
4214         z = 1.0 - tiny; /* raise inexact flag */
4215         if (z >= 1.0) {
4216             z = 1.0 + tiny;
4217             if (q1 == (unsigned int)0xffffffff) {
4218                 q1 = 0;
4219                 q++;
4220             } else if (z > 1.0) {
4221                 if (q1 == (unsigned int)0xfffffffe)
4222                     q++;
4223                 q1 += 2;
4224             } else
4225                 q1 += q1 & 1;
4226         }
4227     }
4228     ix0 = (q >> 1) + 0x3fe00000;
4229     ix1 = q1 >> 1;
4230     if (q & 1)
4231         ix1 |= sign;
4232     ix = ix0 + ((unsigned int)m << 20);
4233     ix <<= 32;
4234     ix |= ix1;
4235     return *(double*)&ix;
4236 #endif
4237 }
4238
/* Copied from musl: src/math/__tan.c
 *
 * Kernel used by tan(). x + y is the (already reduced) argument, with y a
 * small correction to x: tan() passes either (x, 0.0) or the two outputs
 * of __rem_pio2(). With odd == 0 the function returns the tangent-style
 * value w computed below; with odd != 0 it returns -1/w instead.
 */
static double __tan(double x, double y, int odd)
{
    /* Polynomial coefficients, used as T[0] .. T[12] below. */
    static const double T[] = {
        3.33333333333334091986e-01,
        1.33333333333201242699e-01,
        5.39682539762260521377e-02,
        2.18694882948595424599e-02,
        8.86323982359930005737e-03,
        3.59207910759131235356e-03,
        1.45620945432529025516e-03,
        5.88041240820264096874e-04,
        2.46463134818469906812e-04,
        7.81794442939557092300e-05,
        7.14072491382608190305e-05,
        -1.85586374855275456654e-05,
        2.59073051863633712884e-05,
    };
    /* High and low parts of the constant subtracted from in the "big"
       branch below. */
    static const double pio4 = 7.85398163397448278999e-01;
    static const double pio4lo = 3.06161699786838301793e-17;

    double z, r, v, w, s, a, w0, a0;
    UINT32 hx;
    int big, sign;

    hx = *(ULONGLONG*)&x >> 32;
    big = (hx & 0x7fffffff) >= 0x3FE59428; /* |x| >= 0.6744 */
    if (big) {
        /* Work on |x| and replace it by (pio4 + pio4lo) - (|x| + y'); the
           sign is re-applied when the result is returned. sign is only
           assigned, and only read, when big is set. */
        sign = hx >> 31;
        if (sign) {
            x = -x;
            y = -y;
        }
        x = (pio4 - x) + (pio4lo - y);
        y = 0.0;
    }
    z = x * x;
    w = z * z;
    /* The polynomial is split in two halves that are independent of each
       other: r collects the odd-indexed coefficients (T[1], T[3], ...) in
       powers of w = x^4, v the even-indexed ones (T[2], T[4], ...) with an
       extra factor z = x^2. */
    r = T[1] + w * (T[3] + w * (T[5] + w * (T[7] + w * (T[9] + w * T[11]))));
    v = z * (T[2] + w * (T[4] + w * (T[6] + w * (T[8] + w * (T[10] + w * T[12])))));
    s = z * x;
    r = y + z * (s * (r + v) + y) + s * T[0];
    w = x + r;
    if (big) {
        /* s is +1 for odd == 0 and -1 for odd == 1. */
        s = 1 - 2 * odd;
        v = s - 2.0 * (x + (r - w * w / (w + s)));
        return sign ? -v : v;
    }
    if (!odd)
        return w;
    /* -1.0/(x+r) has up to 2ulp error, so compute it accurately */
    w0 = w;
    /* Clear the low 32 bits of w0. */
    *(LONGLONG*)&w0 = *(LONGLONG*)&w0 & 0xffffffff00000000ULL;
    v = r - (w0 - x);       /* w0+v = r+x */
    a0 = a = -1.0 / w;
    /* Clear the low 32 bits of a0. */
    *(LONGLONG*)&a0 = *(LONGLONG*)&a0 & 0xffffffff00000000ULL;
    return a0 + a * (1.0 + a0 * w0 + a0 * v);
}
4297
4298 /*********************************************************************
4299  *              tan (MSVCRT.@)
4300  *
4301  * Copied from musl: src/math/tan.c
4302  */
4303 double CDECL tan( double x )
4304 {
4305     double y[2];
4306     UINT32 ix;
4307     unsigned n;
4308
4309     ix = *(ULONGLONG*)&x >> 32;
4310     ix &= 0x7fffffff;
4311
4312     if (ix <= 0x3fe921fb) { /* |x| ~< pi/4 */
4313         if (ix < 0x3e400000) { /* |x| < 2**-27 */
4314             /* raise inexact if x!=0 and underflow if subnormal */
4315             fp_barrier(ix < 0x00100000 ? x / 0x1p120f : x + 0x1p120f);
4316             return x;
4317         }
4318         return __tan(x, 0.0, 0);
4319     }
4320
4321     if (isinf(x))
4322         return math_error(_DOMAIN, "tan", x, 0, x - x);
4323     if (ix >= 0x7ff00000)
4324         return x - x;
4325
4326     n = __rem_pio2(x, y);
4327     return __tan(y[0], y[1], n & 1);
4328 }
4329
4330 /*********************************************************************
4331  *              tanh (MSVCRT.@)
4332  */
4333 double CDECL tanh( double x )
4334 {
4335     UINT64 ui = *(UINT64*)&x;
4336     UINT64 sign = ui & 0x8000000000000000ULL;
4337     UINT32 w;
4338     double t;
4339
4340     /* x = |x| */
4341     ui &= (UINT64)-1 / 2;
4342     x = *(double*)&ui;
4343     w = ui >> 32;
4344
4345     if (w > 0x3fe193ea) {
4346         /* |x| > log(3)/2 ~= 0.5493 or nan */
4347         if (w > 0x40340000) {
4348             if (ui > 0x7ff0000000000000ULL) {
4349                 *(UINT64*)&x = ui | sign | 0x0008000000000000ULL;
4350 #if _MSVCR_VER < 140
4351                 return math_error(_DOMAIN, "tanh", x, 0, x);
4352 #else
4353                 return x;
4354 #endif
4355             }
4356             /* |x| > 20 */
4357             /* note: this branch avoids raising overflow */
4358             fp_barrier(x + 0x1p120f);
4359             t = 1 - 0 / x;
4360         } else {
4361             t = __expm1(2 * x);
4362             t = 1 - 2 / (t + 2);
4363         }
4364     } else if (w > 0x3fd058ae) {
4365         /* |x| > log(5/3)/2 ~= 0.2554 */
4366         t = __expm1(2 * x);
4367         t = t / (t + 2);
4368     } else if (w >= 0x00100000) {
4369         /* |x| >= 0x1p-1022, up to 2ulp error in [0.1,0.2554] */
4370         t = __expm1(-2 * x);
4371         t = -t / (t + 2);
4372     } else {
4373         /* |x| is subnormal */
4374         /* note: the branch above would not raise underflow in [0x1p-1023,0x1p-1022) */
4375         fp_barrier((float)x);
4376         t = x;
4377     }
4378     return sign ? -t : t;
4379 }
4380
4381
4382 #if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
4383
/*
 * CREATE_FPU_FUNC1(name, call): emit an i386 assembly entry point `name`
 * that forwards one double argument to the C function `call`.
 *
 * The generated code:
 *  - reserves 68 bytes and pops st(0) into (%esp) as the argument;
 *  - loops on fxam, popping every further x87 register into
 *    (%esp,%ecx,8) until the masked status word equals 0x4100 (the
 *    fxam code for an empty register), counting slots in %ecx;
 *  - keeps that count at -4(%ebp) across the call to `call`;
 *  - pops the returned st(0) into (%esp) and reloads the saved slots in
 *    reverse order, so the result is loaded last.
 *
 * NOTE(review): presumably these are the compiler-intrinsic style
 * entry points that receive operands on the FPU stack; confirm.
 */
#define CREATE_FPU_FUNC1(name, call) \
    __ASM_GLOBAL_FUNC(name, \
            "pushl   %ebp\n\t" \
            __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \
            __ASM_CFI(".cfi_rel_offset %ebp,0\n\t") \
            "movl    %esp, %ebp\n\t" \
            __ASM_CFI(".cfi_def_cfa_register %ebp\n\t") \
            "subl    $68, %esp\n\t" /* sizeof(double)*8 + sizeof(int) */ \
            "fstpl   (%esp)\n\t"    /* store function argument */ \
            "fwait\n\t" \
            "movl    $1, %ecx\n\t"  /* empty FPU stack */ \
            "1:\n\t" \
            "fxam\n\t" \
            "fstsw   %ax\n\t" \
            "and     $0x4500, %ax\n\t" \
            "cmp     $0x4100, %ax\n\t" \
            "je      2f\n\t" \
            "fstpl    (%esp,%ecx,8)\n\t" \
            "fwait\n\t" \
            "incl    %ecx\n\t" \
            "jmp     1b\n\t" \
            "2:\n\t" \
            "movl    %ecx, -4(%ebp)\n\t" \
            "call    " __ASM_NAME( #call ) "\n\t" \
            "movl    -4(%ebp), %ecx\n\t" \
            "fstpl   (%esp)\n\t"    /* save result */ \
            "3:\n\t"                /* restore FPU stack */ \
            "decl    %ecx\n\t" \
            "fldl    (%esp,%ecx,8)\n\t" \
            "cmpl    $0, %ecx\n\t" \
            "jne     3b\n\t" \
            "leave\n\t" \
            __ASM_CFI(".cfi_def_cfa %esp,4\n\t") \
            __ASM_CFI(".cfi_same_value %ebp\n\t") \
            "ret")
4419
/*
 * CREATE_FPU_FUNC2(name, call): two-argument variant of the FPU
 * forwarding stub.
 *
 * st(0) is popped into 8(%esp) and the next register into (%esp), which
 * become the two double arguments of `call`.  Remaining x87 registers
 * are spilled starting at slot 2 (%ecx starts at 2).  After the call the
 * result is popped into 8(%esp) (slot 1) and the reload loop runs down
 * to slot 1, so the result is again the last value loaded.
 */
#define CREATE_FPU_FUNC2(name, call) \
    __ASM_GLOBAL_FUNC(name, \
            "pushl   %ebp\n\t" \
            __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \
            __ASM_CFI(".cfi_rel_offset %ebp,0\n\t") \
            "movl    %esp, %ebp\n\t" \
            __ASM_CFI(".cfi_def_cfa_register %ebp\n\t") \
            "subl    $68, %esp\n\t" /* sizeof(double)*8 + sizeof(int) */ \
            "fstpl   8(%esp)\n\t"   /* store function argument */ \
            "fwait\n\t" \
            "fstpl   (%esp)\n\t" \
            "fwait\n\t" \
            "movl    $2, %ecx\n\t"  /* empty FPU stack */ \
            "1:\n\t" \
            "fxam\n\t" \
            "fstsw   %ax\n\t" \
            "and     $0x4500, %ax\n\t" \
            "cmp     $0x4100, %ax\n\t" \
            "je      2f\n\t" \
            "fstpl    (%esp,%ecx,8)\n\t" \
            "fwait\n\t" \
            "incl    %ecx\n\t" \
            "jmp     1b\n\t" \
            "2:\n\t" \
            "movl    %ecx, -4(%ebp)\n\t" \
            "call    " __ASM_NAME( #call ) "\n\t" \
            "movl    -4(%ebp), %ecx\n\t" \
            "fstpl   8(%esp)\n\t"   /* save result */ \
            "3:\n\t"                /* restore FPU stack */ \
            "decl    %ecx\n\t" \
            "fldl    (%esp,%ecx,8)\n\t" \
            "cmpl    $1, %ecx\n\t" \
            "jne     3b\n\t" \
            "leave\n\t" \
            __ASM_CFI(".cfi_def_cfa %esp,4\n\t") \
            __ASM_CFI(".cfi_same_value %ebp\n\t") \
            "ret")
4457
/*
 * _CI* entry points: each one is an FPU forwarding stub around the C
 * function named in the second macro argument.  The one-operand
 * functions use CREATE_FPU_FUNC1; atan2, fmod and pow take two operands
 * and use CREATE_FPU_FUNC2.
 */
CREATE_FPU_FUNC1(_CIacos, acos)
CREATE_FPU_FUNC1(_CIasin, asin)
CREATE_FPU_FUNC1(_CIatan, atan)
CREATE_FPU_FUNC2(_CIatan2, atan2)
CREATE_FPU_FUNC1(_CIcos, cos)
CREATE_FPU_FUNC1(_CIcosh, cosh)
CREATE_FPU_FUNC1(_CIexp, exp)
CREATE_FPU_FUNC2(_CIfmod, fmod)
CREATE_FPU_FUNC1(_CIlog, log)
CREATE_FPU_FUNC1(_CIlog10, log10)
CREATE_FPU_FUNC2(_CIpow, pow)
CREATE_FPU_FUNC1(_CIsin, sin)
CREATE_FPU_FUNC1(_CIsinh, sinh)
CREATE_FPU_FUNC1(_CIsqrt, sqrt)
CREATE_FPU_FUNC1(_CItan, tan)
CREATE_FPU_FUNC1(_CItanh, tanh)
4474
/*
 * _ftol: convert the value in st(0) to a 64-bit integer.
 *
 * The current x87 control word is saved at (%esp), a copy with the bits
 * 0xc00 set (the rounding-control field, i.e. round toward zero) is
 * loaded, fistpq stores and pops st(0) as a 64-bit integer at 4(%esp),
 * and the saved control word is restored.  The result is returned in
 * %edx:%eax.
 */
__ASM_GLOBAL_FUNC(_ftol,
        "pushl   %ebp\n\t"
        __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t")
        __ASM_CFI(".cfi_rel_offset %ebp,0\n\t")
        "movl    %esp, %ebp\n\t"
        __ASM_CFI(".cfi_def_cfa_register %ebp\n\t")
        "subl    $12, %esp\n\t"     /* sizeof(LONGLONG) + 2*sizeof(WORD) */
        "fnstcw  (%esp)\n\t"
        "mov     (%esp), %ax\n\t"
        "or      $0xc00, %ax\n\t"
        "mov     %ax, 2(%esp)\n\t"
        "fldcw   2(%esp)\n\t"
        "fistpq  4(%esp)\n\t"
        "fldcw   (%esp)\n\t"
        "movl    4(%esp), %eax\n\t"
        "movl    8(%esp), %edx\n\t"
        "leave\n\t"
        __ASM_CFI(".cfi_def_cfa %esp,4\n\t")
        __ASM_CFI(".cfi_same_value %ebp\n\t")
        "ret")
4495
4496 #endif /* (defined(__GNUC__) || defined(__clang__)) && defined(__i386__) */
4497
4498 /*********************************************************************
4499  *              _fpclass (MSVCRT.@)
4500  */
4501 int CDECL _fpclass(double num)
4502 {
4503     union { double f; UINT64 i; } u = { num };
4504     int e = u.i >> 52 & 0x7ff;
4505     int s = u.i >> 63;
4506
4507     switch (e)
4508     {
4509     case 0:
4510         if (u.i << 1) return s ? _FPCLASS_ND : _FPCLASS_PD;
4511         return s ? _FPCLASS_NZ : _FPCLASS_PZ;
4512     case 0x7ff:
4513         if (u.i << 12) return ((u.i >> 51) & 1) ? _FPCLASS_QNAN : _FPCLASS_SNAN;
4514         return s ? _FPCLASS_NINF : _FPCLASS_PINF;
4515     default:
4516         return s ? _FPCLASS_NN : _FPCLASS_PN;
4517     }
4518 }
4519
/*********************************************************************
 *              _rotl (MSVCRT.@)
 *
 * Rotate a 32-bit value to the left.
 *
 * PARAMS
 *  num   [I] value to rotate
 *  shift [I] number of bit positions; only the low five bits are used
 *
 * RETURNS
 *  num rotated left by (shift & 31) bits.
 */
unsigned int CDECL MSVCRT__rotl(unsigned int num, int shift)
{
  shift &= 31;
  /* mask the complementary count as well: for shift == 0 it would be 32,
   * and shifting a 32-bit value by 32 is undefined */
  return (num << shift) | (num >> ((32 - shift) & 31));
}
4528
/*********************************************************************
 *              _lrotl (MSVCRT.@)
 *
 * Rotate an unsigned long value to the left.
 *
 * PARAMS
 *  num   [I] value to rotate
 *  shift [I] number of bit positions; only the low five bits are used
 *
 * RETURNS
 *  num rotated left by (shift & 0x1f) bits.
 *
 * NOTES
 *  The hard-coded 32 assumes __msvcrt_ulong is 32 bits wide.
 */
__msvcrt_ulong CDECL MSVCRT__lrotl(__msvcrt_ulong num, int shift)
{
  shift &= 0x1f;
  /* mask the complementary count as well: for shift == 0 it would be 32,
   * and shifting a 32-bit value by 32 is undefined */
  return (num << shift) | (num >> ((32 - shift) & 0x1f));
}
4537
/*********************************************************************
 *              _lrotr (MSVCRT.@)
 *
 * Rotate an unsigned long value to the right.
 *
 * PARAMS
 *  num   [I] value to rotate
 *  shift [I] number of bit positions; only the low five bits are used
 *
 * RETURNS
 *  num rotated right by (shift & 0x1f) bits.
 *
 * NOTES
 *  The hard-coded 32 assumes __msvcrt_ulong is 32 bits wide.
 */
__msvcrt_ulong CDECL MSVCRT__lrotr(__msvcrt_ulong num, int shift)
{
  shift &= 0x1f;
  /* mask the complementary count as well: for shift == 0 it would be 32,
   * and shifting a 32-bit value by 32 is undefined */
  return (num >> shift) | (num << ((32 - shift) & 0x1f));
}
4546
/*********************************************************************
 *              _rotr (MSVCRT.@)
 *
 * Rotate a 32-bit value to the right.
 *
 * PARAMS
 *  num   [I] value to rotate
 *  shift [I] number of bit positions; only the low five bits are used
 *
 * RETURNS
 *  num rotated right by (shift & 0x1f) bits.
 */
unsigned int CDECL MSVCRT__rotr(unsigned int num, int shift)
{
    shift &= 0x1f;
    /* mask the complementary count as well: for shift == 0 it would be 32,
     * and shifting a 32-bit value by 32 is undefined */
    return (num >> shift) | (num << ((32 - shift) & 0x1f));
}
4555
/*********************************************************************
 *              _rotl64 (MSVCRT.@)
 *
 * Rotate a 64-bit value to the left.
 *
 * PARAMS
 *  num   [I] value to rotate
 *  shift [I] number of bit positions; only the low six bits are used
 *
 * RETURNS
 *  num rotated left by (shift & 63) bits.
 */
unsigned __int64 CDECL MSVCRT__rotl64(unsigned __int64 num, int shift)
{
  shift &= 63;
  /* mask the complementary count as well: for shift == 0 it would be 64,
   * and shifting a 64-bit value by 64 is undefined */
  return (num << shift) | (num >> ((64 - shift) & 63));
}
4564
/*********************************************************************
 *              _rotr64 (MSVCRT.@)
 *
 * Rotate a 64-bit value to the right.
 *
 * PARAMS
 *  num   [I] value to rotate
 *  shift [I] number of bit positions; only the low six bits are used
 *
 * RETURNS
 *  num rotated right by (shift & 63) bits.
 */
unsigned __int64 CDECL MSVCRT__rotr64(unsigned __int64 num, int shift)
{
    shift &= 63;
    /* mask the complementary count as well: for shift == 0 it would be 64,
     * and shifting a 64-bit value by 64 is undefined */
    return (num >> shift) | (num << ((64 - shift) & 63));
}
4573
/*********************************************************************
 *              abs (MSVCRT.@)
 *
 * Absolute value of an int.
 *
 * RETURNS
 *  |n|; INT_MIN, whose magnitude is not representable, is returned
 *  unchanged.
 */
int CDECL abs( int n )
{
    /* negate in unsigned arithmetic: plain -n overflows for INT_MIN */
    return n >= 0 ? n : (int)(0u - (unsigned int)n);
}
4581
/*********************************************************************
 *              labs (MSVCRT.@)
 *
 * Absolute value of a long.
 *
 * RETURNS
 *  |n|; the most negative value, whose magnitude is not representable,
 *  is returned unchanged.
 */
__msvcrt_long CDECL labs( __msvcrt_long n )
{
    /* negate in unsigned arithmetic: plain -n overflows for the minimum value */
    return n >= 0 ? n : (__msvcrt_long)(0 - (__msvcrt_ulong)n);
}
4589
4590 #if _MSVCR_VER>=100
/*********************************************************************
 *              llabs (MSVCR100.@)
 *
 * Absolute value of a 64-bit integer.
 *
 * RETURNS
 *  |n|; the most negative value, whose magnitude is not representable,
 *  is returned unchanged.
 */
__int64 CDECL llabs( __int64 n )
{
    /* negate in unsigned arithmetic: plain -n overflows for the minimum value */
    return n >= 0 ? n : (__int64)(0 - (unsigned __int64)n);
}
4598 #endif
4599
4600 #if _MSVCR_VER>=120
/*********************************************************************
 *              imaxabs (MSVCR120.@)
 *
 * Absolute value of an intmax_t.
 *
 * RETURNS
 *  |n|; the most negative value, whose magnitude is not representable,
 *  is returned unchanged.
 */
intmax_t CDECL imaxabs( intmax_t n )
{
    /* negate in unsigned arithmetic: plain -n overflows for the minimum value */
    return n >= 0 ? n : (intmax_t)(0 - (uintmax_t)n);
}
4608 #endif
4609
/*********************************************************************
 *              _abs64 (MSVCRT.@)
 *
 * Absolute value of a 64-bit integer.
 *
 * RETURNS
 *  |n|; the most negative value, whose magnitude is not representable,
 *  is returned unchanged.
 */
__int64 CDECL _abs64( __int64 n )
{
    /* negate in unsigned arithmetic: plain -n overflows for the minimum value */
    return n >= 0 ? n : (__int64)(0 - (unsigned __int64)n);
}
4617
4618 /* Copied from musl: src/math/ilogb.c */
4619 static int __ilogb(double x)
4620 {
4621     union { double f; UINT64 i; } u = { x };
4622     int e = u.i >> 52 & 0x7ff;
4623
4624     if (!e)
4625     {
4626         u.i <<= 12;
4627         if (u.i == 0) return FP_ILOGB0;
4628         /* subnormal x */
4629         for (e = -0x3ff; u.i >> 63 == 0; e--, u.i <<= 1);
4630         return e;
4631     }
4632     if (e == 0x7ff) return u.i << 12 ? FP_ILOGBNAN : INT_MAX;
4633     return e - 0x3ff;
4634 }
4635
4636 /*********************************************************************
4637  *              _logb (MSVCRT.@)
4638  *
4639  * Copied from musl: src/math/logb.c
4640  */
4641 double CDECL _logb(double x)
4642 {
4643     if (!isfinite(x))
4644         return x * x;
4645     if (x == 0)
4646         return math_error(_SING, "_logb", x, 0, -1 / (x * x));
4647     return __ilogb(x);
4648 }
4649
/*
 * Square x into a two-part result: *hi receives the rounded product
 * x * x and *lo a correction term computed from the split halves of x.
 * x is split with the constant 2^27 + 1 into a head and a tail part.
 */
static void sq(double *hi, double *lo, double x)
{
    double scaled = x * (0x1p27 + 1);
    double head = x - scaled + scaled;
    double tail = x - head;

    *hi = x * x;
    *lo = head * head - *hi + 2 * head * tail + tail * tail;
}
4660
4661 /*********************************************************************
4662  *              _hypot (MSVCRT.@)
4663  *
4664  * Copied from musl: src/math/hypot.c
4665  */
4666 double CDECL _hypot(double x, double y)
4667 {
4668     UINT64 ux = *(UINT64*)&x, uy = *(UINT64*)&y, ut;
4669     double hx, lx, hy, ly, z;
4670     int ex, ey;
4671
4672     /* arrange |x| >= |y| */
4673     ux &= -1ULL >> 1;
4674     uy &= -1ULL >> 1;
4675     if (ux < uy) {
4676         ut = ux;
4677         ux = uy;
4678         uy = ut;
4679     }
4680
4681     /* special cases */
4682     ex = ux >> 52;
4683     ey = uy >> 52;
4684     x = *(double*)&ux;
4685     y = *(double*)&uy;
4686     /* note: hypot(inf,nan) == inf */
4687     if (ey == 0x7ff)
4688         return y;
4689     if (ex == 0x7ff || uy == 0)
4690         return x;
4691     /* note: hypot(x,y) ~= x + y*y/x/2 with inexact for small y/x */
4692     /* 64 difference is enough for ld80 double_t */
4693     if (ex - ey > 64)
4694         return x + y;
4695
4696     /* precise sqrt argument in nearest rounding mode without overflow */
4697     /* xh*xh must not overflow and xl*xl must not underflow in sq */
4698     z = 1;
4699     if (ex > 0x3ff + 510) {
4700         z = 0x1p700;
4701         x *= 0x1p-700;
4702         y *= 0x1p-700;
4703     } else if (ey < 0x3ff - 450) {
4704         z = 0x1p-700;
4705         x *= 0x1p700;
4706         y *= 0x1p700;
4707     }
4708     sq(&hx, &lx, x);
4709     sq(&hy, &ly, y);
4710     return z * sqrt(ly + lx + hy + hx);
4711 }
4712
4713 /*********************************************************************
4714  *      _hypotf (MSVCRT.@)
4715  *
4716  * Copied from musl: src/math/hypotf.c
4717  */
4718 float CDECL _hypotf(float x, float y)
4719 {
4720     UINT32 ux = *(UINT32*)&x, uy = *(UINT32*)&y, ut;
4721     float z;
4722
4723     ux &= -1U >> 1;
4724     uy &= -1U >> 1;
4725     if (ux < uy) {
4726         ut = ux;
4727         ux = uy;
4728         uy = ut;
4729     }
4730
4731     x = *(float*)&ux;
4732     y = *(float*)&uy;
4733     if (uy == 0xff << 23)
4734         return y;
4735     if (ux >= 0xff << 23 || uy == 0 || ux - uy >= 25 << 23)
4736         return x + y;
4737
4738     z = 1;
4739     if (ux >= (0x7f + 60) << 23) {
4740         z = 0x1p90f;
4741         x *= 0x1p-90f;
4742         y *= 0x1p-90f;
4743     } else if (uy < (0x7f - 60) << 23) {
4744         z = 0x1p-90f;
4745         x *= 0x1p90f;
4746         y *= 0x1p90f;
4747     }
4748     return z * sqrtf((double)x * x + (double)y * y);
4749 }
4750
/*********************************************************************
 *              ceil (MSVCRT.@)
 *
 * Based on musl: src/math/ceilf.c
 *
 * Round x up to the nearest integral value by clearing the fractional
 * mantissa bits (after bumping positive values past the next integer).
 * Values with |x| >= 2^52, infinities and NaNs are returned unchanged.
 */
double CDECL ceil( double x )
{
    union {double f; UINT64 i;} u = {x};
    /* narrow the biased exponent to int before subtracting the bias, as
     * floor() does, so the result is negative for |x| < 1 without relying
     * on an out-of-range unsigned-to-int conversion */
    int e = (int)(u.i >> 52 & 0x7ff) - 0x3ff;
    UINT64 m;

    if (e >= 52)
        return x;
    if (e >= 0) {
        /* m covers the mantissa bits that hold the fraction */
        m = 0x000fffffffffffffULL >> e;
        if ((u.i & m) == 0)
            return x;
        if (u.i >> 63 == 0)
            u.i += m;
        u.i &= ~m;
    } else {
        /* |x| < 1: negative values give -0.0, positive non-zero ones 1.0,
         * zeros fall through unchanged */
        if (u.i >> 63)
            return -0.0;
        else if (u.i << 1)
            return 1.0;
    }
    return u.f;
}
4779
/*********************************************************************
 *              floor (MSVCRT.@)
 *
 * Based on musl: src/math/floorf.c
 *
 * Round x down to the nearest integral value.  Values with
 * |x| >= 2^52, infinities and NaNs are returned unchanged.
 */
double CDECL floor( double x )
{
    union {double f; UINT64 i;} bits = {x};
    int exponent = (int)(bits.i >> 52 & 0x7ff) - 0x3ff;
    int negative = bits.i >> 63;
    UINT64 frac_mask;

    /* already integral (or inf/NaN) */
    if (exponent >= 52)
        return x;

    if (exponent < 0) {
        /* |x| < 1 */
        if (!negative)
            return 0;
        if (bits.i << 1)
            return -1;
        /* negative zero is kept as is */
        return bits.f;
    }

    /* mantissa bits that hold the fraction */
    frac_mask = 0x000fffffffffffffULL >> exponent;
    if (!(bits.i & frac_mask))
        return x;
    /* negative values move away from zero before truncation */
    if (negative)
        bits.i += frac_mask;
    bits.i &= ~frac_mask;
    return bits.f;
}
4808
/*********************************************************************
 *      fma (MSVCRT.@)
 *
 * Copied from musl: src/math/fma.c
 */
/* Decomposed double as produced by normalize() and consumed by fma(). */
struct fma_num
{
    UINT64 m;   /* mantissa with the implicit bit set, shifted left by one */
    int e;      /* exponent with 0x3ff + 52 + 1 subtracted */
    int sign;   /* 0x800 when the sign bit of the input is set, else 0 */
};
4820
4821 static struct fma_num normalize(double x)
4822 {
4823     UINT64 ix = *(UINT64*)&x;
4824     int e = ix >> 52;
4825     int sign = e & 0x800;
4826     struct fma_num ret;
4827
4828     e &= 0x7ff;
4829     if (!e) {
4830         x *= 0x1p63;
4831         ix = *(UINT64*)&x;
4832         e = ix >> 52 & 0x7ff;
4833         e = e ? e - 63 : 0x800;
4834     }
4835     ix &= (1ull << 52) - 1;
4836     ix |= 1ull << 52;
4837     ix <<= 1;
4838     e -= 0x3ff + 52 + 1;
4839
4840     ret.m = ix;
4841     ret.e = e;
4842     ret.sign = sign;
4843     return ret;
4844 }
4845
/*
 * 64x64 -> 128 bit multiplication built from 32-bit halves.
 * *lo receives the low and *hi the high 64 bits of x * y.
 * NOTE(review): the two cross products are added without carry
 * tracking; fma() only passes 54-bit mantissas, for which that sum
 * cannot overflow - confirm before reusing with wider inputs.
 */
static void mul(UINT64 *hi, UINT64 *lo, UINT64 x, UINT64 y)
{
    UINT64 x_lo = (UINT32)x, x_hi = x >> 32;
    UINT64 y_lo = (UINT32)y, y_hi = y >> 32;
    UINT64 low = x_lo * y_lo;
    UINT64 cross = x_lo * y_hi + x_hi * y_lo;
    UINT64 high = x_hi * y_hi;
    UINT64 sum = low + (cross << 32);

    *lo = sum;
    /* (low > sum) is the carry out of the low word */
    *hi = high + (cross >> 32) + (low > sum);
}
4858
/*
 * fma: compute x * y + z.
 *
 * The operands are decomposed with normalize(), the mantissas are
 * multiplied into a 128-bit product with mul(), z is aligned to that
 * product and added or subtracted in integer arithmetic, and the
 * result is converted back to a double and scaled with __scalbn().
 *
 * Zero, infinite and NaN operands are handled up front with the plain
 * expression x * y + z; on those paths errno is set to EDOM when the
 * result is NaN although none of the checked operands is.
 */
double CDECL fma( double x, double y, double z )
{
    int e, d, sign, samesign, nonzero;
    UINT64 rhi, rlo, zhi, zlo;
    struct fma_num nx, ny, nz;
    double r;
    INT64 i;

    /* normalize so top 10bits and last bit are 0 */
    nx = normalize(x);
    ny = normalize(y);
    nz = normalize(z);

    /* normalize() gives zero, inf and NaN an exponent at or above this bound */
    if (nx.e >= 0x7ff - 0x3ff - 52 - 1 || ny.e >= 0x7ff - 0x3ff - 52 - 1) {
        r = x * y + z;
        if (!isnan(x) && !isnan(y) && !isnan(z) && isnan(r)) *_errno() = EDOM;
        return r;
    }
    if (nz.e >= 0x7ff - 0x3ff - 52 - 1) {
        if (nz.e > 0x7ff - 0x3ff - 52 - 1) {/* z==0 */
            r = x * y + z;
            if (!isnan(x) && !isnan(y) && isnan(r)) *_errno() = EDOM;
            return r;
        }
        /* z is inf or NaN while x and y are finite and non-zero */
        return z;
    }

    /* mul: r = x*y */
    mul(&rhi, &rlo, nx.m, ny.m);
    /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */

    /* align exponents */
    e = nx.e + ny.e;
    d = nz.e - e;
    /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */
    if (d > 0) {
        if (d < 64) {
            zlo = nz.m << d;
            zhi = nz.m >> (64 - d);
        } else {
            zlo = 0;
            zhi = nz.m;
            e = nz.e - 64;
            d -= 64;
            /* bits shifted out of the product are folded into a sticky low bit */
            if (d < 64 && d) {
                rlo = rhi << (64 - d) | rlo >> d | !!(rlo << (64 - d));
                rhi = rhi >> d;
            } else if (d) {
                rlo = 1;
                rhi = 0;
            }
        }
    } else {
        zhi = 0;
        d = -d;
        if (d == 0) {
            zlo = nz.m;
        } else if (d < 64) {
            /* bits shifted out of z are folded into a sticky low bit */
            zlo = nz.m >> d | !!(nz.m << (64 - d));
        } else {
            zlo = 1;
        }
    }

    /* add */
    sign = nx.sign ^ ny.sign;
    samesign = !(sign ^ nz.sign);
    nonzero = 1;
    if (samesign) {
        /* r += z */
        rlo += zlo;
        rhi += zhi + (rlo < zlo);
    } else {
        /* r -= z */
        UINT64 t = rlo;
        rlo -= zlo;
        rhi = rhi - zhi - (t < rlo);
        /* a negative difference is negated and flips the result sign */
        if (rhi >> 63) {
            rlo = -rlo;
            rhi = -rhi - !!rlo;
            sign = !sign;
        }
        nonzero = !!rhi;
    }

    /* set rhi to top 63bit of the result (last bit is sticky) */
    if (nonzero) {
        e += 64;
        /* d = (number of leading zero bits of rhi) - 1, via BitScanReverse */
        if (rhi >> 32) {
            BitScanReverse((DWORD*)&d, rhi >> 32);
            d = 31 - d - 1;
        } else {
            BitScanReverse((DWORD*)&d, rhi);
            d = 63 - d - 1;
        }
        /* note: d > 0 */
        rhi = rhi << d | rlo >> (64 - d) | !!(rlo << d);
    } else if (rlo) {
        /* d = (number of leading zero bits of rlo) - 1, via BitScanReverse */
        if (rlo >> 32) {
            BitScanReverse((DWORD*)&d, rlo >> 32);
            d = 31 - d - 1;
        } else {
            BitScanReverse((DWORD*)&d, rlo);
            d = 63 - d - 1;
        }
        if (d < 0)
            rhi = rlo >> 1 | (rlo & 1);
        else
            rhi = rlo << d;
    } else {
        /* exact +-0 */
        return x * y + z;
    }
    e -= d;

    /* convert to double */
    i = rhi; /* i is in [1<<62,(1<<63)-1] */
    if (sign)
        i = -i;
    r = i; /* |r| is in [0x1p62,0x1p63] */

    if (e < -1022 - 62) {
        /* result is subnormal before rounding */
        if (e == -1022 - 63) {
            double c = 0x1p63;
            if (sign)
                c = -c;
            if (r == c) {
                /* min normal after rounding, underflow depends
                   on arch behaviour which can be imitated by
                   a double to float conversion */
                float fltmin = 0x0.ffffff8p-63 * FLT_MIN * r;
                return DBL_MIN / FLT_MIN * fltmin;
            }
            /* one bit is lost when scaled, add another top bit to
               only round once at conversion if it is inexact */
            if (rhi << 53) {
                double tiny;

                i = rhi >> 1 | (rhi & 1) | 1ull << 62;
                if (sign)
                    i = -i;
                r = i;
                r = 2 * r - c; /* remove top bit */

                /* raise underflow portably, such that it
                   cannot be optimized away */
                tiny = DBL_MIN / FLT_MIN * r;
                r += (double)(tiny * tiny) * (r - r);
            }
        } else {
            /* only round once when scaled */
            d = 10;
            i = (rhi >> d | !!(rhi << (64 - d))) << d;
            if (sign)
                i = -i;
            r = i;
        }
    }
    return __scalbn(r, e);
}
5020
/*********************************************************************
 *      fmaf (MSVCRT.@)
 *
 * Copied from musl: src/math/fmaf.c
 *
 * Computes x * y + z in double precision and returns it as a float.
 * Only when the double result sits exactly halfway between two floats,
 * is inexact, and the rounding mode is round-to-nearest, the sum is
 * recomputed with chopped rounding to decide whether the low bit of the
 * double must be adjusted before the final conversion.
 * errno is set to EDOM on the common path when the result is NaN
 * although no operand is.
 */
float CDECL fmaf( float x, float y, float z )
{
    union { double f; UINT64 i; } u;
    double xy, adjust;
    int e;

    xy = (double)x * y;
    u.f = xy + z;
    /* biased exponent field of the double result */
    e = u.i>>52 & 0x7ff;
    /* Common case: The double precision result is fine. */
    if ((u.i & 0x1fffffff) != 0x10000000 || /* not a halfway case */
            e == 0x7ff || /* NaN */
            (u.f - xy == z && u.f - z == xy) || /* exact */
            (_controlfp(0, 0) & _MCW_RC) != _RC_NEAR) /* not round-to-nearest */
    {
        if (!isnan(x) && !isnan(y) && !isnan(z) && isnan(u.f)) *_errno() = EDOM;

        /* underflow may not be raised correctly, example:
           fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) */
        if (e < 0x3ff-126 && e >= 0x3ff-149 && _statusfp() & _SW_INEXACT)
            fp_barrierf((float)u.f * (float)u.f);
        return u.f;
    }

    /*
     * If result is inexact, and exactly halfway between two float values,
     * we need to adjust the low-order bit in the direction of the error.
     */
    /* recompute the sum with chopped rounding, then switch back to
     * round-to-nearest (the mode verified by the test above) */
    _controlfp(_RC_CHOP, _MCW_RC);
    adjust = fp_barrier(xy + z);
    _controlfp(_RC_NEAR, _MCW_RC);
    if (u.f == adjust)
        u.i++;
    return u.f;
}
5061
/*********************************************************************
 *              fabs (MSVCRT.@)
 *
 * Copied from musl: src/math/fabsf.c
 *
 * Returns x with its sign bit cleared.
 */
double CDECL fabs( double x )
{
    /* everything but the top (sign) bit */
    UINT64 magnitude = *(UINT64*)&x & (~0ull >> 1);
    return *(double*)&magnitude;
}
5073
/*********************************************************************
 *              frexp (MSVCRT.@)
 *
 * Copied from musl: src/math/frexp.c
 *
 * Splits x into a fraction with magnitude in [0.5, 1) and a power of
 * two stored in *e.  Zero is returned as is with *e set to 0; for
 * infinities and NaNs x is returned and *e is left untouched.
 */
double CDECL frexp( double x, int *e )
{
    UINT64 bits = *(UINT64*)&x;
    int biased = bits >> 52 & 0x7ff;

    /* inf or NaN */
    if (biased == 0x7ff)
        return x;

    if (!biased) {
        if (!x) {
            *e = 0;
            return x;
        }
        /* subnormal: scale into the normal range and correct the exponent */
        x = frexp(x * 0x1p64, e);
        *e -= 64;
        return x;
    }

    *e = biased - 0x3fe;
    /* keep sign and mantissa, force the exponent field to 0x3fe */
    bits = (bits & 0x800fffffffffffffull) | 0x3fe0000000000000ull;
    return *(double*)&bits;
}
5099
/*********************************************************************
 *              modf (MSVCRT.@)
 *
 * Copied from musl: src/math/modf.c
 *
 * Splits x into integral and fractional parts that both carry the sign
 * of x.  The integral part is stored in *iptr, the fractional part is
 * returned.  For NaN both results are x; for infinity the integral
 * part is x and the fraction a zero with the sign of x.
 */
double CDECL modf( double x, double *iptr )
{
    union {double f; UINT64 i;} u = {x};
    UINT64 mask;
    /* narrow the biased exponent to int before subtracting the bias, as
     * floor() does, so the result is negative for |x| < 1 without relying
     * on an out-of-range unsigned-to-int conversion */
    int e = (int)(u.i >> 52 & 0x7ff) - 0x3ff;

    /* no fractional part */
    if (e >= 52) {
        *iptr = x;
        if (e == 0x400 && u.i << 12 != 0) /* nan */
            return x;
        /* signed zero */
        u.i &= 1ULL << 63;
        return u.f;
    }

    /* no integral part*/
    if (e < 0) {
        u.i &= 1ULL << 63;
        *iptr = u.f;
        return x;
    }

    /* mantissa bits that hold the fraction */
    mask = -1ULL >> 12 >> e;
    if ((u.i & mask) == 0) {
        *iptr = x;
        u.i &= 1ULL << 63;
        return u.f;
    }
    u.i &= ~mask;
    *iptr = u.f;
    return x - u.f;
}
5137
#if defined(__i386__) || defined(__x86_64__)
/*
 * Read and optionally update the SSE control/status register (MXCSR),
 * translating between its bit layout and the msvcrt _SW_, _EM_, _RC_
 * and _DN_ flags.
 *
 *  cw       in/out control word, may be NULL.  Bits selected by cw_mask
 *           are taken from *cw, the others from the current register;
 *           *cw is left holding the merged value.
 *  cw_mask  restricted to _MCW_EM | _MCW_RC | _MCW_DN
 *  sw       in/out status word, may be NULL.  Bits selected by sw_mask
 *           are taken from *sw and written to the register; on return
 *           *sw holds the status flags read before the update.
 *  sw_mask  restricted to _MCW_EM
 *
 * Returns TRUE when built with GCC or clang; otherwise FALSE with both
 * outputs zeroed.
 *
 * NOTE(review): stmxcsr/ldmxcsr use a 32-bit memory operand while
 * fpword is an unsigned long; confirm that is intended where long is
 * wider than 32 bits.
 */
static BOOL _setfp_sse( unsigned int *cw, unsigned int cw_mask,
        unsigned int *sw, unsigned int sw_mask )
{
#if defined(__GNUC__) || defined(__clang__)
    unsigned long old_fpword, fpword;
    unsigned int flags;

    __asm__ __volatile__( "stmxcsr %0" : "=m" (fpword) );
    old_fpword = fpword;

    cw_mask &= _MCW_EM | _MCW_RC | _MCW_DN;
    sw_mask &= _MCW_EM;

    if (sw)
    {
        /* register status bits 0x01..0x20 -> _SW_* flags */
        flags = 0;
        if (fpword & 0x1) flags |= _SW_INVALID;
        if (fpword & 0x2) flags |= _SW_DENORMAL;
        if (fpword & 0x4) flags |= _SW_ZERODIVIDE;
        if (fpword & 0x8) flags |= _SW_OVERFLOW;
        if (fpword & 0x10) flags |= _SW_UNDERFLOW;
        if (fpword & 0x20) flags |= _SW_INEXACT;

        /* merge the caller's bits in, rebuild the register status field */
        *sw = (flags & ~sw_mask) | (*sw & sw_mask);
        TRACE("sse2 update sw %08x to %08x\n", flags, *sw);
        fpword &= ~0x3f;
        if (*sw & _SW_INVALID) fpword |= 0x1;
        if (*sw & _SW_DENORMAL) fpword |= 0x2;
        if (*sw & _SW_ZERODIVIDE) fpword |= 0x4;
        if (*sw & _SW_OVERFLOW) fpword |= 0x8;
        if (*sw & _SW_UNDERFLOW) fpword |= 0x10;
        if (*sw & _SW_INEXACT) fpword |= 0x20;
        /* report the status as it was before the update */
        *sw = flags;
    }

    if (cw)
    {
        /* register mask bits 0x80..0x1000 -> _EM_* flags */
        flags = 0;
        if (fpword & 0x80) flags |= _EM_INVALID;
        if (fpword & 0x100) flags |= _EM_DENORMAL;
        if (fpword & 0x200) flags |= _EM_ZERODIVIDE;
        if (fpword & 0x400) flags |= _EM_OVERFLOW;
        if (fpword & 0x800) flags |= _EM_UNDERFLOW;
        if (fpword & 0x1000) flags |= _EM_INEXACT;
        /* rounding control field (0x6000) */
        switch (fpword & 0x6000)
        {
        case 0x6000: flags |= _RC_UP|_RC_DOWN; break;
        case 0x4000: flags |= _RC_UP; break;
        case 0x2000: flags |= _RC_DOWN; break;
        }
        /* denormal handling bits (0x0040 and 0x8000) */
        switch (fpword & 0x8040)
        {
        case 0x0040: flags |= _DN_FLUSH_OPERANDS_SAVE_RESULTS; break;
        case 0x8000: flags |= _DN_SAVE_OPERANDS_FLUSH_RESULTS; break;
        case 0x8040: flags |= _DN_FLUSH; break;
        }

        /* merge the caller's bits in, rebuild the register control fields */
        *cw = (flags & ~cw_mask) | (*cw & cw_mask);
        TRACE("sse2 update cw %08x to %08x\n", flags, *cw);
        fpword &= ~0xffc0;
        if (*cw & _EM_INVALID) fpword |= 0x80;
        if (*cw & _EM_DENORMAL) fpword |= 0x100;
        if (*cw & _EM_ZERODIVIDE) fpword |= 0x200;
        if (*cw & _EM_OVERFLOW) fpword |= 0x400;
        if (*cw & _EM_UNDERFLOW) fpword |= 0x800;
        if (*cw & _EM_INEXACT) fpword |= 0x1000;
        switch (*cw & _MCW_RC)
        {
        case _RC_UP|_RC_DOWN: fpword |= 0x6000; break;
        case _RC_UP: fpword |= 0x4000; break;
        case _RC_DOWN: fpword |= 0x2000; break;
        }
        switch (*cw & _MCW_DN)
        {
        case _DN_FLUSH_OPERANDS_SAVE_RESULTS: fpword |= 0x0040; break;
        case _DN_SAVE_OPERANDS_FLUSH_RESULTS: fpword |= 0x8000; break;
        case _DN_FLUSH: fpword |= 0x8040; break;
        }

        /* clear status word if anything changes */
        if (fpword != old_fpword && !sw)
        {
            TRACE("sse2 clear status word\n");
            fpword &= ~0x3f;
        }
    }

    /* only write the register back when something changed */
    if (fpword != old_fpword)
        __asm__ __volatile__( "ldmxcsr %0" : : "m" (fpword) );
    return TRUE;
#else
    FIXME("not implemented\n");
    if (cw) *cw = 0;
    if (sw) *sw = 0;
    return FALSE;
#endif
}
#endif
5237
/**********************************************************************
 *              _statusfp2 (MSVCR80.@)
 *
 * Retrieve the x87 and SSE2 floating point status flags separately.
 *
 * PARAMS
 *  x86_sw  [O] receives the x87 status as _SW_* flags, may be NULL
 *  sse2_sw [O] receives the SSE2 status as _SW_* flags, may be NULL;
 *              set to 0 when SSE2 is not supported
 *
 * NOTES
 *  Without GCC/clang inline assembly both outputs are set to 0.
 */
#if defined(__i386__)
void CDECL _statusfp2( unsigned int *x86_sw, unsigned int *sse2_sw )
{
#if defined(__GNUC__) || defined(__clang__)
    unsigned int flags;
    unsigned long fpword;

    if (x86_sw)
    {
        __asm__ __volatile__( "fstsw %0" : "=m" (fpword) );
        /* x87 status bits 0x01..0x20 -> _SW_* flags */
        flags = 0;
        if (fpword & 0x1)  flags |= _SW_INVALID;
        if (fpword & 0x2)  flags |= _SW_DENORMAL;
        if (fpword & 0x4)  flags |= _SW_ZERODIVIDE;
        if (fpword & 0x8)  flags |= _SW_OVERFLOW;
        if (fpword & 0x10) flags |= _SW_UNDERFLOW;
        if (fpword & 0x20) flags |= _SW_INEXACT;
        *x86_sw = flags;
    }

    if (!sse2_sw) return;

    if (sse2_supported)
        _setfp_sse(NULL, 0, sse2_sw, 0);
    else *sse2_sw = 0;
#else
    FIXME( "not implemented\n" );
    /* keep the outputs defined, as the unimplemented path of
     * _setfp_sse() does; _statusfp() combines both values */
    if (x86_sw) *x86_sw = 0;
    if (sse2_sw) *sse2_sw = 0;
#endif
}
#endif
5271
5272 /**********************************************************************
5273  *              _statusfp (MSVCRT.@)
5274  */
5275 unsigned int CDECL _statusfp(void)
5276 {
5277     unsigned int flags = 0;
5278 #if defined(__i386__)
5279     unsigned int x86_sw, sse2_sw;
5280
5281     _statusfp2( &x86_sw, &sse2_sw );
5282     /* FIXME: there's no definition for ambiguous status, just return all status bits for now */
5283     flags = x86_sw | sse2_sw;
5284 #elif defined(__x86_64__)
5285     _setfp_sse(NULL, 0, &flags, 0);
5286 #elif defined(__aarch64__)
5287     ULONG_PTR fpsr;
5288
5289     __asm__ __volatile__( "mrs %0, fpsr" : "=r" (fpsr) );
5290     if (fpsr & 0x1)  flags |= _SW_INVALID;
5291     if (fpsr & 0x2)  flags |= _SW_ZERODIVIDE;
5292     if (fpsr & 0x4)  flags |= _SW_OVERFLOW;
5293     if (fpsr & 0x8)  flags |= _SW_UNDERFLOW;
5294     if (fpsr & 0x10) flags |= _SW_INEXACT;
5295     if (fpsr & 0x80) flags |= _SW_DENORMAL;
5296 #elif defined(__arm__) && !defined(__SOFTFP__)
5297     DWORD fpscr;
5298
5299     __asm__ __volatile__( "vmrs %0, fpscr" : "=r" (fpscr) );
5300     if (fpscr & 0x1)  flags |= _SW_INVALID;
5301     if (fpscr & 0x2)  flags |= _SW_ZERODIVIDE;
5302     if (fpscr & 0x4)  flags |= _SW_OVERFLOW;
5303     if (fpscr & 0x8)  flags |= _SW_UNDERFLOW;
5304     if (fpscr & 0x10) flags |= _SW_INEXACT;
5305     if (fpscr & 0x80) flags |= _SW_DENORMAL;
5306 #else
5307     FIXME( "not implemented\n" );
5308 #endif
5309     return flags;
5310 }
5311
5312 /*********************************************************************
5313  *              _clearfp (MSVCRT.@)
5314  */
5315 unsigned int CDECL _clearfp(void)
5316 {
5317     unsigned int flags = 0;
5318 #if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
5319     unsigned long fpword;
5320
5321     __asm__ __volatile__( "fnstsw %0; fnclex" : "=m" (fpword) );
5322     if (fpword & 0x1)  flags |= _SW_INVALID;
5323     if (fpword & 0x2)  flags |= _SW_DENORMAL;
5324     if (fpword & 0x4)  flags |= _SW_ZERODIVIDE;
5325     if (fpword & 0x8)  flags |= _SW_OVERFLOW;
5326     if (fpword & 0x10) flags |= _SW_UNDERFLOW;
5327     if (fpword & 0x20) flags |= _SW_INEXACT;
5328
5329     if (sse2_supported)
5330     {
5331         unsigned int sse_sw = 0;
5332
5333         _setfp_sse(NULL, 0, &sse_sw, _MCW_EM);
5334         flags |= sse_sw;
5335     }
5336 #elif defined(__x86_64__)
5337     _setfp_sse(NULL, 0, &flags, _MCW_EM);
5338 #elif defined(__aarch64__)
5339     ULONG_PTR fpsr;
5340
5341     __asm__ __volatile__( "mrs %0, fpsr" : "=r" (fpsr) );
5342     if (fpsr & 0x1)  flags |= _SW_INVALID;
5343     if (fpsr & 0x2)  flags |= _SW_ZERODIVIDE;
5344     if (fpsr & 0x4)  flags |= _SW_OVERFLOW;
5345     if (fpsr & 0x8)  flags |= _SW_UNDERFLOW;
5346     if (fpsr & 0x10) flags |= _SW_INEXACT;
5347     if (fpsr & 0x80) flags |= _SW_DENORMAL;
5348     fpsr &= ~0x9f;
5349     __asm__ __volatile__( "msr fpsr, %0" :: "r" (fpsr) );
5350 #elif defined(__arm__) && !defined(__SOFTFP__)
5351     DWORD fpscr;
5352
5353     __asm__ __volatile__( "vmrs %0, fpscr" : "=r" (fpscr) );
5354     if (fpscr & 0x1)  flags |= _SW_INVALID;
5355     if (fpscr & 0x2)  flags |= _SW_ZERODIVIDE;
5356     if (fpscr & 0x4)  flags |= _SW_OVERFLOW;
5357     if (fpscr & 0x8)  flags |= _SW_UNDERFLOW;
5358     if (fpscr & 0x10) flags |= _SW_INEXACT;
5359     if (fpscr & 0x80) flags |= _SW_DENORMAL;
5360     fpscr &= ~0x9f;
5361     __asm__ __volatile__( "vmsr fpscr, %0" :: "r" (fpscr) );
5362 #else
5363     FIXME( "not implemented\n" );
5364 #endif
5365     return flags;
5366 }
5367
5368 /*********************************************************************
5369  *              __fpecode (MSVCRT.@)
5370  */
5371 int * CDECL __fpecode(void)
5372 {
5373     return &msvcrt_get_thread_data()->fpecode;
5374 }
5375
5376 /*********************************************************************
5377  *              ldexp (MSVCRT.@)
5378  */
5379 double CDECL ldexp(double num, int exp)
5380 {
5381   double z = __scalbn(num, exp);
5382
5383   if (isfinite(num) && !isfinite(z))
5384     return math_error(_OVERFLOW, "ldexp", num, exp, z);
5385   if (num && isfinite(num) && !z)
5386     return math_error(_UNDERFLOW, "ldexp", num, exp, z);
5387   return z;
5388 }
5389
5390 /*********************************************************************
5391  *              _cabs (MSVCRT.@)
5392  */
5393 double CDECL _cabs(struct _complex num)
5394 {
5395   return sqrt(num.x * num.x + num.y * num.y);
5396 }
5397
5398 /*********************************************************************
5399  *              _chgsign (MSVCRT.@)
5400  */
5401 double CDECL _chgsign(double num)
5402 {
5403     union { double f; UINT64 i; } u = { num };
5404     u.i ^= 1ull << 63;
5405     return u.f;
5406 }
5407
/*********************************************************************
 *              __control87_2 (MSVCR80.@)
 *
 * Not exported by native msvcrt, added in msvcr80.
 *
 * Query and optionally modify the x87 and the SSE2 control word
 * separately.  The bits of newval selected by mask replace the current
 * settings; with a zero mask the x87 control word is only read.
 *
 * PARAMS
 *  newval  [I] new settings (_EM_*, _RC_*, _PC_*, _IC_* constants)
 *  mask    [I] selects which settings are taken from newval
 *  x86_cw  [O] receives the resulting x87 settings; NULL skips the x87 unit
 *  sse2_cw [O] receives the resulting SSE2 settings (0 when SSE2 is not
 *              supported); NULL skips the SSE2 unit
 *
 * RETURNS
 *  1 on success, 0 on failure.
 */
#ifdef __i386__
int CDECL __control87_2( unsigned int newval, unsigned int mask,
                         unsigned int *x86_cw, unsigned int *sse2_cw )
{
#if defined(__GNUC__) || defined(__clang__)
    unsigned long fpword;
    unsigned int flags;

    if (x86_cw)
    {
        __asm__ __volatile__( "fstcw %0" : "=m" (fpword) );

        /* Convert into mask constants */
        flags = 0;
        if (fpword & 0x1)  flags |= _EM_INVALID;
        if (fpword & 0x2)  flags |= _EM_DENORMAL;
        if (fpword & 0x4)  flags |= _EM_ZERODIVIDE;
        if (fpword & 0x8)  flags |= _EM_OVERFLOW;
        if (fpword & 0x10) flags |= _EM_UNDERFLOW;
        if (fpword & 0x20) flags |= _EM_INEXACT;
        /* rounding control field */
        switch (fpword & 0xc00)
        {
        case 0xc00: flags |= _RC_UP|_RC_DOWN; break;
        case 0x800: flags |= _RC_UP; break;
        case 0x400: flags |= _RC_DOWN; break;
        }
        /* precision control field */
        switch (fpword & 0x300)
        {
        case 0x0:   flags |= _PC_24; break;
        case 0x200: flags |= _PC_53; break;
        case 0x300: flags |= _PC_64; break;
        }
        if (fpword & 0x1000) flags |= _IC_AFFINE;

        TRACE( "x86 flags=%08x newval=%08x mask=%08x\n", flags, newval, mask );
        if (mask)
        {
            flags = (flags & ~mask) | (newval & mask);

            /* Convert (masked) value back to fp word */
            fpword = 0;
            if (flags & _EM_INVALID)    fpword |= 0x1;
            if (flags & _EM_DENORMAL)   fpword |= 0x2;
            if (flags & _EM_ZERODIVIDE) fpword |= 0x4;
            if (flags & _EM_OVERFLOW)   fpword |= 0x8;
            if (flags & _EM_UNDERFLOW)  fpword |= 0x10;
            if (flags & _EM_INEXACT)    fpword |= 0x20;
            switch (flags & _MCW_RC)
            {
            case _RC_UP|_RC_DOWN:   fpword |= 0xc00; break;
            case _RC_UP:            fpword |= 0x800; break;
            case _RC_DOWN:          fpword |= 0x400; break;
            }
            switch (flags & _MCW_PC)
            {
            case _PC_64: fpword |= 0x300; break;
            case _PC_53: fpword |= 0x200; break;
            case _PC_24: fpword |= 0x0; break;
            }
            if (flags & _IC_AFFINE) fpword |= 0x1000;

            __asm__ __volatile__( "fldcw %0" : : "m" (fpword) );
        }
        *x86_cw = flags;
    }

    if (!sse2_cw) return 1;

    if (sse2_supported)
    {
        /* _setfp_sse() takes the new value in *cw and returns the result there */
        *sse2_cw = newval;
        if (!_setfp_sse(sse2_cw, mask, NULL, 0))
            return 0;
    }
    else *sse2_cw = 0;

    return 1;
#else
    FIXME( "not implemented\n" );
    /* _control87() ignores the return value and reads both outputs, so
     * give them a defined value, as the not-implemented path of
     * _setfp_sse() does. */
    if (x86_cw) *x86_cw = 0;
    if (sse2_cw) *sse2_cw = 0;
    return 0;
#endif
}
#endif
5496
5497 /*********************************************************************
5498  *              _control87 (MSVCRT.@)
5499  */
5500 unsigned int CDECL _control87(unsigned int newval, unsigned int mask)
5501 {
5502     unsigned int flags = 0;
5503 #ifdef __i386__
5504     unsigned int sse2_cw;
5505
5506     __control87_2( newval, mask, &flags, &sse2_cw );
5507
5508     if ((flags ^ sse2_cw) & (_MCW_EM | _MCW_RC)) flags |= _EM_AMBIGUOUS;
5509     flags |= sse2_cw;
5510 #elif defined(__x86_64__)
5511     flags = newval;
5512     _setfp_sse(&flags, mask, NULL, 0);
5513 #elif defined(__aarch64__)
5514     ULONG_PTR fpcr;
5515
5516     __asm__ __volatile__( "mrs %0, fpcr" : "=r" (fpcr) );
5517     if (!(fpcr & 0x100))  flags |= _EM_INVALID;
5518     if (!(fpcr & 0x200))  flags |= _EM_ZERODIVIDE;
5519     if (!(fpcr & 0x400))  flags |= _EM_OVERFLOW;
5520     if (!(fpcr & 0x800))  flags |= _EM_UNDERFLOW;
5521     if (!(fpcr & 0x1000)) flags |= _EM_INEXACT;
5522     if (!(fpcr & 0x8000)) flags |= _EM_DENORMAL;
5523     switch (fpcr & 0xc00000)
5524     {
5525     case 0x400000: flags |= _RC_UP; break;
5526     case 0x800000: flags |= _RC_DOWN; break;
5527     case 0xc00000: flags |= _RC_CHOP; break;
5528     }
5529     flags = (flags & ~mask) | (newval & mask);
5530     fpcr &= ~0xc09f00ul;
5531     if (!(flags & _EM_INVALID)) fpcr |= 0x100;
5532     if (!(flags & _EM_ZERODIVIDE)) fpcr |= 0x200;
5533     if (!(flags & _EM_OVERFLOW)) fpcr |= 0x400;
5534     if (!(flags & _EM_UNDERFLOW)) fpcr |= 0x800;
5535     if (!(flags & _EM_INEXACT)) fpcr |= 0x1000;
5536     if (!(flags & _EM_DENORMAL)) fpcr |= 0x8000;
5537     switch (flags & _MCW_RC)
5538     {
5539     case _RC_CHOP: fpcr |= 0xc00000; break;
5540     case _RC_UP:   fpcr |= 0x400000; break;
5541     case _RC_DOWN: fpcr |= 0x800000; break;
5542     }
5543     __asm__ __volatile__( "msr fpcr, %0" :: "r" (fpcr) );
5544 #elif defined(__arm__) && !defined(__SOFTFP__)
5545     DWORD fpscr;
5546
5547     __asm__ __volatile__( "vmrs %0, fpscr" : "=r" (fpscr) );
5548     if (!(fpscr & 0x100))  flags |= _EM_INVALID;
5549     if (!(fpscr & 0x200))  flags |= _EM_ZERODIVIDE;
5550     if (!(fpscr & 0x400))  flags |= _EM_OVERFLOW;
5551     if (!(fpscr & 0x800))  flags |= _EM_UNDERFLOW;
5552     if (!(fpscr & 0x1000)) flags |= _EM_INEXACT;
5553     if (!(fpscr & 0x8000)) flags |= _EM_DENORMAL;
5554     switch (fpscr & 0xc00000)
5555     {
5556     case 0x400000: flags |= _RC_UP; break;
5557     case 0x800000: flags |= _RC_DOWN; break;
5558     case 0xc00000: flags |= _RC_CHOP; break;
5559     }
5560     flags = (flags & ~mask) | (newval & mask);
5561     fpscr &= ~0xc09f00ul;
5562     if (!(flags & _EM_INVALID))    fpscr |= 0x100;
5563     if (!(flags & _EM_ZERODIVIDE)) fpscr |= 0x200;
5564     if (!(flags & _EM_OVERFLOW))   fpscr |= 0x400;
5565     if (!(flags & _EM_UNDERFLOW))  fpscr |= 0x800;
5566     if (!(flags & _EM_INEXACT))    fpscr |= 0x1000;
5567     if (!(flags & _EM_DENORMAL))   fpscr |= 0x8000;
5568     switch (flags & _MCW_RC)
5569     {
5570     case _RC_CHOP: fpscr |= 0xc00000; break;
5571     case _RC_UP:   fpscr |= 0x400000; break;
5572     case _RC_DOWN: fpscr |= 0x800000; break;
5573     }
5574     __asm__ __volatile__( "vmsr fpscr, %0" :: "r" (fpscr) );
5575 #else
5576     FIXME( "not implemented\n" );
5577 #endif
5578     return flags;
5579 }
5580
5581 /*********************************************************************
5582  *              _controlfp (MSVCRT.@)
5583  */
5584 unsigned int CDECL _controlfp(unsigned int newval, unsigned int mask)
5585 {
5586   return _control87( newval, mask & ~_EM_DENORMAL );
5587 }
5588
5589 /*********************************************************************
5590  *              _set_controlfp (MSVCRT.@)
5591  */
5592 void CDECL _set_controlfp( unsigned int newval, unsigned int mask )
5593 {
5594     _controlfp( newval, mask );
5595 }
5596
5597 /*********************************************************************
5598  *              _controlfp_s (MSVCRT.@)
5599  */
5600 int CDECL _controlfp_s(unsigned int *cur, unsigned int newval, unsigned int mask)
5601 {
5602     static const unsigned int all_flags = (_MCW_EM | _MCW_IC | _MCW_RC |
5603                                            _MCW_PC | _MCW_DN);
5604     unsigned int val;
5605
5606     if (!MSVCRT_CHECK_PMT( !(newval & mask & ~all_flags) ))
5607     {
5608         if (cur) *cur = _controlfp( 0, 0 );  /* retrieve it anyway */
5609         return EINVAL;
5610     }
5611     val = _controlfp( newval, mask );
5612     if (cur) *cur = val;
5613     return 0;
5614 }
5615
5616 #if _MSVCR_VER >= 140 && (defined(__i386__) || defined(__x86_64__))
/* Bit patterns used by fenv_encode()/fenv_decode() to pack the x87 (X)
 * and SSE (Y) control/status flags into a single ulong. */
enum fenv_masks
{
    /* x87 exception bits */
    FENV_X_INVALID     = 0x00100010,
    FENV_X_DENORMAL    = 0x00200020,
    FENV_X_ZERODIVIDE  = 0x00080008,
    FENV_X_OVERFLOW    = 0x00040004,
    FENV_X_UNDERFLOW   = 0x00020002,
    FENV_X_INEXACT     = 0x00010001,
    /* x87 infinity, rounding and precision control */
    FENV_X_AFFINE      = 0x00004000,
    FENV_X_UP          = 0x00800200,
    FENV_X_DOWN        = 0x00400100,
    FENV_X_24          = 0x00002000,
    FENV_X_53          = 0x00001000,
    /* SSE exception bits */
    FENV_Y_INVALID     = 0x10000010,
    FENV_Y_DENORMAL    = 0x20000020,
    FENV_Y_ZERODIVIDE  = 0x08000008,
    FENV_Y_OVERFLOW    = 0x04000004,
    FENV_Y_UNDERFLOW   = 0x02000002,
    FENV_Y_INEXACT     = 0x01000001,
    /* SSE rounding and denormal control */
    FENV_Y_UP          = 0x80000200,
    FENV_Y_DOWN        = 0x40000100,
    FENV_Y_FLUSH       = 0x00000400,
    FENV_Y_FLUSH_SAVE  = 0x00000800
};
5641
5642 /* encodes x87/sse control/status word in ulong */
5643 static __msvcrt_ulong fenv_encode(unsigned int x, unsigned int y)
5644 {
5645     __msvcrt_ulong ret = 0;
5646
5647 #ifdef __i386__
5648     if (x & _EM_INVALID) ret |= FENV_X_INVALID;
5649     if (x & _EM_DENORMAL) ret |= FENV_X_DENORMAL;
5650     if (x & _EM_ZERODIVIDE) ret |= FENV_X_ZERODIVIDE;
5651     if (x & _EM_OVERFLOW) ret |= FENV_X_OVERFLOW;
5652     if (x & _EM_UNDERFLOW) ret |= FENV_X_UNDERFLOW;
5653     if (x & _EM_INEXACT) ret |= FENV_X_INEXACT;
5654     if (x & _IC_AFFINE) ret |= FENV_X_AFFINE;
5655     if (x & _RC_UP) ret |= FENV_X_UP;
5656     if (x & _RC_DOWN) ret |= FENV_X_DOWN;
5657     if (x & _PC_24) ret |= FENV_X_24;
5658     if (x & _PC_53) ret |= FENV_X_53;
5659 #endif
5660     x &= ~(_MCW_EM | _MCW_IC | _MCW_RC | _MCW_PC);
5661
5662     if (y & _EM_INVALID) ret |= FENV_Y_INVALID;
5663     if (y & _EM_DENORMAL) ret |= FENV_Y_DENORMAL;
5664     if (y & _EM_ZERODIVIDE) ret |= FENV_Y_ZERODIVIDE;
5665     if (y & _EM_OVERFLOW) ret |= FENV_Y_OVERFLOW;
5666     if (y & _EM_UNDERFLOW) ret |= FENV_Y_UNDERFLOW;
5667     if (y & _EM_INEXACT) ret |= FENV_Y_INEXACT;
5668     if (y & _RC_UP) ret |= FENV_Y_UP;
5669     if (y & _RC_DOWN) ret |= FENV_Y_DOWN;
5670     if (y & _DN_FLUSH) ret |= FENV_Y_FLUSH;
5671     if (y & _DN_FLUSH_OPERANDS_SAVE_RESULTS) ret |= FENV_Y_FLUSH_SAVE;
5672     y &= ~(_MCW_EM | _MCW_IC | _MCW_RC | _MCW_DN);
5673
5674     if(x || y) FIXME("unsupported flags: %x, %x\n", x, y);
5675     return ret;
5676 }
5677
5678 /* decodes x87/sse control/status word, returns FALSE on error */
5679 static BOOL fenv_decode(__msvcrt_ulong enc, unsigned int *x, unsigned int *y)
5680 {
5681     *x = *y = 0;
5682     if ((enc & FENV_X_INVALID) == FENV_X_INVALID) *x |= _EM_INVALID;
5683     if ((enc & FENV_X_DENORMAL) == FENV_X_DENORMAL) *x |= _EM_DENORMAL;
5684     if ((enc & FENV_X_ZERODIVIDE) == FENV_X_ZERODIVIDE) *x |= _EM_ZERODIVIDE;
5685     if ((enc & FENV_X_OVERFLOW) == FENV_X_OVERFLOW) *x |= _EM_OVERFLOW;
5686     if ((enc & FENV_X_UNDERFLOW) == FENV_X_UNDERFLOW) *x |= _EM_UNDERFLOW;
5687     if ((enc & FENV_X_INEXACT) == FENV_X_INEXACT) *x |= _EM_INEXACT;
5688     if ((enc & FENV_X_AFFINE) == FENV_X_AFFINE) *x |= _IC_AFFINE;
5689     if ((enc & FENV_X_UP) == FENV_X_UP) *x |= _RC_UP;
5690     if ((enc & FENV_X_DOWN) == FENV_X_DOWN) *x |= _RC_DOWN;
5691     if ((enc & FENV_X_24) == FENV_X_24) *x |= _PC_24;
5692     if ((enc & FENV_X_53) == FENV_X_53) *x |= _PC_53;
5693
5694     if ((enc & FENV_Y_INVALID) == FENV_Y_INVALID) *y |= _EM_INVALID;
5695     if ((enc & FENV_Y_DENORMAL) == FENV_Y_DENORMAL) *y |= _EM_DENORMAL;
5696     if ((enc & FENV_Y_ZERODIVIDE) == FENV_Y_ZERODIVIDE) *y |= _EM_ZERODIVIDE;
5697     if ((enc & FENV_Y_OVERFLOW) == FENV_Y_OVERFLOW) *y |= _EM_OVERFLOW;
5698     if ((enc & FENV_Y_UNDERFLOW) == FENV_Y_UNDERFLOW) *y |= _EM_UNDERFLOW;
5699     if ((enc & FENV_Y_INEXACT) == FENV_Y_INEXACT) *y |= _EM_INEXACT;
5700     if ((enc & FENV_Y_UP) == FENV_Y_UP) *y |= _RC_UP;
5701     if ((enc & FENV_Y_DOWN) == FENV_Y_DOWN) *y |= _RC_DOWN;
5702     if ((enc & FENV_Y_FLUSH) == FENV_Y_FLUSH) *y |= _DN_FLUSH;
5703     if ((enc & FENV_Y_FLUSH_SAVE) == FENV_Y_FLUSH_SAVE) *y |= _DN_FLUSH_OPERANDS_SAVE_RESULTS;
5704
5705     if (fenv_encode(*x, *y) != enc)
5706     {
5707         WARN("can't decode: %lx\n", enc);
5708         return FALSE;
5709     }
5710     return TRUE;
5711 }
5712 #elif _MSVCR_VER >= 120
5713 static __msvcrt_ulong fenv_encode(unsigned int x, unsigned int y)
5714 {
5715     if (y & _EM_DENORMAL)
5716         y = (y & ~_EM_DENORMAL) | 0x20;
5717
5718     return x | y;
5719 }
5720
5721 #if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) || (defined(__arm__) && !defined(__SOFTFP__))
5722 static BOOL fenv_decode(__msvcrt_ulong enc, unsigned int *x, unsigned int *y)
5723 {
5724     if (enc & 0x20)
5725         enc = (enc & ~0x20) | _EM_DENORMAL;
5726
5727     *x = *y = enc;
5728     return TRUE;
5729 }
5730 #endif
5731 #endif
5732
5733 #if _MSVCR_VER>=120
5734 /*********************************************************************
5735  *              fegetenv (MSVCR120.@)
5736  */
5737 int CDECL fegetenv(fenv_t *env)
5738 {
5739 #if _MSVCR_VER>=140 && defined(__i386__)
5740     unsigned int x87, sse;
5741     __control87_2(0, 0, &x87, &sse);
5742     env->_Fe_ctl = fenv_encode(x87, sse);
5743     _statusfp2(&x87, &sse);
5744     env->_Fe_stat = fenv_encode(x87, sse);
5745 #elif _MSVCR_VER>=140
5746     env->_Fe_ctl = fenv_encode(0, _control87(0, 0));
5747     env->_Fe_stat = fenv_encode(0, _statusfp());
5748 #else
5749     env->_Fe_ctl = _controlfp(0, 0) & (_EM_INEXACT | _EM_UNDERFLOW |
5750             _EM_OVERFLOW | _EM_ZERODIVIDE | _EM_INVALID | _RC_CHOP);
5751     env->_Fe_stat = _statusfp();
5752 #endif
5753     return 0;
5754 }
5755
5756 /*********************************************************************
5757  *              feupdateenv (MSVCR120.@)
5758  */
5759 int CDECL feupdateenv(const fenv_t *env)
5760 {
5761     fenv_t set;
5762     fegetenv(&set);
5763     set._Fe_ctl = env->_Fe_ctl;
5764     set._Fe_stat |= env->_Fe_stat;
5765     return fesetenv(&set);
5766 }
5767
5768 /*********************************************************************
5769  *      fetestexcept (MSVCR120.@)
5770  */
5771 int CDECL fetestexcept(int flags)
5772 {
5773     return _statusfp() & flags;
5774 }
5775
5776 /*********************************************************************
5777  *      fesetexceptflag (MSVCR120.@)
5778  */
5779 int CDECL fesetexceptflag(const fexcept_t *status, int excepts)
5780 {
5781     fenv_t env;
5782
5783     excepts &= FE_ALL_EXCEPT;
5784     if(!excepts)
5785         return 0;
5786
5787     fegetenv(&env);
5788     env._Fe_stat &= ~fenv_encode(excepts, excepts);
5789     env._Fe_stat |= *status & fenv_encode(excepts, excepts);
5790     return fesetenv(&env);
5791 }
5792
5793 /*********************************************************************
5794  *      feraiseexcept (MSVCR120.@)
5795  */
5796 int CDECL feraiseexcept(int flags)
5797 {
5798     fenv_t env;
5799
5800     flags &= FE_ALL_EXCEPT;
5801     fegetenv(&env);
5802     env._Fe_stat |= fenv_encode(flags, flags);
5803     return fesetenv(&env);
5804 }
5805
5806 /*********************************************************************
5807  *      feclearexcept (MSVCR120.@)
5808  */
5809 int CDECL feclearexcept(int flags)
5810 {
5811     fenv_t env;
5812
5813     fegetenv(&env);
5814     flags &= FE_ALL_EXCEPT;
5815     env._Fe_stat &= ~fenv_encode(flags, flags);
5816     return fesetenv(&env);
5817 }
5818
5819 /*********************************************************************
5820  *      fegetexceptflag (MSVCR120.@)
5821  */
5822 int CDECL fegetexceptflag(fexcept_t *status, int excepts)
5823 {
5824 #if _MSVCR_VER>=140 && defined(__i386__)
5825     unsigned int x87, sse;
5826     _statusfp2(&x87, &sse);
5827     *status = fenv_encode(x87 & excepts, sse & excepts);
5828 #else
5829     *status = fenv_encode(0, _statusfp() & excepts);
5830 #endif
5831     return 0;
5832 }
5833 #endif
5834
5835 #if _MSVCR_VER>=140
5836 /*********************************************************************
5837  *              __fpe_flt_rounds (UCRTBASE.@)
5838  */
5839 int CDECL __fpe_flt_rounds(void)
5840 {
5841     unsigned int fpc = _controlfp(0, 0) & _RC_CHOP;
5842
5843     TRACE("()\n");
5844
5845     switch(fpc) {
5846         case _RC_CHOP: return 0;
5847         case _RC_NEAR: return 1;
5848         case _RC_UP: return 2;
5849         default: return 3;
5850     }
5851 }
5852 #endif
5853
5854 #if _MSVCR_VER>=120
5855
5856 /*********************************************************************
5857  *              fegetround (MSVCR120.@)
5858  */
5859 int CDECL fegetround(void)
5860 {
5861     return _controlfp(0, 0) & _MCW_RC;
5862 }
5863
5864 /*********************************************************************
5865  *              fesetround (MSVCR120.@)
5866  */
5867 int CDECL fesetround(int round_mode)
5868 {
5869     if (round_mode & (~_MCW_RC))
5870         return 1;
5871     _controlfp(round_mode, _MCW_RC);
5872     return 0;
5873 }
5874
5875 #endif /* _MSVCR_VER>=120 */
5876
5877 /*********************************************************************
5878  *              _copysign (MSVCRT.@)
5879  *
5880  * Copied from musl: src/math/copysign.c
5881  */
5882 double CDECL _copysign( double x, double y )
5883 {
5884     union { double f; UINT64 i; } ux = { x }, uy = { y };
5885     ux.i &= ~0ull >> 1;
5886     ux.i |= uy.i & 1ull << 63;
5887     return ux.f;
5888 }
5889
5890 /*********************************************************************
5891  *              _finite (MSVCRT.@)
5892  */
5893 int CDECL _finite(double num)
5894 {
5895     union { double f; UINT64 i; } u = { num };
5896     return (u.i & ~0ull >> 1) < 0x7ffull << 52;
5897 }
5898
5899 /*********************************************************************
5900  *              _fpreset (MSVCRT.@)
5901  */
5902 void CDECL _fpreset(void)
5903 {
5904 #if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
5905     const unsigned int x86_cw = 0x27f;
5906     __asm__ __volatile__( "fninit; fldcw %0" : : "m" (x86_cw) );
5907     if (sse2_supported)
5908     {
5909         unsigned int cw = _MCW_EM, sw = 0;
5910         _setfp_sse(&cw, ~0, &sw, ~0);
5911     }
5912 #elif defined(__x86_64__)
5913     unsigned int cw = _MCW_EM, sw = 0;
5914     _setfp_sse(&cw, ~0, &sw, ~0);
5915 #else
5916     FIXME( "not implemented\n" );
5917 #endif
5918 }
5919
5920 #if _MSVCR_VER>=120
5921 /*********************************************************************
5922  *              fesetenv (MSVCR120.@)
5923  */
5924 int CDECL fesetenv(const fenv_t *env)
5925 {
5926 #if (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__))
5927     unsigned int x87_cw, sse_cw, x87_stat, sse_stat;
5928 #ifdef __i386__
5929     struct {
5930         WORD control_word;
5931         WORD unused1;
5932         WORD status_word;
5933         WORD unused2;
5934         WORD tag_word;
5935         WORD unused3;
5936         DWORD instruction_pointer;
5937         WORD code_segment;
5938         WORD unused4;
5939         DWORD operand_addr;
5940         WORD data_segment;
5941         WORD unused5;
5942     } fenv;
5943 #endif
5944
5945     TRACE( "(%p)\n", env );
5946
5947     if (!env->_Fe_ctl && !env->_Fe_stat) {
5948         _fpreset();
5949         return 0;
5950     }
5951
5952     if (!fenv_decode(env->_Fe_ctl, &x87_cw, &sse_cw))
5953         return 1;
5954     if (!fenv_decode(env->_Fe_stat, &x87_stat, &sse_stat))
5955         return 1;
5956
5957 #ifdef __i386__
5958     __asm__ __volatile__( "fnstenv %0" : "=m" (fenv) );
5959
5960     fenv.control_word &= ~0xc3d;
5961 #if _MSVCR_VER>=140
5962     fenv.control_word &= ~0x1302;
5963 #endif
5964     if (x87_cw & _EM_INVALID) fenv.control_word |= 0x1;
5965     if (x87_cw & _EM_ZERODIVIDE) fenv.control_word |= 0x4;
5966     if (x87_cw & _EM_OVERFLOW) fenv.control_word |= 0x8;
5967     if (x87_cw & _EM_UNDERFLOW) fenv.control_word |= 0x10;
5968     if (x87_cw & _EM_INEXACT) fenv.control_word |= 0x20;
5969     switch (x87_cw & _MCW_RC)
5970     {
5971         case _RC_UP|_RC_DOWN:   fenv.control_word |= 0xc00; break;
5972         case _RC_UP:            fenv.control_word |= 0x800; break;
5973         case _RC_DOWN:          fenv.control_word |= 0x400; break;
5974     }
5975 #if _MSVCR_VER>=140
5976     if (x87_cw & _EM_DENORMAL) fenv.control_word |= 0x2;
5977     switch (x87_cw & _MCW_PC)
5978     {
5979         case _PC_64: fenv.control_word |= 0x300; break;
5980         case _PC_53: fenv.control_word |= 0x200; break;
5981         case _PC_24: fenv.control_word |= 0x0; break;
5982     }
5983     if (x87_cw & _IC_AFFINE) fenv.control_word |= 0x1000;
5984 #endif
5985
5986     fenv.status_word &= ~0x3f;
5987     if (x87_stat & _SW_INVALID) fenv.status_word |= 0x1;
5988     if (x87_stat & _SW_DENORMAL) fenv.status_word |= 0x2;
5989     if (x87_stat & _SW_ZERODIVIDE) fenv.status_word |= 0x4;
5990     if (x87_stat & _SW_OVERFLOW) fenv.status_word |= 0x8;
5991     if (x87_stat & _SW_UNDERFLOW) fenv.status_word |= 0x10;
5992     if (x87_stat & _SW_INEXACT) fenv.status_word |= 0x20;
5993
5994     __asm__ __volatile__( "fldenv %0" : : "m" (fenv) : "st", "st(1)",
5995             "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)" );
5996 #endif
5997
5998     if (sse2_supported)
5999     {
6000         if(!_setfp_sse(&sse_cw, _MSVCR_VER>=140 ? ~0 :
6001                     ~_EM_DENORMAL & (_MCW_EM | _MCW_RC), &sse_stat, _MCW_EM))
6002             return 1;
6003     }
6004
6005     return 0;
6006 #elif defined(__aarch64__)
6007     ULONG_PTR fpsr;
6008     unsigned int tmp, fp_cw, fp_stat;
6009
6010     if (!env->_Fe_ctl && !env->_Fe_stat) {
6011         _fpreset();
6012         return 0;
6013     }
6014
6015     if (!fenv_decode(env->_Fe_ctl, &tmp, &fp_cw))
6016         return 1;
6017     if (!fenv_decode(env->_Fe_stat, &tmp, &fp_stat))
6018         return 1;
6019
6020     _control87(_MCW_EM, _MCW_EM);
6021     __asm__ __volatile__( "mrs %0, fpsr" : "=r" (fpsr) );
6022     fpsr &= ~0x9f;
6023     if (fp_stat & _SW_INVALID)    fpsr |= 0x1;
6024     if (fp_stat & _SW_ZERODIVIDE) fpsr |= 0x2;
6025     if (fp_stat & _SW_OVERFLOW)   fpsr |= 0x4;
6026     if (fp_stat & _SW_UNDERFLOW)  fpsr |= 0x8;
6027     if (fp_stat & _SW_INEXACT)    fpsr |= 0x10;
6028     if (fp_stat & _SW_DENORMAL)   fpsr |= 0x80;
6029     __asm__ __volatile__( "msr fpsr, %0" :: "r" (fpsr) );
6030     _control87(fp_cw, 0xffffffff);
6031     return 0;
6032 #elif defined(__arm__) && !defined(__SOFTFP__)
6033     DWORD fpscr;
6034     unsigned int tmp, fp_cw, fp_stat;
6035
6036     if (!env->_Fe_ctl && !env->_Fe_stat) {
6037         _fpreset();
6038         return 0;
6039     }
6040
6041     if (!fenv_decode(env->_Fe_ctl, &tmp, &fp_cw))
6042         return 1;
6043     if (!fenv_decode(env->_Fe_stat, &tmp, &fp_stat))
6044         return 1;
6045
6046     _control87(_MCW_EM, _MCW_EM);
6047     __asm__ __volatile__( "vmrs %0, fpscr" : "=r" (fpscr) );
6048     fpscr &= ~0x9f;
6049     if (fp_stat & _SW_INVALID)    fpscr |= 0x1;
6050     if (fp_stat & _SW_ZERODIVIDE) fpscr |= 0x2;
6051     if (fp_stat & _SW_OVERFLOW)   fpscr |= 0x4;
6052     if (fp_stat & _SW_UNDERFLOW)  fpscr |= 0x8;
6053     if (fp_stat & _SW_INEXACT)    fpscr |= 0x10;
6054     if (fp_stat & _SW_DENORMAL)   fpscr |= 0x80;
6055     __asm__ __volatile__( "vmsr fpscr, %0" :: "r" (fpscr) );
6056     _control87(fp_cw, 0xffffffff);
6057     return 0;
6058 #else
6059     FIXME( "not implemented\n" );
6060 #endif
6061     return 1;
6062 }
6063 #endif
6064
6065 /*********************************************************************
6066  *              _isnan (MSVCRT.@)
6067  */
6068 int CDECL _isnan(double num)
6069 {
6070     union { double f; UINT64 i; } u = { num };
6071     return (u.i & ~0ull >> 1) > 0x7ffull << 52;
6072 }
6073
/* Evaluate 1 + R(z)/S(z) with z = 1/x^2, where R and S are polynomials
 * whose coefficients are selected by the magnitude of x (see the
 * interval noted next to each table).
 *
 * NOTE(review): presumably the "P0" helper of the Bessel j0/y0
 * implementation and only meant for |x| >= 2 - confirm against callers.
 */
static double pzero(double x)
{
    static const double pR8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
        0.00000000000000000000e+00,
        -7.03124999999900357484e-02,
        -8.08167041275349795626e+00,
        -2.57063105679704847262e+02,
        -2.48521641009428822144e+03,
        -5.25304380490729545272e+03,
    }, pS8[5] = {
        1.16534364619668181717e+02,
        3.83374475364121826715e+03,
        4.05978572648472545552e+04,
        1.16752972564375915681e+05,
        4.76277284146730962675e+04,
    }, pR5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
        -1.14125464691894502584e-11,
        -7.03124940873599280078e-02,
        -4.15961064470587782438e+00,
        -6.76747652265167261021e+01,
        -3.31231299649172967747e+02,
        -3.46433388365604912451e+02,
    }, pS5[5] = {
        6.07539382692300335975e+01,
        1.05125230595704579173e+03,
        5.97897094333855784498e+03,
        9.62544514357774460223e+03,
        2.40605815922939109441e+03,
    }, pR3[6] = {/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */
        -2.54704601771951915620e-09,
        -7.03119616381481654654e-02,
        -2.40903221549529611423e+00,
        -2.19659774734883086467e+01,
        -5.80791704701737572236e+01,
        -3.14479470594888503854e+01,
    }, pS3[5] = {
        3.58560338055209726349e+01,
        3.61513983050303863820e+02,
        1.19360783792111533330e+03,
        1.12799679856907414432e+03,
        1.73580930813335754692e+02,
    }, pR2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
        -8.87534333032526411254e-08,
        -7.03030995483624743247e-02,
        -1.45073846780952986357e+00,
        -7.63569613823527770791e+00,
        -1.11931668860356747786e+01,
        -3.23364579351335335033e+00,
    }, pS2[5] = {
        2.22202997532088808441e+01,
        1.36206794218215208048e+02,
        2.70470278658083486789e+02,
        1.53875394208320329881e+02,
        1.46576176948256193810e+01,
    };

    const double *p, *q;
    double z, r, s;
    /* Read the bit pattern through a union, as the other helpers in this
     * file do; dereferencing a casted pointer violates strict aliasing. */
    union { double f; unsigned long long i; } u = { x };
    unsigned int ix;

    /* high word of x with the sign bit removed */
    ix = u.i >> 32;
    ix &= 0x7fffffff;
    if (ix >= 0x40200000) {
        p = pR8;
        q = pS8;
    } else if (ix >= 0x40122E8B) {
        p = pR5;
        q = pS5;
    } else if (ix >= 0x4006DB6D) {
        p = pR3;
        q = pS3;
    } else /*ix >= 0x40000000*/ {
        p = pR2;
        q = pS2;
    }

    z = 1.0 / (x * x);
    /* Horner evaluation of both polynomials */
    r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
    s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4]))));
    return 1.0 + r / s;
}
6155
/* Rational correction term for the large-argument j0/y0 formula.
 *
 * Returns (-0.125 + R(z) / S(z)) / x with z = 1 / (x * x), where
 * R(z) = r0 + r1*z + ... + r5*z^5 and S(z) = 1 + s0*z + ... + s5*z^6.
 * The coefficient pair is picked from the high word of |x|; the lowest
 * interval is used for everything below 2.8571, so the caller is expected
 * to pass |x| >= 2 only. */
static double qzero(double x)
{
    /* for x in [inf, 8]=1/[0,0.125] */
    static const double qR8[6] = {
        0.00000000000000000000e+00,
        7.32421874999935051953e-02,
        1.17682064682252693899e+01,
        5.57673380256401856059e+02,
        8.85919720756468632317e+03,
        3.70146267776887834771e+04,
    };
    static const double qS8[6] = {
        1.63776026895689824414e+02,
        8.09834494656449805916e+03,
        1.42538291419120476348e+05,
        8.03309257119514397345e+05,
        8.40501579819060512818e+05,
        -3.43899293537866615225e+05,
    };
    /* for x in [8,4.5454]=1/[0.125,0.22001] */
    static const double qR5[6] = {
        1.84085963594515531381e-11,
        7.32421766612684765896e-02,
        5.83563508962056953777e+00,
        1.35111577286449829671e+02,
        1.02724376596164097464e+03,
        1.98997785864605384631e+03,
    };
    static const double qS5[6] = {
        8.27766102236537761883e+01,
        2.07781416421392987104e+03,
        1.88472887785718085070e+04,
        5.67511122894947329769e+04,
        3.59767538425114471465e+04,
        -5.35434275601944773371e+03,
    };
    /* for x in [4.547,2.8571]=1/[0.2199,0.35001] */
    static const double qR3[6] = {
        4.37741014089738620906e-09,
        7.32411180042911447163e-02,
        3.34423137516170720929e+00,
        4.26218440745412650017e+01,
        1.70808091340565596283e+02,
        1.66733948696651168575e+02,
    };
    static const double qS3[6] = {
        4.87588729724587182091e+01,
        7.09689221056606015736e+02,
        3.70414822620111362994e+03,
        6.46042516752568917582e+03,
        2.51633368920368957333e+03,
        -1.49247451836156386662e+02,
    };
    /* for x in [2.8570,2]=1/[0.3499,0.5] */
    static const double qR2[6] = {
        1.50444444886983272379e-07,
        7.32234265963079278272e-02,
        1.99819174093815998816e+00,
        1.44956029347885735348e+01,
        3.16662317504781540833e+01,
        1.62527075710929267416e+01,
    };
    static const double qS2[6] = {
        3.03655848355219184498e+01,
        2.69348118608049844624e+02,
        8.44783757595320139444e+02,
        8.82935845112488550512e+02,
        2.12666388511798828631e+02,
        -5.31095493882666946917e+00,
    };

    union { double value; unsigned long long bits; } pun;
    const double *p, *q;
    double z, num, den;
    unsigned int hi;

    /* high 32 bits of x with the sign bit cleared */
    pun.value = x;
    hi = (unsigned int)(pun.bits >> 32) & 0x7fffffff;

    if (hi < 0x4006DB6D) {  /* expected: hi >= 0x40000000 */
        p = qR2;
        q = qS2;
    } else if (hi < 0x40122E8B) {
        p = qR3;
        q = qS3;
    } else if (hi < 0x40200000) {
        p = qR5;
        q = qS5;
    } else {
        p = qR8;
        q = qS8;
    }

    z = 1.0 / (x * x);
    num = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
    den = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5])))));
    return (-0.125 + num / den) / x;
}
6241
6242 /* j0 and y0 approximation for |x|>=2 */
6243 static double j0_y0_approx(unsigned int ix, double x, BOOL y0)
6244 {
6245     static const double invsqrtpi = 5.64189583547756279280e-01;
6246
6247     double s, c, ss, cc, z;
6248
6249     s = sin(x);
6250     c = cos(x);
6251     if (y0) c = -c;
6252     cc = s + c;
6253     /* avoid overflow in 2*x, big ulp error when x>=0x1p1023 */
6254     if (ix < 0x7fe00000) {
6255         ss = s - c;
6256         z = -cos(2 * x);
6257         if (s * c < 0) cc = z / ss;
6258         else ss = z / cc;
6259         if (ix < 0x48000000) {
6260             if (y0) ss = -ss;
6261             cc = pzero(x) * cc - qzero(x) * ss;
6262         }
6263     }
6264     return invsqrtpi * cc / sqrt(x);
6265 }
6266
6267 /*********************************************************************
6268  *              _j0 (MSVCRT.@)
6269  *
6270  * Copied from musl: src/math/j0.c
6271  */
6272 double CDECL _j0(double x)
6273 {
6274     static const double R02 =  1.56249999999999947958e-02,
6275             R03 = -1.89979294238854721751e-04,
6276             R04 =  1.82954049532700665670e-06,
6277             R05 = -4.61832688532103189199e-09,
6278             S01 =  1.56191029464890010492e-02,
6279             S02 =  1.16926784663337450260e-04,
6280             S03 =  5.13546550207318111446e-07,
6281             S04 =  1.16614003333790000205e-09;
6282
6283     double z, r, s;
6284     unsigned int ix;
6285
6286     ix = *(ULONGLONG*)&x >> 32;
6287     ix &= 0x7fffffff;
6288
6289     /* j0(+-inf)=0, j0(nan)=nan */
6290     if (ix >= 0x7ff00000)
6291         return math_error(_DOMAIN, "_j0", x, 0, 1 / (x * x));
6292     x = fabs(x);
6293
6294     if (ix >= 0x40000000) {  /* |x| >= 2 */
6295         /* large ulp error near zeros: 2.4, 5.52, 8.6537,.. */
6296         return j0_y0_approx(ix, x, FALSE);
6297     }
6298
6299     if (ix >= 0x3f200000) {  /* |x| >= 2**-13 */
6300         /* up to 4ulp error close to 2 */
6301         z = x * x;
6302         r = z * (R02 + z * (R03 + z * (R04 + z * R05)));
6303         s = 1 + z * (S01 + z * (S02 + z * (S03 + z * S04)));
6304         return (1 + x / 2) * (1 - x / 2) + z * (r / s);
6305     }
6306
6307     /* 1 - x*x/4 */
6308     /* prevent underflow */
6309     /* inexact should be raised when x!=0, this is not done correctly */
6310     if (ix >= 0x38000000)  /* |x| >= 2**-127 */
6311         x = 0.25 * x * x;
6312     return 1 - x;
6313 }
6314
/* Rational correction term for the large-argument j1/y1 formula.
 *
 * Returns 1.0 + R(z) / S(z) with z = 1 / (x * x), where
 * R(z) = r0 + r1*z + ... + r5*z^5 and S(z) = 1 + s0*z + ... + s4*z^5.
 * The coefficient pair is picked from the high word of |x|; the lowest
 * interval is used for everything below 2.8571, so the caller is expected
 * to pass |x| >= 2 only. */
static double pone(double x)
{
    /* for x in [inf, 8]=1/[0,0.125] */
    static const double pr8[6] = {
        0.00000000000000000000e+00,
        1.17187499999988647970e-01,
        1.32394806593073575129e+01,
        4.12051854307378562225e+02,
        3.87474538913960532227e+03,
        7.91447954031891731574e+03,
    };
    static const double ps8[5] = {
        1.14207370375678408436e+02,
        3.65093083420853463394e+03,
        3.69562060269033463555e+04,
        9.76027935934950801311e+04,
        3.08042720627888811578e+04,
    };
    /* for x in [8,4.5454]=1/[0.125,0.22001] */
    static const double pr5[6] = {
        1.31990519556243522749e-11,
        1.17187493190614097638e-01,
        6.80275127868432871736e+00,
        1.08308182990189109773e+02,
        5.17636139533199752805e+02,
        5.28715201363337541807e+02,
    };
    static const double ps5[5] = {
        5.92805987221131331921e+01,
        9.91401418733614377743e+02,
        5.35326695291487976647e+03,
        7.84469031749551231769e+03,
        1.50404688810361062679e+03,
    };
    /* used when the high word is in [0x4006DB6D, 0x40122E8B) */
    static const double pr3[6] = {
        3.02503916137373618024e-09,
        1.17186865567253592491e-01,
        3.93297750033315640650e+00,
        3.51194035591636932736e+01,
        9.10550110750781271918e+01,
        4.85590685197364919645e+01,
    };
    static const double ps3[5] = {
        3.47913095001251519989e+01,
        3.36762458747825746741e+02,
        1.04687139975775130551e+03,
        8.90811346398256432622e+02,
        1.03787932439639277504e+02,
    };
    /* for x in [2.8570,2]=1/[0.3499,0.5] */
    static const double pr2[6] = {
        1.07710830106873743082e-07,
        1.17176219462683348094e-01,
        2.36851496667608785174e+00,
        1.22426109148261232917e+01,
        1.76939711271687727390e+01,
        5.07352312588818499250e+00,
    };
    static const double ps2[5] = {
        2.14364859363821409488e+01,
        1.25290227168402751090e+02,
        2.32276469057162813669e+02,
        1.17679373287147100768e+02,
        8.36463893371618283368e+00,
    };

    union { double value; unsigned long long bits; } pun;
    const double *p, *q;
    double z, num, den;
    unsigned int hi;

    /* high 32 bits of x with the sign bit cleared */
    pun.value = x;
    hi = (unsigned int)(pun.bits >> 32) & 0x7fffffff;

    if (hi < 0x4006DB6D) {  /* expected: hi >= 0x40000000 */
        p = pr2;
        q = ps2;
    } else if (hi < 0x40122E8B) {
        p = pr3;
        q = ps3;
    } else if (hi < 0x40200000) {
        p = pr5;
        q = ps5;
    } else {
        p = pr8;
        q = ps8;
    }

    z = 1.0 / (x * x);
    num = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
    den = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4]))));
    return 1.0 + num / den;
}
6395
/* Rational correction term for the large-argument j1/y1 formula.
 *
 * Returns (0.375 + R(z) / S(z)) / x with z = 1 / (x * x), where
 * R(z) = r0 + r1*z + ... + r5*z^5 and S(z) = 1 + s0*z + ... + s5*z^6.
 * The coefficient pair is picked from the high word of |x|; the lowest
 * interval is used for everything below 2.8571, so the caller is expected
 * to pass |x| >= 2 only. */
static double qone(double x)
{
    /* for x in [inf, 8]=1/[0,0.125] */
    static const double qr8[6] = {
        0.00000000000000000000e+00,
        -1.02539062499992714161e-01,
        -1.62717534544589987888e+01,
        -7.59601722513950107896e+02,
        -1.18498066702429587167e+04,
        -4.84385124285750353010e+04,
    };
    static const double qs8[6] = {
        1.61395369700722909556e+02,
        7.82538599923348465381e+03,
        1.33875336287249578163e+05,
        7.19657723683240939863e+05,
        6.66601232617776375264e+05,
        -2.94490264303834643215e+05,
    };
    /* for x in [8,4.5454]=1/[0.125,0.22001] */
    static const double qr5[6] = {
        -2.08979931141764104297e-11,
        -1.02539050241375426231e-01,
        -8.05644828123936029840e+00,
        -1.83669607474888380239e+02,
        -1.37319376065508163265e+03,
        -2.61244440453215656817e+03,
    };
    static const double qs5[6] = {
        8.12765501384335777857e+01,
        1.99179873460485964642e+03,
        1.74684851924908907677e+04,
        4.98514270910352279316e+04,
        2.79480751638918118260e+04,
        -4.71918354795128470869e+03,
    };
    /* used when the high word is in [0x4006DB6D, 0x40122E8B) */
    static const double qr3[6] = {
        -5.07831226461766561369e-09,
        -1.02537829820837089745e-01,
        -4.61011581139473403113e+00,
        -5.78472216562783643212e+01,
        -2.28244540737631695038e+02,
        -2.19210128478909325622e+02,
    };
    static const double qs3[6] = {
        4.76651550323729509273e+01,
        6.73865112676699709482e+02,
        3.38015286679526343505e+03,
        5.54772909720722782367e+03,
        1.90311919338810798763e+03,
        -1.35201191444307340817e+02,
    };
    /* for x in [2.8570,2]=1/[0.3499,0.5] */
    static const double qr2[6] = {
        -1.78381727510958865572e-07,
        -1.02517042607985553460e-01,
        -2.75220568278187460720e+00,
        -1.96636162643703720221e+01,
        -4.23253133372830490089e+01,
        -2.13719211703704061733e+01,
    };
    static const double qs2[6] = {
        2.95333629060523854548e+01,
        2.52981549982190529136e+02,
        7.57502834868645436472e+02,
        7.39393205320467245656e+02,
        1.55949003336666123687e+02,
        -4.95949898822628210127e+00,
    };

    union { double value; unsigned long long bits; } pun;
    const double *p, *q;
    double z, num, den;
    unsigned int hi;

    /* high 32 bits of x with the sign bit cleared */
    pun.value = x;
    hi = (unsigned int)(pun.bits >> 32) & 0x7fffffff;

    if (hi < 0x4006DB6D) {  /* expected: hi >= 0x40000000 */
        p = qr2;
        q = qs2;
    } else if (hi < 0x40122E8B) {
        p = qr3;
        q = qs3;
    } else if (hi < 0x40200000) {
        p = qr5;
        q = qs5;
    } else {
        p = qr8;
        q = qs8;
    }

    z = 1.0 / (x * x);
    num = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
    den = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5])))));
    return (0.375 + num / den) / x;
}
6480
6481 static double j1_y1_approx(unsigned int ix, double x, BOOL y1, int sign)
6482 {
6483     static const double invsqrtpi = 5.64189583547756279280e-01;
6484
6485     double z, s, c, ss, cc;
6486
6487     s = sin(x);
6488     if (y1) s = -s;
6489     c = cos(x);
6490     cc = s - c;
6491     if (ix < 0x7fe00000) {
6492         ss = -s - c;
6493         z = cos(2 * x);
6494         if (s * c > 0) cc = z / ss;
6495         else ss = z / cc;
6496         if (ix < 0x48000000) {
6497             if (y1)
6498                 ss = -ss;
6499             cc = pone(x) * cc - qone(x) * ss;
6500         }
6501     }
6502     if (sign)
6503         cc = -cc;
6504     return invsqrtpi * cc / sqrt(x);
6505 }
6506
6507 /*********************************************************************
6508  *              _j1 (MSVCRT.@)
6509  *
6510  * Copied from musl: src/math/j1.c
6511  */
6512 double CDECL _j1(double x)
6513 {
6514     static const double r00 = -6.25000000000000000000e-02,
6515         r01 =  1.40705666955189706048e-03,
6516         r02 = -1.59955631084035597520e-05,
6517         r03 =  4.96727999609584448412e-08,
6518         s01 =  1.91537599538363460805e-02,
6519         s02 =  1.85946785588630915560e-04,
6520         s03 =  1.17718464042623683263e-06,
6521         s04 =  5.04636257076217042715e-09,
6522         s05 =  1.23542274426137913908e-11;
6523
6524     double z, r, s;
6525     unsigned int ix;
6526     int sign;
6527
6528     ix = *(ULONGLONG*)&x >> 32;
6529     sign = ix >> 31;
6530     ix &= 0x7fffffff;
6531     if (ix >= 0x7ff00000)
6532         return math_error(isnan(x) ? 0 : _DOMAIN, "_j1", x, 0, 1 / (x * x));
6533     if (ix >= 0x40000000)  /* |x| >= 2 */
6534         return j1_y1_approx(ix, fabs(x), FALSE, sign);
6535     if (ix >= 0x38000000) {  /* |x| >= 2**-127 */
6536         z = x * x;
6537         r = z * (r00 + z * (r01 + z * (r02 + z * r03)));
6538         s = 1 + z * (s01 + z * (s02 + z * (s03 + z * (s04 + z * s05))));
6539         z = r / s;
6540     } else {
6541         /* avoid underflow, raise inexact if x!=0 */
6542         z = x;
6543     }
6544     return (0.5 + z) * x;
6545 }
6546
6547 /*********************************************************************
6548  *              _jn (MSVCRT.@)
6549  *
6550  * Copied from musl: src/math/jn.c
6551  */
6552 double CDECL _jn(int n, double x)
6553 {
6554     static const double invsqrtpi = 5.64189583547756279280e-01;
6555
6556     unsigned int ix, lx;
6557     int nm1, i, sign;
6558     double a, b, temp;
6559
6560     ix = *(ULONGLONG*)&x >> 32;
6561     lx = *(ULONGLONG*)&x;
6562     sign = ix >> 31;
6563     ix &= 0x7fffffff;
6564
6565     if ((ix | (lx | -lx) >> 31) > 0x7ff00000) /* nan */
6566         return x;
6567
6568     if (n == 0)
6569         return _j0(x);
6570     if (n < 0) {
6571         nm1 = -(n + 1);
6572         x = -x;
6573         sign ^= 1;
6574     } else {
6575         nm1 = n-1;
6576     }
6577     if (nm1 == 0)
6578         return j1(x);
6579
6580     sign &= n;  /* even n: 0, odd n: signbit(x) */
6581     x = fabs(x);
6582     if ((ix | lx) == 0 || ix == 0x7ff00000)  /* if x is 0 or inf */
6583         b = 0.0;
6584     else if (nm1 < x) {
6585         if (ix >= 0x52d00000) { /* x > 2**302 */
6586             switch(nm1 & 3) {
6587             case 0:
6588                 temp = -cos(x) + sin(x);
6589                 break;
6590             case 1:
6591                 temp = -cos(x) - sin(x);
6592                 break;
6593             case 2:
6594                 temp =  cos(x) - sin(x);
6595                 break;
6596             default:
6597                 temp =  cos(x) + sin(x);
6598                 break;
6599             }
6600             b = invsqrtpi * temp / sqrt(x);
6601         } else {
6602             a = _j0(x);
6603             b = _j1(x);
6604             for (i = 0; i < nm1; ) {
6605                 i++;
6606                 temp = b;
6607                 b = b * (2.0 * i / x) - a; /* avoid underflow */
6608                 a = temp;
6609             }
6610         }
6611     } else {
6612         if (ix < 0x3e100000) { /* x < 2**-29 */
6613             if (nm1 > 32)  /* underflow */
6614                 b = 0.0;
6615             else {
6616                 temp = x * 0.5;
6617                 b = temp;
6618                 a = 1.0;
6619                 for (i = 2; i <= nm1 + 1; i++) {
6620                     a *= (double)i; /* a = n! */
6621                     b *= temp;      /* b = (x/2)^n */
6622                 }
6623                 b = b / a;
6624             }
6625         } else {
6626             double t, q0, q1, w, h, z, tmp, nf;
6627             int k;
6628
6629             nf = nm1 + 1.0;
6630             w = 2 * nf / x;
6631             h = 2 / x;
6632             z = w + h;
6633             q0 = w;
6634             q1 = w * z - 1.0;
6635             k = 1;
6636             while (q1 < 1.0e9) {
6637                 k += 1;
6638                 z += h;
6639                 tmp = z * q1 - q0;
6640                 q0 = q1;
6641                 q1 = tmp;
6642             }
6643             for (t = 0.0, i = k; i >= 0; i--)
6644                 t = 1 / (2 * (i + nf) / x - t);
6645             a = t;
6646             b = 1.0;
6647             tmp = nf * log(fabs(w));
6648             if (tmp < 7.09782712893383973096e+02) {
6649                 for (i = nm1; i > 0; i--) {
6650                     temp = b;
6651                     b = b * (2.0 * i) / x - a;
6652                     a = temp;
6653                 }
6654             } else {
6655                 for (i = nm1; i > 0; i--) {
6656                     temp = b;
6657                     b = b * (2.0 * i) / x - a;
6658                     a = temp;
6659                     /* scale b to avoid spurious overflow */
6660                     if (b > 0x1p500) {
6661                         a /= b;
6662                         t /= b;
6663                         b  = 1.0;
6664                     }
6665                 }
6666             }
6667             z = j0(x);
6668             w = j1(x);
6669             if (fabs(z) >= fabs(w))
6670                 b = t * z / b;
6671             else
6672                 b = t * w / a;
6673         }
6674     }
6675     return sign ? -b : b;
6676 }
6677
6678 /*********************************************************************
6679  *              _y0 (MSVCRT.@)
6680  */
6681 double CDECL _y0(double x)
6682 {
6683     static const double tpi = 6.36619772367581382433e-01,
6684         u00  = -7.38042951086872317523e-02,
6685         u01  =  1.76666452509181115538e-01,
6686         u02  = -1.38185671945596898896e-02,
6687         u03  =  3.47453432093683650238e-04,
6688         u04  = -3.81407053724364161125e-06,
6689         u05  =  1.95590137035022920206e-08,
6690         u06  = -3.98205194132103398453e-11,
6691         v01  =  1.27304834834123699328e-02,
6692         v02  =  7.60068627350353253702e-05,
6693         v03  =  2.59150851840457805467e-07,
6694         v04  =  4.41110311332675467403e-10;
6695
6696     double z, u, v;
6697     unsigned int ix, lx;
6698
6699     ix = *(ULONGLONG*)&x >> 32;
6700     lx = *(ULONGLONG*)&x;
6701
6702     /* y0(nan)=nan, y0(<0)=nan, y0(0)=-inf, y0(inf)=0 */
6703     if ((ix << 1 | lx) == 0)
6704         return math_error(_OVERFLOW, "_y0", x, 0, -INFINITY);
6705     if (isnan(x))
6706         return x;
6707     if (ix >> 31)
6708         return math_error(_DOMAIN, "_y0", x, 0, 0 / (x - x));
6709     if (ix >= 0x7ff00000)
6710         return 1 / x;
6711
6712     if (ix >= 0x40000000) {  /* x >= 2 */
6713         /* large ulp errors near zeros: 3.958, 7.086,.. */
6714         return j0_y0_approx(ix, x, TRUE);
6715     }
6716
6717     if (ix >= 0x3e400000) {  /* x >= 2**-27 */
6718         /* large ulp error near the first zero, x ~= 0.89 */
6719         z = x * x;
6720         u = u00 + z * (u01 + z * (u02 + z * (u03 + z * (u04 + z * (u05 + z * u06)))));
6721         v = 1.0 + z * (v01 + z * (v02 + z * (v03 + z * v04)));
6722         return u / v + tpi * (j0(x) * log(x));
6723     }
6724     return u00 + tpi * log(x);
6725 }
6726
6727 /*********************************************************************
6728  *              _y1 (MSVCRT.@)
6729  */
6730 double CDECL _y1(double x)
6731 {
6732     static const double tpi = 6.36619772367581382433e-01,
6733         u00 =  -1.96057090646238940668e-01,
6734         u01 = 5.04438716639811282616e-02,
6735         u02 = -1.91256895875763547298e-03,
6736         u03 = 2.35252600561610495928e-05,
6737         u04 = -9.19099158039878874504e-08,
6738         v00 = 1.99167318236649903973e-02,
6739         v01 = 2.02552581025135171496e-04,
6740         v02 = 1.35608801097516229404e-06,
6741         v03 = 6.22741452364621501295e-09,
6742         v04 = 1.66559246207992079114e-11;
6743
6744     double z, u, v;
6745     unsigned int ix, lx;
6746
6747     ix = *(ULONGLONG*)&x >> 32;
6748     lx = *(ULONGLONG*)&x;
6749
6750     /* y1(nan)=nan, y1(<0)=nan, y1(0)=-inf, y1(inf)=0 */
6751     if ((ix << 1 | lx) == 0)
6752         return math_error(_OVERFLOW, "_y1", x, 0, -INFINITY);
6753     if (isnan(x))
6754         return x;
6755     if (ix >> 31)
6756         return math_error(_DOMAIN, "_y1", x, 0, 0 / (x - x));
6757     if (ix >= 0x7ff00000)
6758         return 1 / x;
6759
6760     if (ix >= 0x40000000)  /* x >= 2 */
6761         return j1_y1_approx(ix, x, TRUE, 0);
6762     if (ix < 0x3c900000)  /* x < 2**-54 */
6763         return -tpi / x;
6764     z = x * x;
6765     u = u00 + z * (u01 + z * (u02 + z * (u03 + z * u04)));
6766     v = 1 + z * (v00 + z * (v01 + z * (v02 + z * (v03 + z * v04))));
6767     return x * (u / v) + tpi * (j1(x) * log(x) - 1 / x);
6768 }
6769
6770 /*********************************************************************
6771  *              _yn (MSVCRT.@)
6772  *
6773  * Copied from musl: src/math/jn.c
6774  */
6775 double CDECL _yn(int n, double x)
6776 {
6777     static const double invsqrtpi = 5.64189583547756279280e-01;
6778
6779     unsigned int ix, lx, ib;
6780     int nm1, sign, i;
6781     double a, b, temp;
6782
6783     ix = *(ULONGLONG*)&x >> 32;
6784     lx = *(ULONGLONG*)&x;
6785     sign = ix >> 31;
6786     ix &= 0x7fffffff;
6787
6788     if ((ix | (lx | -lx) >> 31) > 0x7ff00000) /* nan */
6789         return x;
6790     if (sign && (ix | lx) != 0) /* x < 0 */
6791         return math_error(_DOMAIN, "_y1", x, 0, 0 / (x - x));
6792     if (ix == 0x7ff00000)
6793         return 0.0;
6794
6795     if (n == 0)
6796         return y0(x);
6797     if (n < 0) {
6798         nm1 = -(n + 1);
6799         sign = n & 1;
6800     } else {
6801         nm1 = n - 1;
6802         sign = 0;
6803     }
6804     if (nm1 == 0)
6805         return sign ? -y1(x) : y1(x);
6806
6807     if (ix >= 0x52d00000) { /* x > 2**302 */
6808         switch(nm1 & 3) {
6809         case 0:
6810             temp = -sin(x) - cos(x);
6811             break;
6812         case 1:
6813             temp = -sin(x) + cos(x);
6814             break;
6815         case 2:
6816             temp = sin(x) + cos(x);
6817             break;
6818         default:
6819             temp = sin(x) - cos(x);
6820             break;
6821         }
6822         b = invsqrtpi * temp / sqrt(x);
6823     } else {
6824         a = y0(x);
6825         b = y1(x);
6826         /* quit if b is -inf */
6827         ib = *(ULONGLONG*)&b >> 32;
6828         for (i = 0; i < nm1 && ib != 0xfff00000;) {
6829             i++;
6830             temp = b;
6831             b = (2.0 * i / x) * b - a;
6832             ib = *(ULONGLONG*)&b >> 32;
6833             a = temp;
6834         }
6835     }
6836     return sign ? -b : b;
6837 }
6838
6839 #if _MSVCR_VER>=120
6840
6841 /*********************************************************************
6842  *              _nearbyint (MSVCR120.@)
6843  *
6844  * Based on musl: src/math/nearbyteint.c
6845  */
6846 double CDECL nearbyint(double x)
6847 {
6848     fenv_t env;
6849
6850     fegetenv(&env);
6851     _control87(_MCW_EM, _MCW_EM);
6852     x = rint(x);
6853     feclearexcept(FE_INEXACT);
6854     feupdateenv(&env);
6855     return x;
6856 }
6857
6858 /*********************************************************************
6859  *              _nearbyintf (MSVCR120.@)
6860  *
6861  * Based on musl: src/math/nearbyteintf.c
6862  */
6863 float CDECL nearbyintf(float x)
6864 {
6865     fenv_t env;
6866
6867     fegetenv(&env);
6868     _control87(_MCW_EM, _MCW_EM);
6869     x = rintf(x);
6870     feclearexcept(FE_INEXACT);
6871     feupdateenv(&env);
6872     return x;
6873 }
6874
6875 /*********************************************************************
6876  *              nexttoward (MSVCR120.@)
6877  */
6878 double CDECL MSVCRT_nexttoward(double num, double next)
6879 {
6880     return _nextafter(num, next);
6881 }
6882
6883 /*********************************************************************
6884  *              nexttowardf (MSVCR120.@)
6885  *
6886  * Copied from musl: src/math/nexttowardf.c
6887  */
6888 float CDECL MSVCRT_nexttowardf(float x, double y)
6889 {
6890     unsigned int ix = *(unsigned int*)&x;
6891     unsigned int e;
6892     float ret;
6893
6894     if (isnan(x) || isnan(y))
6895         return x + y;
6896     if (x == y)
6897         return y;
6898     if (x == 0) {
6899         ix = 1;
6900         if (signbit(y))
6901             ix |= 0x80000000;
6902     } else if (x < y) {
6903         if (signbit(x))
6904             ix--;
6905         else
6906             ix++;
6907     } else {
6908         if (signbit(x))
6909             ix++;
6910         else
6911             ix--;
6912     }
6913     e = ix & 0x7f800000;
6914     /* raise overflow if ix is infinite and x is finite */
6915     if (e == 0x7f800000) {
6916         fp_barrierf(x + x);
6917         *_errno() = ERANGE;
6918     }
6919     ret = *(float*)&ix;
6920     /* raise underflow if ret is subnormal or zero */
6921     if (e == 0) {
6922         fp_barrierf(x * x + ret * ret);
6923         *_errno() = ERANGE;
6924     }
6925     return ret;
6926 }
6927
6928 #endif /* _MSVCR_VER>=120 */
6929
6930 /*********************************************************************
6931  *              _nextafter (MSVCRT.@)
6932  *
6933  * Copied from musl: src/math/nextafter.c
6934  */
6935 double CDECL _nextafter(double x, double y)
6936 {
6937     ULONGLONG llx = *(ULONGLONG*)&x;
6938     ULONGLONG lly = *(ULONGLONG*)&y;
6939     ULONGLONG ax, ay;
6940     int e;
6941
6942     if (isnan(x) || isnan(y))
6943         return x + y;
6944     if (llx == lly) {
6945         if (_fpclass(y) & (_FPCLASS_ND | _FPCLASS_PD | _FPCLASS_NZ | _FPCLASS_PZ ))
6946             *_errno() = ERANGE;
6947         return y;
6948     }
6949     ax = llx & -1ULL / 2;
6950     ay = lly & -1ULL / 2;
6951     if (ax == 0) {
6952         if (ay == 0)
6953             return y;
6954         llx = (lly & 1ULL << 63) | 1;
6955     } else if (ax > ay || ((llx ^ lly) & 1ULL << 63))
6956         llx--;
6957     else
6958         llx++;
6959     e = llx >> 52 & 0x7ff;
6960     /* raise overflow if llx is infinite and x is finite */
6961     if (e == 0x7ff) {
6962         fp_barrier(x + x);
6963         *_errno() = ERANGE;
6964     }
6965     /* raise underflow if llx is subnormal or zero */
6966     y = *(double*)&llx;
6967     if (e == 0) {
6968         fp_barrier(x * x + y * y);
6969         *_errno() = ERANGE;
6970     }
6971     return y;
6972 }
6973
6974 /*********************************************************************
6975  *              _ecvt (MSVCRT.@)
6976  */
6977 char * CDECL _ecvt( double number, int ndigits, int *decpt, int *sign )
6978 {
6979     int prec, len;
6980     thread_data_t *data = msvcrt_get_thread_data();
6981     /* FIXME: check better for overflow (native supports over 300 chars) */
6982     ndigits = min( ndigits, 80 - 8); /* 8 : space for sign, dec point, "e",
6983                                       * 4 for exponent and one for
6984                                       * terminating '\0' */
6985     if (!data->efcvt_buffer)
6986         data->efcvt_buffer = malloc( 80 ); /* ought to be enough */
6987
6988     /* handle cases with zero ndigits or less */
6989     prec = ndigits;
6990     if( prec < 1) prec = 2;
6991     len = _snprintf(data->efcvt_buffer, 80, "%.*le", prec - 1, number);
6992
6993     if (data->efcvt_buffer[0] == '-') {
6994         memmove( data->efcvt_buffer, data->efcvt_buffer + 1, len-- );
6995         *sign = 1;
6996     } else *sign = 0;
6997
6998     /* take the decimal "point away */
6999     if( prec != 1)
7000         memmove( data->efcvt_buffer + 1, data->efcvt_buffer + 2, len - 1 );
7001     /* take the exponential "e" out */
7002     data->efcvt_buffer[ prec] = '\0';
7003     /* read the exponent */
7004     sscanf( data->efcvt_buffer + prec + 1, "%d", decpt);
7005     (*decpt)++;
7006     /* adjust for some border cases */
7007     if( data->efcvt_buffer[0] == '0')/* value is zero */
7008         *decpt = 0;
7009     /* handle cases with zero ndigits or less */
7010     if( ndigits < 1){
7011         if( data->efcvt_buffer[ 0] >= '5')
7012             (*decpt)++;
7013         data->efcvt_buffer[ 0] = '\0';
7014     }
7015     TRACE("out=\"%s\"\n",data->efcvt_buffer);
7016     return data->efcvt_buffer;
7017 }
7018
7019 /*********************************************************************
7020  *              _ecvt_s (MSVCRT.@)
7021  */
7022 int CDECL _ecvt_s( char *buffer, size_t length, double number, int ndigits, int *decpt, int *sign )
7023 {
7024     int prec, len;
7025     char *result;
7026
7027     if (!MSVCRT_CHECK_PMT(buffer != NULL)) return EINVAL;
7028     if (!MSVCRT_CHECK_PMT(decpt != NULL)) return EINVAL;
7029     if (!MSVCRT_CHECK_PMT(sign != NULL)) return EINVAL;
7030     if (!MSVCRT_CHECK_PMT_ERR( length > 2, ERANGE )) return ERANGE;
7031     if (!MSVCRT_CHECK_PMT_ERR(ndigits < (int)length - 1, ERANGE )) return ERANGE;
7032
7033     /* handle cases with zero ndigits or less */
7034     prec = ndigits;
7035     if( prec < 1) prec = 2;
7036     result = malloc(prec + 8);
7037
7038     len = _snprintf(result, prec + 8, "%.*le", prec - 1, number);
7039     if (result[0] == '-') {
7040         memmove( result, result + 1, len-- );
7041         *sign = 1;
7042     } else *sign = 0;
7043
7044     /* take the decimal "point away */
7045     if( prec != 1)
7046         memmove( result + 1, result + 2, len - 1 );
7047     /* take the exponential "e" out */
7048     result[ prec] = '\0';
7049     /* read the exponent */
7050     sscanf( result + prec + 1, "%d", decpt);
7051     (*decpt)++;
7052     /* adjust for some border cases */
7053     if( result[0] == '0')/* value is zero */
7054         *decpt = 0;
7055     /* handle cases with zero ndigits or less */
7056     if( ndigits < 1){
7057         if( result[ 0] >= '5')
7058             (*decpt)++;
7059         result[ 0] = '\0';
7060     }
7061     memcpy( buffer, result, max(ndigits + 1, 1) );
7062     free( result );
7063     return 0;
7064 }
7065
7066 /***********************************************************************
7067  *              _fcvt  (MSVCRT.@)
7068  */
7069 char * CDECL _fcvt( double number, int ndigits, int *decpt, int *sign )
7070 {
7071     thread_data_t *data = msvcrt_get_thread_data();
7072     int stop, dec1, dec2;
7073     char *ptr1, *ptr2, *first;
7074     char buf[80]; /* ought to be enough */
7075     char decimal_separator = get_locinfo()->lconv->decimal_point[0];
7076
7077     if (!data->efcvt_buffer)
7078         data->efcvt_buffer = malloc( 80 ); /* ought to be enough */
7079
7080     stop = _snprintf(buf, 80, "%.*f", ndigits < 0 ? 0 : ndigits, number);
7081     ptr1 = buf;
7082     ptr2 = data->efcvt_buffer;
7083     first = NULL;
7084     dec1 = 0;
7085     dec2 = 0;
7086
7087     if (*ptr1 == '-') {
7088         *sign = 1;
7089         ptr1++;
7090     } else *sign = 0;
7091
7092     /* For numbers below the requested resolution, work out where
7093        the decimal point will be rather than finding it in the string */
7094     if (number < 1.0 && number > 0.0) {
7095         dec2 = log10(number + 1e-10);
7096         if (-dec2 <= ndigits) dec2 = 0;
7097     }
7098
7099     /* If requested digits is zero or less, we will need to truncate
7100      * the returned string */
7101     if (ndigits < 1) {
7102         stop += ndigits;
7103     }
7104
7105     while (*ptr1 == '0') ptr1++; /* Skip leading zeroes */
7106     while (*ptr1 != '\0' && *ptr1 != decimal_separator) {
7107         if (!first) first = ptr2;
7108         if ((ptr1 - buf) < stop) {
7109             *ptr2++ = *ptr1++;
7110         } else {
7111             ptr1++;
7112         }
7113         dec1++;
7114     }
7115
7116     if (ndigits > 0) {
7117         ptr1++;
7118         if (!first) {
7119             while (*ptr1 == '0') { /* Process leading zeroes */
7120                 *ptr2++ = *ptr1++;
7121                 dec1--;
7122             }
7123         }
7124         while (*ptr1 != '\0') {
7125             if (!first) first = ptr2;
7126             *ptr2++ = *ptr1++;
7127         }
7128     }
7129
7130     *ptr2 = '\0';
7131
7132     /* We never found a non-zero digit, then our number is either
7133      * smaller than the requested precision, or 0.0 */
7134     if (!first) {
7135         if (number > 0.0) {
7136             first = ptr2;
7137         } else {
7138             first = data->efcvt_buffer;
7139             dec1 = 0;
7140         }
7141     }
7142
7143     *decpt = dec2 ? dec2 : dec1;
7144     return first;
7145 }
7146
7147 /***********************************************************************
7148  *              _fcvt_s  (MSVCRT.@)
7149  */
7150 int CDECL _fcvt_s(char* outbuffer, size_t size, double number, int ndigits, int *decpt, int *sign)
7151 {
7152     int stop, dec1, dec2;
7153     char *ptr1, *ptr2, *first;
7154     char buf[80]; /* ought to be enough */
7155     char decimal_separator = get_locinfo()->lconv->decimal_point[0];
7156
7157     if (!outbuffer || !decpt || !sign || size == 0)
7158     {
7159         *_errno() = EINVAL;
7160         return EINVAL;
7161     }
7162
7163     stop = _snprintf(buf, 80, "%.*f", ndigits < 0 ? 0 : ndigits, number);
7164     ptr1 = buf;
7165     ptr2 = outbuffer;
7166     first = NULL;
7167     dec1 = 0;
7168     dec2 = 0;
7169
7170     if (*ptr1 == '-') {
7171         *sign = 1;
7172         ptr1++;
7173     } else *sign = 0;
7174
7175     /* For numbers below the requested resolution, work out where
7176        the decimal point will be rather than finding it in the string */
7177     if (number < 1.0 && number > 0.0) {
7178         dec2 = log10(number + 1e-10);
7179         if (-dec2 <= ndigits) dec2 = 0;
7180     }
7181
7182     /* If requested digits is zero or less, we will need to truncate
7183      * the returned string */
7184     if (ndigits < 1) {
7185         stop += ndigits;
7186     }
7187
7188     while (*ptr1 == '0') ptr1++; /* Skip leading zeroes */
7189     while (*ptr1 != '\0' && *ptr1 != decimal_separator) {
7190         if (!first) first = ptr2;
7191         if ((ptr1 - buf) < stop) {
7192             if (size > 1) {
7193                 *ptr2++ = *ptr1++;
7194                 size--;
7195             }
7196         } else {
7197             ptr1++;
7198         }
7199         dec1++;
7200     }
7201
7202     if (ndigits > 0) {
7203         ptr1++;
7204         if (!first) {
7205             while (*ptr1 == '0') { /* Process leading zeroes */
7206                 if (number == 0.0 && size > 1) {
7207                     *ptr2++ = '0';
7208                     size--;
7209                 }
7210                 ptr1++;
7211                 dec1--;
7212             }
7213         }
7214         while (*ptr1 != '\0') {
7215             if (!first) first = ptr2;
7216             if (size > 1) {
7217                 *ptr2++ = *ptr1++;
7218                 size--;
7219             }
7220         }
7221     }
7222
7223     *ptr2 = '\0';
7224
7225     /* We never found a non-zero digit, then our number is either
7226      * smaller than the requested precision, or 0.0 */
7227     if (!first && (number <= 0.0))
7228         dec1 = 0;
7229
7230     *decpt = dec2 ? dec2 : dec1;
7231     return 0;
7232 }
7233
7234 /***********************************************************************
7235  *              _gcvt  (MSVCRT.@)
7236  */
7237 char * CDECL _gcvt( double number, int ndigit, char *buff )
7238 {
7239     if(!buff) {
7240         *_errno() = EINVAL;
7241         return NULL;
7242     }
7243
7244     if(ndigit < 0) {
7245         *_errno() = ERANGE;
7246         return NULL;
7247     }
7248
7249     sprintf(buff, "%.*g", ndigit, number);
7250     return buff;
7251 }
7252
7253 /***********************************************************************
7254  *              _gcvt_s  (MSVCRT.@)
7255  */
7256 int CDECL _gcvt_s(char *buff, size_t size, double number, int digits)
7257 {
7258     int len;
7259
7260     if(!buff) {
7261         *_errno() = EINVAL;
7262         return EINVAL;
7263     }
7264
7265     if( digits<0 || digits>=size) {
7266         if(size)
7267             buff[0] = '\0';
7268
7269         *_errno() = ERANGE;
7270         return ERANGE;
7271     }
7272
7273     len = _scprintf("%.*g", digits, number);
7274     if(len > size) {
7275         buff[0] = '\0';
7276         *_errno() = ERANGE;
7277         return ERANGE;
7278     }
7279
7280     sprintf(buff, "%.*g", digits, number);
7281     return 0;
7282 }
7283
7284 #include <stdlib.h> /* div_t, ldiv_t */
7285
7286 /*********************************************************************
7287  *              div (MSVCRT.@)
7288  * VERSION
7289  *      [i386] Windows binary compatible - returns the struct in eax/edx.
7290  */
7291 #ifdef __i386__
7292 unsigned __int64 CDECL div(int num, int denom)
7293 {
7294     union {
7295         div_t div;
7296         unsigned __int64 uint64;
7297     } ret;
7298
7299     ret.div.quot = num / denom;
7300     ret.div.rem = num % denom;
7301     return ret.uint64;
7302 }
7303 #else
7304 /*********************************************************************
7305  *              div (MSVCRT.@)
7306  * VERSION
7307  *      [!i386] Non-x86 can't run win32 apps so we don't need binary compatibility
7308  */
7309 div_t CDECL div(int num, int denom)
7310 {
7311     div_t ret;
7312
7313     ret.quot = num / denom;
7314     ret.rem = num % denom;
7315     return ret;
7316 }
7317 #endif /* ifdef __i386__ */
7318
7319
7320 /*********************************************************************
7321  *              ldiv (MSVCRT.@)
7322  * VERSION
7323  *      [i386] Windows binary compatible - returns the struct in eax/edx.
7324  */
7325 #ifdef __i386__
7326 unsigned __int64 CDECL ldiv(__msvcrt_long num, __msvcrt_long denom)
7327 {
7328     union {
7329         ldiv_t ldiv;
7330         unsigned __int64 uint64;
7331     } ret;
7332
7333     ret.ldiv.quot = num / denom;
7334     ret.ldiv.rem = num % denom;
7335     return ret.uint64;
7336 }
7337 #else
7338 /*********************************************************************
7339  *              ldiv (MSVCRT.@)
7340  * VERSION
7341  *      [!i386] Non-x86 can't run win32 apps so we don't need binary compatibility
7342  */
7343 ldiv_t CDECL ldiv(__msvcrt_long num, __msvcrt_long denom)
7344 {
7345     ldiv_t ret;
7346
7347     ret.quot = num / denom;
7348     ret.rem = num % denom;
7349     return ret;
7350 }
7351 #endif /* ifdef __i386__ */
7352
#if _MSVCR_VER>=100
/*********************************************************************
 *		lldiv (MSVCR100.@)
 *
 * Compute quotient and remainder of num / denom for 64-bit operands.
 */
lldiv_t CDECL lldiv(__int64 num, __int64 denom)
{
  lldiv_t result;
  const __int64 q = num / denom;
  const __int64 r = num % denom;

  result.quot = q;
  result.rem = r;
  return result;
}
#endif
7367
7368 #ifdef __i386__
7369
/*********************************************************************
 *		_adjust_fdiv (MSVCRT.@)
 * Used by the MSVC compiler to work around the Pentium FDIV bug.
 *
 * Defined as 0 here.
 * NOTE(review): presumably a zero value tells compiled code that no
 * FDIV correction is required - confirm against the callers.
 */
int MSVCRT__adjust_fdiv = 0;
7375
/***********************************************************************
 *		_adj_fdiv_m16i (MSVCRT.@)
 *
 * Stub: emits a trace message and returns; 'arg' is not used.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void __stdcall _adj_fdiv_m16i( short arg )
{
  TRACE("(): stub\n");
}
7387
/***********************************************************************
 *		_adj_fdiv_m32 (MSVCRT.@)
 *
 * Stub: emits a trace message and returns; 'arg' is not used.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void __stdcall _adj_fdiv_m32( unsigned int arg )
{
  TRACE("(): stub\n");
}
7399
/***********************************************************************
 *		_adj_fdiv_m32i (MSVCRT.@)
 *
 * Stub: emits a trace message and returns; 'arg' is not used.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void __stdcall _adj_fdiv_m32i( int arg )
{
  TRACE("(): stub\n");
}
7411
/***********************************************************************
 *		_adj_fdiv_m64 (MSVCRT.@)
 *
 * Stub: emits a trace message and returns; 'arg' is not used.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void __stdcall _adj_fdiv_m64( unsigned __int64 arg )
{
  TRACE("(): stub\n");
}
7423
/***********************************************************************
 *		_adj_fdiv_r (MSVCRT.@)
 *
 * Stub: emits a trace message and returns without computing anything.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void _adj_fdiv_r(void)
{
  TRACE("(): stub\n");
}
7437
/***********************************************************************
 *		_adj_fdivr_m16i (MSVCRT.@)
 *
 * Stub: emits a trace message and returns; 'arg' is not used.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void __stdcall _adj_fdivr_m16i( short arg )
{
  TRACE("(): stub\n");
}
7449
/***********************************************************************
 *		_adj_fdivr_m32 (MSVCRT.@)
 *
 * Stub: emits a trace message and returns; 'arg' is not used.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void __stdcall _adj_fdivr_m32( unsigned int arg )
{
  TRACE("(): stub\n");
}
7461
/***********************************************************************
 *		_adj_fdivr_m32i (MSVCRT.@)
 *
 * Stub: emits a trace message and returns; 'arg' is not used.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void __stdcall _adj_fdivr_m32i( int arg )
{
  TRACE("(): stub\n");
}
7473
/***********************************************************************
 *		_adj_fdivr_m64 (MSVCRT.@)
 *
 * Stub: emits a trace message and returns; 'arg' is not used.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void __stdcall _adj_fdivr_m64( unsigned __int64 arg )
{
  TRACE("(): stub\n");
}
7485
/***********************************************************************
 *		_adj_fpatan (MSVCRT.@)
 *
 * Stub: emits a trace message and returns without computing anything.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void _adj_fpatan(void)
{
  TRACE("(): stub\n");
}
7499
/***********************************************************************
 *		_adj_fprem (MSVCRT.@)
 *
 * Stub: emits a trace message and returns without computing anything.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void _adj_fprem(void)
{
  TRACE("(): stub\n");
}
7513
/***********************************************************************
 *		_adj_fprem1 (MSVCRT.@)
 *
 * Stub: emits a trace message and returns without computing anything.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void _adj_fprem1(void)
{
  TRACE("(): stub\n");
}
7527
/***********************************************************************
 *		_adj_fptan (MSVCRT.@)
 *
 * Stub: emits a trace message and returns without computing anything.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void _adj_fptan(void)
{
  TRACE("(): stub\n");
}
7541
/***********************************************************************
 *		_safe_fdiv (MSVCRT.@)
 *
 * Stub: emits a trace message and returns without computing anything.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void _safe_fdiv(void)
{
  TRACE("(): stub\n");
}
7555
/***********************************************************************
 *		_safe_fdivr (MSVCRT.@)
 *
 * Stub: emits a trace message and returns without computing anything.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void _safe_fdivr(void)
{
  TRACE("(): stub\n");
}
7569
/***********************************************************************
 *		_safe_fprem (MSVCRT.@)
 *
 * Stub: emits a trace message and returns without computing anything.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void _safe_fprem(void)
{
  TRACE("(): stub\n");
}
7583
/***********************************************************************
 *		_safe_fprem1 (MSVCRT.@)
 *
 * Stub: emits a trace message and returns without computing anything.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void _safe_fprem1(void)
{
  TRACE("(): stub\n");
}
7598
/***********************************************************************
 *		__libm_sse2_acos   (MSVCRT.@)
 *
 * acos() with the argument passed and the result returned in xmm0:
 * the low 64 bits of xmm0 are stored to a local double, acos() is
 * applied to it and the result is loaded back into xmm0.
 */
void __cdecl __libm_sse2_acos(void)
{
    double d;
    /* fetch the argument from xmm0 */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = acos( d );
    /* hand the result back in xmm0 */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7609
/***********************************************************************
 *		__libm_sse2_acosf   (MSVCRT.@)
 *
 * acosf() with the argument passed and the result returned in xmm0:
 * the low 32 bits of xmm0 are moved to a local float, acosf() is
 * applied to it and the result is moved back into xmm0.
 */
void __cdecl __libm_sse2_acosf(void)
{
    float f;
    /* fetch the argument from xmm0 */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = acosf( f );
    /* hand the result back in xmm0 */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7620
/***********************************************************************
 *		__libm_sse2_asin   (MSVCRT.@)
 *
 * asin() with the argument passed and the result returned in xmm0:
 * the low 64 bits of xmm0 are stored to a local double, asin() is
 * applied to it and the result is loaded back into xmm0.
 */
void __cdecl __libm_sse2_asin(void)
{
    double d;
    /* fetch the argument from xmm0 */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = asin( d );
    /* hand the result back in xmm0 */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7631
/***********************************************************************
 *		__libm_sse2_asinf   (MSVCRT.@)
 *
 * asinf() with the argument passed and the result returned in xmm0:
 * the low 32 bits of xmm0 are moved to a local float, asinf() is
 * applied to it and the result is moved back into xmm0.
 */
void __cdecl __libm_sse2_asinf(void)
{
    float f;
    /* fetch the argument from xmm0 */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = asinf( f );
    /* hand the result back in xmm0 */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7642
/***********************************************************************
 *		__libm_sse2_atan   (MSVCRT.@)
 *
 * atan() with the argument passed and the result returned in xmm0:
 * the low 64 bits of xmm0 are stored to a local double, atan() is
 * applied to it and the result is loaded back into xmm0.
 */
void __cdecl __libm_sse2_atan(void)
{
    double d;
    /* fetch the argument from xmm0 */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = atan( d );
    /* hand the result back in xmm0 */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7653
/***********************************************************************
 *		__libm_sse2_atan2   (MSVCRT.@)
 *
 * atan2() with its arguments passed in xmm0 (first) and xmm1 (second)
 * and the result returned in xmm0. The low 64 bits of each register
 * are stored to local doubles before atan2() is called.
 */
void __cdecl __libm_sse2_atan2(void)
{
    double d1, d2;
    /* fetch both arguments: xmm0 -> d1, xmm1 -> d2 */
    __asm__ __volatile__( "movq %%xmm0,%0; movq %%xmm1,%1 " : "=m" (d1), "=m" (d2) );
    d1 = atan2( d1, d2 );
    /* hand the result back in xmm0 */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d1) );
}
7664
/***********************************************************************
 *		__libm_sse2_atanf   (MSVCRT.@)
 *
 * atanf() with the argument passed and the result returned in xmm0:
 * the low 32 bits of xmm0 are moved to a local float, atanf() is
 * applied to it and the result is moved back into xmm0.
 */
void __cdecl __libm_sse2_atanf(void)
{
    float f;
    /* fetch the argument from xmm0 */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = atanf( f );
    /* hand the result back in xmm0 */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7675
/***********************************************************************
 *		__libm_sse2_cos   (MSVCRT.@)
 *
 * cos() with the argument passed and the result returned in xmm0:
 * the low 64 bits of xmm0 are stored to a local double, cos() is
 * applied to it and the result is loaded back into xmm0.
 */
void __cdecl __libm_sse2_cos(void)
{
    double d;
    /* fetch the argument from xmm0 */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = cos( d );
    /* hand the result back in xmm0 */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7686
/***********************************************************************
 *		__libm_sse2_cosf   (MSVCRT.@)
 *
 * cosf() with the argument passed and the result returned in xmm0:
 * the low 32 bits of xmm0 are moved to a local float, cosf() is
 * applied to it and the result is moved back into xmm0.
 */
void __cdecl __libm_sse2_cosf(void)
{
    float f;
    /* fetch the argument from xmm0 */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = cosf( f );
    /* hand the result back in xmm0 */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7697
/***********************************************************************
 *		__libm_sse2_exp   (MSVCRT.@)
 *
 * exp() with the argument passed and the result returned in xmm0:
 * the low 64 bits of xmm0 are stored to a local double, exp() is
 * applied to it and the result is loaded back into xmm0.
 */
void __cdecl __libm_sse2_exp(void)
{
    double d;
    /* fetch the argument from xmm0 */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = exp( d );
    /* hand the result back in xmm0 */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7708
/***********************************************************************
 *		__libm_sse2_expf   (MSVCRT.@)
 *
 * expf() with the argument passed and the result returned in xmm0:
 * the low 32 bits of xmm0 are moved to a local float, expf() is
 * applied to it and the result is moved back into xmm0.
 */
void __cdecl __libm_sse2_expf(void)
{
    float f;
    /* fetch the argument from xmm0 */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = expf( f );
    /* hand the result back in xmm0 */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7719
/***********************************************************************
 *		__libm_sse2_log   (MSVCRT.@)
 *
 * log() with the argument passed and the result returned in xmm0:
 * the low 64 bits of xmm0 are stored to a local double, log() is
 * applied to it and the result is loaded back into xmm0.
 */
void __cdecl __libm_sse2_log(void)
{
    double d;
    /* fetch the argument from xmm0 */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = log( d );
    /* hand the result back in xmm0 */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7730
/***********************************************************************
 *		__libm_sse2_log10   (MSVCRT.@)
 *
 * log10() with the argument passed and the result returned in xmm0:
 * the low 64 bits of xmm0 are stored to a local double, log10() is
 * applied to it and the result is loaded back into xmm0.
 */
void __cdecl __libm_sse2_log10(void)
{
    double d;
    /* fetch the argument from xmm0 */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = log10( d );
    /* hand the result back in xmm0 */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7741
/***********************************************************************
 *		__libm_sse2_log10f   (MSVCRT.@)
 *
 * log10f() with the argument passed and the result returned in xmm0:
 * the low 32 bits of xmm0 are moved to a local float, log10f() is
 * applied to it and the result is moved back into xmm0.
 */
void __cdecl __libm_sse2_log10f(void)
{
    float f;
    /* fetch the argument from xmm0 */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = log10f( f );
    /* hand the result back in xmm0 */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7752
/***********************************************************************
 *		__libm_sse2_logf   (MSVCRT.@)
 *
 * logf() with the argument passed and the result returned in xmm0:
 * the low 32 bits of xmm0 are moved to a local float, logf() is
 * applied to it and the result is moved back into xmm0.
 */
void __cdecl __libm_sse2_logf(void)
{
    float f;
    /* fetch the argument from xmm0 */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = logf( f );
    /* hand the result back in xmm0 */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7763
/***********************************************************************
 *		__libm_sse2_pow   (MSVCRT.@)
 *
 * pow() with its arguments passed in xmm0 (first) and xmm1 (second)
 * and the result returned in xmm0. The low 64 bits of each register
 * are stored to local doubles before pow() is called.
 */
void __cdecl __libm_sse2_pow(void)
{
    double d1, d2;
    /* fetch both arguments: xmm0 -> d1, xmm1 -> d2 */
    __asm__ __volatile__( "movq %%xmm0,%0; movq %%xmm1,%1 " : "=m" (d1), "=m" (d2) );
    d1 = pow( d1, d2 );
    /* hand the result back in xmm0 */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d1) );
}
7774
/***********************************************************************
 *		__libm_sse2_powf   (MSVCRT.@)
 *
 * powf() with its arguments passed in xmm0 (first) and xmm1 (second)
 * and the result returned in xmm0. The low 32 bits of each register
 * are moved to local floats before powf() is called.
 */
void __cdecl __libm_sse2_powf(void)
{
    float f1, f2;
    /* fetch both arguments: xmm0 -> f1, xmm1 -> f2 */
    __asm__ __volatile__( "movd %%xmm0,%0; movd %%xmm1,%1" : "=g" (f1), "=g" (f2) );
    f1 = powf( f1, f2 );
    /* hand the result back in xmm0 */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f1) );
}
7785
/***********************************************************************
 *		__libm_sse2_sin   (MSVCRT.@)
 *
 * sin() with the argument passed and the result returned in xmm0:
 * the low 64 bits of xmm0 are stored to a local double, sin() is
 * applied to it and the result is loaded back into xmm0.
 */
void __cdecl __libm_sse2_sin(void)
{
    double d;
    /* fetch the argument from xmm0 */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = sin( d );
    /* hand the result back in xmm0 */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7796
/***********************************************************************
 *		__libm_sse2_sinf   (MSVCRT.@)
 *
 * sinf() with the argument passed and the result returned in xmm0:
 * the low 32 bits of xmm0 are moved to a local float, sinf() is
 * applied to it and the result is moved back into xmm0.
 */
void __cdecl __libm_sse2_sinf(void)
{
    float f;
    /* fetch the argument from xmm0 */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = sinf( f );
    /* hand the result back in xmm0 */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7807
/***********************************************************************
 *		__libm_sse2_tan   (MSVCRT.@)
 *
 * tan() with the argument passed and the result returned in xmm0:
 * the low 64 bits of xmm0 are stored to a local double, tan() is
 * applied to it and the result is loaded back into xmm0.
 */
void __cdecl __libm_sse2_tan(void)
{
    double d;
    /* fetch the argument from xmm0 */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = tan( d );
    /* hand the result back in xmm0 */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7818
/***********************************************************************
 *		__libm_sse2_tanf   (MSVCRT.@)
 *
 * tanf() with the argument passed and the result returned in xmm0:
 * the low 32 bits of xmm0 are moved to a local float, tanf() is
 * applied to it and the result is moved back into xmm0.
 */
void __cdecl __libm_sse2_tanf(void)
{
    float f;
    /* fetch the argument from xmm0 */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = tanf( f );
    /* hand the result back in xmm0 */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7829
/***********************************************************************
 *		__libm_sse2_sqrt_precise   (MSVCR110.@)
 *
 * Square root with the argument passed and the result returned in xmm0.
 *
 * The low 64 bits of xmm0 are stored to a local double. Depending on the
 * control word reported by __control87_2() the result comes either from
 * sqrt(), from sqrt_validate() alone, or from the sse2_sqrt helper.
 */
void __cdecl __libm_sse2_sqrt_precise(void)
{
    unsigned int cw;
    double d;

    /* fetch the argument from xmm0 */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    /* query only: zero new value and zero mask; cw receives the word
     * written through the last argument */
    __control87_2(0, 0, NULL, &cw);
    if (cw & _MCW_RC)
    {
        /* some rounding-control bit is set: use the regular sqrt() */
        d = sqrt(d);
        __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
        return;
    }

    if (!sqrt_validate(&d, FALSE))
    {
        /* sqrt_validate() returned zero: return d as it left it */
        __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
        return;
    }
    /* NOTE(review): sse2_sqrt presumably takes its operand from xmm0 and
     * leaves the result there, since nothing is reloaded here - confirm */
    __asm__ __volatile__( "call " __ASM_NAME( "sse2_sqrt" ) );
}
7854 #endif  /* __i386__ */
7855
7856 /*********************************************************************
7857  *      _fdclass (MSVCR120.@)
7858  *
7859  * Copied from musl: src/math/__fpclassifyf.c
7860  */
7861 short CDECL _fdclass(float x)
7862 {
7863     union { float f; UINT32 i; } u = { x };
7864     int e = u.i >> 23 & 0xff;
7865
7866     if (!e) return u.i << 1 ? FP_SUBNORMAL : FP_ZERO;
7867     if (e == 0xff) return u.i << 9 ? FP_NAN : FP_INFINITE;
7868     return FP_NORMAL;
7869 }
7870
7871 /*********************************************************************
7872  *      _dclass (MSVCR120.@)
7873  *
7874  * Copied from musl: src/math/__fpclassify.c
7875  */
7876 short CDECL _dclass(double x)
7877 {
7878     union { double f; UINT64 i; } u = { x };
7879     int e = u.i >> 52 & 0x7ff;
7880
7881     if (!e) return u.i << 1 ? FP_SUBNORMAL : FP_ZERO;
7882     if (e == 0x7ff) return (u.i << 12) ? FP_NAN : FP_INFINITE;
7883     return FP_NORMAL;
7884 }
7885
7886 #if _MSVCR_VER>=120
7887
7888 /*********************************************************************
7889  *      cbrt (MSVCR120.@)
7890  *
7891  * Copied from musl: src/math/cbrt.c
7892  */
7893 double CDECL cbrt(double x)
7894 {
7895     static const UINT32 B1 = 715094163, B2 = 696219795;
7896     static const double P0 =  1.87595182427177009643,
7897                  P1 = -1.88497979543377169875,
7898                  P2 =  1.621429720105354466140,
7899                  P3 = -0.758397934778766047437,
7900                  P4 =  0.145996192886612446982;
7901
7902     union {double f; UINT64 i;} u = {x};
7903     double r,s,t,w;
7904     UINT32 hx = u.i >> 32 & 0x7fffffff;
7905
7906     if (hx >= 0x7ff00000)  /* cbrt(NaN,INF) is itself */
7907         return x + x;
7908
7909     if (hx < 0x00100000) { /* zero or subnormal? */
7910         u.f = x * 0x1p54;
7911         hx = u.i>>32 & 0x7fffffff;
7912         if (hx == 0)
7913             return x;
7914         hx = hx / 3 + B2;
7915     } else
7916         hx = hx / 3 + B1;
7917     u.i &= 1ULL << 63;
7918     u.i |= (UINT64)hx << 32;
7919     t = u.f;
7920
7921     r = (t * t) * (t / x);
7922     t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4));
7923
7924     u.f = t;
7925     u.i = (u.i + 0x80000000) & 0xffffffffc0000000ULL;
7926     t = u.f;
7927
7928     s = t * t;
7929     r = x / s;
7930     w = t + t;
7931     r = (r - t) / (w + r);
7932     t = t + t * r;
7933     return t;
7934 }
7935
7936 /*********************************************************************
7937  *      cbrtf (MSVCR120.@)
7938  *
7939  * Copied from musl: src/math/cbrtf.c
7940  */
7941 float CDECL cbrtf(float x)
7942 {
7943     static const unsigned B1 = 709958130, B2 = 642849266;
7944
7945     double r,T;
7946     union {float f; UINT32 i;} u = {x};
7947     UINT32 hx = u.i & 0x7fffffff;
7948
7949     if (hx >= 0x7f800000)
7950         return x + x;
7951
7952     if (hx < 0x00800000) {  /* zero or subnormal? */
7953         if (hx == 0)
7954             return x;
7955         u.f = x * 0x1p24f;
7956         hx = u.i & 0x7fffffff;
7957         hx = hx / 3 + B2;
7958     } else
7959         hx = hx / 3 + B1;
7960     u.i &= 0x80000000;
7961     u.i |= hx;
7962
7963     T = u.f;
7964     r = T * T * T;
7965     T = T * (x + x + r) / (x + r + r);
7966
7967     r = T * T * T;
7968     T = T * (x + x + r) / (x + r + r);
7969     return T;
7970 }
7971
7972 /*********************************************************************
7973  *      exp2 (MSVCR120.@)
7974  *
7975  * Copied from musl: src/math/exp2.c
7976  */
7977 double CDECL exp2(double x)
7978 {
7979     static const double C[] = {
7980         0x1.62e42fefa39efp-1,
7981         0x1.ebfbdff82c424p-3,
7982         0x1.c6b08d70cf4b5p-5,
7983         0x1.3b2abd24650ccp-7,
7984         0x1.5d7e09b4e3a84p-10
7985     };
7986
7987     UINT32 abstop;
7988     UINT64 ki, idx, top, sbits;
7989     double kd, r, r2, scale, tail, tmp;
7990
7991     abstop = (*(UINT64*)&x >> 52) & 0x7ff;
7992     if (abstop - 0x3c9 >= 0x408 - 0x3c9) {
7993         if (abstop - 0x3c9 >= 0x80000000) {
7994             /* Avoid spurious underflow for tiny x. */
7995             /* Note: 0 is common input. */
7996             return 1.0 + x;
7997         }
7998         if (abstop >= 409) {
7999             if (*(UINT64*)&x == 0xfff0000000000000ull)
8000                 return 0.0;
8001             if (abstop >= 0x7ff)
8002                 return 1.0 + x;
8003             if (!(*(UINT64*)&x >> 63)) {
8004                 *_errno() = ERANGE;
8005                 return fp_barrier(DBL_MAX) * DBL_MAX;
8006             }
8007             else if (x <= -2147483648.0) {
8008                 fp_barrier(x + 0x1p120f);
8009                 return 0;
8010             }
8011             else if (*(UINT64*)&x >= 0xc090cc0000000000ull) {
8012                 *_errno() = ERANGE;
8013                 fp_barrier(x + 0x1p120f);
8014                 return 0;
8015             }
8016         }
8017         if (2 * *(UINT64*)&x > 2 * 0x408d000000000000ull)
8018             /* Large x is special cased below. */
8019             abstop = 0;
8020     }
8021
8022     /* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */
8023     /* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */
8024     kd = fp_barrier(x + 0x1.8p52 / (1 << 7));
8025     ki = *(UINT64*)&kd; /* k. */
8026     kd -= 0x1.8p52 / (1 << 7); /* k/N for int k. */
8027     r = x - kd;
8028     /* 2^(k/N) ~= scale * (1 + tail). */
8029     idx = 2 * (ki % (1 << 7));
8030     top = ki << (52 - 7);
8031     tail = *(double*)&exp_T[idx];
8032     /* This is only a valid scale when -1023*N < k < 1024*N. */
8033     sbits = exp_T[idx + 1] + top;
8034     /* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */
8035     /* Evaluation is optimized assuming superscalar pipelined execution. */
8036     r2 = r * r;
8037     /* Without fma the worst case error is 0.5/N ulp larger. */
8038     /* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */
8039     tmp = tail + r * C[0] + r2 * (C[1] + r * C[2]) + r2 * r2 * (C[3] + r * C[4]);
8040     if (abstop == 0)
8041     {
8042         /* Handle cases that may overflow or underflow when computing the result that
8043            is scale*(1+TMP) without intermediate rounding. The bit representation of
8044            scale is in SBITS, however it has a computed exponent that may have
8045            overflown into the sign bit so that needs to be adjusted before using it as
8046            a double. (int32_t)KI is the k used in the argument reduction and exponent
8047            adjustment of scale, positive k here means the result may overflow and
8048            negative k means the result may underflow. */
8049         double scale, y;
8050
8051         if ((ki & 0x80000000) == 0) {
8052             /* k > 0, the exponent of scale might have overflowed by 1. */
8053             sbits -= 1ull << 52;
8054             scale = *(double*)&sbits;
8055             y = 2 * (scale + scale * tmp);
8056             return y;
8057         }
8058         /* k < 0, need special care in the subnormal range. */
8059         sbits += 1022ull << 52;
8060         scale = *(double*)&sbits;
8061         y = scale + scale * tmp;
8062         if (y < 1.0) {
8063             /* Round y to the right precision before scaling it into the subnormal
8064                range to avoid double rounding that can cause 0.5+E/2 ulp error where
8065                E is the worst-case ulp error outside the subnormal range. So this
8066                is only useful if the goal is better than 1 ulp worst-case error. */
8067             double hi, lo;
8068             lo = scale - y + scale * tmp;
8069             hi = 1.0 + y;
8070             lo = 1.0 - hi + y + lo;
8071             y = hi + lo - 1.0;
8072             /* Avoid -0.0 with downward rounding. */
8073             if (y == 0.0)
8074                 y = 0.0;
8075             /* The underflow exception needs to be signaled explicitly. */
8076             fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022);
8077         }
8078         y = 0x1p-1022 * y;
8079         return y;
8080     }
8081     scale = *(double*)&sbits;
8082     /* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
8083        is no spurious underflow here even without fma. */
8084     return scale + scale * tmp;
8085 }
8086
8087 /*********************************************************************
8088  *      exp2f (MSVCR120.@)
8089  *
8090  * Copied from musl: src/math/exp2f.c
8091  */
8092 float CDECL exp2f(float x)
8093 {
8094     static const double C[] = {
8095         0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1
8096     };
8097     static const double shift = 0x1.8p+52 / (1 << 5);
8098
8099     double kd, xd, z, r, r2, y, s;
8100     UINT32 abstop;
8101     UINT64 ki, t;
8102
8103     xd = x;
8104     abstop = (*(UINT32*)&x >> 20) & 0x7ff;
8105     if (abstop >= 0x430) {
8106         /* |x| >= 128 or x is nan.  */
8107         if (*(UINT32*)&x == 0xff800000)
8108             return 0.0f;
8109         if (abstop >= 0x7f8)
8110             return x + x;
8111         if (x > 0.0f) {
8112             *_errno() = ERANGE;
8113             return fp_barrierf(x * FLT_MAX);
8114         }
8115         if (x <= -150.0f) {
8116             fp_barrierf(x - 0x1p120);
8117             return 0;
8118         }
8119     }
8120
8121     /* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k, N = 1 << 5. */
8122     kd = xd + shift;
8123     ki = *(UINT64*)&kd;
8124     kd -= shift; /* k/(1<<5) for int k.  */
8125     r = xd - kd;
8126
8127     /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
8128     t = exp2f_T[ki % (1 << 5)];
8129     t += ki << (52 - 5);
8130     s = *(double*)&t;
8131     z = C[0] * r + C[1];
8132     r2 = r * r;
8133     y = C[2] * r + 1;
8134     y = z * r2 + y;
8135     y = y * s;
8136     return y;
8137 }
8138
/*********************************************************************
 *      expm1 (MSVCR120.@)
 *
 * Exported entry point that passes x to __expm1() and returns its
 * result unchanged.
 */
double CDECL expm1(double x)
{
    return __expm1(x);
}
8146
/*********************************************************************
 *      expm1f (MSVCR120.@)
 *
 * Exported entry point that passes x to __expm1f() and returns its
 * result unchanged.
 */
float CDECL expm1f(float x)
{
    return __expm1f(x);
}
8154
8155 /*********************************************************************
8156  *      log1p (MSVCR120.@)
8157  *
8158  * Copied from musl: src/math/log1p.c
8159  */
8160 double CDECL log1p(double x)
8161 {
8162     static const double ln2_hi = 6.93147180369123816490e-01,
8163         ln2_lo = 1.90821492927058770002e-10,
8164         Lg1 = 6.666666666666735130e-01,
8165         Lg2 = 3.999999999940941908e-01,
8166         Lg3 = 2.857142874366239149e-01,
8167         Lg4 = 2.222219843214978396e-01,
8168         Lg5 = 1.818357216161805012e-01,
8169         Lg6 = 1.531383769920937332e-01,
8170         Lg7 = 1.479819860511658591e-01;
8171
8172     union {double f; UINT64 i;} u = {x};
8173     double hfsq, f, c, s, z, R, w, t1, t2, dk;
8174     UINT32 hx, hu;
8175     int k;
8176
8177     hx = u.i >> 32;
8178     k = 1;
8179     if (hx < 0x3fda827a || hx >> 31) { /* 1+x < sqrt(2)+ */
8180         if (hx >= 0xbff00000) { /* x <= -1.0 */
8181             if (x == -1) {
8182                 *_errno() = ERANGE;
8183                 return x / 0.0; /* og1p(-1) = -inf */
8184             }
8185             *_errno() = EDOM;
8186             return (x-x) / 0.0; /* log1p(x<-1) = NaN */
8187         }
8188         if (hx << 1 < 0x3ca00000 << 1) { /* |x| < 2**-53 */
8189             fp_barrier(x + 0x1p120f);
8190             /* underflow if subnormal */
8191             if ((hx & 0x7ff00000) == 0)
8192                 fp_barrierf(x);
8193             return x;
8194         }
8195         if (hx <= 0xbfd2bec4) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
8196             k = 0;
8197             c = 0;
8198             f = x;
8199         }
8200     } else if (hx >= 0x7ff00000)
8201         return x;
8202     if (k) {
8203         u.f = 1 + x;
8204         hu = u.i >> 32;
8205         hu += 0x3ff00000 - 0x3fe6a09e;
8206         k = (int)(hu >> 20) - 0x3ff;
8207         /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
8208         if (k < 54) {
8209             c = k >= 2 ? 1 - (u.f - x) : x - (u.f - 1);
8210             c /= u.f;
8211         } else
8212             c = 0;
8213         /* reduce u into [sqrt(2)/2, sqrt(2)] */
8214         hu = (hu & 0x000fffff) + 0x3fe6a09e;
8215         u.i = (UINT64)hu << 32 | (u.i & 0xffffffff);
8216         f = u.f - 1;
8217     }
8218     hfsq = 0.5 * f * f;
8219     s = f / (2.0 + f);
8220     z = s * s;
8221     w = z * z;
8222     t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
8223     t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
8224     R = t2 + t1;
8225     dk = k;
8226     return s * (hfsq + R) + (dk * ln2_lo + c) - hfsq + f + dk * ln2_hi;
8227 }
8228
8229 /*********************************************************************
8230  *      log1pf (MSVCR120.@)
8231  *
8232  * Copied from musl: src/math/log1pf.c
8233  */
8234 float CDECL log1pf(float x)
8235 {
8236     static const float ln2_hi = 6.9313812256e-01,
8237         ln2_lo = 9.0580006145e-06,
8238         Lg1 = 0xaaaaaa.0p-24,
8239         Lg2 = 0xccce13.0p-25,
8240         Lg3 = 0x91e9ee.0p-25,
8241         Lg4 = 0xf89e26.0p-26;
8242
8243     union {float f; UINT32 i;} u = {x};
8244     float hfsq, f, c, s, z, R, w, t1, t2, dk;
8245     UINT32 ix, iu;
8246     int k;
8247
8248     ix = u.i;
8249     k = 1;
8250     if (ix < 0x3ed413d0 || ix >> 31) { /* 1+x < sqrt(2)+ */
8251         if (ix >= 0xbf800000) { /* x <= -1.0 */
8252             if (x == -1) {
8253                 *_errno() = ERANGE;
8254                 return x / 0.0f; /* log1p(-1)=+inf */
8255             }
8256             *_errno() = EDOM;
8257             return (x - x) / 0.0f; /* log1p(x<-1)=NaN */
8258         }
8259         if (ix<<1 < 0x33800000<<1) { /* |x| < 2**-24 */
8260             /* underflow if subnormal */
8261             if ((ix & 0x7f800000) == 0)
8262                 fp_barrierf(x * x);
8263             return x;
8264         }
8265         if (ix <= 0xbe95f619) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
8266             k = 0;
8267             c = 0;
8268             f = x;
8269         }
8270     } else if (ix >= 0x7f800000)
8271         return x;
8272     if (k) {
8273         u.f = 1 + x;
8274         iu = u.i;
8275         iu += 0x3f800000 - 0x3f3504f3;
8276         k = (int)(iu >> 23) - 0x7f;
8277         /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
8278         if (k < 25) {
8279             c = k >= 2 ? 1 - (u.f - x) : x - (u.f - 1);
8280             c /= u.f;
8281         } else
8282             c = 0;
8283         /* reduce u into [sqrt(2)/2, sqrt(2)] */
8284         iu = (iu & 0x007fffff) + 0x3f3504f3;
8285         u.i = iu;
8286         f = u.f - 1;
8287     }
8288     s = f / (2.0f + f);
8289     z = s * s;
8290     w = z * z;
8291     t1= w * (Lg2 + w * Lg4);
8292     t2= z * (Lg1 + w * Lg3);
8293     R = t2 + t1;
8294     hfsq = 0.5f * f * f;
8295     dk = k;
8296     return s * (hfsq + R) + (dk * ln2_lo + c) - hfsq + f + dk * ln2_hi;
8297 }
8298
/*********************************************************************
 *      log2 (MSVCR120.@)
 *
 * Copied from musl: src/math/log2.c
 *
 * Base-2 logarithm of x.
 *
 * Inputs whose bit pattern lies in a narrow window around 1.0 are
 * evaluated directly with the B polynomial.  Everything else is
 * decomposed as x = 2^k * z, a 64-entry table (T / T2) supplies the
 * constants for the subinterval containing z, and the A polynomial
 * approximates the remainder.
 *
 * Special cases:
 *   x == 1             returns 0
 *   x == +0 or -0      sets errno to ERANGE, returns -1.0 / x
 *   x == +inf or nan   returns x
 *   x <  0             sets errno to EDOM, returns (x - x) / (x - x)
 */
double CDECL log2(double x)
{
    /* 1/ln(2) split into a high part and a low correction. */
    static const double invln2hi = 0x1.7154765200000p+0,
        invln2lo = 0x1.705fc2eefa200p-33;
    /* Polynomial coefficients for the table-driven path. */
    static const double A[] = {
        -0x1.71547652b8339p-1,
        0x1.ec709dc3a04bep-2,
        -0x1.7154764702ffbp-2,
        0x1.2776c50034c48p-2,
        -0x1.ec7b328ea92bcp-3,
        0x1.a6225e117f92ep-3
    };
    /* Polynomial coefficients for inputs close to 1.0. */
    static const double B[] = {
        -0x1.71547652b82fep-1,
        0x1.ec709dc3a03f7p-2,
        -0x1.71547652b7c3fp-2,
        0x1.2776c50f05be4p-2,
        -0x1.ec709dd768fe5p-3,
        0x1.a61761ec4e736p-3,
        -0x1.7153fbc64a79bp-3,
        0x1.484d154f01b4ap-3,
        -0x1.289e4a72c383cp-3,
        0x1.0b32f285aee66p-3
    };
    /* Per-subinterval constants, indexed by i below: invc scales
       (z - c) and logc is added to k. */
    static const struct {
        double invc, logc;
    } T[] = {
        {0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1},
        {0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1},
        {0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1},
        {0x1.661ec32d06c85p+0, -0x1.efec5360b4000p-2},
        {0x1.623fa951198f8p+0, -0x1.dfdd91ab7e000p-2},
        {0x1.5e75ba4cf026cp+0, -0x1.cffae0cc79000p-2},
        {0x1.5ac055a214fb8p+0, -0x1.c043811fda000p-2},
        {0x1.571ed0f166e1ep+0, -0x1.b0b67323ae000p-2},
        {0x1.53909590bf835p+0, -0x1.a152f5a2db000p-2},
        {0x1.5014fed61adddp+0, -0x1.9217f5af86000p-2},
        {0x1.4cab88e487bd0p+0, -0x1.8304db0719000p-2},
        {0x1.49539b4334feep+0, -0x1.74189f9a9e000p-2},
        {0x1.460cbdfafd569p+0, -0x1.6552bb5199000p-2},
        {0x1.42d664ee4b953p+0, -0x1.56b23a29b1000p-2},
        {0x1.3fb01111dd8a6p+0, -0x1.483650f5fa000p-2},
        {0x1.3c995b70c5836p+0, -0x1.39de937f6a000p-2},
        {0x1.3991c4ab6fd4ap+0, -0x1.2baa1538d6000p-2},
        {0x1.3698e0ce099b5p+0, -0x1.1d98340ca4000p-2},
        {0x1.33ae48213e7b2p+0, -0x1.0fa853a40e000p-2},
        {0x1.30d191985bdb1p+0, -0x1.01d9c32e73000p-2},
        {0x1.2e025cab271d7p+0, -0x1.e857da2fa6000p-3},
        {0x1.2b404cf13cd82p+0, -0x1.cd3c8633d8000p-3},
        {0x1.288b02c7ccb50p+0, -0x1.b26034c14a000p-3},
        {0x1.25e2263944de5p+0, -0x1.97c1c2f4fe000p-3},
        {0x1.234563d8615b1p+0, -0x1.7d6023f800000p-3},
        {0x1.20b46e33eaf38p+0, -0x1.633a71a05e000p-3},
        {0x1.1e2eefdcda3ddp+0, -0x1.494f5e9570000p-3},
        {0x1.1bb4a580b3930p+0, -0x1.2f9e424e0a000p-3},
        {0x1.19453847f2200p+0, -0x1.162595afdc000p-3},
        {0x1.16e06c0d5d73cp+0, -0x1.f9c9a75bd8000p-4},
        {0x1.1485f47b7e4c2p+0, -0x1.c7b575bf9c000p-4},
        {0x1.12358ad0085d1p+0, -0x1.960c60ff48000p-4},
        {0x1.0fef00f532227p+0, -0x1.64ce247b60000p-4},
        {0x1.0db2077d03a8fp+0, -0x1.33f78b2014000p-4},
        {0x1.0b7e6d65980d9p+0, -0x1.0387d1a42c000p-4},
        {0x1.0953efe7b408dp+0, -0x1.a6f9208b50000p-5},
        {0x1.07325cac53b83p+0, -0x1.47a954f770000p-5},
        {0x1.05197e40d1b5cp+0, -0x1.d23a8c50c0000p-6},
        {0x1.03091c1208ea2p+0, -0x1.16a2629780000p-6},
        {0x1.0101025b37e21p+0, -0x1.720f8d8e80000p-8},
        {0x1.fc07ef9caa76bp-1, 0x1.6fe53b1500000p-7},
        {0x1.f4465d3f6f184p-1, 0x1.11ccce10f8000p-5},
        {0x1.ecc079f84107fp-1, 0x1.c4dfc8c8b8000p-5},
        {0x1.e573a99975ae8p-1, 0x1.3aa321e574000p-4},
        {0x1.de5d6f0bd3de6p-1, 0x1.918a0d08b8000p-4},
        {0x1.d77b681ff38b3p-1, 0x1.e72e9da044000p-4},
        {0x1.d0cb5724de943p-1, 0x1.1dcd2507f6000p-3},
        {0x1.ca4b2dc0e7563p-1, 0x1.476ab03dea000p-3},
        {0x1.c3f8ee8d6cb51p-1, 0x1.7074377e22000p-3},
        {0x1.bdd2b4f020c4cp-1, 0x1.98ede8ba94000p-3},
        {0x1.b7d6c006015cap-1, 0x1.c0db86ad2e000p-3},
        {0x1.b20366e2e338fp-1, 0x1.e840aafcee000p-3},
        {0x1.ac57026295039p-1, 0x1.0790ab4678000p-2},
        {0x1.a6d01bc2731ddp-1, 0x1.1ac056801c000p-2},
        {0x1.a16d3bc3ff18bp-1, 0x1.2db11d4fee000p-2},
        {0x1.9c2d14967feadp-1, 0x1.406464ec58000p-2},
        {0x1.970e4f47c9902p-1, 0x1.52dbe093af000p-2},
        {0x1.920fb3982bcf2p-1, 0x1.651902050d000p-2},
        {0x1.8d30187f759f1p-1, 0x1.771d2cdeaf000p-2},
        {0x1.886e5ebb9f66dp-1, 0x1.88e9c857d9000p-2},
        {0x1.83c97b658b994p-1, 0x1.9a80155e16000p-2},
        {0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2},
        {0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2},
        {0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2}
    };
    /* Per-subinterval pair (chi, clo); both are subtracted from z
       before scaling by invc. */
    static const struct {
        double chi, clo;
    } T2[] = {
        {0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
        {0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57},
        {0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55},
        {0x1.6e00038b95a04p-1, 0x1.8ff8856739326p-55},
        {0x1.71fffe09994e3p-1, 0x1.afd40275f82b1p-55},
        {0x1.7600015590e1p-1, -0x1.2fd75b4238341p-56},
        {0x1.7a00012655bd5p-1, 0x1.808e67c242b76p-56},
        {0x1.7e0003259e9a6p-1, -0x1.208e426f622b7p-57},
        {0x1.81fffedb4b2d2p-1, -0x1.402461ea5c92fp-55},
        {0x1.860002dfafcc3p-1, 0x1.df7f4a2f29a1fp-57},
        {0x1.89ffff78c6b5p-1, -0x1.e0453094995fdp-55},
        {0x1.8e00039671566p-1, -0x1.a04f3bec77b45p-55},
        {0x1.91fffe2bf1745p-1, -0x1.7fa34400e203cp-56},
        {0x1.95fffcc5c9fd1p-1, -0x1.6ff8005a0695dp-56},
        {0x1.9a0003bba4767p-1, 0x1.0f8c4c4ec7e03p-56},
        {0x1.9dfffe7b92da5p-1, 0x1.e7fd9478c4602p-55},
        {0x1.a1fffd72efdafp-1, -0x1.a0c554dcdae7ep-57},
        {0x1.a5fffde04ff95p-1, 0x1.67da98ce9b26bp-55},
        {0x1.a9fffca5e8d2bp-1, -0x1.284c9b54c13dep-55},
        {0x1.adfffddad03eap-1, 0x1.812c8ea602e3cp-58},
        {0x1.b1ffff10d3d4dp-1, -0x1.efaddad27789cp-55},
        {0x1.b5fffce21165ap-1, 0x1.3cb1719c61237p-58},
        {0x1.b9fffd950e674p-1, 0x1.3f7d94194cep-56},
        {0x1.be000139ca8afp-1, 0x1.50ac4215d9bcp-56},
        {0x1.c20005b46df99p-1, 0x1.beea653e9c1c9p-57},
        {0x1.c600040b9f7aep-1, -0x1.c079f274a70d6p-56},
        {0x1.ca0006255fd8ap-1, -0x1.a0b4076e84c1fp-56},
        {0x1.cdfffd94c095dp-1, 0x1.8f933f99ab5d7p-55},
        {0x1.d1ffff975d6cfp-1, -0x1.82c08665fe1bep-58},
        {0x1.d5fffa2561c93p-1, -0x1.b04289bd295f3p-56},
        {0x1.d9fff9d228b0cp-1, 0x1.70251340fa236p-55},
        {0x1.de00065bc7e16p-1, -0x1.5011e16a4d80cp-56},
        {0x1.e200002f64791p-1, 0x1.9802f09ef62ep-55},
        {0x1.e600057d7a6d8p-1, -0x1.e0b75580cf7fap-56},
        {0x1.ea00027edc00cp-1, -0x1.c848309459811p-55},
        {0x1.ee0006cf5cb7cp-1, -0x1.f8027951576f4p-55},
        {0x1.f2000782b7dccp-1, -0x1.f81d97274538fp-55},
        {0x1.f6000260c450ap-1, -0x1.071002727ffdcp-59},
        {0x1.f9fffe88cd533p-1, -0x1.81bdce1fda8bp-58},
        {0x1.fdfffd50f8689p-1, 0x1.7f91acb918e6ep-55},
        {0x1.0200004292367p+0, 0x1.b7ff365324681p-54},
        {0x1.05fffe3e3d668p+0, 0x1.6fa08ddae957bp-55},
        {0x1.0a0000a85a757p+0, -0x1.7e2de80d3fb91p-58},
        {0x1.0e0001a5f3fccp+0, -0x1.1823305c5f014p-54},
        {0x1.11ffff8afbaf5p+0, -0x1.bfabb6680bac2p-55},
        {0x1.15fffe54d91adp+0, -0x1.d7f121737e7efp-54},
        {0x1.1a00011ac36e1p+0, 0x1.c000a0516f5ffp-54},
        {0x1.1e00019c84248p+0, -0x1.082fbe4da5dap-54},
        {0x1.220000ffe5e6ep+0, -0x1.8fdd04c9cfb43p-55},
        {0x1.26000269fd891p+0, 0x1.cfe2a7994d182p-55},
        {0x1.2a00029a6e6dap+0, -0x1.00273715e8bc5p-56},
        {0x1.2dfffe0293e39p+0, 0x1.b7c39dab2a6f9p-54},
        {0x1.31ffff7dcf082p+0, 0x1.df1336edc5254p-56},
        {0x1.35ffff05a8b6p+0, -0x1.e03564ccd31ebp-54},
        {0x1.3a0002e0eaeccp+0, 0x1.5f0e74bd3a477p-56},
        {0x1.3e000043bb236p+0, 0x1.c7dcb149d8833p-54},
        {0x1.4200002d187ffp+0, 0x1.e08afcf2d3d28p-56},
        {0x1.460000d387cb1p+0, 0x1.20837856599a6p-55},
        {0x1.4a00004569f89p+0, -0x1.9fa5c904fbcd2p-55},
        {0x1.4e000043543f3p+0, -0x1.81125ed175329p-56},
        {0x1.51fffcc027f0fp+0, 0x1.883d8847754dcp-54},
        {0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55},
        {0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55},
        {0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54}
    };

    double z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p, rhi, rlo;
    UINT64 ix, iz, tmp;
    UINT32 top;
    int k, i;

    /* ix: raw bits of x; top: its upper 16 bits (sign, exponent and
       the four leading mantissa bits). */
    ix = *(UINT64*)&x;
    top = ix >> 48;
    if (ix - 0x3feea4af00000000ULL < 0x210aa00000000ULL) {
        /* Handle close to 1.0 inputs separately.  */
        /* Fix sign of zero with downward rounding when x==1.  */
        if (ix == 0x3ff0000000000000ULL)
            return 0;
        r = x - 1.0;
        /* rhi is r with the low 32 bits of its representation cleared;
           rlo is the remainder. */
        *(UINT64*)&rhi = *(UINT64*)&r & -1ULL << 32;
        rlo = r - rhi;
        hi = rhi * invln2hi;
        lo = rlo * invln2hi + r * invln2lo;
        r2 = r * r; /* rounding error: 0x1p-62.  */
        r4 = r2 * r2;
        /* Worst-case error is less than 0.54 ULP (0.55 ULP without fma).  */
        p = r2 * (B[0] + r * B[1]);
        y = hi + p;
        lo += hi - y + p;
        lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) +
                r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
        y += lo;
        return y;
    }
    if (top - 0x0010 >= 0x7ff0 - 0x0010) {
        /* x < 0x1p-1022 or inf or nan.  */
        /* All bits except the sign are clear: x is +0 or -0. */
        if (ix * 2 == 0) {
            *_errno() = ERANGE;
            return -1.0 / x;
        }
        if (ix == 0x7ff0000000000000ULL) /* log(inf) == inf.  */
            return x;
        /* Exponent all ones with a non-zero mantissa: x is a nan. */
        if ((top & 0x7ff0) == 0x7ff0 && (ix & 0xfffffffffffffULL))
            return x;
        /* Sign bit set: negative input (including -inf). */
        if (top & 0x8000) {
            *_errno() = EDOM;
            return (x - x) / (x - x);
        }
        /* x is subnormal, normalize it.  */
        x *= 0x1p52;
        ix = *(UINT64*)&x;
        /* Undo the 2^52 scaling in the exponent field. */
        ix -= 52ULL << 52;
    }

    /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
       The range is split into N subintervals.
       The ith subinterval contains z and c is near its center.  */
    tmp = ix - 0x3fe6000000000000ULL;
    /* Six bits of tmp just below the exponent field select the entry. */
    i = (tmp >> (52 - 6)) % (1 << 6);
    k = (INT64)tmp >> 52; /* arithmetic shift */
    /* Remove k from the exponent field of ix to obtain the bits of z. */
    iz = ix - (tmp & 0xfffULL << 52);
    invc = T[i].invc;
    logc = T[i].logc;
    z = *(double*)&iz;
    kd = k;

    /* log2(x) = log2(z/c) + log2(c) + k.  */
    /* r ~= z/c - 1, |r| < 1/(2*N).  */
    /* rounding error: 0x1p-55/N + 0x1p-65.  */
    r = (z - T2[i].chi - T2[i].clo) * invc;
    /* Same high/low split of r as on the near-1.0 path. */
    *(UINT64*)&rhi = *(UINT64*)&r & -1ULL << 32;
    rlo = r - rhi;
    t1 = rhi * invln2hi;
    t2 = rlo * invln2hi + r * invln2lo;

    /* hi + lo = r/ln2 + log2(c) + k.  */
    t3 = kd + logc;
    hi = t3 + t1;
    lo = t3 - hi + t1 + t2;

    /* log2(r+1) = r/ln2 + r^2*poly(r).  */
    /* Evaluation is optimized assuming superscalar pipelined execution.  */
    r2 = r * r; /* rounding error: 0x1p-54/N^2.  */
    r4 = r2 * r2;
    /* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
       ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma).  */
    p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
    y = lo + r2 * p + hi;
    return y;
}
8549
8550 /*********************************************************************
8551  *      log2f (MSVCR120.@)
8552  *
8553  * Copied from musl: src/math/log2f.c
8554  */
8555 float CDECL log2f(float x)
8556 {
8557     static const double A[] = {
8558         -0x1.712b6f70a7e4dp-2,
8559         0x1.ecabf496832ep-2,
8560         -0x1.715479ffae3dep-1,
8561         0x1.715475f35c8b8p0
8562     };
8563     static const struct {
8564         double invc, logc;
8565     } T[] = {
8566         { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },
8567         { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 },
8568         { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 },
8569         { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 },
8570         { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 },
8571         { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 },
8572         { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 },
8573         { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 },
8574         { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 },
8575         { 0x1p+0, 0x0p+0 },
8576         { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 },
8577         { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 },
8578         { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 },
8579         { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 },
8580         { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 },
8581         { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 }
8582     };
8583
8584     double z, r, r2, p, y, y0, invc, logc;
8585     UINT32 ix, iz, top, tmp;
8586     int k, i;
8587
8588     ix = *(UINT32*)&x;
8589     /* Fix sign of zero with downward rounding when x==1. */
8590     if (ix == 0x3f800000)
8591         return 0;
8592     if (ix - 0x00800000 >= 0x7f800000 - 0x00800000) {
8593         /* x < 0x1p-126 or inf or nan. */
8594         if (ix * 2 == 0) {
8595             *_errno() = ERANGE;
8596             return -1.0f / x;
8597         }
8598         if (ix == 0x7f800000) /* log2(inf) == inf. */
8599             return x;
8600         if (ix * 2 > 0xff000000)
8601             return x;
8602         if (ix & 0x80000000) {
8603             *_errno() = EDOM;
8604             return (x - x) / (x - x);
8605         }
8606         /* x is subnormal, normalize it. */
8607         x *= 0x1p23f;
8608         ix = *(UINT32*)&x;
8609         ix -= 23 << 23;
8610     }
8611
8612     /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
8613        The range is split into N subintervals.
8614        The ith subinterval contains z and c is near its center. */
8615     tmp = ix - 0x3f330000;
8616     i = (tmp >> (23 - 4)) % (1 << 4);
8617     top = tmp & 0xff800000;
8618     iz = ix - top;
8619     k = (INT32)tmp >> 23; /* arithmetic shift */
8620     invc = T[i].invc;
8621     logc = T[i].logc;
8622     z = *(float*)&iz;
8623
8624     /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
8625     r = z * invc - 1;
8626     y0 = logc + (double)k;
8627
8628     /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
8629     r2 = r * r;
8630     y = A[1] * r + A[2];
8631     y = A[0] * r2 + y;
8632     p = A[3] * r + y0;
8633     y = y * r2 + p;
8634     return y;
8635 }
8636
/*********************************************************************
 *      rint (MSVCR120.@)
 *
 * Exported entry point that passes x to __rint() and returns its
 * result unchanged.
 */
double CDECL rint(double x)
{
    return __rint(x);
}
8644
8645 /*********************************************************************
8646  *      rintf (MSVCR120.@)
8647  *
8648  * Copied from musl: src/math/rintf.c
8649  */
8650 float CDECL rintf(float x)
8651 {
8652     static const float toint = 1 / FLT_EPSILON;
8653
8654     unsigned int ix = *(unsigned int*)&x;
8655     int e = ix >> 23 & 0xff;
8656     int s = ix >> 31;
8657     float y;
8658
8659     if (e >= 0x7f + 23)
8660         return x;
8661     if (s)
8662         y = fp_barrierf(x - toint) + toint;
8663     else
8664         y = fp_barrierf(x + toint) - toint;
8665     if (y == 0)
8666         return s ? -0.0f : 0.0f;
8667     return y;
8668 }
8669
8670 /*********************************************************************
8671  *      lrint (MSVCR120.@)
8672  */
8673 __msvcrt_long CDECL lrint(double x)
8674 {
8675     double d;
8676
8677     d = rint(x);
8678     if ((d < 0 && d != (double)(__msvcrt_long)d)
8679             || (d >= 0 && d != (double)(__msvcrt_ulong)d)) {
8680         *_errno() = EDOM;
8681         return 0;
8682     }
8683     return d;
8684 }
8685
8686 /*********************************************************************
8687  *      lrintf (MSVCR120.@)
8688  */
8689 __msvcrt_long CDECL lrintf(float x)
8690 {
8691     float f;
8692
8693     f = rintf(x);
8694     if ((f < 0 && f != (float)(__msvcrt_long)f)
8695             || (f >= 0 && f != (float)(__msvcrt_ulong)f)) {
8696         *_errno() = EDOM;
8697         return 0;
8698     }
8699     return f;
8700 }
8701
8702 /*********************************************************************
8703  *      llrint (MSVCR120.@)
8704  */
8705 __int64 CDECL llrint(double x)
8706 {
8707     double d;
8708
8709     d = rint(x);
8710     if ((d < 0 && d != (double)(__int64)d)
8711             || (d >= 0 && d != (double)(unsigned __int64)d)) {
8712         *_errno() = EDOM;
8713         return 0;
8714     }
8715     return d;
8716 }
8717
8718 /*********************************************************************
8719  *      llrintf (MSVCR120.@)
8720  */
8721 __int64 CDECL llrintf(float x)
8722 {
8723     float f;
8724
8725     f = rintf(x);
8726     if ((f < 0 && f != (float)(__int64)f)
8727             || (f >= 0 && f != (float)(unsigned __int64)f)) {
8728         *_errno() = EDOM;
8729         return 0;
8730     }
8731     return f;
8732 }
8733
/*********************************************************************
 *      round (MSVCR120.@)
 *
 * Exported entry point that passes x to __round() and returns its
 * result unchanged.
 */
double CDECL round(double x)
{
    return __round(x);
}
8741
8742 /*********************************************************************
8743  *      roundf (MSVCR120.@)
8744  *
8745  * Copied from musl: src/math/roundf.c
8746  */
8747 float CDECL roundf(float x)
8748 {
8749     static const float toint = 1 / FLT_EPSILON;
8750
8751     unsigned int ix = *(unsigned int*)&x;
8752     int e = ix >> 23 & 0xff;
8753     float y;
8754
8755     if (e >= 0x7f + 23)
8756         return x;
8757     if (ix >> 31)
8758         x = -x;
8759     if (e < 0x7f - 1)
8760         return 0 * *(float*)&ix;
8761     y = fp_barrierf(x + toint) - toint - x;
8762     if (y > 0.5f)
8763         y = y + x - 1;
8764     else if (y <= -0.5f)
8765         y = y + x + 1;
8766     else
8767         y = y + x;
8768     if (ix >> 31)
8769         y = -y;
8770     return y;
8771 }
8772
8773 /*********************************************************************
8774  *      lround (MSVCR120.@)
8775  *
8776  * Copied from musl: src/math/lround.c
8777  */
8778 __msvcrt_long CDECL lround(double x)
8779 {
8780     double d = round(x);
8781     if (d != (double)(__msvcrt_long)d) {
8782         *_errno() = EDOM;
8783         return 0;
8784     }
8785     return d;
8786 }
8787
8788 /*********************************************************************
8789  *      lroundf (MSVCR120.@)
8790  *
8791  * Copied from musl: src/math/lroundf.c
8792  */
8793 __msvcrt_long CDECL lroundf(float x)
8794 {
8795     float f = roundf(x);
8796     if (f != (float)(__msvcrt_long)f) {
8797         *_errno() = EDOM;
8798         return 0;
8799     }
8800     return f;
8801 }
8802
8803 /*********************************************************************
8804  *      llround (MSVCR120.@)
8805  *
8806  * Copied from musl: src/math/llround.c
8807  */
8808 __int64 CDECL llround(double x)
8809 {
8810     double d = round(x);
8811     if (d != (double)(__int64)d) {
8812         *_errno() = EDOM;
8813         return 0;
8814     }
8815     return d;
8816 }
8817
8818 /*********************************************************************
8819  *      llroundf (MSVCR120.@)
8820  *
8821  * Copied from musl: src/math/llroundf.c
8822  */
8823 __int64 CDECL llroundf(float x)
8824 {
8825     float f = roundf(x);
8826     if (f != (float)(__int64)f) {
8827         *_errno() = EDOM;
8828         return 0;
8829     }
8830     return f;
8831 }
8832
8833 /*********************************************************************
8834  *      trunc (MSVCR120.@)
8835  *
8836  * Copied from musl: src/math/trunc.c
8837  */
8838 double CDECL trunc(double x)
8839 {
8840     union {double f; UINT64 i;} u = {x};
8841     int e = (u.i >> 52 & 0x7ff) - 0x3ff + 12;
8842     UINT64 m;
8843
8844     if (e >= 52 + 12)
8845         return x;
8846     if (e < 12)
8847         e = 1;
8848     m = -1ULL >> e;
8849     if ((u.i & m) == 0)
8850         return x;
8851     u.i &= ~m;
8852     return u.f;
8853 }
8854
8855 /*********************************************************************
8856  *      truncf (MSVCR120.@)
8857  *
8858  * Copied from musl: src/math/truncf.c
8859  */
8860 float CDECL truncf(float x)
8861 {
8862     union {float f; UINT32 i;} u = {x};
8863     int e = (u.i >> 23 & 0xff) - 0x7f + 9;
8864     UINT32 m;
8865
8866     if (e >= 23 + 9)
8867         return x;
8868     if (e < 9)
8869         e = 1;
8870     m = -1U >> e;
8871     if ((u.i & m) == 0)
8872         return x;
8873     u.i &= ~m;
8874     return u.f;
8875 }
8876
/*********************************************************************
 *      _dtest (MSVCR120.@)
 *
 * Returns _dclass() of the double that x points to.  x is dereferenced
 * unconditionally, so it must be a valid pointer.
 */
short CDECL _dtest(double *x)
{
    return _dclass(*x);
}
8884
/*********************************************************************
 *      _fdtest (MSVCR120.@)
 *
 * Returns _fdclass() of the float that x points to.  x is dereferenced
 * unconditionally, so it must be a valid pointer.
 */
short CDECL _fdtest(float *x)
{
    return _fdclass(*x);
}
8892
/*
 * Rational approximation centred on |x| = 1: with s = |x| - 1 the
 * result is 1 - erx - P(s) / Q(s).  erfc2() calls this for |x| < 1.25.
 */
static double erfc1(double x)
{
    static const double erx = 8.45062911510467529297e-01;
    /* Numerator coefficients. */
    static const double pa0 = -2.36211856075265944077e-03;
    static const double pa1 =  4.14856118683748331666e-01;
    static const double pa2 = -3.72207876035701323847e-01;
    static const double pa3 =  3.18346619901161753674e-01;
    static const double pa4 = -1.10894694282396677476e-01;
    static const double pa5 =  3.54783043256182359371e-02;
    static const double pa6 = -2.16637559486879084300e-03;
    /* Denominator coefficients (constant term is 1). */
    static const double qa1 =  1.06420880400844228286e-01;
    static const double qa2 =  5.40397917702171048937e-01;
    static const double qa3 =  7.18286544141962662868e-02;
    static const double qa4 =  1.26171219808761642112e-01;
    static const double qa5 =  1.36370839120290507362e-02;
    static const double qa6 =  1.19844998467991074170e-02;

    double t, num, den;

    t = fabs(x) - 1;
    num = pa0 + t * (pa1 + t * (pa2 + t * (pa3 + t * (pa4 + t * (pa5 + t * pa6)))));
    den = 1 + t * (qa1 + t * (qa2 + t * (qa3 + t * (qa4 + t * (qa5 + t * qa6)))));
    return 1 - erx - num / den;
}
8917
8918 static double erfc2(UINT32 ix, double x)
8919 {
8920     static const double ra0  = -9.86494403484714822705e-03,
8921                  ra1  = -6.93858572707181764372e-01,
8922                  ra2  = -1.05586262253232909814e+01,
8923                  ra3  = -6.23753324503260060396e+01,
8924                  ra4  = -1.62396669462573470355e+02,
8925                  ra5  = -1.84605092906711035994e+02,
8926                  ra6  = -8.12874355063065934246e+01,
8927                  ra7  = -9.81432934416914548592e+00,
8928                  sa1  =  1.96512716674392571292e+01,
8929                  sa2  =  1.37657754143519042600e+02,
8930                  sa3  =  4.34565877475229228821e+02,
8931                  sa4  =  6.45387271733267880336e+02,
8932                  sa5  =  4.29008140027567833386e+02,
8933                  sa6  =  1.08635005541779435134e+02,
8934                  sa7  =  6.57024977031928170135e+00,
8935                  sa8  = -6.04244152148580987438e-02,
8936                  rb0  = -9.86494292470009928597e-03,
8937                  rb1  = -7.99283237680523006574e-01,
8938                  rb2  = -1.77579549177547519889e+01,
8939                  rb3  = -1.60636384855821916062e+02,
8940                  rb4  = -6.37566443368389627722e+02,
8941                  rb5  = -1.02509513161107724954e+03,
8942                  rb6  = -4.83519191608651397019e+02,
8943                  sb1  =  3.03380607434824582924e+01,
8944                  sb2  =  3.25792512996573918826e+02,
8945                  sb3  =  1.53672958608443695994e+03,
8946                  sb4  =  3.19985821950859553908e+03,
8947                  sb5  =  2.55305040643316442583e+03,
8948                  sb6  =  4.74528541206955367215e+02,
8949                  sb7  = -2.24409524465858183362e+01;
8950
8951     double s, R, S, z;
8952     UINT64 iz;
8953
8954     if (ix < 0x3ff40000) /* |x| < 1.25 */
8955         return erfc1(x);
8956
8957     x = fabs(x);
8958     s = 1 / (x * x);
8959     if (ix < 0x4006db6d) { /* |x| < 1/.35 ~ 2.85714 */
8960         R = ra0 + s * (ra1 + s * (ra2 + s * (ra3 + s * (ra4 + s *
8961                             (ra5 + s * (ra6 + s * ra7))))));
8962         S = 1.0 + s * (sa1 + s * (sa2 + s * (sa3 + s * (sa4 + s *
8963                             (sa5 + s * (sa6 + s * (sa7 + s * sa8)))))));
8964     } else { /* |x| > 1/.35 */
8965         R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * (rb4 + s *
8966                             (rb5 + s * rb6)))));
8967         S = 1.0 + s * (sb1 + s * (sb2 + s * (sb3 + s * (sb4 + s *
8968                             (sb5 + s * (sb6 + s * sb7))))));
8969     }
8970     z = x;
8971     iz = *(ULONGLONG*)&z;
8972     iz &= 0xffffffff00000000ULL;
8973     z = *(double*)&iz;
8974     return exp(-z * z - 0.5625) * exp((z - x) * (z + x) + R / S) / x;
8975 }
8976
8977 /*********************************************************************
8978  *      erf (MSVCR120.@)
8979  */
8980 double CDECL erf(double x)
8981 {
8982     static const double efx8 =  1.02703333676410069053e+00,
8983                  pp0  =  1.28379167095512558561e-01,
8984                  pp1  = -3.25042107247001499370e-01,
8985                  pp2  = -2.84817495755985104766e-02,
8986                  pp3  = -5.77027029648944159157e-03,
8987                  pp4  = -2.37630166566501626084e-05,
8988                  qq1  =  3.97917223959155352819e-01,
8989                  qq2  =  6.50222499887672944485e-02,
8990                  qq3  =  5.08130628187576562776e-03,
8991                  qq4  =  1.32494738004321644526e-04,
8992                  qq5  = -3.96022827877536812320e-06;
8993
8994     double r, s, z, y;
8995     UINT32 ix;
8996     int sign;
8997
8998     ix = *(UINT64*)&x >> 32;
8999     sign = ix >> 31;
9000     ix &= 0x7fffffff;
9001     if (ix >= 0x7ff00000) {
9002         /* erf(nan)=nan, erf(+-inf)=+-1 */
9003         return 1 - 2 * sign + 1 / x;
9004     }
9005     if (ix < 0x3feb0000) { /* |x| < 0.84375 */
9006         if (ix < 0x3e300000) { /* |x| < 2**-28 */
9007             /* avoid underflow */
9008             return 0.125 * (8 * x + efx8 * x);
9009         }
9010         z = x * x;
9011         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
9012         s = 1.0 + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
9013         y = r / s;
9014         return x + x * y;
9015     }
9016     if (ix < 0x40180000) /* 0.84375 <= |x| < 6 */
9017         y = 1 - erfc2(ix, x);
9018     else
9019         y = 1 - DBL_MIN;
9020     return sign ? -y : y;
9021 }
9022
/* Helper for erfc2f(): evaluates 1 - erx - P(s) / Q(s) with s = |x| - 1,
 * where P and Q are the polynomials whose coefficients are tabulated below
 * (lowest degree first). */
static float erfc1f(float x)
{
    static const float erx = 8.4506291151e-01;
    /* pa0 .. pa6 */
    static const float num[7] = {
        -2.3621185683e-03,
         4.1485610604e-01,
        -3.7220788002e-01,
         3.1834661961e-01,
        -1.1089469492e-01,
         3.5478305072e-02,
        -2.1663755178e-03,
    };
    /* 1, qa1 .. qa6 */
    static const float den[7] = {
         1,
         1.0642088205e-01,
         5.4039794207e-01,
         7.1828655899e-02,
         1.2617121637e-01,
         1.3637083583e-02,
         1.1984500103e-02,
    };

    const float t = fabsf(x) - 1;
    float p = num[6];
    float q = den[6];
    int k;

    /* Horner evaluation, highest degree first */
    for (k = 5; k >= 0; k--) {
        p = num[k] + t * p;
        q = den[k] + t * q;
    }
    return 1 - erx - p / q;
}
9047
9048 static float erfc2f(UINT32 ix, float x)
9049 {
9050     static const float ra0  = -9.8649440333e-03,
9051                  ra1  = -6.9385856390e-01,
9052                  ra2  = -1.0558626175e+01,
9053                  ra3  = -6.2375331879e+01,
9054                  ra4  = -1.6239666748e+02,
9055                  ra5  = -1.8460508728e+02,
9056                  ra6  = -8.1287437439e+01,
9057                  ra7  = -9.8143291473e+00,
9058                  sa1  =  1.9651271820e+01,
9059                  sa2  =  1.3765776062e+02,
9060                  sa3  =  4.3456588745e+02,
9061                  sa4  =  6.4538726807e+02,
9062                  sa5  =  4.2900814819e+02,
9063                  sa6  =  1.0863500214e+02,
9064                  sa7  =  6.5702495575e+00,
9065                  sa8  = -6.0424413532e-02,
9066                  rb0  = -9.8649431020e-03,
9067                  rb1  = -7.9928326607e-01,
9068                  rb2  = -1.7757955551e+01,
9069                  rb3  = -1.6063638306e+02,
9070                  rb4  = -6.3756646729e+02,
9071                  rb5  = -1.0250950928e+03,
9072                  rb6  = -4.8351919556e+02,
9073                  sb1  =  3.0338060379e+01,
9074                  sb2  =  3.2579251099e+02,
9075                  sb3  =  1.5367296143e+03,
9076                  sb4  =  3.1998581543e+03,
9077                  sb5  =  2.5530502930e+03,
9078                  sb6  =  4.7452853394e+02,
9079                  sb7  = -2.2440952301e+01;
9080
9081     float s, R, S, z;
9082
9083     if (ix < 0x3fa00000) /* |x| < 1.25 */
9084         return erfc1f(x);
9085
9086     x = fabsf(x);
9087     s = 1 / (x * x);
9088     if (ix < 0x4036db6d) { /* |x| < 1/0.35 */
9089         R = ra0 + s * (ra1 + s * (ra2 + s * (ra3 + s * (ra4 + s *
9090                             (ra5 + s * (ra6 + s * ra7))))));
9091         S = 1.0f + s * (sa1 + s * (sa2 + s * (sa3 + s * (sa4 + s *
9092                             (sa5 + s * (sa6 + s * (sa7 + s * sa8)))))));
9093     } else { /* |x| >= 1/0.35 */
9094         R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * (rb4 + s * (rb5 + s * rb6)))));
9095         S = 1.0f + s * (sb1 + s * (sb2 + s * (sb3 + s * (sb4 + s *
9096                             (sb5 + s * (sb6 + s * sb7))))));
9097     }
9098
9099     ix = *(UINT32*)&x & 0xffffe000;
9100     z = *(float*)&ix;
9101     return expf(-z * z - 0.5625f) * expf((z - x) * (z + x) + R / S) / x;
9102 }
9103
9104 /*********************************************************************
9105  *      erff (MSVCR120.@)
9106  *
9107  * Copied from musl: src/math/erff.c
9108  */
9109 float CDECL erff(float x)
9110 {
9111     static const float efx8 =  1.0270333290e+00,
9112                  pp0  =  1.2837916613e-01,
9113                  pp1  = -3.2504209876e-01,
9114                  pp2  = -2.8481749818e-02,
9115                  pp3  = -5.7702702470e-03,
9116                  pp4  = -2.3763017452e-05,
9117                  qq1  =  3.9791721106e-01,
9118                  qq2  =  6.5022252500e-02,
9119                  qq3  =  5.0813062117e-03,
9120                  qq4  =  1.3249473704e-04,
9121                  qq5  = -3.9602282413e-06;
9122
9123     float r, s, z, y;
9124     UINT32 ix;
9125     int sign;
9126
9127     ix = *(UINT32*)&x;
9128     sign = ix >> 31;
9129     ix &= 0x7fffffff;
9130     if (ix >= 0x7f800000) {
9131         /* erf(nan)=nan, erf(+-inf)=+-1 */
9132         return 1 - 2 * sign + 1 / x;
9133     }
9134     if (ix < 0x3f580000) { /* |x| < 0.84375 */
9135         if (ix < 0x31800000) { /* |x| < 2**-28 */
9136             /*avoid underflow */
9137             return 0.125f * (8 * x + efx8 * x);
9138         }
9139         z = x * x;
9140         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
9141         s = 1 + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
9142         y = r / s;
9143         return x + x * y;
9144     }
9145     if (ix < 0x40c00000) /* |x| < 6 */
9146         y = 1 - erfc2f(ix, x);
9147     else
9148         y = 1 - FLT_MIN;
9149     return sign ? -y : y;
9150 }
9151
9152 /*********************************************************************
9153  *      erfc (MSVCR120.@)
9154  *
9155  * Copied from musl: src/math/erf.c
9156  */
9157 double CDECL erfc(double x)
9158 {
9159     static const double pp0  =  1.28379167095512558561e-01,
9160                  pp1  = -3.25042107247001499370e-01,
9161                  pp2  = -2.84817495755985104766e-02,
9162                  pp3  = -5.77027029648944159157e-03,
9163                  pp4  = -2.37630166566501626084e-05,
9164                  qq1  =  3.97917223959155352819e-01,
9165                  qq2  =  6.50222499887672944485e-02,
9166                  qq3  =  5.08130628187576562776e-03,
9167                  qq4  =  1.32494738004321644526e-04,
9168                  qq5  = -3.96022827877536812320e-06;
9169
9170     double r, s, z, y;
9171     UINT32 ix;
9172     int sign;
9173
9174     ix = *(ULONGLONG*)&x >> 32;
9175     sign = ix >> 31;
9176     ix &= 0x7fffffff;
9177     if (ix >= 0x7ff00000) {
9178         /* erfc(nan)=nan, erfc(+-inf)=0,2 */
9179         return 2 * sign + 1 / x;
9180     }
9181     if (ix < 0x3feb0000) { /* |x| < 0.84375 */
9182         if (ix < 0x3c700000) /* |x| < 2**-56 */
9183             return 1.0 - x;
9184         z = x * x;
9185         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
9186         s = 1.0 + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
9187         y = r / s;
9188         if (sign || ix < 0x3fd00000) { /* x < 1/4 */
9189             return 1.0 - (x + x * y);
9190         }
9191         return 0.5 - (x - 0.5 + x * y);
9192     }
9193     if (ix < 0x403c0000) { /* 0.84375 <= |x| < 28 */
9194         return sign ? 2 - erfc2(ix, x) : erfc2(ix, x);
9195     }
9196     if (sign)
9197         return 2 - DBL_MIN;
9198     *_errno() = ERANGE;
9199     return fp_barrier(DBL_MIN) * DBL_MIN;
9200 }
9201
9202 /*********************************************************************
9203  *      erfcf (MSVCR120.@)
9204  *
9205  * Copied from musl: src/math/erff.c
9206  */
9207 float CDECL erfcf(float x)
9208 {
9209     static const float pp0  =  1.2837916613e-01,
9210                  pp1  = -3.2504209876e-01,
9211                  pp2  = -2.8481749818e-02,
9212                  pp3  = -5.7702702470e-03,
9213                  pp4  = -2.3763017452e-05,
9214                  qq1  =  3.9791721106e-01,
9215                  qq2  =  6.5022252500e-02,
9216                  qq3  =  5.0813062117e-03,
9217                  qq4  =  1.3249473704e-04,
9218                  qq5  = -3.9602282413e-06;
9219
9220     float r, s, z, y;
9221     UINT32 ix;
9222     int sign;
9223
9224     ix = *(UINT32*)&x;
9225     sign = ix >> 31;
9226     ix &= 0x7fffffff;
9227     if (ix >= 0x7f800000) {
9228         /* erfc(nan)=nan, erfc(+-inf)=0,2 */
9229         return 2 * sign + 1 / x;
9230     }
9231
9232     if (ix < 0x3f580000) { /* |x| < 0.84375 */
9233         if (ix < 0x23800000) /* |x| < 2**-56 */
9234             return 1.0f - x;
9235         z = x * x;
9236         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
9237         s = 1.0f + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
9238         y = r / s;
9239         if (sign || ix < 0x3e800000) /* x < 1/4 */
9240             return 1.0f - (x + x * y);
9241         return 0.5f - (x - 0.5f + x * y);
9242     }
9243     if (ix < 0x41e00000) { /* |x| < 28 */
9244         return sign ? 2 - erfc2f(ix, x) : erfc2f(ix, x);
9245     }
9246     if (sign)
9247         return 2 - FLT_MIN;
9248     *_errno() = ERANGE;
9249     return FLT_MIN * FLT_MIN;
9250 }
9251
9252 /*********************************************************************
9253  *      fmaxf (MSVCR120.@)
9254  */
9255 float CDECL fmaxf(float x, float y)
9256 {
9257     if(isnan(x))
9258         return y;
9259     if(isnan(y))
9260         return x;
9261     if(x==0 && y==0)
9262         return signbit(x) ? y : x;
9263     return x<y ? y : x;
9264 }
9265
9266 /*********************************************************************
9267  *      fmax (MSVCR120.@)
9268  */
9269 double CDECL fmax(double x, double y)
9270 {
9271     if(isnan(x))
9272         return y;
9273     if(isnan(y))
9274         return x;
9275     if(x==0 && y==0)
9276         return signbit(x) ? y : x;
9277     return x<y ? y : x;
9278 }
9279
9280 /*********************************************************************
9281  *      fdimf (MSVCR120.@)
9282  */
9283 float CDECL fdimf(float x, float y)
9284 {
9285     if(isnan(x))
9286         return x;
9287     if(isnan(y))
9288         return y;
9289     return x>y ? x-y : 0;
9290 }
9291
9292 /*********************************************************************
9293  *      fdim (MSVCR120.@)
9294  */
9295 double CDECL fdim(double x, double y)
9296 {
9297     if(isnan(x))
9298         return x;
9299     if(isnan(y))
9300         return y;
9301     return x>y ? x-y : 0;
9302 }
9303
9304 /*********************************************************************
9305  *      _fdsign (MSVCR120.@)
9306  */
9307 int CDECL _fdsign(float x)
9308 {
9309     union { float f; UINT32 i; } u = { x };
9310     return (u.i >> 16) & 0x8000;
9311 }
9312
9313 /*********************************************************************
9314  *      _dsign (MSVCR120.@)
9315  */
9316 int CDECL _dsign(double x)
9317 {
9318     union { double f; UINT64 i; } u = { x };
9319     return (u.i >> 48) & 0x8000;
9320 }
9321
9322
9323 /*********************************************************************
9324  *      _dpcomp (MSVCR120.@)
9325  */
9326 int CDECL _dpcomp(double x, double y)
9327 {
9328     if(isnan(x) || isnan(y))
9329         return 0;
9330
9331     if(x == y) return 2;
9332     return x < y ? 1 : 4;
9333 }
9334
9335 /*********************************************************************
9336  *      _fdpcomp (MSVCR120.@)
9337  */
9338 int CDECL _fdpcomp(float x, float y)
9339 {
9340     return _dpcomp(x, y);
9341 }
9342
9343 /*********************************************************************
9344  *      fminf (MSVCR120.@)
9345  */
9346 float CDECL fminf(float x, float y)
9347 {
9348     if(isnan(x))
9349         return y;
9350     if(isnan(y))
9351         return x;
9352     if(x==0 && y==0)
9353         return signbit(x) ? x : y;
9354     return x<y ? x : y;
9355 }
9356
9357 /*********************************************************************
9358  *      fmin (MSVCR120.@)
9359  */
9360 double CDECL fmin(double x, double y)
9361 {
9362     if(isnan(x))
9363         return y;
9364     if(isnan(y))
9365         return x;
9366     if(x==0 && y==0)
9367         return signbit(x) ? x : y;
9368     return x<y ? x : y;
9369 }
9370
9371 /*********************************************************************
9372  *      asinh (MSVCR120.@)
9373  *
9374  * Copied from musl: src/math/asinh.c
9375  */
9376 double CDECL asinh(double x)
9377 {
9378     UINT64 ux = *(UINT64*)&x;
9379     int e = ux >> 52 & 0x7ff;
9380     int s = ux >> 63;
9381
9382     /* |x| */
9383     ux &= (UINT64)-1 / 2;
9384     x = *(double*)&ux;
9385
9386     if (e >= 0x3ff + 26) /* |x| >= 0x1p26 or inf or nan */
9387         x = log(x) + 0.693147180559945309417232121458176568;
9388     else if (e >= 0x3ff + 1) /* |x| >= 2 */
9389         x = log(2 * x + 1 / (sqrt(x * x + 1) + x));
9390     else if (e >= 0x3ff - 26) /* |x| >= 0x1p-26 */
9391         x = log1p(x + x * x / (sqrt(x * x + 1) + 1));
9392     else /* |x| < 0x1p-26, raise inexact if x != 0 */
9393         fp_barrier(x + 0x1p120f);
9394     return s ? -x : x;
9395 }
9396
9397 /*********************************************************************
9398  *      asinhf (MSVCR120.@)
9399  *
9400  * Copied from musl: src/math/asinhf.c
9401  */
9402 float CDECL asinhf(float x)
9403 {
9404     UINT32 ux = *(UINT32*)&x;
9405     UINT32 i = ux & 0x7fffffff;
9406     int s = ux >> 31;
9407
9408     /* |x| */
9409     x = *(float*)&i;
9410
9411     if (i >= 0x3f800000 + (12 << 23))/* |x| >= 0x1p12 or inf or nan */
9412         x = logf(x) + 0.693147180559945309417232121458176568f;
9413     else if (i >= 0x3f800000 + (1 << 23)) /* |x| >= 2 */
9414         x = logf(2 * x + 1 / (sqrtf(x * x + 1) + x));
9415     else if (i >= 0x3f800000 - (12 << 23)) /* |x| >= 0x1p-12 */
9416         x = log1pf(x + x * x / (sqrtf(x * x + 1) + 1));
9417     else /* |x| < 0x1p-12, raise inexact if x!=0 */
9418         fp_barrierf(x + 0x1p120f);
9419     return s ? -x : x;
9420 }
9421
9422 /*********************************************************************
9423  *      acosh (MSVCR120.@)
9424  *
9425  * Copied from musl: src/math/acosh.c
9426  */
9427 double CDECL acosh(double x)
9428 {
9429     int e = *(UINT64*)&x >> 52 & 0x7ff;
9430
9431     if (x < 1)
9432     {
9433         *_errno() = EDOM;
9434         feraiseexcept(FE_INVALID);
9435         return NAN;
9436     }
9437
9438     if (e < 0x3ff + 1) /* |x| < 2, up to 2ulp error in [1,1.125] */
9439         return log1p(x - 1 + sqrt((x - 1) * (x - 1) + 2 * (x - 1)));
9440     if (e < 0x3ff + 26) /* |x| < 0x1p26 */
9441         return log(2 * x - 1 / (x + sqrt(x * x - 1)));
9442     /* |x| >= 0x1p26 or nan */
9443     return log(x) + 0.693147180559945309417232121458176568;
9444 }
9445
9446 /*********************************************************************
9447  *      acoshf (MSVCR120.@)
9448  *
9449  * Copied from musl: src/math/acoshf.c
9450  */
9451 float CDECL acoshf(float x)
9452 {
9453     UINT32 a = *(UINT32*)&x & 0x7fffffff;
9454
9455     if (x < 1)
9456     {
9457         *_errno() = EDOM;
9458         feraiseexcept(FE_INVALID);
9459         return NAN;
9460     }
9461
9462     if (a < 0x3f800000 + (1 << 23)) /* |x| < 2, up to 2ulp error in [1,1.125] */
9463         return log1pf(x - 1 + sqrtf((x - 1) * (x - 1) + 2 * (x - 1)));
9464     if (*(UINT32*)&x < 0x3f800000 + (12 << 23)) /* 2 <= x < 0x1p12 */
9465         return logf(2 * x - 1 / (x + sqrtf(x * x - 1)));
9466     /* x >= 0x1p12 or x <= -2 or nan */
9467     return logf(x) + 0.693147180559945309417232121458176568f;
9468 }
9469
9470 /*********************************************************************
9471  *      atanh (MSVCR120.@)
9472  *
9473  * Copied from musl: src/math/atanh.c
9474  */
9475 double CDECL atanh(double x)
9476 {
9477     UINT64 ux = *(UINT64*)&x;
9478     int e = ux >> 52 & 0x7ff;
9479     int s = ux >> 63;
9480
9481     /* |x| */
9482     ux &= (UINT64)-1 / 2;
9483     x = *(double*)&ux;
9484
9485     if (x > 1) {
9486         *_errno() = EDOM;
9487         feraiseexcept(FE_INVALID);
9488         return NAN;
9489     }
9490
9491     if (e < 0x3ff - 1) {
9492         if (e < 0x3ff - 32) {
9493             fp_barrier(x + 0x1p120f);
9494             if (e == 0) /* handle underflow */
9495                 fp_barrier(x * x);
9496         } else { /* |x| < 0.5, up to 1.7ulp error */
9497             x = 0.5 * log1p(2 * x + 2 * x * x / (1 - x));
9498         }
9499     } else { /* avoid overflow */
9500         x = 0.5 * log1p(2 * (x / (1 - x)));
9501         if (isinf(x)) *_errno() = ERANGE;
9502     }
9503     return s ? -x : x;
9504 }
9505
9506 /*********************************************************************
9507  *      atanhf (MSVCR120.@)
9508  *
9509  * Copied from musl: src/math/atanhf.c
9510  */
9511 float CDECL atanhf(float x)
9512 {
9513     UINT32 ux = *(UINT32*)&x;
9514     int s = ux >> 31;
9515
9516     /* |x| */
9517     ux &= 0x7fffffff;
9518     x = *(float*)&ux;
9519
9520     if (x > 1) {
9521         *_errno() = EDOM;
9522         feraiseexcept(FE_INVALID);
9523         return NAN;
9524     }
9525
9526     if (ux < 0x3f800000 - (1 << 23)) {
9527         if (ux < 0x3f800000 - (32 << 23)) {
9528             fp_barrierf(x + 0x1p120f);
9529             if (ux < (1 << 23)) /* handle underflow */
9530                 fp_barrierf(x * x);
9531         } else { /* |x| < 0.5, up to 1.7ulp error */
9532             x = 0.5f * log1pf(2 * x + 2 * x * x / (1 - x));
9533         }
9534     } else { /* avoid overflow */
9535         x = 0.5f * log1pf(2 * (x / (1 - x)));
9536         if (isinf(x)) *_errno() = ERANGE;
9537     }
9538     return s ? -x : x;
9539 }
9540
9541 #endif /* _MSVCR_VER>=120 */
9542
9543 /*********************************************************************
9544  *      _scalb  (MSVCRT.@)
9545  *      scalbn  (MSVCR120.@)
9546  *      scalbln (MSVCR120.@)
9547  */
9548 double CDECL _scalb(double num, __msvcrt_long power)
9549 {
9550   return ldexp(num, power);
9551 }
9552
9553 /*********************************************************************
9554  *      _scalbf  (MSVCRT.@)
9555  *      scalbnf  (MSVCR120.@)
9556  *      scalblnf (MSVCR120.@)
9557  */
9558 float CDECL _scalbf(float num, __msvcrt_long power)
9559 {
9560   return ldexp(num, power);
9561 }
9562
9563 #if _MSVCR_VER>=120
9564
9565 /*********************************************************************
9566  *      remainder (MSVCR120.@)
9567  *
9568  * Copied from musl: src/math/remainder.c
9569  */
9570 double CDECL remainder(double x, double y)
9571 {
9572     int q;
9573 #if _MSVCR_VER == 120 && defined(__x86_64__)
9574     if (isnan(x) || isnan(y)) *_errno() = EDOM;
9575 #endif
9576     return remquo(x, y, &q);
9577 }
9578
9579 /*********************************************************************
9580  *      remainderf (MSVCR120.@)
9581  *
9582  * Copied from musl: src/math/remainderf.c
9583  */
9584 float CDECL remainderf(float x, float y)
9585 {
9586     int q;
9587 #if _MSVCR_VER == 120 && defined(__x86_64__)
9588     if (isnan(x) || isnan(y)) *_errno() = EDOM;
9589 #endif
9590     return remquof(x, y, &q);
9591 }
9592
9593 /*********************************************************************
9594  *      remquo (MSVCR120.@)
9595  *
9596  * Copied from musl: src/math/remquo.c
9597  */
9598 double CDECL remquo(double x, double y, int *quo)
9599 {
9600     UINT64 uxi = *(UINT64*)&x;
9601     UINT64 uyi = *(UINT64*)&y;
9602     int ex = uxi >> 52 & 0x7ff;
9603     int ey = uyi >> 52 & 0x7ff;
9604     int sx = uxi >> 63;
9605     int sy = uyi >> 63;
9606     UINT32 q;
9607     UINT64 i;
9608
9609     *quo = 0;
9610     if (y == 0 || isinf(x)) *_errno() = EDOM;
9611     if (uyi << 1 == 0 || isnan(y) || ex == 0x7ff)
9612         return (x * y) / (x * y);
9613     if (uxi << 1 == 0)
9614         return x;
9615
9616     /* normalize x and y */
9617     if (!ex) {
9618         for (i = uxi << 12; i >> 63 == 0; ex--, i <<= 1);
9619         uxi <<= -ex + 1;
9620     } else {
9621         uxi &= -1ULL >> 12;
9622         uxi |= 1ULL << 52;
9623     }
9624     if (!ey) {
9625         for (i = uyi << 12; i >> 63 == 0; ey--, i <<= 1);
9626         uyi <<= -ey + 1;
9627     } else {
9628         uyi &= -1ULL >> 12;
9629         uyi |= 1ULL << 52;
9630     }
9631
9632     q = 0;
9633     if (ex < ey) {
9634         if (ex+1 == ey)
9635             goto end;
9636         return x;
9637     }
9638
9639     /* x mod y */
9640     for (; ex > ey; ex--) {
9641         i = uxi - uyi;
9642         if (i >> 63 == 0) {
9643             uxi = i;
9644             q++;
9645         }
9646         uxi <<= 1;
9647         q <<= 1;
9648     }
9649     i = uxi - uyi;
9650     if (i >> 63 == 0) {
9651         uxi = i;
9652         q++;
9653     }
9654     if (uxi == 0)
9655         ex = -60;
9656     else
9657         for (; uxi >> 52 == 0; uxi <<= 1, ex--);
9658 end:
9659     /* scale result and decide between |x| and |x|-|y| */
9660     if (ex > 0) {
9661         uxi -= 1ULL << 52;
9662         uxi |= (UINT64)ex << 52;
9663     } else {
9664         uxi >>= -ex + 1;
9665     }
9666     x = *(double*)&uxi;
9667     if (sy)
9668         y = -y;
9669     if (ex == ey || (ex + 1 == ey && (2 * x > y || (2 * x == y && q % 2)))) {
9670         x -= y;
9671         q++;
9672     }
9673     q &= 0x7fffffff;
9674     *quo = sx ^ sy ? -(int)q : (int)q;
9675     return sx ? -x : x;
9676 }
9677
9678 /*********************************************************************
9679  *      remquof (MSVCR120.@)
9680  *
9681  * Copied from musl: src/math/remquof.c
9682  */
9683 float CDECL remquof(float x, float y, int *quo)
9684 {
9685     UINT32 uxi = *(UINT32*)&x;
9686     UINT32 uyi = *(UINT32*)&y;
9687     int ex = uxi >> 23 & 0xff;
9688     int ey = uyi >> 23 & 0xff;
9689     int sx = uxi >> 31;
9690     int sy = uyi>> 31;
9691     UINT32 q, i;
9692
9693     *quo = 0;
9694     if (y == 0 || isinf(x)) *_errno() = EDOM;
9695     if (uyi << 1 == 0 || isnan(y) || ex == 0xff)
9696         return (x * y) / (x * y);
9697     if (uxi << 1 == 0)
9698         return x;
9699
9700     /* normalize x and y */
9701     if (!ex) {
9702         for (i = uxi << 9; i >> 31 == 0; ex--, i <<= 1);
9703         uxi <<= -ex + 1;
9704     } else {
9705         uxi &= -1U >> 9;
9706         uxi |= 1U << 23;
9707     }
9708     if (!ey) {
9709         for (i = uyi << 9; i >> 31 == 0; ey--, i <<= 1);
9710         uyi <<= -ey + 1;
9711     } else {
9712         uyi &= -1U >> 9;
9713         uyi |= 1U << 23;
9714     }
9715
9716     q = 0;
9717     if (ex < ey) {
9718         if (ex + 1 == ey)
9719             goto end;
9720         return x;
9721     }
9722
9723     /* x mod y */
9724     for (; ex > ey; ex--) {
9725         i = uxi - uyi;
9726         if (i >> 31 == 0) {
9727             uxi = i;
9728             q++;
9729         }
9730         uxi <<= 1;
9731         q <<= 1;
9732     }
9733     i = uxi - uyi;
9734     if (i >> 31 == 0) {
9735         uxi = i;
9736         q++;
9737     }
9738     if (uxi == 0)
9739         ex = -30;
9740     else
9741         for (; uxi >> 23 == 0; uxi <<= 1, ex--);
9742 end:
9743     /* scale result and decide between |x| and |x|-|y| */
9744     if (ex > 0) {
9745         uxi -= 1U << 23;
9746         uxi |= (UINT32)ex << 23;
9747     } else {
9748         uxi >>= -ex + 1;
9749     }
9750     x = *(float*)&uxi;
9751     if (sy)
9752         y = -y;
9753     if (ex == ey || (ex + 1 == ey && (2 * x > y || (2 * x == y && q % 2)))) {
9754         x -= y;
9755         q++;
9756     }
9757     q &= 0x7fffffff;
9758     *quo = sx ^ sy ? -(int)q : (int)q;
9759     return sx ? -x : x;
9760 }
9761
9762 /* sin(pi*x) assuming x > 2^-100, if sin(pi*x)==0 the sign is arbitrary */
9763 static double sin_pi(double x)
9764 {
9765     int n;
9766
9767     /* spurious inexact if odd int */
9768     x = 2.0 * (x * 0.5 - floor(x * 0.5)); /* x mod 2.0 */
9769
9770     n = x * 4.0;
9771     n = (n + 1) / 2;
9772     x -= n * 0.5f;
9773     x *= M_PI;
9774
9775     switch (n) {
9776     default: /* case 4: */
9777     case 0: return __sin(x, 0.0, 0);
9778     case 1: return __cos(x, 0.0);
9779     case 2: return __sin(-x, 0.0, 0);
9780     case 3: return -__cos(x, 0.0);
9781     }
9782 }
9783
9784 /*********************************************************************
9785  *      lgamma (MSVCR120.@)
9786  *
9787  * Copied from musl: src/math/lgamma_r.c
9788  */
9789 double CDECL lgamma(double x)
9790 {
9791     static const double pi = 3.14159265358979311600e+00,
9792         a0 = 7.72156649015328655494e-02,
9793         a1 = 3.22467033424113591611e-01,
9794         a2 = 6.73523010531292681824e-02,
9795         a3 = 2.05808084325167332806e-02,
9796         a4 = 7.38555086081402883957e-03,
9797         a5 = 2.89051383673415629091e-03,
9798         a6 = 1.19270763183362067845e-03,
9799         a7 = 5.10069792153511336608e-04,
9800         a8 = 2.20862790713908385557e-04,
9801         a9 = 1.08011567247583939954e-04,
9802         a10 = 2.52144565451257326939e-05,
9803         a11 = 4.48640949618915160150e-05,
9804         tc = 1.46163214496836224576e+00,
9805         tf = -1.21486290535849611461e-01,
9806         tt = -3.63867699703950536541e-18,
9807         t0 = 4.83836122723810047042e-01,
9808         t1 = -1.47587722994593911752e-01,
9809         t2 = 6.46249402391333854778e-02,
9810         t3 = -3.27885410759859649565e-02,
9811         t4 = 1.79706750811820387126e-02,
9812         t5 = -1.03142241298341437450e-02,
9813         t6 = 6.10053870246291332635e-03,
9814         t7 = -3.68452016781138256760e-03,
9815         t8 = 2.25964780900612472250e-03,
9816         t9 = -1.40346469989232843813e-03,
9817         t10 = 8.81081882437654011382e-04,
9818         t11 = -5.38595305356740546715e-04,
9819         t12 = 3.15632070903625950361e-04,
9820         t13 = -3.12754168375120860518e-04,
9821         t14 = 3.35529192635519073543e-04,
9822         u0 = -7.72156649015328655494e-02,
9823         u1 = 6.32827064025093366517e-01,
9824         u2 = 1.45492250137234768737e+00,
9825         u3 = 9.77717527963372745603e-01,
9826         u4 = 2.28963728064692451092e-01,
9827         u5 = 1.33810918536787660377e-02,
9828         v1 = 2.45597793713041134822e+00,
9829         v2 = 2.12848976379893395361e+00,
9830         v3 = 7.69285150456672783825e-01,
9831         v4 = 1.04222645593369134254e-01,
9832         v5 = 3.21709242282423911810e-03,
9833         s0 = -7.72156649015328655494e-02,
9834         s1 = 2.14982415960608852501e-01,
9835         s2 = 3.25778796408930981787e-01,
9836         s3 = 1.46350472652464452805e-01,
9837         s4 = 2.66422703033638609560e-02,
9838         s5 = 1.84028451407337715652e-03,
9839         s6 = 3.19475326584100867617e-05,
9840         r1 = 1.39200533467621045958e+00,
9841         r2 = 7.21935547567138069525e-01,
9842         r3 = 1.71933865632803078993e-01,
9843         r4 = 1.86459191715652901344e-02,
9844         r5 = 7.77942496381893596434e-04,
9845         r6 = 7.32668430744625636189e-06,
9846         w0 = 4.18938533204672725052e-01,
9847         w1 = 8.33333333333329678849e-02,
9848         w2 = -2.77777777728775536470e-03,
9849         w3 = 7.93650558643019558500e-04,
9850         w4 = -5.95187557450339963135e-04,
9851         w5 = 8.36339918996282139126e-04,
9852         w6 = -1.63092934096575273989e-03;
9853
9854     union {double f; UINT64 i;} u = {x};
9855     double t, y, z, nadj, p, p1, p2, p3, q, r, w;
9856     UINT32 ix;
9857     int sign,i;
9858
9859     /* purge off +-inf, NaN, +-0, tiny and negative arguments */
9860     sign = u.i >> 63;
9861     ix = u.i >> 32 & 0x7fffffff;
9862     if (ix >= 0x7ff00000)
9863         return x * x;
9864     if (ix < (0x3ff - 70) << 20) { /* |x|<2**-70, return -log(|x|) */
9865         if(sign)
9866             x = -x;
9867         return -log(x);
9868     }
9869     if (sign) {
9870         x = -x;
9871         t = sin_pi(x);
9872         if (t == 0.0) { /* -integer */
9873             *_errno() = ERANGE;
9874             return 1.0 / (x - x);
9875         }
9876         if (t <= 0.0)
9877             t = -t;
9878         nadj = log(pi / (t * x));
9879     }
9880
9881     /* purge off 1 and 2 */
9882     if ((ix == 0x3ff00000 || ix == 0x40000000) && (UINT32)u.i == 0)
9883         r = 0;
9884     /* for x < 2.0 */
9885     else if (ix < 0x40000000) {
9886         if (ix <= 0x3feccccc) { /* lgamma(x) = lgamma(x+1)-log(x) */
9887             r = -log(x);
9888             if (ix >= 0x3FE76944) {
9889                 y = 1.0 - x;
9890                 i = 0;
9891             } else if (ix >= 0x3FCDA661) {
9892                 y = x - (tc - 1.0);
9893                 i = 1;
9894             } else {
9895                 y = x;
9896                 i = 2;
9897             }
9898         } else {
9899             r = 0.0;
9900             if (ix >= 0x3FFBB4C3) { /* [1.7316,2] */
9901                 y = 2.0 - x;
9902                 i = 0;
9903             } else if(ix >= 0x3FF3B4C4) { /* [1.23,1.73] */
9904                 y = x - tc;
9905                 i = 1;
9906             } else {
9907                 y = x - 1.0;
9908                 i = 2;
9909             }
9910         }
9911         switch (i) {
9912         case 0:
9913             z = y * y;
9914             p1 = a0 + z * (a2 + z * (a4 + z * (a6 + z * (a8 + z * a10))));
9915             p2 = z * (a1 + z * (a3 + z * (a5 + z * (a7 + z * (a9 + z * a11)))));
9916             p = y * p1 + p2;
9917             r += (p - 0.5 * y);
9918             break;
9919         case 1:
9920             z = y * y;
9921             w = z * y;
9922             p1 = t0 + w * (t3 + w * (t6 + w * (t9 + w * t12))); /* parallel comp */
9923             p2 = t1 + w * (t4 + w * (t7 + w * (t10 + w * t13)));
9924             p3 = t2 + w * (t5 + w * (t8 + w * (t11 + w * t14)));
9925             p = z * p1 - (tt - w * (p2 + y * p3));
9926             r += tf + p;
9927             break;
9928         case 2:
9929             p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * u5)))));
9930             p2 = 1.0 + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * v5))));
9931             r += -0.5 * y + p1 / p2;
9932         }
9933     } else if (ix < 0x40200000) { /* x < 8.0 */
9934         i = (int)x;
9935         y = x - (double)i;
9936         p = y * (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6))))));
9937         q = 1.0 + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * r6)))));
9938         r = 0.5 * y + p / q;
9939         z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */
9940         switch (i) {
9941         case 7: z *= y + 6.0; /* fall through */
9942         case 6: z *= y + 5.0; /* fall through */
9943         case 5: z *= y + 4.0; /* fall through */
9944         case 4: z *= y + 3.0; /* fall through */
9945         case 3:
9946             z *= y + 2.0;
9947             r += log(z);
9948             break;
9949         }
9950     } else if (ix < 0x43900000) { /* 8.0 <= x < 2**58 */
9951         t = log(x);
9952         z = 1.0 / x;
9953         y = z * z;
9954         w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * w6)))));
9955         r = (x - 0.5) * (t - 1.0) + w;
9956     } else /* 2**58 <= x <= inf */
9957         r = x * (log(x) - 1.0);
9958     if (sign)
9959         r = nadj - r;
9960     return r;
9961 }
9962
9963 /* sin(pi*x) assuming x > 2^-100, if sin(pi*x)==0 the sign is arbitrary */
9964 static float sinf_pi(float x)
9965 {
9966     double y;
9967     int n;
9968
9969     /* spurious inexact if odd int */
9970     x = 2 * (x * 0.5f - floorf(x * 0.5f)); /* x mod 2.0 */
9971
9972     n = (int)(x * 4);
9973     n = (n + 1) / 2;
9974     y = x - n * 0.5f;
9975     y *= M_PI;
9976     switch (n) {
9977     default: /* case 4: */
9978     case 0: return __sindf(y);
9979     case 1: return __cosdf(y);
9980     case 2: return __sindf(-y);
9981     case 3: return -__cosdf(y);
9982     }
9983 }
9984
/*********************************************************************
 *      lgammaf (MSVCR120.@)
 *
 * Copied from musl: src/math/lgammaf_r.c
 *
 * Single precision log-gamma.  Only the value r computed below is
 * returned; no sign information about gamma(x) is reported.
 *
 * Special cases handled by the code:
 *   - x is +-inf or NaN:  returns x * x
 *   - |x| < 2**-21:       returns -logf(|x|)
 *   - x is a negative integer (sinf_pi(-x) == 0): sets errno to ERANGE
 *     and returns 1.0f / (x - x)
 *   - other negative x:   evaluates the positive-argument result r for -x
 *     and returns logf(pi / (|sin(pi*x)| * |x|)) - r
 */
float CDECL lgammaf(float x)
{
    /* single precision pi followed by the coefficients of the
     * approximations used below (a, t, u, v, s, r and w families) */
    static const float pi = 3.1415927410e+00,
        a0 = 7.7215664089e-02,
        a1 = 3.2246702909e-01,
        a2 = 6.7352302372e-02,
        a3 = 2.0580807701e-02,
        a4 = 7.3855509982e-03,
        a5 = 2.8905137442e-03,
        a6 = 1.1927076848e-03,
        a7 = 5.1006977446e-04,
        a8 = 2.2086278477e-04,
        a9 = 1.0801156895e-04,
        a10 = 2.5214456400e-05,
        a11 = 4.4864096708e-05,
        tc = 1.4616321325e+00,
        tf = -1.2148628384e-01,
        tt = 6.6971006518e-09,
        t0 = 4.8383611441e-01,
        t1 = -1.4758771658e-01,
        t2 = 6.4624942839e-02,
        t3 = -3.2788541168e-02,
        t4 = 1.7970675603e-02,
        t5 = -1.0314224288e-02,
        t6 = 6.1005386524e-03,
        t7 = -3.6845202558e-03,
        t8 = 2.2596477065e-03,
        t9 = -1.4034647029e-03,
        t10 = 8.8108185446e-04,
        t11 = -5.3859531181e-04,
        t12 = 3.1563205994e-04,
        t13 = -3.1275415677e-04,
        t14 = 3.3552918467e-04,
        u0 = -7.7215664089e-02,
        u1 = 6.3282704353e-01,
        u2 = 1.4549225569e+00,
        u3 = 9.7771751881e-01,
        u4 = 2.2896373272e-01,
        u5 = 1.3381091878e-02,
        v1 = 2.4559779167e+00,
        v2 = 2.1284897327e+00,
        v3 = 7.6928514242e-01,
        v4 = 1.0422264785e-01,
        v5 = 3.2170924824e-03,
        s0 = -7.7215664089e-02,
        s1 = 2.1498242021e-01,
        s2 = 3.2577878237e-01,
        s3 = 1.4635047317e-01,
        s4 = 2.6642270386e-02,
        s5 = 1.8402845599e-03,
        s6 = 3.1947532989e-05,
        r1 = 1.3920053244e+00,
        r2 = 7.2193557024e-01,
        r3 = 1.7193385959e-01,
        r4 = 1.8645919859e-02,
        r5 = 7.7794247773e-04,
        r6 = 7.3266842264e-06,
        w0 = 4.1893854737e-01,
        w1 = 8.3333335817e-02,
        w2 = -2.7777778450e-03,
        w3 = 7.9365057172e-04,
        w4 = -5.9518753551e-04,
        w5 = 8.3633989561e-04,
        w6 = -1.6309292987e-03;

    /* u gives access to the bit pattern of x */
    union {float f; UINT32 i;} u = {x};
    float t, y, z, nadj, p, p1, p2, p3, q, r, w;
    UINT32 ix;
    int i, sign;

    /* purge off +-inf, NaN, +-0, tiny and negative arguments */
    sign = u.i >> 31;       /* top bit of the bit pattern: 1 for negative x */
    ix = u.i & 0x7fffffff;  /* bit pattern of |x| */
    if (ix >= 0x7f800000)
        return x * x;
    if (ix < 0x35000000) { /* |x| < 2**-21, return -log(|x|) */
        if (sign)
            x = -x;
        return -logf(x);
    }
    if (sign) {
        /* from here on x holds |x|; the sign is remembered in 'sign' */
        x = -x;
        t = sinf_pi(x);
        if (t == 0.0f) { /* -integer */
            *_errno() = ERANGE;
            return 1.0f / (x - x);
        }
        /* t is non-zero here, so this takes its absolute value */
        if (t <= 0.0f)
            t = -t;
        /* adjustment applied at the end as r = nadj - r */
        nadj = logf(pi / (t * x));
    }

    /* purge off 1 and 2 */
    if (ix == 0x3f800000 || ix == 0x40000000)
        r = 0;
    /* for x < 2.0 */
    else if (ix < 0x40000000) {
        /* choose the reduced argument y and which of the three
         * approximations (i = 0, 1 or 2) is evaluated in the switch below */
        if (ix <= 0x3f666666) { /* lgamma(x) = lgamma(x+1)-log(x) */
            r = -logf(x);
            if (ix >= 0x3f3b4a20) {
                y = 1.0f - x;
                i = 0;
            } else if (ix >= 0x3e6d3308) {
                y = x - (tc - 1.0f);
                i = 1;
            } else {
                y = x;
                i = 2;
            }
        } else {
            r = 0.0f;
            if (ix >= 0x3fdda618) { /* [1.7316,2] */
                y = 2.0f - x;
                i = 0;
            } else if (ix >= 0x3F9da620) { /* [1.23,1.73] */
                y = x - tc;
                i = 1;
            } else {
                y = x - 1.0f;
                i = 2;
            }
        }
        switch(i) {
        case 0:
            /* polynomial in y using the a coefficients, split into the
             * even-indexed (p1) and odd-indexed (p2) halves */
            z = y * y;
            p1 = a0 + z * (a2 + z * (a4 + z * (a6 + z * (a8 + z * a10))));
            p2 = z * (a1 + z * (a3 + z * (a5 + z * (a7 + z * (a9 + z * a11)))));
            p = y * p1 + p2;
            r += p - 0.5f * y;
            break;
        case 1:
            /* polynomial in y using the t coefficients, evaluated as three
             * interleaved sums in w = y^3 */
            z = y * y;
            w = z * y;
            p1 = t0 + w * (t3 + w * (t6 + w * (t9 + w * t12))); /* parallel comp */
            p2 = t1 + w * (t4 + w * (t7 + w * (t10 + w * t13)));
            p3 = t2 + w * (t5 + w * (t8 + w * (t11 + w * t14)));
            p = z * p1 - (tt - w * (p2 + y * p3));
            r += (tf + p);
            break;
        case 2:
            /* rational approximation: u polynomial over v polynomial */
            p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * u5)))));
            p2 = 1.0f + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * v5))));
            r += -0.5f * y + p1 / p2;
        }
    } else if (ix < 0x41000000) { /* x < 8.0 */
        /* split x into integer part i and fractional part y, evaluate a
         * rational approximation in y (s polynomial over r polynomial) */
        i = (int)x;
        y = x - (float)i;
        p = y * (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6))))));
        q = 1.0f + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * r6)))));
        r = 0.5f * y + p / q;
        z = 1.0f; /* lgamma(1+s) = log(s) + lgamma(s) */
        /* accumulate the product (y+2)*...*(y+i-1) and add its logarithm;
         * for i == 2 no case matches and r is left unchanged */
        switch (i) {
        case 7: z *= y + 6.0f; /* fall through */
        case 6: z *= y + 5.0f; /* fall through */
        case 5: z *= y + 4.0f; /* fall through */
        case 4: z *= y + 3.0f; /* fall through */
        case 3:
            z *= y + 2.0f;
            r += logf(z);
            break;
        }
    } else if (ix < 0x5c800000) { /* 8.0 <= x < 2**58 */
        /* (x-0.5)*(log(x)-1) plus a correction series in 1/x (w coefficients) */
        t = logf(x);
        z = 1.0f / x;
        y = z * z;
        w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * w6)))));
        r = (x - 0.5f) * (t - 1.0f) + w;
    } else /* 2**58 <= x <= inf */
        r = x * (logf(x) - 1.0f);
    /* negative argument: combine with the adjustment computed above */
    if (sign)
        r = nadj - r;
    return r;
}
10163
/*
 * Rational function Snum(x) / Sden(x) used by tgamma().
 * Both polynomials are evaluated together with Horner's scheme; for
 * x >= 8 (and for NaN) the evaluation runs in powers of 1/x instead,
 * which scales numerator and denominator by the same factor.
 */
static double tgamma_S(double x)
{
    static const double Snum[] = {
        23531376880.410759688572007674451636754734846804940,
        42919803642.649098768957899047001988850926355848959,
        35711959237.355668049440185451547166705960488635843,
        17921034426.037209699919755754458931112671403265390,
        6039542586.3520280050642916443072979210699388420708,
        1439720407.3117216736632230727949123939715485786772,
        248874557.86205415651146038641322942321632125127801,
        31426415.585400194380614231628318205362874684987640,
        2876370.6289353724412254090516208496135991145378768,
        186056.26539522349504029498971604569928220784236328,
        8071.6720023658162106380029022722506138218516325024,
        210.82427775157934587250973392071336271166969580291,
        2.5066282746310002701649081771338373386264310793408,
    };
    static const double Sden[] = {
        0, 39916800, 120543840, 150917976, 105258076, 45995730, 13339535,
        2637558, 357423, 32670, 1925, 66, 1,
    };

    double top = 0, bottom = 0;
    int k;

    if (x < 8) {
        /* highest order coefficient first, multiplying by x each step */
        k = ARRAY_SIZE(Snum);
        while (k-- > 0) {
            top = top * x + Snum[k];
            bottom = bottom * x + Sden[k];
        }
    } else {
        /* to avoid overflow handle large x differently:
         * lowest order coefficient first, dividing by x each step */
        k = 0;
        while (k < ARRAY_SIZE(Snum)) {
            top = top / x + Snum[k];
            bottom = bottom / x + Sden[k];
            k++;
        }
    }
    return top / bottom;
}
10202
/*********************************************************************
 *      tgamma (MSVCR120.@)
 *
 * Copied from musl: src/math/tgamma.c
 *
 * Gamma function for double arguments.
 *
 * errno handling visible in this implementation:
 *   - x == -inf:            EDOM, returns x + INFINITY
 *   - x == +-0:             ERANGE, returns 1 / x
 *   - x a negative integer: EDOM, returns 0 / (x - x)
 *   - |x| >= 184:           ERANGE, returns 0 for negative x and
 *                           x * 0x1p1023 for positive x
 * NaN and +inf return x + INFINITY without touching errno.
 */
double CDECL tgamma(double x)
{
    /* NOTE(review): the "x + g - 0.5" comment below suggests gmhalf is
     * g - 0.5 for the approximation parameter g - confirm against musl */
    static const double gmhalf = 5.524680040776729583740234375;
    /* fact[n - 1] holds (n - 1)!, which is the result for integer x == n */
    static const double fact[] = {
        1, 1, 2, 6, 24, 120, 720, 5040.0, 40320.0, 362880.0, 3628800.0, 39916800.0,
        479001600.0, 6227020800.0, 87178291200.0, 1307674368000.0, 20922789888000.0,
        355687428096000.0, 6402373705728000.0, 121645100408832000.0,
        2432902008176640000.0, 51090942171709440000.0, 1124000727777607680000.0,
    };

    /* u gives access to the bit pattern of x */
    union {double f; UINT64 i;} u = {x};
    double absx, y, dy, z, r;
    UINT32 ix = u.i >> 32 & 0x7fffffff; /* high word of x without the sign bit */
    int sign = u.i >> 63;               /* 1 when the sign bit of x is set */

    /* special cases */
    if (ix >= 0x7ff00000) {
        /* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */
        if (u.i == 0xfff0000000000000ULL)
            *_errno() = EDOM;
        return x + INFINITY;
    }
    if (ix < (0x3ff - 54) << 20) {
        /* |x| < 2^-54: tgamma(x) ~ 1/x, +-0 raises div-by-zero */
        if (x == 0.0)
            *_errno() = ERANGE;
        return 1 / x;
    }

    /* integer arguments */
    /* raise inexact when non-integer */
    if (x == floor(x)) {
        if (sign) {
            *_errno() = EDOM;
            return 0 / (x - x);
        }
        /* small positive integers are answered from the factorial table */
        if (x <= ARRAY_SIZE(fact))
            return fact[(int)x - 1];
    }

    /* x >= 172: tgamma(x)=inf with overflow */
    /* x <= -184: tgamma(x)=+-0 with underflow */
    if (ix >= 0x40670000) { /* |x| >= 184 */
        *_errno() = ERANGE;
        if (sign) {
            /* NOTE(review): presumably evaluated only for its floating
             * point side effect (underflow) - confirm fp_barrierf semantics */
            fp_barrierf(0x1p-126 / x);
            return 0;
        }
        x *= 0x1p1023;
        return x;
    }

    absx = sign ? -x : x;

    /* handle the error of x + g - 0.5 */
    y = absx + gmhalf;
    /* dy is the rounding error of the sum above; the larger operand is
     * subtracted first */
    if (absx > gmhalf) {
        dy = y - absx;
        dy -= gmhalf;
    } else {
        dy = y - gmhalf;
        dy -= absx;
    }

    z = absx - 0.5;
    r = tgamma_S(absx) * exp(-y);
    if (x < 0) {
        /* reflection formula for negative x */
        /* sinpi(absx) is not 0, integers are already handled */
        r = -M_PI / (sin_pi(absx) * absx * r);
        dy = -dy;
        z = -z;
    }
    /* fold the correction for the rounding error dy into r */
    r += dy * (gmhalf + 0.5) * r / y;
    /* y^z is applied as two factors of y^(z/2) */
    z = pow(y, 0.5 * z);
    y = r * z * z;
    return y;
}
10286
10287 /*********************************************************************
10288  *      tgammaf (MSVCR120.@)
10289  *
10290  * Copied from musl: src/math/tgammaf.c
10291  */
10292 float CDECL tgammaf(float x)
10293 {
10294     return tgamma(x);
10295 }
10296
10297 /*********************************************************************
10298  *      nan (MSVCR120.@)
10299  */
10300 double CDECL nan(const char *tagp)
10301 {
10302     /* Windows ignores input (MSDN) */
10303     return NAN;
10304 }
10305
10306 /*********************************************************************
10307  *      nanf (MSVCR120.@)
10308  */
10309 float CDECL nanf(const char *tagp)
10310 {
10311     return NAN;
10312 }
10313
10314 /*********************************************************************
10315  *      _except1 (MSVCR120.@)
10316  *  TODO:
10317  *   - find meaning of ignored cw and operation bits
10318  *   - unk parameter
10319  */
10320 double CDECL _except1(DWORD fpe, _FP_OPERATION_CODE op, double arg, double res, DWORD cw, void *unk)
10321 {
10322     ULONG_PTR exception_arg;
10323     DWORD exception = 0;
10324     DWORD fpword = 0;
10325     WORD operation;
10326     int raise = 0;
10327
10328     TRACE("(%x %x %lf %lf %x %p)\n", fpe, op, arg, res, cw, unk);
10329
10330 #ifdef _WIN64
10331     cw = ((cw >> 7) & 0x3f) | ((cw >> 3) & 0xc00);
10332 #endif
10333     operation = op << 5;
10334     exception_arg = (ULONG_PTR)&operation;
10335
10336     if (fpe & 0x1) { /* overflow */
10337         if ((fpe == 0x1 && (cw & 0x8)) || (fpe==0x11 && (cw & 0x28))) {
10338             /* 32-bit version also sets SW_INEXACT here */
10339             raise |= FE_OVERFLOW;
10340             if (fpe & 0x10) raise |= FE_INEXACT;
10341             res = signbit(res) ? -INFINITY : INFINITY;
10342         } else {
10343             exception = EXCEPTION_FLT_OVERFLOW;
10344         }
10345     } else if (fpe & 0x2) { /* underflow */
10346         if ((fpe == 0x2 && (cw & 0x10)) || (fpe==0x12 && (cw & 0x30))) {
10347             raise |= FE_UNDERFLOW;
10348             if (fpe & 0x10) raise |= FE_INEXACT;
10349             res = signbit(res) ? -0.0 : 0.0;
10350         } else {
10351             exception = EXCEPTION_FLT_UNDERFLOW;
10352         }
10353     } else if (fpe & 0x4) { /* zerodivide */
10354         if ((fpe == 0x4 && (cw & 0x4)) || (fpe==0x14 && (cw & 0x24))) {
10355             raise |= FE_DIVBYZERO;
10356             if (fpe & 0x10) raise |= FE_INEXACT;
10357         } else {
10358             exception = EXCEPTION_FLT_DIVIDE_BY_ZERO;
10359         }
10360     } else if (fpe & 0x8) { /* invalid */
10361         if (fpe == 0x8 && (cw & 0x1)) {
10362             raise |= FE_INVALID;
10363         } else {
10364             exception = EXCEPTION_FLT_INVALID_OPERATION;
10365         }
10366     } else if (fpe & 0x10) { /* inexact */
10367         if (fpe == 0x10 && (cw & 0x20)) {
10368             raise |= FE_INEXACT;
10369         } else {
10370             exception = EXCEPTION_FLT_INEXACT_RESULT;
10371         }
10372     }
10373
10374     if (exception)
10375         raise = 0;
10376     feraiseexcept(raise);
10377     if (exception)
10378         RaiseException(exception, 0, 1, &exception_arg);
10379
10380     if (cw & 0x1) fpword |= _EM_INVALID;
10381     if (cw & 0x2) fpword |= _EM_DENORMAL;
10382     if (cw & 0x4) fpword |= _EM_ZERODIVIDE;
10383     if (cw & 0x8) fpword |= _EM_OVERFLOW;
10384     if (cw & 0x10) fpword |= _EM_UNDERFLOW;
10385     if (cw & 0x20) fpword |= _EM_INEXACT;
10386     switch (cw & 0xc00)
10387     {
10388         case 0xc00: fpword |= _RC_UP|_RC_DOWN; break;
10389         case 0x800: fpword |= _RC_UP; break;
10390         case 0x400: fpword |= _RC_DOWN; break;
10391     }
10392     switch (cw & 0x300)
10393     {
10394         case 0x0:   fpword |= _PC_24; break;
10395         case 0x200: fpword |= _PC_53; break;
10396         case 0x300: fpword |= _PC_64; break;
10397     }
10398     if (cw & 0x1000) fpword |= _IC_AFFINE;
10399     _control87(fpword, 0xffffffff);
10400
10401     return res;
10402 }
10403
10404 _Dcomplex* CDECL _Cbuild(_Dcomplex *ret, double r, double i)
10405 {
10406     ret->_Val[0] = r;
10407     ret->_Val[1] = i;
10408     return ret;
10409 }
10410
10411 double CDECL MSVCR120_creal(_Dcomplex z)
10412 {
10413     return z._Val[0];
10414 }
10415
10416 /*********************************************************************
10417  *      ilogb (MSVCR120.@)
10418  */
10419 int CDECL ilogb(double x)
10420 {
10421     return __ilogb(x);
10422 }
10423
10424 /*********************************************************************
10425  *      ilogbf (MSVCR120.@)
10426  */
10427 int CDECL ilogbf(float x)
10428 {
10429     return __ilogbf(x);
10430 }
10431 #endif /* _MSVCR_VER>=120 */