dlls/msvcrt/math.c

   1 /*
   2  * msvcrt.dll math functions
   3  *
   4  * Copyright 2000 Jon Griffiths
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  19  *
  20  *
  21  * For functions copied from musl libc (http://musl.libc.org/):
  22  * ====================================================
  23  * Copyright 2005-2020 Rich Felker, et al.
  24  *
  25  * Permission is hereby granted, free of charge, to any person obtaining
  26  * a copy of this software and associated documentation files (the
  27  * "Software"), to deal in the Software without restriction, including
  28  * without limitation the rights to use, copy, modify, merge, publish,
  29  * distribute, sublicense, and/or sell copies of the Software, and to
  30  * permit persons to whom the Software is furnished to do so, subject to
  31  * the following conditions:
  32  *
  33  * The above copyright notice and this permission notice shall be
  34  * included in all copies or substantial portions of the Software.
  35  * ====================================================
  36  */
  37
  38 #include <assert.h>
  39 #include <complex.h>
  40 #include <stdio.h>
  41 #include <fenv.h>
  42 #include <fpieee.h>
  43 #include <limits.h>
  44 #include <locale.h>
  45 #include <math.h>
  46
  47 #include "msvcrt.h"
  48 #include "winternl.h"
  49
  50 #include "wine/asm.h"
  51 #include "wine/debug.h"
  52
  53 WINE_DEFAULT_DEBUG_CHANNEL(msvcrt);
  54
  55 #undef div
  56 #undef ldiv
  57
  58 #define _DOMAIN         1       /* domain error in argument */
  59 #define _SING           2       /* singularity */
  60 #define _OVERFLOW       3       /* range overflow */
  61 #define _UNDERFLOW      4       /* range underflow */
  62
  63 typedef int (CDECL *MSVCRT_matherr_func)(struct _exception *);
  64
  65 static MSVCRT_matherr_func MSVCRT_default_matherr_func = NULL;
  66
  67 BOOL sse2_supported;
  68 static BOOL sse2_enabled;
  69
  70 void msvcrt_init_math( void *module )
  71 {
  72     sse2_supported = IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE );
  73 #if _MSVCR_VER <=71
  74     sse2_enabled = FALSE;
  75 #else
  76     sse2_enabled = sse2_supported;
  77 #endif
  78 }
  79
  80 /* Copied from musl: src/internal/libm.h */
  81 static inline float fp_barrierf(float x)
  82 {
  83     volatile float y = x;
  84     return y;
  85 }
  86
  87 static inline double fp_barrier(double x)
  88 {
  89     volatile double y = x;
  90     return y;
  91 }
  92
  93 static inline double ret_nan( BOOL update_sw )
  94 {
  95     double x = 1.0;
  96     if (!update_sw) return -NAN;
  97     return (x - x) / (x - x);
  98 }
  99
/* i386 assembly fragment: save the x87 control word and clear the bits given
 * by MASK.
 *
 * - Reserves 4 bytes on the stack (with matching CFI adjustment).
 * - Stores the current control word at (%esp) and a working copy at 2(%esp).
 * - If any bit of MASK is set in the control word, clears those bits in the
 *   copy and loads it with fldcw; otherwise the control word is not reloaded.
 *
 * Clobbers %ax and leaves %esp lowered by 4.  RESET_X87_CW compares the two
 * stack slots, restores the saved word if they differ and releases the 4
 * bytes, so the two macros are meant to be used as a pair.
 * MASK is pasted into the instruction text as an immediate operand.
 * NOTE(review): both macros define the local label "1:"; each "1f" reference
 * binds to the nearest following definition.
 */
#define SET_X87_CW(MASK) \
    "subl $4, %esp\n\t" \
    __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \
    "fnstcw (%esp)\n\t" \
    "movw (%esp), %ax\n\t" \
    "movw %ax, 2(%esp)\n\t" \
    "testw $" #MASK ", %ax\n\t" \
    "jz 1f\n\t" \
    "andw $~" #MASK ", %ax\n\t" \
    "movw %ax, 2(%esp)\n\t" \
    "fldcw 2(%esp)\n\t" \
    "1:\n\t"
 112
/* i386 assembly fragment: undo SET_X87_CW.
 *
 * Expects the stack layout left by SET_X87_CW: the original control word at
 * (%esp) and the possibly modified copy at 2(%esp).
 *
 * - If both words are equal nothing was changed, so only the 4 reserved
 *   bytes are released.
 * - Otherwise st(0) is stored as a 64-bit double to 8(%esp) and popped, the
 *   original control word is reloaded with fldcw, and the value is pushed
 *   back from 8(%esp), followed by fwait.  The store happens while the
 *   modified control word is still active.
 *
 * Clobbers %ax.
 * NOTE(review): 8(%esp) lies above the 4 bytes reserved by SET_X87_CW;
 * presumably it is the caller's first double argument slot being reused as
 * scratch space - confirm at the use sites.
 */
#define RESET_X87_CW \
    "movw (%esp), %ax\n\t" \
    "cmpw %ax, 2(%esp)\n\t" \
    "je 1f\n\t" \
    "fstpl 8(%esp)\n\t" \
    "fldcw (%esp)\n\t" \
    "fldl 8(%esp)\n\t" \
    "fwait\n\t" \
    "1:\n\t" \
    "addl $4, %esp\n\t" \
    __ASM_CFI(".cfi_adjust_cfa_offset -4\n\t")
 124
 125 /*********************************************************************
 126  *      _matherr (CRTDLL.@)
 127  */
 128 int CDECL _matherr(struct _exception *e)
 129 {
 130     return 0;
 131 }
 132
 133
 134 static double math_error(int type, const char *name, double arg1, double arg2, double retval)
 135 {
 136     struct _exception exception = {type, (char *)name, arg1, arg2, retval};
 137
 138     TRACE("(%d, %s, %g, %g, %g)\n", type, debugstr_a(name), arg1, arg2, retval);
 139
 140     if (MSVCRT_default_matherr_func && MSVCRT_default_matherr_func(&exception))
 141         return exception.retval;
 142
 143     switch (type)
 144     {
 145     case 0:
 146         /* don't set errno */
 147         break;
 148     case _DOMAIN:
 149         *_errno() = EDOM;
 150         break;
 151     case _SING:
 152     case _OVERFLOW:
 153         *_errno() = ERANGE;
 154         break;
 155     case _UNDERFLOW:
 156         /* don't set errno */
 157         break;
 158     default:
 159         ERR("Unhandled math error!\n");
 160     }
 161
 162     return exception.retval;
 163 }
 164
 165 /*********************************************************************
 166  *      __setusermatherr (MSVCRT.@)
 167  */
 168 void CDECL __setusermatherr(MSVCRT_matherr_func func)
 169 {
 170     MSVCRT_default_matherr_func = func;
 171     TRACE("new matherr handler %p\n", func);
 172 }
 173
 174 /*********************************************************************
 175  *      _set_SSE2_enable (MSVCRT.@)
 176  */
 177 int CDECL _set_SSE2_enable(int flag)
 178 {
 179     sse2_enabled = flag && sse2_supported;
 180     return sse2_enabled;
 181 }
 182
 183 #if defined(_WIN64)
 184 # if _MSVCR_VER>=140
 185 /*********************************************************************
 186  *      _get_FMA3_enable (UCRTBASE.@)
 187  */
 188 int CDECL _get_FMA3_enable(void)
 189 {
 190     FIXME("() stub\n");
 191     return 0;
 192 }
 193 # endif
 194
 195 # if _MSVCR_VER>=120
 196 /*********************************************************************
 197  *      _set_FMA3_enable (MSVCR120.@)
 198  */
 199 int CDECL _set_FMA3_enable(int flag)
 200 {
 201     FIXME("(%x) stub\n", flag);
 202     return 0;
 203 }
 204 # endif
 205 #endif
 206
 207 #if !defined(__i386__) || _MSVCR_VER>=120
 208
 209 /*********************************************************************
 210  *      _chgsignf (MSVCRT.@)
 211  */
 212 float CDECL _chgsignf( float num )
 213 {
 214     union { float f; UINT32 i; } u = { num };
 215     u.i ^= 0x80000000;
 216     return u.f;
 217 }
 218
 219 /*********************************************************************
 220  *      _copysignf (MSVCRT.@)
 221  *
 222  * Copied from musl: src/math/copysignf.c
 223  */
 224 float CDECL _copysignf( float x, float y )
 225 {
 226     union { float f; UINT32 i; } ux = { x }, uy = { y };
 227     ux.i &= 0x7fffffff;
 228     ux.i |= uy.i & 0x80000000;
 229     return ux.f;
 230 }
 231
 232 /*********************************************************************
 233  *      _nextafterf (MSVCRT.@)
 234  *
 235  * Copied from musl: src/math/nextafterf.c
 236  */
 237 float CDECL _nextafterf( float x, float y )
 238 {
 239     unsigned int ix = *(unsigned int*)&x;
 240     unsigned int iy = *(unsigned int*)&y;
 241     unsigned int ax, ay, e;
 242
 243     if (isnan(x) || isnan(y))
 244         return x + y;
 245     if (x == y) {
 246         if (_fpclassf(y) & (_FPCLASS_ND | _FPCLASS_PD | _FPCLASS_NZ | _FPCLASS_PZ ))
 247             *_errno() = ERANGE;
 248         return y;
 249     }
 250     ax = ix & 0x7fffffff;
 251     ay = iy & 0x7fffffff;
 252     if (ax == 0) {
 253         if (ay == 0)
 254             return y;
 255         ix = (iy & 0x80000000) | 1;
 256     } else if (ax > ay || ((ix ^ iy) & 0x80000000))
 257         ix--;
 258     else
 259         ix++;
 260     e = ix & 0x7f800000;
 261     /* raise overflow if ix is infinite and x is finite */
 262     if (e == 0x7f800000) {
 263         fp_barrierf(x + x);
 264         *_errno() = ERANGE;
 265     }
 266     /* raise underflow if ix is subnormal or zero */
 267     y = *(float*)&ix;
 268     if (e == 0) {
 269         fp_barrierf(x * x + y * y);
 270         *_errno() = ERANGE;
 271     }
 272     return y;
 273 }
 274
 275 /* Copied from musl: src/math/ilogbf.c */
 276 static int __ilogbf(float x)
 277 {
 278     union { float f; UINT32 i; } u = { x };
 279     int e = u.i >> 23 & 0xff;
 280
 281     if (!e)
 282     {
 283         u.i <<= 9;
 284         if (u.i == 0) return FP_ILOGB0;
 285         /* subnormal x */
 286         for (e = -0x7f; u.i >> 31 == 0; e--, u.i <<= 1);
 287         return e;
 288     }
 289     if (e == 0xff) return u.i << 9 ? FP_ILOGBNAN : INT_MAX;
 290     return e - 0x7f;
 291 }
 292
 293 /*********************************************************************
 294  *      _logbf (MSVCRT.@)
 295  *
 296  * Copied from musl: src/math/logbf.c
 297  */
 298 float CDECL _logbf(float x)
 299 {
 300     if (!isfinite(x))
 301         return x * x;
 302     if (x == 0) {
 303         *_errno() = ERANGE;
 304         return -1 / (x * x);
 305     }
 306     return __ilogbf(x);
 307 }
 308
 309 #endif
 310
 311 /* Copied from musl: src/math/scalbn.c */
 312 static double __scalbn(double x, int n)
 313 {
 314     union {double f; UINT64 i;} u;
 315     double y = x;
 316
 317     if (n > 1023) {
 318         y *= 0x1p1023;
 319         n -= 1023;
 320         if (n > 1023) {
 321             y *= 0x1p1023;
 322             n -= 1023;
 323             if (n > 1023)
 324                 n = 1023;
 325         }
 326     } else if (n < -1022) {
 327         /* make sure final n < -53 to avoid double
 328            rounding in the subnormal range */
 329         y *= 0x1p-1022 * 0x1p53;
 330         n += 1022 - 53;
 331         if (n < -1022) {
 332             y *= 0x1p-1022 * 0x1p53;
 333             n += 1022 - 53;
 334             if (n < -1022)
 335                 n = -1022;
 336         }
 337     }
 338     u.i = (UINT64)(0x3ff + n) << 52;
 339     x = y * u.f;
 340     return x;
 341 }
 342
/* Copied from musl: src/math/__rem_pio2_large.c */
/* Multi-precision argument reduction modulo pi/2.
 *
 * x     input array of nx doubles (x[0] .. x[nx-1] are read).  Per the
 *       upstream musl commentary (not visible here) these are 24-bit pieces
 *       of a positive value - confirm against callers.
 * y     output array: y[0] for prec 0, y[0..1] for prec 1 and 2, y[0..2]
 *       for prec 3.
 * e0    exponent used to pick the starting table index (jv) and the
 *       residual scale (q0); upstream describes it as the exponent of x[0].
 * nx    number of elements in x.
 * prec  precision selector, used as index into init_jk and in the final
 *       switch.
 *
 * Returns n & 7, the low three bits of the integer quotient computed below.
 *
 * NOTE(review): init_jk has only two entries here, so only prec 0 and 1
 * index it in bounds, although the final switch still carries cases for
 * prec 2 and 3.  The ipio2 table has 66 entries and the work arrays have 20
 * elements; nothing in this function checks e0/nx against those sizes.
 */
static int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec)
{
    /* initial number of terms, indexed by prec */
    static const int init_jk[] = {3, 4};
    /* 24-bit integer chunks (presumably of 2/pi, as in upstream musl) */
    static const INT32 ipio2[] = {
        0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
        0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
        0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
        0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
        0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
        0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
        0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
        0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
        0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
        0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
        0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
    };
    /* pi/2 split into successively smaller pieces; their sum approximates pi/2 */
    static const double PIo2[] = {
        1.57079625129699707031e+00,
        7.54978941586159635335e-08,
        5.39030252995776476554e-15,
        3.28200341580791294123e-22,
        1.27065575308067607349e-29,
        1.22933308981111328932e-36,
        2.73370053816464559624e-44,
        2.16741683877804819444e-51,
    };

    INT32 jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
    double z, fw, f[20], fq[20] = {0}, q[20];

    /* initialize jk*/
    jk = init_jk[prec];
    jp = jk;

    /* determine jx,jv,q0, note that 3>q0 */
    jx = nx - 1;
    jv = (e0 - 3) / 24;
    if(jv < 0) jv = 0;
    q0 = e0 - 24 * (jv + 1);

    /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
    j = jv - jx;
    m = jx + jk;
    for (i = 0; i <= m; i++, j++)
        f[i] = j < 0 ? 0.0 : (double)ipio2[j];

    /* compute q[0],q[1],...q[jk] */
    for (i = 0; i <= jk; i++) {
        for (j = 0, fw = 0.0; j <= jx; j++)
            fw += x[j] * f[jx + i - j];
        q[i] = fw;
    }

    jz = jk;
recompute:
    /* distill q[] into iq[] reversingly */
    for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) {
        fw = (double)(INT32)(0x1p-24 * z);
        iq[i] = (INT32)(z - 0x1p24 * fw);
        z = q[j - 1] + fw;
    }

    /* compute n */
    z = __scalbn(z, q0); /* actual value of z */
    z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */
    n = (INT32)z;
    z -= (double)n;
    ih = 0;
    if (q0 > 0) {  /* need iq[jz-1] to determine n */
        i = iq[jz - 1] >> (24 - q0);
        n += i;
        iq[jz - 1] -= i << (24 - q0);
        ih = iq[jz - 1] >> (23 - q0);
    }
    else if (q0 == 0) ih = iq[jz - 1] >> 23;
    else if (z >= 0.5) ih = 2;

    if (ih > 0) {  /* q > 0.5 */
        n += 1;
        carry = 0;
        for (i = 0; i < jz; i++) {  /* compute 1-q */
            j = iq[i];
            if (carry == 0) {
                if (j != 0) {
                    carry = 1;
                    iq[i] = 0x1000000 - j;
                }
            } else
                iq[i] = 0xffffff - j;
        }
        if (q0 > 0) {  /* rare case: chance is 1 in 12 */
            switch(q0) {
            case 1:
                iq[jz - 1] &= 0x7fffff;
                break;
            case 2:
                iq[jz - 1] &= 0x3fffff;
                break;
            }
        }
        if (ih == 2) {
            z = 1.0 - z;
            if (carry != 0)
                z -= __scalbn(1.0, q0);
        }
    }

    /* check if recomputation is needed */
    if (z == 0.0) {
        j = 0;
        for (i = jz - 1; i >= jk; i--) j |= iq[i];
        if (j == 0) {  /* need recomputation */
            for (k = 1; iq[jk - k] == 0; k++);  /* k = no. of terms needed */

            for (i = jz + 1; i <= jz + k; i++) {  /* add q[jz+1] to q[jz+k] */
                f[jx + i] = (double)ipio2[jv + i];
                for (j = 0, fw = 0.0; j <= jx; j++)
                    fw += x[j] * f[jx + i - j];
                q[i] = fw;
            }
            jz += k;
            goto recompute;
        }
    }

    /* chop off zero terms */
    if (z == 0.0) {
        jz -= 1;
        q0 -= 24;
        while (iq[jz] == 0) {
            jz--;
            q0 -= 24;
        }
    } else { /* break z into 24-bit if necessary */
        z = __scalbn(z, -q0);
        if (z >= 0x1p24) {
            fw = (double)(INT32)(0x1p-24 * z);
            iq[jz] = (INT32)(z - 0x1p24 * fw);
            jz += 1;
            q0 += 24;
            iq[jz] = (INT32)fw;
        } else
            iq[jz] = (INT32)z;
    }

    /* convert integer "bit" chunk to floating-point value */
    fw = __scalbn(1.0, q0);
    for (i = jz; i >= 0; i--) {
        q[i] = fw * (double)iq[i];
        fw *= 0x1p-24;
    }

    /* compute PIo2[0,...,jp]*q[jz,...,0] */
    for(i = jz; i >= 0; i--) {
        for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++)
            fw += PIo2[k] * q[i + k];
        fq[jz - i] = fw;
    }

    /* compress fq[] into y[] */
    switch(prec) {
    case 0:
        /* single result: sum all terms, smallest first */
        fw = 0.0;
        for (i = jz; i >= 0; i--)
            fw += fq[i];
        y[0] = ih == 0 ? fw : -fw;
        break;
    case 1:
    case 2:
        /* head in y[0], remainder of the sum in y[1] */
        fw = 0.0;
        for (i = jz; i >= 0; i--)
            fw += fq[i];
        fw = (double)fw;
        y[0] = ih==0 ? fw : -fw;
        fw = fq[0] - fw;
        for (i = 1; i <= jz; i++)
            fw += fq[i];
        y[1] = ih == 0 ? fw : -fw;
        break;
    case 3:  /* painful */
        for (i = jz; i > 0; i--) {
            fw = fq[i - 1] + fq[i];
            fq[i] += fq[i - 1] - fw;
            fq[i - 1] = fw;
        }
        for (i = jz; i > 1; i--) {
            fw = fq[i - 1] + fq[i];
            fq[i] += fq[i - 1] - fw;
            fq[i - 1] = fw;
        }
        for (fw = 0.0, i = jz; i >= 2; i--)
            fw += fq[i];
        if (ih == 0) {
            y[0] = fq[0];
            y[1] = fq[1];
            y[2] = fw;
        } else {
            y[0] = -fq[0];
            y[1] = -fq[1];
            y[2] = -fw;
        }
    }
    return n & 7;
}
 548
 549 /* Based on musl implementation: src/math/round.c */
 550 static double __round(double x)
 551 {
 552     ULONGLONG llx = *(ULONGLONG*)&x, tmp;
 553     int e = (llx >> 52 & 0x7ff) - 0x3ff;
 554
 555     if (e >= 52)
 556         return x;
 557     if (e < -1)
 558         return 0 * x;
 559     else if (e == -1)
 560         return signbit(x) ? -1 : 1;
 561
 562     tmp = 0x000fffffffffffffULL >> e;
 563     if (!(llx & tmp))
 564         return x;
 565     llx += 0x0008000000000000ULL >> e;
 566     llx &= ~tmp;
 567     return *(double*)&llx;
 568 }
 569
 570 #if !defined(__i386__) || _MSVCR_VER >= 120
/* Copied from musl: src/math/expm1f.c */
/* Compute exp(x) - 1 in single precision.
 *
 * Special cases handled before the polynomial evaluation:
 *   - NaN is returned as is, +inf returns +inf, -inf returns -1;
 *   - finite x <= -27*ln2 reports _UNDERFLOW through math_error and
 *     yields -1;
 *   - x > log(FLT_MAX) reports _OVERFLOW through math_error;
 *   - |x| < 2**-25 returns x (after raising underflow for subnormals).
 *
 * Otherwise x is reduced to hi - lo with x = k*ln2 + r, the function is
 * evaluated on r, and the result is rescaled by 2^k.
 * Note that the errors are reported under the name "exp".
 */
static float __expm1f(float x)
{
    static const float ln2_hi = 6.9313812256e-01,
        ln2_lo = 9.0580006145e-06,
        invln2 = 1.4426950216e+00,
        Q1 = -3.3333212137e-2,
        Q2 = 1.5807170421e-3;

    float y, hi, lo, c, t, e, hxs, hfx, r1, twopk;
    union {float f; UINT32 i;} u = {x};
    UINT32 hx = u.i & 0x7fffffff;   /* bit pattern of |x| */
    int k, sign = u.i >> 31;

    /* filter out huge and non-finite argument */
    if (hx >= 0x4195b844) { /* if |x|>=27*ln2 */
        if (hx >= 0x7f800000) /* Inf or NaN: -Inf gives -1, everything else is returned as is */
            return u.i == 0xff800000 ? -1 : x;
        if (sign)
            return math_error(_UNDERFLOW, "exp", x, 0, -1);
        if (hx > 0x42b17217) /* x > log(FLT_MAX) */
            return math_error(_OVERFLOW, "exp", x, 0, fp_barrierf(x * FLT_MAX));
    }

    /* argument reduction */
    if (hx > 0x3eb17218) { /* if |x| > 0.5 ln2 */
        if (hx < 0x3F851592) { /* and |x| < 1.5 ln2 */
            if (!sign) {
                hi = x - ln2_hi;
                lo = ln2_lo;
                k = 1;
            } else {
                hi = x + ln2_hi;
                lo = -ln2_lo;
                k = -1;
            }
        } else {
            /* k = x / ln2 rounded to nearest (the conversion to int truncates) */
            k = invln2 * x + (sign ? -0.5f : 0.5f);
            t = k;
            hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
            lo = t * ln2_lo;
        }
        x = hi - lo;
        c = (hi - x) - lo;  /* rounding error of the subtraction above */
    } else if (hx < 0x33000000) { /* when |x|<2**-25, return x */
        if (hx < 0x00800000)
            fp_barrierf(x * x);  /* subnormal: force the underflowing product */
        return x;
    } else
        k = 0;  /* c is left unset; it is not read when k == 0 */

    /* x is now in primary range */
    hfx = 0.5f * x;
    hxs = x * hfx;
    r1 = 1.0f + hxs * (Q1 + hxs * Q2);
    t = 3.0f - r1 * hfx;
    e = hxs * ((r1 - t) / (6.0f - x * t));
    if (k == 0) /* c is 0 */
        return x - (x * e - hxs);
    e = x * (e - c) - c;
    e -= hxs;
    /* exp(x) ~ 2^k (x_reduced - e + 1) */
    if (k == -1)
        return 0.5f * (x - e) - 0.5f;
    if (k == 1) {
        if (x < -0.25f)
            return -2.0f * (e - (x + 0.5f));
        return 1.0f + 2.0f * (x - e);
    }
    u.i = (0x7f + k) << 23; /* 2^k */
    twopk = u.f;
    if (k < 0 || k > 56) { /* suffice to return exp(x)-1 */
        y = x - e + 1.0f;
        if (k == 128)
            /* 2^128 is not representable as a float: scale in two steps */
            y = y * 2.0f * 0x1p127f;
        else
            y = y * twopk;
        return y - 1.0f;
    }
    u.i = (0x7f-k) << 23; /* 2^-k */
    if (k < 23)
        y = (x - e + (1 - u.f)) * twopk;
    else
        y = (x - (e + u.f) + 1) * twopk;
    return y;
}
 657
 658 /* Copied from musl: src/math/__sindf.c */
 659 static float __sindf(double x)
 660 {
 661     static const double S1 = -0x1.5555555555555p-3,
 662         S2 = 0x1.1111111111111p-7,
 663         S3 = -0x1.a01a01a01a01ap-13,
 664         S4 = 0x1.71de3a556c734p-19;
 665
 666     double r, s, w, z;
 667
 668     z = x * x;
 669     if (x > -7.8175831586122513e-03 && x < 7.8175831586122513e-03)
 670         return x * (1 + S1 * z);
 671
 672     w = z * z;
 673     r = S3 + z * S4;
 674     s = z * x;
 675     return (x + s * (S1 + z * S2)) + s * w * r;
 676 }
 677
 678 /* Copied from musl: src/math/__cosdf.c */
 679 static float __cosdf(double x)
 680 {
 681     static const double C0 = -0x1.0000000000000p-1,
 682         C1 = 0x1.5555555555555p-5,
 683         C2 = -0x1.6c16c16c16c17p-10,
 684         C3 = 0x1.a01a01a01a01ap-16,
 685         C4 = -0x1.27e4fb7789f5cp-22;
 686     double z;
 687
 688     z = x * x;
 689     if (x > -7.8163146972656250e-03 && x < 7.8163146972656250e-03)
 690         return 1 + C0 * z;
 691     return 1.0 + z * (C0 + z * (C1 + z * (C2 + z * (C3 + z * C4))));
 692 }
 693
/* Lookup table of 32 64-bit patterns used by the single-precision
 * exponential code.  Entry 0 is the IEEE double bit pattern of 1.0.
 * NOTE(review): presumably this is musl's exp2f table (src/math/
 * exp2f_data.c), where entry i holds the bits of 2^(i/32) with i << 47
 * subtracted so that the index can later be added back into the exponent
 * field - confirm against the users of this table. */
static const UINT64 exp2f_T[] = {
    0x3ff0000000000000ULL, 0x3fefd9b0d3158574ULL, 0x3fefb5586cf9890fULL, 0x3fef9301d0125b51ULL,
    0x3fef72b83c7d517bULL, 0x3fef54873168b9aaULL, 0x3fef387a6e756238ULL, 0x3fef1e9df51fdee1ULL,
    0x3fef06fe0a31b715ULL, 0x3feef1a7373aa9cbULL, 0x3feedea64c123422ULL, 0x3feece086061892dULL,
    0x3feebfdad5362a27ULL, 0x3feeb42b569d4f82ULL, 0x3feeab07dd485429ULL, 0x3feea47eb03a5585ULL,
    0x3feea09e667f3bcdULL, 0x3fee9f75e8ec5f74ULL, 0x3feea11473eb0187ULL, 0x3feea589994cce13ULL,
    0x3feeace5422aa0dbULL, 0x3feeb737b0cdc5e5ULL, 0x3feec49182a3f090ULL, 0x3feed503b23e255dULL,
    0x3feee89f995ad3adULL, 0x3feeff76f2fb5e47ULL, 0x3fef199bdd85529cULL, 0x3fef3720dcef9069ULL,
    0x3fef5818dcfba487ULL, 0x3fef7c97337b9b5fULL, 0x3fefa4afa2a490daULL, 0x3fefd0765b6e4540ULL
};
#endif
 704 #endif
 705
 706 /*********************************************************************
 707  *      _fdclass (MSVCR120.@)
 708  *
 709  * Copied from musl: src/math/__fpclassifyf.c
 710  */
 711 short CDECL _fdclass(float x)
 712 {
 713     union { float f; UINT32 i; } u = { x };
 714     int e = u.i >> 23 & 0xff;
 715
 716     if (!e) return u.i << 1 ? FP_SUBNORMAL : FP_ZERO;
 717     if (e == 0xff) return u.i << 9 ? FP_NAN : FP_INFINITE;
 718     return FP_NORMAL;
 719 }
 720
 721 /*********************************************************************
 722  *      _dclass (MSVCR120.@)
 723  *
 724  * Copied from musl: src/math/__fpclassify.c
 725  */
 726 short CDECL _dclass(double x)
 727 {
 728     union { double f; UINT64 i; } u = { x };
 729     int e = u.i >> 52 & 0x7ff;
 730
 731     if (!e) return u.i << 1 ? FP_SUBNORMAL : FP_ZERO;
 732     if (e == 0x7ff) return (u.i << 12) ? FP_NAN : FP_INFINITE;
 733     return FP_NORMAL;
 734 }
 735
 736 #ifndef __i386__
 737
 738 /*********************************************************************
 739  *      _fpclassf (MSVCRT.@)
 740  */
 741 int CDECL _fpclassf( float num )
 742 {
 743     union { float f; UINT32 i; } u = { num };
 744     int e = u.i >> 23 & 0xff;
 745     int s = u.i >> 31;
 746
 747     switch (e)
 748     {
 749     case 0:
 750         if (u.i << 1) return s ? _FPCLASS_ND : _FPCLASS_PD;
 751         return s ? _FPCLASS_NZ : _FPCLASS_PZ;
 752     case 0xff:
 753         if (u.i << 9) return ((u.i >> 22) & 1) ? _FPCLASS_QNAN : _FPCLASS_SNAN;
 754         return s ? _FPCLASS_NINF : _FPCLASS_PINF;
 755     default:
 756         return s ? _FPCLASS_NN : _FPCLASS_PN;
 757     }
 758 }
 759
 760 /*********************************************************************
 761  *      _finitef (MSVCRT.@)
 762  */
 763 int CDECL _finitef( float num )
 764 {
 765     union { float f; UINT32 i; } u = { num };
 766     return (u.i & 0x7fffffff) < 0x7f800000;
 767 }
 768
 769 /*********************************************************************
 770  *      _isnanf (MSVCRT.@)
 771  */
 772 int CDECL _isnanf( float num )
 773 {
 774     union { float f; UINT32 i; } u = { num };
 775     return (u.i & 0x7fffffff) > 0x7f800000;
 776 }
 777
 778 static float asinf_R(float z)
 779 {
 780     /* coefficients for R(x^2) */
 781     static const float p1 = 1.66666672e-01,
 782                  p2 = -5.11644611e-02,
 783                  p3 = -1.21124933e-02,
 784                  p4 = -3.58742251e-03,
 785                  q1 = -7.56982703e-01;
 786
 787     float p, q;
 788     p = z * (p1 + z * (p2 + z * (p3 + z * p4)));
 789     q = 1.0f + z * q1;
 790     return p / q;
 791 }
 792
 793 /*********************************************************************
 794  *      acosf (MSVCRT.@)
 795  *
 796  * Copied from musl: src/math/acosf.c
 797  */
 798 float CDECL acosf( float x )
 799 {
 800     static const double pio2_lo = 6.12323399573676603587e-17;
 801     static const double pio2_hi = 1.57079632679489655800e+00;
 802
 803     float z, w, s, c, df;
 804     unsigned int hx, ix;
 805
 806     hx = *(unsigned int*)&x;
 807     ix = hx & 0x7fffffff;
 808     /* |x| >= 1 or nan */
 809     if (ix >= 0x3f800000) {
 810         if (ix == 0x3f800000) {
 811             if (hx >> 31)
 812                 return M_PI;
 813             return 0;
 814         }
 815         if (isnan(x)) return x;
 816         return math_error(_DOMAIN, "acosf", x, 0, 0 / (x - x));
 817     }
 818     /* |x| < 0.5 */
 819     if (ix < 0x3f000000) {
 820         if (ix <= 0x32800000) /* |x| < 2**-26 */
 821             return M_PI_2;
 822         return pio2_hi - (x - (pio2_lo - x * asinf_R(x * x)));
 823     }
 824     /* x < -0.5 */
 825     if (hx >> 31) {
 826         z = (1 + x) * 0.5f;
 827         s = sqrtf(z);
 828         return 2*(pio2_hi - (s + (asinf_R(z) * s - pio2_lo)));
 829     }
 830     /* x > 0.5 */
 831     z = (1 - x) * 0.5f;
 832     s = sqrtf(z);
 833     hx = *(unsigned int*)&s & 0xffff0000;
 834     df = *(float*)&hx;
 835     c = (z - df * df) / (s + df);
 836     w = asinf_R(z) * s + c;
 837     return 2 * (df + w);
 838 }
 839
 840 /*********************************************************************
 841  *      asinf (MSVCRT.@)
 842  *
 843  * Copied from musl: src/math/asinf.c
 844  */
 845 float CDECL asinf( float x )
 846 {
 847     static const double pio2 = 1.570796326794896558e+00;
 848     static const float pio4_hi = 0.785398125648;
 849     static const float pio2_lo = 7.54978941586e-08;
 850
 851     float s, z, f, c;
 852     unsigned int hx, ix;
 853
 854     hx = *(unsigned int*)&x;
 855     ix = hx & 0x7fffffff;
 856     if (ix >= 0x3f800000) {  /* |x| >= 1 */
 857         if (ix == 0x3f800000)  /* |x| == 1 */
 858             return x * pio2 + 7.5231638453e-37;  /* asin(+-1) = +-pi/2 with inexact */
 859         if (isnan(x)) return x;
 860         return math_error(_DOMAIN, "asinf", x, 0, 0 / (x - x));
 861     }
 862     if (ix < 0x3f000000) {  /* |x| < 0.5 */
 863         /* if 0x1p-126 <= |x| < 0x1p-12, avoid raising underflow */
 864         if (ix < 0x39800000 && ix >= 0x00800000)
 865             return x;
 866         return x + x * asinf_R(x * x);
 867     }
 868     /* 1 > |x| >= 0.5 */
 869     z = (1 - fabsf(x)) * 0.5f;
 870     s = sqrtf(z);
 871     /* f+c = sqrt(z) */
 872     *(unsigned int*)&f = *(unsigned int*)&s & 0xffff0000;
 873     c = (z - f * f) / (s + f);
 874     x = pio4_hi - (2 * s * asinf_R(z) - (pio2_lo - 2 * c) - (pio4_hi - 2 * f));
 875     if (hx >> 31)
 876         return -x;
 877     return x;
 878 }
 879
 880 /*********************************************************************
 881  *      atanf (MSVCRT.@)
 882  *
 883  * Copied from musl: src/math/atanf.c
 884  */
 885 float CDECL atanf( float x )
 886 {
 887     static const float atanhi[] = {
 888         4.6364760399e-01,
 889         7.8539812565e-01,
 890         9.8279368877e-01,
 891         1.5707962513e+00,
 892     };
 893     static const float atanlo[] = {
 894         5.0121582440e-09,
 895         3.7748947079e-08,
 896         3.4473217170e-08,
 897         7.5497894159e-08,
 898     };
 899     static const float aT[] = {
 900         3.3333328366e-01,
 901         -1.9999158382e-01,
 902         1.4253635705e-01,
 903         -1.0648017377e-01,
 904         6.1687607318e-02,
 905     };
 906
 907     float w, s1, s2, z;
 908     unsigned int ix, sign;
 909     int id;
 910
 911 #if _MSVCR_VER == 0
 912     if (isnan(x)) return math_error(_DOMAIN, "atanf", x, 0, x);
 913 #endif
 914
 915     ix = *(unsigned int*)&x;
 916     sign = ix >> 31;
 917     ix &= 0x7fffffff;
 918     if (ix >= 0x4c800000) {  /* if |x| >= 2**26 */
 919         if (isnan(x))
 920             return x;
 921         z = atanhi[3] + 7.5231638453e-37;
 922         return sign ? -z : z;
 923     }
 924     if (ix < 0x3ee00000) {   /* |x| < 0.4375 */
 925         if (ix < 0x39800000) {  /* |x| < 2**-12 */
 926             if (ix < 0x00800000)
 927                 /* raise underflow for subnormal x */
 928                 fp_barrierf(x*x);
 929             return x;
 930         }
 931         id = -1;
 932     } else {
 933         x = fabsf(x);
 934         if (ix < 0x3f980000) {  /* |x| < 1.1875 */
 935             if (ix < 0x3f300000) {  /*  7/16 <= |x| < 11/16 */
 936                 id = 0;
 937                 x = (2.0f * x - 1.0f) / (2.0f + x);
 938             } else {                /* 11/16 <= |x| < 19/16 */
 939                 id = 1;
 940                 x = (x - 1.0f) / (x + 1.0f);
 941             }
 942         } else {
 943             if (ix < 0x401c0000) {  /* |x| < 2.4375 */
 944                 id = 2;
 945                 x = (x - 1.5f) / (1.0f + 1.5f * x);
 946             } else {                /* 2.4375 <= |x| < 2**26 */
 947                 id = 3;
 948                 x = -1.0f / x;
 949             }
 950         }
 951     }
 952     /* end of argument reduction */
 953     z = x * x;
 954     w = z * z;
 955     /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
 956     s1 = z * (aT[0] + w * (aT[2] + w * aT[4]));
 957     s2 = w * (aT[1] + w * aT[3]);
 958     if (id < 0)
 959         return x - x * (s1 + s2);
 960     z = atanhi[id] - ((x * (s1 + s2) - atanlo[id]) - x);
 961     return sign ? -z : z;
 962 }
 963
 964 /*********************************************************************
 965  *              atan2f (MSVCRT.@)
 966  *
 967  * Copied from musl: src/math/atan2f.c
 968  */
 969 float CDECL atan2f( float y, float x )
 970 {
 971     static const float pi     = 3.1415927410e+00,
 972                  pi_lo  = -8.7422776573e-08;
 973
 974     float z;
 975     unsigned int m, ix, iy;
 976
 977     if (isnan(x) || isnan(y))
 978         return x + y;
 979     ix = *(unsigned int*)&x;
 980     iy = *(unsigned int*)&y;
 981     if (ix == 0x3f800000)  /* x=1.0 */
 982         return atanf(y);
 983     m = ((iy >> 31) & 1) | ((ix >> 30) & 2);  /* 2*sign(x)+sign(y) */
 984     ix &= 0x7fffffff;
 985     iy &= 0x7fffffff;
 986
 987     /* when y = 0 */
 988     if (iy == 0) {
 989         switch (m) {
 990         case 0:
 991         case 1: return y;   /* atan(+-0,+anything)=+-0 */
 992         case 2: return pi;  /* atan(+0,-anything) = pi */
 993         case 3: return -pi; /* atan(-0,-anything) =-pi */
 994         }
 995     }
 996     /* when x = 0 */
 997     if (ix == 0)
 998         return m & 1 ? -pi / 2 : pi / 2;
 999     /* when x is INF */
1000     if (ix == 0x7f800000) {
1001         if (iy == 0x7f800000) {
1002             switch (m) {
1003             case 0: return pi / 4;      /* atan(+INF,+INF) */
1004             case 1: return -pi / 4;     /* atan(-INF,+INF) */
1005             case 2: return 3 * pi / 4;  /*atan(+INF,-INF)*/
1006             case 3: return -3 * pi / 4; /*atan(-INF,-INF)*/
1007             }
1008         } else {
1009             switch (m) {
1010             case 0: return 0.0f;    /* atan(+...,+INF) */
1011             case 1: return -0.0f;   /* atan(-...,+INF) */
1012             case 2: return pi;      /* atan(+...,-INF) */
1013             case 3: return -pi;     /* atan(-...,-INF) */
1014             }
1015         }
1016     }
1017     /* |y/x| > 0x1p26 */
1018     if (ix + (26 << 23) < iy || iy == 0x7f800000)
1019         return m & 1 ? -pi / 2 : pi / 2;
1020
1021     /* z = atan(|y/x|) with correct underflow */
1022     if ((m & 2) && iy + (26 << 23) < ix)  /*|y/x| < 0x1p-26, x < 0 */
1023         z = 0.0;
1024     else
1025         z = atanf(fabsf(y / x));
1026     switch (m) {
1027     case 0: return z;                /* atan(+,+) */
1028     case 1: return -z;               /* atan(-,+) */
1029     case 2: return pi - (z - pi_lo); /* atan(+,-) */
1030     default: /* case 3 */
1031         return (z - pi_lo) - pi;     /* atan(-,-) */
1032     }
1033 }
1034
1035 /* Copied from musl: src/math/__rem_pio2f.c */
1036 static int __rem_pio2f(float x, double *y)
1037 {
1038     static const double toint = 1.5 / DBL_EPSILON,
1039         pio4 = 0x1.921fb6p-1,
1040         invpio2 = 6.36619772367581382433e-01,
1041         pio2_1 = 1.57079631090164184570e+00,
1042         pio2_1t = 1.58932547735281966916e-08;
1043
1044     union {float f; uint32_t i;} u = {x};
1045     double tx[1], ty[1], fn;
1046     UINT32 ix;
1047     int n, sign, e0;
1048
1049     ix = u.i & 0x7fffffff;
1050     /* 25+53 bit pi is good enough for medium size */
1051     if (ix < 0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */
1052         /* Use a specialized rint() to get fn. */
1053         fn = fp_barrier(x * invpio2 + toint) - toint;
1054         n  = (int)fn;
1055         *y = x - fn * pio2_1 - fn * pio2_1t;
1056         /* Matters with directed rounding. */
1057         if (*y < -pio4) {
1058             n--;
1059             fn--;
1060             *y = x - fn * pio2_1 - fn * pio2_1t;
1061         } else if (*y > pio4) {
1062             n++;
1063             fn++;
1064             *y = x - fn * pio2_1 - fn * pio2_1t;
1065         }
1066         return n;
1067     }
1068     if(ix >= 0x7f800000) { /* x is inf or NaN */
1069         *y = x - x;
1070         return 0;
1071     }
1072     /* scale x into [2^23, 2^24-1] */
1073     sign = u.i >> 31;
1074     e0 = (ix >> 23) - (0x7f + 23); /* e0 = ilogb(|x|)-23, positive */
1075     u.i = ix - (e0 << 23);
1076     tx[0] = u.f;
1077     n = __rem_pio2_large(tx, ty, e0, 1, 0);
1078     if (sign) {
1079         *y = -ty[0];
1080         return -n;
1081     }
1082     *y = ty[0];
1083     return n;
1084 }
1085
1086 /*********************************************************************
1087  *      cosf (MSVCRT.@)
1088  *
1089  * Copied from musl: src/math/cosf.c
1090  */
1091 float CDECL cosf( float x )
1092 {
1093     static const double c1pio2 = 1*M_PI_2,
1094         c2pio2 = 2*M_PI_2,
1095         c3pio2 = 3*M_PI_2,
1096         c4pio2 = 4*M_PI_2;
1097
1098     double y;
1099     UINT32 ix;
1100     unsigned n, sign;
1101
1102     ix = *(UINT32*)&x;
1103     sign = ix >> 31;
1104     ix &= 0x7fffffff;
1105
1106     if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
1107         if (ix < 0x39800000) { /* |x| < 2**-12 */
1108             /* raise inexact if x != 0 */
1109             fp_barrierf(x + 0x1p120f);
1110             return 1.0f;
1111         }
1112         return __cosdf(x);
1113     }
1114     if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
1115         if (ix > 0x4016cbe3) /* |x| ~> 3*pi/4 */
1116             return -__cosdf(sign ? x + c2pio2 : x - c2pio2);
1117         else {
1118             if (sign)
1119                 return __sindf(x + c1pio2);
1120             else
1121                 return __sindf(c1pio2 - x);
1122         }
1123     }
1124     if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
1125         if (ix > 0x40afeddf) /* |x| ~> 7*pi/4 */
1126             return __cosdf(sign ? x + c4pio2 : x - c4pio2);
1127         else {
1128             if (sign)
1129                 return __sindf(-x - c3pio2);
1130             else
1131                 return __sindf(x - c3pio2);
1132         }
1133     }
1134
1135     /* cos(Inf or NaN) is NaN */
1136     if (isinf(x)) return math_error(_DOMAIN, "cosf", x, 0, x - x);
1137     if (ix >= 0x7f800000)
1138         return x - x;
1139
1140     /* general argument reduction needed */
1141     n = __rem_pio2f(x, &y);
1142     switch (n & 3) {
1143     case 0: return __cosdf(y);
1144     case 1: return __sindf(-y);
1145     case 2: return -__cosdf(y);
1146     default: return __sindf(y);
1147     }
1148 }
1149
1150 /* Copied from musl: src/math/__expo2f.c */
1151 static float __expo2f(float x, float sign)
1152 {
1153     static const int k = 235;
1154     static const float kln2 = 0x1.45c778p+7f;
1155     float scale;
1156
1157     *(UINT32*)&scale = (UINT32)(0x7f + k/2) << 23;
1158     return expf(x - kln2) * (sign * scale) * scale;
1159 }
1160
1161 /*********************************************************************
1162  *      coshf (MSVCRT.@)
1163  *
1164  * Copied from musl: src/math/coshf.c
1165  */
1166 float CDECL coshf( float x )
1167 {
1168     UINT32 ui = *(UINT32*)&x;
1169     UINT32 sign = ui & 0x80000000;
1170     float t;
1171
1172     /* |x| */
1173     ui &= 0x7fffffff;
1174     x = *(float*)&ui;
1175
1176     /* |x| < log(2) */
1177     if (ui < 0x3f317217) {
1178         if (ui < 0x3f800000 - (12 << 23)) {
1179             fp_barrierf(x + 0x1p120f);
1180             return 1;
1181         }
1182         t = __expm1f(x);
1183         return 1 + t * t / (2 * (1 + t));
1184     }
1185
1186     /* |x| < log(FLT_MAX) */
1187     if (ui < 0x42b17217) {
1188         t = expf(x);
1189         return 0.5f * (t + 1 / t);
1190     }
1191
1192     /* |x| > log(FLT_MAX) or nan */
1193     if (ui > 0x7f800000)
1194         *(UINT32*)&t = ui | sign | 0x400000;
1195     else
1196         t = __expo2f(x, 1.0f);
1197     return t;
1198 }
1199
1200 /*********************************************************************
1201  *      expf (MSVCRT.@)
1202  */
1203 float CDECL expf( float x )
1204 {
1205     static const double C[] = {
1206         0x1.c6af84b912394p-5 / (1 << 5) / (1 << 5) / (1 << 5),
1207         0x1.ebfce50fac4f3p-3 / (1 << 5) / (1 << 5),
1208         0x1.62e42ff0c52d6p-1 / (1 << 5)
1209     };
1210     static const double invln2n = 0x1.71547652b82fep+0 * (1 << 5);
1211
1212     double kd, z, r, r2, y, s;
1213     UINT32 abstop;
1214     UINT64 ki, t;
1215
1216     abstop = (*(UINT32*)&x >> 20) & 0x7ff;
1217     if (abstop >= 0x42b) {
1218         /* |x| >= 88 or x is nan.  */
1219         if (*(UINT32*)&x == 0xff800000)
1220             return 0.0f;
1221         if (abstop >= 0x7f8)
1222             return x + x;
1223         if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
1224             return math_error(_OVERFLOW, "expf", x, 0, x * FLT_MAX);
1225         if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
1226             return math_error(_UNDERFLOW, "expf", x, 0, fp_barrierf(FLT_MIN) * FLT_MIN);
1227     }
1228
1229     /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k.  */
1230     z = invln2n * x;
1231
1232     /* Round and convert z to int, the result is in [-150*N, 128*N] and
1233        ideally ties-to-even rule is used, otherwise the magnitude of r
1234        can be bigger which gives larger approximation error.  */
1235     kd = __round(z);
1236     ki = (INT64)kd;
1237     r = z - kd;
1238
1239     /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
1240     t = exp2f_T[ki % (1 << 5)];
1241     t += ki << (52 - 5);
1242     s = *(double*)&t;
1243     z = C[0] * r + C[1];
1244     r2 = r * r;
1245     y = C[2] * r + 1;
1246     y = z * r2 + y;
1247     y = y * s;
1248     return y;
1249 }
1250
/*********************************************************************
 *      fmodf (MSVCRT.@)
 *
 * Floating-point remainder of x / y, computed with integer arithmetic
 * on the bit patterns of the operands. A non-zero result carries the
 * sign of x; a zero result is produced as 0 * x.
 *
 * An infinite x is reported through math_error() as _DOMAIN. A zero y,
 * a NaN y or a NaN x yield (x * y) / (x * y).
 *
 * Copied from musl: src/math/fmodf.c
 */
float CDECL fmodf( float x, float y )
{
    UINT32 xi = *(UINT32*)&x;
    UINT32 yi = *(UINT32*)&y;
    int ex = xi>>23 & 0xff;         /* biased exponent of x */
    int ey = yi>>23 & 0xff;         /* biased exponent of y */
    UINT32 sx = xi & 0x80000000;    /* sign bit of x, re-applied at the end */
    UINT32 i;

    if (isinf(x)) return math_error(_DOMAIN, "fmodf", x, y, (x * y) / (x * y));
    /* y is +-0, y is NaN, or x is NaN (infinite x was handled above) */
    if (yi << 1 == 0 || isnan(y) || ex == 0xff)
        return (x * y) / (x * y);
    /* shifting out the sign bit compares magnitudes: |x| <= |y| */
    if (xi << 1 <= yi << 1) {
        if (xi << 1 == yi << 1)
            return 0 * x;
        return x;
    }

    /* normalize x and y */
    if (!ex) {
        /* subnormal: lower ex once per leading zero of the mantissa, then
           shift the mantissa so that its top set bit sits at bit 23 */
        for (i = xi << 9; i >> 31 == 0; ex--, i <<= 1);
        xi <<= -ex + 1;
    } else {
        /* normal: keep the 23 mantissa bits and make the leading 1 explicit */
        xi &= -1U >> 9;
        xi |= 1U << 23;
    }
    if (!ey) {
        for (i = yi << 9; i >> 31 == 0; ey--, i <<= 1);
        yi <<= -ey + 1;
    } else {
        yi &= -1U >> 9;
        yi |= 1U << 23;
    }

    /* x mod y */
    /* shift-and-subtract: one step per unit of exponent difference */
    for (; ex > ey; ex--) {
        i = xi - yi;
        if (i >> 31 == 0) {     /* no borrow, i.e. xi >= yi */
            if (i == 0)
                return 0 * x;   /* exact multiple */
            xi = i;
        }
        xi <<= 1;
    }
    /* final subtraction at equal exponents */
    i = xi - yi;
    if (i >> 31 == 0) {
        if (i == 0)
            return 0 * x;
        xi = i;
    }
    /* renormalize the remainder so its top set bit is at bit 23 again */
    for (; xi>>23 == 0; xi <<= 1, ex--);

    /* scale result up */
    if (ex > 0) {
        /* normal result: drop the explicit leading 1, insert the exponent */
        xi -= 1U << 23;
        xi |= (UINT32)ex << 23;
    } else {
        /* subnormal result: shift the mantissa down instead */
        xi >>= -ex + 1;
    }
    xi |= sx;
    return *(float*)&xi;
}
1318
1319 /*********************************************************************
1320  *      logf (MSVCRT.@)
1321  *
1322  * Copied from musl: src/math/logf.c src/math/logf_data.c
1323  */
1324 float CDECL logf( float x )
1325 {
1326     static const double Ln2 = 0x1.62e42fefa39efp-1;
1327     static const double A[] = {
1328         -0x1.00ea348b88334p-2,
1329         0x1.5575b0be00b6ap-2,
1330         -0x1.ffffef20a4123p-2
1331     };
1332     static const struct {
1333         double invc, logc;
1334     } T[] = {
1335         { 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 },
1336         { 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 },
1337         { 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 },
1338         { 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 },
1339         { 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 },
1340         { 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 },
1341         { 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 },
1342         { 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 },
1343         { 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 },
1344         { 0x1p+0, 0x0p+0 },
1345         { 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 },
1346         { 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 },
1347         { 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 },
1348         { 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 },
1349         { 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 },
1350         { 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 }
1351     };
1352
1353     double z, r, r2, y, y0, invc, logc;
1354     UINT32 ix, iz, tmp;
1355     int k, i;
1356
1357     ix = *(UINT32*)&x;
1358     /* Fix sign of zero with downward rounding when x==1. */
1359     if (ix == 0x3f800000)
1360         return 0;
1361     if (ix - 0x00800000 >= 0x7f800000 - 0x00800000) {
1362         /* x < 0x1p-126 or inf or nan. */
1363         if (ix * 2 == 0)
1364             return math_error(_SING, "logf", x, 0, (ix & 0x80000000 ? 1.0 : -1.0) / x);
1365         if (ix == 0x7f800000) /* log(inf) == inf. */
1366             return x;
1367         if (ix * 2 > 0xff000000)
1368             return x;
1369         if (ix & 0x80000000)
1370             return math_error(_DOMAIN, "logf", x, 0, (x - x) / (x - x));
1371         /* x is subnormal, normalize it. */
1372         x *= 0x1p23f;
1373         ix = *(UINT32*)&x;
1374         ix -= 23 << 23;
1375     }
1376
1377     /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
1378        The range is split into N subintervals.
1379        The ith subinterval contains z and c is near its center. */
1380     tmp = ix - 0x3f330000;
1381     i = (tmp >> (23 - 4)) % (1 << 4);
1382     k = (INT32)tmp >> 23; /* arithmetic shift */
1383     iz = ix - (tmp & (0x1ffu << 23));
1384     invc = T[i].invc;
1385     logc = T[i].logc;
1386     z = *(float*)&iz;
1387
1388     /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
1389     r = z * invc - 1;
1390     y0 = logc + (double)k * Ln2;
1391
1392     /* Pipelined polynomial evaluation to approximate log1p(r). */
1393     r2 = r * r;
1394     y = A[1] * r + A[2];
1395     y = A[0] * r2 + y;
1396     y = y * r2 + (y0 + r);
1397     return y;
1398 }
1399
/*********************************************************************
 *      log10f (MSVCRT.@)
 *
 * Single-precision base-10 logarithm. x is reduced to 2^k * m with m in
 * [sqrt(2)/2, sqrt(2)], log(m) is evaluated as a hi/lo pair and the
 * pieces are scaled by 1/ln(10) and log10(2), each of which is also
 * stored split in two floats.
 *
 * +-0 is reported through math_error() as _SING and a negative
 * non-NaN argument as _DOMAIN; +inf and NaN are returned unchanged.
 */
float CDECL log10f( float x )
{
    /* ivln10hi + ivln10lo ~= 1/ln(10), log10_2hi + log10_2lo ~= log10(2);
       Lg1..Lg4 are the polynomial coefficients used for R below */
    static const float ivln10hi = 4.3432617188e-01,
        ivln10lo = -3.1689971365e-05,
        log10_2hi = 3.0102920532e-01,
        log10_2lo = 7.9034151668e-07,
        Lg1 = 0xaaaaaa.0p-24,
        Lg2 = 0xccce13.0p-25,
        Lg3 = 0x91e9ee.0p-25,
        Lg4 = 0xf89e26.0p-26;

    union {float f; UINT32 i;} u = {x};
    float hfsq, f, s, z, R, w, t1, t2, dk, hi, lo;
    UINT32 ix;
    int k;

    ix = u.i;
    k = 0;
    if (ix < 0x00800000 || ix >> 31) { /* x < 2**-126 */
        /* +-0 */
        if (ix << 1 == 0)
            return math_error(_SING, "log10f", x, 0, -1 / (x * x));
        /* NaN with the sign bit set passes through unchanged */
        if ((ix & ~(1u << 31)) > 0x7f800000)
            return x;
        /* any other negative value */
        if (ix >> 31)
            return math_error(_DOMAIN, "log10f", x, 0, (x - x) / (x - x));
        /* subnormal number, scale up x */
        k -= 25;
        x *= 0x1p25f;
        u.f = x;
        ix = u.i;
    } else if (ix >= 0x7f800000) {
        /* +inf or NaN with the sign bit clear */
        return x;
    } else if (ix == 0x3f800000)
        /* x == 1.0 */
        return 0;

    /* reduce x into [sqrt(2)/2, sqrt(2)] */
    ix += 0x3f800000 - 0x3f3504f3;
    k += (int)(ix >> 23) - 0x7f;
    ix = (ix & 0x007fffff) + 0x3f3504f3;
    u.i = ix;
    x = u.f;

    f = x - 1.0f;
    s = f / (2.0f + f);
    z = s * s;
    w = z * z;
    /* even and odd halves of the polynomial in z, summed into R */
    t1= w * (Lg2 + w * Lg4);
    t2= z * (Lg1 + w * Lg3);
    R = t2 + t1;
    hfsq = 0.5f * f * f;

    /* split f - hfsq into hi (low 12 mantissa bits cleared) and lo, which
       collects what hi dropped plus the correction term */
    hi = f - hfsq;
    u.f = hi;
    u.i &= 0xfffff000;
    hi = u.f;
    lo = f - hi - hfsq + s * (hfsq + R);
    dk = k;
    return dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi + hi * ivln10hi + dk * log10_2hi;
}
1462
1463 /* Subnormal input is normalized so ix has negative biased exponent.
1464    Output is multiplied by POWF_SCALE (where 1 << 5). */
1465 static double powf_log2(UINT32 ix)
1466 {
1467     static const struct {
1468         double invc, logc;
1469     } T[] = {
1470         { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * (1 << 5) },
1471         { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * (1 << 5) },
1472         { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * (1 << 5) },
1473         { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * (1 << 5) },
1474         { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * (1 << 5) },
1475         { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * (1 << 5) },
1476         { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * (1 << 5) },
1477         { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * (1 << 5) },
1478         { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * (1 << 5) },
1479         { 0x1p+0, 0x0p+0 * (1 << 4) },
1480         { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * (1 << 5) },
1481         { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * (1 << 5) },
1482         { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * (1 << 5) },
1483         { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * (1 << 5) },
1484         { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * (1 << 5) },
1485         { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * (1 << 5) }
1486     };
1487     static const double A[] = {
1488         0x1.27616c9496e0bp-2 * (1 << 5), -0x1.71969a075c67ap-2 * (1 << 5),
1489         0x1.ec70a6ca7baddp-2 * (1 << 5), -0x1.7154748bef6c8p-1 * (1 << 5),
1490         0x1.71547652ab82bp0 * (1 << 5)
1491     };
1492
1493     double z, r, r2, r4, p, q, y, y0, invc, logc;
1494     UINT32 iz, top, tmp;
1495     int k, i;
1496
1497     /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
1498        The range is split into N subintervals.
1499        The ith subinterval contains z and c is near its center. */
1500     tmp = ix - 0x3f330000;
1501     i = (tmp >> (23 - 4)) % (1 << 4);
1502     top = tmp & 0xff800000;
1503     iz = ix - top;
1504     k = (INT32)top >> (23 - 5); /* arithmetic shift */
1505     invc = T[i].invc;
1506     logc = T[i].logc;
1507     z = *(float*)&iz;
1508
1509     /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
1510     r = z * invc - 1;
1511     y0 = logc + (double)k;
1512
1513     /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
1514     r2 = r * r;
1515     y = A[0] * r + A[1];
1516     p = A[2] * r + A[3];
1517     r4 = r2 * r2;
1518     q = A[4] * r + y0;
1519     q = p * r2 + q;
1520     y = y * r4 + q;
1521     return y;
1522 }
1523
1524 /* The output of log2 and thus the input of exp2 is either scaled by N
1525    (in case of fast toint intrinsics) or not. The unscaled xd must be
1526    in [-1021,1023], sign_bias sets the sign of the result. */
1527 static float powf_exp2(double xd, UINT32 sign_bias)
1528 {
1529     static const double C[] = {
1530         0x1.c6af84b912394p-5 / (1 << 5) / (1 << 5) / (1 << 5),
1531         0x1.ebfce50fac4f3p-3 / (1 << 5) / (1 << 5),
1532         0x1.62e42ff0c52d6p-1 / (1 << 5)
1533     };
1534
1535     UINT64 ki, ski, t;
1536     double kd, z, r, r2, y, s;
1537
1538     /* N*x = k + r with r in [-1/2, 1/2] */
1539     kd = __round(xd); /* k */
1540     ki = (INT64)kd;
1541     r = xd - kd;
1542
1543     /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
1544     t = exp2f_T[ki % (1 << 5)];
1545     ski = ki + sign_bias;
1546     t += ski << (52 - 5);
1547     s = *(double*)&t;
1548     z = C[0] * r + C[1];
1549     r2 = r * r;
1550     y = C[2] * r + 1;
1551     y = z * r2 + y;
1552     y = y * s;
1553     return y;
1554 }
1555
1556 /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
1557    the bit representation of a non-zero finite floating-point value. */
1558 static int powf_checkint(UINT32 iy)
1559 {
1560     int e = iy >> 23 & 0xff;
1561     if (e < 0x7f)
1562         return 0;
1563     if (e > 0x7f + 23)
1564         return 2;
1565     if (iy & ((1 << (0x7f + 23 - e)) - 1))
1566         return 0;
1567     if (iy & (1 << (0x7f + 23 - e)))
1568         return 1;
1569     return 2;
1570 }
1571
/*********************************************************************
 *      powf (MSVCRT.@)
 *
 * Single-precision x raised to the power y. After the special cases
 * the result is computed in double precision as
 * powf_exp2(y * powf_log2(x)); for a negative x with an odd integer y
 * the sign is carried into powf_exp2() through sign_bias.
 *
 * Errors reported through math_error(): _SING for a zero base with
 * negative y, _DOMAIN for a negative finite base with non-integer y,
 * _OVERFLOW / _UNDERFLOW when the result is out of range.
 *
 * Copied from musl: src/math/powf.c src/math/powf_data.c
 */
float CDECL powf( float x, float y )
{
    UINT32 sign_bias = 0;
    UINT32 ix, iy;
    double logx, ylogx;

    ix = *(UINT32*)&x;
    iy = *(UINT32*)&y;
    /* 2 * v - 1 >= 2u * 0x7f800000 - 1 is true exactly when v, with its
       sign bit shifted out, is zero, infinite or NaN */
    if (ix - 0x00800000 >= 0x7f800000 - 0x00800000 ||
            2 * iy - 1 >= 2u * 0x7f800000 - 1) {
        /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */
        if (2 * iy - 1 >= 2u * 0x7f800000 - 1) {
            /* y == +-0: result is 1 whatever x is */
            if (2 * iy == 0)
                return 1.0f;
            /* x == 1: result is 1 whatever y is */
            if (ix == 0x3f800000)
                return 1.0f;
            /* x or y is NaN */
            if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000)
                return x + y;
            /* |x| == 1 with infinite y */
            if (2 * ix == 2 * 0x3f800000)
                return 1.0f;
            if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
                return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
            /* remaining infinite-y cases give +inf */
            return y * y;
        }
        if (2 * ix - 1 >= 2u * 0x7f800000 - 1) {
            /* x is +-0, +-inf or NaN; y is finite and non-zero */
            float x2 = x * x;
            /* negative base with odd integer exponent: negative result */
            if (ix & 0x80000000 && powf_checkint(iy) == 1)
                x2 = -x2;
            /* zero base with negative exponent */
            if (iy & 0x80000000 && x2 == 0.0)
                return math_error(_SING, "powf", x, y, 1 / x2);
            /* Without the barrier some versions of clang hoist the 1/x2 and
               thus division by zero exception can be signaled spuriously. */
            return iy & 0x80000000 ? fp_barrierf(1 / x2) : x2;
        }
        /* x and y are non-zero finite. */
        if (ix & 0x80000000) {
            /* Finite x < 0. */
            int yint = powf_checkint(iy);
            if (yint == 0)
                return math_error(_DOMAIN, "powf", x, y, 0 / (x - x));
            if (yint == 1)
                sign_bias = 1 << (5 + 11);
            /* continue with |x| */
            ix &= 0x7fffffff;
        }
        if (ix < 0x00800000) {
            /* Normalize subnormal x so exponent becomes negative. */
            x *= 0x1p23f;
            ix = *(UINT32*)&x;
            ix &= 0x7fffffff;
            ix -= 23 << 23;
        }
    }
    /* both values below carry the 1 << 5 scale applied by powf_log2() */
    logx = powf_log2(ix);
    ylogx = y * logx; /* cannot overflow, y is single prec. */
    /* compares the top bits of ylogx, sign dropped, with those of
       126.0 * (1 << 5) */
    if ((*(UINT64*)&ylogx >> 47 & 0xffff) >= 0x40af800000000000llu >> 47) {
        /* |y*log(x)| >= 126. */
        if (ylogx > 0x1.fffffffd1d571p+6 * (1 << 5))
            return math_error(_OVERFLOW, "powf", x, y, (sign_bias ? -1.0 : 1.0) * 0x1p1023);
        if (ylogx <= -150.0 * (1 << 5))
            return math_error(_UNDERFLOW, "powf", x, y, (sign_bias ? -1.0 : 1.0) / 0x1p1023);
    }
    return powf_exp2(ylogx, sign_bias);
}
1640
1641 /*********************************************************************
1642  *      sinf (MSVCRT.@)
1643  *
1644  * Copied from musl: src/math/sinf.c
1645  */
1646 float CDECL sinf( float x )
1647 {
1648     static const double s1pio2 = 1*M_PI_2,
1649         s2pio2 = 2*M_PI_2,
1650         s3pio2 = 3*M_PI_2,
1651         s4pio2 = 4*M_PI_2;
1652
1653     double y;
1654     UINT32 ix;
1655     int n, sign;
1656
1657     ix = *(UINT32*)&x;
1658     sign = ix >> 31;
1659     ix &= 0x7fffffff;
1660
1661     if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
1662         if (ix < 0x39800000) { /* |x| < 2**-12 */
1663             /* raise inexact if x!=0 and underflow if subnormal */
1664             fp_barrierf(ix < 0x00800000 ? x / 0x1p120f : x + 0x1p120f);
1665             return x;
1666         }
1667         return __sindf(x);
1668     }
1669     if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
1670         if (ix <= 0x4016cbe3) { /* |x| ~<= 3pi/4 */
1671             if (sign)
1672                 return -__cosdf(x + s1pio2);
1673             else
1674                 return __cosdf(x - s1pio2);
1675         }
1676         return __sindf(sign ? -(x + s2pio2) : -(x - s2pio2));
1677     }
1678     if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
1679         if (ix <= 0x40afeddf) { /* |x| ~<= 7*pi/4 */
1680             if (sign)
1681                 return __cosdf(x + s3pio2);
1682             else
1683                 return -__cosdf(x - s3pio2);
1684         }
1685         return __sindf(sign ? x + s4pio2 : x - s4pio2);
1686     }
1687
1688     /* sin(Inf or NaN) is NaN */
1689     if (isinf(x))
1690         return math_error(_DOMAIN, "sinf", x, 0, x - x);
1691     if (ix >= 0x7f800000)
1692         return x - x;
1693
1694     /* general argument reduction needed */
1695     n = __rem_pio2f(x, &y);
1696     switch (n&3) {
1697     case 0: return __sindf(y);
1698     case 1: return __cosdf(y);
1699     case 2: return __sindf(-y);
1700     default: return -__cosdf(y);
1701     }
1702 }
1703
1704 /*********************************************************************
1705  *      sinhf (MSVCRT.@)
1706  */
1707 float CDECL sinhf( float x )
1708 {
1709     UINT32 ui = *(UINT32*)&x;
1710     float t, h, absx;
1711
1712     h = 0.5;
1713     if (ui >> 31)
1714         h = -h;
1715     /* |x| */
1716     ui &= 0x7fffffff;
1717     absx = *(float*)&ui;
1718
1719     /* |x| < log(FLT_MAX) */
1720     if (ui < 0x42b17217) {
1721         t = __expm1f(absx);
1722         if (ui < 0x3f800000) {
1723             if (ui < 0x3f800000 - (12 << 23))
1724                 return x;
1725             return h * (2 * t - t * t / (t + 1));
1726         }
1727         return h * (t + t / (t + 1));
1728     }
1729
1730     /* |x| > logf(FLT_MAX) or nan */
1731     if (ui > 0x7f800000)
1732         *(DWORD*)&t = *(DWORD*)&x | 0x400000;
1733     else
1734         t = __expo2f(absx, 2 * h);
1735     return t;
1736 }
1737
1738 static BOOL sqrtf_validate( float *x )
1739 {
1740     short c = _fdclass(*x);
1741
1742     if (c == FP_ZERO) return FALSE;
1743     if (c == FP_NAN) return FALSE;
1744     if (signbit(*x))
1745     {
1746         *x = math_error(_DOMAIN, "sqrtf", *x, 0, ret_nan(TRUE));
1747         return FALSE;
1748     }
1749     if (c == FP_INFINITE) return FALSE;
1750     return TRUE;
1751 }
1752
#if defined(__x86_64__) || defined(__i386__)
/* Assembly helper consisting of a single `sqrtss %xmm0, %xmm0` followed by
 * `ret`: the scalar single-precision square root of the low lane of xmm0,
 * left in xmm0.
 * NOTE(review): this presumes the float argument arrives in, and is returned
 * through, xmm0 - confirm for the i386 build. */
float CDECL sse2_sqrtf(float);
__ASM_GLOBAL_FUNC( sse2_sqrtf,
        "sqrtss %xmm0, %xmm0\n\t"
        "ret" )
#endif
1759
/*********************************************************************
 *      sqrtf (MSVCRT.@)
 *
 * Single-precision square root. Special values are filtered by
 * sqrtf_validate(), which also produces the return value for them.
 * On x86-64 the remaining work is done by sse2_sqrtf(); elsewhere the
 * root is built one bit at a time with integer arithmetic.
 *
 * Copied from musl: src/math/sqrtf.c
 */
float CDECL sqrtf( float x )
{
#ifdef __x86_64__
    if (!sqrtf_validate(&x))
        return x;

    return sse2_sqrtf(x);
#else
    static const float tiny = 1.0e-30;

    float z;
    int ix,s,q,m,t,i;
    unsigned int r;

    /* bit pattern is taken before validation; it is only used once
       sqrtf_validate() has accepted x */
    ix = *(int*)&x;

    if (!sqrtf_validate(&x))
        return x;

    /* normalize x */
    m = ix >> 23;
    if (m == 0) {  /* subnormal x */
        /* shift the mantissa up until the implicit-bit position is set,
           lowering the exponent accordingly */
        for (i = 0; (ix & 0x00800000) == 0; i++)
            ix <<= 1;
        m -= i - 1;
    }
    m -= 127;  /* unbias exponent */
    ix = (ix & 0x007fffff) | 0x00800000;
    if (m & 1)  /* odd m, double x to make it even */
        ix += ix;
    m >>= 1;  /* m = [m/2] */

    /* generate sqrt(x) bit by bit */
    ix += ix;
    q = s = 0;       /* q = sqrt(x) */
    r = 0x01000000;  /* r = moving bit from right to left */

    while (r != 0) {
        t = s + r;
        if (t <= ix) {
            /* the trial bit fits: accept it into q and update the
               running remainder */
            s = t + r;
            ix -= t;
            q += r;
        }
        ix += ix;
        r >>= 1;
    }

    /* use floating add to find out rounding direction */
    if (ix != 0) {
        /* a non-zero remainder means the root is inexact */
        z = 1.0f - tiny; /* raise inexact flag */
        if (z >= 1.0f) {
            z = 1.0f + tiny;
            if (z > 1.0f)
                q += 2;
            else
                q += q & 1;
        }
    }
    /* drop the extra low bit of q, then add the exponent bias and the
       halved exponent */
    ix = (q >> 1) + 0x3f000000;
    r = ix + ((unsigned int)m << 23);
    z = *(float*)&r;
    return z;
#endif
}
1830
/* Copied from musl: src/math/__tandf.c
 *
 * Polynomial approximation of tan(x) for a reduced argument given as a
 * double (tanf() passes values of magnitude up to about pi/4).
 * Returns the polynomial value when odd is zero and -1 / value otherwise.
 */
static float __tandf(double x, int odd)
{
    static const double T[] = {
        0x15554d3418c99f.0p-54,
        0x1112fd38999f72.0p-55,
        0x1b54c91d865afe.0p-57,
        0x191df3908c33ce.0p-58,
        0x185dadfcecf44e.0p-61,
        0x1362b9bf971bcd.0p-59,
    };

    double x2, x3, x4, lo, mid, hi, tan_x;

    /* the three coefficient pairs depend only on x^2 and are evaluated
       separately before being combined */
    x2 = x * x;
    hi = T[4] + x2 * T[5];
    mid = T[2] + x2 * T[3];
    x4 = x2 * x2;
    x3 = x2 * x;
    lo = T[0] + x2 * T[1];
    tan_x = (x + x3 * lo) + (x3 * x4) * (mid + x4 * hi);

    if (odd)
        return -1.0 / tan_x;
    return tan_x;
}
1854
1855 /*********************************************************************
1856  *      tanf (MSVCRT.@)
1857  *
1858  * Copied from musl: src/math/tanf.c
1859  */
1860 float CDECL tanf( float x )
1861 {
1862     static const double t1pio2 = 1*M_PI_2,
1863         t2pio2 = 2*M_PI_2,
1864         t3pio2 = 3*M_PI_2,
1865         t4pio2 = 4*M_PI_2;
1866
1867     double y;
1868     UINT32 ix;
1869     unsigned n, sign;
1870
1871     ix = *(UINT32*)&x;
1872     sign = ix >> 31;
1873     ix &= 0x7fffffff;
1874
1875     if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
1876         if (ix < 0x39800000) { /* |x| < 2**-12 */
1877             /* raise inexact if x!=0 and underflow if subnormal */
1878             fp_barrierf(ix < 0x00800000 ? x / 0x1p120f : x + 0x1p120f);
1879             return x;
1880         }
1881         return __tandf(x, 0);
1882     }
1883     if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
1884         if (ix <= 0x4016cbe3) /* |x| ~<= 3pi/4 */
1885             return __tandf((sign ? x + t1pio2 : x - t1pio2), 1);
1886         else
1887             return __tandf((sign ? x + t2pio2 : x - t2pio2), 0);
1888     }
1889     if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
1890         if (ix <= 0x40afeddf) /* |x| ~<= 7*pi/4 */
1891             return __tandf((sign ? x + t3pio2 : x - t3pio2), 1);
1892         else
1893             return __tandf((sign ? x + t4pio2 : x - t4pio2), 0);
1894     }
1895
1896     /* tan(Inf or NaN) is NaN */
1897     if (isinf(x))
1898         return math_error(_DOMAIN, "tanf", x, 0, x - x);
1899     if (ix >= 0x7f800000)
1900         return x - x;
1901
1902     /* argument reduction */
1903     n = __rem_pio2f(x, &y);
1904     return __tandf(y, n & 1);
1905 }
1906
1907 /*********************************************************************
1908  *      tanhf (MSVCRT.@)
1909  */
1910 float CDECL tanhf( float x )
1911 {
1912     UINT32 ui = *(UINT32*)&x;
1913     UINT32 sign = ui & 0x80000000;
1914     float t;
1915
1916     /* x = |x| */
1917     ui &= 0x7fffffff;
1918     x = *(float*)&ui;
1919
1920     if (ui > 0x3f0c9f54) {
1921         /* |x| > log(3)/2 ~= 0.5493 or nan */
1922         if (ui > 0x41200000) {
1923             if (ui > 0x7f800000) {
1924                 *(UINT32*)&x = ui | sign | 0x400000;
1925 #if _MSVCR_VER < 140
1926                 return math_error(_DOMAIN, "tanhf", x, 0, x);
1927 #else
1928                 return x;
1929 #endif
1930             }
1931             /* |x| > 10 */
1932             fp_barrierf(x + 0x1p120f);
1933             t = 1 + 0 / x;
1934         } else {
1935             t = __expm1f(2 * x);
1936             t = 1 - 2 / (t + 2);
1937         }
1938     } else if (ui > 0x3e82c578) {
1939         /* |x| > log(5/3)/2 ~= 0.2554 */
1940         t = __expm1f(2 * x);
1941         t = t / (t + 2);
1942     } else if (ui >= 0x00800000) {
1943         /* |x| >= 0x1p-126 */
1944         t = __expm1f(-2 * x);
1945         t = -t / (t + 2);
1946     } else {
1947         /* |x| is subnormal */
1948         fp_barrierf(x * x);
1949         t = x;
1950     }
1951     return sign ? -t : t;
1952 }
1953
1954 /*********************************************************************
1955  *      ceilf (MSVCRT.@)
1956  *
1957  * Copied from musl: src/math/ceilf.c
1958  */
1959 float CDECL ceilf( float x )
1960 {
1961     union {float f; UINT32 i;} u = {x};
1962     int e = (int)(u.i >> 23 & 0xff) - 0x7f;
1963     UINT32 m;
1964
1965     if (e >= 23)
1966         return x;
1967     if (e >= 0) {
1968         m = 0x007fffff >> e;
1969         if ((u.i & m) == 0)
1970             return x;
1971         if (u.i >> 31 == 0)
1972             u.i += m;
1973         u.i &= ~m;
1974     } else {
1975         if (u.i >> 31)
1976             return -0.0;
1977         else if (u.i << 1)
1978             return 1.0;
1979     }
1980     return u.f;
1981 }
1982
1983 /*********************************************************************
1984  *      floorf (MSVCRT.@)
1985  *
1986  * Copied from musl: src/math/floorf.c
1987  */
1988 float CDECL floorf( float x )
1989 {
1990     union {float f; UINT32 i;} u = {x};
1991     int e = (int)(u.i >> 23 & 0xff) - 0x7f;
1992     UINT32 m;
1993
1994     if (e >= 23)
1995         return x;
1996     if (e >= 0) {
1997         m = 0x007fffff >> e;
1998         if ((u.i & m) == 0)
1999             return x;
2000         if (u.i >> 31)
2001             u.i += m;
2002         u.i &= ~m;
2003     } else {
2004         if (u.i >> 31 == 0)
2005             return 0;
2006         else if (u.i << 1)
2007             return -1;
2008     }
2009     return u.f;
2010 }
2011
2012 #endif
2013
2014 /*********************************************************************
2015  *              acos (MSVCRT.@)
2016  *
2017  * Copied from musl: src/math/acos.c
2018  */
/* Rational function P(z) / Q(z) used by every branch of acos() below. */
static double acos_R(double z)
{
    /* numerator coefficients */
    static const double pS0 =  1.66666666666666657415e-01;
    static const double pS1 = -3.25565818622400915405e-01;
    static const double pS2 =  2.01212532134862925881e-01;
    static const double pS3 = -4.00555345006794114027e-02;
    static const double pS4 =  7.91534994289814532176e-04;
    static const double pS5 =  3.47933107596021167570e-05;
    /* denominator coefficients (the constant term is 1) */
    static const double qS1 = -2.40339491173441421878e+00;
    static const double qS2 =  2.02094576023350569471e+00;
    static const double qS3 = -6.88283971605453293030e-01;
    static const double qS4 =  7.70381505559019352791e-02;

    const double num = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
    const double den = 1.0 + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));

    return num / den;
}
2037
2038 double CDECL acos( double x )
2039 {
2040     static const double pio2_hi = 1.57079632679489655800e+00,
2041                  pio2_lo = 6.12323399573676603587e-17;
2042
2043     double z, w, s, c, df;
2044     unsigned int hx, ix;
2045     ULONGLONG llx;
2046
2047     hx = *(ULONGLONG*)&x >> 32;
2048     ix = hx & 0x7fffffff;
2049     /* |x| >= 1 or nan */
2050     if (ix >= 0x3ff00000) {
2051         unsigned int lx;
2052
2053         lx = *(ULONGLONG*)&x;
2054         if (((ix - 0x3ff00000) | lx) == 0) {
2055             /* acos(1)=0, acos(-1)=pi */
2056             if (hx >> 31)
2057                 return 2 * pio2_hi + 7.5231638452626401e-37;
2058             return 0;
2059         }
2060         if (isnan(x)) return x;
2061         return math_error(_DOMAIN, "acos", x, 0, 0 / (x - x));
2062     }
2063     /* |x| < 0.5 */
2064     if (ix < 0x3fe00000) {
2065         if (ix <= 0x3c600000)  /* |x| < 2**-57 */
2066             return pio2_hi + 7.5231638452626401e-37;
2067         return pio2_hi - (x - (pio2_lo - x * acos_R(x * x)));
2068     }
2069     /* x < -0.5 */
2070     if (hx >> 31) {
2071         z = (1.0 + x) * 0.5;
2072         s = sqrt(z);
2073         w = acos_R(z) * s - pio2_lo;
2074         return 2 * (pio2_hi - (s + w));
2075     }
2076     /* x > 0.5 */
2077     z = (1.0 - x) * 0.5;
2078     s = sqrt(z);
2079     df = s;
2080     llx = (*(ULONGLONG*)&df >> 32) << 32;
2081     df = *(double*)&llx;
2082     c = (z - df * df) / (s + df);
2083     w = acos_R(z) * s + c;
2084     return 2 * (df + w);
2085 }
2086
2087 /*********************************************************************
2088  *              asin (MSVCRT.@)
2089  *
2090  * Copied from musl: src/math/asin.c
2091  */
/* Rational function P(z) / Q(z) used by asin() below; coefficients for R(x^2). */
static double asin_R(double z)
{
    /* numerator coefficients */
    static const double pS0 =  1.66666666666666657415e-01;
    static const double pS1 = -3.25565818622400915405e-01;
    static const double pS2 =  2.01212532134862925881e-01;
    static const double pS3 = -4.00555345006794114027e-02;
    static const double pS4 =  7.91534994289814532176e-04;
    static const double pS5 =  3.47933107596021167570e-05;
    /* denominator coefficients (the constant term is 1) */
    static const double qS1 = -2.40339491173441421878e+00;
    static const double qS2 =  2.02094576023350569471e+00;
    static const double qS3 = -6.88283971605453293030e-01;
    static const double qS4 =  7.70381505559019352791e-02;

    const double num = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
    const double den = 1.0 + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));

    return num / den;
}
2111
#ifdef __i386__
/*
 * i386-only x87 assembly helper; asin() calls it instead of the portable
 * code when its floating-point control word check fails.
 *
 * The argument is loaded from 4(%esp).  The instruction sequence builds the
 * product of (1 + x) and the fld1/fsubp result, takes its square root and
 * finishes with fpatan on x and that root.
 * NOTE(review): this looks like atan(x / sqrt((1 - x) * (1 + x))) - confirm
 * the operand order of the operand-less fsubp with the assembler in use.
 *
 * SET_X87_CW / RESET_X87_CW are macros defined outside this section;
 * presumably they switch the x87 control word around the computation and
 * restore it afterwards - TODO confirm.
 */
double CDECL x87_asin(double);
__ASM_GLOBAL_FUNC( x87_asin,
        "fldl 4(%esp)\n\t"
        SET_X87_CW(~0x37f)
        "fld %st\n\t"
        "fld1\n\t"
        "fsubp\n\t"
        "fld1\n\t"
        "fadd %st(2)\n\t"
        "fmulp\n\t"
        "fsqrt\n\t"
        "fpatan\n\t"
        RESET_X87_CW
        "ret" )
#endif
2128
2129 double CDECL asin( double x )
2130 {
2131     static const double pio2_hi = 1.57079632679489655800e+00,
2132                  pio2_lo = 6.12323399573676603587e-17;
2133
2134     double z, r, s;
2135     unsigned int hx, ix;
2136     ULONGLONG llx;
2137 #ifdef __i386__
2138     unsigned int x87_cw, sse2_cw;
2139 #endif
2140
2141     hx = *(ULONGLONG*)&x >> 32;
2142     ix = hx & 0x7fffffff;
2143     /* |x| >= 1 or nan */
2144     if (ix >= 0x3ff00000) {
2145         unsigned int lx;
2146         lx = *(ULONGLONG*)&x;
2147         if (((ix - 0x3ff00000) | lx) == 0)
2148             /* asin(1) = +-pi/2 with inexact */
2149             return x * pio2_hi + 7.5231638452626401e-37;
2150         if (isnan(x))
2151         {
2152 #ifdef __i386__
2153             return math_error(_DOMAIN, "asin", x, 0, x);
2154 #else
2155             return x;
2156 #endif
2157         }
2158         return math_error(_DOMAIN, "asin", x, 0, 0 / (x - x));
2159     }
2160
2161 #ifdef __i386__
2162     __control87_2(0, 0, &x87_cw, &sse2_cw);
2163     if (!sse2_enabled || (x87_cw & _MCW_EM) != _MCW_EM
2164             || (sse2_cw & (_MCW_EM | _MCW_RC)) != _MCW_EM)
2165         return x87_asin(x);
2166 #endif
2167
2168     /* |x| < 0.5 */
2169     if (ix < 0x3fe00000) {
2170         /* if 0x1p-1022 <= |x| < 0x1p-26, avoid raising underflow */
2171         if (ix < 0x3e500000 && ix >= 0x00100000)
2172             return x;
2173         return x + x * asin_R(x * x);
2174     }
2175     /* 1 > |x| >= 0.5 */
2176     z = (1 - fabs(x)) * 0.5;
2177     s = sqrt(z);
2178     r = asin_R(z);
2179     if (ix >= 0x3fef3333) {  /* if |x| > 0.975 */
2180         x = pio2_hi - (2 * (s + s * r) - pio2_lo);
2181     } else {
2182         double f, c;
2183         /* f+c = sqrt(z) */
2184         f = s;
2185         llx = (*(ULONGLONG*)&f >> 32) << 32;
2186         f = *(double*)&llx;
2187         c = (z - f * f) / (s + f);
2188         x = 0.5 * pio2_hi - (2 * s * r - (pio2_lo - 2 * c) - (0.5 * pio2_hi - 2 * f));
2189     }
2190     if (hx >> 31)
2191         return -x;
2192     return x;
2193 }
2194
2195 /*********************************************************************
2196  *              atan (MSVCRT.@)
2197  *
2198  * Copied from musl: src/math/atan.c
2199  */
2200 double CDECL atan( double x )
2201 {
2202     static const double atanhi[] = {
2203         4.63647609000806093515e-01,
2204         7.85398163397448278999e-01,
2205         9.82793723247329054082e-01,
2206         1.57079632679489655800e+00,
2207     };
2208     static const double atanlo[] = {
2209         2.26987774529616870924e-17,
2210         3.06161699786838301793e-17,
2211         1.39033110312309984516e-17,
2212         6.12323399573676603587e-17,
2213     };
2214     static const double aT[] = {
2215         3.33333333333329318027e-01,
2216         -1.99999999998764832476e-01,
2217         1.42857142725034663711e-01,
2218         -1.11111104054623557880e-01,
2219         9.09088713343650656196e-02,
2220         -7.69187620504482999495e-02,
2221         6.66107313738753120669e-02,
2222         -5.83357013379057348645e-02,
2223         4.97687799461593236017e-02,
2224         -3.65315727442169155270e-02,
2225         1.62858201153657823623e-02,
2226     };
2227
2228     double w, s1, s2, z;
2229     unsigned int ix, sign;
2230     int id;
2231
2232 #if _MSVCR_VER == 0
2233     if (isnan(x)) return math_error(_DOMAIN, "atan", x, 0, x);
2234 #endif
2235
2236     ix = *(ULONGLONG*)&x >> 32;
2237     sign = ix >> 31;
2238     ix &= 0x7fffffff;
2239     if (ix >= 0x44100000) {   /* if |x| >= 2^66 */
2240         if (isnan(x))
2241             return x;
2242         z = atanhi[3] + 7.5231638452626401e-37;
2243         return sign ? -z : z;
2244     }
2245     if (ix < 0x3fdc0000) {    /* |x| < 0.4375 */
2246         if (ix < 0x3e400000) {  /* |x| < 2^-27 */
2247             if (ix < 0x00100000)
2248                 /* raise underflow for subnormal x */
2249                 fp_barrierf((float)x);
2250             return x;
2251         }
2252         id = -1;
2253     } else {
2254         x = fabs(x);
2255         if (ix < 0x3ff30000) {  /* |x| < 1.1875 */
2256             if (ix < 0x3fe60000) {  /*  7/16 <= |x| < 11/16 */
2257                 id = 0;
2258                 x = (2.0 * x - 1.0) / (2.0 + x);
2259             } else {                /* 11/16 <= |x| < 19/16 */
2260                 id = 1;
2261                 x = (x - 1.0) / (x + 1.0);
2262             }
2263         } else {
2264             if (ix < 0x40038000) {  /* |x| < 2.4375 */
2265                 id = 2;
2266                 x = (x - 1.5) / (1.0 + 1.5 * x);
2267             } else {                /* 2.4375 <= |x| < 2^66 */
2268                 id = 3;
2269                 x = -1.0 / x;
2270             }
2271         }
2272     }
2273     /* end of argument reduction */
2274     z = x * x;
2275     w = z * z;
2276     /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
2277     s1 = z * (aT[0] + w * (aT[2] + w * (aT[4] + w * (aT[6] + w * (aT[8] + w * aT[10])))));
2278     s2 = w * (aT[1] + w * (aT[3] + w * (aT[5] + w * (aT[7] + w * aT[9]))));
2279     if (id < 0)
2280         return x - x * (s1 + s2);
2281     z = atanhi[id] - (x * (s1 + s2) - atanlo[id] - x);
2282     return sign ? -z : z;
2283 }
2284
2285 /*********************************************************************
2286  *              atan2 (MSVCRT.@)
2287  *
2288  * Copied from musl: src/math/atan2.c
2289  */
2290 double CDECL atan2( double y, double x )
2291 {
2292     static const double pi     = 3.1415926535897931160E+00,
2293                  pi_lo  = 1.2246467991473531772E-16;
2294
2295     double z;
2296     unsigned int m, lx, ly, ix, iy;
2297
2298     if (isnan(x) || isnan(y))
2299         return x+y;
2300     ix = *(ULONGLONG*)&x >> 32;
2301     lx = *(ULONGLONG*)&x;
2302     iy = *(ULONGLONG*)&y >> 32;
2303     ly = *(ULONGLONG*)&y;
2304     if (((ix - 0x3ff00000) | lx) == 0)  /* x = 1.0 */
2305         return atan(y);
2306     m = ((iy >> 31) & 1) | ((ix >> 30) & 2);  /* 2*sign(x)+sign(y) */
2307     ix = ix & 0x7fffffff;
2308     iy = iy & 0x7fffffff;
2309
2310     /* when y = 0 */
2311     if ((iy | ly) == 0) {
2312         switch(m) {
2313         case 0:
2314         case 1: return y;   /* atan(+-0,+anything)=+-0 */
2315         case 2: return pi;  /* atan(+0,-anything) = pi */
2316         case 3: return -pi; /* atan(-0,-anything) =-pi */
2317         }
2318     }
2319     /* when x = 0 */
2320     if ((ix | lx) == 0)
2321         return m & 1 ? -pi / 2 : pi / 2;
2322     /* when x is INF */
2323     if (ix == 0x7ff00000) {
2324         if (iy == 0x7ff00000) {
2325             switch(m) {
2326             case 0: return pi / 4;      /* atan(+INF,+INF) */
2327             case 1: return -pi / 4;     /* atan(-INF,+INF) */
2328             case 2: return 3 * pi / 4;  /* atan(+INF,-INF) */
2329             case 3: return -3 * pi / 4; /* atan(-INF,-INF) */
2330             }
2331         } else {
2332             switch(m) {
2333             case 0: return 0.0;  /* atan(+...,+INF) */
2334             case 1: return -0.0; /* atan(-...,+INF) */
2335             case 2: return pi;   /* atan(+...,-INF) */
2336             case 3: return -pi;  /* atan(-...,-INF) */
2337             }
2338         }
2339     }
2340     /* |y/x| > 0x1p64 */
2341     if (ix + (64 << 20) < iy || iy == 0x7ff00000)
2342         return m & 1 ? -pi / 2 : pi / 2;
2343
2344     /* z = atan(|y/x|) without spurious underflow */
2345     if ((m & 2) && iy + (64 << 20) < ix)  /* |y/x| < 0x1p-64, x<0 */
2346         z = 0;
2347     else
2348         z = atan(fabs(y / x));
2349     switch (m) {
2350     case 0: return z;                /* atan(+,+) */
2351     case 1: return -z;               /* atan(-,+) */
2352     case 2: return pi - (z - pi_lo); /* atan(+,-) */
2353     default: /* case 3 */
2354         return (z - pi_lo) - pi;     /* atan(-,-) */
2355     }
2356 }
2357
2358 /* Copied from musl: src/math/rint.c */
2359 static double __rint(double x)
2360 {
2361     static const double toint = 1 / DBL_EPSILON;
2362
2363     ULONGLONG llx = *(ULONGLONG*)&x;
2364     int e = llx >> 52 & 0x7ff;
2365     int s = llx >> 63;
2366     unsigned cw;
2367     double y;
2368
2369     if (e >= 0x3ff+52)
2370         return x;
2371     cw = _controlfp(0, 0);
2372     if ((cw & _MCW_PC) != _PC_53)
2373         _controlfp(_PC_53, _MCW_PC);
2374     if (s)
2375         y = fp_barrier(x - toint) + toint;
2376     else
2377         y = fp_barrier(x + toint) - toint;
2378     if ((cw & _MCW_PC) != _PC_53)
2379         _controlfp(cw, _MCW_PC);
2380     if (y == 0)
2381         return s ? -0.0 : 0;
2382     return y;
2383 }
2384
/* Copied from musl: src/math/__rem_pio2.c */
/*
 * Argument reduction by multiples of pi/2.
 *
 * Writes the reduced argument as the pair y[0] + y[1] (y[0] is the leading
 * part, y[1] a small correction term) and returns the multiple n that was
 * removed; n is negative for negative x.  For inf or NaN both y[0] and y[1]
 * are set to x - x and 0 is returned.
 *
 * Three strategies are used depending on |x|: direct subtraction of
 * 1..4 times pi/2 for |x| up to about 9pi/4, a rint()-based reduction with
 * up to three refinement rounds for medium arguments, and
 * __rem_pio2_large() for everything else.
 */
static int __rem_pio2(double x, double *y)
{
    /* pio2_N / pio2_Nt appear to be successive head / tail pieces of pi/2
     * (pio2_1 + pio2_1t ~= pi/2, pio2_2 ~= pio2_1t, ...) - TODO confirm
     * against the musl source */
    static const double pio4    = 0x1.921fb54442d18p-1,
                 invpio2 = 6.36619772367581382433e-01,
                 pio2_1  = 1.57079632673412561417e+00,
                 pio2_1t = 6.07710050650619224932e-11,
                 pio2_2  = 6.07710050630396597660e-11,
                 pio2_2t = 2.02226624879595063154e-21,
                 pio2_3  = 2.02226624871116645580e-21,
                 pio2_3t = 8.47842766036889956997e-32;

    union {double f; UINT64 i;} u = {x};
    double z, w, t, r, fn, tx[3], ty[2];
    UINT32 ix;
    int sign, n, ex, ey, i;

    sign = u.i >> 63;
    ix = u.i >> 32 & 0x7fffffff; /* high word of |x| */
    if (ix <= 0x400f6a7a) { /* |x| ~<= 5pi/4 */
        if ((ix & 0xfffff) == 0x921fb) /* |x| ~= pi/2 or 2pi/2 */
            goto medium; /* cancellation -- use medium case */
        if (ix <= 0x4002d97c) { /* |x| ~<= 3pi/4 */
            if (!sign) {
                z = x - pio2_1; /* one round good to 85 bits */
                y[0] = z - pio2_1t;
                y[1] = (z - y[0]) - pio2_1t;
                return 1;
            } else {
                z = x + pio2_1;
                y[0] = z + pio2_1t;
                y[1] = (z - y[0]) + pio2_1t;
                return -1;
            }
        } else {
            if (!sign) {
                z = x - 2 * pio2_1;
                y[0] = z - 2 * pio2_1t;
                y[1] = (z - y[0]) - 2 * pio2_1t;
                return 2;
            } else {
                z = x + 2 * pio2_1;
                y[0] = z + 2 * pio2_1t;
                y[1] = (z - y[0]) + 2 * pio2_1t;
                return -2;
            }
        }
    }
    if (ix <= 0x401c463b) { /* |x| ~<= 9pi/4 */
        if (ix <= 0x4015fdbc) { /* |x| ~<= 7pi/4 */
            if (ix == 0x4012d97c) /* |x| ~= 3pi/2 */
                goto medium;
            if (!sign) {
                z = x - 3 * pio2_1;
                y[0] = z - 3 * pio2_1t;
                y[1] = (z - y[0]) - 3 * pio2_1t;
                return 3;
            } else {
                z = x + 3 * pio2_1;
                y[0] = z + 3 * pio2_1t;
                y[1] = (z - y[0]) + 3 * pio2_1t;
                return -3;
            }
        } else {
            if (ix == 0x401921fb) /* |x| ~= 4pi/2 */
                goto medium;
            if (!sign) {
                z = x - 4 * pio2_1;
                y[0] = z - 4 * pio2_1t;
                y[1] = (z - y[0]) - 4 * pio2_1t;
                return 4;
            } else {
                z = x + 4 * pio2_1;
                y[0] = z + 4 * pio2_1t;
                y[1] = (z - y[0]) + 4 * pio2_1t;
                return -4;
            }
        }
    }
    if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */
        /* also entered via goto from the small cases above when x is too
         * close to a multiple of pi/2 for a single subtraction */
medium:
        fn = __rint(x * invpio2); /* nearest multiple of pi/2 */
        n = (INT32)fn;
        r = x - fn * pio2_1;
        w = fn * pio2_1t; /* 1st round, good to 85 bits */
        /* Matters with directed rounding. */
        if (r - w < -pio4) {
            n--;
            fn--;
            r = x - fn * pio2_1;
            w = fn * pio2_1t;
        } else if (r - w > pio4) {
            n++;
            fn++;
            r = x - fn * pio2_1;
            w = fn * pio2_1t;
        }
        y[0] = r - w;
        /* compare the exponents of x (ex) and of the result (ey) to see how
         * many bits were cancelled and whether another round is needed */
        u.f = y[0];
        ey = u.i >> 52 & 0x7ff;
        ex = ix >> 20;
        if (ex - ey > 16) { /* 2nd round, good to 118 bits */
            t = r;
            w = fn * pio2_2;
            r = t - w;
            w = fn * pio2_2t - ((t - r) - w);
            y[0] = r - w;
            u.f = y[0];
            ey = u.i >> 52 & 0x7ff;
            if (ex - ey > 49) { /* 3rd round, good to 151 bits, covers all cases */
                t = r;
                w = fn * pio2_3;
                r = t - w;
                w = fn * pio2_3t - ((t - r) - w);
                y[0] = r - w;
            }
        }
        y[1] = (r - y[0]) - w;
        return n;
    }
    /*
     * all other (large) arguments
     */
    if (ix >= 0x7ff00000) {  /* x is inf or NaN */
        y[0] = y[1] = x - x;
        return 0;
    }
    /* set z = scalbn(|x|,-ilogb(x)+23) */
    u.f = x;
    u.i &= (UINT64)-1 >> 12;            /* keep only the mantissa bits */
    u.i |= (UINT64)(0x3ff + 23) << 52;  /* force the exponent to 23 */
    z = u.f;
    /* split z into integer pieces: take the integer part, then scale the
     * remaining fraction up by 2^24 for the next piece */
    for (i = 0; i < 2; i++) {
        tx[i] = (double)(INT32)z;
        z = (z - tx[i]) * 0x1p24;
    }
    tx[i] = z;
    /* skip zero terms, first term is non-zero */
    while (tx[i] == 0.0)
        i--;
    /* NOTE(review): __rem_pio2_large() is defined outside this section; it
     * receives the pieces, the exponent of the first piece and the number
     * of pieces, and fills ty[] with the reduced value of |x| */
    n = __rem_pio2_large(tx, ty, (int)(ix >> 20) - (0x3ff + 23), i + 1, 1);
    if (sign) {
        y[0] = -ty[0];
        y[1] = -ty[1];
        return -n;
    }
    y[0] = ty[0];
    y[1] = ty[1];
    return n;
}
2535
/* Copied from musl: src/math/__sin.c */
/*
 * Polynomial sine kernel.  x is the leading part of the argument and y its
 * tail; iy selects whether the tail is taken into account (iy != 0) or
 * ignored (iy == 0).
 */
static double __sin(double x, double y, int iy)
{
    static const double S1 = -1.66666666666666324348e-01;
    static const double S2 =  8.33333333332248946124e-03;
    static const double S3 = -1.98412698298579493134e-04;
    static const double S4 =  2.75573137070700676789e-06;
    static const double S5 = -2.50507602534068634195e-08;
    static const double S6 =  1.58969099521155010221e-10;

    double x2, x3, x4, poly;

    x2 = x * x;
    x4 = x2 * x2;
    poly = S2 + x2 * (S3 + x2 * S4) + x2 * x4 * (S5 + x2 * S6);
    x3 = x2 * x;
    if (iy)
        /* fold the tail y into the result */
        return x - ((x2 * (0.5 * y - x3 * poly) - y) - x3 * S1);
    return x + x3 * (S1 + x2 * poly);
}
2557
/* Copied from musl: src/math/__cos.c */
/*
 * Polynomial cosine kernel.  x is the leading part of the argument and y
 * its tail.
 */
static double __cos(double x, double y)
{
    static const double C1 =  4.16666666666666019037e-02;
    static const double C2 = -1.38888888888741095749e-03;
    static const double C3 =  2.48015872894767294178e-05;
    static const double C4 = -2.75573143513906633035e-07;
    static const double C5 =  2.08757232129817482790e-09;
    static const double C6 = -1.13596475577881948265e-11;

    double x2, x4, poly, half, head;

    x2 = x * x;
    x4 = x2 * x2;
    poly = x2 * (C1 + x2 * (C2 + x2 * C3)) + x4 * x4 * (C4 + x2 * (C5 + x2 * C6));
    half = 0.5 * x2;
    head = 1.0 - half;
    /* (1.0 - head) - half recovers what the subtraction above rounded off */
    return head + (((1.0 - head) - half) + (x2 * poly - x * y));
}
2576
2577 /*********************************************************************
2578  *              cos (MSVCRT.@)
2579  *
2580  * Copied from musl: src/math/cos.c
2581  */
2582 double CDECL cos( double x )
2583 {
2584     double y[2];
2585     UINT32 ix;
2586     unsigned n;
2587
2588     ix = *(ULONGLONG*)&x >> 32;
2589     ix &= 0x7fffffff;
2590
2591     /* |x| ~< pi/4 */
2592     if (ix <= 0x3fe921fb) {
2593         if (ix < 0x3e46a09e) { /* |x| < 2**-27 * sqrt(2) */
2594             /* raise inexact if x!=0 */
2595             fp_barrier(x + 0x1p120f);
2596             return 1.0;
2597         }
2598         return __cos(x, 0);
2599     }
2600
2601     /* cos(Inf or NaN) is NaN */
2602     if (isinf(x)) return math_error(_DOMAIN, "cos", x, 0, x - x);
2603     if (ix >= 0x7ff00000)
2604         return x - x;
2605
2606     /* argument reduction */
2607     n = __rem_pio2(x, y);
2608     switch (n & 3) {
2609     case 0: return __cos(y[0], y[1]);
2610     case 1: return -__sin(y[0], y[1], 1);
2611     case 2: return -__cos(y[0], y[1]);
2612     default: return __sin(y[0], y[1], 1);
2613     }
2614 }
2615
/* Copied from musl: src/math/expm1.c */
/*
 * Computes exp(x) - 1.
 *
 * NaN is returned unchanged, +inf is returned as is and -inf gives -1.
 * Finite arguments with |x| >= 56*ln2 that are negative are reported
 * through math_error() as _UNDERFLOW with result -1; arguments above
 * o_threshold are reported as _OVERFLOW.  Note that both calls pass "exp"
 * as the function name.
 *
 * Otherwise x is reduced to x = k*ln2 + r, the value for r is evaluated
 * with a rational approximation, and the result is scaled by 2^k.
 */
static double __expm1(double x)
{
    static const double o_threshold = 7.09782712893383973096e+02,
        ln2_hi = 6.93147180369123816490e-01,
        ln2_lo = 1.90821492927058770002e-10,
        invln2 = 1.44269504088896338700e+00,
        Q1 = -3.33333333333331316428e-02,
        Q2 = 1.58730158725481460165e-03,
        Q3 = -7.93650757867487942473e-05,
        Q4 = 4.00821782732936239552e-06,
        Q5 = -2.01099218183624371326e-07;

    double y, hi, lo, c, t, e, hxs, hfx, r1, twopk;
    union {double f; UINT64 i;} u = {x};
    UINT32 hx = u.i >> 32 & 0x7fffffff; /* high word of |x| */
    int k, sign = u.i >> 63;

    /* filter out huge and non-finite argument */
    if (hx >= 0x4043687A) { /* if |x|>=56*ln2 */
        if (isnan(x))
            return x;
        if (isinf(x))
            return sign ? -1 : x;
        if (sign)
            return math_error(_UNDERFLOW, "exp", x, 0, -1);
        if (x > o_threshold)
            return math_error(_OVERFLOW, "exp", x, 0, x * 0x1p1023);
    }

    /* argument reduction */
    if (hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */
        if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
            /* k is +-1: subtract or add ln2 directly */
            if (!sign) {
                hi = x - ln2_hi;
                lo = ln2_lo;
                k = 1;
            } else {
                hi = x + ln2_hi;
                lo = -ln2_lo;
                k = -1;
            }
        } else {
            /* k = x/ln2 rounded to the nearest integer (the conversion to
             * int truncates after the +-0.5 bias) */
            k = invln2 * x + (sign ? -0.5 : 0.5);
            t = k;
            hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
            lo = t * ln2_lo;
        }
        x = hi - lo;
        /* c is the rounding error of the subtraction above */
        c = (hi - x) - lo;
    } else if (hx < 0x3c900000) { /* |x| < 2**-54, return x */
        fp_barrier(x + 0x1p120f);
        if (hx < 0x00100000)
            fp_barrier((float)x);
        return x;
    } else
        /* no reduction needed; c stays unset and is not read when k == 0 */
        k = 0;

    /* x is now in primary range */
    hfx = 0.5 * x;
    hxs = x * hfx;
    r1 = 1.0 + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5))));
    t = 3.0 - r1 * hfx;
    e = hxs * ((r1 - t) / (6.0 - x * t));
    if (k == 0) /* c is 0 */
        return x - (x * e - hxs);
    e = x * (e - c) - c;
    e -= hxs;
    /* exp(x) ~ 2^k (x_reduced - e + 1) */
    if (k == -1)
        return 0.5 * (x - e) - 0.5;
    if (k == 1) {
        if (x < -0.25)
            return -2.0 * (e - (x + 0.5));
        return 1.0 + 2.0 * (x - e);
    }
    u.i = (UINT64)(0x3ff + k) << 52; /* 2^k */
    twopk = u.f;
    if (k < 0 || k > 56) { /* suffice to return exp(x)-1 */
        y = x - e + 1.0;
        /* for k == 1024 the exponent field above is not a finite power
         * of two, so scale by 2 * 2^1023 instead */
        if (k == 1024)
            y = y * 2.0 * 0x1p1023;
        else
            y = y * twopk;
        return y - 1.0;
    }
    u.i = (UINT64)(0x3ff - k) << 52; /* 2^-k */
    if (k < 20)
        y = (x - e + (1 - u.f)) * twopk;
    else
        y = (x - (e + u.f) + 1) * twopk;
    return y;
}
2709
2710 static double __expo2(double x, double sign)
2711 {
2712     static const int k = 2043;
2713     static const double kln2 = 0x1.62066151add8bp+10;
2714     double scale;
2715
2716     *(UINT64*)&scale = (UINT64)(0x3ff + k / 2) << 52;
2717     return exp(x - kln2) * (sign * scale) * scale;
2718 }
2719
2720 /*********************************************************************
2721  *              cosh (MSVCRT.@)
2722  *
2723  * Copied from musl: src/math/cosh.c
2724  */
2725 double CDECL cosh( double x )
2726 {
2727     UINT64 ux = *(UINT64*)&x;
2728     UINT64 sign = ux & 0x8000000000000000ULL;
2729     UINT32 w;
2730     double t;
2731
2732     /* |x| */
2733     ux &= (uint64_t)-1 / 2;
2734     x = *(double*)&ux;
2735     w = ux >> 32;
2736
2737     /* |x| < log(2) */
2738     if (w < 0x3fe62e42) {
2739         if (w < 0x3ff00000 - (26 << 20)) {
2740             fp_barrier(x + 0x1p120f);
2741             return 1;
2742         }
2743         t = __expm1(x);
2744         return 1 + t * t / (2 * (1 + t));
2745     }
2746
2747     /* |x| < log(DBL_MAX) */
2748     if (w < 0x40862e42) {
2749         t = exp(x);
2750         /* note: if x>log(0x1p26) then the 1/t is not needed */
2751         return 0.5 * (t + 1 / t);
2752     }
2753
2754     /* |x| > log(DBL_MAX) or nan */
2755     /* note: the result is stored to handle overflow */
2756     if (ux > 0x7ff0000000000000ULL)
2757         *(UINT64*)&t = ux | sign | 0x0008000000000000ULL;
2758     else
2759         t = __expo2(x, 1.0);
2760     return t;
2761 }
2762
2763 /* Copied from musl: src/math/exp_data.c */
2764 static const UINT64 exp_T[] = {
2765     0x0ULL, 0x3ff0000000000000ULL,
2766     0x3c9b3b4f1a88bf6eULL, 0x3feff63da9fb3335ULL,
2767     0xbc7160139cd8dc5dULL, 0x3fefec9a3e778061ULL,
2768     0xbc905e7a108766d1ULL, 0x3fefe315e86e7f85ULL,
2769     0x3c8cd2523567f613ULL, 0x3fefd9b0d3158574ULL,
2770     0xbc8bce8023f98efaULL, 0x3fefd06b29ddf6deULL,
2771     0x3c60f74e61e6c861ULL, 0x3fefc74518759bc8ULL,
2772     0x3c90a3e45b33d399ULL, 0x3fefbe3ecac6f383ULL,
2773     0x3c979aa65d837b6dULL, 0x3fefb5586cf9890fULL,
2774     0x3c8eb51a92fdeffcULL, 0x3fefac922b7247f7ULL,
2775     0x3c3ebe3d702f9cd1ULL, 0x3fefa3ec32d3d1a2ULL,
2776     0xbc6a033489906e0bULL, 0x3fef9b66affed31bULL,
2777     0xbc9556522a2fbd0eULL, 0x3fef9301d0125b51ULL,
2778     0xbc5080ef8c4eea55ULL, 0x3fef8abdc06c31ccULL,
2779     0xbc91c923b9d5f416ULL, 0x3fef829aaea92de0ULL,
2780     0x3c80d3e3e95c55afULL, 0x3fef7a98c8a58e51ULL,
2781     0xbc801b15eaa59348ULL, 0x3fef72b83c7d517bULL,
2782     0xbc8f1ff055de323dULL, 0x3fef6af9388c8deaULL,
2783     0x3c8b898c3f1353bfULL, 0x3fef635beb6fcb75ULL,
2784     0xbc96d99c7611eb26ULL, 0x3fef5be084045cd4ULL,
2785     0x3c9aecf73e3a2f60ULL, 0x3fef54873168b9aaULL,
2786     0xbc8fe782cb86389dULL, 0x3fef4d5022fcd91dULL,
2787     0x3c8a6f4144a6c38dULL, 0x3fef463b88628cd6ULL,
2788     0x3c807a05b0e4047dULL, 0x3fef3f49917ddc96ULL,
2789     0x3c968efde3a8a894ULL, 0x3fef387a6e756238ULL,
2790     0x3c875e18f274487dULL, 0x3fef31ce4fb2a63fULL,
2791     0x3c80472b981fe7f2ULL, 0x3fef2b4565e27cddULL,
2792     0xbc96b87b3f71085eULL, 0x3fef24dfe1f56381ULL,
2793     0x3c82f7e16d09ab31ULL, 0x3fef1e9df51fdee1ULL,
2794     0xbc3d219b1a6fbffaULL, 0x3fef187fd0dad990ULL,
2795     0x3c8b3782720c0ab4ULL, 0x3fef1285a6e4030bULL,
2796     0x3c6e149289cecb8fULL, 0x3fef0cafa93e2f56ULL,
2797     0x3c834d754db0abb6ULL, 0x3fef06fe0a31b715ULL,
2798     0x3c864201e2ac744cULL, 0x3fef0170fc4cd831ULL,
2799     0x3c8fdd395dd3f84aULL, 0x3feefc08b26416ffULL,
2800     0xbc86a3803b8e5b04ULL, 0x3feef6c55f929ff1ULL,
2801     0xbc924aedcc4b5068ULL, 0x3feef1a7373aa9cbULL,
2802     0xbc9907f81b512d8eULL, 0x3feeecae6d05d866ULL,
2803     0xbc71d1e83e9436d2ULL, 0x3feee7db34e59ff7ULL,
2804     0xbc991919b3ce1b15ULL, 0x3feee32dc313a8e5ULL,
2805     0x3c859f48a72a4c6dULL, 0x3feedea64c123422ULL,
2806     0xbc9312607a28698aULL, 0x3feeda4504ac801cULL,
2807     0xbc58a78f4817895bULL, 0x3feed60a21f72e2aULL,
2808     0xbc7c2c9b67499a1bULL, 0x3feed1f5d950a897ULL,
2809     0x3c4363ed60c2ac11ULL, 0x3feece086061892dULL,
2810     0x3c9666093b0664efULL, 0x3feeca41ed1d0057ULL,
2811     0x3c6ecce1daa10379ULL, 0x3feec6a2b5c13cd0ULL,
2812     0x3c93ff8e3f0f1230ULL, 0x3feec32af0d7d3deULL,
2813     0x3c7690cebb7aafb0ULL, 0x3feebfdad5362a27ULL,
2814     0x3c931dbdeb54e077ULL, 0x3feebcb299fddd0dULL,
2815     0xbc8f94340071a38eULL, 0x3feeb9b2769d2ca7ULL,
2816     0xbc87deccdc93a349ULL, 0x3feeb6daa2cf6642ULL,
2817     0xbc78dec6bd0f385fULL, 0x3feeb42b569d4f82ULL,
2818     0xbc861246ec7b5cf6ULL, 0x3feeb1a4ca5d920fULL,
2819     0x3c93350518fdd78eULL, 0x3feeaf4736b527daULL,
2820     0x3c7b98b72f8a9b05ULL, 0x3feead12d497c7fdULL,
2821     0x3c9063e1e21c5409ULL, 0x3feeab07dd485429ULL,
2822     0x3c34c7855019c6eaULL, 0x3feea9268a5946b7ULL,
2823     0x3c9432e62b64c035ULL, 0x3feea76f15ad2148ULL,
2824     0xbc8ce44a6199769fULL, 0x3feea5e1b976dc09ULL,
2825     0xbc8c33c53bef4da8ULL, 0x3feea47eb03a5585ULL,
2826     0xbc845378892be9aeULL, 0x3feea34634ccc320ULL,
2827     0xbc93cedd78565858ULL, 0x3feea23882552225ULL,
2828     0x3c5710aa807e1964ULL, 0x3feea155d44ca973ULL,
2829     0xbc93b3efbf5e2228ULL, 0x3feea09e667f3bcdULL,
2830     0xbc6a12ad8734b982ULL, 0x3feea012750bdabfULL,
2831     0xbc6367efb86da9eeULL, 0x3fee9fb23c651a2fULL,
2832     0xbc80dc3d54e08851ULL, 0x3fee9f7df9519484ULL,
2833     0xbc781f647e5a3ecfULL, 0x3fee9f75e8ec5f74ULL,
2834     0xbc86ee4ac08b7db0ULL, 0x3fee9f9a48a58174ULL,
2835     0xbc8619321e55e68aULL, 0x3fee9feb564267c9ULL,
2836     0x3c909ccb5e09d4d3ULL, 0x3feea0694fde5d3fULL,
2837     0xbc7b32dcb94da51dULL, 0x3feea11473eb0187ULL,
2838     0x3c94ecfd5467c06bULL, 0x3feea1ed0130c132ULL,
2839     0x3c65ebe1abd66c55ULL, 0x3feea2f336cf4e62ULL,
2840     0xbc88a1c52fb3cf42ULL, 0x3feea427543e1a12ULL,
2841     0xbc9369b6f13b3734ULL, 0x3feea589994cce13ULL,
2842     0xbc805e843a19ff1eULL, 0x3feea71a4623c7adULL,
2843     0xbc94d450d872576eULL, 0x3feea8d99b4492edULL,
2844     0x3c90ad675b0e8a00ULL, 0x3feeaac7d98a6699ULL,
2845     0x3c8db72fc1f0eab4ULL, 0x3feeace5422aa0dbULL,
2846     0xbc65b6609cc5e7ffULL, 0x3feeaf3216b5448cULL,
2847     0x3c7bf68359f35f44ULL, 0x3feeb1ae99157736ULL,
2848     0xbc93091fa71e3d83ULL, 0x3feeb45b0b91ffc6ULL,
2849     0xbc5da9b88b6c1e29ULL, 0x3feeb737b0cdc5e5ULL,
2850     0xbc6c23f97c90b959ULL, 0x3feeba44cbc8520fULL,
2851     0xbc92434322f4f9aaULL, 0x3feebd829fde4e50ULL,
2852     0xbc85ca6cd7668e4bULL, 0x3feec0f170ca07baULL,
2853     0x3c71affc2b91ce27ULL, 0x3feec49182a3f090ULL,
2854     0x3c6dd235e10a73bbULL, 0x3feec86319e32323ULL,
2855     0xbc87c50422622263ULL, 0x3feecc667b5de565ULL,
2856     0x3c8b1c86e3e231d5ULL, 0x3feed09bec4a2d33ULL,
2857     0xbc91bbd1d3bcbb15ULL, 0x3feed503b23e255dULL,
2858     0x3c90cc319cee31d2ULL, 0x3feed99e1330b358ULL,
2859     0x3c8469846e735ab3ULL, 0x3feede6b5579fdbfULL,
2860     0xbc82dfcd978e9db4ULL, 0x3feee36bbfd3f37aULL,
2861     0x3c8c1a7792cb3387ULL, 0x3feee89f995ad3adULL,
2862     0xbc907b8f4ad1d9faULL, 0x3feeee07298db666ULL,
2863     0xbc55c3d956dcaebaULL, 0x3feef3a2b84f15fbULL,
2864     0xbc90a40e3da6f640ULL, 0x3feef9728de5593aULL,
2865     0xbc68d6f438ad9334ULL, 0x3feeff76f2fb5e47ULL,
2866     0xbc91eee26b588a35ULL, 0x3fef05b030a1064aULL,
2867     0x3c74ffd70a5fddcdULL, 0x3fef0c1e904bc1d2ULL,
2868     0xbc91bdfbfa9298acULL, 0x3fef12c25bd71e09ULL,
2869     0x3c736eae30af0cb3ULL, 0x3fef199bdd85529cULL,
2870     0x3c8ee3325c9ffd94ULL, 0x3fef20ab5fffd07aULL,
2871     0x3c84e08fd10959acULL, 0x3fef27f12e57d14bULL,
2872     0x3c63cdaf384e1a67ULL, 0x3fef2f6d9406e7b5ULL,
2873     0x3c676b2c6c921968ULL, 0x3fef3720dcef9069ULL,
2874     0xbc808a1883ccb5d2ULL, 0x3fef3f0b555dc3faULL,
2875     0xbc8fad5d3ffffa6fULL, 0x3fef472d4a07897cULL,
2876     0xbc900dae3875a949ULL, 0x3fef4f87080d89f2ULL,
2877     0x3c74a385a63d07a7ULL, 0x3fef5818dcfba487ULL,
2878     0xbc82919e2040220fULL, 0x3fef60e316c98398ULL,
2879     0x3c8e5a50d5c192acULL, 0x3fef69e603db3285ULL,
2880     0x3c843a59ac016b4bULL, 0x3fef7321f301b460ULL,
2881     0xbc82d52107b43e1fULL, 0x3fef7c97337b9b5fULL,
2882     0xbc892ab93b470dc9ULL, 0x3fef864614f5a129ULL,
2883     0x3c74b604603a88d3ULL, 0x3fef902ee78b3ff6ULL,
2884     0x3c83c5ec519d7271ULL, 0x3fef9a51fbc74c83ULL,
2885     0xbc8ff7128fd391f0ULL, 0x3fefa4afa2a490daULL,
2886     0xbc8dae98e223747dULL, 0x3fefaf482d8e67f1ULL,
2887     0x3c8ec3bc41aa2008ULL, 0x3fefba1bee615a27ULL,
2888     0x3c842b94c3a9eb32ULL, 0x3fefc52b376bba97ULL,
2889     0x3c8a64a931d185eeULL, 0x3fefd0765b6e4540ULL,
2890     0xbc8e37bae43be3edULL, 0x3fefdbfdad9cbe14ULL,
2891     0x3c77893b4d91cd9dULL, 0x3fefe7c1819e90d8ULL,
2892     0x3c5305c14160cc89ULL, 0x3feff3c22b8f71f1ULL
2893 };
2894
2895 /*********************************************************************
2896  *              exp (MSVCRT.@)
2897  *
2898  * Copied from musl: src/math/exp.c
2899  */
2900 double CDECL exp( double x )
2901 {
2902     static const double C[] = {
2903         0x1.ffffffffffdbdp-2,
2904         0x1.555555555543cp-3,
2905         0x1.55555cf172b91p-5,
2906         0x1.1111167a4d017p-7
2907     };
2908     static const double invln2N = 0x1.71547652b82fep0 * (1 << 7),
2909         negln2hiN = -0x1.62e42fefa0000p-8,
2910         negln2loN = -0x1.cf79abc9e3b3ap-47;
2911
2912     UINT32 abstop;
2913     UINT64 ki, idx, top, sbits;
2914     double kd, z, r, r2, scale, tail, tmp;
2915
2916     abstop = (*(UINT64*)&x >> 52) & 0x7ff;
2917     if (abstop -  0x3c9 >= 0x408 - 0x3c9) {
2918         if (abstop - 0x3c9 >= 0x80000000)
2919             /* Avoid spurious underflow for tiny x. */
2920             /* Note: 0 is common input. */
2921             return 1.0 + x;
2922         if (abstop >= 0x409) {
2923             if (*(UINT64*)&x == 0xfff0000000000000ULL)
2924                 return 0.0;
2925 #if _MSVCR_VER == 0
2926             if (*(UINT64*)&x > 0x7ff0000000000000ULL)
2927                 return math_error(_DOMAIN, "exp", x, 0, 1.0 + x);
2928 #endif
2929             if (abstop >= 0x7ff)
2930                 return 1.0 + x;
2931             if (*(UINT64*)&x >> 63)
2932                 return math_error(_UNDERFLOW, "exp", x, 0, fp_barrier(DBL_MIN) * DBL_MIN);
2933             else
2934                 return math_error(_OVERFLOW, "exp", x, 0, fp_barrier(DBL_MAX) * DBL_MAX);
2935         }
2936         /* Large x is special cased below. */
2937         abstop = 0;
2938     }
2939
2940     /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
2941     /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
2942     z = invln2N * x;
2943     kd = __round(z);
2944     ki = (INT64)kd;
2945
2946     r = x + kd * negln2hiN + kd * negln2loN;
2947     /* 2^(k/N) ~= scale * (1 + tail). */
2948     idx = 2 * (ki % (1 << 7));
2949     top = ki << (52 - 7);
2950     tail = *(double*)&exp_T[idx];
2951     /* This is only a valid scale when -1023*N < k < 1024*N. */
2952     sbits = exp_T[idx + 1] + top;
2953     /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
2954     /* Evaluation is optimized assuming superscalar pipelined execution. */
2955     r2 = r * r;
2956     /* Without fma the worst case error is 0.25/N ulp larger. */
2957     /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
2958     tmp = tail + r + r2 * (C[0] + r * C[1]) + r2 * r2 * (C[2] + r * C[3]);
2959     if (abstop == 0) {
2960         /* Handle cases that may overflow or underflow when computing the result that
2961            is scale*(1+TMP) without intermediate rounding. The bit representation of
2962            scale is in SBITS, however it has a computed exponent that may have
2963            overflown into the sign bit so that needs to be adjusted before using it as
2964            a double. (int32_t)KI is the k used in the argument reduction and exponent
2965            adjustment of scale, positive k here means the result may overflow and
2966            negative k means the result may underflow. */
2967         double scale, y;
2968
2969         if ((ki & 0x80000000) == 0) {
2970             /* k > 0, the exponent of scale might have overflowed by <= 460. */
2971             sbits -= 1009ull << 52;
2972             scale = *(double*)&sbits;
2973             y = 0x1p1009 * (scale + scale * tmp);
2974             if (isinf(y))
2975                 return math_error(_OVERFLOW, "exp", x, 0, y);
2976             return y;
2977         }
2978         /* k < 0, need special care in the subnormal range. */
2979         sbits += 1022ull << 52;
2980         scale = *(double*)&sbits;
2981         y = scale + scale * tmp;
2982         if (y < 1.0) {
2983             /* Round y to the right precision before scaling it into the subnormal
2984                range to avoid double rounding that can cause 0.5+E/2 ulp error where
2985                E is the worst-case ulp error outside the subnormal range. So this
2986                is only useful if the goal is better than 1 ulp worst-case error. */
2987             double hi, lo;
2988             lo = scale - y + scale * tmp;
2989             hi = 1.0 + y;
2990             lo = 1.0 - hi + y + lo;
2991             y = hi + lo - 1.0;
2992             /* Avoid -0.0 with downward rounding. */
2993             if (y == 0.0)
2994                 y = 0.0;
2995             /* The underflow exception needs to be signaled explicitly. */
2996             fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022);
2997             y = 0x1p-1022 * y;
2998             return math_error(_UNDERFLOW, "exp", x, 0, y);
2999         }
3000         y = 0x1p-1022 * y;
3001         return y;
3002     }
3003     scale = *(double*)&sbits;
3004     /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
3005        is no spurious underflow here even without fma. */
3006     return scale + scale * tmp;
3007 }
3008
3009 /*********************************************************************
3010  *              fmod (MSVCRT.@)
3011  *
3012  * Copied from musl: src/math/fmod.c
3013  */
3014 double CDECL fmod( double x, double y )
3015 {
3016     UINT64 xi = *(UINT64*)&x;
3017     UINT64 yi = *(UINT64*)&y;
3018     int ex = xi >> 52 & 0x7ff;
3019     int ey = yi >> 52 & 0x7ff;
3020     int sx = xi >> 63;
3021     UINT64 i;
3022
3023     if (isinf(x)) return math_error(_DOMAIN, "fmod", x, y, (x * y) / (x * y));
3024     if (yi << 1 == 0 || isnan(y) || ex == 0x7ff)
3025         return (x * y) / (x * y);
3026     if (xi << 1 <= yi << 1) {
3027         if (xi << 1 == yi << 1)
3028             return 0 * x;
3029         return x;
3030     }
3031
3032     /* normalize x and y */
3033     if (!ex) {
3034         for (i = xi << 12; i >> 63 == 0; ex--, i <<= 1);
3035         xi <<= -ex + 1;
3036     } else {
3037         xi &= -1ULL >> 12;
3038         xi |= 1ULL << 52;
3039     }
3040     if (!ey) {
3041         for (i = yi << 12; i >> 63 == 0; ey--, i <<= 1);
3042         yi <<= -ey + 1;
3043     } else {
3044         yi &= -1ULL >> 12;
3045         yi |= 1ULL << 52;
3046     }
3047
3048     /* x mod y */
3049     for (; ex > ey; ex--) {
3050         i = xi - yi;
3051         if (i >> 63 == 0) {
3052             if (i == 0)
3053                 return 0 * x;
3054             xi = i;
3055         }
3056         xi <<= 1;
3057     }
3058     i = xi - yi;
3059     if (i >> 63 == 0) {
3060         if (i == 0)
3061             return 0 * x;
3062         xi = i;
3063     }
3064     for (; xi >> 52 == 0; xi <<= 1, ex--);
3065
3066     /* scale result */
3067     if (ex > 0) {
3068         xi -= 1ULL << 52;
3069         xi |= (UINT64)ex << 52;
3070     } else {
3071         xi >>= -ex + 1;
3072     }
3073     xi |= (UINT64)sx << 63;
3074     return *(double*)&xi;
3075 }
3076
/*********************************************************************
 *              log (MSVCRT.@)
 *
 * Copied from musl: src/math/log.c src/math/log_data.c
 *
 * Natural logarithm of x.
 *
 * Inputs whose bit pattern lies in [0x3fee000000000000, 0x3ff1090000000000)
 * (roughly 0.9375 <= x < 1.0647) are evaluated directly with the
 * polynomial B in r = x - 1. Every other input is split as x = 2^k * z and
 * evaluated as k*ln2 + log(c) + log1p(z/c - 1), where the per-subinterval
 * constants come from the 128-entry tables T and T2.
 *
 * Special cases:
 *   x == +-0    returns math_error(_SING, ...) with the value -inf
 *   x == +inf   returns x
 *   x is NaN    returns x
 *   x < 0       returns math_error(_DOMAIN, ...) with a NaN value
 */
double CDECL log( double x )
{
    /* ln(2) split into a high and a low part. */
    static const double Ln2hi = 0x1.62e42fefa3800p-1,
        Ln2lo = 0x1.ef35793c76730p-45;
    /* Polynomial coefficients used on the table-driven path. */
    static const double A[] = {
        -0x1.0000000000001p-1,
        0x1.555555551305bp-2,
        -0x1.fffffffeb459p-3,
        0x1.999b324f10111p-3,
        -0x1.55575e506c89fp-3
    };
    /* Polynomial coefficients used for inputs close to 1.0. */
    static const double B[] = {
        -0x1p-1,
        0x1.5555555555577p-2,
        -0x1.ffffffffffdcbp-3,
        0x1.999999995dd0cp-3,
        -0x1.55555556745a7p-3,
        0x1.24924a344de3p-3,
        -0x1.fffffa4423d65p-4,
        0x1.c7184282ad6cap-4,
        -0x1.999eb43b068ffp-4,
        0x1.78182f7afd085p-4,
        -0x1.5521375d145cdp-4
    };
    /* Per-subinterval constants: invc multiplies (z - c) and logc is the
       log(c) term of the decomposition described below. */
    static const struct {
        double invc, logc;
    } T[] = {
        {0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
        {0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
        {0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
        {0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2},
        {0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2},
        {0x1.69147332f0cbap+0, -0x1.602d076180000p-2},
        {0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2},
        {0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2},
        {0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2},
        {0x1.614b36b9ddc14p+0, -0x1.49da7fda85000p-2},
        {0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2},
        {0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2},
        {0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2},
        {0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2},
        {0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2},
        {0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2},
        {0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2},
        {0x1.52aff42064583p+0, -0x1.1e9e129279000p-2},
        {0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2},
        {0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2},
        {0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2},
        {0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2},
        {0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2},
        {0x1.4880524d48434p+0, -0x1.feb224586f000p-3},
        {0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3},
        {0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3},
        {0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3},
        {0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3},
        {0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3},
        {0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3},
        {0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3},
        {0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3},
        {0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3},
        {0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3},
        {0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3},
        {0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3},
        {0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3},
        {0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3},
        {0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3},
        {0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3},
        {0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3},
        {0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3},
        {0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3},
        {0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3},
        {0x1.293726014b530p+0, -0x1.31b996b490000p-3},
        {0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3},
        {0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3},
        {0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3},
        {0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3},
        {0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3},
        {0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4},
        {0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4},
        {0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4},
        {0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4},
        {0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4},
        {0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4},
        {0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4},
        {0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4},
        {0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4},
        {0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4},
        {0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4},
        {0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4},
        {0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4},
        {0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4},
        {0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5},
        {0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5},
        {0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5},
        {0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5},
        {0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5},
        {0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5},
        {0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5},
        {0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5},
        {0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6},
        {0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6},
        {0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6},
        {0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6},
        {0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7},
        {0x1.02865137932a9p+0, -0x1.419355daa0000p-7},
        {0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8},
        {0x1.008040614b195p+0, -0x1.0040979240000p-9},
        {0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9},
        {0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7},
        {0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6},
        {0x1.f25f63ceeadcdp-1, 0x1.b9fc114890000p-6},
        {0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5},
        {0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5},
        {0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5},
        {0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5},
        {0x1.e01e009609a56p-1, 0x1.07598e598c000p-4},
        {0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4},
        {0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4},
        {0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4},
        {0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4},
        {0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4},
        {0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4},
        {0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4},
        {0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4},
        {0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3},
        {0x1.bf583eeece73fp-1, 0x1.147858292b000p-3},
        {0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3},
        {0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3},
        {0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3},
        {0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3},
        {0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3},
        {0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3},
        {0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3},
        {0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3},
        {0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3},
        {0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3},
        {0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3},
        {0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3},
        {0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3},
        {0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3},
        {0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3},
        {0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3},
        {0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3},
        {0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2},
        {0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2},
        {0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2},
        {0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2},
        {0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2},
        {0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2},
        {0x1.8060195f40260p-1, 0x1.2595fd7636800p-2},
        {0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2},
        {0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2},
        {0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
        {0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
        {0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2}
    };
    /* Per-subinterval value c, stored as the pair chi + clo; both parts
       are subtracted from z before multiplying by invc. */
    static const struct {
        double chi, clo;
    } T2[] = {
        {0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
        {0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
        {0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
        {0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57},
        {0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56},
        {0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55},
        {0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55},
        {0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56},
        {0x1.710000e86978p-1, 0x1.bff6671097952p-56},
        {0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55},
        {0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57},
        {0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57},
        {0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55},
        {0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56},
        {0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55},
        {0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55},
        {0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55},
        {0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55},
        {0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55},
        {0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55},
        {0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55},
        {0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56},
        {0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55},
        {0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55},
        {0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55},
        {0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56},
        {0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55},
        {0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56},
        {0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55},
        {0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55},
        {0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60},
        {0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55},
        {0x1.a10001145b006p-1, 0x1.4ff489958da56p-56},
        {0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55},
        {0x1.a500010971d79p-1, 0x1.8fecadd78793p-55},
        {0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55},
        {0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55},
        {0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57},
        {0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55},
        {0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57},
        {0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58},
        {0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56},
        {0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56},
        {0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55},
        {0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56},
        {0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57},
        {0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57},
        {0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55},
        {0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55},
        {0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57},
        {0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55},
        {0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55},
        {0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56},
        {0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57},
        {0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55},
        {0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55},
        {0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56},
        {0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55},
        {0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58},
        {0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56},
        {0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56},
        {0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55},
        {0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55},
        {0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57},
        {0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56},
        {0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56},
        {0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56},
        {0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58},
        {0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55},
        {0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56},
        {0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58},
        {0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55},
        {0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59},
        {0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55},
        {0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55},
        {0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57},
        {0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56},
        {0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57},
        {0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56},
        {0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57},
        {0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55},
        {0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54},
        {0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54},
        {0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55},
        {0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57},
        {0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54},
        {0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55},
        {0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56},
        {0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55},
        {0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54},
        {0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54},
        {0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55},
        {0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54},
        {0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54},
        {0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57},
        {0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54},
        {0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54},
        {0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54},
        {0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56},
        {0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56},
        {0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56},
        {0x1.2b00014556313p+0, -0x1.2808233f21f02p-54},
        {0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55},
        {0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55},
        {0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55},
        {0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54},
        {0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54},
        {0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55},
        {0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54},
        {0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55},
        {0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56},
        {0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54},
        {0x1.410001532aff4p+0, 0x1.7f8375f198524p-57},
        {0x1.4300017478b29p+0, 0x1.301e672dc5143p-55},
        {0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55},
        {0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54},
        {0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54},
        {0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54},
        {0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54},
        {0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54},
        {0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57},
        {0x1.530001605277ap+0, -0x1.6bfcece233209p-54},
        {0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55},
        {0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54},
        {0x1.5900017e61012p+0, 0x1.87ec581afef9p-55},
        {0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
        {0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
        {0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54}
    };

    double w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
    UINT64 ix, iz, tmp;
    UINT32 top;
    int k, i;

    /* ix holds the raw bits of x, top its upper 16 bits (sign, exponent
       and the four highest mantissa bits). */
    ix = *(UINT64*)&x;
    top = ix >> 48;
    /* Unsigned range test on the bit pattern: true for
       0x3fee000000000000 <= ix < 0x3ff1090000000000. */
    if (ix - 0x3fee000000000000ULL < 0x3090000000000ULL) {
        double rhi, rlo;

        /* Handle close to 1.0 inputs separately. */
        /* Fix sign of zero with downward rounding when x==1. */
        if (ix == 0x3ff0000000000000ULL)
            return 0;
        r = x - 1.0;
        r2 = r * r;
        r3 = r * r2;
        y = r3 * (B[1] + r * B[2] + r2 * B[3] + r3 * (B[4] + r * B[5] + r2 * B[6] +
                    r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
        /* Worst-case error is around 0.507 ULP. */
        /* Split r into rhi + rlo so that the r*r*B[0] term can be
           accumulated as a hi/lo pair. */
        w = r * 0x1p27;
        rhi = r + w - w;
        rlo = r - rhi;
        w = rhi * rhi * B[0]; /* B[0] == -0.5. */
        hi = r + w;
        lo = r - hi + w;
        lo += B[0] * rlo * (rhi + r);
        y += lo;
        y += hi;
        return y;
    }
    /* Unsigned wrap-around makes this true for top < 0x0010 as well as
       for top >= 0x7ff0 (which includes every value with the sign bit set). */
    if (top - 0x0010 >= 0x7ff0 - 0x0010) {
        /* x < 0x1p-1022 or inf or nan. */
        /* +-0: the numerator sign is chosen so that the quotient is -inf. */
        if (ix * 2 == 0)
            return math_error(_SING, "log", x, 0, (top & 0x8000 ? 1.0 : -1.0) / x);
        if (ix == 0x7ff0000000000000ULL) /* log(inf) == inf. */
            return x;
        /* NaN of either sign is returned unchanged. */
        if ((top & 0x7ff0) == 0x7ff0 && (ix & 0xfffffffffffffULL))
            return x;
        /* Remaining values with the sign bit set: negative numbers and -inf. */
        if (top & 0x8000)
            return math_error(_DOMAIN, "log", x, 0, (x - x) / (x - x));
        /* x is subnormal, normalize it. */
        x *= 0x1p52;
        ix = *(UINT64*)&x;
        ix -= 52ULL << 52;
    }

    /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
       The range is split into N subintervals.
       The ith subinterval contains z and c is near its center. */
    tmp = ix - 0x3fe6000000000000ULL;
    i = (tmp >> (52 - 7)) % (1 << 7);
    k = (INT64)tmp >> 52; /* arithmetic shift */
    iz = ix - (tmp & 0xfffULL << 52);
    invc = T[i].invc;
    logc = T[i].logc;
    z = *(double*)&iz;

    /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
    /* r ~= z/c - 1, |r| < 1/(2*N). */
    r = (z - T2[i].chi - T2[i].clo) * invc;
    kd = (double)k;

    /* hi + lo = r + log(c) + k*Ln2. */
    w = kd * Ln2hi + logc;
    hi = w + r;
    lo = w - hi + r + kd * Ln2lo;

    /* log(x) = lo + (log1p(r) - r) + hi. */
    r2 = r * r; /* rounding error: 0x1p-54/N^2. */
    /* Worst case error if |y| > 0x1p-5:
       0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
       Worst case error if |y| > 0x1p-4:
       0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
    y = lo + r2 * A[0] +
        r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
    return y;
}
3449
3450 /*********************************************************************
3451  *              log10 (MSVCRT.@)
3452  */
3453 double CDECL log10( double x )
3454 {
3455     static const double ivln10hi = 4.34294481878168880939e-01,
3456         ivln10lo = 2.50829467116452752298e-11,
3457         log10_2hi = 3.01029995663611771306e-01,
3458         log10_2lo = 3.69423907715893078616e-13,
3459         Lg1 = 6.666666666666735130e-01,
3460         Lg2 = 3.999999999940941908e-01,
3461         Lg3 = 2.857142874366239149e-01,
3462         Lg4 = 2.222219843214978396e-01,
3463         Lg5 = 1.818357216161805012e-01,
3464         Lg6 = 1.531383769920937332e-01,
3465         Lg7 = 1.479819860511658591e-01;
3466
3467     union {double f; UINT64 i;} u = {x};
3468     double hfsq, f, s, z, R, w, t1, t2, dk, y, hi, lo, val_hi, val_lo;
3469     UINT32 hx;
3470     int k;
3471
3472     hx = u.i >> 32;
3473     k = 0;
3474     if (hx < 0x00100000 || hx >> 31) {
3475         if (u.i << 1 == 0)
3476             return math_error(_SING, "log10", x, 0, -1 / (x * x));
3477         if ((u.i & ~(1ULL << 63)) > 0x7ff0000000000000ULL)
3478             return x;
3479         if (hx >> 31)
3480             return math_error(_DOMAIN, "log10", x, 0, (x - x) / (x - x));
3481         /* subnormal number, scale x up */
3482         k -= 54;
3483         x *= 0x1p54;
3484         u.f = x;
3485         hx = u.i >> 32;
3486     } else if (hx >= 0x7ff00000) {
3487         return x;
3488     } else if (hx == 0x3ff00000 && u.i<<32 == 0)
3489         return 0;
3490
3491     /* reduce x into [sqrt(2)/2, sqrt(2)] */
3492     hx += 0x3ff00000 - 0x3fe6a09e;
3493     k += (int)(hx >> 20) - 0x3ff;
3494     hx = (hx & 0x000fffff) + 0x3fe6a09e;
3495     u.i = (UINT64)hx << 32 | (u.i & 0xffffffff);
3496     x = u.f;
3497
3498     f = x - 1.0;
3499     hfsq = 0.5 * f * f;
3500     s = f / (2.0 + f);
3501     z = s * s;
3502     w = z * z;
3503     t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
3504     t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
3505     R = t2 + t1;
3506
3507     /* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */
3508     hi = f - hfsq;
3509     u.f = hi;
3510     u.i &= (UINT64)-1 << 32;
3511     hi = u.f;
3512     lo = f - hi - hfsq + s * (hfsq + R);
3513
3514     /* val_hi+val_lo ~ log10(1+f) + k*log10(2) */
3515     val_hi = hi * ivln10hi;
3516     dk = k;
3517     y = dk * log10_2hi;
3518     val_lo = dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi;
3519
3520     /*
3521      * Extra precision in for adding y is not strictly needed
3522      * since there is no very large cancellation near x = sqrt(2) or
3523      * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs
3524      * with some parallelism and it reduces the error for many args.
3525      */
3526     w = y + val_hi;
3527     val_lo += (y - w) + val_hi;
3528     val_hi = w;
3529
3530     return val_lo + val_hi;
3531 }
3532
/* Evaluate log(x) with extra precision for pow().

   IX is the bit representation of x; in the subnormal range the caller
   passes a normalized pattern that uses the sign bit as part of the
   exponent.  The rounded logarithm is returned and the remaining error
   (about 15 additional bits) is stored in *TAIL, so that the return value
   plus *TAIL approximates log(x). */
static double pow_log(UINT64 ix, double *tail)
{
    /* One entry per subinterval: invc is 1/c for a point c near the centre
       of the subinterval, and logc + logctail is log(c) split into a head
       and a tail. */
    static const struct {
        double invc, logc, logctail;
    } T[] = {
        {0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48},
        {0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46},
        {0x1.6600000000000p+0, -0x1.5767717455800p-2, -0x1.362a4d5b6506dp-45},
        {0x1.6400000000000p+0, -0x1.51aad872df800p-2, -0x1.684e49eb067d5p-49},
        {0x1.6200000000000p+0, -0x1.4be5f95777800p-2, -0x1.41b6993293ee0p-47},
        {0x1.6000000000000p+0, -0x1.4618bc21c6000p-2, 0x1.3d82f484c84ccp-46},
        {0x1.5e00000000000p+0, -0x1.404308686a800p-2, 0x1.c42f3ed820b3ap-50},
        {0x1.5c00000000000p+0, -0x1.3a64c55694800p-2, 0x1.0b1c686519460p-45},
        {0x1.5a00000000000p+0, -0x1.347dd9a988000p-2, 0x1.5594dd4c58092p-45},
        {0x1.5800000000000p+0, -0x1.2e8e2bae12000p-2, 0x1.67b1e99b72bd8p-45},
        {0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46},
        {0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46},
        {0x1.5400000000000p+0, -0x1.22941fbcf7800p-2, -0x1.65a242853da76p-46},
        {0x1.5200000000000p+0, -0x1.1c898c1699800p-2, -0x1.fafbc68e75404p-46},
        {0x1.5000000000000p+0, -0x1.1675cababa800p-2, 0x1.f1fc63382a8f0p-46},
        {0x1.4e00000000000p+0, -0x1.1058bf9ae4800p-2, -0x1.6a8c4fd055a66p-45},
        {0x1.4c00000000000p+0, -0x1.0a324e2739000p-2, -0x1.c6bee7ef4030ep-47},
        {0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48},
        {0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48},
        {0x1.4800000000000p+0, -0x1.fb9186d5e4000p-3, 0x1.d572aab993c87p-47},
        {0x1.4600000000000p+0, -0x1.ef0adcbdc6000p-3, 0x1.b26b79c86af24p-45},
        {0x1.4400000000000p+0, -0x1.e27076e2af000p-3, -0x1.72f4f543fff10p-46},
        {0x1.4200000000000p+0, -0x1.d5c216b4fc000p-3, 0x1.1ba91bbca681bp-45},
        {0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45},
        {0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45},
        {0x1.3e00000000000p+0, -0x1.bc286742d9000p-3, 0x1.94eb0318bb78fp-46},
        {0x1.3c00000000000p+0, -0x1.af3c94e80c000p-3, 0x1.a4e633fcd9066p-52},
        {0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45},
        {0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45},
        {0x1.3800000000000p+0, -0x1.9525a9cf45000p-3, -0x1.ad1d904c1d4e3p-45},
        {0x1.3600000000000p+0, -0x1.87fa06520d000p-3, 0x1.bbdbf7fdbfa09p-45},
        {0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45},
        {0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45},
        {0x1.3200000000000p+0, -0x1.6d60fe719d000p-3, -0x1.0e46aa3b2e266p-46},
        {0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46},
        {0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46},
        {0x1.2e00000000000p+0, -0x1.526e5e3a1b000p-3, -0x1.0de8b90075b8fp-45},
        {0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46},
        {0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46},
        {0x1.2a00000000000p+0, -0x1.371fc201e9000p-3, 0x1.178864d27543ap-48},
        {0x1.2800000000000p+0, -0x1.29552f81ff000p-3, -0x1.48d301771c408p-45},
        {0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45},
        {0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45},
        {0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47},
        {0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47},
        {0x1.2200000000000p+0, -0x1.fec9131dbe000p-4, -0x1.575545ca333f2p-45},
        {0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45},
        {0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45},
        {0x1.1e00000000000p+0, -0x1.c5e548f5bc000p-4, -0x1.d0c57585fbe06p-46},
        {0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45},
        {0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45},
        {0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46},
        {0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46},
        {0x1.1800000000000p+0, -0x1.6f0d28ae56000p-4, -0x1.69737c93373dap-45},
        {0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46},
        {0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46},
        {0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45},
        {0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45},
        {0x1.1200000000000p+0, -0x1.16536eea38000p-4, 0x1.47c5e768fa309p-46},
        {0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45},
        {0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45},
        {0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46},
        {0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46},
        {0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45},
        {0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45},
        {0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48},
        {0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48},
        {0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45},
        {0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45},
        {0x1.0600000000000p+0, -0x1.7b91b07d58000p-6, -0x1.88d5493faa639p-45},
        {0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50},
        {0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50},
        {0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46},
        {0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46},
        {0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0},
        {0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0},
        {0x1.fc00000000000p-1, 0x1.0101575890000p-7, -0x1.0c76b999d2be8p-46},
        {0x1.f800000000000p-1, 0x1.0205658938000p-6, -0x1.3dc5b06e2f7d2p-45},
        {0x1.f400000000000p-1, 0x1.8492528c90000p-6, -0x1.aa0ba325a0c34p-45},
        {0x1.f000000000000p-1, 0x1.0415d89e74000p-5, 0x1.111c05cf1d753p-47},
        {0x1.ec00000000000p-1, 0x1.466aed42e0000p-5, -0x1.c167375bdfd28p-45},
        {0x1.e800000000000p-1, 0x1.894aa149fc000p-5, -0x1.97995d05a267dp-46},
        {0x1.e400000000000p-1, 0x1.ccb73cdddc000p-5, -0x1.a68f247d82807p-46},
        {0x1.e200000000000p-1, 0x1.eea31c006c000p-5, -0x1.e113e4fc93b7bp-47},
        {0x1.de00000000000p-1, 0x1.1973bd1466000p-4, -0x1.5325d560d9e9bp-45},
        {0x1.da00000000000p-1, 0x1.3bdf5a7d1e000p-4, 0x1.cc85ea5db4ed7p-45},
        {0x1.d600000000000p-1, 0x1.5e95a4d97a000p-4, -0x1.c69063c5d1d1ep-45},
        {0x1.d400000000000p-1, 0x1.700d30aeac000p-4, 0x1.c1e8da99ded32p-49},
        {0x1.d000000000000p-1, 0x1.9335e5d594000p-4, 0x1.3115c3abd47dap-45},
        {0x1.cc00000000000p-1, 0x1.b6ac88dad6000p-4, -0x1.390802bf768e5p-46},
        {0x1.ca00000000000p-1, 0x1.c885801bc4000p-4, 0x1.646d1c65aacd3p-45},
        {0x1.c600000000000p-1, 0x1.ec739830a2000p-4, -0x1.dc068afe645e0p-45},
        {0x1.c400000000000p-1, 0x1.fe89139dbe000p-4, -0x1.534d64fa10afdp-45},
        {0x1.c000000000000p-1, 0x1.1178e8227e000p-3, 0x1.1ef78ce2d07f2p-45},
        {0x1.be00000000000p-1, 0x1.1aa2b7e23f000p-3, 0x1.ca78e44389934p-45},
        {0x1.ba00000000000p-1, 0x1.2d1610c868000p-3, 0x1.39d6ccb81b4a1p-47},
        {0x1.b800000000000p-1, 0x1.365fcb0159000p-3, 0x1.62fa8234b7289p-51},
        {0x1.b400000000000p-1, 0x1.4913d8333b000p-3, 0x1.5837954fdb678p-45},
        {0x1.b200000000000p-1, 0x1.527e5e4a1b000p-3, 0x1.633e8e5697dc7p-45},
        {0x1.ae00000000000p-1, 0x1.6574ebe8c1000p-3, 0x1.9cf8b2c3c2e78p-46},
        {0x1.ac00000000000p-1, 0x1.6f0128b757000p-3, -0x1.5118de59c21e1p-45},
        {0x1.aa00000000000p-1, 0x1.7898d85445000p-3, -0x1.c661070914305p-46},
        {0x1.a600000000000p-1, 0x1.8beafeb390000p-3, -0x1.73d54aae92cd1p-47},
        {0x1.a400000000000p-1, 0x1.95a5adcf70000p-3, 0x1.7f22858a0ff6fp-47},
        {0x1.a000000000000p-1, 0x1.a93ed3c8ae000p-3, -0x1.8724350562169p-45},
        {0x1.9e00000000000p-1, 0x1.b31d8575bd000p-3, -0x1.c358d4eace1aap-47},
        {0x1.9c00000000000p-1, 0x1.bd087383be000p-3, -0x1.d4bc4595412b6p-45},
        {0x1.9a00000000000p-1, 0x1.c6ffbc6f01000p-3, -0x1.1ec72c5962bd2p-48},
        {0x1.9600000000000p-1, 0x1.db13db0d49000p-3, -0x1.aff2af715b035p-45},
        {0x1.9400000000000p-1, 0x1.e530effe71000p-3, 0x1.212276041f430p-51},
        {0x1.9200000000000p-1, 0x1.ef5ade4dd0000p-3, -0x1.a211565bb8e11p-51},
        {0x1.9000000000000p-1, 0x1.f991c6cb3b000p-3, 0x1.bcbecca0cdf30p-46},
        {0x1.8c00000000000p-1, 0x1.07138604d5800p-2, 0x1.89cdb16ed4e91p-48},
        {0x1.8a00000000000p-1, 0x1.0c42d67616000p-2, 0x1.7188b163ceae9p-45},
        {0x1.8800000000000p-1, 0x1.1178e8227e800p-2, -0x1.c210e63a5f01cp-45},
        {0x1.8600000000000p-1, 0x1.16b5ccbacf800p-2, 0x1.b9acdf7a51681p-45},
        {0x1.8400000000000p-1, 0x1.1bf99635a6800p-2, 0x1.ca6ed5147bdb7p-45},
        {0x1.8200000000000p-1, 0x1.214456d0eb800p-2, 0x1.a87deba46baeap-47},
        {0x1.7e00000000000p-1, 0x1.2bef07cdc9000p-2, 0x1.a9cfa4a5004f4p-45},
        {0x1.7c00000000000p-1, 0x1.314f1e1d36000p-2, -0x1.8e27ad3213cb8p-45},
        {0x1.7a00000000000p-1, 0x1.36b6776be1000p-2, 0x1.16ecdb0f177c8p-46},
        {0x1.7800000000000p-1, 0x1.3c25277333000p-2, 0x1.83b54b606bd5cp-46},
        {0x1.7600000000000p-1, 0x1.419b423d5e800p-2, 0x1.8e436ec90e09dp-47},
        {0x1.7400000000000p-1, 0x1.4718dc271c800p-2, -0x1.f27ce0967d675p-45},
        {0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45},
        {0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45},
        {0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46},
        {0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47},
    };
    /* Coefficients used after the linear term of log1p(r); poly[0] is -0.5
       and the others carry compensating factors for the way the powers of
       r are formed below. */
    static const double poly[] = {
        -0x1p-1,
        0x1.555555555556p-2 * -2,
        -0x1.0000000000006p-2 * -2,
        0x1.999999959554ep-3 * 4,
        -0x1.555555529a47ap-3 * 4,
        0x1.2495b9b4845e9p-3 * -8,
        -0x1.0002b8b263fc3p-3 * -8
    };
    /* ln(2) split into a head and a tail. */
    static const double ln2_head = 0x1.62e42fefa3800p-1,
        ln2_tail = 0x1.ef35793c76730p-45;

    union { UINT64 i; double f; } zfull, zhead;
    UINT64 delta;
    int e, slot;
    double z, zhi, zlo, r, rhi, rlo;
    double ed, inv_c, log_c, log_c_tail;
    double sum1, sum2, head, res;
    double err1, err2, err3, err4, err;
    double hr, hr2, hr3, hrhi, hrhi2, rest;

    /* x = 2^e z; where z is in range [OFF,2*OFF) and exact.
       The range is split into N subintervals.
       The subinterval number `slot` contains z and c is near its center. */
    delta = ix - 0x3fe6955500000000ULL;
    slot = (delta >> (52 - 7)) & ((1 << 7) - 1);
    e = (INT64)delta >> 52; /* relies on an arithmetic right shift */
    zfull.i = ix - (delta & (0xfffULL << 52));
    z = zfull.f;
    ed = e;

    /* log(x) = e*Ln2 + log(c) + log1p(z/c-1). */
    inv_c = T[slot].invc;
    log_c = T[slot].logc;
    log_c_tail = T[slot].logctail;

    /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
       |z/c - 1| < 1/N, so r = z/c - 1 is exactly representable.
       Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|:
       zhi is z rounded to its upper 32 bits. */
    zhead.i = (zfull.i + (1ULL << 31)) & ~0xffffffffULL;
    zhi = zhead.f;
    zlo = z - zhi;
    rhi = zhi * inv_c - 1.0;
    rlo = zlo * inv_c;
    r = rhi + rlo;

    /* e*Ln2 + log(c) + r, with the rounding errors collected separately. */
    sum1 = ed * ln2_head + log_c;
    sum2 = sum1 + r;
    err1 = ed * ln2_tail + log_c_tail;
    err2 = sum1 - sum2 + r;

    /* Evaluation is optimized assuming superscalar pipelined execution. */
    hr = poly[0] * r; /* poly[0] = -0.5. */
    hr2 = r * hr;
    hr3 = r * hr2;

    /* e*Ln2 + log(c) + r + poly[0]*r*r. */
    hrhi = poly[0] * rhi;
    hrhi2 = rhi * hrhi;
    head = sum2 + hrhi2;
    err3 = rlo * (hr + hrhi);
    err4 = sum2 - head + hrhi2;

    /* rest = log1p(r) - r - poly[0]*r*r. */
    rest = (hr3 * (poly[1] + r * poly[2] + hr2 * (poly[3] + r * poly[4] + hr2 * (poly[5] + r * poly[6]))));

    err = err1 + err2 + err3 + err4 + rest;
    res = head + err;
    *tail = head - res + err;
    return res;
}
3735
3736 /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
3737    The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */
3738 static double pow_exp(double argx, double argy, double x, double xtail, UINT32 sign_bias)
3739 {
3740     static const double C[] = {
3741         0x1.ffffffffffdbdp-2,
3742         0x1.555555555543cp-3,
3743         0x1.55555cf172b91p-5,
3744         0x1.1111167a4d017p-7
3745     };
3746     static const double invln2N = 0x1.71547652b82fep0 * (1 << 7),
3747         negln2hiN = -0x1.62e42fefa0000p-8,
3748         negln2loN = -0x1.cf79abc9e3b3ap-47;
3749
3750     UINT32 abstop;
3751     UINT64 ki, idx, top, sbits;
3752     double kd, z, r, r2, scale, tail, tmp;
3753
3754     abstop = (*(UINT64*)&x >> 52) & 0x7ff;
3755     if (abstop - 0x3c9 >= 0x408 - 0x3c9) {
3756         if (abstop - 0x3c9 >= 0x80000000) {
3757             /* Avoid spurious underflow for tiny x. */
3758             /* Note: 0 is common input. */
3759             double one = 1.0 + x;
3760             return sign_bias ? -one : one;
3761         }
3762         if (abstop >= 0x409) {
3763             /* Note: inf and nan are already handled. */
3764             if (*(UINT64*)&x >> 63)
3765                 return math_error(_UNDERFLOW, "pow", argx, argy, (sign_bias ? -DBL_MIN : DBL_MIN) * DBL_MIN);
3766             return math_error(_OVERFLOW, "pow", argx, argy, (sign_bias ? -DBL_MAX : DBL_MAX) * DBL_MAX);
3767         }
3768         /* Large x is special cased below. */
3769         abstop = 0;
3770     }
3771
3772     /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
3773     /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
3774     z = invln2N * x;
3775     kd = __round(z);
3776     ki = (INT64)kd;
3777     r = x + kd * negln2hiN + kd * negln2loN;
3778     /* The code assumes 2^-200 < |xtail| < 2^-8/N. */
3779     r += xtail;
3780     /* 2^(k/N) ~= scale * (1 + tail). */
3781     idx = 2 * (ki % (1 << 7));
3782     top = (ki + sign_bias) << (52 - 7);
3783     tail = *(double*)&exp_T[idx];
3784     /* This is only a valid scale when -1023*N < k < 1024*N. */
3785     sbits = exp_T[idx + 1] + top;
3786     /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
3787     /* Evaluation is optimized assuming superscalar pipelined execution. */
3788     r2 = r * r;
3789     /* Without fma the worst case error is 0.25/N ulp larger. */
3790     /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
3791     tmp = tail + r + r2 * (C[0] + r * C[1]) + r2 * r2 * (C[2] + r * C[3]);
3792     if (abstop == 0) {
3793         /* Handle cases that may overflow or underflow when computing the result that
3794            is scale*(1+TMP) without intermediate rounding. The bit representation of
3795            scale is in SBITS, however it has a computed exponent that may have
3796            overflown into the sign bit so that needs to be adjusted before using it as
3797            a double. (int32_t)KI is the k used in the argument reduction and exponent
3798            adjustment of scale, positive k here means the result may overflow and
3799            negative k means the result may underflow. */
3800         double scale, y;
3801
3802         if ((ki & 0x80000000) == 0) {
3803             /* k > 0, the exponent of scale might have overflowed by <= 460. */
3804             sbits -= 1009ull << 52;
3805             scale = *(double*)&sbits;
3806             y = 0x1p1009 * (scale + scale * tmp);
3807             if (isinf(y))
3808                 return math_error(_OVERFLOW, "pow", argx, argy, y);
3809             return y;
3810         }
3811         /* k < 0, need special care in the subnormal range. */
3812         sbits += 1022ull << 52;
3813         /* Note: sbits is signed scale. */
3814         scale = *(double*)&sbits;
3815         y = scale + scale * tmp;
3816         if (fabs(y) < 1.0) {
3817             /* Round y to the right precision before scaling it into the subnormal
3818                range to avoid double rounding that can cause 0.5+E/2 ulp error where
3819                E is the worst-case ulp error outside the subnormal range. So this
3820                is only useful if the goal is better than 1 ulp worst-case error. */
3821             double hi, lo, one = 1.0;
3822             if (y < 0.0)
3823                 one = -1.0;
3824             lo = scale - y + scale * tmp;
3825             hi = one + y;
3826             lo = one - hi + y + lo;
3827             y = hi + lo - one;
3828             /* Fix the sign of 0. */
3829             if (y == 0.0) {
3830                 sbits &= 0x8000000000000000ULL;
3831                 y = *(double*)&sbits;
3832             }
3833             /* The underflow exception needs to be signaled explicitly. */
3834             fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022);
3835             y = 0x1p-1022 * y;
3836             return math_error(_UNDERFLOW, "pow", argx, argy, y);
3837         }
3838         y = 0x1p-1022 * y;
3839         return y;
3840     }
3841     scale = *(double*)&sbits;
3842     /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
3843        is no spurious underflow here even without fma. */
3844     return scale + scale * tmp;
3845 }
3846
/* Classify a non-zero finite floating-point value, given as its bit
   representation: returns 0 if it is not an integer, 1 if it is an odd
   integer and 2 if it is an even integer. */
static inline int pow_checkint(UINT64 iy)
{
    const int biased_exp = (int)((iy >> 52) & 0x7ff);
    UINT64 unit_bit;

    /* |value| < 1 cannot be an integer (zero is excluded by contract). */
    if (biased_exp < 0x3ff)
        return 0;
    /* |value| >= 2^53: every representable value is an even integer. */
    if (biased_exp > 0x3ff + 52)
        return 2;

    /* Bit of the representation that has weight 1; anything below it is
       the fractional part. */
    unit_bit = 1ULL << (0x3ff + 52 - biased_exp);
    if (iy & (unit_bit - 1))
        return 0;
    return (iy & unit_bit) ? 1 : 2;
}
3862
3863 /*********************************************************************
3864  *              pow (MSVCRT.@)
3865  *
3866  * Copied from musl: src/math/pow.c
3867  */
3868 double CDECL pow( double x, double y )
3869 {
3870     UINT32 sign_bias = 0;
3871     UINT64 ix, iy;
3872     UINT32 topx, topy;
3873     double lo, hi, ehi, elo, yhi, ylo, lhi, llo;
3874
3875     ix = *(UINT64*)&x;
3876     iy = *(UINT64*)&y;
3877     topx = ix >> 52;
3878     topy = iy >> 52;
3879     if (topx - 0x001 >= 0x7ff - 0x001 ||
3880             (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
3881         /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
3882            and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
3883         /* Special cases: (x < 0x1p-126 or inf or nan) or
3884            (|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
3885         if (2 * iy - 1 >= 2 * 0x7ff0000000000000ULL - 1) {
3886             if (2 * iy == 0)
3887                 return 1.0;
3888             if (ix == 0x3ff0000000000000ULL)
3889                 return 1.0;
3890             if (2 * ix > 2 * 0x7ff0000000000000ULL ||
3891                     2 * iy > 2 * 0x7ff0000000000000ULL)
3892                 return x + y;
3893             if (2 * ix == 2 * 0x3ff0000000000000ULL)
3894                 return 1.0;
3895             if ((2 * ix < 2 * 0x3ff0000000000000ULL) == !(iy >> 63))
3896                 return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
3897             return y * y;
3898         }
3899         if (2 * ix - 1 >= 2 * 0x7ff0000000000000ULL - 1) {
3900             double x2 = x * x;
3901             if (ix >> 63 && pow_checkint(iy) == 1)
3902                 x2 = -x2;
3903             if (iy & 0x8000000000000000ULL && x2 == 0.0)
3904                 return math_error(_SING, "pow", x, y, 1 / x2);
3905             /* Without the barrier some versions of clang hoist the 1/x2 and
3906                thus division by zero exception can be signaled spuriously. */
3907             return iy >> 63 ? fp_barrier(1 / x2) : x2;
3908         }
3909         /* Here x and y are non-zero finite. */
3910         if (ix >> 63) {
3911             /* Finite x < 0. */
3912             int yint = pow_checkint(iy);
3913             if (yint == 0)
3914                 return math_error(_DOMAIN, "pow", x, y, 0 / (x - x));
3915             if (yint == 1)
3916                 sign_bias = 0x800 << 7;
3917             ix &= 0x7fffffffffffffff;
3918             topx &= 0x7ff;
3919         }
3920         if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
3921             /* Note: sign_bias == 0 here because y is not odd. */
3922             if (ix == 0x3ff0000000000000ULL)
3923                 return 1.0;
3924             if ((topy & 0x7ff) < 0x3be) {
3925                 /* |y| < 2^-65, x^y ~= 1 + y*log(x). */
3926                 return ix > 0x3ff0000000000000ULL ? 1.0 + y : 1.0 - y;
3927             }
3928             if ((ix > 0x3ff0000000000000ULL) == (topy < 0x800))
3929                 return math_error(_OVERFLOW, "pow", x, y, fp_barrier(DBL_MAX) * DBL_MAX);
3930             return math_error(_UNDERFLOW, "pow", x, y, fp_barrier(DBL_MIN) * DBL_MIN);
3931         }
3932         if (topx == 0) {
3933             /* Normalize subnormal x so exponent becomes negative. */
3934             x *= 0x1p52;
3935             ix = *(UINT64*)&x;
3936             ix &= 0x7fffffffffffffff;
3937             ix -= 52ULL << 52;
3938         }
3939     }
3940
3941     hi = pow_log(ix, &lo);
3942     iy &= -1ULL << 27;
3943     yhi = *(double*)&iy;
3944     ylo = y - yhi;
3945     *(UINT64*)&lhi = *(UINT64*)&hi & -1ULL << 27;
3946     llo = fp_barrier(hi - lhi + lo);
3947     ehi = yhi * lhi;
3948     elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */
3949     return pow_exp(x, y, ehi, elo, sign_bias);
3950 }
3951
3952 /*********************************************************************
3953  *              sin (MSVCRT.@)
3954  *
3955  * Copied from musl: src/math/sin.c
3956  */
3957 double CDECL sin( double x )
3958 {
3959     double y[2];
3960     UINT32 ix;
3961     unsigned n;
3962
3963     ix = *(ULONGLONG*)&x >> 32;
3964     ix &= 0x7fffffff;
3965
3966     /* |x| ~< pi/4 */
3967     if (ix <= 0x3fe921fb) {
3968         if (ix < 0x3e500000) { /* |x| < 2**-26 */
3969             /* raise inexact if x != 0 and underflow if subnormal*/
3970             fp_barrier(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f);
3971             return x;
3972         }
3973         return __sin(x, 0.0, 0);
3974     }
3975
3976     /* sin(Inf or NaN) is NaN */
3977     if (isinf(x))
3978         return math_error(_DOMAIN, "sin", x, 0, x - x);
3979     if (ix >= 0x7ff00000)
3980         return x - x;
3981
3982     /* argument reduction needed */
3983     n = __rem_pio2(x, y);
3984     switch (n&3) {
3985     case 0: return  __sin(y[0], y[1], 1);
3986     case 1: return  __cos(y[0], y[1]);
3987     case 2: return -__sin(y[0], y[1], 1);
3988     default: return -__cos(y[0], y[1]);
3989     }
3990 }
3991
3992 /*********************************************************************
3993  *              sinh (MSVCRT.@)
3994  */
3995 double CDECL sinh( double x )
3996 {
3997     UINT64 ux = *(UINT64*)&x;
3998     UINT64 sign = ux & 0x8000000000000000ULL;
3999     UINT32 w;
4000     double t, h, absx;
4001
4002     h = 0.5;
4003     if (ux >> 63)
4004         h = -h;
4005     /* |x| */
4006     ux &= (UINT64)-1 / 2;
4007     absx = *(double*)&ux;
4008     w = ux >> 32;
4009
4010     /* |x| < log(DBL_MAX) */
4011     if (w < 0x40862e42) {
4012         t = __expm1(absx);
4013         if (w < 0x3ff00000) {
4014             if (w < 0x3ff00000 - (26 << 20))
4015                 return x;
4016             return h * (2 * t - t * t / (t + 1));
4017         }
4018         return h * (t + t / (t + 1));
4019     }
4020
4021     /* |x| > log(DBL_MAX) or nan */
4022     /* note: the result is stored to handle overflow */
4023     if (ux > 0x7ff0000000000000ULL)
4024         *(UINT64*)&t = ux | sign | 0x0008000000000000ULL;
4025     else
4026         t = __expo2(absx, 2 * h);
4027     return t;
4028 }
4029
4030 static BOOL sqrt_validate( double *x, BOOL update_sw )
4031 {
4032     short c = _dclass(*x);
4033
4034     if (c == FP_ZERO) return FALSE;
4035     if (c == FP_NAN)
4036     {
4037 #ifdef __i386__
4038         if (update_sw)
4039             *x = math_error(_DOMAIN, "sqrt", *x, 0, *x);
4040 #else
4041         /* set signaling bit */
4042         *(ULONGLONG*)x |= 0x8000000000000ULL;
4043 #endif
4044         return FALSE;
4045     }
4046     if (signbit(*x))
4047     {
4048         *x = math_error(_DOMAIN, "sqrt", *x, 0, ret_nan(update_sw));
4049         return FALSE;
4050     }
4051     if (c == FP_INFINITE) return FALSE;
4052     return TRUE;
4053 }
4054
4055 #if defined(__x86_64__) || defined(__i386__)
4056 double CDECL sse2_sqrt(double);
4057 __ASM_GLOBAL_FUNC( sse2_sqrt,
4058         "sqrtsd %xmm0, %xmm0\n\t"
4059         "ret" )
4060 #endif
4061
/* x87_sqrt: assembly routine, built on i386 only, that loads the double at
   4(%esp) onto the x87 stack and executes fsqrt on it.  The instruction is
   bracketed by SET_X87_CW(0xc00) / RESET_X87_CW, which presumably switch
   the x87 control word for the duration of the operation and restore it
   afterwards (macros defined elsewhere - TODO confirm). */
#if defined(__i386__)
double CDECL x87_sqrt(double);
__ASM_GLOBAL_FUNC( x87_sqrt,
                   "fldl 4(%esp)\n\t"
                   SET_X87_CW(0xc00)
                   "fsqrt\n\t"
                   RESET_X87_CW
                   "ret" )
#endif
4071
4072 /*********************************************************************
4073  *              sqrt (MSVCRT.@)
4074  *
4075  * Copied from musl: src/math/sqrt.c
4076  */
4077 double CDECL sqrt( double x )
4078 {
4079 #ifdef __x86_64__
4080     if (!sqrt_validate(&x, TRUE))
4081         return x;
4082
4083     return sse2_sqrt(x);
4084 #elif defined( __i386__ )
4085     if (!sqrt_validate(&x, TRUE))
4086         return x;
4087
4088     return x87_sqrt(x);
4089 #else
4090     static const double tiny = 1.0e-300;
4091
4092     double z;
4093     int sign = 0x80000000;
4094     int ix0,s0,q,m,t,i;
4095     unsigned int r,t1,s1,ix1,q1;
4096     ULONGLONG ix;
4097
4098     if (!sqrt_validate(&x, TRUE))
4099         return x;
4100
4101     ix = *(ULONGLONG*)&x;
4102     ix0 = ix >> 32;
4103     ix1 = ix;
4104
4105     /* normalize x */
4106     m = ix0 >> 20;
4107     if (m == 0) {  /* subnormal x */
4108         while (ix0 == 0) {
4109             m -= 21;
4110             ix0 |= (ix1 >> 11);
4111             ix1 <<= 21;
4112         }
4113         for (i=0; (ix0 & 0x00100000) == 0; i++)
4114             ix0 <<= 1;
4115         m -= i - 1;
4116         ix0 |= ix1 >> (32 - i);
4117         ix1 <<= i;
4118     }
4119     m -= 1023;    /* unbias exponent */
4120     ix0 = (ix0 & 0x000fffff) | 0x00100000;
4121     if (m & 1) {  /* odd m, double x to make it even */
4122         ix0 += ix0 + ((ix1 & sign) >> 31);
4123         ix1 += ix1;
4124     }
4125     m >>= 1;      /* m = [m/2] */
4126
4127     /* generate sqrt(x) bit by bit */
4128     ix0 += ix0 + ((ix1 & sign) >> 31);
4129     ix1 += ix1;
4130     q = q1 = s0 = s1 = 0;  /* [q,q1] = sqrt(x) */
4131     r = 0x00200000;        /* r = moving bit from right to left */
4132
4133     while (r != 0) {
4134         t = s0 + r;
4135         if (t <= ix0) {
4136             s0   = t + r;
4137             ix0 -= t;
4138             q   += r;
4139         }
4140         ix0 += ix0 + ((ix1 & sign) >> 31);
4141         ix1 += ix1;
4142         r >>= 1;
4143     }
4144
4145     r = sign;
4146     while (r != 0) {
4147         t1 = s1 + r;
4148         t  = s0;
4149         if (t < ix0 || (t == ix0 && t1 <= ix1)) {
4150             s1 = t1 + r;
4151             if ((t1&sign) == sign && (s1 & sign) == 0)
4152                 s0++;
4153             ix0 -= t;
4154             if (ix1 < t1)
4155                 ix0--;
4156             ix1 -= t1;
4157             q1 += r;
4158         }
4159         ix0 += ix0 + ((ix1 & sign) >> 31);
4160         ix1 += ix1;
4161         r >>= 1;
4162     }
4163
4164     /* use floating add to find out rounding direction */
4165     if ((ix0 | ix1) != 0) {
4166         z = 1.0 - tiny; /* raise inexact flag */
4167         if (z >= 1.0) {
4168             z = 1.0 + tiny;
4169             if (q1 == (unsigned int)0xffffffff) {
4170                 q1 = 0;
4171                 q++;
4172             } else if (z > 1.0) {
4173                 if (q1 == (unsigned int)0xfffffffe)
4174                     q++;
4175                 q1 += 2;
4176             } else
4177                 q1 += q1 & 1;
4178         }
4179     }
4180     ix0 = (q >> 1) + 0x3fe00000;
4181     ix1 = q1 >> 1;
4182     if (q & 1)
4183         ix1 |= sign;
4184     ix = ix0 + ((unsigned int)m << 20);
4185     ix <<= 32;
4186     ix |= ix1;
4187     return *(double*)&ix;
4188 #endif
4189 }
4190
/* Copied from musl: src/math/__tan.c
 *
 * Tangent kernel.  x is the argument and y its tail (a small correction
 * added to x).  With odd == 0 the result is tan(x + y); with odd != 0 it
 * is -1/tan(x + y).
 */
static double __tan(double x, double y, int odd)
{
    static const double T[] = {
        3.33333333333334091986e-01,
        1.33333333333201242699e-01,
        5.39682539762260521377e-02,
        2.18694882948595424599e-02,
        8.86323982359930005737e-03,
        3.59207910759131235356e-03,
        1.45620945432529025516e-03,
        5.88041240820264096874e-04,
        2.46463134818469906812e-04,
        7.81794442939557092300e-05,
        7.14072491382608190305e-05,
        -1.85586374855275456654e-05,
        2.59073051863633712884e-05,
    };
    /* pi/4 split into a head and a tail */
    static const double pio4 = 7.85398163397448278999e-01;
    static const double pio4lo = 3.06161699786838301793e-17;

    union { double f; ULONGLONG i; } bits, head, recip_head;
    double x2, x4, even_part, odd_part, x3, corr, sum, unit, res, rest, recip;
    UINT32 hw;
    int near_pio4, negative = 0;

    bits.f = x;
    hw = bits.i >> 32;
    near_pio4 = (hw & 0x7fffffff) >= 0x3FE59428; /* |x| >= 0.6744 */
    if (near_pio4) {
        /* work on pi/4 - |x| instead; the sign is restored at the end */
        negative = hw >> 31;
        if (negative) {
            x = -x;
            y = -y;
        }
        x = (pio4 - x) + (pio4lo - y);
        y = 0.0;
    }

    x2 = x * x;
    x4 = x2 * x2;
    /* the polynomial is evaluated as two interleaved halves */
    even_part = T[1] + x4 * (T[3] + x4 * (T[5] + x4 * (T[7] + x4 * (T[9] + x4 * T[11]))));
    odd_part = x2 * (T[2] + x4 * (T[4] + x4 * (T[6] + x4 * (T[8] + x4 * (T[10] + x4 * T[12])))));
    x3 = x2 * x;
    corr = y + x2 * (x3 * (even_part + odd_part) + y) + x3 * T[0];
    sum = x + corr;

    if (near_pio4) {
        unit = 1 - 2 * odd;
        res = unit - 2.0 * (x + (corr - sum * sum / (sum + unit)));
        return negative ? -res : res;
    }
    if (!odd)
        return sum;

    /* -1.0/(x+corr) has up to 2ulp error, so compute it accurately */
    head.f = sum;
    head.i &= 0xffffffff00000000ULL;
    rest = corr - (head.f - x);       /* head.f+rest = corr+x */
    recip = -1.0 / sum;
    recip_head.f = recip;
    recip_head.i &= 0xffffffff00000000ULL;
    return recip_head.f + recip * (1.0 + recip_head.f * head.f + recip_head.f * rest);
}
4249
4250 /*********************************************************************
4251  *              tan (MSVCRT.@)
4252  *
4253  * Copied from musl: src/math/tan.c
4254  */
4255 double CDECL tan( double x )
4256 {
4257     double y[2];
4258     UINT32 ix;
4259     unsigned n;
4260
4261     ix = *(ULONGLONG*)&x >> 32;
4262     ix &= 0x7fffffff;
4263
4264     if (ix <= 0x3fe921fb) { /* |x| ~< pi/4 */
4265         if (ix < 0x3e400000) { /* |x| < 2**-27 */
4266             /* raise inexact if x!=0 and underflow if subnormal */
4267             fp_barrier(ix < 0x00100000 ? x / 0x1p120f : x + 0x1p120f);
4268             return x;
4269         }
4270         return __tan(x, 0.0, 0);
4271     }
4272
4273     if (isinf(x))
4274         return math_error(_DOMAIN, "tan", x, 0, x - x);
4275     if (ix >= 0x7ff00000)
4276         return x - x;
4277
4278     n = __rem_pio2(x, y);
4279     return __tan(y[0], y[1], n & 1);
4280 }
4281
4282 /*********************************************************************
4283  *              tanh (MSVCRT.@)
4284  */
4285 double CDECL tanh( double x )
4286 {
4287     UINT64 ui = *(UINT64*)&x;
4288     UINT64 sign = ui & 0x8000000000000000ULL;
4289     UINT32 w;
4290     double t;
4291
4292     /* x = |x| */
4293     ui &= (UINT64)-1 / 2;
4294     x = *(double*)&ui;
4295     w = ui >> 32;
4296
4297     if (w > 0x3fe193ea) {
4298         /* |x| > log(3)/2 ~= 0.5493 or nan */
4299         if (w > 0x40340000) {
4300             if (ui > 0x7ff0000000000000ULL) {
4301                 *(UINT64*)&x = ui | sign | 0x0008000000000000ULL;
4302 #if _MSVCR_VER < 140
4303                 return math_error(_DOMAIN, "tanh", x, 0, x);
4304 #else
4305                 return x;
4306 #endif
4307             }
4308             /* |x| > 20 */
4309             /* note: this branch avoids raising overflow */
4310             fp_barrier(x + 0x1p120f);
4311             t = 1 - 0 / x;
4312         } else {
4313             t = __expm1(2 * x);
4314             t = 1 - 2 / (t + 2);
4315         }
4316     } else if (w > 0x3fd058ae) {
4317         /* |x| > log(5/3)/2 ~= 0.2554 */
4318         t = __expm1(2 * x);
4319         t = t / (t + 2);
4320     } else if (w >= 0x00100000) {
4321         /* |x| >= 0x1p-1022, up to 2ulp error in [0.1,0.2554] */
4322         t = __expm1(-2 * x);
4323         t = -t / (t + 2);
4324     } else {
4325         /* |x| is subnormal */
4326         /* note: the branch above would not raise underflow in [0x1p-1023,0x1p-1022) */
4327         fp_barrier((float)x);
4328         t = x;
4329     }
4330     return sign ? -t : t;
4331 }
4332
4333
4334 #if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
4335
/* CREATE_FPU_FUNC1(name, call): emit an assembly wrapper `name` around the
 * one-argument function `call`.
 *
 * The wrapper pops its argument from st(0) into the bottom of a 68-byte
 * frame, where it serves as the stack argument for `call`.  It then pops
 * every remaining non-empty x87 register into the following 8-byte slots,
 * counting them in %ecx, calls `call`, stores the result over slot 0 and
 * reloads the slots from the highest one down to slot 0, so the saved
 * registers are put back and the result is loaded last.
 */
#define CREATE_FPU_FUNC1(name, call) \
    __ASM_GLOBAL_FUNC(name, \
            "pushl   %ebp\n\t" \
            __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \
            __ASM_CFI(".cfi_rel_offset %ebp,0\n\t") \
            "movl    %esp, %ebp\n\t" \
            __ASM_CFI(".cfi_def_cfa_register %ebp\n\t") \
            "subl    $68, %esp\n\t" /* sizeof(double)*8 + sizeof(int) */ \
            "fstpl   (%esp)\n\t"    /* store function argument */ \
            "fwait\n\t" \
            "movl    $1, %ecx\n\t"  /* empty FPU stack */ \
            "1:\n\t" \
            "fxam\n\t"              /* classify st(0) */ \
            "fstsw   %ax\n\t" \
            "and     $0x4500, %ax\n\t" /* keep C3, C2 and C0 */ \
            "cmp     $0x4100, %ax\n\t" /* C3 and C0 set, C2 clear: register is empty */ \
            "je      2f\n\t" \
            "fstpl    (%esp,%ecx,8)\n\t" \
            "fwait\n\t" \
            "incl    %ecx\n\t" \
            "jmp     1b\n\t" \
            "2:\n\t" \
            "movl    %ecx, -4(%ebp)\n\t" /* keep the slot count across the call */ \
            "call    " __ASM_NAME( #call ) "\n\t" \
            "movl    -4(%ebp), %ecx\n\t" \
            "fstpl   (%esp)\n\t"    /* save result */ \
            "3:\n\t"                /* restore FPU stack */ \
            "decl    %ecx\n\t" \
            "fldl    (%esp,%ecx,8)\n\t" \
            "cmpl    $0, %ecx\n\t" \
            "jne     3b\n\t" \
            "leave\n\t" \
            __ASM_CFI(".cfi_def_cfa %esp,4\n\t") \
            __ASM_CFI(".cfi_same_value %ebp\n\t") \
            "ret")
4371
/* CREATE_FPU_FUNC2(name, call): emit an assembly wrapper `name` around the
 * two-argument function `call`.
 *
 * The wrapper pops st(0) to 8(%esp) and the next register to (%esp), which
 * form the two stack arguments of `call`.  Every remaining non-empty x87
 * register is then popped into the following 8-byte slots, counted in
 * %ecx starting at 2.  After the call the result is stored over slot 1
 * and the slots are reloaded from the highest one down to slot 1, so the
 * saved registers are put back and the result is loaded last; slot 0 is
 * not reloaded.
 */
#define CREATE_FPU_FUNC2(name, call) \
    __ASM_GLOBAL_FUNC(name, \
            "pushl   %ebp\n\t" \
            __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \
            __ASM_CFI(".cfi_rel_offset %ebp,0\n\t") \
            "movl    %esp, %ebp\n\t" \
            __ASM_CFI(".cfi_def_cfa_register %ebp\n\t") \
            "subl    $68, %esp\n\t" /* sizeof(double)*8 + sizeof(int) */ \
            "fstpl   8(%esp)\n\t"   /* store function argument */ \
            "fwait\n\t" \
            "fstpl   (%esp)\n\t" \
            "fwait\n\t" \
            "movl    $2, %ecx\n\t"  /* empty FPU stack */ \
            "1:\n\t" \
            "fxam\n\t"              /* classify st(0) */ \
            "fstsw   %ax\n\t" \
            "and     $0x4500, %ax\n\t" /* keep C3, C2 and C0 */ \
            "cmp     $0x4100, %ax\n\t" /* C3 and C0 set, C2 clear: register is empty */ \
            "je      2f\n\t" \
            "fstpl    (%esp,%ecx,8)\n\t" \
            "fwait\n\t" \
            "incl    %ecx\n\t" \
            "jmp     1b\n\t" \
            "2:\n\t" \
            "movl    %ecx, -4(%ebp)\n\t" /* keep the slot count across the call */ \
            "call    " __ASM_NAME( #call ) "\n\t" \
            "movl    -4(%ebp), %ecx\n\t" \
            "fstpl   8(%esp)\n\t"   /* save result */ \
            "3:\n\t"                /* restore FPU stack */ \
            "decl    %ecx\n\t" \
            "fldl    (%esp,%ecx,8)\n\t" \
            "cmpl    $1, %ecx\n\t" \
            "jne     3b\n\t" \
            "leave\n\t" \
            __ASM_CFI(".cfi_def_cfa %esp,4\n\t") \
            __ASM_CFI(".cfi_same_value %ebp\n\t") \
            "ret")
4409
/* Each line defines the entry point named first as an assembly wrapper
 * around the C function named second.  The wrappers take their operands
 * from the x87 register stack; atan2, fmod and pow use the two-operand
 * macro, all others the one-operand macro. */
CREATE_FPU_FUNC1(_CIacos, acos)
CREATE_FPU_FUNC1(_CIasin, asin)
CREATE_FPU_FUNC1(_CIatan, atan)
CREATE_FPU_FUNC2(_CIatan2, atan2)
CREATE_FPU_FUNC1(_CIcos, cos)
CREATE_FPU_FUNC1(_CIcosh, cosh)
CREATE_FPU_FUNC1(_CIexp, exp)
CREATE_FPU_FUNC2(_CIfmod, fmod)
CREATE_FPU_FUNC1(_CIlog, log)
CREATE_FPU_FUNC1(_CIlog10, log10)
CREATE_FPU_FUNC2(_CIpow, pow)
CREATE_FPU_FUNC1(_CIsin, sin)
CREATE_FPU_FUNC1(_CIsinh, sinh)
CREATE_FPU_FUNC1(_CIsqrt, sqrt)
CREATE_FPU_FUNC1(_CItan, tan)
CREATE_FPU_FUNC1(_CItanh, tanh)
4426
/* _ftol: pop st(0), convert it to a 64-bit integer and return it in
 * %edx:%eax.  The current x87 control word is saved, a copy with both
 * rounding-control bits (mask 0xc00) set is loaded so that the conversion
 * truncates toward zero, and the saved control word is reloaded once the
 * integer has been stored. */
__ASM_GLOBAL_FUNC(_ftol,
        "pushl   %ebp\n\t"
        __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t")
        __ASM_CFI(".cfi_rel_offset %ebp,0\n\t")
        "movl    %esp, %ebp\n\t"
        __ASM_CFI(".cfi_def_cfa_register %ebp\n\t")
        "subl    $12, %esp\n\t"     /* sizeof(LONGLONG) + 2*sizeof(WORD) */
        "fnstcw  (%esp)\n\t"        /* original control word at (%esp) */
        "mov     (%esp), %ax\n\t"
        "or      $0xc00, %ax\n\t"   /* rounding control: toward zero */
        "mov     %ax, 2(%esp)\n\t"  /* modified control word at 2(%esp) */
        "fldcw   2(%esp)\n\t"
        "fistpq  4(%esp)\n\t"       /* 64-bit integer result at 4(%esp) */
        "fldcw   (%esp)\n\t"        /* back to the original control word */
        "movl    4(%esp), %eax\n\t" /* low half of the result */
        "movl    8(%esp), %edx\n\t" /* high half of the result */
        "leave\n\t"
        __ASM_CFI(".cfi_def_cfa %esp,4\n\t")
        __ASM_CFI(".cfi_same_value %ebp\n\t")
        "ret")
4447
4448 #endif /* (defined(__GNUC__) || defined(__clang__)) && defined(__i386__) */
4449
4450 /*********************************************************************
4451  *              _fpclass (MSVCRT.@)
4452  */
4453 int CDECL _fpclass(double num)
4454 {
4455     union { double f; UINT64 i; } u = { num };
4456     int e = u.i >> 52 & 0x7ff;
4457     int s = u.i >> 63;
4458
4459     switch (e)
4460     {
4461     case 0:
4462         if (u.i << 1) return s ? _FPCLASS_ND : _FPCLASS_PD;
4463         return s ? _FPCLASS_NZ : _FPCLASS_PZ;
4464     case 0x7ff:
4465         if (u.i << 12) return ((u.i >> 51) & 1) ? _FPCLASS_QNAN : _FPCLASS_SNAN;
4466         return s ? _FPCLASS_NINF : _FPCLASS_PINF;
4467     default:
4468         return s ? _FPCLASS_NN : _FPCLASS_PN;
4469     }
4470 }
4471
4472 /*********************************************************************
4473  *              _rotl (MSVCRT.@)
4474  */
4475 unsigned int CDECL MSVCRT__rotl(unsigned int num, int shift)
4476 {
4477   shift &= 31;
4478   return (num << shift) | (num >> (32-shift));
4479 }
4480
4481 /*********************************************************************
4482  *              _lrotl (MSVCRT.@)
4483  */
4484 __msvcrt_ulong CDECL MSVCRT__lrotl(__msvcrt_ulong num, int shift)
4485 {
4486   shift &= 0x1f;
4487   return (num << shift) | (num >> (32-shift));
4488 }
4489
4490 /*********************************************************************
4491  *              _lrotr (MSVCRT.@)
4492  */
4493 __msvcrt_ulong CDECL MSVCRT__lrotr(__msvcrt_ulong num, int shift)
4494 {
4495   shift &= 0x1f;
4496   return (num >> shift) | (num << (32-shift));
4497 }
4498
4499 /*********************************************************************
4500  *              _rotr (MSVCRT.@)
4501  */
4502 unsigned int CDECL MSVCRT__rotr(unsigned int num, int shift)
4503 {
4504     shift &= 0x1f;
4505     return (num >> shift) | (num << (32-shift));
4506 }
4507
4508 /*********************************************************************
4509  *              _rotl64 (MSVCRT.@)
4510  */
4511 unsigned __int64 CDECL MSVCRT__rotl64(unsigned __int64 num, int shift)
4512 {
4513   shift &= 63;
4514   return (num << shift) | (num >> (64-shift));
4515 }
4516
4517 /*********************************************************************
4518  *              _rotr64 (MSVCRT.@)
4519  */
4520 unsigned __int64 CDECL MSVCRT__rotr64(unsigned __int64 num, int shift)
4521 {
4522     shift &= 63;
4523     return (num >> shift) | (num << (64-shift));
4524 }
4525
4526 /*********************************************************************
4527  *              abs (MSVCRT.@)
4528  */
4529 int CDECL abs( int n )
4530 {
4531     return n >= 0 ? n : -n;
4532 }
4533
4534 /*********************************************************************
4535  *              labs (MSVCRT.@)
4536  */
4537 __msvcrt_long CDECL labs( __msvcrt_long n )
4538 {
4539     return n >= 0 ? n : -n;
4540 }
4541
4542 #if _MSVCR_VER>=100
4543 /*********************************************************************
4544  *              llabs (MSVCR100.@)
4545  */
4546 __int64 CDECL llabs( __int64 n )
4547 {
4548     return n >= 0 ? n : -n;
4549 }
4550 #endif
4551
4552 #if _MSVCR_VER>=120
4553 /*********************************************************************
4554  *              imaxabs (MSVCR120.@)
4555  */
4556 intmax_t CDECL imaxabs( intmax_t n )
4557 {
4558     return n >= 0 ? n : -n;
4559 }
4560 #endif
4561
4562 /*********************************************************************
4563  *              _abs64 (MSVCRT.@)
4564  */
4565 __int64 CDECL _abs64( __int64 n )
4566 {
4567     return n >= 0 ? n : -n;
4568 }
4569
4570 /* Copied from musl: src/math/ilogb.c */
4571 static int __ilogb(double x)
4572 {
4573     union { double f; UINT64 i; } u = { x };
4574     int e = u.i >> 52 & 0x7ff;
4575
4576     if (!e)
4577     {
4578         u.i <<= 12;
4579         if (u.i == 0) return FP_ILOGB0;
4580         /* subnormal x */
4581         for (e = -0x3ff; u.i >> 63 == 0; e--, u.i <<= 1);
4582         return e;
4583     }
4584     if (e == 0x7ff) return u.i << 12 ? FP_ILOGBNAN : INT_MAX;
4585     return e - 0x3ff;
4586 }
4587
4588 /*********************************************************************
4589  *              _logb (MSVCRT.@)
4590  *
4591  * Copied from musl: src/math/logb.c
4592  */
4593 double CDECL _logb(double x)
4594 {
4595     if (!isfinite(x))
4596         return x * x;
4597     if (x == 0)
4598         return math_error(_SING, "_logb", x, 0, -1 / (x * x));
4599     return __ilogb(x);
4600 }
4601
/* Squares x in two pieces: *hi receives the rounded product x * x and *lo
 * a correction term.  x is first split into a head and a tail (using the
 * 0x1p27 + 1 multiplier) and the correction is assembled from the
 * products of those parts. */
static void sq(double *hi, double *lo, double x)
{
    const double scaled = x * (0x1p27 + 1);
    const double head = x - scaled + scaled;
    const double tail = x - head;
    const double square = x * x;

    *hi = square;
    /* keep this summation order: it decides which terms cancel exactly */
    *lo = head * head - square + 2 * head * tail + tail * tail;
}
4612
4613 /*********************************************************************
4614  *              _hypot (MSVCRT.@)
4615  *
4616  * Copied from musl: src/math/hypot.c
4617  */
4618 double CDECL _hypot(double x, double y)
4619 {
4620     UINT64 ux = *(UINT64*)&x, uy = *(UINT64*)&y, ut;
4621     double hx, lx, hy, ly, z;
4622     int ex, ey;
4623
4624     /* arrange |x| >= |y| */
4625     ux &= -1ULL >> 1;
4626     uy &= -1ULL >> 1;
4627     if (ux < uy) {
4628         ut = ux;
4629         ux = uy;
4630         uy = ut;
4631     }
4632
4633     /* special cases */
4634     ex = ux >> 52;
4635     ey = uy >> 52;
4636     x = *(double*)&ux;
4637     y = *(double*)&uy;
4638     /* note: hypot(inf,nan) == inf */
4639     if (ey == 0x7ff)
4640         return y;
4641     if (ex == 0x7ff || uy == 0)
4642         return x;
4643     /* note: hypot(x,y) ~= x + y*y/x/2 with inexact for small y/x */
4644     /* 64 difference is enough for ld80 double_t */
4645     if (ex - ey > 64)
4646         return x + y;
4647
4648     /* precise sqrt argument in nearest rounding mode without overflow */
4649     /* xh*xh must not overflow and xl*xl must not underflow in sq */
4650     z = 1;
4651     if (ex > 0x3ff + 510) {
4652         z = 0x1p700;
4653         x *= 0x1p-700;
4654         y *= 0x1p-700;
4655     } else if (ey < 0x3ff - 450) {
4656         z = 0x1p-700;
4657         x *= 0x1p700;
4658         y *= 0x1p700;
4659     }
4660     sq(&hx, &lx, x);
4661     sq(&hy, &ly, y);
4662     return z * sqrt(ly + lx + hy + hx);
4663 }
4664
4665 /*********************************************************************
4666  *      _hypotf (MSVCRT.@)
4667  *
4668  * Copied from musl: src/math/hypotf.c
4669  */
4670 float CDECL _hypotf(float x, float y)
4671 {
4672     UINT32 ux = *(UINT32*)&x, uy = *(UINT32*)&y, ut;
4673     float z;
4674
4675     ux &= -1U >> 1;
4676     uy &= -1U >> 1;
4677     if (ux < uy) {
4678         ut = ux;
4679         ux = uy;
4680         uy = ut;
4681     }
4682
4683     x = *(float*)&ux;
4684     y = *(float*)&uy;
4685     if (uy == 0xff << 23)
4686         return y;
4687     if (ux >= 0xff << 23 || uy == 0 || ux - uy >= 25 << 23)
4688         return x + y;
4689
4690     z = 1;
4691     if (ux >= (0x7f + 60) << 23) {
4692         z = 0x1p90f;
4693         x *= 0x1p-90f;
4694         y *= 0x1p-90f;
4695     } else if (uy < (0x7f - 60) << 23) {
4696         z = 0x1p-90f;
4697         x *= 0x1p90f;
4698         y *= 0x1p90f;
4699     }
4700     return z * sqrtf((double)x * x + (double)y * y);
4701 }
4702
4703 /*********************************************************************
4704  *              ceil (MSVCRT.@)
4705  *
4706  * Based on musl: src/math/ceilf.c
4707  */
4708 double CDECL ceil( double x )
4709 {
4710     union {double f; UINT64 i;} u = {x};
4711     int e = (u.i >> 52 & 0x7ff) - 0x3ff;
4712     UINT64 m;
4713
4714     if (e >= 52)
4715         return x;
4716     if (e >= 0) {
4717         m = 0x000fffffffffffffULL >> e;
4718         if ((u.i & m) == 0)
4719             return x;
4720         if (u.i >> 63 == 0)
4721             u.i += m;
4722         u.i &= ~m;
4723     } else {
4724         if (u.i >> 63)
4725             return -0.0;
4726         else if (u.i << 1)
4727             return 1.0;
4728     }
4729     return u.f;
4730 }
4731
4732 /*********************************************************************
4733  *              floor (MSVCRT.@)
4734  *
4735  * Based on musl: src/math/floorf.c
4736  */
4737 double CDECL floor( double x )
4738 {
4739     union {double f; UINT64 i;} u = {x};
4740     int e = (int)(u.i >> 52 & 0x7ff) - 0x3ff;
4741     UINT64 m;
4742
4743     if (e >= 52)
4744         return x;
4745     if (e >= 0) {
4746         m = 0x000fffffffffffffULL >> e;
4747         if ((u.i & m) == 0)
4748             return x;
4749         if (u.i >> 63)
4750             u.i += m;
4751         u.i &= ~m;
4752     } else {
4753         if (u.i >> 63 == 0)
4754             return 0;
4755         else if (u.i << 1)
4756             return -1;
4757     }
4758     return u.f;
4759 }
4760
4761 /*********************************************************************
4762  *      fma (MSVCRT.@)
4763  *
4764  * Copied from musl: src/math/fma.c
4765  */
/* Decomposed double as produced by normalize() and consumed by fma(). */
struct fma_num
{
    UINT64 m;   /* fraction bits with bit 52 set, shifted left by one */
    int e;      /* biased exponent field minus (0x3ff + 52 + 1) */
    int sign;   /* 0x800 when the sign bit of the input is set, else 0 */
};
4772
4773 static struct fma_num normalize(double x)
4774 {
4775     UINT64 ix = *(UINT64*)&x;
4776     int e = ix >> 52;
4777     int sign = e & 0x800;
4778     struct fma_num ret;
4779
4780     e &= 0x7ff;
4781     if (!e) {
4782         x *= 0x1p63;
4783         ix = *(UINT64*)&x;
4784         e = ix >> 52 & 0x7ff;
4785         e = e ? e - 63 : 0x800;
4786     }
4787     ix &= (1ull << 52) - 1;
4788     ix |= 1ull << 52;
4789     ix <<= 1;
4790     e -= 0x3ff + 52 + 1;
4791
4792     ret.m = ix;
4793     ret.e = e;
4794     ret.sign = sign;
4795     return ret;
4796 }
4797
4798 static void mul(UINT64 *hi, UINT64 *lo, UINT64 x, UINT64 y)
4799 {
4800     UINT64 t1, t2, t3;
4801     UINT64 xlo = (UINT32)x, xhi = x >> 32;
4802     UINT64 ylo = (UINT32)y, yhi = y >> 32;
4803
4804     t1 = xlo * ylo;
4805     t2 = xlo * yhi + xhi * ylo;
4806     t3 = xhi * yhi;
4807     *lo = t1 + (t2 << 32);
4808     *hi = t3 + (t2 >> 32) + (t1 > *lo);
4809 }
4810
/* Computes x * y + z.  Finite non-zero inputs are decomposed with
 * normalize(), the product is formed as a 128-bit integer (rhi:rlo), z is
 * aligned to it and added or subtracted, and the top 63 bits (with a
 * sticky last bit) are converted to double and scaled with __scalbn so
 * that rounding happens once.  Zero, infinite and NaN operands take the
 * plain x * y + z paths at the top, which set errno to EDOM when a NaN
 * results from non-NaN inputs. */
double CDECL fma( double x, double y, double z )
{
    int e, d, sign, samesign, nonzero;
    UINT64 rhi, rlo, zhi, zlo;
    struct fma_num nx, ny, nz;
    double r;
    INT64 i;

    /* normalize so top 10bits and last bit are 0 */
    nx = normalize(x);
    ny = normalize(y);
    nz = normalize(z);

    /* x or y has an all-ones exponent field (inf/nan) or is a zero
       (normalize() marks zeros with an even larger exponent) */
    if (nx.e >= 0x7ff - 0x3ff - 52 - 1 || ny.e >= 0x7ff - 0x3ff - 52 - 1) {
        r = x * y + z;
        if (!isnan(x) && !isnan(y) && !isnan(z) && isnan(r)) *_errno() = EDOM;
        return r;
    }
    /* same test for z: a zero still needs the product, inf/nan is the result */
    if (nz.e >= 0x7ff - 0x3ff - 52 - 1) {
        if (nz.e > 0x7ff - 0x3ff - 52 - 1) {/* z==0 */
            r = x * y + z;
            if (!isnan(x) && !isnan(y) && isnan(r)) *_errno() = EDOM;
            return r;
        }
        return z;
    }

    /* mul: r = x*y */
    mul(&rhi, &rlo, nx.m, ny.m);
    /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */

    /* align exponents */
    e = nx.e + ny.e;
    d = nz.e - e;
    /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */
    if (d > 0) {
        if (d < 64) {
            zlo = nz.m << d;
            zhi = nz.m >> (64 - d);
        } else {
            zlo = 0;
            zhi = nz.m;
            e = nz.e - 64;
            d -= 64;
            /* shift the product right instead; bits shifted out are
               folded into the lowest (sticky) bit */
            if (d < 64 && d) {
                rlo = rhi << (64 - d) | rlo >> d | !!(rlo << (64 - d));
                rhi = rhi >> d;
            } else if (d) {
                rlo = 1;
                rhi = 0;
            }
        }
    } else {
        /* z is not above the product: shift z right, keeping a sticky bit */
        zhi = 0;
        d = -d;
        if (d == 0) {
            zlo = nz.m;
        } else if (d < 64) {
            zlo = nz.m >> d | !!(nz.m << (64 - d));
        } else {
            zlo = 1;
        }
    }

    /* add */
    sign = nx.sign ^ ny.sign;
    samesign = !(sign ^ nz.sign);
    nonzero = 1;
    if (samesign) {
        /* r += z */
        rlo += zlo;
        rhi += zhi + (rlo < zlo);
    } else {
        /* r -= z */
        UINT64 t = rlo;
        rlo -= zlo;
        rhi = rhi - zhi - (t < rlo);
        /* negative difference: negate the 128-bit value and flip the sign */
        if (rhi >> 63) {
            rlo = -rlo;
            rhi = -rhi - !!rlo;
            sign = !sign;
        }
        nonzero = !!rhi;
    }

    /* set rhi to top 63bit of the result (last bit is sticky) */
    if (nonzero) {
        e += 64;
        /* d = (number of leading zero bits of rhi) - 1 */
        if (rhi >> 32) {
            BitScanReverse((DWORD*)&d, rhi >> 32);
            d = 31 - d - 1;
        } else {
            BitScanReverse((DWORD*)&d, rhi);
            d = 63 - d - 1;
        }
        /* note: d > 0 */
        rhi = rhi << d | rlo >> (64 - d) | !!(rlo << d);
    } else if (rlo) {
        /* d = (number of leading zero bits of rlo) - 1, may be -1 */
        if (rlo >> 32) {
            BitScanReverse((DWORD*)&d, rlo >> 32);
            d = 31 - d - 1;
        } else {
            BitScanReverse((DWORD*)&d, rlo);
            d = 63 - d - 1;
        }
        if (d < 0)
            rhi = rlo >> 1 | (rlo & 1);
        else
            rhi = rlo << d;
    } else {
        /* exact +-0 */
        return x * y + z;
    }
    e -= d;

    /* convert to double */
    i = rhi; /* i is in [1<<62,(1<<63)-1] */
    if (sign)
        i = -i;
    r = i; /* |r| is in [0x1p62,0x1p63] */

    if (e < -1022 - 62) {
        /* result is subnormal before rounding */
        if (e == -1022 - 63) {
            double c = 0x1p63;
            if (sign)
                c = -c;
            if (r == c) {
                /* min normal after rounding, underflow depends
                   on arch behaviour which can be imitated by
                   a double to float conversion */
                float fltmin = 0x0.ffffff8p-63 * FLT_MIN * r;
                return DBL_MIN / FLT_MIN * fltmin;
            }
            /* one bit is lost when scaled, add another top bit to
               only round once at conversion if it is inexact */
            if (rhi << 53) {
                double tiny;

                i = rhi >> 1 | (rhi & 1) | 1ull << 62;
                if (sign)
                    i = -i;
                r = i;
                r = 2 * r - c; /* remove top bit */

                /* raise underflow portably, such that it
                   cannot be optimized away */
                tiny = DBL_MIN / FLT_MIN * r;
                r += (double)(tiny * tiny) * (r - r);
            }
        } else {
            /* only round once when scaled */
            d = 10;
            i = (rhi >> d | !!(rhi << (64 - d))) << d;
            if (sign)
                i = -i;
            r = i;
        }
    }
    /* apply the accumulated binary exponent */
    return __scalbn(r, e);
}
4972
4973 /*********************************************************************
4974  *      fmaf (MSVCRT.@)
4975  *
4976  * Copied from musl: src/math/fmaf.c
4977  */
4978 float CDECL fmaf( float x, float y, float z )
4979 {
4980     union { double f; UINT64 i; } u;
4981     double xy, err;
4982     int e, neg;
4983
4984     xy = (double)x * y;
4985     u.f = xy + z;
4986     e = u.i>>52 & 0x7ff;
4987     /* Common case: The double precision result is fine. */
4988     if ((u.i & 0x1fffffff) != 0x10000000 || /* not a halfway case */
4989             e == 0x7ff || /* NaN */
4990             (u.f - xy == z && u.f - z == xy) || /* exact */
4991             (_controlfp(0, 0) & _MCW_RC) != _RC_NEAR) /* not round-to-nearest */
4992     {
4993         if (!isnan(x) && !isnan(y) && !isnan(z) && isnan(u.f)) *_errno() = EDOM;
4994
4995         /* underflow may not be raised correctly, example:
4996            fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) */
4997         if (e < 0x3ff-126 && e >= 0x3ff-149 && _statusfp() & _SW_INEXACT)
4998             fp_barrierf((float)u.f * (float)u.f);
4999         return u.f;
5000     }
5001
5002     /*
5003      * If result is inexact, and exactly halfway between two float values,
5004      * we need to adjust the low-order bit in the direction of the error.
5005      */
5006     neg = u.i >> 63;
5007     if (neg == (z > xy))
5008         err = xy - u.f + z;
5009     else
5010         err = z - u.f + xy;
5011     if (neg == (err < 0))
5012         u.i++;
5013     else
5014         u.i--;
5015     return u.f;
5016 }
5017
5018 #if defined(__i386__) || defined(__x86_64__)
5019 static void _setfp_sse( unsigned int *cw, unsigned int cw_mask,
5020         unsigned int *sw, unsigned int sw_mask )
5021 {
5022 #if defined(__GNUC__) || defined(__clang__)
5023     unsigned long old_fpword, fpword;
5024     unsigned int flags;
5025
5026     __asm__ __volatile__( "stmxcsr %0" : "=m" (fpword) );
5027     old_fpword = fpword;
5028
5029     cw_mask &= _MCW_EM | _MCW_RC | _MCW_DN;
5030     sw_mask &= _MCW_EM;
5031
5032     if (sw)
5033     {
5034         flags = 0;
5035         if (fpword & 0x1) flags |= _SW_INVALID;
5036         if (fpword & 0x2) flags |= _SW_DENORMAL;
5037         if (fpword & 0x4) flags |= _SW_ZERODIVIDE;
5038         if (fpword & 0x8) flags |= _SW_OVERFLOW;
5039         if (fpword & 0x10) flags |= _SW_UNDERFLOW;
5040         if (fpword & 0x20) flags |= _SW_INEXACT;
5041
5042         *sw = (flags & ~sw_mask) | (*sw & sw_mask);
5043         TRACE("sse2 update sw %08x to %08x\n", flags, *sw);
5044         fpword &= ~0x3f;
5045         if (*sw & _SW_INVALID) fpword |= 0x1;
5046         if (*sw & _SW_DENORMAL) fpword |= 0x2;
5047         if (*sw & _SW_ZERODIVIDE) fpword |= 0x4;
5048         if (*sw & _SW_OVERFLOW) fpword |= 0x8;
5049         if (*sw & _SW_UNDERFLOW) fpword |= 0x10;
5050         if (*sw & _SW_INEXACT) fpword |= 0x20;
5051         *sw = flags;
5052     }
5053
5054     if (cw)
5055     {
5056         flags = 0;
5057         if (fpword & 0x80) flags |= _EM_INVALID;
5058         if (fpword & 0x100) flags |= _EM_DENORMAL;
5059         if (fpword & 0x200) flags |= _EM_ZERODIVIDE;
5060         if (fpword & 0x400) flags |= _EM_OVERFLOW;
5061         if (fpword & 0x800) flags |= _EM_UNDERFLOW;
5062         if (fpword & 0x1000) flags |= _EM_INEXACT;
5063         switch (fpword & 0x6000)
5064         {
5065         case 0x6000: flags |= _RC_UP|_RC_DOWN; break;
5066         case 0x4000: flags |= _RC_UP; break;
5067         case 0x2000: flags |= _RC_DOWN; break;
5068         }
5069         switch (fpword & 0x8040)
5070         {
5071         case 0x0040: flags |= _DN_FLUSH_OPERANDS_SAVE_RESULTS; break;
5072         case 0x8000: flags |= _DN_SAVE_OPERANDS_FLUSH_RESULTS; break;
5073         case 0x8040: flags |= _DN_FLUSH; break;
5074         }
5075
5076         *cw = (flags & ~cw_mask) | (*cw & cw_mask);
5077         TRACE("sse2 update cw %08x to %08x\n", flags, *cw);
5078         fpword &= ~0xffc0;
5079         if (*cw & _EM_INVALID) fpword |= 0x80;
5080         if (*cw & _EM_DENORMAL) fpword |= 0x100;
5081         if (*cw & _EM_ZERODIVIDE) fpword |= 0x200;
5082         if (*cw & _EM_OVERFLOW) fpword |= 0x400;
5083         if (*cw & _EM_UNDERFLOW) fpword |= 0x800;
5084         if (*cw & _EM_INEXACT) fpword |= 0x1000;
5085         switch (*cw & _MCW_RC)
5086         {
5087         case _RC_UP|_RC_DOWN: fpword |= 0x6000; break;
5088         case _RC_UP: fpword |= 0x4000; break;
5089         case _RC_DOWN: fpword |= 0x2000; break;
5090         }
5091         switch (*cw & _MCW_DN)
5092         {
5093         case _DN_FLUSH_OPERANDS_SAVE_RESULTS: fpword |= 0x0040; break;
5094         case _DN_SAVE_OPERANDS_FLUSH_RESULTS: fpword |= 0x8000; break;
5095         case _DN_FLUSH: fpword |= 0x8040; break;
5096         }
5097
5098         /* clear status word if anything changes */
5099         if (fpword != old_fpword && !sw)
5100         {
5101             TRACE("sse2 clear status word\n");
5102             fpword &= ~0x3f;
5103         }
5104     }
5105
5106     if (fpword != old_fpword)
5107         __asm__ __volatile__( "ldmxcsr %0" : : "m" (fpword) );
5108 #else
5109     FIXME("not implemented\n");
5110     if (cw) *cw = 0;
5111     if (sw) *sw = 0;
5112 #endif
5113 }
5114 #endif
5115
5116 static void _setfp( unsigned int *cw, unsigned int cw_mask,
5117         unsigned int *sw, unsigned int sw_mask )
5118 {
5119 #if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
5120     unsigned long oldcw = 0, newcw = 0;
5121     unsigned long oldsw = 0, newsw = 0;
5122     unsigned int flags;
5123
5124     cw_mask &= _MCW_EM | _MCW_IC | _MCW_RC | _MCW_PC;
5125     sw_mask &= _MCW_EM;
5126
5127     if (sw)
5128     {
5129         __asm__ __volatile__( "fstsw %0" : "=m" (newsw) );
5130         oldsw = newsw;
5131
5132         flags = 0;
5133         if (newsw & 0x1) flags |= _SW_INVALID;
5134         if (newsw & 0x2) flags |= _SW_DENORMAL;
5135         if (newsw & 0x4) flags |= _SW_ZERODIVIDE;
5136         if (newsw & 0x8) flags |= _SW_OVERFLOW;
5137         if (newsw & 0x10) flags |= _SW_UNDERFLOW;
5138         if (newsw & 0x20) flags |= _SW_INEXACT;
5139
5140         *sw = (flags & ~sw_mask) | (*sw & sw_mask);
5141         TRACE("x86 update sw %08x to %08x\n", flags, *sw);
5142         newsw &= ~0x3f;
5143         if (*sw & _SW_INVALID) newsw |= 0x1;
5144         if (*sw & _SW_DENORMAL) newsw |= 0x2;
5145         if (*sw & _SW_ZERODIVIDE) newsw |= 0x4;
5146         if (*sw & _SW_OVERFLOW) newsw |= 0x8;
5147         if (*sw & _SW_UNDERFLOW) newsw |= 0x10;
5148         if (*sw & _SW_INEXACT) newsw |= 0x20;
5149         *sw = flags;
5150     }
5151
5152     if (cw)
5153     {
5154         __asm__ __volatile__( "fstcw %0" : "=m" (newcw) );
5155         oldcw = newcw;
5156
5157         flags = 0;
5158         if (newcw & 0x1) flags |= _EM_INVALID;
5159         if (newcw & 0x2) flags |= _EM_DENORMAL;
5160         if (newcw & 0x4) flags |= _EM_ZERODIVIDE;
5161         if (newcw & 0x8) flags |= _EM_OVERFLOW;
5162         if (newcw & 0x10) flags |= _EM_UNDERFLOW;
5163         if (newcw & 0x20) flags |= _EM_INEXACT;
5164         switch (newcw & 0xc00)
5165         {
5166         case 0xc00: flags |= _RC_UP|_RC_DOWN; break;
5167         case 0x800: flags |= _RC_UP; break;
5168         case 0x400: flags |= _RC_DOWN; break;
5169         }
5170         switch (newcw & 0x300)
5171         {
5172         case 0x0: flags |= _PC_24; break;
5173         case 0x200: flags |= _PC_53; break;
5174         case 0x300: flags |= _PC_64; break;
5175         }
5176         if (newcw & 0x1000) flags |= _IC_AFFINE;
5177
5178         *cw = (flags & ~cw_mask) | (*cw & cw_mask);
5179         TRACE("x86 update cw %08x to %08x\n", flags, *cw);
5180         newcw &= ~0x1f3f;
5181         if (*cw & _EM_INVALID) newcw |= 0x1;
5182         if (*cw & _EM_DENORMAL) newcw |= 0x2;
5183         if (*cw & _EM_ZERODIVIDE) newcw |= 0x4;
5184         if (*cw & _EM_OVERFLOW) newcw |= 0x8;
5185         if (*cw & _EM_UNDERFLOW) newcw |= 0x10;
5186         if (*cw & _EM_INEXACT) newcw |= 0x20;
5187         switch (*cw & _MCW_RC)
5188         {
5189         case _RC_UP|_RC_DOWN: newcw |= 0xc00; break;
5190         case _RC_UP: newcw |= 0x800; break;
5191         case _RC_DOWN: newcw |= 0x400; break;
5192         }
5193         switch (*cw & _MCW_PC)
5194         {
5195         case _PC_64: newcw |= 0x300; break;
5196         case _PC_53: newcw |= 0x200; break;
5197         case _PC_24: newcw |= 0x0; break;
5198         }
5199         if (*cw & _IC_AFFINE) newcw |= 0x1000;
5200     }
5201
5202     if (oldsw != newsw && (newsw & 0x3f))
5203     {
5204         struct {
5205             WORD control_word;
5206             WORD unused1;
5207             WORD status_word;
5208             WORD unused2;
5209             WORD tag_word;
5210             WORD unused3;
5211             DWORD instruction_pointer;
5212             WORD code_segment;
5213             WORD unused4;
5214             DWORD operand_addr;
5215             WORD data_segment;
5216             WORD unused5;
5217         } fenv;
5218
5219         assert(cw);
5220
5221         __asm__ __volatile__( "fnstenv %0" : "=m" (fenv) );
5222         fenv.control_word = newcw;
5223         fenv.status_word = newsw;
5224         __asm__ __volatile__( "fldenv %0" : : "m" (fenv) : "st", "st(1)",
5225                 "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)" );
5226         return;
5227     }
5228
5229     if (oldsw != newsw)
5230         __asm__ __volatile__( "fnclex" );
5231     if (oldcw != newcw)
5232         __asm__ __volatile__( "fldcw %0" : : "m" (newcw) );
5233 #elif defined(__x86_64__)
5234     _setfp_sse(cw, cw_mask, sw, sw_mask);
5235 #elif defined(__aarch64__)
5236     ULONG_PTR old_fpsr = 0, fpsr = 0, old_fpcr = 0, fpcr = 0;
5237     unsigned int flags;
5238
5239     cw_mask &= _MCW_EM | _MCW_RC;
5240     sw_mask &= _MCW_EM;
5241
5242     if (sw)
5243     {
5244         __asm__ __volatile__( "mrs %0, fpsr" : "=r" (fpsr) );
5245         old_fpsr = fpsr;
5246
5247         flags = 0;
5248         if (fpsr & 0x1) flags |= _SW_INVALID;
5249         if (fpsr & 0x2) flags |= _SW_ZERODIVIDE;
5250         if (fpsr & 0x4) flags |= _SW_OVERFLOW;
5251         if (fpsr & 0x8) flags |= _SW_UNDERFLOW;
5252         if (fpsr & 0x10) flags |= _SW_INEXACT;
5253         if (fpsr & 0x80) flags |= _SW_DENORMAL;
5254
5255         *sw = (flags & ~sw_mask) | (*sw & sw_mask);
5256         TRACE("aarch64 update sw %08x to %08x\n", flags, *sw);
5257         fpsr &= ~0x9f;
5258         if (*sw & _SW_INVALID) fpsr |= 0x1;
5259         if (*sw & _SW_ZERODIVIDE) fpsr |= 0x2;
5260         if (*sw & _SW_OVERFLOW) fpsr |= 0x4;
5261         if (*sw & _SW_UNDERFLOW) fpsr |= 0x8;
5262         if (*sw & _SW_INEXACT) fpsr |= 0x10;
5263         if (*sw & _SW_DENORMAL) fpsr |= 0x80;
5264         *sw = flags;
5265     }
5266
5267     if (cw)
5268     {
5269         __asm__ __volatile__( "mrs %0, fpcr" : "=r" (fpcr) );
5270         old_fpcr = fpcr;
5271
5272         flags = 0;
5273         if (!(fpcr & 0x100)) flags |= _EM_INVALID;
5274         if (!(fpcr & 0x200)) flags |= _EM_ZERODIVIDE;
5275         if (!(fpcr & 0x400)) flags |= _EM_OVERFLOW;
5276         if (!(fpcr & 0x800)) flags |= _EM_UNDERFLOW;
5277         if (!(fpcr & 0x1000)) flags |= _EM_INEXACT;
5278         if (!(fpcr & 0x8000)) flags |= _EM_DENORMAL;
5279         switch (fpcr & 0xc00000)
5280         {
5281         case 0x400000: flags |= _RC_UP; break;
5282         case 0x800000: flags |= _RC_DOWN; break;
5283         case 0xc00000: flags |= _RC_CHOP; break;
5284         }
5285
5286         *cw = (flags & ~cw_mask) | (*cw & cw_mask);
5287         TRACE("aarch64 update cw %08x to %08x\n", flags, *cw);
5288         fpcr &= ~0xc09f00ul;
5289         if (!(*cw & _EM_INVALID)) fpcr |= 0x100;
5290         if (!(*cw & _EM_ZERODIVIDE)) fpcr |= 0x200;
5291         if (!(*cw & _EM_OVERFLOW)) fpcr |= 0x400;
5292         if (!(*cw & _EM_UNDERFLOW)) fpcr |= 0x800;
5293         if (!(*cw & _EM_INEXACT)) fpcr |= 0x1000;
5294         if (!(*cw & _EM_DENORMAL)) fpcr |= 0x8000;
5295         switch (*cw & _MCW_RC)
5296         {
5297         case _RC_CHOP: fpcr |= 0xc00000; break;
5298         case _RC_UP: fpcr |= 0x400000; break;
5299         case _RC_DOWN: fpcr |= 0x800000; break;
5300         }
5301     }
5302
5303     /* mask exceptions if needed */
5304     if (old_fpcr != fpcr && ~(old_fpcr >> 8) & fpsr & 0x9f != fpsr & 0x9f)
5305     {
5306         ULONG_PTR mask = fpcr & ~0x9f00;
5307         __asm__ __volatile__( "msr fpcr, %0" :: "r" (mask) );
5308     }
5309
5310     if (old_fpsr != fpsr)
5311         __asm__ __volatile__( "msr fpsr, %0" :: "r" (fpsr) );
5312     if (old_fpcr != fpcr)
5313         __asm__ __volatile__( "msr fpcr, %0" :: "r" (fpcr) );
5314 #elif defined(__arm__) && !defined(__SOFTFP__)
5315     DWORD old_fpscr, fpscr;
5316     unsigned int flags;
5317
5318     __asm__ __volatile__( "vmrs %0, fpscr" : "=r" (fpscr) );
5319     old_fpscr = fpscr;
5320
5321     cw_mask &= _MCW_EM | _MCW_RC;
5322     sw_mask &= _MCW_EM;
5323
5324     if (sw)
5325     {
5326         flags = 0;
5327         if (fpscr & 0x1) flags |= _SW_INVALID;
5328         if (fpscr & 0x2) flags |= _SW_ZERODIVIDE;
5329         if (fpscr & 0x4) flags |= _SW_OVERFLOW;
5330         if (fpscr & 0x8) flags |= _SW_UNDERFLOW;
5331         if (fpscr & 0x10) flags |= _SW_INEXACT;
5332         if (fpscr & 0x80) flags |= _SW_DENORMAL;
5333
5334         *sw = (flags & ~sw_mask) | (*sw & sw_mask);
5335         TRACE("arm update sw %08x to %08x\n", flags, *sw);
5336         fpscr &= ~0x9f;
5337         if (*sw & _SW_INVALID) fpscr |= 0x1;
5338         if (*sw & _SW_ZERODIVIDE) fpscr |= 0x2;
5339         if (*sw & _SW_OVERFLOW) fpscr |= 0x4;
5340         if (*sw & _SW_UNDERFLOW) fpscr |= 0x8;
5341         if (*sw & _SW_INEXACT) fpscr |= 0x10;
5342         if (*sw & _SW_DENORMAL) fpscr |= 0x80;
5343         *sw = flags;
5344     }
5345
5346     if (cw)
5347     {
5348         flags = 0;
5349         if (!(fpscr & 0x100)) flags |= _EM_INVALID;
5350         if (!(fpscr & 0x200)) flags |= _EM_ZERODIVIDE;
5351         if (!(fpscr & 0x400)) flags |= _EM_OVERFLOW;
5352         if (!(fpscr & 0x800)) flags |= _EM_UNDERFLOW;
5353         if (!(fpscr & 0x1000)) flags |= _EM_INEXACT;
5354         if (!(fpscr & 0x8000)) flags |= _EM_DENORMAL;
5355         switch (fpscr & 0xc00000)
5356         {
5357         case 0x400000: flags |= _RC_UP; break;
5358         case 0x800000: flags |= _RC_DOWN; break;
5359         case 0xc00000: flags |= _RC_CHOP; break;
5360         }
5361
5362         *cw = (flags & ~cw_mask) | (*cw & cw_mask);
5363         TRACE("arm update cw %08x to %08x\n", flags, *cw);
5364         fpscr &= ~0xc09f00ul;
5365         if (!(*cw & _EM_INVALID)) fpscr |= 0x100;
5366         if (!(*cw & _EM_ZERODIVIDE)) fpscr |= 0x200;
5367         if (!(*cw & _EM_OVERFLOW)) fpscr |= 0x400;
5368         if (!(*cw & _EM_UNDERFLOW)) fpscr |= 0x800;
5369         if (!(*cw & _EM_INEXACT)) fpscr |= 0x1000;
5370         if (!(*cw & _EM_DENORMAL)) fpscr |= 0x8000;
5371         switch (*cw & _MCW_RC)
5372         {
5373         case _RC_CHOP: fpscr |= 0xc00000; break;
5374         case _RC_UP: fpscr |= 0x400000; break;
5375         case _RC_DOWN: fpscr |= 0x800000; break;
5376         }
5377     }
5378
5379     if (old_fpscr != fpscr)
5380         __asm__ __volatile__( "vmsr fpscr, %0" :: "r" (fpscr) );
5381 #else
5382     FIXME("not implemented\n");
5383     if (cw) *cw = 0;
5384     if (sw) *sw = 0;
5385 #endif
5386 }
5387
/**********************************************************************
 *              _statusfp2 (MSVCR80.@)
 *
 * Report the x87 and SSE2 status words separately (i386 builds only).
 *
 * PARAMS
 *  x86_sw  [O] receives the x87 status flags; skipped when NULL
 *  sse2_sw [O] receives the SSE2 status flags; skipped when NULL,
 *              set to 0 when SSE2 is not supported
 */
#ifdef __i386__
void CDECL _statusfp2( unsigned int *x86_sw, unsigned int *sse2_sw )
{
    if (x86_sw)
    {
        /* zero mask: no bit of *x86_sw is selected for writing back */
        _setfp(NULL, 0, x86_sw, 0);
    }

    if (sse2_sw)
    {
        if (!sse2_supported)
            *sse2_sw = 0;
        else
            _setfp_sse(NULL, 0, sse2_sw, 0);
    }
}
#endif
5402
5403 /**********************************************************************
5404  *              _statusfp (MSVCRT.@)
5405  */
5406 unsigned int CDECL _statusfp(void)
5407 {
5408     unsigned int flags = 0;
5409 #if defined(__i386__)
5410     unsigned int x86_sw, sse2_sw;
5411
5412     _statusfp2( &x86_sw, &sse2_sw );
5413     /* FIXME: there's no definition for ambiguous status, just return all status bits for now */
5414     flags = x86_sw | sse2_sw;
5415 #else
5416     _setfp(NULL, 0, &flags, 0);
5417 #endif
5418     return flags;
5419 }
5420
5421 /*********************************************************************
5422  *              _clearfp (MSVCRT.@)
5423  */
5424 unsigned int CDECL _clearfp(void)
5425 {
5426     unsigned int flags = 0;
5427 #ifdef __i386__
5428     _setfp(NULL, 0, &flags, _MCW_EM);
5429     if (sse2_supported)
5430     {
5431         unsigned int sse_sw = 0;
5432
5433         _setfp_sse(NULL, 0, &sse_sw, _MCW_EM);
5434         flags |= sse_sw;
5435     }
5436 #else
5437     _setfp(NULL, 0, &flags, _MCW_EM);
5438 #endif
5439     return flags;
5440 }
5441
5442 /*********************************************************************
5443  *              __fpecode (MSVCRT.@)
5444  */
5445 int * CDECL __fpecode(void)
5446 {
5447     return &msvcrt_get_thread_data()->fpecode;
5448 }
5449
5450 /*********************************************************************
5451  *              ldexp (MSVCRT.@)
5452  */
5453 double CDECL ldexp(double num, int exp)
5454 {
5455   double z = __scalbn(num, exp);
5456
5457   if (isfinite(num) && !isfinite(z))
5458     return math_error(_OVERFLOW, "ldexp", num, exp, z);
5459   if (num && isfinite(num) && !z)
5460     return math_error(_UNDERFLOW, "ldexp", num, exp, z);
5461   return z;
5462 }
5463
5464 /*********************************************************************
5465  *              _cabs (MSVCRT.@)
5466  */
5467 double CDECL _cabs(struct _complex num)
5468 {
5469   return sqrt(num.x * num.x + num.y * num.y);
5470 }
5471
5472 /*********************************************************************
5473  *              _chgsign (MSVCRT.@)
5474  */
5475 double CDECL _chgsign(double num)
5476 {
5477     union { double f; UINT64 i; } u = { num };
5478     u.i ^= 1ull << 63;
5479     return u.f;
5480 }
5481
/*********************************************************************
 *              __control87_2 (MSVCR80.@)
 *
 * Not exported by native msvcrt, added in msvcr80.
 *
 * Apply newval/mask to the x87 and SSE2 control words independently
 * (i386 builds only).
 *
 * PARAMS
 *  newval  [I] new control bits
 *  mask    [I] selects which bits of newval are applied
 *  x86_cw  [O] receives the resulting x87 control word; skipped when NULL
 *  sse2_cw [O] receives the resulting SSE2 control word; skipped when
 *              NULL, set to 0 when SSE2 is not supported
 *
 * RETURNS
 *  Always 1.
 */
#ifdef __i386__
int CDECL __control87_2( unsigned int newval, unsigned int mask,
                         unsigned int *x86_cw, unsigned int *sse2_cw )
{
    if (x86_cw)
    {
        *x86_cw = newval;
        _setfp(x86_cw, mask, NULL, 0);
    }

    if (sse2_cw)
    {
        if (!sse2_supported)
        {
            *sse2_cw = 0;
        }
        else
        {
            *sse2_cw = newval;
            _setfp_sse(sse2_cw, mask, NULL, 0);
        }
    }

    return 1;
}
#endif
5509
5510 /*********************************************************************
5511  *              _control87 (MSVCRT.@)
5512  */
5513 unsigned int CDECL _control87(unsigned int newval, unsigned int mask)
5514 {
5515     unsigned int flags = 0;
5516 #ifdef __i386__
5517     unsigned int sse2_cw;
5518
5519     __control87_2( newval, mask, &flags, &sse2_cw );
5520
5521     if (sse2_supported)
5522     {
5523         if ((flags ^ sse2_cw) & (_MCW_EM | _MCW_RC)) flags |= _EM_AMBIGUOUS;
5524         flags |= sse2_cw;
5525     }
5526 #else
5527     flags = newval;
5528     _setfp(&flags, mask, NULL, 0);
5529 #endif
5530     return flags;
5531 }
5532
5533 /*********************************************************************
5534  *              _controlfp (MSVCRT.@)
5535  */
5536 unsigned int CDECL _controlfp(unsigned int newval, unsigned int mask)
5537 {
5538   return _control87( newval, mask & ~_EM_DENORMAL );
5539 }
5540
5541 /*********************************************************************
5542  *              _set_controlfp (MSVCRT.@)
5543  */
5544 void CDECL _set_controlfp( unsigned int newval, unsigned int mask )
5545 {
5546     _controlfp( newval, mask );
5547 }
5548
5549 /*********************************************************************
5550  *              _controlfp_s (MSVCRT.@)
5551  */
5552 int CDECL _controlfp_s(unsigned int *cur, unsigned int newval, unsigned int mask)
5553 {
5554     static const unsigned int all_flags = (_MCW_EM | _MCW_IC | _MCW_RC |
5555                                            _MCW_PC | _MCW_DN);
5556     unsigned int val;
5557
5558     if (!MSVCRT_CHECK_PMT( !(newval & mask & ~all_flags) ))
5559     {
5560         if (cur) *cur = _controlfp( 0, 0 );  /* retrieve it anyway */
5561         return EINVAL;
5562     }
5563     val = _controlfp( newval, mask );
5564     if (cur) *cur = val;
5565     return 0;
5566 }
5567
5568 #if _MSVCR_VER >= 140 && (defined(__i386__) || defined(__x86_64__))
/* Bit patterns used by fenv_encode()/fenv_decode() to pack a pair of
 * control or status flag sets into a single __msvcrt_ulong.
 *
 * FENV_X_* patterns are produced from the first (x87) flag set and
 * FENV_Y_* patterns from the second (SSE) flag set.  For the exception
 * and rounding flags both families share the same low 16 bits and differ
 * only in the upper half (0x00ff0000 for X, 0xff000000 for Y); the
 * remaining entries occupy bits of the low half only. */
enum fenv_masks
{
    /* x87 side: exception flags */
    FENV_X_INVALID = 0x00100010,
    FENV_X_DENORMAL = 0x00200020,
    FENV_X_ZERODIVIDE = 0x00080008,
    FENV_X_OVERFLOW = 0x00040004,
    FENV_X_UNDERFLOW = 0x00020002,
    FENV_X_INEXACT = 0x00010001,
    /* x87 side: set for _IC_AFFINE */
    FENV_X_AFFINE = 0x00004000,
    /* x87 side: set for _RC_UP / _RC_DOWN */
    FENV_X_UP = 0x00800200,
    FENV_X_DOWN = 0x00400100,
    /* x87 side: set for _PC_24 / _PC_53 */
    FENV_X_24 = 0x00002000,
    FENV_X_53 = 0x00001000,
    /* SSE side: exception flags */
    FENV_Y_INVALID = 0x10000010,
    FENV_Y_DENORMAL = 0x20000020,
    FENV_Y_ZERODIVIDE = 0x08000008,
    FENV_Y_OVERFLOW = 0x04000004,
    FENV_Y_UNDERFLOW = 0x02000002,
    FENV_Y_INEXACT = 0x01000001,
    /* SSE side: set for _RC_UP / _RC_DOWN */
    FENV_Y_UP = 0x80000200,
    FENV_Y_DOWN = 0x40000100,
    /* SSE side: set for _DN_FLUSH / _DN_FLUSH_OPERANDS_SAVE_RESULTS */
    FENV_Y_FLUSH = 0x00000400,
    FENV_Y_FLUSH_SAVE = 0x00000800
};
5593
5594 /* encodes x87/sse control/status word in ulong */
5595 static __msvcrt_ulong fenv_encode(unsigned int x, unsigned int y)
5596 {
5597     __msvcrt_ulong ret = 0;
5598
5599 #ifdef __i386__
5600     if (x & _EM_INVALID) ret |= FENV_X_INVALID;
5601     if (x & _EM_DENORMAL) ret |= FENV_X_DENORMAL;
5602     if (x & _EM_ZERODIVIDE) ret |= FENV_X_ZERODIVIDE;
5603     if (x & _EM_OVERFLOW) ret |= FENV_X_OVERFLOW;
5604     if (x & _EM_UNDERFLOW) ret |= FENV_X_UNDERFLOW;
5605     if (x & _EM_INEXACT) ret |= FENV_X_INEXACT;
5606     if (x & _IC_AFFINE) ret |= FENV_X_AFFINE;
5607     if (x & _RC_UP) ret |= FENV_X_UP;
5608     if (x & _RC_DOWN) ret |= FENV_X_DOWN;
5609     if (x & _PC_24) ret |= FENV_X_24;
5610     if (x & _PC_53) ret |= FENV_X_53;
5611 #endif
5612     x &= ~(_MCW_EM | _MCW_IC | _MCW_RC | _MCW_PC);
5613
5614     if (y & _EM_INVALID) ret |= FENV_Y_INVALID;
5615     if (y & _EM_DENORMAL) ret |= FENV_Y_DENORMAL;
5616     if (y & _EM_ZERODIVIDE) ret |= FENV_Y_ZERODIVIDE;
5617     if (y & _EM_OVERFLOW) ret |= FENV_Y_OVERFLOW;
5618     if (y & _EM_UNDERFLOW) ret |= FENV_Y_UNDERFLOW;
5619     if (y & _EM_INEXACT) ret |= FENV_Y_INEXACT;
5620     if (y & _RC_UP) ret |= FENV_Y_UP;
5621     if (y & _RC_DOWN) ret |= FENV_Y_DOWN;
5622     if (y & _DN_FLUSH) ret |= FENV_Y_FLUSH;
5623     if (y & _DN_FLUSH_OPERANDS_SAVE_RESULTS) ret |= FENV_Y_FLUSH_SAVE;
5624     y &= ~(_MCW_EM | _MCW_IC | _MCW_RC | _MCW_DN);
5625
5626     if(x || y) FIXME("unsupported flags: %x, %x\n", x, y);
5627     return ret;
5628 }
5629
5630 /* decodes x87/sse control/status word, returns FALSE on error */
5631 static BOOL fenv_decode(__msvcrt_ulong enc, unsigned int *x, unsigned int *y)
5632 {
5633     *x = *y = 0;
5634     if ((enc & FENV_X_INVALID) == FENV_X_INVALID) *x |= _EM_INVALID;
5635     if ((enc & FENV_X_DENORMAL) == FENV_X_DENORMAL) *x |= _EM_DENORMAL;
5636     if ((enc & FENV_X_ZERODIVIDE) == FENV_X_ZERODIVIDE) *x |= _EM_ZERODIVIDE;
5637     if ((enc & FENV_X_OVERFLOW) == FENV_X_OVERFLOW) *x |= _EM_OVERFLOW;
5638     if ((enc & FENV_X_UNDERFLOW) == FENV_X_UNDERFLOW) *x |= _EM_UNDERFLOW;
5639     if ((enc & FENV_X_INEXACT) == FENV_X_INEXACT) *x |= _EM_INEXACT;
5640     if ((enc & FENV_X_AFFINE) == FENV_X_AFFINE) *x |= _IC_AFFINE;
5641     if ((enc & FENV_X_UP) == FENV_X_UP) *x |= _RC_UP;
5642     if ((enc & FENV_X_DOWN) == FENV_X_DOWN) *x |= _RC_DOWN;
5643     if ((enc & FENV_X_24) == FENV_X_24) *x |= _PC_24;
5644     if ((enc & FENV_X_53) == FENV_X_53) *x |= _PC_53;
5645
5646     if ((enc & FENV_Y_INVALID) == FENV_Y_INVALID) *y |= _EM_INVALID;
5647     if ((enc & FENV_Y_DENORMAL) == FENV_Y_DENORMAL) *y |= _EM_DENORMAL;
5648     if ((enc & FENV_Y_ZERODIVIDE) == FENV_Y_ZERODIVIDE) *y |= _EM_ZERODIVIDE;
5649     if ((enc & FENV_Y_OVERFLOW) == FENV_Y_OVERFLOW) *y |= _EM_OVERFLOW;
5650     if ((enc & FENV_Y_UNDERFLOW) == FENV_Y_UNDERFLOW) *y |= _EM_UNDERFLOW;
5651     if ((enc & FENV_Y_INEXACT) == FENV_Y_INEXACT) *y |= _EM_INEXACT;
5652     if ((enc & FENV_Y_UP) == FENV_Y_UP) *y |= _RC_UP;
5653     if ((enc & FENV_Y_DOWN) == FENV_Y_DOWN) *y |= _RC_DOWN;
5654     if ((enc & FENV_Y_FLUSH) == FENV_Y_FLUSH) *y |= _DN_FLUSH;
5655     if ((enc & FENV_Y_FLUSH_SAVE) == FENV_Y_FLUSH_SAVE) *y |= _DN_FLUSH_OPERANDS_SAVE_RESULTS;
5656
5657     if (fenv_encode(*x, *y) != enc)
5658     {
5659         WARN("can't decode: %lx\n", enc);
5660         return FALSE;
5661     }
5662     return TRUE;
5663 }
5664 #elif _MSVCR_VER >= 120
5665 static __msvcrt_ulong fenv_encode(unsigned int x, unsigned int y)
5666 {
5667     if (y & _EM_DENORMAL)
5668         y = (y & ~_EM_DENORMAL) | 0x20;
5669
5670     return x | y;
5671 }
5672
5673 static BOOL fenv_decode(__msvcrt_ulong enc, unsigned int *x, unsigned int *y)
5674 {
5675     if (enc & 0x20)
5676         enc = (enc & ~0x20) | _EM_DENORMAL;
5677
5678     *x = *y = enc;
5679     return TRUE;
5680 }
5681 #endif
5682
5683 #if _MSVCR_VER>=120
5684 /*********************************************************************
5685  *              fegetenv (MSVCR120.@)
5686  */
5687 int CDECL fegetenv(fenv_t *env)
5688 {
5689 #if _MSVCR_VER>=140 && defined(__i386__)
5690     unsigned int x87, sse;
5691     __control87_2(0, 0, &x87, &sse);
5692     env->_Fe_ctl = fenv_encode(x87, sse);
5693     _statusfp2(&x87, &sse);
5694     env->_Fe_stat = fenv_encode(x87, sse);
5695 #elif _MSVCR_VER>=140
5696     env->_Fe_ctl = fenv_encode(0, _control87(0, 0));
5697     env->_Fe_stat = fenv_encode(0, _statusfp());
5698 #else
5699     env->_Fe_ctl = _controlfp(0, 0) & (_EM_INEXACT | _EM_UNDERFLOW |
5700             _EM_OVERFLOW | _EM_ZERODIVIDE | _EM_INVALID | _MCW_RC);
5701     env->_Fe_stat = _statusfp();
5702 #endif
5703     return 0;
5704 }
5705
5706 /*********************************************************************
5707  *              feupdateenv (MSVCR120.@)
5708  */
5709 int CDECL feupdateenv(const fenv_t *env)
5710 {
5711     fenv_t set;
5712     fegetenv(&set);
5713     set._Fe_ctl = env->_Fe_ctl;
5714     set._Fe_stat |= env->_Fe_stat;
5715     return fesetenv(&set);
5716 }
5717
5718 /*********************************************************************
5719  *      fetestexcept (MSVCR120.@)
5720  */
5721 int CDECL fetestexcept(int flags)
5722 {
5723     return _statusfp() & flags;
5724 }
5725
5726 /*********************************************************************
5727  *      fesetexceptflag (MSVCR120.@)
5728  */
5729 int CDECL fesetexceptflag(const fexcept_t *status, int excepts)
5730 {
5731     fenv_t env;
5732
5733     excepts &= FE_ALL_EXCEPT;
5734     if(!excepts)
5735         return 0;
5736
5737     fegetenv(&env);
5738     env._Fe_stat &= ~fenv_encode(excepts, excepts);
5739     env._Fe_stat |= *status & fenv_encode(excepts, excepts);
5740     return fesetenv(&env);
5741 }
5742
5743 /*********************************************************************
5744  *      feraiseexcept (MSVCR120.@)
5745  */
5746 int CDECL feraiseexcept(int flags)
5747 {
5748     fenv_t env;
5749
5750     flags &= FE_ALL_EXCEPT;
5751     fegetenv(&env);
5752     env._Fe_stat |= fenv_encode(flags, flags);
5753     return fesetenv(&env);
5754 }
5755
5756 /*********************************************************************
5757  *      feclearexcept (MSVCR120.@)
5758  */
5759 int CDECL feclearexcept(int flags)
5760 {
5761     fenv_t env;
5762
5763     fegetenv(&env);
5764     flags &= FE_ALL_EXCEPT;
5765     env._Fe_stat &= ~fenv_encode(flags, flags);
5766     return fesetenv(&env);
5767 }
5768
5769 /*********************************************************************
5770  *      fegetexceptflag (MSVCR120.@)
5771  */
5772 int CDECL fegetexceptflag(fexcept_t *status, int excepts)
5773 {
5774 #if _MSVCR_VER>=140 && defined(__i386__)
5775     unsigned int x87, sse;
5776     _statusfp2(&x87, &sse);
5777     *status = fenv_encode(x87 & excepts, sse & excepts);
5778 #else
5779     *status = fenv_encode(0, _statusfp() & excepts);
5780 #endif
5781     return 0;
5782 }
5783 #endif
5784
5785 #if _MSVCR_VER>=140
5786 /*********************************************************************
5787  *              __fpe_flt_rounds (UCRTBASE.@)
5788  */
5789 int CDECL __fpe_flt_rounds(void)
5790 {
5791     unsigned int fpc = _controlfp(0, 0) & _RC_CHOP;
5792
5793     TRACE("()\n");
5794
5795     switch(fpc) {
5796         case _RC_CHOP: return 0;
5797         case _RC_NEAR: return 1;
5798         case _RC_UP: return 2;
5799         default: return 3;
5800     }
5801 }
5802 #endif
5803
5804 #if _MSVCR_VER>=120
5805
5806 /*********************************************************************
5807  *              fegetround (MSVCR120.@)
5808  */
5809 int CDECL fegetround(void)
5810 {
5811     return _controlfp(0, 0) & _MCW_RC;
5812 }
5813
5814 /*********************************************************************
5815  *              fesetround (MSVCR120.@)
5816  */
5817 int CDECL fesetround(int round_mode)
5818 {
5819     if (round_mode & (~_MCW_RC))
5820         return 1;
5821     _controlfp(round_mode, _MCW_RC);
5822     return 0;
5823 }
5824
5825 #endif /* _MSVCR_VER>=120 */
5826
5827 /*********************************************************************
5828  *              _copysign (MSVCRT.@)
5829  *
5830  * Copied from musl: src/math/copysign.c
5831  */
5832 double CDECL _copysign( double x, double y )
5833 {
5834     union { double f; UINT64 i; } ux = { x }, uy = { y };
5835     ux.i &= ~0ull >> 1;
5836     ux.i |= uy.i & 1ull << 63;
5837     return ux.f;
5838 }
5839
5840 /*********************************************************************
5841  *              _finite (MSVCRT.@)
5842  */
5843 int CDECL _finite(double num)
5844 {
5845     union { double f; UINT64 i; } u = { num };
5846     return (u.i & ~0ull >> 1) < 0x7ffull << 52;
5847 }
5848
5849 /*********************************************************************
5850  *              _fpreset (MSVCRT.@)
5851  */
5852 void CDECL _fpreset(void)
5853 {
5854 #if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
5855     const unsigned int x86_cw = 0x27f;
5856     __asm__ __volatile__( "fninit; fldcw %0" : : "m" (x86_cw) );
5857     if (sse2_supported)
5858     {
5859         unsigned int cw = _MCW_EM, sw = 0;
5860         _setfp_sse(&cw, ~0, &sw, ~0);
5861     }
5862 #else
5863     unsigned int cw = _MCW_EM, sw = 0;
5864     _setfp(&cw, ~0, &sw, ~0);
5865 #endif
5866 }
5867
5868 #if _MSVCR_VER>=120
5869 /*********************************************************************
5870  *              fesetenv (MSVCR120.@)
5871  */
5872 int CDECL fesetenv(const fenv_t *env)
5873 {
5874     unsigned int x87_cw, cw, x87_stat, stat;
5875     unsigned int mask;
5876
5877     TRACE( "(%p)\n", env );
5878
5879     if (!env->_Fe_ctl && !env->_Fe_stat) {
5880         _fpreset();
5881         return 0;
5882     }
5883
5884     if (!fenv_decode(env->_Fe_ctl, &x87_cw, &cw))
5885         return 1;
5886     if (!fenv_decode(env->_Fe_stat, &x87_stat, &stat))
5887         return 1;
5888
5889 #if _MSVCR_VER >= 140
5890     mask = ~0;
5891 #else
5892     mask = _EM_INEXACT | _EM_UNDERFLOW | _EM_OVERFLOW
5893         | _EM_ZERODIVIDE | _EM_INVALID | _MCW_RC;
5894 #endif
5895
5896 #ifdef __i386__
5897     _setfp(&x87_cw, mask, &x87_stat, ~0);
5898     if (sse2_supported)
5899         _setfp_sse(&cw, mask, &stat, ~0);
5900     return 0;
5901 #else
5902     _setfp(&cw, mask, &stat, ~0);
5903     return 0;
5904 #endif
5905 }
5906 #endif
5907
5908 /*********************************************************************
5909  *              _isnan (MSVCRT.@)
5910  */
5911 int CDECL _isnan(double num)
5912 {
5913     union { double f; UINT64 i; } u = { num };
5914     return (u.i & ~0ull >> 1) > 0x7ffull << 52;
5915 }
5916
5917 static double pzero(double x)
5918 {
5919     static const double pR8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
5920         0.00000000000000000000e+00,
5921         -7.03124999999900357484e-02,
5922         -8.08167041275349795626e+00,
5923         -2.57063105679704847262e+02,
5924         -2.48521641009428822144e+03,
5925         -5.25304380490729545272e+03,
5926     }, pS8[5] = {
5927         1.16534364619668181717e+02,
5928         3.83374475364121826715e+03,
5929         4.05978572648472545552e+04,
5930         1.16752972564375915681e+05,
5931         4.76277284146730962675e+04,
5932     }, pR5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
5933         -1.14125464691894502584e-11,
5934         -7.03124940873599280078e-02,
5935         -4.15961064470587782438e+00,
5936         -6.76747652265167261021e+01,
5937         -3.31231299649172967747e+02,
5938         -3.46433388365604912451e+02,
5939     }, pS5[5] = {
5940         6.07539382692300335975e+01,
5941         1.05125230595704579173e+03,
5942         5.97897094333855784498e+03,
5943         9.62544514357774460223e+03,
5944         2.40605815922939109441e+03,
5945     }, pR3[6] = {/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */
5946         -2.54704601771951915620e-09,
5947         -7.03119616381481654654e-02,
5948         -2.40903221549529611423e+00,
5949         -2.19659774734883086467e+01,
5950         -5.80791704701737572236e+01,
5951         -3.14479470594888503854e+01,
5952     }, pS3[5] = {
5953         3.58560338055209726349e+01,
5954         3.61513983050303863820e+02,
5955         1.19360783792111533330e+03,
5956         1.12799679856907414432e+03,
5957         1.73580930813335754692e+02,
5958     }, pR2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
5959         -8.87534333032526411254e-08,
5960         -7.03030995483624743247e-02,
5961         -1.45073846780952986357e+00,
5962         -7.63569613823527770791e+00,
5963         -1.11931668860356747786e+01,
5964         -3.23364579351335335033e+00,
5965     }, pS2[5] = {
5966         2.22202997532088808441e+01,
5967         1.36206794218215208048e+02,
5968         2.70470278658083486789e+02,
5969         1.53875394208320329881e+02,
5970         1.46576176948256193810e+01,
5971     };
5972
5973     const double *p, *q;
5974     double z, r, s;
5975     UINT32 ix;
5976
5977     ix = *(ULONGLONG*)&x >> 32;
5978     ix &= 0x7fffffff;
5979     if (ix >= 0x40200000) {
5980         p = pR8;
5981         q = pS8;
5982     } else if (ix >= 0x40122E8B) {
5983         p = pR5;
5984         q = pS5;
5985     } else if (ix >= 0x4006DB6D) {
5986         p = pR3;
5987         q = pS3;
5988     } else /*ix >= 0x40000000*/ {
5989         p = pR2;
5990         q = pS2;
5991     }
5992
5993     z = 1.0 / (x * x);
5994     r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
5995     s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4]))));
5996     return 1.0 + r / s;
5997 }
5998
5999 static double qzero(double x)
6000 {
6001     static const double qR8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
6002         0.00000000000000000000e+00,
6003         7.32421874999935051953e-02,
6004         1.17682064682252693899e+01,
6005         5.57673380256401856059e+02,
6006         8.85919720756468632317e+03,
6007         3.70146267776887834771e+04,
6008     }, qS8[6] = {
6009         1.63776026895689824414e+02,
6010         8.09834494656449805916e+03,
6011         1.42538291419120476348e+05,
6012         8.03309257119514397345e+05,
6013         8.40501579819060512818e+05,
6014         -3.43899293537866615225e+05,
6015     }, qR5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
6016         1.84085963594515531381e-11,
6017         7.32421766612684765896e-02,
6018         5.83563508962056953777e+00,
6019         1.35111577286449829671e+02,
6020         1.02724376596164097464e+03,
6021         1.98997785864605384631e+03,
6022     }, qS5[6] = {
6023         8.27766102236537761883e+01,
6024         2.07781416421392987104e+03,
6025         1.88472887785718085070e+04,
6026         5.67511122894947329769e+04,
6027         3.59767538425114471465e+04,
6028         -5.35434275601944773371e+03,
6029     }, qR3[6] = {/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */
6030         4.37741014089738620906e-09,
6031         7.32411180042911447163e-02,
6032         3.34423137516170720929e+00,
6033         4.26218440745412650017e+01,
6034         1.70808091340565596283e+02,
6035         1.66733948696651168575e+02,
6036     }, qS3[6] = {
6037         4.87588729724587182091e+01,
6038         7.09689221056606015736e+02,
6039         3.70414822620111362994e+03,
6040         6.46042516752568917582e+03,
6041         2.51633368920368957333e+03,
6042         -1.49247451836156386662e+02,
6043     }, qR2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
6044         1.50444444886983272379e-07,
6045         7.32234265963079278272e-02,
6046         1.99819174093815998816e+00,
6047         1.44956029347885735348e+01,
6048         3.16662317504781540833e+01,
6049         1.62527075710929267416e+01,
6050     }, qS2[6] = {
6051         3.03655848355219184498e+01,
6052         2.69348118608049844624e+02,
6053         8.44783757595320139444e+02,
6054         8.82935845112488550512e+02,
6055         2.12666388511798828631e+02,
6056         -5.31095493882666946917e+00,
6057     };
6058
6059     const double *p, *q;
6060     double s, r, z;
6061     unsigned int ix;
6062
6063     ix = *(ULONGLONG*)&x >> 32;
6064     ix &= 0x7fffffff;
6065     if (ix >= 0x40200000) {
6066         p = qR8;
6067         q = qS8;
6068     } else if (ix >= 0x40122E8B) {
6069         p = qR5;
6070         q = qS5;
6071     } else if (ix >= 0x4006DB6D) {
6072         p = qR3;
6073         q = qS3;
6074     } else /*ix >= 0x40000000*/ {
6075         p = qR2;
6076         q = qS2;
6077     }
6078
6079     z = 1.0 / (x * x);
6080     r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
6081     s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5])))));
6082     return (-0.125 + r / s) / x;
6083 }
6084
6085 /* j0 and y0 approximation for |x|>=2 */
6086 static double j0_y0_approx(unsigned int ix, double x, BOOL y0)
6087 {
6088     static const double invsqrtpi = 5.64189583547756279280e-01;
6089
6090     double s, c, ss, cc, z;
6091
6092     s = sin(x);
6093     c = cos(x);
6094     if (y0) c = -c;
6095     cc = s + c;
6096     /* avoid overflow in 2*x, big ulp error when x>=0x1p1023 */
6097     if (ix < 0x7fe00000) {
6098         ss = s - c;
6099         z = -cos(2 * x);
6100         if (s * c < 0) cc = z / ss;
6101         else ss = z / cc;
6102         if (ix < 0x48000000) {
6103             if (y0) ss = -ss;
6104             cc = pzero(x) * cc - qzero(x) * ss;
6105         }
6106     }
6107     return invsqrtpi * cc / sqrt(x);
6108 }
6109
6110 /*********************************************************************
6111  *              _j0 (MSVCRT.@)
6112  *
6113  * Copied from musl: src/math/j0.c
6114  */
6115 double CDECL _j0(double x)
6116 {
6117     static const double R02 =  1.56249999999999947958e-02,
6118             R03 = -1.89979294238854721751e-04,
6119             R04 =  1.82954049532700665670e-06,
6120             R05 = -4.61832688532103189199e-09,
6121             S01 =  1.56191029464890010492e-02,
6122             S02 =  1.16926784663337450260e-04,
6123             S03 =  5.13546550207318111446e-07,
6124             S04 =  1.16614003333790000205e-09;
6125
6126     double z, r, s;
6127     unsigned int ix;
6128
6129     ix = *(ULONGLONG*)&x >> 32;
6130     ix &= 0x7fffffff;
6131
6132     /* j0(+-inf)=0, j0(nan)=nan */
6133     if (ix >= 0x7ff00000)
6134         return math_error(_DOMAIN, "_j0", x, 0, 1 / (x * x));
6135     x = fabs(x);
6136
6137     if (ix >= 0x40000000) {  /* |x| >= 2 */
6138         /* large ulp error near zeros: 2.4, 5.52, 8.6537,.. */
6139         return j0_y0_approx(ix, x, FALSE);
6140     }
6141
6142     if (ix >= 0x3f200000) {  /* |x| >= 2**-13 */
6143         /* up to 4ulp error close to 2 */
6144         z = x * x;
6145         r = z * (R02 + z * (R03 + z * (R04 + z * R05)));
6146         s = 1 + z * (S01 + z * (S02 + z * (S03 + z * S04)));
6147         return (1 + x / 2) * (1 - x / 2) + z * (r / s);
6148     }
6149
6150     /* 1 - x*x/4 */
6151     /* prevent underflow */
6152     /* inexact should be raised when x!=0, this is not done correctly */
6153     if (ix >= 0x38000000)  /* |x| >= 2**-127 */
6154         x = 0.25 * x * x;
6155     return 1 - x;
6156 }
6157
6158 static double pone(double x)
6159 {
6160     static const double pr8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
6161         0.00000000000000000000e+00,
6162         1.17187499999988647970e-01,
6163         1.32394806593073575129e+01,
6164         4.12051854307378562225e+02,
6165         3.87474538913960532227e+03,
6166         7.91447954031891731574e+03,
6167     }, ps8[5] = {
6168         1.14207370375678408436e+02,
6169         3.65093083420853463394e+03,
6170         3.69562060269033463555e+04,
6171         9.76027935934950801311e+04,
6172         3.08042720627888811578e+04,
6173     }, pr5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
6174         1.31990519556243522749e-11,
6175         1.17187493190614097638e-01,
6176         6.80275127868432871736e+00,
6177         1.08308182990189109773e+02,
6178         5.17636139533199752805e+02,
6179         5.28715201363337541807e+02,
6180     }, ps5[5] = {
6181         5.92805987221131331921e+01,
6182         9.91401418733614377743e+02,
6183         5.35326695291487976647e+03,
6184         7.84469031749551231769e+03,
6185         1.50404688810361062679e+03,
6186     }, pr3[6] = {
6187         3.02503916137373618024e-09,
6188         1.17186865567253592491e-01,
6189         3.93297750033315640650e+00,
6190         3.51194035591636932736e+01,
6191         9.10550110750781271918e+01,
6192         4.85590685197364919645e+01,
6193     }, ps3[5] = {
6194         3.47913095001251519989e+01,
6195         3.36762458747825746741e+02,
6196         1.04687139975775130551e+03,
6197         8.90811346398256432622e+02,
6198         1.03787932439639277504e+02,
6199     }, pr2[6] = { /* for x in [2.8570,2]=1/[0.3499,0.5] */
6200         1.07710830106873743082e-07,
6201         1.17176219462683348094e-01,
6202         2.36851496667608785174e+00,
6203         1.22426109148261232917e+01,
6204         1.76939711271687727390e+01,
6205         5.07352312588818499250e+00,
6206     }, ps2[5] = {
6207         2.14364859363821409488e+01,
6208         1.25290227168402751090e+02,
6209         2.32276469057162813669e+02,
6210         1.17679373287147100768e+02,
6211         8.36463893371618283368e+00,
6212     };
6213
6214     const double *p, *q;
6215     double z, r, s;
6216     unsigned int ix;
6217
6218     ix = *(ULONGLONG*)&x >> 32;
6219     ix &= 0x7fffffff;
6220     if (ix >= 0x40200000) {
6221         p = pr8;
6222         q = ps8;
6223     } else if (ix >= 0x40122E8B) {
6224         p = pr5;
6225         q = ps5;
6226     } else if (ix >= 0x4006DB6D) {
6227         p = pr3;
6228         q = ps3;
6229     } else /*ix >= 0x40000000*/ {
6230         p = pr2;
6231         q = ps2;
6232     }
6233     z = 1.0 / (x * x);
6234     r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
6235     s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4]))));
6236     return 1.0 + r / s;
6237 }
6238
6239 static double qone(double x)
6240 {
6241     static const double qr8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
6242         0.00000000000000000000e+00,
6243         -1.02539062499992714161e-01,
6244         -1.62717534544589987888e+01,
6245         -7.59601722513950107896e+02,
6246         -1.18498066702429587167e+04,
6247         -4.84385124285750353010e+04,
6248     }, qs8[6] = {
6249         1.61395369700722909556e+02,
6250         7.82538599923348465381e+03,
6251         1.33875336287249578163e+05,
6252         7.19657723683240939863e+05,
6253         6.66601232617776375264e+05,
6254         -2.94490264303834643215e+05,
6255     }, qr5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
6256         -2.08979931141764104297e-11,
6257         -1.02539050241375426231e-01,
6258         -8.05644828123936029840e+00,
6259         -1.83669607474888380239e+02,
6260         -1.37319376065508163265e+03,
6261         -2.61244440453215656817e+03,
6262     }, qs5[6] = {
6263         8.12765501384335777857e+01,
6264         1.99179873460485964642e+03,
6265         1.74684851924908907677e+04,
6266         4.98514270910352279316e+04,
6267         2.79480751638918118260e+04,
6268         -4.71918354795128470869e+03,
6269     }, qr3[6] = {
6270         -5.07831226461766561369e-09,
6271         -1.02537829820837089745e-01,
6272         -4.61011581139473403113e+00,
6273         -5.78472216562783643212e+01,
6274         -2.28244540737631695038e+02,
6275         -2.19210128478909325622e+02,
6276     }, qs3[6] = {
6277         4.76651550323729509273e+01,
6278         6.73865112676699709482e+02,
6279         3.38015286679526343505e+03,
6280         5.54772909720722782367e+03,
6281         1.90311919338810798763e+03,
6282         -1.35201191444307340817e+02,
6283     }, qr2[6] = { /* for x in [2.8570,2]=1/[0.3499,0.5] */
6284         -1.78381727510958865572e-07,
6285         -1.02517042607985553460e-01,
6286         -2.75220568278187460720e+00,
6287         -1.96636162643703720221e+01,
6288         -4.23253133372830490089e+01,
6289         -2.13719211703704061733e+01,
6290     }, qs2[6] = {
6291         2.95333629060523854548e+01,
6292         2.52981549982190529136e+02,
6293         7.57502834868645436472e+02,
6294         7.39393205320467245656e+02,
6295         1.55949003336666123687e+02,
6296         -4.95949898822628210127e+00,
6297     };
6298
6299     const double *p, *q;
6300     double s, r, z;
6301     unsigned int ix;
6302
6303     ix = *(ULONGLONG*)&x >> 32;
6304     ix &= 0x7fffffff;
6305     if (ix >= 0x40200000) {
6306         p = qr8;
6307         q = qs8;
6308     } else if (ix >= 0x40122E8B) {
6309         p = qr5;
6310         q = qs5;
6311     } else if (ix >= 0x4006DB6D) {
6312         p = qr3;
6313         q = qs3;
6314     } else /*ix >= 0x40000000*/ {
6315         p = qr2;
6316         q = qs2;
6317     }
6318     z = 1.0 / (x * x);
6319     r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
6320     s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5])))));
6321     return (0.375 + r / s) / x;
6322 }
6323
6324 static double j1_y1_approx(unsigned int ix, double x, BOOL y1, int sign)
6325 {
6326     static const double invsqrtpi = 5.64189583547756279280e-01;
6327
6328     double z, s, c, ss, cc;
6329
6330     s = sin(x);
6331     if (y1) s = -s;
6332     c = cos(x);
6333     cc = s - c;
6334     if (ix < 0x7fe00000) {
6335         ss = -s - c;
6336         z = cos(2 * x);
6337         if (s * c > 0) cc = z / ss;
6338         else ss = z / cc;
6339         if (ix < 0x48000000) {
6340             if (y1)
6341                 ss = -ss;
6342             cc = pone(x) * cc - qone(x) * ss;
6343         }
6344     }
6345     if (sign)
6346         cc = -cc;
6347     return invsqrtpi * cc / sqrt(x);
6348 }
6349
6350 /*********************************************************************
6351  *              _j1 (MSVCRT.@)
6352  *
6353  * Copied from musl: src/math/j1.c
6354  */
6355 double CDECL _j1(double x)
6356 {
6357     static const double r00 = -6.25000000000000000000e-02,
6358         r01 =  1.40705666955189706048e-03,
6359         r02 = -1.59955631084035597520e-05,
6360         r03 =  4.96727999609584448412e-08,
6361         s01 =  1.91537599538363460805e-02,
6362         s02 =  1.85946785588630915560e-04,
6363         s03 =  1.17718464042623683263e-06,
6364         s04 =  5.04636257076217042715e-09,
6365         s05 =  1.23542274426137913908e-11;
6366
6367     double z, r, s;
6368     unsigned int ix;
6369     int sign;
6370
6371     ix = *(ULONGLONG*)&x >> 32;
6372     sign = ix >> 31;
6373     ix &= 0x7fffffff;
6374     if (ix >= 0x7ff00000)
6375         return math_error(isnan(x) ? 0 : _DOMAIN, "_j1", x, 0, 1 / (x * x));
6376     if (ix >= 0x40000000)  /* |x| >= 2 */
6377         return j1_y1_approx(ix, fabs(x), FALSE, sign);
6378     if (ix >= 0x38000000) {  /* |x| >= 2**-127 */
6379         z = x * x;
6380         r = z * (r00 + z * (r01 + z * (r02 + z * r03)));
6381         s = 1 + z * (s01 + z * (s02 + z * (s03 + z * (s04 + z * s05))));
6382         z = r / s;
6383     } else {
6384         /* avoid underflow, raise inexact if x!=0 */
6385         z = x;
6386     }
6387     return (0.5 + z) * x;
6388 }
6389
6390 /*********************************************************************
6391  *              _jn (MSVCRT.@)
6392  *
6393  * Copied from musl: src/math/jn.c
6394  */
6395 double CDECL _jn(int n, double x)
6396 {
6397     static const double invsqrtpi = 5.64189583547756279280e-01;
6398
6399     unsigned int ix, lx;
6400     int nm1, i, sign;
6401     double a, b, temp;
6402
6403     ix = *(ULONGLONG*)&x >> 32;
6404     lx = *(ULONGLONG*)&x;
6405     sign = ix >> 31;
6406     ix &= 0x7fffffff;
6407
6408     if ((ix | (lx | -lx) >> 31) > 0x7ff00000) /* nan */
6409         return x;
6410
6411     if (n == 0)
6412         return _j0(x);
6413     if (n < 0) {
6414         nm1 = -(n + 1);
6415         x = -x;
6416         sign ^= 1;
6417     } else {
6418         nm1 = n-1;
6419     }
6420     if (nm1 == 0)
6421         return j1(x);
6422
6423     sign &= n;  /* even n: 0, odd n: signbit(x) */
6424     x = fabs(x);
6425     if ((ix | lx) == 0 || ix == 0x7ff00000)  /* if x is 0 or inf */
6426         b = 0.0;
6427     else if (nm1 < x) {
6428         if (ix >= 0x52d00000) { /* x > 2**302 */
6429             switch(nm1 & 3) {
6430             case 0:
6431                 temp = -cos(x) + sin(x);
6432                 break;
6433             case 1:
6434                 temp = -cos(x) - sin(x);
6435                 break;
6436             case 2:
6437                 temp =  cos(x) - sin(x);
6438                 break;
6439             default:
6440                 temp =  cos(x) + sin(x);
6441                 break;
6442             }
6443             b = invsqrtpi * temp / sqrt(x);
6444         } else {
6445             a = _j0(x);
6446             b = _j1(x);
6447             for (i = 0; i < nm1; ) {
6448                 i++;
6449                 temp = b;
6450                 b = b * (2.0 * i / x) - a; /* avoid underflow */
6451                 a = temp;
6452             }
6453         }
6454     } else {
6455         if (ix < 0x3e100000) { /* x < 2**-29 */
6456             if (nm1 > 32)  /* underflow */
6457                 b = 0.0;
6458             else {
6459                 temp = x * 0.5;
6460                 b = temp;
6461                 a = 1.0;
6462                 for (i = 2; i <= nm1 + 1; i++) {
6463                     a *= (double)i; /* a = n! */
6464                     b *= temp;      /* b = (x/2)^n */
6465                 }
6466                 b = b / a;
6467             }
6468         } else {
6469             double t, q0, q1, w, h, z, tmp, nf;
6470             int k;
6471
6472             nf = nm1 + 1.0;
6473             w = 2 * nf / x;
6474             h = 2 / x;
6475             z = w + h;
6476             q0 = w;
6477             q1 = w * z - 1.0;
6478             k = 1;
6479             while (q1 < 1.0e9) {
6480                 k += 1;
6481                 z += h;
6482                 tmp = z * q1 - q0;
6483                 q0 = q1;
6484                 q1 = tmp;
6485             }
6486             for (t = 0.0, i = k; i >= 0; i--)
6487                 t = 1 / (2 * (i + nf) / x - t);
6488             a = t;
6489             b = 1.0;
6490             tmp = nf * log(fabs(w));
6491             if (tmp < 7.09782712893383973096e+02) {
6492                 for (i = nm1; i > 0; i--) {
6493                     temp = b;
6494                     b = b * (2.0 * i) / x - a;
6495                     a = temp;
6496                 }
6497             } else {
6498                 for (i = nm1; i > 0; i--) {
6499                     temp = b;
6500                     b = b * (2.0 * i) / x - a;
6501                     a = temp;
6502                     /* scale b to avoid spurious overflow */
6503                     if (b > 0x1p500) {
6504                         a /= b;
6505                         t /= b;
6506                         b  = 1.0;
6507                     }
6508                 }
6509             }
6510             z = j0(x);
6511             w = j1(x);
6512             if (fabs(z) >= fabs(w))
6513                 b = t * z / b;
6514             else
6515                 b = t * w / a;
6516         }
6517     }
6518     return sign ? -b : b;
6519 }
6520
6521 /*********************************************************************
6522  *              _y0 (MSVCRT.@)
6523  */
6524 double CDECL _y0(double x)
6525 {
6526     static const double tpi = 6.36619772367581382433e-01,
6527         u00  = -7.38042951086872317523e-02,
6528         u01  =  1.76666452509181115538e-01,
6529         u02  = -1.38185671945596898896e-02,
6530         u03  =  3.47453432093683650238e-04,
6531         u04  = -3.81407053724364161125e-06,
6532         u05  =  1.95590137035022920206e-08,
6533         u06  = -3.98205194132103398453e-11,
6534         v01  =  1.27304834834123699328e-02,
6535         v02  =  7.60068627350353253702e-05,
6536         v03  =  2.59150851840457805467e-07,
6537         v04  =  4.41110311332675467403e-10;
6538
6539     double z, u, v;
6540     unsigned int ix, lx;
6541
6542     ix = *(ULONGLONG*)&x >> 32;
6543     lx = *(ULONGLONG*)&x;
6544
6545     /* y0(nan)=nan, y0(<0)=nan, y0(0)=-inf, y0(inf)=0 */
6546     if ((ix << 1 | lx) == 0)
6547         return math_error(_OVERFLOW, "_y0", x, 0, -INFINITY);
6548     if (isnan(x))
6549         return x;
6550     if (ix >> 31)
6551         return math_error(_DOMAIN, "_y0", x, 0, 0 / (x - x));
6552     if (ix >= 0x7ff00000)
6553         return 1 / x;
6554
6555     if (ix >= 0x40000000) {  /* x >= 2 */
6556         /* large ulp errors near zeros: 3.958, 7.086,.. */
6557         return j0_y0_approx(ix, x, TRUE);
6558     }
6559
6560     if (ix >= 0x3e400000) {  /* x >= 2**-27 */
6561         /* large ulp error near the first zero, x ~= 0.89 */
6562         z = x * x;
6563         u = u00 + z * (u01 + z * (u02 + z * (u03 + z * (u04 + z * (u05 + z * u06)))));
6564         v = 1.0 + z * (v01 + z * (v02 + z * (v03 + z * v04)));
6565         return u / v + tpi * (j0(x) * log(x));
6566     }
6567     return u00 + tpi * log(x);
6568 }
6569
6570 /*********************************************************************
6571  *              _y1 (MSVCRT.@)
6572  */
6573 double CDECL _y1(double x)
6574 {
6575     static const double tpi = 6.36619772367581382433e-01,
6576         u00 =  -1.96057090646238940668e-01,
6577         u01 = 5.04438716639811282616e-02,
6578         u02 = -1.91256895875763547298e-03,
6579         u03 = 2.35252600561610495928e-05,
6580         u04 = -9.19099158039878874504e-08,
6581         v00 = 1.99167318236649903973e-02,
6582         v01 = 2.02552581025135171496e-04,
6583         v02 = 1.35608801097516229404e-06,
6584         v03 = 6.22741452364621501295e-09,
6585         v04 = 1.66559246207992079114e-11;
6586
6587     double z, u, v;
6588     unsigned int ix, lx;
6589
6590     ix = *(ULONGLONG*)&x >> 32;
6591     lx = *(ULONGLONG*)&x;
6592
6593     /* y1(nan)=nan, y1(<0)=nan, y1(0)=-inf, y1(inf)=0 */
6594     if ((ix << 1 | lx) == 0)
6595         return math_error(_OVERFLOW, "_y1", x, 0, -INFINITY);
6596     if (isnan(x))
6597         return x;
6598     if (ix >> 31)
6599         return math_error(_DOMAIN, "_y1", x, 0, 0 / (x - x));
6600     if (ix >= 0x7ff00000)
6601         return 1 / x;
6602
6603     if (ix >= 0x40000000)  /* x >= 2 */
6604         return j1_y1_approx(ix, x, TRUE, 0);
6605     if (ix < 0x3c900000)  /* x < 2**-54 */
6606         return -tpi / x;
6607     z = x * x;
6608     u = u00 + z * (u01 + z * (u02 + z * (u03 + z * u04)));
6609     v = 1 + z * (v00 + z * (v01 + z * (v02 + z * (v03 + z * v04))));
6610     return x * (u / v) + tpi * (j1(x) * log(x) - 1 / x);
6611 }
6612
6613 /*********************************************************************
6614  *              _yn (MSVCRT.@)
6615  *
6616  * Copied from musl: src/math/jn.c
6617  */
6618 double CDECL _yn(int n, double x)
6619 {
6620     static const double invsqrtpi = 5.64189583547756279280e-01;
6621
6622     unsigned int ix, lx, ib;
6623     int nm1, sign, i;
6624     double a, b, temp;
6625
6626     ix = *(ULONGLONG*)&x >> 32;
6627     lx = *(ULONGLONG*)&x;
6628     sign = ix >> 31;
6629     ix &= 0x7fffffff;
6630
6631     if ((ix | (lx | -lx) >> 31) > 0x7ff00000) /* nan */
6632         return x;
6633     if (sign && (ix | lx) != 0) /* x < 0 */
6634         return math_error(_DOMAIN, "_y1", x, 0, 0 / (x - x));
6635     if (ix == 0x7ff00000)
6636         return 0.0;
6637
6638     if (n == 0)
6639         return y0(x);
6640     if (n < 0) {
6641         nm1 = -(n + 1);
6642         sign = n & 1;
6643     } else {
6644         nm1 = n - 1;
6645         sign = 0;
6646     }
6647     if (nm1 == 0)
6648         return sign ? -y1(x) : y1(x);
6649
6650     if (ix >= 0x52d00000) { /* x > 2**302 */
6651         switch(nm1 & 3) {
6652         case 0:
6653             temp = -sin(x) - cos(x);
6654             break;
6655         case 1:
6656             temp = -sin(x) + cos(x);
6657             break;
6658         case 2:
6659             temp = sin(x) + cos(x);
6660             break;
6661         default:
6662             temp = sin(x) - cos(x);
6663             break;
6664         }
6665         b = invsqrtpi * temp / sqrt(x);
6666     } else {
6667         a = y0(x);
6668         b = y1(x);
6669         /* quit if b is -inf */
6670         ib = *(ULONGLONG*)&b >> 32;
6671         for (i = 0; i < nm1 && ib != 0xfff00000;) {
6672             i++;
6673             temp = b;
6674             b = (2.0 * i / x) * b - a;
6675             ib = *(ULONGLONG*)&b >> 32;
6676             a = temp;
6677         }
6678     }
6679     return sign ? -b : b;
6680 }
6681
6682 #if _MSVCR_VER>=120
6683
6684 /*********************************************************************
6685  *              _nearbyint (MSVCR120.@)
6686  *
6687  * Based on musl: src/math/nearbyteint.c
6688  */
6689 double CDECL nearbyint(double x)
6690 {
6691     BOOL update_cw, update_sw;
6692     unsigned int cw, sw;
6693
6694     _setfp(&cw, 0, &sw, 0);
6695     update_cw = !(cw & _EM_INEXACT);
6696     update_sw = !(sw & _SW_INEXACT);
6697     if (update_cw)
6698     {
6699         cw |= _EM_INEXACT;
6700         _setfp(&cw, _EM_INEXACT, NULL, 0);
6701     }
6702     x = rint(x);
6703     if (update_cw || update_sw)
6704     {
6705         sw = 0;
6706         cw &= ~_EM_INEXACT;
6707         _setfp(update_cw ? &cw : NULL, _EM_INEXACT,
6708                 update_sw ? &sw : NULL, _SW_INEXACT);
6709     }
6710     return x;
6711 }
6712
6713 /*********************************************************************
6714  *              _nearbyintf (MSVCR120.@)
6715  *
6716  * Based on musl: src/math/nearbyteintf.c
6717  */
6718 float CDECL nearbyintf(float x)
6719 {
6720     BOOL update_cw, update_sw;
6721     unsigned int cw, sw;
6722
6723     _setfp(&cw, 0, &sw, 0);
6724     update_cw = !(cw & _EM_INEXACT);
6725     update_sw = !(sw & _SW_INEXACT);
6726     if (update_cw)
6727     {
6728         cw |= _EM_INEXACT;
6729         _setfp(&cw, _EM_INEXACT, NULL, 0);
6730     }
6731     x = rintf(x);
6732     if (update_cw || update_sw)
6733     {
6734         sw = 0;
6735         cw &= ~_EM_INEXACT;
6736         _setfp(update_cw ? &cw : NULL, _EM_INEXACT,
6737                 update_sw ? &sw : NULL, _SW_INEXACT);
6738     }
6739     return x;
6740 }
6741
6742 /*********************************************************************
6743  *              nexttoward (MSVCR120.@)
6744  */
6745 double CDECL MSVCRT_nexttoward(double num, double next)
6746 {
6747     return _nextafter(num, next);
6748 }
6749
6750 /*********************************************************************
6751  *              nexttowardf (MSVCR120.@)
6752  *
6753  * Copied from musl: src/math/nexttowardf.c
6754  */
6755 float CDECL MSVCRT_nexttowardf(float x, double y)
6756 {
6757     unsigned int ix = *(unsigned int*)&x;
6758     unsigned int e;
6759     float ret;
6760
6761     if (isnan(x) || isnan(y))
6762         return x + y;
6763     if (x == y)
6764         return y;
6765     if (x == 0) {
6766         ix = 1;
6767         if (signbit(y))
6768             ix |= 0x80000000;
6769     } else if (x < y) {
6770         if (signbit(x))
6771             ix--;
6772         else
6773             ix++;
6774     } else {
6775         if (signbit(x))
6776             ix++;
6777         else
6778             ix--;
6779     }
6780     e = ix & 0x7f800000;
6781     /* raise overflow if ix is infinite and x is finite */
6782     if (e == 0x7f800000) {
6783         fp_barrierf(x + x);
6784         *_errno() = ERANGE;
6785     }
6786     ret = *(float*)&ix;
6787     /* raise underflow if ret is subnormal or zero */
6788     if (e == 0) {
6789         fp_barrierf(x * x + ret * ret);
6790         *_errno() = ERANGE;
6791     }
6792     return ret;
6793 }
6794
6795 #endif /* _MSVCR_VER>=120 */
6796
6797 /*********************************************************************
6798  *              _nextafter (MSVCRT.@)
6799  *
6800  * Copied from musl: src/math/nextafter.c
6801  */
6802 double CDECL _nextafter(double x, double y)
6803 {
6804     ULONGLONG llx = *(ULONGLONG*)&x;
6805     ULONGLONG lly = *(ULONGLONG*)&y;
6806     ULONGLONG ax, ay;
6807     int e;
6808
6809     if (isnan(x) || isnan(y))
6810         return x + y;
6811     if (llx == lly) {
6812         if (_fpclass(y) & (_FPCLASS_ND | _FPCLASS_PD | _FPCLASS_NZ | _FPCLASS_PZ ))
6813             *_errno() = ERANGE;
6814         return y;
6815     }
6816     ax = llx & -1ULL / 2;
6817     ay = lly & -1ULL / 2;
6818     if (ax == 0) {
6819         if (ay == 0)
6820             return y;
6821         llx = (lly & 1ULL << 63) | 1;
6822     } else if (ax > ay || ((llx ^ lly) & 1ULL << 63))
6823         llx--;
6824     else
6825         llx++;
6826     e = llx >> 52 & 0x7ff;
6827     /* raise overflow if llx is infinite and x is finite */
6828     if (e == 0x7ff) {
6829         fp_barrier(x + x);
6830         *_errno() = ERANGE;
6831     }
6832     /* raise underflow if llx is subnormal or zero */
6833     y = *(double*)&llx;
6834     if (e == 0) {
6835         fp_barrier(x * x + y * y);
6836         *_errno() = ERANGE;
6837     }
6838     return y;
6839 }
6840
6841 /*********************************************************************
6842  *              _ecvt (MSVCRT.@)
6843  */
6844 char * CDECL _ecvt( double number, int ndigits, int *decpt, int *sign )
6845 {
6846     int prec, len;
6847     thread_data_t *data = msvcrt_get_thread_data();
6848     /* FIXME: check better for overflow (native supports over 300 chars) */
6849     ndigits = min( ndigits, 80 - 8); /* 8 : space for sign, dec point, "e",
6850                                       * 4 for exponent and one for
6851                                       * terminating '\0' */
6852     if (!data->efcvt_buffer)
6853         data->efcvt_buffer = malloc( 80 ); /* ought to be enough */
6854
6855     /* handle cases with zero ndigits or less */
6856     prec = ndigits;
6857     if( prec < 1) prec = 2;
6858     len = _snprintf(data->efcvt_buffer, 80, "%.*le", prec - 1, number);
6859
6860     if (data->efcvt_buffer[0] == '-') {
6861         memmove( data->efcvt_buffer, data->efcvt_buffer + 1, len-- );
6862         *sign = 1;
6863     } else *sign = 0;
6864
6865     /* take the decimal "point away */
6866     if( prec != 1)
6867         memmove( data->efcvt_buffer + 1, data->efcvt_buffer + 2, len - 1 );
6868     /* take the exponential "e" out */
6869     data->efcvt_buffer[ prec] = '\0';
6870     /* read the exponent */
6871     sscanf( data->efcvt_buffer + prec + 1, "%d", decpt);
6872     (*decpt)++;
6873     /* adjust for some border cases */
6874     if( data->efcvt_buffer[0] == '0')/* value is zero */
6875         *decpt = 0;
6876     /* handle cases with zero ndigits or less */
6877     if( ndigits < 1){
6878         if( data->efcvt_buffer[ 0] >= '5')
6879             (*decpt)++;
6880         data->efcvt_buffer[ 0] = '\0';
6881     }
6882     TRACE("out=\"%s\"\n",data->efcvt_buffer);
6883     return data->efcvt_buffer;
6884 }
6885
6886 /*********************************************************************
6887  *              _ecvt_s (MSVCRT.@)
6888  */
6889 int CDECL _ecvt_s( char *buffer, size_t length, double number, int ndigits, int *decpt, int *sign )
6890 {
6891     int prec, len;
6892     char *result;
6893
6894     if (!MSVCRT_CHECK_PMT(buffer != NULL)) return EINVAL;
6895     if (!MSVCRT_CHECK_PMT(decpt != NULL)) return EINVAL;
6896     if (!MSVCRT_CHECK_PMT(sign != NULL)) return EINVAL;
6897     if (!MSVCRT_CHECK_PMT_ERR( length > 2, ERANGE )) return ERANGE;
6898     if (!MSVCRT_CHECK_PMT_ERR(ndigits < (int)length - 1, ERANGE )) return ERANGE;
6899
6900     /* handle cases with zero ndigits or less */
6901     prec = ndigits;
6902     if( prec < 1) prec = 2;
6903     result = malloc(prec + 8);
6904
6905     len = _snprintf(result, prec + 8, "%.*le", prec - 1, number);
6906     if (result[0] == '-') {
6907         memmove( result, result + 1, len-- );
6908         *sign = 1;
6909     } else *sign = 0;
6910
6911     /* take the decimal "point away */
6912     if( prec != 1)
6913         memmove( result + 1, result + 2, len - 1 );
6914     /* take the exponential "e" out */
6915     result[ prec] = '\0';
6916     /* read the exponent */
6917     sscanf( result + prec + 1, "%d", decpt);
6918     (*decpt)++;
6919     /* adjust for some border cases */
6920     if( result[0] == '0')/* value is zero */
6921         *decpt = 0;
6922     /* handle cases with zero ndigits or less */
6923     if( ndigits < 1){
6924         if( result[ 0] >= '5')
6925             (*decpt)++;
6926         result[ 0] = '\0';
6927     }
6928     memcpy( buffer, result, max(ndigits + 1, 1) );
6929     free( result );
6930     return 0;
6931 }
6932
6933 /***********************************************************************
6934  *              _fcvt  (MSVCRT.@)
6935  */
6936 char * CDECL _fcvt( double number, int ndigits, int *decpt, int *sign )
6937 {
6938     thread_data_t *data = msvcrt_get_thread_data();
6939     int stop, dec1, dec2;
6940     char *ptr1, *ptr2, *first;
6941     char buf[80]; /* ought to be enough */
6942     char decimal_separator = get_locinfo()->lconv->decimal_point[0];
6943
6944     if (!data->efcvt_buffer)
6945         data->efcvt_buffer = malloc( 80 ); /* ought to be enough */
6946
6947     stop = _snprintf(buf, 80, "%.*f", ndigits < 0 ? 0 : ndigits, number);
6948     ptr1 = buf;
6949     ptr2 = data->efcvt_buffer;
6950     first = NULL;
6951     dec1 = 0;
6952     dec2 = 0;
6953
6954     if (*ptr1 == '-') {
6955         *sign = 1;
6956         ptr1++;
6957     } else *sign = 0;
6958
6959     /* For numbers below the requested resolution, work out where
6960        the decimal point will be rather than finding it in the string */
6961     if (number < 1.0 && number > 0.0) {
6962         dec2 = log10(number + 1e-10);
6963         if (-dec2 <= ndigits) dec2 = 0;
6964     }
6965
6966     /* If requested digits is zero or less, we will need to truncate
6967      * the returned string */
6968     if (ndigits < 1) {
6969         stop += ndigits;
6970     }
6971
6972     while (*ptr1 == '0') ptr1++; /* Skip leading zeroes */
6973     while (*ptr1 != '\0' && *ptr1 != decimal_separator) {
6974         if (!first) first = ptr2;
6975         if ((ptr1 - buf) < stop) {
6976             *ptr2++ = *ptr1++;
6977         } else {
6978             ptr1++;
6979         }
6980         dec1++;
6981     }
6982
6983     if (ndigits > 0) {
6984         ptr1++;
6985         if (!first) {
6986             while (*ptr1 == '0') { /* Process leading zeroes */
6987                 *ptr2++ = *ptr1++;
6988                 dec1--;
6989             }
6990         }
6991         while (*ptr1 != '\0') {
6992             if (!first) first = ptr2;
6993             *ptr2++ = *ptr1++;
6994         }
6995     }
6996
6997     *ptr2 = '\0';
6998
6999     /* We never found a non-zero digit, then our number is either
7000      * smaller than the requested precision, or 0.0 */
7001     if (!first) {
7002         if (number > 0.0) {
7003             first = ptr2;
7004         } else {
7005             first = data->efcvt_buffer;
7006             dec1 = 0;
7007         }
7008     }
7009
7010     *decpt = dec2 ? dec2 : dec1;
7011     return first;
7012 }
7013
7014 /***********************************************************************
7015  *              _fcvt_s  (MSVCRT.@)
7016  */
7017 int CDECL _fcvt_s(char* outbuffer, size_t size, double number, int ndigits, int *decpt, int *sign)
7018 {
7019     int stop, dec1, dec2;
7020     char *ptr1, *ptr2, *first;
7021     char buf[80]; /* ought to be enough */
7022     char decimal_separator = get_locinfo()->lconv->decimal_point[0];
7023
7024     if (!outbuffer || !decpt || !sign || size == 0)
7025     {
7026         *_errno() = EINVAL;
7027         return EINVAL;
7028     }
7029
7030     stop = _snprintf(buf, 80, "%.*f", ndigits < 0 ? 0 : ndigits, number);
7031     ptr1 = buf;
7032     ptr2 = outbuffer;
7033     first = NULL;
7034     dec1 = 0;
7035     dec2 = 0;
7036
7037     if (*ptr1 == '-') {
7038         *sign = 1;
7039         ptr1++;
7040     } else *sign = 0;
7041
7042     /* For numbers below the requested resolution, work out where
7043        the decimal point will be rather than finding it in the string */
7044     if (number < 1.0 && number > 0.0) {
7045         dec2 = log10(number + 1e-10);
7046         if (-dec2 <= ndigits) dec2 = 0;
7047     }
7048
7049     /* If requested digits is zero or less, we will need to truncate
7050      * the returned string */
7051     if (ndigits < 1) {
7052         stop += ndigits;
7053     }
7054
7055     while (*ptr1 == '0') ptr1++; /* Skip leading zeroes */
7056     while (*ptr1 != '\0' && *ptr1 != decimal_separator) {
7057         if (!first) first = ptr2;
7058         if ((ptr1 - buf) < stop) {
7059             if (size > 1) {
7060                 *ptr2++ = *ptr1++;
7061                 size--;
7062             }
7063         } else {
7064             ptr1++;
7065         }
7066         dec1++;
7067     }
7068
7069     if (ndigits > 0) {
7070         ptr1++;
7071         if (!first) {
7072             while (*ptr1 == '0') { /* Process leading zeroes */
7073                 if (number == 0.0 && size > 1) {
7074                     *ptr2++ = '0';
7075                     size--;
7076                 }
7077                 ptr1++;
7078                 dec1--;
7079             }
7080         }
7081         while (*ptr1 != '\0') {
7082             if (!first) first = ptr2;
7083             if (size > 1) {
7084                 *ptr2++ = *ptr1++;
7085                 size--;
7086             }
7087         }
7088     }
7089
7090     *ptr2 = '\0';
7091
7092     /* We never found a non-zero digit, then our number is either
7093      * smaller than the requested precision, or 0.0 */
7094     if (!first && (number <= 0.0))
7095         dec1 = 0;
7096
7097     *decpt = dec2 ? dec2 : dec1;
7098     return 0;
7099 }
7100
7101 /***********************************************************************
7102  *              _gcvt  (MSVCRT.@)
7103  */
7104 char * CDECL _gcvt( double number, int ndigit, char *buff )
7105 {
7106     if(!buff) {
7107         *_errno() = EINVAL;
7108         return NULL;
7109     }
7110
7111     if(ndigit < 0) {
7112         *_errno() = ERANGE;
7113         return NULL;
7114     }
7115
7116     sprintf(buff, "%.*g", ndigit, number);
7117     return buff;
7118 }
7119
7120 /***********************************************************************
7121  *              _gcvt_s  (MSVCRT.@)
7122  */
7123 int CDECL _gcvt_s(char *buff, size_t size, double number, int digits)
7124 {
7125     int len;
7126
7127     if(!buff) {
7128         *_errno() = EINVAL;
7129         return EINVAL;
7130     }
7131
7132     if( digits<0 || digits>=size) {
7133         if(size)
7134             buff[0] = '\0';
7135
7136         *_errno() = ERANGE;
7137         return ERANGE;
7138     }
7139
7140     len = _scprintf("%.*g", digits, number);
7141     if(len > size) {
7142         buff[0] = '\0';
7143         *_errno() = ERANGE;
7144         return ERANGE;
7145     }
7146
7147     sprintf(buff, "%.*g", digits, number);
7148     return 0;
7149 }
7150
7151 #include <stdlib.h> /* div_t, ldiv_t */
7152
7153 /*********************************************************************
7154  *              div (MSVCRT.@)
7155  * VERSION
7156  *      [i386] Windows binary compatible - returns the struct in eax/edx.
7157  */
7158 #ifdef __i386__
7159 unsigned __int64 CDECL div(int num, int denom)
7160 {
7161     union {
7162         div_t div;
7163         unsigned __int64 uint64;
7164     } ret;
7165
7166     ret.div.quot = num / denom;
7167     ret.div.rem = num % denom;
7168     return ret.uint64;
7169 }
7170 #else
7171 /*********************************************************************
7172  *              div (MSVCRT.@)
7173  * VERSION
7174  *      [!i386] Non-x86 can't run win32 apps so we don't need binary compatibility
7175  */
7176 div_t CDECL div(int num, int denom)
7177 {
7178     div_t ret;
7179
7180     ret.quot = num / denom;
7181     ret.rem = num % denom;
7182     return ret;
7183 }
7184 #endif /* ifdef __i386__ */
7185
7186
7187 /*********************************************************************
7188  *              ldiv (MSVCRT.@)
7189  * VERSION
7190  *      [i386] Windows binary compatible - returns the struct in eax/edx.
7191  */
7192 #ifdef __i386__
7193 unsigned __int64 CDECL ldiv(__msvcrt_long num, __msvcrt_long denom)
7194 {
7195     union {
7196         ldiv_t ldiv;
7197         unsigned __int64 uint64;
7198     } ret;
7199
7200     ret.ldiv.quot = num / denom;
7201     ret.ldiv.rem = num % denom;
7202     return ret.uint64;
7203 }
7204 #else
7205 /*********************************************************************
7206  *              ldiv (MSVCRT.@)
7207  * VERSION
7208  *      [!i386] Non-x86 can't run win32 apps so we don't need binary compatibility
7209  */
7210 ldiv_t CDECL ldiv(__msvcrt_long num, __msvcrt_long denom)
7211 {
7212     ldiv_t ret;
7213
7214     ret.quot = num / denom;
7215     ret.rem = num % denom;
7216     return ret;
7217 }
7218 #endif /* ifdef __i386__ */
7219
#if _MSVCR_VER>=100
/*********************************************************************
 *              lldiv (MSVCR100.@)
 *
 * 64-bit counterpart of div(): returns the quotient and the remainder
 * of num divided by denom in an lldiv_t.
 */
lldiv_t CDECL lldiv(__int64 num, __int64 denom)
{
    lldiv_t result;
    __int64 q, r;

    q = num / denom;
    r = num % denom;

    result.quot = q;
    result.rem = r;
    return result;
}
#endif
7234
7235 #ifdef __i386__
7236
/*********************************************************************
 *              _adjust_fdiv (MSVCRT.@)
 * Used by the MSVC compiler to work around the Pentium FDIV bug.
 *
 * The flag is initialised to 0 here; nothing in this part of the file
 * modifies it.
 */
int MSVCRT__adjust_fdiv = 0;
7242
7243 /***********************************************************************
7244  *              _adj_fdiv_m16i (MSVCRT.@)
7245  *
7246  * NOTE
7247  *    I _think_ this function is intended to work around the Pentium
7248  *    fdiv bug.
7249  */
7250 void __stdcall _adj_fdiv_m16i( short arg )
7251 {
7252   TRACE("(): stub\n");
7253 }
7254
7255 /***********************************************************************
7256  *              _adj_fdiv_m32 (MSVCRT.@)
7257  *
7258  * NOTE
7259  *    I _think_ this function is intended to work around the Pentium
7260  *    fdiv bug.
7261  */
7262 void __stdcall _adj_fdiv_m32( unsigned int arg )
7263 {
7264   TRACE("(): stub\n");
7265 }
7266
7267 /***********************************************************************
7268  *              _adj_fdiv_m32i (MSVCRT.@)
7269  *
7270  * NOTE
7271  *    I _think_ this function is intended to work around the Pentium
7272  *    fdiv bug.
7273  */
7274 void __stdcall _adj_fdiv_m32i( int arg )
7275 {
7276   TRACE("(): stub\n");
7277 }
7278
7279 /***********************************************************************
7280  *              _adj_fdiv_m64 (MSVCRT.@)
7281  *
7282  * NOTE
7283  *    I _think_ this function is intended to work around the Pentium
7284  *    fdiv bug.
7285  */
7286 void __stdcall _adj_fdiv_m64( unsigned __int64 arg )
7287 {
7288   TRACE("(): stub\n");
7289 }
7290
/***********************************************************************
 *              _adj_fdiv_r (MSVCRT.@)
 *
 * Stub.  Takes no arguments and only emits a trace message.
 *
 * FIXME
 *    The argument list of this function is likely to be wrong.
 *
 * NOTE
 *    Believed, but not confirmed, to be one of the helpers intended to
 *    work around the Pentium fdiv bug.
 */
void _adj_fdiv_r(void)
{
    TRACE("(): stub\n");
}
7304
7305 /***********************************************************************
7306  *              _adj_fdivr_m16i (MSVCRT.@)
7307  *
7308  * NOTE
7309  *    I _think_ this function is intended to work around the Pentium
7310  *    fdiv bug.
7311  */
7312 void __stdcall _adj_fdivr_m16i( short arg )
7313 {
7314   TRACE("(): stub\n");
7315 }
7316
7317 /***********************************************************************
7318  *              _adj_fdivr_m32 (MSVCRT.@)
7319  *
7320  * NOTE
7321  *    I _think_ this function is intended to work around the Pentium
7322  *    fdiv bug.
7323  */
7324 void __stdcall _adj_fdivr_m32( unsigned int arg )
7325 {
7326   TRACE("(): stub\n");
7327 }
7328
7329 /***********************************************************************
7330  *              _adj_fdivr_m32i (MSVCRT.@)
7331  *
7332  * NOTE
7333  *    I _think_ this function is intended to work around the Pentium
7334  *    fdiv bug.
7335  */
7336 void __stdcall _adj_fdivr_m32i( int arg )
7337 {
7338   TRACE("(): stub\n");
7339 }
7340
7341 /***********************************************************************
7342  *              _adj_fdivr_m64 (MSVCRT.@)
7343  *
7344  * NOTE
7345  *    I _think_ this function is intended to work around the Pentium
7346  *    fdiv bug.
7347  */
7348 void __stdcall _adj_fdivr_m64( unsigned __int64 arg )
7349 {
7350   TRACE("(): stub\n");
7351 }
7352
/***********************************************************************
 *              _adj_fpatan (MSVCRT.@)
 *
 * Stub.  Takes no arguments and only emits a trace message.
 *
 * FIXME
 *    The argument list of this function is likely to be wrong.
 *
 * NOTE
 *    Believed, but not confirmed, to be one of the helpers intended to
 *    work around the Pentium fdiv bug.
 */
void _adj_fpatan(void)
{
    TRACE("(): stub\n");
}
7366
/***********************************************************************
 *              _adj_fprem (MSVCRT.@)
 *
 * Stub.  Takes no arguments and only emits a trace message.
 *
 * FIXME
 *    The argument list of this function is likely to be wrong.
 *
 * NOTE
 *    Believed, but not confirmed, to be one of the helpers intended to
 *    work around the Pentium fdiv bug.
 */
void _adj_fprem(void)
{
    TRACE("(): stub\n");
}
7380
/***********************************************************************
 *              _adj_fprem1 (MSVCRT.@)
 *
 * Stub.  Takes no arguments and only emits a trace message.
 *
 * FIXME
 *    The argument list of this function is likely to be wrong.
 *
 * NOTE
 *    Believed, but not confirmed, to be one of the helpers intended to
 *    work around the Pentium fdiv bug.
 */
void _adj_fprem1(void)
{
    TRACE("(): stub\n");
}
7394
/***********************************************************************
 *              _adj_fptan (MSVCRT.@)
 *
 * Stub.  Takes no arguments and only emits a trace message.
 *
 * FIXME
 *    The argument list of this function is likely to be wrong.
 *
 * NOTE
 *    Believed, but not confirmed, to be one of the helpers intended to
 *    work around the Pentium fdiv bug.
 */
void _adj_fptan(void)
{
    TRACE("(): stub\n");
}
7408
/***********************************************************************
 *              _safe_fdiv (MSVCRT.@)
 *
 * Stub.  Takes no arguments and only emits a trace message.
 *
 * FIXME
 *    The argument list of this function is likely to be wrong.
 *
 * NOTE
 *    Believed, but not confirmed, to be one of the helpers intended to
 *    work around the Pentium fdiv bug.
 */
void _safe_fdiv(void)
{
    TRACE("(): stub\n");
}
7422
/***********************************************************************
 *              _safe_fdivr (MSVCRT.@)
 *
 * Stub.  Takes no arguments and only emits a trace message.
 *
 * FIXME
 *    The argument list of this function is likely to be wrong.
 *
 * NOTE
 *    Believed, but not confirmed, to be one of the helpers intended to
 *    work around the Pentium fdiv bug.
 */
void _safe_fdivr(void)
{
    TRACE("(): stub\n");
}
7436
/***********************************************************************
 *              _safe_fprem (MSVCRT.@)
 *
 * Stub.  Takes no arguments and only emits a trace message.
 *
 * FIXME
 *    The argument list of this function is likely to be wrong.
 *
 * NOTE
 *    Believed, but not confirmed, to be one of the helpers intended to
 *    work around the Pentium fdiv bug.
 */
void _safe_fprem(void)
{
    TRACE("(): stub\n");
}
7450
/***********************************************************************
 *              _safe_fprem1 (MSVCRT.@)
 *
 * Stub.  Takes no arguments and only emits a trace message.
 *
 * FIXME
 *    The argument list of this function is likely to be wrong.
 *
 * NOTE
 *    Believed, but not confirmed, to be one of the helpers intended to
 *    work around the Pentium fdiv bug.
 */
void _safe_fprem1(void)
{
    TRACE("(): stub\n");
}
7465
7466 /***********************************************************************
7467  *              __libm_sse2_acos   (MSVCRT.@)
7468  */
7469 void __cdecl __libm_sse2_acos(void)
7470 {
7471     double d;
7472     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7473     d = acos( d );
7474     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7475 }
7476
7477 /***********************************************************************
7478  *              __libm_sse2_acosf   (MSVCRT.@)
7479  */
7480 void __cdecl __libm_sse2_acosf(void)
7481 {
7482     float f;
7483     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7484     f = acosf( f );
7485     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7486 }
7487
7488 /***********************************************************************
7489  *              __libm_sse2_asin   (MSVCRT.@)
7490  */
7491 void __cdecl __libm_sse2_asin(void)
7492 {
7493     double d;
7494     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7495     d = asin( d );
7496     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7497 }
7498
7499 /***********************************************************************
7500  *              __libm_sse2_asinf   (MSVCRT.@)
7501  */
7502 void __cdecl __libm_sse2_asinf(void)
7503 {
7504     float f;
7505     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7506     f = asinf( f );
7507     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7508 }
7509
7510 /***********************************************************************
7511  *              __libm_sse2_atan   (MSVCRT.@)
7512  */
7513 void __cdecl __libm_sse2_atan(void)
7514 {
7515     double d;
7516     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7517     d = atan( d );
7518     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7519 }
7520
7521 /***********************************************************************
7522  *              __libm_sse2_atan2   (MSVCRT.@)
7523  */
7524 void __cdecl __libm_sse2_atan2(void)
7525 {
7526     double d1, d2;
7527     __asm__ __volatile__( "movq %%xmm0,%0; movq %%xmm1,%1 " : "=m" (d1), "=m" (d2) );
7528     d1 = atan2( d1, d2 );
7529     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d1) );
7530 }
7531
7532 /***********************************************************************
7533  *              __libm_sse2_atanf   (MSVCRT.@)
7534  */
7535 void __cdecl __libm_sse2_atanf(void)
7536 {
7537     float f;
7538     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7539     f = atanf( f );
7540     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7541 }
7542
7543 /***********************************************************************
7544  *              __libm_sse2_cos   (MSVCRT.@)
7545  */
7546 void __cdecl __libm_sse2_cos(void)
7547 {
7548     double d;
7549     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7550     d = cos( d );
7551     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7552 }
7553
7554 /***********************************************************************
7555  *              __libm_sse2_cosf   (MSVCRT.@)
7556  */
7557 void __cdecl __libm_sse2_cosf(void)
7558 {
7559     float f;
7560     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7561     f = cosf( f );
7562     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7563 }
7564
7565 /***********************************************************************
7566  *              __libm_sse2_exp   (MSVCRT.@)
7567  */
7568 void __cdecl __libm_sse2_exp(void)
7569 {
7570     double d;
7571     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7572     d = exp( d );
7573     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7574 }
7575
7576 /***********************************************************************
7577  *              __libm_sse2_expf   (MSVCRT.@)
7578  */
7579 void __cdecl __libm_sse2_expf(void)
7580 {
7581     float f;
7582     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7583     f = expf( f );
7584     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7585 }
7586
7587 /***********************************************************************
7588  *              __libm_sse2_log   (MSVCRT.@)
7589  */
7590 void __cdecl __libm_sse2_log(void)
7591 {
7592     double d;
7593     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7594     d = log( d );
7595     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7596 }
7597
7598 /***********************************************************************
7599  *              __libm_sse2_log10   (MSVCRT.@)
7600  */
7601 void __cdecl __libm_sse2_log10(void)
7602 {
7603     double d;
7604     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7605     d = log10( d );
7606     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7607 }
7608
7609 /***********************************************************************
7610  *              __libm_sse2_log10f   (MSVCRT.@)
7611  */
7612 void __cdecl __libm_sse2_log10f(void)
7613 {
7614     float f;
7615     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7616     f = log10f( f );
7617     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7618 }
7619
7620 /***********************************************************************
7621  *              __libm_sse2_logf   (MSVCRT.@)
7622  */
7623 void __cdecl __libm_sse2_logf(void)
7624 {
7625     float f;
7626     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7627     f = logf( f );
7628     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7629 }
7630
7631 /***********************************************************************
7632  *              __libm_sse2_pow   (MSVCRT.@)
7633  */
7634 void __cdecl __libm_sse2_pow(void)
7635 {
7636     double d1, d2;
7637     __asm__ __volatile__( "movq %%xmm0,%0; movq %%xmm1,%1 " : "=m" (d1), "=m" (d2) );
7638     d1 = pow( d1, d2 );
7639     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d1) );
7640 }
7641
7642 /***********************************************************************
7643  *              __libm_sse2_powf   (MSVCRT.@)
7644  */
7645 void __cdecl __libm_sse2_powf(void)
7646 {
7647     float f1, f2;
7648     __asm__ __volatile__( "movd %%xmm0,%0; movd %%xmm1,%1" : "=g" (f1), "=g" (f2) );
7649     f1 = powf( f1, f2 );
7650     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f1) );
7651 }
7652
7653 /***********************************************************************
7654  *              __libm_sse2_sin   (MSVCRT.@)
7655  */
7656 void __cdecl __libm_sse2_sin(void)
7657 {
7658     double d;
7659     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7660     d = sin( d );
7661     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7662 }
7663
7664 /***********************************************************************
7665  *              __libm_sse2_sinf   (MSVCRT.@)
7666  */
7667 void __cdecl __libm_sse2_sinf(void)
7668 {
7669     float f;
7670     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7671     f = sinf( f );
7672     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7673 }
7674
7675 /***********************************************************************
7676  *              __libm_sse2_tan   (MSVCRT.@)
7677  */
7678 void __cdecl __libm_sse2_tan(void)
7679 {
7680     double d;
7681     __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
7682     d = tan( d );
7683     __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
7684 }
7685
7686 /***********************************************************************
7687  *              __libm_sse2_tanf   (MSVCRT.@)
7688  */
7689 void __cdecl __libm_sse2_tanf(void)
7690 {
7691     float f;
7692     __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
7693     f = tanf( f );
7694     __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
7695 }
7696
/***********************************************************************
 *              __libm_sse2_sqrt_precise   (MSVCR110.@)
 *
 * Register-based square root: the double argument is read from %xmm0
 * and the result is left in %xmm0.
 *
 * Three paths are taken depending on the control word and on the
 * sqrt_validate() helper:
 *   - when (cw & _MCW_RC) is non-zero, plain sqrt() is used;
 *   - when sqrt_validate() returns false, the value it left in d is
 *     returned as the result;
 *   - otherwise the sse2_sqrt routine is called.
 */
void __cdecl __libm_sse2_sqrt_precise(void)
{
    unsigned int cw;
    double d;

    /* Fetch the argument from %xmm0. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    /* Read a control word into cw; new value and mask are both 0.
     * NOTE(review): presumably this leaves the control state untouched
     * and cw receives the SSE2 control word - confirm against the
     * __control87_2 contract. */
    __control87_2(0, 0, NULL, &cw);
    /* NOTE(review): a non-zero _MCW_RC field presumably means a rounding
     * mode other than the default one - confirm. */
    if (cw & _MCW_RC)
    {
        d = sqrt(d);
        __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
        return;
    }

    /* d is passed by pointer, so the helper may have replaced it; whatever
     * it holds is handed back when validation fails. */
    if (!sqrt_validate(&d, FALSE))
    {
        __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
        return;
    }
    /* NOTE(review): sse2_sqrt is called without any C-level argument;
     * presumably it operates directly on %xmm0 - confirm against its
     * definition. */
    __asm__ __volatile__( "call " __ASM_NAME( "sse2_sqrt" ) );
}
7721 #endif  /* __i386__ */
7722
7723 #if _MSVCR_VER>=120
7724
7725 /*********************************************************************
7726  *      exp2 (MSVCR120.@)
7727  *
7728  * Copied from musl: src/math/exp2.c
7729  */
7730 double CDECL exp2(double x)
7731 {
7732     static const double C[] = {
7733         0x1.62e42fefa39efp-1,
7734         0x1.ebfbdff82c424p-3,
7735         0x1.c6b08d70cf4b5p-5,
7736         0x1.3b2abd24650ccp-7,
7737         0x1.5d7e09b4e3a84p-10
7738     };
7739
7740     UINT32 abstop;
7741     UINT64 ki, idx, top, sbits;
7742     double kd, r, r2, scale, tail, tmp;
7743
7744     abstop = (*(UINT64*)&x >> 52) & 0x7ff;
7745     if (abstop - 0x3c9 >= 0x408 - 0x3c9) {
7746         if (abstop - 0x3c9 >= 0x80000000) {
7747             /* Avoid spurious underflow for tiny x. */
7748             /* Note: 0 is common input. */
7749             return 1.0 + x;
7750         }
7751         if (abstop >= 409) {
7752             if (*(UINT64*)&x == 0xfff0000000000000ull)
7753                 return 0.0;
7754             if (abstop >= 0x7ff)
7755                 return 1.0 + x;
7756             if (!(*(UINT64*)&x >> 63)) {
7757                 *_errno() = ERANGE;
7758                 return fp_barrier(DBL_MAX) * DBL_MAX;
7759             }
7760             else if (x <= -2147483648.0) {
7761                 fp_barrier(x + 0x1p120f);
7762                 return 0;
7763             }
7764             else if (*(UINT64*)&x >= 0xc090cc0000000000ull) {
7765                 *_errno() = ERANGE;
7766                 fp_barrier(x + 0x1p120f);
7767                 return 0;
7768             }
7769         }
7770         if (2 * *(UINT64*)&x > 2 * 0x408d000000000000ull)
7771             /* Large x is special cased below. */
7772             abstop = 0;
7773     }
7774
7775     /* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */
7776     /* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */
7777     kd = fp_barrier(x + 0x1.8p52 / (1 << 7));
7778     ki = *(UINT64*)&kd; /* k. */
7779     kd -= 0x1.8p52 / (1 << 7); /* k/N for int k. */
7780     r = x - kd;
7781     /* 2^(k/N) ~= scale * (1 + tail). */
7782     idx = 2 * (ki % (1 << 7));
7783     top = ki << (52 - 7);
7784     tail = *(double*)&exp_T[idx];
7785     /* This is only a valid scale when -1023*N < k < 1024*N. */
7786     sbits = exp_T[idx + 1] + top;
7787     /* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */
7788     /* Evaluation is optimized assuming superscalar pipelined execution. */
7789     r2 = r * r;
7790     /* Without fma the worst case error is 0.5/N ulp larger. */
7791     /* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */
7792     tmp = tail + r * C[0] + r2 * (C[1] + r * C[2]) + r2 * r2 * (C[3] + r * C[4]);
7793     if (abstop == 0)
7794     {
7795         /* Handle cases that may overflow or underflow when computing the result that
7796            is scale*(1+TMP) without intermediate rounding. The bit representation of
7797            scale is in SBITS, however it has a computed exponent that may have
7798            overflown into the sign bit so that needs to be adjusted before using it as
7799            a double. (int32_t)KI is the k used in the argument reduction and exponent
7800            adjustment of scale, positive k here means the result may overflow and
7801            negative k means the result may underflow. */
7802         double scale, y;
7803
7804         if ((ki & 0x80000000) == 0) {
7805             /* k > 0, the exponent of scale might have overflowed by 1. */
7806             sbits -= 1ull << 52;
7807             scale = *(double*)&sbits;
7808             y = 2 * (scale + scale * tmp);
7809             return y;
7810         }
7811         /* k < 0, need special care in the subnormal range. */
7812         sbits += 1022ull << 52;
7813         scale = *(double*)&sbits;
7814         y = scale + scale * tmp;
7815         if (y < 1.0) {
7816             /* Round y to the right precision before scaling it into the subnormal
7817                range to avoid double rounding that can cause 0.5+E/2 ulp error where
7818                E is the worst-case ulp error outside the subnormal range. So this
7819                is only useful if the goal is better than 1 ulp worst-case error. */
7820             double hi, lo;
7821             lo = scale - y + scale * tmp;
7822             hi = 1.0 + y;
7823             lo = 1.0 - hi + y + lo;
7824             y = hi + lo - 1.0;
7825             /* Avoid -0.0 with downward rounding. */
7826             if (y == 0.0)
7827                 y = 0.0;
7828             /* The underflow exception needs to be signaled explicitly. */
7829             fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022);
7830         }
7831         y = 0x1p-1022 * y;
7832         return y;
7833     }
7834     scale = *(double*)&sbits;
7835     /* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
7836        is no spurious underflow here even without fma. */
7837     return scale + scale * tmp;
7838 }
7839
/*********************************************************************
 *      exp2f (MSVCR120.@)
 *
 * Copied from musl: src/math/exp2f.c
 *
 * Single-precision base-2 exponential.  The work is done in double
 * precision with the exp2f_T table (defined outside this function) and
 * a cubic polynomial; the double result is converted to float on
 * return.
 *
 * Special cases, checked only when |x| >= 128 or x is NaN:
 *   - the bit pattern 0xff800000 (-infinity) returns 0.0f;
 *   - any other input with an all-ones exponent field returns x + x;
 *   - x > 0 sets errno to ERANGE and returns x * FLT_MAX;
 *   - x <= -150 returns 0.
 */
float CDECL exp2f(float x)
{
    /* Polynomial coefficients, highest power first (see the formula below). */
    static const double C[] = {
        0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1
    };
    static const double shift = 0x1.8p+52 / (1 << 5);

    double kd, xd, z, r, r2, y, s;
    UINT32 abstop;
    UINT64 ki, t;

    xd = x;
    /* Top 12 bits of the float representation with the sign bit masked off. */
    abstop = (*(UINT32*)&x >> 20) & 0x7ff;
    if (abstop >= 0x430) {
        /* |x| >= 128 or x is nan.  */
        if (*(UINT32*)&x == 0xff800000)
            return 0.0f;
        if (abstop >= 0x7f8)
            return x + x;
        if (x > 0.0f) {
            *_errno() = ERANGE;
            return fp_barrierf(x * FLT_MAX);
        }
        if (x <= -150.0f) {
            /* The result of the subtraction is discarded.
             * NOTE(review): presumably evaluated only for its floating
             * point status side effect - confirm. */
            fp_barrierf(x - 0x1p120);
            return 0;
        }
    }

    /* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k, N = 1 << 5. */
    kd = xd + shift;
    /* Raw bits of kd; they supply both the table index and the exponent
       adjustment used below. */
    ki = *(UINT64*)&kd;
    kd -= shift; /* k/(1<<5) for int k.  */
    r = xd - kd;

    /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
    t = exp2f_T[ki % (1 << 5)];
    t += ki << (52 - 5);
    s = *(double*)&t;
    z = C[0] * r + C[1];
    r2 = r * r;
    y = C[2] * r + 1;
    y = z * r2 + y;
    y = y * s;
    return y;
}
7891
7892 /*********************************************************************
7893  *      expm1 (MSVCR120.@)
7894  */
7895 double CDECL expm1(double x)
7896 {
7897     return __expm1(x);
7898 }
7899
7900 /*********************************************************************
7901  *      expm1f (MSVCR120.@)
7902  */
7903 float CDECL expm1f(float x)
7904 {
7905     return __expm1f(x);
7906 }
7907
/*********************************************************************
 *      log1p (MSVCR120.@)
 *
 * Copied from musl: src/math/log1p.c
 *
 * Computes log(1 + x) in double precision.
 *
 * Special cases:
 *   - x == -1 sets errno to ERANGE and returns x / 0.0 (-inf);
 *   - x < -1 sets errno to EDOM and returns NaN;
 *   - |x| < 2**-53 returns x itself;
 *   - non-negative inputs whose high word is >= 0x7ff00000 (+inf, NaN)
 *     are returned unchanged.
 */
double CDECL log1p(double x)
{
    static const double ln2_hi = 6.93147180369123816490e-01,
        ln2_lo = 1.90821492927058770002e-10,
        Lg1 = 6.666666666666735130e-01,
        Lg2 = 3.999999999940941908e-01,
        Lg3 = 2.857142874366239149e-01,
        Lg4 = 2.222219843214978396e-01,
        Lg5 = 1.818357216161805012e-01,
        Lg6 = 1.531383769920937332e-01,
        Lg7 = 1.479819860511658591e-01;

    union {double f; UINT64 i;} u = {x};
    double hfsq, f, c, s, z, R, w, t1, t2, dk;
    UINT32 hx, hu;
    int k;

    /* High 32 bits of x: sign, exponent and top of the mantissa. */
    hx = u.i >> 32;
    /* k != 0 means "the argument still has to be reduced" below. */
    k = 1;
    if (hx < 0x3fda827a || hx >> 31) { /* 1+x < sqrt(2)+ */
        if (hx >= 0xbff00000) { /* x <= -1.0 */
            if (x == -1) {
                *_errno() = ERANGE;
                return x / 0.0; /* log1p(-1) = -inf */
            }
            *_errno() = EDOM;
            return (x-x) / 0.0; /* log1p(x<-1) = NaN */
        }
        if (hx << 1 < 0x3ca00000 << 1) { /* |x| < 2**-53 */
            /* The result of the addition is discarded.
             * NOTE(review): presumably evaluated only for its floating
             * point status side effect - confirm. */
            fp_barrier(x + 0x1p120f);
            /* underflow if subnormal */
            if ((hx & 0x7ff00000) == 0)
                fp_barrierf(x);
            return x;
        }
        if (hx <= 0xbfd2bec4) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
            /* No reduction needed: use x directly with k = 0. */
            k = 0;
            c = 0;
            f = x;
        }
    } else if (hx >= 0x7ff00000)
        return x;
    if (k) {
        u.f = 1 + x;
        hu = u.i >> 32;
        hu += 0x3ff00000 - 0x3fe6a09e;
        k = (int)(hu >> 20) - 0x3ff;
        /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
        if (k < 54) {
            c = k >= 2 ? 1 - (u.f - x) : x - (u.f - 1);
            c /= u.f;
        } else
            c = 0;
        /* reduce u into [sqrt(2)/2, sqrt(2)] */
        hu = (hu & 0x000fffff) + 0x3fe6a09e;
        /* Rebuild u from the adjusted high word and the original low word. */
        u.i = (UINT64)hu << 32 | (u.i & 0xffffffff);
        f = u.f - 1;
    }
    hfsq = 0.5 * f * f;
    s = f / (2.0 + f);
    z = s * s;
    w = z * z;
    /* Polynomial in s split into even (t1) and odd (t2) Lg terms. */
    t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
    t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
    R = t2 + t1;
    dk = k;
    return s * (hfsq + R) + (dk * ln2_lo + c) - hfsq + f + dk * ln2_hi;
}
7981
/*********************************************************************
 *      log1pf (MSVCR120.@)
 *
 * Copied from musl: src/math/log1pf.c
 *
 * Computes log(1 + x) in single precision.
 *
 * Special cases:
 *   - x == -1 sets errno to ERANGE and returns x / 0.0f (-inf);
 *   - x < -1 sets errno to EDOM and returns NaN;
 *   - |x| < 2**-24 returns x itself;
 *   - non-negative inputs whose bit pattern is >= 0x7f800000 (+inf, NaN)
 *     are returned unchanged.
 */
float CDECL log1pf(float x)
{
    static const float ln2_hi = 6.9313812256e-01,
        ln2_lo = 9.0580006145e-06,
        Lg1 = 0xaaaaaa.0p-24,
        Lg2 = 0xccce13.0p-25,
        Lg3 = 0x91e9ee.0p-25,
        Lg4 = 0xf89e26.0p-26;

    union {float f; UINT32 i;} u = {x};
    float hfsq, f, c, s, z, R, w, t1, t2, dk;
    UINT32 ix, iu;
    int k;

    /* Raw bit pattern of x. */
    ix = u.i;
    /* k != 0 means "the argument still has to be reduced" below. */
    k = 1;
    if (ix < 0x3ed413d0 || ix >> 31) { /* 1+x < sqrt(2)+ */
        if (ix >= 0xbf800000) { /* x <= -1.0 */
            if (x == -1) {
                *_errno() = ERANGE;
                return x / 0.0f; /* log1p(-1)=-inf */
            }
            *_errno() = EDOM;
            return (x - x) / 0.0f; /* log1p(x<-1)=NaN */
        }
        if (ix<<1 < 0x33800000<<1) { /* |x| < 2**-24 */
            /* underflow if subnormal */
            if ((ix & 0x7f800000) == 0)
                fp_barrierf(x * x);
            return x;
        }
        if (ix <= 0xbe95f619) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
            /* No reduction needed: use x directly with k = 0. */
            k = 0;
            c = 0;
            f = x;
        }
    } else if (ix >= 0x7f800000)
        return x;
    if (k) {
        u.f = 1 + x;
        iu = u.i;
        iu += 0x3f800000 - 0x3f3504f3;
        k = (int)(iu >> 23) - 0x7f;
        /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
        if (k < 25) {
            c = k >= 2 ? 1 - (u.f - x) : x - (u.f - 1);
            c /= u.f;
        } else
            c = 0;
        /* reduce u into [sqrt(2)/2, sqrt(2)] */
        iu = (iu & 0x007fffff) + 0x3f3504f3;
        u.i = iu;
        f = u.f - 1;
    }
    s = f / (2.0f + f);
    z = s * s;
    w = z * z;
    /* Polynomial in s split into even (t1) and odd (t2) Lg terms. */
    t1= w * (Lg2 + w * Lg4);
    t2= z * (Lg1 + w * Lg3);
    R = t2 + t1;
    hfsq = 0.5f * f * f;
    dk = k;
    return s * (hfsq + R) + (dk * ln2_lo + c) - hfsq + f + dk * ln2_hi;
}
8051
8052 /*********************************************************************
8053  *      log2 (MSVCR120.@)
8054  *
8055  * Copied from musl: src/math/log2.c
8056  */
8057 double CDECL log2(double x)
8058 {
8059     static const double invln2hi = 0x1.7154765200000p+0,
8060         invln2lo = 0x1.705fc2eefa200p-33;
8061     static const double A[] = {
8062         -0x1.71547652b8339p-1,
8063         0x1.ec709dc3a04bep-2,
8064         -0x1.7154764702ffbp-2,
8065         0x1.2776c50034c48p-2,
8066         -0x1.ec7b328ea92bcp-3,
8067         0x1.a6225e117f92ep-3
8068     };
8069     static const double B[] = {
8070         -0x1.71547652b82fep-1,
8071         0x1.ec709dc3a03f7p-2,
8072         -0x1.71547652b7c3fp-2,
8073         0x1.2776c50f05be4p-2,
8074         -0x1.ec709dd768fe5p-3,
8075         0x1.a61761ec4e736p-3,
8076         -0x1.7153fbc64a79bp-3,
8077         0x1.484d154f01b4ap-3,
8078         -0x1.289e4a72c383cp-3,
8079         0x1.0b32f285aee66p-3
8080     };
8081     static const struct {
8082         double invc, logc;
8083     } T[] = {
8084         {0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1},
8085         {0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1},
8086         {0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1},
8087         {0x1.661ec32d06c85p+0, -0x1.efec5360b4000p-2},
8088         {0x1.623fa951198f8p+0, -0x1.dfdd91ab7e000p-2},
8089         {0x1.5e75ba4cf026cp+0, -0x1.cffae0cc79000p-2},
8090         {0x1.5ac055a214fb8p+0, -0x1.c043811fda000p-2},
8091         {0x1.571ed0f166e1ep+0, -0x1.b0b67323ae000p-2},
8092         {0x1.53909590bf835p+0, -0x1.a152f5a2db000p-2},
8093         {0x1.5014fed61adddp+0, -0x1.9217f5af86000p-2},
8094         {0x1.4cab88e487bd0p+0, -0x1.8304db0719000p-2},
8095         {0x1.49539b4334feep+0, -0x1.74189f9a9e000p-2},
8096         {0x1.460cbdfafd569p+0, -0x1.6552bb5199000p-2},
8097         {0x1.42d664ee4b953p+0, -0x1.56b23a29b1000p-2},
8098         {0x1.3fb01111dd8a6p+0, -0x1.483650f5fa000p-2},
8099         {0x1.3c995b70c5836p+0, -0x1.39de937f6a000p-2},
8100         {0x1.3991c4ab6fd4ap+0, -0x1.2baa1538d6000p-2},
8101         {0x1.3698e0ce099b5p+0, -0x1.1d98340ca4000p-2},
8102         {0x1.33ae48213e7b2p+0, -0x1.0fa853a40e000p-2},
8103         {0x1.30d191985bdb1p+0, -0x1.01d9c32e73000p-2},
8104         {0x1.2e025cab271d7p+0, -0x1.e857da2fa6000p-3},
8105         {0x1.2b404cf13cd82p+0, -0x1.cd3c8633d8000p-3},
8106         {0x1.288b02c7ccb50p+0, -0x1.b26034c14a000p-3},
8107         {0x1.25e2263944de5p+0, -0x1.97c1c2f4fe000p-3},
8108         {0x1.234563d8615b1p+0, -0x1.7d6023f800000p-3},
8109         {0x1.20b46e33eaf38p+0, -0x1.633a71a05e000p-3},
8110         {0x1.1e2eefdcda3ddp+0, -0x1.494f5e9570000p-3},
8111         {0x1.1bb4a580b3930p+0, -0x1.2f9e424e0a000p-3},
8112         {0x1.19453847f2200p+0, -0x1.162595afdc000p-3},
8113         {0x1.16e06c0d5d73cp+0, -0x1.f9c9a75bd8000p-4},
8114         {0x1.1485f47b7e4c2p+0, -0x1.c7b575bf9c000p-4},
8115         {0x1.12358ad0085d1p+0, -0x1.960c60ff48000p-4},
8116         {0x1.0fef00f532227p+0, -0x1.64ce247b60000p-4},
8117         {0x1.0db2077d03a8fp+0, -0x1.33f78b2014000p-4},
8118         {0x1.0b7e6d65980d9p+0, -0x1.0387d1a42c000p-4},
8119         {0x1.0953efe7b408dp+0, -0x1.a6f9208b50000p-5},
8120         {0x1.07325cac53b83p+0, -0x1.47a954f770000p-5},
8121         {0x1.05197e40d1b5cp+0, -0x1.d23a8c50c0000p-6},
8122         {0x1.03091c1208ea2p+0, -0x1.16a2629780000p-6},
8123         {0x1.0101025b37e21p+0, -0x1.720f8d8e80000p-8},
8124         {0x1.fc07ef9caa76bp-1, 0x1.6fe53b1500000p-7},
8125         {0x1.f4465d3f6f184p-1, 0x1.11ccce10f8000p-5},
8126         {0x1.ecc079f84107fp-1, 0x1.c4dfc8c8b8000p-5},
8127         {0x1.e573a99975ae8p-1, 0x1.3aa321e574000p-4},
8128         {0x1.de5d6f0bd3de6p-1, 0x1.918a0d08b8000p-4},
8129         {0x1.d77b681ff38b3p-1, 0x1.e72e9da044000p-4},
8130         {0x1.d0cb5724de943p-1, 0x1.1dcd2507f6000p-3},
8131         {0x1.ca4b2dc0e7563p-1, 0x1.476ab03dea000p-3},
8132         {0x1.c3f8ee8d6cb51p-1, 0x1.7074377e22000p-3},
8133         {0x1.bdd2b4f020c4cp-1, 0x1.98ede8ba94000p-3},
8134         {0x1.b7d6c006015cap-1, 0x1.c0db86ad2e000p-3},
8135         {0x1.b20366e2e338fp-1, 0x1.e840aafcee000p-3},
8136         {0x1.ac57026295039p-1, 0x1.0790ab4678000p-2},
8137         {0x1.a6d01bc2731ddp-1, 0x1.1ac056801c000p-2},
8138         {0x1.a16d3bc3ff18bp-1, 0x1.2db11d4fee000p-2},
8139         {0x1.9c2d14967feadp-1, 0x1.406464ec58000p-2},
8140         {0x1.970e4f47c9902p-1, 0x1.52dbe093af000p-2},
8141         {0x1.920fb3982bcf2p-1, 0x1.651902050d000p-2},
8142         {0x1.8d30187f759f1p-1, 0x1.771d2cdeaf000p-2},
8143         {0x1.886e5ebb9f66dp-1, 0x1.88e9c857d9000p-2},
8144         {0x1.83c97b658b994p-1, 0x1.9a80155e16000p-2},
8145         {0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2},
8146         {0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2},
8147         {0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2}
8148     };
8149     static const struct {
8150         double chi, clo;
8151     } T2[] = {
8152         {0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
8153         {0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57},
8154         {0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55},
8155         {0x1.6e00038b95a04p-1, 0x1.8ff8856739326p-55},
8156         {0x1.71fffe09994e3p-1, 0x1.afd40275f82b1p-55},
8157         {0x1.7600015590e1p-1, -0x1.2fd75b4238341p-56},
8158         {0x1.7a00012655bd5p-1, 0x1.808e67c242b76p-56},
8159         {0x1.7e0003259e9a6p-1, -0x1.208e426f622b7p-57},
8160         {0x1.81fffedb4b2d2p-1, -0x1.402461ea5c92fp-55},
8161         {0x1.860002dfafcc3p-1, 0x1.df7f4a2f29a1fp-57},
8162         {0x1.89ffff78c6b5p-1, -0x1.e0453094995fdp-55},
8163         {0x1.8e00039671566p-1, -0x1.a04f3bec77b45p-55},
8164         {0x1.91fffe2bf1745p-1, -0x1.7fa34400e203cp-56},
8165         {0x1.95fffcc5c9fd1p-1, -0x1.6ff8005a0695dp-56},
8166         {0x1.9a0003bba4767p-1, 0x1.0f8c4c4ec7e03p-56},
8167         {0x1.9dfffe7b92da5p-1, 0x1.e7fd9478c4602p-55},
8168         {0x1.a1fffd72efdafp-1, -0x1.a0c554dcdae7ep-57},
8169         {0x1.a5fffde04ff95p-1, 0x1.67da98ce9b26bp-55},
8170         {0x1.a9fffca5e8d2bp-1, -0x1.284c9b54c13dep-55},
8171         {0x1.adfffddad03eap-1, 0x1.812c8ea602e3cp-58},
8172         {0x1.b1ffff10d3d4dp-1, -0x1.efaddad27789cp-55},
8173         {0x1.b5fffce21165ap-1, 0x1.3cb1719c61237p-58},
8174         {0x1.b9fffd950e674p-1, 0x1.3f7d94194cep-56},
8175         {0x1.be000139ca8afp-1, 0x1.50ac4215d9bcp-56},
8176         {0x1.c20005b46df99p-1, 0x1.beea653e9c1c9p-57},
8177         {0x1.c600040b9f7aep-1, -0x1.c079f274a70d6p-56},
8178         {0x1.ca0006255fd8ap-1, -0x1.a0b4076e84c1fp-56},
8179         {0x1.cdfffd94c095dp-1, 0x1.8f933f99ab5d7p-55},
8180         {0x1.d1ffff975d6cfp-1, -0x1.82c08665fe1bep-58},
8181         {0x1.d5fffa2561c93p-1, -0x1.b04289bd295f3p-56},
8182         {0x1.d9fff9d228b0cp-1, 0x1.70251340fa236p-55},
8183         {0x1.de00065bc7e16p-1, -0x1.5011e16a4d80cp-56},
8184         {0x1.e200002f64791p-1, 0x1.9802f09ef62ep-55},
8185         {0x1.e600057d7a6d8p-1, -0x1.e0b75580cf7fap-56},
8186         {0x1.ea00027edc00cp-1, -0x1.c848309459811p-55},
8187         {0x1.ee0006cf5cb7cp-1, -0x1.f8027951576f4p-55},
8188         {0x1.f2000782b7dccp-1, -0x1.f81d97274538fp-55},
8189         {0x1.f6000260c450ap-1, -0x1.071002727ffdcp-59},
8190         {0x1.f9fffe88cd533p-1, -0x1.81bdce1fda8bp-58},
8191         {0x1.fdfffd50f8689p-1, 0x1.7f91acb918e6ep-55},
8192         {0x1.0200004292367p+0, 0x1.b7ff365324681p-54},
8193         {0x1.05fffe3e3d668p+0, 0x1.6fa08ddae957bp-55},
8194         {0x1.0a0000a85a757p+0, -0x1.7e2de80d3fb91p-58},
8195         {0x1.0e0001a5f3fccp+0, -0x1.1823305c5f014p-54},
8196         {0x1.11ffff8afbaf5p+0, -0x1.bfabb6680bac2p-55},
8197         {0x1.15fffe54d91adp+0, -0x1.d7f121737e7efp-54},
8198         {0x1.1a00011ac36e1p+0, 0x1.c000a0516f5ffp-54},
8199         {0x1.1e00019c84248p+0, -0x1.082fbe4da5dap-54},
8200         {0x1.220000ffe5e6ep+0, -0x1.8fdd04c9cfb43p-55},
8201         {0x1.26000269fd891p+0, 0x1.cfe2a7994d182p-55},
8202         {0x1.2a00029a6e6dap+0, -0x1.00273715e8bc5p-56},
8203         {0x1.2dfffe0293e39p+0, 0x1.b7c39dab2a6f9p-54},
8204         {0x1.31ffff7dcf082p+0, 0x1.df1336edc5254p-56},
8205         {0x1.35ffff05a8b6p+0, -0x1.e03564ccd31ebp-54},
8206         {0x1.3a0002e0eaeccp+0, 0x1.5f0e74bd3a477p-56},
8207         {0x1.3e000043bb236p+0, 0x1.c7dcb149d8833p-54},
8208         {0x1.4200002d187ffp+0, 0x1.e08afcf2d3d28p-56},
8209         {0x1.460000d387cb1p+0, 0x1.20837856599a6p-55},
8210         {0x1.4a00004569f89p+0, -0x1.9fa5c904fbcd2p-55},
8211         {0x1.4e000043543f3p+0, -0x1.81125ed175329p-56},
8212         {0x1.51fffcc027f0fp+0, 0x1.883d8847754dcp-54},
8213         {0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55},
8214         {0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55},
8215         {0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54}
8216     };
8217
8218     double z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p, rhi, rlo;
8219     UINT64 ix, iz, tmp;
8220     UINT32 top;
8221     int k, i;
8222
8223     ix = *(UINT64*)&x;
8224     top = ix >> 48;
8225     if (ix - 0x3feea4af00000000ULL < 0x210aa00000000ULL) {
8226         /* Handle close to 1.0 inputs separately.  */
8227         /* Fix sign of zero with downward rounding when x==1.  */
8228         if (ix == 0x3ff0000000000000ULL)
8229             return 0;
8230         r = x - 1.0;
8231         *(UINT64*)&rhi = *(UINT64*)&r & -1ULL << 32;
8232         rlo = r - rhi;
8233         hi = rhi * invln2hi;
8234         lo = rlo * invln2hi + r * invln2lo;
8235         r2 = r * r; /* rounding error: 0x1p-62.  */
8236         r4 = r2 * r2;
8237         /* Worst-case error is less than 0.54 ULP (0.55 ULP without fma).  */
8238         p = r2 * (B[0] + r * B[1]);
8239         y = hi + p;
8240         lo += hi - y + p;
8241         lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) +
8242                 r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
8243         y += lo;
8244         return y;
8245     }
8246     if (top - 0x0010 >= 0x7ff0 - 0x0010) {
8247         /* x < 0x1p-1022 or inf or nan.  */
8248         if (ix * 2 == 0) {
8249             *_errno() = ERANGE;
8250             return -1.0 / x;
8251         }
8252         if (ix == 0x7ff0000000000000ULL) /* log(inf) == inf.  */
8253             return x;
8254         if ((top & 0x7ff0) == 0x7ff0 && (ix & 0xfffffffffffffULL))
8255             return x;
8256         if (top & 0x8000) {
8257             *_errno() = EDOM;
8258             return (x - x) / (x - x);
8259         }
8260         /* x is subnormal, normalize it.  */
8261         x *= 0x1p52;
8262         ix = *(UINT64*)&x;
8263         ix -= 52ULL << 52;
8264     }
8265
8266     /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
8267        The range is split into N subintervals.
8268        The ith subinterval contains z and c is near its center.  */
8269     tmp = ix - 0x3fe6000000000000ULL;
8270     i = (tmp >> (52 - 6)) % (1 << 6);
8271     k = (INT64)tmp >> 52; /* arithmetic shift */
8272     iz = ix - (tmp & 0xfffULL << 52);
8273     invc = T[i].invc;
8274     logc = T[i].logc;
8275     z = *(double*)&iz;
8276     kd = k;
8277
8278     /* log2(x) = log2(z/c) + log2(c) + k.  */
8279     /* r ~= z/c - 1, |r| < 1/(2*N).  */
8280     /* rounding error: 0x1p-55/N + 0x1p-65.  */
8281     r = (z - T2[i].chi - T2[i].clo) * invc;
8282     *(UINT64*)&rhi = *(UINT64*)&r & -1ULL << 32;
8283     rlo = r - rhi;
8284     t1 = rhi * invln2hi;
8285     t2 = rlo * invln2hi + r * invln2lo;
8286
8287     /* hi + lo = r/ln2 + log2(c) + k.  */
8288     t3 = kd + logc;
8289     hi = t3 + t1;
8290     lo = t3 - hi + t1 + t2;
8291
8292     /* log2(r+1) = r/ln2 + r^2*poly(r).  */
8293     /* Evaluation is optimized assuming superscalar pipelined execution.  */
8294     r2 = r * r; /* rounding error: 0x1p-54/N^2.  */
8295     r4 = r2 * r2;
8296     /* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
8297        ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma).  */
8298     p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
8299     y = lo + r2 * p + hi;
8300     return y;
8301 }
8302
8303 /*********************************************************************
8304  *      log2f (MSVCR120.@)
8305  *
8306  * Copied from musl: src/math/log2f.c
8307  */
8308 float CDECL log2f(float x)
8309 {
8310     static const double A[] = {
8311         -0x1.712b6f70a7e4dp-2,
8312         0x1.ecabf496832ep-2,
8313         -0x1.715479ffae3dep-1,
8314         0x1.715475f35c8b8p0
8315     };
8316     static const struct {
8317         double invc, logc;
8318     } T[] = {
8319         { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },
8320         { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 },
8321         { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 },
8322         { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 },
8323         { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 },
8324         { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 },
8325         { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 },
8326         { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 },
8327         { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 },
8328         { 0x1p+0, 0x0p+0 },
8329         { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 },
8330         { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 },
8331         { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 },
8332         { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 },
8333         { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 },
8334         { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 }
8335     };
8336
8337     double z, r, r2, p, y, y0, invc, logc;
8338     UINT32 ix, iz, top, tmp;
8339     int k, i;
8340
8341     ix = *(UINT32*)&x;
8342     /* Fix sign of zero with downward rounding when x==1. */
8343     if (ix == 0x3f800000)
8344         return 0;
8345     if (ix - 0x00800000 >= 0x7f800000 - 0x00800000) {
8346         /* x < 0x1p-126 or inf or nan. */
8347         if (ix * 2 == 0) {
8348             *_errno() = ERANGE;
8349             return -1.0f / x;
8350         }
8351         if (ix == 0x7f800000) /* log2(inf) == inf. */
8352             return x;
8353         if (ix * 2 > 0xff000000)
8354             return x;
8355         if (ix & 0x80000000) {
8356             *_errno() = EDOM;
8357             return (x - x) / (x - x);
8358         }
8359         /* x is subnormal, normalize it. */
8360         x *= 0x1p23f;
8361         ix = *(UINT32*)&x;
8362         ix -= 23 << 23;
8363     }
8364
8365     /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
8366        The range is split into N subintervals.
8367        The ith subinterval contains z and c is near its center. */
8368     tmp = ix - 0x3f330000;
8369     i = (tmp >> (23 - 4)) % (1 << 4);
8370     top = tmp & 0xff800000;
8371     iz = ix - top;
8372     k = (INT32)tmp >> 23; /* arithmetic shift */
8373     invc = T[i].invc;
8374     logc = T[i].logc;
8375     z = *(float*)&iz;
8376
8377     /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
8378     r = z * invc - 1;
8379     y0 = logc + (double)k;
8380
8381     /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
8382     r2 = r * r;
8383     y = A[1] * r + A[2];
8384     y = A[0] * r2 + y;
8385     p = A[3] * r + y0;
8386     y = y * r2 + p;
8387     return y;
8388 }
8389
/*********************************************************************
 *      rint (MSVCR120.@)
 *
 * Exported entry point that forwards its argument unchanged to the
 * internal __rint() helper and returns that helper's result.
 * NOTE(review): __rint() is defined outside this block; its rounding
 * behaviour is presumably that of C99 rint() - confirm at its definition.
 */
double CDECL rint(double x)
{
    return __rint(x);
}
8397
8398 /*********************************************************************
8399  *      rintf (MSVCR120.@)
8400  *
8401  * Copied from musl: src/math/rintf.c
8402  */
8403 float CDECL rintf(float x)
8404 {
8405     static const float toint = 1 / FLT_EPSILON;
8406
8407     unsigned int ix = *(unsigned int*)&x;
8408     int e = ix >> 23 & 0xff;
8409     int s = ix >> 31;
8410     float y;
8411
8412     if (e >= 0x7f + 23)
8413         return x;
8414     if (s)
8415         y = fp_barrierf(x - toint) + toint;
8416     else
8417         y = fp_barrierf(x + toint) - toint;
8418     if (y == 0)
8419         return s ? -0.0f : 0.0f;
8420     return y;
8421 }
8422
8423 /*********************************************************************
8424  *      lrint (MSVCR120.@)
8425  */
8426 __msvcrt_long CDECL lrint(double x)
8427 {
8428     double d;
8429
8430     d = rint(x);
8431     if ((d < 0 && d != (double)(__msvcrt_long)d)
8432             || (d >= 0 && d != (double)(__msvcrt_ulong)d)) {
8433         *_errno() = EDOM;
8434         return 0;
8435     }
8436     return d;
8437 }
8438
8439 /*********************************************************************
8440  *      lrintf (MSVCR120.@)
8441  */
8442 __msvcrt_long CDECL lrintf(float x)
8443 {
8444     float f;
8445
8446     f = rintf(x);
8447     if ((f < 0 && f != (float)(__msvcrt_long)f)
8448             || (f >= 0 && f != (float)(__msvcrt_ulong)f)) {
8449         *_errno() = EDOM;
8450         return 0;
8451     }
8452     return f;
8453 }
8454
8455 /*********************************************************************
8456  *      llrint (MSVCR120.@)
8457  */
8458 __int64 CDECL llrint(double x)
8459 {
8460     double d;
8461
8462     d = rint(x);
8463     if ((d < 0 && d != (double)(__int64)d)
8464             || (d >= 0 && d != (double)(unsigned __int64)d)) {
8465         *_errno() = EDOM;
8466         return 0;
8467     }
8468     return d;
8469 }
8470
8471 /*********************************************************************
8472  *      llrintf (MSVCR120.@)
8473  */
8474 __int64 CDECL llrintf(float x)
8475 {
8476     float f;
8477
8478     f = rintf(x);
8479     if ((f < 0 && f != (float)(__int64)f)
8480             || (f >= 0 && f != (float)(unsigned __int64)f)) {
8481         *_errno() = EDOM;
8482         return 0;
8483     }
8484     return f;
8485 }
8486
/*********************************************************************
 *      round (MSVCR120.@)
 *
 * Exported entry point that forwards its argument unchanged to the
 * internal __round() helper and returns that helper's result.
 * NOTE(review): __round() is defined outside this block; it presumably
 * rounds halfway cases away from zero like roundf() below - confirm.
 */
double CDECL round(double x)
{
    return __round(x);
}
8494
8495 /*********************************************************************
8496  *      roundf (MSVCR120.@)
8497  *
8498  * Copied from musl: src/math/roundf.c
8499  */
8500 float CDECL roundf(float x)
8501 {
8502     static const float toint = 1 / FLT_EPSILON;
8503
8504     unsigned int ix = *(unsigned int*)&x;
8505     int e = ix >> 23 & 0xff;
8506     float y;
8507
8508     if (e >= 0x7f + 23)
8509         return x;
8510     if (ix >> 31)
8511         x = -x;
8512     if (e < 0x7f - 1)
8513         return 0 * *(float*)&ix;
8514     y = fp_barrierf(x + toint) - toint - x;
8515     if (y > 0.5f)
8516         y = y + x - 1;
8517     else if (y <= -0.5f)
8518         y = y + x + 1;
8519     else
8520         y = y + x;
8521     if (ix >> 31)
8522         y = -y;
8523     return y;
8524 }
8525
8526 /*********************************************************************
8527  *      lround (MSVCR120.@)
8528  *
8529  * Copied from musl: src/math/lround.c
8530  */
8531 __msvcrt_long CDECL lround(double x)
8532 {
8533     double d = round(x);
8534     if (d != (double)(__msvcrt_long)d) {
8535         *_errno() = EDOM;
8536         return 0;
8537     }
8538     return d;
8539 }
8540
8541 /*********************************************************************
8542  *      lroundf (MSVCR120.@)
8543  *
8544  * Copied from musl: src/math/lroundf.c
8545  */
8546 __msvcrt_long CDECL lroundf(float x)
8547 {
8548     float f = roundf(x);
8549     if (f != (float)(__msvcrt_long)f) {
8550         *_errno() = EDOM;
8551         return 0;
8552     }
8553     return f;
8554 }
8555
8556 /*********************************************************************
8557  *      llround (MSVCR120.@)
8558  *
8559  * Copied from musl: src/math/llround.c
8560  */
8561 __int64 CDECL llround(double x)
8562 {
8563     double d = round(x);
8564     if (d != (double)(__int64)d) {
8565         *_errno() = EDOM;
8566         return 0;
8567     }
8568     return d;
8569 }
8570
8571 /*********************************************************************
8572  *      llroundf (MSVCR120.@)
8573  *
8574  * Copied from musl: src/math/llroundf.c
8575  */
8576 __int64 CDECL llroundf(float x)
8577 {
8578     float f = roundf(x);
8579     if (f != (float)(__int64)f) {
8580         *_errno() = EDOM;
8581         return 0;
8582     }
8583     return f;
8584 }
8585
8586 /*********************************************************************
8587  *      trunc (MSVCR120.@)
8588  *
8589  * Copied from musl: src/math/trunc.c
8590  */
8591 double CDECL trunc(double x)
8592 {
8593     union {double f; UINT64 i;} u = {x};
8594     int e = (u.i >> 52 & 0x7ff) - 0x3ff + 12;
8595     UINT64 m;
8596
8597     if (e >= 52 + 12)
8598         return x;
8599     if (e < 12)
8600         e = 1;
8601     m = -1ULL >> e;
8602     if ((u.i & m) == 0)
8603         return x;
8604     u.i &= ~m;
8605     return u.f;
8606 }
8607
8608 /*********************************************************************
8609  *      truncf (MSVCR120.@)
8610  *
8611  * Copied from musl: src/math/truncf.c
8612  */
8613 float CDECL truncf(float x)
8614 {
8615     union {float f; UINT32 i;} u = {x};
8616     int e = (u.i >> 23 & 0xff) - 0x7f + 9;
8617     UINT32 m;
8618
8619     if (e >= 23 + 9)
8620         return x;
8621     if (e < 9)
8622         e = 1;
8623     m = -1U >> e;
8624     if ((u.i & m) == 0)
8625         return x;
8626     u.i &= ~m;
8627     return u.f;
8628 }
8629
/*********************************************************************
 *      _dtest (MSVCR120.@)
 *
 * Pointer-taking variant of _dclass(): dereferences x and returns
 * whatever _dclass() returns for the pointed-to double.
 * x is dereferenced unconditionally, so it must not be NULL.
 */
short CDECL _dtest(double *x)
{
    return _dclass(*x);
}
8637
/*********************************************************************
 *      _fdtest (MSVCR120.@)
 *
 * Pointer-taking variant of _fdclass(): dereferences x and returns
 * whatever _fdclass() returns for the pointed-to float.
 * x is dereferenced unconditionally, so it must not be NULL.
 */
short CDECL _fdtest(float *x)
{
    return _fdclass(*x);
}
8645
/* Helper for erfc2(), which calls it when |x| < 1.25: evaluates
 * 1 - erx - P(s) / Q(s) with s = |x| - 1, where P and Q are
 * polynomials of degree 6 in s. */
static double erfc1(double x)
{
    static const double erx = 8.45062911510467529297e-01;
    /* Numerator coefficients, lowest power first. */
    static const double pa[] = {
        -2.36211856075265944077e-03,
         4.14856118683748331666e-01,
        -3.72207876035701323847e-01,
         3.18346619901161753674e-01,
        -1.10894694282396677476e-01,
         3.54783043256182359371e-02,
        -2.16637559486879084300e-03
    };
    /* Denominator coefficients, lowest power first (constant term 1). */
    static const double qa[] = {
         1.0,
         1.06420880400844228286e-01,
         5.40397917702171048937e-01,
         7.18286544141962662868e-02,
         1.26171219808761642112e-01,
         1.36370839120290507362e-02,
         1.19844998467991074170e-02
    };

    const double s = fabs(x) - 1;
    const double num = pa[0] + s * (pa[1] + s * (pa[2] + s * (pa[3] +
                       s * (pa[4] + s * (pa[5] + s * pa[6])))));
    const double den = qa[0] + s * (qa[1] + s * (qa[2] + s * (qa[3] +
                       s * (qa[4] + s * (qa[5] + s * qa[6])))));

    return 1 - erx - num / den;
}
8670
8671 static double erfc2(UINT32 ix, double x)
8672 {
8673     static const double ra0  = -9.86494403484714822705e-03,
8674                  ra1  = -6.93858572707181764372e-01,
8675                  ra2  = -1.05586262253232909814e+01,
8676                  ra3  = -6.23753324503260060396e+01,
8677                  ra4  = -1.62396669462573470355e+02,
8678                  ra5  = -1.84605092906711035994e+02,
8679                  ra6  = -8.12874355063065934246e+01,
8680                  ra7  = -9.81432934416914548592e+00,
8681                  sa1  =  1.96512716674392571292e+01,
8682                  sa2  =  1.37657754143519042600e+02,
8683                  sa3  =  4.34565877475229228821e+02,
8684                  sa4  =  6.45387271733267880336e+02,
8685                  sa5  =  4.29008140027567833386e+02,
8686                  sa6  =  1.08635005541779435134e+02,
8687                  sa7  =  6.57024977031928170135e+00,
8688                  sa8  = -6.04244152148580987438e-02,
8689                  rb0  = -9.86494292470009928597e-03,
8690                  rb1  = -7.99283237680523006574e-01,
8691                  rb2  = -1.77579549177547519889e+01,
8692                  rb3  = -1.60636384855821916062e+02,
8693                  rb4  = -6.37566443368389627722e+02,
8694                  rb5  = -1.02509513161107724954e+03,
8695                  rb6  = -4.83519191608651397019e+02,
8696                  sb1  =  3.03380607434824582924e+01,
8697                  sb2  =  3.25792512996573918826e+02,
8698                  sb3  =  1.53672958608443695994e+03,
8699                  sb4  =  3.19985821950859553908e+03,
8700                  sb5  =  2.55305040643316442583e+03,
8701                  sb6  =  4.74528541206955367215e+02,
8702                  sb7  = -2.24409524465858183362e+01;
8703
8704     double s, R, S, z;
8705     UINT64 iz;
8706
8707     if (ix < 0x3ff40000) /* |x| < 1.25 */
8708         return erfc1(x);
8709
8710     x = fabs(x);
8711     s = 1 / (x * x);
8712     if (ix < 0x4006db6d) { /* |x| < 1/.35 ~ 2.85714 */
8713         R = ra0 + s * (ra1 + s * (ra2 + s * (ra3 + s * (ra4 + s *
8714                             (ra5 + s * (ra6 + s * ra7))))));
8715         S = 1.0 + s * (sa1 + s * (sa2 + s * (sa3 + s * (sa4 + s *
8716                             (sa5 + s * (sa6 + s * (sa7 + s * sa8)))))));
8717     } else { /* |x| > 1/.35 */
8718         R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * (rb4 + s *
8719                             (rb5 + s * rb6)))));
8720         S = 1.0 + s * (sb1 + s * (sb2 + s * (sb3 + s * (sb4 + s *
8721                             (sb5 + s * (sb6 + s * sb7))))));
8722     }
8723     z = x;
8724     iz = *(ULONGLONG*)&z;
8725     iz &= 0xffffffff00000000ULL;
8726     z = *(double*)&iz;
8727     return exp(-z * z - 0.5625) * exp((z - x) * (z + x) + R / S) / x;
8728 }
8729
8730 /*********************************************************************
8731  *      erf (MSVCR120.@)
8732  */
8733 double CDECL erf(double x)
8734 {
8735     static const double efx8 =  1.02703333676410069053e+00,
8736                  pp0  =  1.28379167095512558561e-01,
8737                  pp1  = -3.25042107247001499370e-01,
8738                  pp2  = -2.84817495755985104766e-02,
8739                  pp3  = -5.77027029648944159157e-03,
8740                  pp4  = -2.37630166566501626084e-05,
8741                  qq1  =  3.97917223959155352819e-01,
8742                  qq2  =  6.50222499887672944485e-02,
8743                  qq3  =  5.08130628187576562776e-03,
8744                  qq4  =  1.32494738004321644526e-04,
8745                  qq5  = -3.96022827877536812320e-06;
8746
8747     double r, s, z, y;
8748     UINT32 ix;
8749     int sign;
8750
8751     ix = *(UINT64*)&x >> 32;
8752     sign = ix >> 31;
8753     ix &= 0x7fffffff;
8754     if (ix >= 0x7ff00000) {
8755         /* erf(nan)=nan, erf(+-inf)=+-1 */
8756         return 1 - 2 * sign + 1 / x;
8757     }
8758     if (ix < 0x3feb0000) { /* |x| < 0.84375 */
8759         if (ix < 0x3e300000) { /* |x| < 2**-28 */
8760             /* avoid underflow */
8761             return 0.125 * (8 * x + efx8 * x);
8762         }
8763         z = x * x;
8764         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
8765         s = 1.0 + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
8766         y = r / s;
8767         return x + x * y;
8768     }
8769     if (ix < 0x40180000) /* 0.84375 <= |x| < 6 */
8770         y = 1 - erfc2(ix, x);
8771     else
8772         y = 1 - DBL_MIN;
8773     return sign ? -y : y;
8774 }
8775
/* Helper for erfc2f(), which calls it when |x| < 1.25: evaluates
 * 1 - erx - P(s) / Q(s) with s = |x| - 1, where P and Q are
 * polynomials of degree 6 in s. */
static float erfc1f(float x)
{
    static const float erx = 8.4506291151e-01;
    /* Numerator coefficients, lowest power first. */
    static const float pa[] = {
        -2.3621185683e-03,
         4.1485610604e-01,
        -3.7220788002e-01,
         3.1834661961e-01,
        -1.1089469492e-01,
         3.5478305072e-02,
        -2.1663755178e-03
    };
    /* Denominator coefficients, lowest power first (constant term 1). */
    static const float qa[] = {
         1.0,
         1.0642088205e-01,
         5.4039794207e-01,
         7.1828655899e-02,
         1.2617121637e-01,
         1.3637083583e-02,
         1.1984500103e-02
    };

    const float s = fabsf(x) - 1;
    const float num = pa[0] + s * (pa[1] + s * (pa[2] + s * (pa[3] +
                      s * (pa[4] + s * (pa[5] + s * pa[6])))));
    const float den = qa[0] + s * (qa[1] + s * (qa[2] + s * (qa[3] +
                      s * (qa[4] + s * (qa[5] + s * qa[6])))));

    return 1 - erx - num / den;
}
8800
8801 static float erfc2f(UINT32 ix, float x)
8802 {
8803     static const float ra0  = -9.8649440333e-03,
8804                  ra1  = -6.9385856390e-01,
8805                  ra2  = -1.0558626175e+01,
8806                  ra3  = -6.2375331879e+01,
8807                  ra4  = -1.6239666748e+02,
8808                  ra5  = -1.8460508728e+02,
8809                  ra6  = -8.1287437439e+01,
8810                  ra7  = -9.8143291473e+00,
8811                  sa1  =  1.9651271820e+01,
8812                  sa2  =  1.3765776062e+02,
8813                  sa3  =  4.3456588745e+02,
8814                  sa4  =  6.4538726807e+02,
8815                  sa5  =  4.2900814819e+02,
8816                  sa6  =  1.0863500214e+02,
8817                  sa7  =  6.5702495575e+00,
8818                  sa8  = -6.0424413532e-02,
8819                  rb0  = -9.8649431020e-03,
8820                  rb1  = -7.9928326607e-01,
8821                  rb2  = -1.7757955551e+01,
8822                  rb3  = -1.6063638306e+02,
8823                  rb4  = -6.3756646729e+02,
8824                  rb5  = -1.0250950928e+03,
8825                  rb6  = -4.8351919556e+02,
8826                  sb1  =  3.0338060379e+01,
8827                  sb2  =  3.2579251099e+02,
8828                  sb3  =  1.5367296143e+03,
8829                  sb4  =  3.1998581543e+03,
8830                  sb5  =  2.5530502930e+03,
8831                  sb6  =  4.7452853394e+02,
8832                  sb7  = -2.2440952301e+01;
8833
8834     float s, R, S, z;
8835
8836     if (ix < 0x3fa00000) /* |x| < 1.25 */
8837         return erfc1f(x);
8838
8839     x = fabsf(x);
8840     s = 1 / (x * x);
8841     if (ix < 0x4036db6d) { /* |x| < 1/0.35 */
8842         R = ra0 + s * (ra1 + s * (ra2 + s * (ra3 + s * (ra4 + s *
8843                             (ra5 + s * (ra6 + s * ra7))))));
8844         S = 1.0f + s * (sa1 + s * (sa2 + s * (sa3 + s * (sa4 + s *
8845                             (sa5 + s * (sa6 + s * (sa7 + s * sa8)))))));
8846     } else { /* |x| >= 1/0.35 */
8847         R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * (rb4 + s * (rb5 + s * rb6)))));
8848         S = 1.0f + s * (sb1 + s * (sb2 + s * (sb3 + s * (sb4 + s *
8849                             (sb5 + s * (sb6 + s * sb7))))));
8850     }
8851
8852     ix = *(UINT32*)&x & 0xffffe000;
8853     z = *(float*)&ix;
8854     return expf(-z * z - 0.5625f) * expf((z - x) * (z + x) + R / S) / x;
8855 }
8856
8857 /*********************************************************************
8858  *      erff (MSVCR120.@)
8859  *
8860  * Copied from musl: src/math/erff.c
8861  */
8862 float CDECL erff(float x)
8863 {
8864     static const float efx8 =  1.0270333290e+00,
8865                  pp0  =  1.2837916613e-01,
8866                  pp1  = -3.2504209876e-01,
8867                  pp2  = -2.8481749818e-02,
8868                  pp3  = -5.7702702470e-03,
8869                  pp4  = -2.3763017452e-05,
8870                  qq1  =  3.9791721106e-01,
8871                  qq2  =  6.5022252500e-02,
8872                  qq3  =  5.0813062117e-03,
8873                  qq4  =  1.3249473704e-04,
8874                  qq5  = -3.9602282413e-06;
8875
8876     float r, s, z, y;
8877     UINT32 ix;
8878     int sign;
8879
8880     ix = *(UINT32*)&x;
8881     sign = ix >> 31;
8882     ix &= 0x7fffffff;
8883     if (ix >= 0x7f800000) {
8884         /* erf(nan)=nan, erf(+-inf)=+-1 */
8885         return 1 - 2 * sign + 1 / x;
8886     }
8887     if (ix < 0x3f580000) { /* |x| < 0.84375 */
8888         if (ix < 0x31800000) { /* |x| < 2**-28 */
8889             /*avoid underflow */
8890             return 0.125f * (8 * x + efx8 * x);
8891         }
8892         z = x * x;
8893         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
8894         s = 1 + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
8895         y = r / s;
8896         return x + x * y;
8897     }
8898     if (ix < 0x40c00000) /* |x| < 6 */
8899         y = 1 - erfc2f(ix, x);
8900     else
8901         y = 1 - FLT_MIN;
8902     return sign ? -y : y;
8903 }
8904
8905 /*********************************************************************
8906  *      erfc (MSVCR120.@)
8907  *
8908  * Copied from musl: src/math/erf.c
8909  */
8910 double CDECL erfc(double x)
8911 {
8912     static const double pp0  =  1.28379167095512558561e-01,
8913                  pp1  = -3.25042107247001499370e-01,
8914                  pp2  = -2.84817495755985104766e-02,
8915                  pp3  = -5.77027029648944159157e-03,
8916                  pp4  = -2.37630166566501626084e-05,
8917                  qq1  =  3.97917223959155352819e-01,
8918                  qq2  =  6.50222499887672944485e-02,
8919                  qq3  =  5.08130628187576562776e-03,
8920                  qq4  =  1.32494738004321644526e-04,
8921                  qq5  = -3.96022827877536812320e-06;
8922
8923     double r, s, z, y;
8924     UINT32 ix;
8925     int sign;
8926
8927     ix = *(ULONGLONG*)&x >> 32;
8928     sign = ix >> 31;
8929     ix &= 0x7fffffff;
8930     if (ix >= 0x7ff00000) {
8931         /* erfc(nan)=nan, erfc(+-inf)=0,2 */
8932         return 2 * sign + 1 / x;
8933     }
8934     if (ix < 0x3feb0000) { /* |x| < 0.84375 */
8935         if (ix < 0x3c700000) /* |x| < 2**-56 */
8936             return 1.0 - x;
8937         z = x * x;
8938         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
8939         s = 1.0 + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
8940         y = r / s;
8941         if (sign || ix < 0x3fd00000) { /* x < 1/4 */
8942             return 1.0 - (x + x * y);
8943         }
8944         return 0.5 - (x - 0.5 + x * y);
8945     }
8946     if (ix < 0x403c0000) { /* 0.84375 <= |x| < 28 */
8947         return sign ? 2 - erfc2(ix, x) : erfc2(ix, x);
8948     }
8949     if (sign)
8950         return 2 - DBL_MIN;
8951     *_errno() = ERANGE;
8952     return fp_barrier(DBL_MIN) * DBL_MIN;
8953 }
8954
8955 /*********************************************************************
8956  *      erfcf (MSVCR120.@)
8957  *
8958  * Copied from musl: src/math/erff.c
8959  */
8960 float CDECL erfcf(float x)
8961 {
8962     static const float pp0  =  1.2837916613e-01,
8963                  pp1  = -3.2504209876e-01,
8964                  pp2  = -2.8481749818e-02,
8965                  pp3  = -5.7702702470e-03,
8966                  pp4  = -2.3763017452e-05,
8967                  qq1  =  3.9791721106e-01,
8968                  qq2  =  6.5022252500e-02,
8969                  qq3  =  5.0813062117e-03,
8970                  qq4  =  1.3249473704e-04,
8971                  qq5  = -3.9602282413e-06;
8972
8973     float r, s, z, y;
8974     UINT32 ix;
8975     int sign;
8976
8977     ix = *(UINT32*)&x;
8978     sign = ix >> 31;
8979     ix &= 0x7fffffff;
8980     if (ix >= 0x7f800000) {
8981         /* erfc(nan)=nan, erfc(+-inf)=0,2 */
8982         return 2 * sign + 1 / x;
8983     }
8984
8985     if (ix < 0x3f580000) { /* |x| < 0.84375 */
8986         if (ix < 0x23800000) /* |x| < 2**-56 */
8987             return 1.0f - x;
8988         z = x * x;
8989         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
8990         s = 1.0f + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
8991         y = r / s;
8992         if (sign || ix < 0x3e800000) /* x < 1/4 */
8993             return 1.0f - (x + x * y);
8994         return 0.5f - (x - 0.5f + x * y);
8995     }
8996     if (ix < 0x41e00000) { /* |x| < 28 */
8997         return sign ? 2 - erfc2f(ix, x) : erfc2f(ix, x);
8998     }
8999     if (sign)
9000         return 2 - FLT_MIN;
9001     *_errno() = ERANGE;
9002     return FLT_MIN * FLT_MIN;
9003 }
9004
9005 /*********************************************************************
9006  *      fmaxf (MSVCR120.@)
9007  */
9008 float CDECL fmaxf(float x, float y)
9009 {
9010     if(isnan(x))
9011         return y;
9012     if(isnan(y))
9013         return x;
9014     if(x==0 && y==0)
9015         return signbit(x) ? y : x;
9016     return x<y ? y : x;
9017 }
9018
9019 /*********************************************************************
9020  *      fmax (MSVCR120.@)
9021  */
9022 double CDECL fmax(double x, double y)
9023 {
9024     if(isnan(x))
9025         return y;
9026     if(isnan(y))
9027         return x;
9028     if(x==0 && y==0)
9029         return signbit(x) ? y : x;
9030     return x<y ? y : x;
9031 }
9032
9033 /*********************************************************************
9034  *      fdimf (MSVCR120.@)
9035  */
9036 float CDECL fdimf(float x, float y)
9037 {
9038     if(isnan(x))
9039         return x;
9040     if(isnan(y))
9041         return y;
9042     return x>y ? x-y : 0;
9043 }
9044
9045 /*********************************************************************
9046  *      fdim (MSVCR120.@)
9047  */
9048 double CDECL fdim(double x, double y)
9049 {
9050     if(isnan(x))
9051         return x;
9052     if(isnan(y))
9053         return y;
9054     return x>y ? x-y : 0;
9055 }
9056
9057 /*********************************************************************
9058  *      _fdsign (MSVCR120.@)
9059  */
9060 int CDECL _fdsign(float x)
9061 {
9062     union { float f; UINT32 i; } u = { x };
9063     return (u.i >> 16) & 0x8000;
9064 }
9065
9066 /*********************************************************************
9067  *      _dsign (MSVCR120.@)
9068  */
9069 int CDECL _dsign(double x)
9070 {
9071     union { double f; UINT64 i; } u = { x };
9072     return (u.i >> 48) & 0x8000;
9073 }
9074
9075
9076 /*********************************************************************
9077  *      _dpcomp (MSVCR120.@)
9078  */
9079 int CDECL _dpcomp(double x, double y)
9080 {
9081     if(isnan(x) || isnan(y))
9082         return 0;
9083
9084     if(x == y) return 2;
9085     return x < y ? 1 : 4;
9086 }
9087
9088 /*********************************************************************
9089  *      _fdpcomp (MSVCR120.@)
9090  */
9091 int CDECL _fdpcomp(float x, float y)
9092 {
9093     return _dpcomp(x, y);
9094 }
9095
9096 /*********************************************************************
9097  *      fminf (MSVCR120.@)
9098  */
9099 float CDECL fminf(float x, float y)
9100 {
9101     if(isnan(x))
9102         return y;
9103     if(isnan(y))
9104         return x;
9105     if(x==0 && y==0)
9106         return signbit(x) ? x : y;
9107     return x<y ? x : y;
9108 }
9109
9110 /*********************************************************************
9111  *      fmin (MSVCR120.@)
9112  */
9113 double CDECL fmin(double x, double y)
9114 {
9115     if(isnan(x))
9116         return y;
9117     if(isnan(y))
9118         return x;
9119     if(x==0 && y==0)
9120         return signbit(x) ? x : y;
9121     return x<y ? x : y;
9122 }
9123
9124 /*********************************************************************
9125  *      asinh (MSVCR120.@)
9126  *
9127  * Copied from musl: src/math/asinh.c
9128  */
9129 double CDECL asinh(double x)
9130 {
9131     UINT64 ux = *(UINT64*)&x;
9132     int e = ux >> 52 & 0x7ff;
9133     int s = ux >> 63;
9134
9135     /* |x| */
9136     ux &= (UINT64)-1 / 2;
9137     x = *(double*)&ux;
9138
9139     if (e >= 0x3ff + 26) /* |x| >= 0x1p26 or inf or nan */
9140         x = log(x) + 0.693147180559945309417232121458176568;
9141     else if (e >= 0x3ff + 1) /* |x| >= 2 */
9142         x = log(2 * x + 1 / (sqrt(x * x + 1) + x));
9143     else if (e >= 0x3ff - 26) /* |x| >= 0x1p-26 */
9144         x = log1p(x + x * x / (sqrt(x * x + 1) + 1));
9145     else /* |x| < 0x1p-26, raise inexact if x != 0 */
9146         fp_barrier(x + 0x1p120f);
9147     return s ? -x : x;
9148 }
9149
9150 /*********************************************************************
9151  *      asinhf (MSVCR120.@)
9152  *
9153  * Copied from musl: src/math/asinhf.c
9154  */
9155 float CDECL asinhf(float x)
9156 {
9157     UINT32 ux = *(UINT32*)&x;
9158     UINT32 i = ux & 0x7fffffff;
9159     int s = ux >> 31;
9160
9161     /* |x| */
9162     x = *(float*)&i;
9163
9164     if (i >= 0x3f800000 + (12 << 23))/* |x| >= 0x1p12 or inf or nan */
9165         x = logf(x) + 0.693147180559945309417232121458176568f;
9166     else if (i >= 0x3f800000 + (1 << 23)) /* |x| >= 2 */
9167         x = logf(2 * x + 1 / (sqrtf(x * x + 1) + x));
9168     else if (i >= 0x3f800000 - (12 << 23)) /* |x| >= 0x1p-12 */
9169         x = log1pf(x + x * x / (sqrtf(x * x + 1) + 1));
9170     else /* |x| < 0x1p-12, raise inexact if x!=0 */
9171         fp_barrierf(x + 0x1p120f);
9172     return s ? -x : x;
9173 }
9174
9175 /*********************************************************************
9176  *      acosh (MSVCR120.@)
9177  *
9178  * Copied from musl: src/math/acosh.c
9179  */
9180 double CDECL acosh(double x)
9181 {
9182     int e = *(UINT64*)&x >> 52 & 0x7ff;
9183
9184     if (x < 1)
9185     {
9186         *_errno() = EDOM;
9187         feraiseexcept(FE_INVALID);
9188         return NAN;
9189     }
9190
9191     if (e < 0x3ff + 1) /* |x| < 2, up to 2ulp error in [1,1.125] */
9192         return log1p(x - 1 + sqrt((x - 1) * (x - 1) + 2 * (x - 1)));
9193     if (e < 0x3ff + 26) /* |x| < 0x1p26 */
9194         return log(2 * x - 1 / (x + sqrt(x * x - 1)));
9195     /* |x| >= 0x1p26 or nan */
9196     return log(x) + 0.693147180559945309417232121458176568;
9197 }
9198
9199 /*********************************************************************
9200  *      acoshf (MSVCR120.@)
9201  *
9202  * Copied from musl: src/math/acoshf.c
9203  */
9204 float CDECL acoshf(float x)
9205 {
9206     UINT32 a = *(UINT32*)&x & 0x7fffffff;
9207
9208     if (x < 1)
9209     {
9210         *_errno() = EDOM;
9211         feraiseexcept(FE_INVALID);
9212         return NAN;
9213     }
9214
9215     if (a < 0x3f800000 + (1 << 23)) /* |x| < 2, up to 2ulp error in [1,1.125] */
9216         return log1pf(x - 1 + sqrtf((x - 1) * (x - 1) + 2 * (x - 1)));
9217     if (*(UINT32*)&x < 0x3f800000 + (12 << 23)) /* 2 <= x < 0x1p12 */
9218         return logf(2 * x - 1 / (x + sqrtf(x * x - 1)));
9219     /* x >= 0x1p12 or x <= -2 or nan */
9220     return logf(x) + 0.693147180559945309417232121458176568f;
9221 }
9222
9223 /*********************************************************************
9224  *      atanh (MSVCR120.@)
9225  *
9226  * Copied from musl: src/math/atanh.c
9227  */
9228 double CDECL atanh(double x)
9229 {
9230     UINT64 ux = *(UINT64*)&x;
9231     int e = ux >> 52 & 0x7ff;
9232     int s = ux >> 63;
9233
9234     /* |x| */
9235     ux &= (UINT64)-1 / 2;
9236     x = *(double*)&ux;
9237
9238     if (x > 1) {
9239         *_errno() = EDOM;
9240         feraiseexcept(FE_INVALID);
9241         return NAN;
9242     }
9243
9244     if (e < 0x3ff - 1) {
9245         if (e < 0x3ff - 32) {
9246             fp_barrier(x + 0x1p120f);
9247             if (e == 0) /* handle underflow */
9248                 fp_barrier(x * x);
9249         } else { /* |x| < 0.5, up to 1.7ulp error */
9250             x = 0.5 * log1p(2 * x + 2 * x * x / (1 - x));
9251         }
9252     } else { /* avoid overflow */
9253         x = 0.5 * log1p(2 * (x / (1 - x)));
9254         if (isinf(x)) *_errno() = ERANGE;
9255     }
9256     return s ? -x : x;
9257 }
9258
9259 /*********************************************************************
9260  *      atanhf (MSVCR120.@)
9261  *
9262  * Copied from musl: src/math/atanhf.c
9263  */
9264 float CDECL atanhf(float x)
9265 {
9266     UINT32 ux = *(UINT32*)&x;
9267     int s = ux >> 31;
9268
9269     /* |x| */
9270     ux &= 0x7fffffff;
9271     x = *(float*)&ux;
9272
9273     if (x > 1) {
9274         *_errno() = EDOM;
9275         feraiseexcept(FE_INVALID);
9276         return NAN;
9277     }
9278
9279     if (ux < 0x3f800000 - (1 << 23)) {
9280         if (ux < 0x3f800000 - (32 << 23)) {
9281             fp_barrierf(x + 0x1p120f);
9282             if (ux < (1 << 23)) /* handle underflow */
9283                 fp_barrierf(x * x);
9284         } else { /* |x| < 0.5, up to 1.7ulp error */
9285             x = 0.5f * log1pf(2 * x + 2 * x * x / (1 - x));
9286         }
9287     } else { /* avoid overflow */
9288         x = 0.5f * log1pf(2 * (x / (1 - x)));
9289         if (isinf(x)) *_errno() = ERANGE;
9290     }
9291     return s ? -x : x;
9292 }
9293
9294 #endif /* _MSVCR_VER>=120 */
9295
9296 /*********************************************************************
9297  *      _scalb  (MSVCRT.@)
9298  *      scalbn  (MSVCR120.@)
9299  *      scalbln (MSVCR120.@)
9300  */
9301 double CDECL _scalb(double num, __msvcrt_long power)
9302 {
9303   return ldexp(num, power);
9304 }
9305
9306 /*********************************************************************
9307  *      _scalbf  (MSVCRT.@)
9308  *      scalbnf  (MSVCR120.@)
9309  *      scalblnf (MSVCR120.@)
9310  */
9311 float CDECL _scalbf(float num, __msvcrt_long power)
9312 {
9313   return ldexp(num, power);
9314 }
9315
9316 #if _MSVCR_VER>=120
9317
9318 /*********************************************************************
9319  *      remainder (MSVCR120.@)
9320  *
9321  * Copied from musl: src/math/remainder.c
9322  */
9323 double CDECL remainder(double x, double y)
9324 {
9325     int q;
9326 #if _MSVCR_VER == 120 && defined(__x86_64__)
9327     if (isnan(x) || isnan(y)) *_errno() = EDOM;
9328 #endif
9329     return remquo(x, y, &q);
9330 }
9331
9332 /*********************************************************************
9333  *      remainderf (MSVCR120.@)
9334  *
9335  * Copied from musl: src/math/remainderf.c
9336  */
9337 float CDECL remainderf(float x, float y)
9338 {
9339     int q;
9340 #if _MSVCR_VER == 120 && defined(__x86_64__)
9341     if (isnan(x) || isnan(y)) *_errno() = EDOM;
9342 #endif
9343     return remquof(x, y, &q);
9344 }
9345
9346 /*********************************************************************
9347  *      remquo (MSVCR120.@)
9348  *
9349  * Copied from musl: src/math/remquo.c
9350  */
9351 double CDECL remquo(double x, double y, int *quo)
9352 {
9353     UINT64 uxi = *(UINT64*)&x;
9354     UINT64 uyi = *(UINT64*)&y;
9355     int ex = uxi >> 52 & 0x7ff;
9356     int ey = uyi >> 52 & 0x7ff;
9357     int sx = uxi >> 63;
9358     int sy = uyi >> 63;
9359     UINT32 q;
9360     UINT64 i;
9361
9362     *quo = 0;
9363     if (y == 0 || isinf(x)) *_errno() = EDOM;
9364     if (uyi << 1 == 0 || isnan(y) || ex == 0x7ff)
9365         return (x * y) / (x * y);
9366     if (uxi << 1 == 0)
9367         return x;
9368
9369     /* normalize x and y */
9370     if (!ex) {
9371         for (i = uxi << 12; i >> 63 == 0; ex--, i <<= 1);
9372         uxi <<= -ex + 1;
9373     } else {
9374         uxi &= -1ULL >> 12;
9375         uxi |= 1ULL << 52;
9376     }
9377     if (!ey) {
9378         for (i = uyi << 12; i >> 63 == 0; ey--, i <<= 1);
9379         uyi <<= -ey + 1;
9380     } else {
9381         uyi &= -1ULL >> 12;
9382         uyi |= 1ULL << 52;
9383     }
9384
9385     q = 0;
9386     if (ex < ey) {
9387         if (ex+1 == ey)
9388             goto end;
9389         return x;
9390     }
9391
9392     /* x mod y */
9393     for (; ex > ey; ex--) {
9394         i = uxi - uyi;
9395         if (i >> 63 == 0) {
9396             uxi = i;
9397             q++;
9398         }
9399         uxi <<= 1;
9400         q <<= 1;
9401     }
9402     i = uxi - uyi;
9403     if (i >> 63 == 0) {
9404         uxi = i;
9405         q++;
9406     }
9407     if (uxi == 0)
9408         ex = -60;
9409     else
9410         for (; uxi >> 52 == 0; uxi <<= 1, ex--);
9411 end:
9412     /* scale result and decide between |x| and |x|-|y| */
9413     if (ex > 0) {
9414         uxi -= 1ULL << 52;
9415         uxi |= (UINT64)ex << 52;
9416     } else {
9417         uxi >>= -ex + 1;
9418     }
9419     x = *(double*)&uxi;
9420     if (sy)
9421         y = -y;
9422     if (ex == ey || (ex + 1 == ey && (2 * x > y || (2 * x == y && q % 2)))) {
9423         x -= y;
9424         q++;
9425     }
9426     q &= 0x7fffffff;
9427     *quo = sx ^ sy ? -(int)q : (int)q;
9428     return sx ? -x : x;
9429 }
9430
9431 /*********************************************************************
9432  *      remquof (MSVCR120.@)
9433  *
9434  * Copied from musl: src/math/remquof.c
9435  */
9436 float CDECL remquof(float x, float y, int *quo)
9437 {
9438     UINT32 uxi = *(UINT32*)&x;
9439     UINT32 uyi = *(UINT32*)&y;
9440     int ex = uxi >> 23 & 0xff;
9441     int ey = uyi >> 23 & 0xff;
9442     int sx = uxi >> 31;
9443     int sy = uyi>> 31;
9444     UINT32 q, i;
9445
9446     *quo = 0;
9447     if (y == 0 || isinf(x)) *_errno() = EDOM;
9448     if (uyi << 1 == 0 || isnan(y) || ex == 0xff)
9449         return (x * y) / (x * y);
9450     if (uxi << 1 == 0)
9451         return x;
9452
9453     /* normalize x and y */
9454     if (!ex) {
9455         for (i = uxi << 9; i >> 31 == 0; ex--, i <<= 1);
9456         uxi <<= -ex + 1;
9457     } else {
9458         uxi &= -1U >> 9;
9459         uxi |= 1U << 23;
9460     }
9461     if (!ey) {
9462         for (i = uyi << 9; i >> 31 == 0; ey--, i <<= 1);
9463         uyi <<= -ey + 1;
9464     } else {
9465         uyi &= -1U >> 9;
9466         uyi |= 1U << 23;
9467     }
9468
9469     q = 0;
9470     if (ex < ey) {
9471         if (ex + 1 == ey)
9472             goto end;
9473         return x;
9474     }
9475
9476     /* x mod y */
9477     for (; ex > ey; ex--) {
9478         i = uxi - uyi;
9479         if (i >> 31 == 0) {
9480             uxi = i;
9481             q++;
9482         }
9483         uxi <<= 1;
9484         q <<= 1;
9485     }
9486     i = uxi - uyi;
9487     if (i >> 31 == 0) {
9488         uxi = i;
9489         q++;
9490     }
9491     if (uxi == 0)
9492         ex = -30;
9493     else
9494         for (; uxi >> 23 == 0; uxi <<= 1, ex--);
9495 end:
9496     /* scale result and decide between |x| and |x|-|y| */
9497     if (ex > 0) {
9498         uxi -= 1U << 23;
9499         uxi |= (UINT32)ex << 23;
9500     } else {
9501         uxi >>= -ex + 1;
9502     }
9503     x = *(float*)&uxi;
9504     if (sy)
9505         y = -y;
9506     if (ex == ey || (ex + 1 == ey && (2 * x > y || (2 * x == y && q % 2)))) {
9507         x -= y;
9508         q++;
9509     }
9510     q &= 0x7fffffff;
9511     *quo = sx ^ sy ? -(int)q : (int)q;
9512     return sx ? -x : x;
9513 }
9514
9515 /* sin(pi*x) assuming x > 2^-100, if sin(pi*x)==0 the sign is arbitrary */
9516 static double sin_pi(double x)
9517 {
9518     int n;
9519
9520     /* spurious inexact if odd int */
9521     x = 2.0 * (x * 0.5 - floor(x * 0.5)); /* x mod 2.0 */
9522
9523     n = x * 4.0;
9524     n = (n + 1) / 2;
9525     x -= n * 0.5f;
9526     x *= M_PI;
9527
9528     switch (n) {
9529     default: /* case 4: */
9530     case 0: return __sin(x, 0.0, 0);
9531     case 1: return __cos(x, 0.0);
9532     case 2: return __sin(-x, 0.0, 0);
9533     case 3: return -__cos(x, 0.0);
9534     }
9535 }
9536
9537 /*********************************************************************
9538  *      lgamma (MSVCR120.@)
9539  *
9540  * Copied from musl: src/math/lgamma_r.c
9541  */
9542 double CDECL lgamma(double x)
9543 {
9544     static const double pi = 3.14159265358979311600e+00,
9545         a0 = 7.72156649015328655494e-02,
9546         a1 = 3.22467033424113591611e-01,
9547         a2 = 6.73523010531292681824e-02,
9548         a3 = 2.05808084325167332806e-02,
9549         a4 = 7.38555086081402883957e-03,
9550         a5 = 2.89051383673415629091e-03,
9551         a6 = 1.19270763183362067845e-03,
9552         a7 = 5.10069792153511336608e-04,
9553         a8 = 2.20862790713908385557e-04,
9554         a9 = 1.08011567247583939954e-04,
9555         a10 = 2.52144565451257326939e-05,
9556         a11 = 4.48640949618915160150e-05,
9557         tc = 1.46163214496836224576e+00,
9558         tf = -1.21486290535849611461e-01,
9559         tt = -3.63867699703950536541e-18,
9560         t0 = 4.83836122723810047042e-01,
9561         t1 = -1.47587722994593911752e-01,
9562         t2 = 6.46249402391333854778e-02,
9563         t3 = -3.27885410759859649565e-02,
9564         t4 = 1.79706750811820387126e-02,
9565         t5 = -1.03142241298341437450e-02,
9566         t6 = 6.10053870246291332635e-03,
9567         t7 = -3.68452016781138256760e-03,
9568         t8 = 2.25964780900612472250e-03,
9569         t9 = -1.40346469989232843813e-03,
9570         t10 = 8.81081882437654011382e-04,
9571         t11 = -5.38595305356740546715e-04,
9572         t12 = 3.15632070903625950361e-04,
9573         t13 = -3.12754168375120860518e-04,
9574         t14 = 3.35529192635519073543e-04,
9575         u0 = -7.72156649015328655494e-02,
9576         u1 = 6.32827064025093366517e-01,
9577         u2 = 1.45492250137234768737e+00,
9578         u3 = 9.77717527963372745603e-01,
9579         u4 = 2.28963728064692451092e-01,
9580         u5 = 1.33810918536787660377e-02,
9581         v1 = 2.45597793713041134822e+00,
9582         v2 = 2.12848976379893395361e+00,
9583         v3 = 7.69285150456672783825e-01,
9584         v4 = 1.04222645593369134254e-01,
9585         v5 = 3.21709242282423911810e-03,
9586         s0 = -7.72156649015328655494e-02,
9587         s1 = 2.14982415960608852501e-01,
9588         s2 = 3.25778796408930981787e-01,
9589         s3 = 1.46350472652464452805e-01,
9590         s4 = 2.66422703033638609560e-02,
9591         s5 = 1.84028451407337715652e-03,
9592         s6 = 3.19475326584100867617e-05,
9593         r1 = 1.39200533467621045958e+00,
9594         r2 = 7.21935547567138069525e-01,
9595         r3 = 1.71933865632803078993e-01,
9596         r4 = 1.86459191715652901344e-02,
9597         r5 = 7.77942496381893596434e-04,
9598         r6 = 7.32668430744625636189e-06,
9599         w0 = 4.18938533204672725052e-01,
9600         w1 = 8.33333333333329678849e-02,
9601         w2 = -2.77777777728775536470e-03,
9602         w3 = 7.93650558643019558500e-04,
9603         w4 = -5.95187557450339963135e-04,
9604         w5 = 8.36339918996282139126e-04,
9605         w6 = -1.63092934096575273989e-03;
9606
9607     union {double f; UINT64 i;} u = {x};
9608     double t, y, z, nadj, p, p1, p2, p3, q, r, w;
9609     UINT32 ix;
9610     int sign,i;
9611
9612     /* purge off +-inf, NaN, +-0, tiny and negative arguments */
9613     sign = u.i >> 63;
9614     ix = u.i >> 32 & 0x7fffffff;
9615     if (ix >= 0x7ff00000)
9616         return x * x;
9617     if (ix < (0x3ff - 70) << 20) { /* |x|<2**-70, return -log(|x|) */
9618         if(sign)
9619             x = -x;
9620         return -log(x);
9621     }
9622     if (sign) {
9623         x = -x;
9624         t = sin_pi(x);
9625         if (t == 0.0) { /* -integer */
9626             *_errno() = ERANGE;
9627             return 1.0 / (x - x);
9628         }
9629         if (t <= 0.0)
9630             t = -t;
9631         nadj = log(pi / (t * x));
9632     }
9633
9634     /* purge off 1 and 2 */
9635     if ((ix == 0x3ff00000 || ix == 0x40000000) && (UINT32)u.i == 0)
9636         r = 0;
9637     /* for x < 2.0 */
9638     else if (ix < 0x40000000) {
9639         if (ix <= 0x3feccccc) { /* lgamma(x) = lgamma(x+1)-log(x) */
9640             r = -log(x);
9641             if (ix >= 0x3FE76944) {
9642                 y = 1.0 - x;
9643                 i = 0;
9644             } else if (ix >= 0x3FCDA661) {
9645                 y = x - (tc - 1.0);
9646                 i = 1;
9647             } else {
9648                 y = x;
9649                 i = 2;
9650             }
9651         } else {
9652             r = 0.0;
9653             if (ix >= 0x3FFBB4C3) { /* [1.7316,2] */
9654                 y = 2.0 - x;
9655                 i = 0;
9656             } else if(ix >= 0x3FF3B4C4) { /* [1.23,1.73] */
9657                 y = x - tc;
9658                 i = 1;
9659             } else {
9660                 y = x - 1.0;
9661                 i = 2;
9662             }
9663         }
9664         switch (i) {
9665         case 0:
9666             z = y * y;
9667             p1 = a0 + z * (a2 + z * (a4 + z * (a6 + z * (a8 + z * a10))));
9668             p2 = z * (a1 + z * (a3 + z * (a5 + z * (a7 + z * (a9 + z * a11)))));
9669             p = y * p1 + p2;
9670             r += (p - 0.5 * y);
9671             break;
9672         case 1:
9673             z = y * y;
9674             w = z * y;
9675             p1 = t0 + w * (t3 + w * (t6 + w * (t9 + w * t12))); /* parallel comp */
9676             p2 = t1 + w * (t4 + w * (t7 + w * (t10 + w * t13)));
9677             p3 = t2 + w * (t5 + w * (t8 + w * (t11 + w * t14)));
9678             p = z * p1 - (tt - w * (p2 + y * p3));
9679             r += tf + p;
9680             break;
9681         case 2:
9682             p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * u5)))));
9683             p2 = 1.0 + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * v5))));
9684             r += -0.5 * y + p1 / p2;
9685         }
9686     } else if (ix < 0x40200000) { /* x < 8.0 */
9687         i = (int)x;
9688         y = x - (double)i;
9689         p = y * (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6))))));
9690         q = 1.0 + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * r6)))));
9691         r = 0.5 * y + p / q;
9692         z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */
9693         switch (i) {
9694         case 7: z *= y + 6.0; /* fall through */
9695         case 6: z *= y + 5.0; /* fall through */
9696         case 5: z *= y + 4.0; /* fall through */
9697         case 4: z *= y + 3.0; /* fall through */
9698         case 3:
9699             z *= y + 2.0;
9700             r += log(z);
9701             break;
9702         }
9703     } else if (ix < 0x43900000) { /* 8.0 <= x < 2**58 */
9704         t = log(x);
9705         z = 1.0 / x;
9706         y = z * z;
9707         w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * w6)))));
9708         r = (x - 0.5) * (t - 1.0) + w;
9709     } else /* 2**58 <= x <= inf */
9710         r = x * (log(x) - 1.0);
9711     if (sign)
9712         r = nadj - r;
9713     return r;
9714 }
9715
9716 /* sin(pi*x) assuming x > 2^-100, if sin(pi*x)==0 the sign is arbitrary */
9717 static float sinf_pi(float x)
9718 {
9719     double y;
9720     int n;
9721
9722     /* spurious inexact if odd int */
9723     x = 2 * (x * 0.5f - floorf(x * 0.5f)); /* x mod 2.0 */
9724
9725     n = (int)(x * 4);
9726     n = (n + 1) / 2;
9727     y = x - n * 0.5f;
9728     y *= M_PI;
9729     switch (n) {
9730     default: /* case 4: */
9731     case 0: return __sindf(y);
9732     case 1: return __cosdf(y);
9733     case 2: return __sindf(-y);
9734     case 3: return -__cosdf(y);
9735     }
9736 }
9737
9738 /*********************************************************************
9739  *      lgammaf (MSVCR120.@)
9740  *
9741  * Copied from musl: src/math/lgammaf_r.c
9742  */
9743 float CDECL lgammaf(float x)
9744 {
9745     static const float pi = 3.1415927410e+00,
9746         a0 = 7.7215664089e-02,
9747         a1 = 3.2246702909e-01,
9748         a2 = 6.7352302372e-02,
9749         a3 = 2.0580807701e-02,
9750         a4 = 7.3855509982e-03,
9751         a5 = 2.8905137442e-03,
9752         a6 = 1.1927076848e-03,
9753         a7 = 5.1006977446e-04,
9754         a8 = 2.2086278477e-04,
9755         a9 = 1.0801156895e-04,
9756         a10 = 2.5214456400e-05,
9757         a11 = 4.4864096708e-05,
9758         tc = 1.4616321325e+00,
9759         tf = -1.2148628384e-01,
9760         tt = 6.6971006518e-09,
9761         t0 = 4.8383611441e-01,
9762         t1 = -1.4758771658e-01,
9763         t2 = 6.4624942839e-02,
9764         t3 = -3.2788541168e-02,
9765         t4 = 1.7970675603e-02,
9766         t5 = -1.0314224288e-02,
9767         t6 = 6.1005386524e-03,
9768         t7 = -3.6845202558e-03,
9769         t8 = 2.2596477065e-03,
9770         t9 = -1.4034647029e-03,
9771         t10 = 8.8108185446e-04,
9772         t11 = -5.3859531181e-04,
9773         t12 = 3.1563205994e-04,
9774         t13 = -3.1275415677e-04,
9775         t14 = 3.3552918467e-04,
9776         u0 = -7.7215664089e-02,
9777         u1 = 6.3282704353e-01,
9778         u2 = 1.4549225569e+00,
9779         u3 = 9.7771751881e-01,
9780         u4 = 2.2896373272e-01,
9781         u5 = 1.3381091878e-02,
9782         v1 = 2.4559779167e+00,
9783         v2 = 2.1284897327e+00,
9784         v3 = 7.6928514242e-01,
9785         v4 = 1.0422264785e-01,
9786         v5 = 3.2170924824e-03,
9787         s0 = -7.7215664089e-02,
9788         s1 = 2.1498242021e-01,
9789         s2 = 3.2577878237e-01,
9790         s3 = 1.4635047317e-01,
9791         s4 = 2.6642270386e-02,
9792         s5 = 1.8402845599e-03,
9793         s6 = 3.1947532989e-05,
9794         r1 = 1.3920053244e+00,
9795         r2 = 7.2193557024e-01,
9796         r3 = 1.7193385959e-01,
9797         r4 = 1.8645919859e-02,
9798         r5 = 7.7794247773e-04,
9799         r6 = 7.3266842264e-06,
9800         w0 = 4.1893854737e-01,
9801         w1 = 8.3333335817e-02,
9802         w2 = -2.7777778450e-03,
9803         w3 = 7.9365057172e-04,
9804         w4 = -5.9518753551e-04,
9805         w5 = 8.3633989561e-04,
9806         w6 = -1.6309292987e-03;
9807
9808     union {float f; UINT32 i;} u = {x};
9809     float t, y, z, nadj, p, p1, p2, p3, q, r, w;
9810     UINT32 ix;
9811     int i, sign;
9812
9813     /* purge off +-inf, NaN, +-0, tiny and negative arguments */
9814     sign = u.i >> 31;
9815     ix = u.i & 0x7fffffff;
9816     if (ix >= 0x7f800000)
9817         return x * x;
9818     if (ix < 0x35000000) { /* |x| < 2**-21, return -log(|x|) */
9819         if (sign)
9820             x = -x;
9821         return -logf(x);
9822     }
9823     if (sign) {
9824         x = -x;
9825         t = sinf_pi(x);
9826         if (t == 0.0f) { /* -integer */
9827             *_errno() = ERANGE;
9828             return 1.0f / (x - x);
9829         }
9830         if (t <= 0.0f)
9831             t = -t;
9832         nadj = logf(pi / (t * x));
9833     }
9834
9835     /* purge off 1 and 2 */
9836     if (ix == 0x3f800000 || ix == 0x40000000)
9837         r = 0;
9838     /* for x < 2.0 */
9839     else if (ix < 0x40000000) {
9840         if (ix <= 0x3f666666) { /* lgamma(x) = lgamma(x+1)-log(x) */
9841             r = -logf(x);
9842             if (ix >= 0x3f3b4a20) {
9843                 y = 1.0f - x;
9844                 i = 0;
9845             } else if (ix >= 0x3e6d3308) {
9846                 y = x - (tc - 1.0f);
9847                 i = 1;
9848             } else {
9849                 y = x;
9850                 i = 2;
9851             }
9852         } else {
9853             r = 0.0f;
9854             if (ix >= 0x3fdda618) { /* [1.7316,2] */
9855                 y = 2.0f - x;
9856                 i = 0;
9857             } else if (ix >= 0x3F9da620) { /* [1.23,1.73] */
9858                 y = x - tc;
9859                 i = 1;
9860             } else {
9861                 y = x - 1.0f;
9862                 i = 2;
9863             }
9864         }
9865         switch(i) {
9866         case 0:
9867             z = y * y;
9868             p1 = a0 + z * (a2 + z * (a4 + z * (a6 + z * (a8 + z * a10))));
9869             p2 = z * (a1 + z * (a3 + z * (a5 + z * (a7 + z * (a9 + z * a11)))));
9870             p = y * p1 + p2;
9871             r += p - 0.5f * y;
9872             break;
9873         case 1:
9874             z = y * y;
9875             w = z * y;
9876             p1 = t0 + w * (t3 + w * (t6 + w * (t9 + w * t12))); /* parallel comp */
9877             p2 = t1 + w * (t4 + w * (t7 + w * (t10 + w * t13)));
9878             p3 = t2 + w * (t5 + w * (t8 + w * (t11 + w * t14)));
9879             p = z * p1 - (tt - w * (p2 + y * p3));
9880             r += (tf + p);
9881             break;
9882         case 2:
9883             p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * u5)))));
9884             p2 = 1.0f + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * v5))));
9885             r += -0.5f * y + p1 / p2;
9886         }
9887     } else if (ix < 0x41000000) { /* x < 8.0 */
9888         i = (int)x;
9889         y = x - (float)i;
9890         p = y * (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6))))));
9891         q = 1.0f + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * r6)))));
9892         r = 0.5f * y + p / q;
9893         z = 1.0f; /* lgamma(1+s) = log(s) + lgamma(s) */
9894         switch (i) {
9895         case 7: z *= y + 6.0f; /* fall through */
9896         case 6: z *= y + 5.0f; /* fall through */
9897         case 5: z *= y + 4.0f; /* fall through */
9898         case 4: z *= y + 3.0f; /* fall through */
9899         case 3:
9900             z *= y + 2.0f;
9901             r += logf(z);
9902             break;
9903         }
9904     } else if (ix < 0x5c800000) { /* 8.0 <= x < 2**58 */
9905         t = logf(x);
9906         z = 1.0f / x;
9907         y = z * z;
9908         w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * w6)))));
9909         r = (x - 0.5f) * (t - 1.0f) + w;
9910     } else /* 2**58 <= x <= inf */
9911         r = x * (logf(x) - 1.0f);
9912     if (sign)
9913         r = nadj - r;
9914     return r;
9915 }
9916
/* Evaluates the rational function Snum(x) / Sden(x) used by tgamma().
 *
 * For x < 8 both polynomials are evaluated with Horner's scheme in x,
 * starting from the highest coefficient.  Otherwise they are evaluated
 * in 1/x starting from the lowest coefficient, which avoids overflow
 * for large x. */
static double tgamma_S(double x)
{
    static const double Snum[] = {
        23531376880.410759688572007674451636754734846804940,
        42919803642.649098768957899047001988850926355848959,
        35711959237.355668049440185451547166705960488635843,
        17921034426.037209699919755754458931112671403265390,
        6039542586.3520280050642916443072979210699388420708,
        1439720407.3117216736632230727949123939715485786772,
        248874557.86205415651146038641322942321632125127801,
        31426415.585400194380614231628318205362874684987640,
        2876370.6289353724412254090516208496135991145378768,
        186056.26539522349504029498971604569928220784236328,
        8071.6720023658162106380029022722506138218516325024,
        210.82427775157934587250973392071336271166969580291,
        2.5066282746310002701649081771338373386264310793408,
    };
    static const double Sden[] = {
        0, 39916800, 120543840, 150917976, 105258076, 45995730, 13339535,
        2637558, 357423, 32670, 1925, 66, 1,
    };

    const int count = ARRAY_SIZE(Snum);
    double numer = 0, denom = 0;
    int k;

    if (x < 8) {
        /* highest coefficient first */
        k = count;
        while (k-- > 0) {
            numer = numer * x + Snum[k];
            denom = denom * x + Sden[k];
        }
    } else {
        /* lowest coefficient first, in powers of 1/x */
        for (k = 0; k != count; k++) {
            numer = numer / x + Snum[k];
            denom = denom / x + Sden[k];
        }
    }
    return numer / denom;
}
9955
9956 /*********************************************************************
9957  *      tgamma (MSVCR120.@)
9958  *
9959  * Copied from musl: src/math/tgamma.c
9960  */
9961 double CDECL tgamma(double x)
9962 {
9963     static const double gmhalf = 5.524680040776729583740234375;
9964     static const double fact[] = {
9965         1, 1, 2, 6, 24, 120, 720, 5040.0, 40320.0, 362880.0, 3628800.0, 39916800.0,
9966         479001600.0, 6227020800.0, 87178291200.0, 1307674368000.0, 20922789888000.0,
9967         355687428096000.0, 6402373705728000.0, 121645100408832000.0,
9968         2432902008176640000.0, 51090942171709440000.0, 1124000727777607680000.0,
9969     };
9970
9971     union {double f; UINT64 i;} u = {x};
9972     double absx, y, dy, z, r;
9973     UINT32 ix = u.i >> 32 & 0x7fffffff;
9974     int sign = u.i >> 63;
9975
9976     /* special cases */
9977     if (ix >= 0x7ff00000) {
9978         /* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */
9979         if (u.i == 0xfff0000000000000ULL)
9980             *_errno() = EDOM;
9981         return x + INFINITY;
9982     }
9983     if (ix < (0x3ff - 54) << 20) {
9984         /* |x| < 2^-54: tgamma(x) ~ 1/x, +-0 raises div-by-zero */
9985         if (x == 0.0)
9986             *_errno() = ERANGE;
9987         return 1 / x;
9988     }
9989
9990     /* integer arguments */
9991     /* raise inexact when non-integer */
9992     if (x == floor(x)) {
9993         if (sign) {
9994             *_errno() = EDOM;
9995             return 0 / (x - x);
9996         }
9997         if (x <= ARRAY_SIZE(fact))
9998             return fact[(int)x - 1];
9999     }
10000
10001     /* x >= 172: tgamma(x)=inf with overflow */
10002     /* x =< -184: tgamma(x)=+-0 with underflow */
10003     if (ix >= 0x40670000) { /* |x| >= 184 */
10004         *_errno() = ERANGE;
10005         if (sign) {
10006             fp_barrierf(0x1p-126 / x);
10007             return 0;
10008         }
10009         x *= 0x1p1023;
10010         return x;
10011     }
10012
10013     absx = sign ? -x : x;
10014
10015     /* handle the error of x + g - 0.5 */
10016     y = absx + gmhalf;
10017     if (absx > gmhalf) {
10018         dy = y - absx;
10019         dy -= gmhalf;
10020     } else {
10021         dy = y - gmhalf;
10022         dy -= absx;
10023     }
10024
10025     z = absx - 0.5;
10026     r = tgamma_S(absx) * exp(-y);
10027     if (x < 0) {
10028         /* reflection formula for negative x */
10029         /* sinpi(absx) is not 0, integers are already handled */
10030         r = -M_PI / (sin_pi(absx) * absx * r);
10031         dy = -dy;
10032         z = -z;
10033     }
10034     r += dy * (gmhalf + 0.5) * r / y;
10035     z = pow(y, 0.5 * z);
10036     y = r * z * z;
10037     return y;
10038 }
10039
10040 /*********************************************************************
10041  *      tgammaf (MSVCR120.@)
10042  *
10043  * Copied from musl: src/math/tgammaf.c
10044  */
10045 float CDECL tgammaf(float x)
10046 {
10047     return tgamma(x);
10048 }
10049
10050 /*********************************************************************
10051  *      nan (MSVCR120.@)
10052  */
10053 double CDECL nan(const char *tagp)
10054 {
10055     /* Windows ignores input (MSDN) */
10056     return NAN;
10057 }
10058
10059 /*********************************************************************
10060  *      nanf (MSVCR120.@)
10061  */
10062 float CDECL nanf(const char *tagp)
10063 {
10064     return NAN;
10065 }
10066
10067 /*********************************************************************
10068  *      _except1 (MSVCR120.@)
10069  *  TODO:
10070  *   - find meaning of ignored cw and operation bits
10071  *   - unk parameter
10072  */
10073 double CDECL _except1(DWORD fpe, _FP_OPERATION_CODE op, double arg, double res, DWORD cw, void *unk)
10074 {
10075     ULONG_PTR exception_arg;
10076     DWORD exception = 0;
10077     unsigned int fpword = 0;
10078     WORD operation;
10079     int raise = 0;
10080
10081     TRACE("(%lx %x %lf %lf %lx %p)\n", fpe, op, arg, res, cw, unk);
10082
10083 #ifdef _WIN64
10084     cw = ((cw >> 7) & 0x3f) | ((cw >> 3) & 0xc00);
10085 #endif
10086     operation = op << 5;
10087     exception_arg = (ULONG_PTR)&operation;
10088
10089     if (fpe & 0x1) { /* overflow */
10090         if ((fpe == 0x1 && (cw & 0x8)) || (fpe==0x11 && (cw & 0x28))) {
10091             /* 32-bit version also sets SW_INEXACT here */
10092             raise |= FE_OVERFLOW;
10093             if (fpe & 0x10) raise |= FE_INEXACT;
10094             res = signbit(res) ? -INFINITY : INFINITY;
10095         } else {
10096             exception = EXCEPTION_FLT_OVERFLOW;
10097         }
10098     } else if (fpe & 0x2) { /* underflow */
10099         if ((fpe == 0x2 && (cw & 0x10)) || (fpe==0x12 && (cw & 0x30))) {
10100             raise |= FE_UNDERFLOW;
10101             if (fpe & 0x10) raise |= FE_INEXACT;
10102             res = signbit(res) ? -0.0 : 0.0;
10103         } else {
10104             exception = EXCEPTION_FLT_UNDERFLOW;
10105         }
10106     } else if (fpe & 0x4) { /* zerodivide */
10107         if ((fpe == 0x4 && (cw & 0x4)) || (fpe==0x14 && (cw & 0x24))) {
10108             raise |= FE_DIVBYZERO;
10109             if (fpe & 0x10) raise |= FE_INEXACT;
10110         } else {
10111             exception = EXCEPTION_FLT_DIVIDE_BY_ZERO;
10112         }
10113     } else if (fpe & 0x8) { /* invalid */
10114         if (fpe == 0x8 && (cw & 0x1)) {
10115             raise |= FE_INVALID;
10116         } else {
10117             exception = EXCEPTION_FLT_INVALID_OPERATION;
10118         }
10119     } else if (fpe & 0x10) { /* inexact */
10120         if (fpe == 0x10 && (cw & 0x20)) {
10121             raise |= FE_INEXACT;
10122         } else {
10123             exception = EXCEPTION_FLT_INEXACT_RESULT;
10124         }
10125     }
10126
10127     if (exception)
10128         raise = 0;
10129     feraiseexcept(raise);
10130     if (exception)
10131         RaiseException(exception, 0, 1, &exception_arg);
10132
10133     if (cw & 0x1) fpword |= _EM_INVALID;
10134     if (cw & 0x2) fpword |= _EM_DENORMAL;
10135     if (cw & 0x4) fpword |= _EM_ZERODIVIDE;
10136     if (cw & 0x8) fpword |= _EM_OVERFLOW;
10137     if (cw & 0x10) fpword |= _EM_UNDERFLOW;
10138     if (cw & 0x20) fpword |= _EM_INEXACT;
10139     switch (cw & 0xc00)
10140     {
10141         case 0xc00: fpword |= _RC_UP|_RC_DOWN; break;
10142         case 0x800: fpword |= _RC_UP; break;
10143         case 0x400: fpword |= _RC_DOWN; break;
10144     }
10145     switch (cw & 0x300)
10146     {
10147         case 0x0:   fpword |= _PC_24; break;
10148         case 0x200: fpword |= _PC_53; break;
10149         case 0x300: fpword |= _PC_64; break;
10150     }
10151     if (cw & 0x1000) fpword |= _IC_AFFINE;
10152     _setfp(&fpword, _MCW_EM | _MCW_RC | _MCW_PC | _MCW_IC, NULL, 0);
10153
10154     return res;
10155 }
10156
10157 _Dcomplex* CDECL _Cbuild(_Dcomplex *ret, double r, double i)
10158 {
10159     ret->_Val[0] = r;
10160     ret->_Val[1] = i;
10161     return ret;
10162 }
10163
10164 double CDECL MSVCR120_creal(_Dcomplex z)
10165 {
10166     return z._Val[0];
10167 }
10168
10169 /*********************************************************************
10170  *      ilogb (MSVCR120.@)
10171  */
10172 int CDECL ilogb(double x)
10173 {
10174     return __ilogb(x);
10175 }
10176
10177 /*********************************************************************
10178  *      ilogbf (MSVCR120.@)
10179  */
10180 int CDECL ilogbf(float x)
10181 {
10182     return __ilogbf(x);
10183 }
10184 #endif /* _MSVCR_VER>=120 */