dlls/msvcrt/math.c

   1 /*
   2  * msvcrt.dll math functions
   3  *
   4  * Copyright 2000 Jon Griffiths
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  19  *
  20  *
  21  * For functions copied from musl libc (http://musl.libc.org/):
  22  * ====================================================
  23  * Copyright 2005-2020 Rich Felker, et al.
  24  *
  25  * Permission is hereby granted, free of charge, to any person obtaining
  26  * a copy of this software and associated documentation files (the
  27  * "Software"), to deal in the Software without restriction, including
  28  * without limitation the rights to use, copy, modify, merge, publish,
  29  * distribute, sublicense, and/or sell copies of the Software, and to
  30  * permit persons to whom the Software is furnished to do so, subject to
  31  * the following conditions:
  32  *
  33  * The above copyright notice and this permission notice shall be
  34  * included in all copies or substantial portions of the Software.
  35  * ====================================================
  36  */
  37
  38 #include <assert.h>
  39 #include <complex.h>
  40 #include <stdio.h>
  41 #include <fenv.h>
  42 #include <fpieee.h>
  43 #include <limits.h>
  44 #include <locale.h>
  45 #include <math.h>
  46
  47 #include "msvcrt.h"
  48 #include "winternl.h"
  49
  50 #include "wine/asm.h"
  51 #include "wine/debug.h"
  52
  53 WINE_DEFAULT_DEBUG_CHANNEL(msvcrt);
  54
  55 #undef div
  56 #undef ldiv
  57
/* Error type codes passed as the 'type' argument of math_error() below.
 * There, _DOMAIN sets errno to EDOM, _SING and _OVERFLOW set it to ERANGE,
 * and _UNDERFLOW leaves errno untouched. */
  58 #define _DOMAIN         1       /* domain error in argument */
  59 #define _SING           2       /* singularity */
  60 #define _OVERFLOW       3       /* range overflow */
  61 #define _UNDERFLOW      4       /* range underflow */
  62
/* Signature of the handler installed with __setusermatherr() and invoked
 * by math_error() with a pointer to the exception record. */
  63 typedef int (CDECL *MSVCRT_matherr_func)(struct _exception *);
  64
/* Currently installed handler; NULL until __setusermatherr() stores one. */
  65 static MSVCRT_matherr_func MSVCRT_default_matherr_func = NULL;
  66
/* Set in msvcrt_init_math() from IsProcessorFeaturePresent(). */
  67 BOOL sse2_supported;
/* Set in msvcrt_init_math() and updated by _set_SSE2_enable(). */
  68 static BOOL sse2_enabled;
  69
  70 void msvcrt_init_math( void *module )
  71 {
  72     sse2_supported = IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE );
  73 #if _MSVCR_VER <=71
  74     sse2_enabled = FALSE;
  75 #else
  76     sse2_enabled = sse2_supported;
  77 #endif
  78 }
  79
  80 /* Copied from musl: src/internal/libm.h */
  81 static inline float fp_barrierf(float x)
  82 {
  83     volatile float y = x;
  84     return y;
  85 }
  86
  87 static inline double fp_barrier(double x)
  88 {
  89     volatile double y = x;
  90     return y;
  91 }
  92
  93 static inline double ret_nan( BOOL update_sw )
  94 {
  95     double x = 1.0;
  96     if (!update_sw) return -NAN;
  97     return (x - x) / (x - x);
  98 }
  99
/* i386 assembly fragment: save the x87 control word and clear the MASK bits.
 *
 * Reserves 4 bytes of stack, stores the current control word at (%esp) and
 * a working copy at 2(%esp).  If any MASK bit is set, those bits are
 * cleared in the copy and the copy is loaded with fldcw; otherwise the
 * control word is left alone.  Overwrites %ax and uses local label 1.
 * The 4 reserved bytes stay on the stack; RESET_X87_CW releases them.
 */
 100 #define SET_X87_CW(MASK) \
 101     "subl $4, %esp\n\t" \
 102     __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \
 103     "fnstcw (%esp)\n\t" \
 104     "movw (%esp), %ax\n\t" \
 105     "movw %ax, 2(%esp)\n\t" \
 106     "testw $" #MASK ", %ax\n\t" \
 107     "jz 1f\n\t" \
 108     "andw $~" #MASK ", %ax\n\t" \
 109     "movw %ax, 2(%esp)\n\t" \
 110     "fldcw 2(%esp)\n\t" \
 111     "1:\n\t"
 112
/* i386 assembly fragment: undo SET_X87_CW.
 *
 * Compares the control word saved at (%esp) with the working copy at
 * 2(%esp).  When they differ, st(0) is stored as a 64-bit value at
 * 8(%esp) and popped, the saved control word is reloaded, and the stored
 * value is pushed back, followed by fwait.  Finally the 4 bytes reserved
 * by SET_X87_CW are released.  Overwrites %ax and uses local label 1.
 * NOTE(review): 8(%esp)..15(%esp) is assumed to be scratch space owned by
 * the code using the macro - confirm against its users.
 */
 113 #define RESET_X87_CW \
 114     "movw (%esp), %ax\n\t" \
 115     "cmpw %ax, 2(%esp)\n\t" \
 116     "je 1f\n\t" \
 117     "fstpl 8(%esp)\n\t" \
 118     "fldcw (%esp)\n\t" \
 119     "fldl 8(%esp)\n\t" \
 120     "fwait\n\t" \
 121     "1:\n\t" \
 122     "addl $4, %esp\n\t" \
 123     __ASM_CFI(".cfi_adjust_cfa_offset -4\n\t")
 124
 125 /*********************************************************************
 126  *      _matherr (CRTDLL.@)
 127  */
 128 int CDECL _matherr(struct _exception *e)
 129 {
 130     return 0;
 131 }
 132
 133
 134 static double math_error(int type, const char *name, double arg1, double arg2, double retval)
 135 {
 136     struct _exception exception = {type, (char *)name, arg1, arg2, retval};
 137
 138     TRACE("(%d, %s, %g, %g, %g)\n", type, debugstr_a(name), arg1, arg2, retval);
 139
 140     if (MSVCRT_default_matherr_func && MSVCRT_default_matherr_func(&exception))
 141         return exception.retval;
 142
 143     switch (type)
 144     {
 145     case 0:
 146         /* don't set errno */
 147         break;
 148     case _DOMAIN:
 149         *_errno() = EDOM;
 150         break;
 151     case _SING:
 152     case _OVERFLOW:
 153         *_errno() = ERANGE;
 154         break;
 155     case _UNDERFLOW:
 156         /* don't set errno */
 157         break;
 158     default:
 159         ERR("Unhandled math error!\n");
 160     }
 161
 162     return exception.retval;
 163 }
 164
 165 /*********************************************************************
 166  *      __setusermatherr (MSVCRT.@)
 167  */
 168 void CDECL __setusermatherr(MSVCRT_matherr_func func)
 169 {
 170     MSVCRT_default_matherr_func = func;
 171     TRACE("new matherr handler %p\n", func);
 172 }
 173
 174 /*********************************************************************
 175  *      _set_SSE2_enable (MSVCRT.@)
 176  */
 177 int CDECL _set_SSE2_enable(int flag)
 178 {
 179     sse2_enabled = flag && sse2_supported;
 180     return sse2_enabled;
 181 }
 182
 183 #if defined(_WIN64)
 184 # if _MSVCR_VER>=140
 185 /*********************************************************************
 186  *      _get_FMA3_enable (UCRTBASE.@)
 187  */
 188 int CDECL _get_FMA3_enable(void)
 189 {
 190     FIXME("() stub\n");
 191     return 0;
 192 }
 193 # endif
 194
 195 # if _MSVCR_VER>=120
 196 /*********************************************************************
 197  *      _set_FMA3_enable (MSVCR120.@)
 198  */
 199 int CDECL _set_FMA3_enable(int flag)
 200 {
 201     FIXME("(%x) stub\n", flag);
 202     return 0;
 203 }
 204 # endif
 205 #endif
 206
 207 #if !defined(__i386__) || _MSVCR_VER>=120
 208
 209 /*********************************************************************
 210  *      _chgsignf (MSVCRT.@)
 211  */
 212 float CDECL _chgsignf( float num )
 213 {
 214     union { float f; UINT32 i; } u = { num };
 215     u.i ^= 0x80000000;
 216     return u.f;
 217 }
 218
 219 /*********************************************************************
 220  *      _copysignf (MSVCRT.@)
 221  *
 222  * Copied from musl: src/math/copysignf.c
 223  */
 224 float CDECL _copysignf( float x, float y )
 225 {
 226     union { float f; UINT32 i; } ux = { x }, uy = { y };
 227     ux.i &= 0x7fffffff;
 228     ux.i |= uy.i & 0x80000000;
 229     return ux.f;
 230 }
 231
 232 /*********************************************************************
 233  *      _nextafterf (MSVCRT.@)
 234  *
 235  * Copied from musl: src/math/nextafterf.c
 236  */
 237 float CDECL _nextafterf( float x, float y )
 238 {
 239     unsigned int ix = *(unsigned int*)&x;
 240     unsigned int iy = *(unsigned int*)&y;
 241     unsigned int ax, ay, e;
 242
 243     if (isnan(x) || isnan(y))
 244         return x + y;
 245     if (x == y) {
 246         if (_fpclassf(y) & (_FPCLASS_ND | _FPCLASS_PD | _FPCLASS_NZ | _FPCLASS_PZ ))
 247             *_errno() = ERANGE;
 248         return y;
 249     }
 250     ax = ix & 0x7fffffff;
 251     ay = iy & 0x7fffffff;
 252     if (ax == 0) {
 253         if (ay == 0)
 254             return y;
 255         ix = (iy & 0x80000000) | 1;
 256     } else if (ax > ay || ((ix ^ iy) & 0x80000000))
 257         ix--;
 258     else
 259         ix++;
 260     e = ix & 0x7f800000;
 261     /* raise overflow if ix is infinite and x is finite */
 262     if (e == 0x7f800000) {
 263         fp_barrierf(x + x);
 264         *_errno() = ERANGE;
 265     }
 266     /* raise underflow if ix is subnormal or zero */
 267     y = *(float*)&ix;
 268     if (e == 0) {
 269         fp_barrierf(x * x + y * y);
 270         *_errno() = ERANGE;
 271     }
 272     return y;
 273 }
 274
 275 /* Copied from musl: src/math/ilogbf.c */
 276 static int __ilogbf(float x)
 277 {
 278     union { float f; UINT32 i; } u = { x };
 279     int e = u.i >> 23 & 0xff;
 280
 281     if (!e)
 282     {
 283         u.i <<= 9;
 284         if (u.i == 0) return FP_ILOGB0;
 285         /* subnormal x */
 286         for (e = -0x7f; u.i >> 31 == 0; e--, u.i <<= 1);
 287         return e;
 288     }
 289     if (e == 0xff) return u.i << 9 ? FP_ILOGBNAN : INT_MAX;
 290     return e - 0x7f;
 291 }
 292
 293 /*********************************************************************
 294  *      _logbf (MSVCRT.@)
 295  *
 296  * Copied from musl: src/math/logbf.c
 297  */
 298 float CDECL _logbf(float x)
 299 {
 300     if (!isfinite(x))
 301         return x * x;
 302     if (x == 0) {
 303         *_errno() = ERANGE;
 304         return -1 / (x * x);
 305     }
 306     return __ilogbf(x);
 307 }
 308
 309 #endif
 310
 311 /* Copied from musl: src/math/scalbn.c */
 312 static double __scalbn(double x, int n)
 313 {
 314     union {double f; UINT64 i;} u;
 315     double y = x;
 316
 317     if (n > 1023) {
 318         y *= 0x1p1023;
 319         n -= 1023;
 320         if (n > 1023) {
 321             y *= 0x1p1023;
 322             n -= 1023;
 323             if (n > 1023)
 324                 n = 1023;
 325         }
 326     } else if (n < -1022) {
 327         /* make sure final n < -53 to avoid double
 328            rounding in the subnormal range */
 329         y *= 0x1p-1022 * 0x1p53;
 330         n += 1022 - 53;
 331         if (n < -1022) {
 332             y *= 0x1p-1022 * 0x1p53;
 333             n += 1022 - 53;
 334             if (n < -1022)
 335                 n = -1022;
 336         }
 337     }
 338     u.i = (UINT64)(0x3ff + n) << 52;
 339     x = y * u.f;
 340     return x;
 341 }
 342
 343 /* Copied from musl: src/math/__rem_pio2_large.c */
 344 static int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec)
 345 {
 346     static const int init_jk[] = {3, 4};
 347     static const INT32 ipio2[] = {
 348         0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
 349         0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
 350         0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
 351         0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
 352         0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
 353         0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
 354         0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
 355         0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
 356         0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
 357         0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
 358         0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
 359     };
 360     static const double PIo2[] = {
 361         1.57079625129699707031e+00,
 362         7.54978941586159635335e-08,
 363         5.39030252995776476554e-15,
 364         3.28200341580791294123e-22,
 365         1.27065575308067607349e-29,
 366         1.22933308981111328932e-36,
 367         2.73370053816464559624e-44,
 368         2.16741683877804819444e-51,
 369     };
 370
 371     INT32 jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
 372     double z, fw, f[20], fq[20] = {0}, q[20];
 373
 374     /* initialize jk*/
 375     jk = init_jk[prec];
 376     jp = jk;
 377
 378     /* determine jx,jv,q0, note that 3>q0 */
 379     jx = nx - 1;
 380     jv = (e0 - 3) / 24;
 381     if(jv < 0) jv = 0;
 382     q0 = e0 - 24 * (jv + 1);
 383
 384     /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
 385     j = jv - jx;
 386     m = jx + jk;
 387     for (i = 0; i <= m; i++, j++)
 388         f[i] = j < 0 ? 0.0 : (double)ipio2[j];
 389
 390     /* compute q[0],q[1],...q[jk] */
 391     for (i = 0; i <= jk; i++) {
 392         for (j = 0, fw = 0.0; j <= jx; j++)
 393             fw += x[j] * f[jx + i - j];
 394         q[i] = fw;
 395     }
 396
 397     jz = jk;
 398 recompute:
 399     /* distill q[] into iq[] reversingly */
 400     for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) {
 401         fw = (double)(INT32)(0x1p-24 * z);
 402         iq[i] = (INT32)(z - 0x1p24 * fw);
 403         z = q[j - 1] + fw;
 404     }
 405
 406     /* compute n */
 407     z = __scalbn(z, q0); /* actual value of z */
 408     z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */
 409     n = (INT32)z;
 410     z -= (double)n;
 411     ih = 0;
 412     if (q0 > 0) {  /* need iq[jz-1] to determine n */
 413         i = iq[jz - 1] >> (24 - q0);
 414         n += i;
 415         iq[jz - 1] -= i << (24 - q0);
 416         ih = iq[jz - 1] >> (23 - q0);
 417     }
 418     else if (q0 == 0) ih = iq[jz - 1] >> 23;
 419     else if (z >= 0.5) ih = 2;
 420
 421     if (ih > 0) {  /* q > 0.5 */
 422         n += 1;
 423         carry = 0;
 424         for (i = 0; i < jz; i++) {  /* compute 1-q */
 425             j = iq[i];
 426             if (carry == 0) {
 427                 if (j != 0) {
 428                     carry = 1;
 429                     iq[i] = 0x1000000 - j;
 430                 }
 431             } else
 432                 iq[i] = 0xffffff - j;
 433         }
 434         if (q0 > 0) {  /* rare case: chance is 1 in 12 */
 435             switch(q0) {
 436             case 1:
 437                 iq[jz - 1] &= 0x7fffff;
 438                 break;
 439             case 2:
 440                 iq[jz - 1] &= 0x3fffff;
 441                 break;
 442             }
 443         }
 444         if (ih == 2) {
 445             z = 1.0 - z;
 446             if (carry != 0)
 447                 z -= __scalbn(1.0, q0);
 448         }
 449     }
 450
 451     /* check if recomputation is needed */
 452     if (z == 0.0) {
 453         j = 0;
 454         for (i = jz - 1; i >= jk; i--) j |= iq[i];
 455         if (j == 0) {  /* need recomputation */
 456             for (k = 1; iq[jk - k] == 0; k++);  /* k = no. of terms needed */
 457
 458             for (i = jz + 1; i <= jz + k; i++) {  /* add q[jz+1] to q[jz+k] */
 459                 f[jx + i] = (double)ipio2[jv + i];
 460                 for (j = 0, fw = 0.0; j <= jx; j++)
 461                     fw += x[j] * f[jx + i - j];
 462                 q[i] = fw;
 463             }
 464             jz += k;
 465             goto recompute;
 466         }
 467     }
 468
 469     /* chop off zero terms */
 470     if (z == 0.0) {
 471         jz -= 1;
 472         q0 -= 24;
 473         while (iq[jz] == 0) {
 474             jz--;
 475             q0 -= 24;
 476         }
 477     } else { /* break z into 24-bit if necessary */
 478         z = __scalbn(z, -q0);
 479         if (z >= 0x1p24) {
 480             fw = (double)(INT32)(0x1p-24 * z);
 481             iq[jz] = (INT32)(z - 0x1p24 * fw);
 482             jz += 1;
 483             q0 += 24;
 484             iq[jz] = (INT32)fw;
 485         } else
 486             iq[jz] = (INT32)z;
 487     }
 488
 489     /* convert integer "bit" chunk to floating-point value */
 490     fw = __scalbn(1.0, q0);
 491     for (i = jz; i >= 0; i--) {
 492         q[i] = fw * (double)iq[i];
 493         fw *= 0x1p-24;
 494     }
 495
 496     /* compute PIo2[0,...,jp]*q[jz,...,0] */
 497     for(i = jz; i >= 0; i--) {
 498         for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++)
 499             fw += PIo2[k] * q[i + k];
 500         fq[jz - i] = fw;
 501     }
 502
 503     /* compress fq[] into y[] */
 504     switch(prec) {
 505     case 0:
 506         fw = 0.0;
 507         for (i = jz; i >= 0; i--)
 508             fw += fq[i];
 509         y[0] = ih == 0 ? fw : -fw;
 510         break;
 511     case 1:
 512     case 2:
 513         fw = 0.0;
 514         for (i = jz; i >= 0; i--)
 515             fw += fq[i];
 516         fw = (double)fw;
 517         y[0] = ih==0 ? fw : -fw;
 518         fw = fq[0] - fw;
 519         for (i = 1; i <= jz; i++)
 520             fw += fq[i];
 521         y[1] = ih == 0 ? fw : -fw;
 522         break;
 523     case 3:  /* painful */
 524         for (i = jz; i > 0; i--) {
 525             fw = fq[i - 1] + fq[i];
 526             fq[i] += fq[i - 1] - fw;
 527             fq[i - 1] = fw;
 528         }
 529         for (i = jz; i > 1; i--) {
 530             fw = fq[i - 1] + fq[i];
 531             fq[i] += fq[i - 1] - fw;
 532             fq[i - 1] = fw;
 533         }
 534         for (fw = 0.0, i = jz; i >= 2; i--)
 535             fw += fq[i];
 536         if (ih == 0) {
 537             y[0] = fq[0];
 538             y[1] = fq[1];
 539             y[2] = fw;
 540         } else {
 541             y[0] = -fq[0];
 542             y[1] = -fq[1];
 543             y[2] = -fw;
 544         }
 545     }
 546     return n & 7;
 547 }
 548
 549 /* Based on musl implementation: src/math/round.c */
 550 static double __round(double x)
 551 {
 552     ULONGLONG llx = *(ULONGLONG*)&x, tmp;
 553     int e = (llx >> 52 & 0x7ff) - 0x3ff;
 554
 555     if (e >= 52)
 556         return x;
 557     if (e < -1)
 558         return 0 * x;
 559     else if (e == -1)
 560         return signbit(x) ? -1 : 1;
 561
 562     tmp = 0x000fffffffffffffULL >> e;
 563     if (!(llx & tmp))
 564         return x;
 565     llx += 0x0008000000000000ULL >> e;
 566     llx &= ~tmp;
 567     return *(double*)&llx;
 568 }
 569
 570 #if !defined(__i386__) || _MSVCR_VER >= 120
 571 /* Copied from musl: src/math/expm1f.c */
 572 static float __expm1f(float x)
 573 {
 574     static const float ln2_hi = 6.9313812256e-01,
 575         ln2_lo = 9.0580006145e-06,
 576         invln2 = 1.4426950216e+00,
 577         Q1 = -3.3333212137e-2,
 578         Q2 = 1.5807170421e-3;
 579
 580     float y, hi, lo, c, t, e, hxs, hfx, r1, twopk;
 581     union {float f; UINT32 i;} u = {x};
 582     UINT32 hx = u.i & 0x7fffffff;
 583     int k, sign = u.i >> 31;
 584
 585     /* filter out huge and non-finite argument */
 586     if (hx >= 0x4195b844) { /* if |x|>=27*ln2 */
 587         if (hx >= 0x7f800000) /* NaN */
 588             return u.i == 0xff800000 ? -1 : x;
 589         if (sign)
 590             return math_error(_UNDERFLOW, "exp", x, 0, -1);
 591         if (hx > 0x42b17217) /* x > log(FLT_MAX) */
 592             return math_error(_OVERFLOW, "exp", x, 0, fp_barrierf(x * FLT_MAX));
 593     }
 594
 595     /* argument reduction */
 596     if (hx > 0x3eb17218) { /* if |x| > 0.5 ln2 */
 597         if (hx < 0x3F851592) { /* and |x| < 1.5 ln2 */
 598             if (!sign) {
 599                 hi = x - ln2_hi;
 600                 lo = ln2_lo;
 601                 k = 1;
 602             } else {
 603                 hi = x + ln2_hi;
 604                 lo = -ln2_lo;
 605                 k = -1;
 606             }
 607         } else {
 608             k = invln2 * x + (sign ? -0.5f : 0.5f);
 609             t = k;
 610             hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
 611             lo = t * ln2_lo;
 612         }
 613         x = hi - lo;
 614         c = (hi - x) - lo;
 615     } else if (hx < 0x33000000) { /* when |x|<2**-25, return x */
 616         if (hx < 0x00800000)
 617             fp_barrierf(x * x);
 618         return x;
 619     } else
 620         k = 0;
 621
 622     /* x is now in primary range */
 623     hfx = 0.5f * x;
 624     hxs = x * hfx;
 625     r1 = 1.0f + hxs * (Q1 + hxs * Q2);
 626     t = 3.0f - r1 * hfx;
 627     e = hxs * ((r1 - t) / (6.0f - x * t));
 628     if (k == 0) /* c is 0 */
 629         return x - (x * e - hxs);
 630     e = x * (e - c) - c;
 631     e -= hxs;
 632     /* exp(x) ~ 2^k (x_reduced - e + 1) */
 633     if (k == -1)
 634         return 0.5f * (x - e) - 0.5f;
 635     if (k == 1) {
 636         if (x < -0.25f)
 637             return -2.0f * (e - (x + 0.5f));
 638         return 1.0f + 2.0f * (x - e);
 639     }
 640     u.i = (0x7f + k) << 23; /* 2^k */
 641     twopk = u.f;
 642     if (k < 0 || k > 56) { /* suffice to return exp(x)-1 */
 643         y = x - e + 1.0f;
 644         if (k == 128)
 645             y = y * 2.0f * 0x1p127f;
 646         else
 647             y = y * twopk;
 648         return y - 1.0f;
 649     }
 650     u.i = (0x7f-k) << 23; /* 2^-k */
 651     if (k < 23)
 652         y = (x - e + (1 - u.f)) * twopk;
 653     else
 654         y = (x - (e + u.f) + 1) * twopk;
 655     return y;
 656 }
 657
 658 /* Copied from musl: src/math/__sindf.c */
 659 static float __sindf(double x)
 660 {
 661     static const double S1 = -0x1.5555555555555p-3,
 662         S2 = 0x1.1111111111111p-7,
 663         S3 = -0x1.a01a01a01a01ap-13,
 664         S4 = 0x1.71de3a556c734p-19;
 665
 666     double r, s, w, z;
 667
 668     z = x * x;
 669     if (x > -7.8175831586122513e-03 && x < 7.8175831586122513e-03)
 670         return x * (1 + S1 * z);
 671
 672     w = z * z;
 673     r = S3 + z * S4;
 674     s = z * x;
 675     return (x + s * (S1 + z * S2)) + s * w * r;
 676 }
 677
 678 /* Copied from musl: src/math/__cosdf.c */
 679 static float __cosdf(double x)
 680 {
 681     static const double C0 = -0x1.0000000000000p-1,
 682         C1 = 0x1.5555555555555p-5,
 683         C2 = -0x1.6c16c16c16c17p-10,
 684         C3 = 0x1.a01a01a01a01ap-16,
 685         C4 = -0x1.27e4fb7789f5cp-22;
 686     double z;
 687
 688     z = x * x;
 689     if (x > -7.8163146972656250e-03 && x < 7.8163146972656250e-03)
 690         return 1 + C0 * z;
 691     return 1.0 + z * (C0 + z * (C1 + z * (C2 + z * (C3 + z * C4))));
 692 }
 693
/* 32-entry lookup table of 64-bit patterns; the first entry is the
 * IEEE-754 double bit pattern of 1.0.
 * NOTE(review): the name and layout suggest the T[] table from musl's
 * src/math/exp2f_data.c - confirm; the code that reads it is not in this
 * part of the file. */
 694 static const UINT64 exp2f_T[] = {
 695     0x3ff0000000000000ULL, 0x3fefd9b0d3158574ULL, 0x3fefb5586cf9890fULL, 0x3fef9301d0125b51ULL,
 696     0x3fef72b83c7d517bULL, 0x3fef54873168b9aaULL, 0x3fef387a6e756238ULL, 0x3fef1e9df51fdee1ULL,
 697     0x3fef06fe0a31b715ULL, 0x3feef1a7373aa9cbULL, 0x3feedea64c123422ULL, 0x3feece086061892dULL,
 698     0x3feebfdad5362a27ULL, 0x3feeb42b569d4f82ULL, 0x3feeab07dd485429ULL, 0x3feea47eb03a5585ULL,
 699     0x3feea09e667f3bcdULL, 0x3fee9f75e8ec5f74ULL, 0x3feea11473eb0187ULL, 0x3feea589994cce13ULL,
 700     0x3feeace5422aa0dbULL, 0x3feeb737b0cdc5e5ULL, 0x3feec49182a3f090ULL, 0x3feed503b23e255dULL,
 701     0x3feee89f995ad3adULL, 0x3feeff76f2fb5e47ULL, 0x3fef199bdd85529cULL, 0x3fef3720dcef9069ULL,
 702     0x3fef5818dcfba487ULL, 0x3fef7c97337b9b5fULL, 0x3fefa4afa2a490daULL, 0x3fefd0765b6e4540ULL
 703 };
 704 #endif
 705
 706 /*********************************************************************
 707  *      _fdclass (MSVCR120.@)
 708  *
 709  * Copied from musl: src/math/__fpclassifyf.c
 710  */
 711 short CDECL _fdclass(float x)
 712 {
 713     union { float f; UINT32 i; } u = { x };
 714     int e = u.i >> 23 & 0xff;
 715
 716     if (!e) return u.i << 1 ? FP_SUBNORMAL : FP_ZERO;
 717     if (e == 0xff) return u.i << 9 ? FP_NAN : FP_INFINITE;
 718     return FP_NORMAL;
 719 }
 720
 721 /*********************************************************************
 722  *      _dclass (MSVCR120.@)
 723  *
 724  * Copied from musl: src/math/__fpclassify.c
 725  */
 726 short CDECL _dclass(double x)
 727 {
 728     union { double f; UINT64 i; } u = { x };
 729     int e = u.i >> 52 & 0x7ff;
 730
 731     if (!e) return u.i << 1 ? FP_SUBNORMAL : FP_ZERO;
 732     if (e == 0x7ff) return (u.i << 12) ? FP_NAN : FP_INFINITE;
 733     return FP_NORMAL;
 734 }
 735
 736 #ifndef __i386__
 737
 738 /*********************************************************************
 739  *      _fpclassf (MSVCRT.@)
 740  */
 741 int CDECL _fpclassf( float num )
 742 {
 743     union { float f; UINT32 i; } u = { num };
 744     int e = u.i >> 23 & 0xff;
 745     int s = u.i >> 31;
 746
 747     switch (e)
 748     {
 749     case 0:
 750         if (u.i << 1) return s ? _FPCLASS_ND : _FPCLASS_PD;
 751         return s ? _FPCLASS_NZ : _FPCLASS_PZ;
 752     case 0xff:
 753         if (u.i << 9) return ((u.i >> 22) & 1) ? _FPCLASS_QNAN : _FPCLASS_SNAN;
 754         return s ? _FPCLASS_NINF : _FPCLASS_PINF;
 755     default:
 756         return s ? _FPCLASS_NN : _FPCLASS_PN;
 757     }
 758 }
 759
 760 /*********************************************************************
 761  *      _finitef (MSVCRT.@)
 762  */
 763 int CDECL _finitef( float num )
 764 {
 765     union { float f; UINT32 i; } u = { num };
 766     return (u.i & 0x7fffffff) < 0x7f800000;
 767 }
 768
 769 /*********************************************************************
 770  *      _isnanf (MSVCRT.@)
 771  */
 772 int CDECL _isnanf( float num )
 773 {
 774     union { float f; UINT32 i; } u = { num };
 775     return (u.i & 0x7fffffff) > 0x7f800000;
 776 }
 777
 778 static float asinf_R(float z)
 779 {
 780     /* coefficients for R(x^2) */
 781     static const float p1 = 1.66666672e-01,
 782                  p2 = -5.11644611e-02,
 783                  p3 = -1.21124933e-02,
 784                  p4 = -3.58742251e-03,
 785                  q1 = -7.56982703e-01;
 786
 787     float p, q;
 788     p = z * (p1 + z * (p2 + z * (p3 + z * p4)));
 789     q = 1.0f + z * q1;
 790     return p / q;
 791 }
 792
 793 /*********************************************************************
 794  *      acosf (MSVCRT.@)
 795  *
 796  * Copied from musl: src/math/acosf.c
 797  */
 798 float CDECL acosf( float x )
 799 {
 800     static const double pio2_lo = 6.12323399573676603587e-17;
 801     static const double pio2_hi = 1.57079632679489655800e+00;
 802
 803     float z, w, s, c, df;
 804     unsigned int hx, ix;
 805
 806     hx = *(unsigned int*)&x;
 807     ix = hx & 0x7fffffff;
 808     /* |x| >= 1 or nan */
 809     if (ix >= 0x3f800000) {
 810         if (ix == 0x3f800000) {
 811             if (hx >> 31)
 812                 return M_PI;
 813             return 0;
 814         }
 815         if (isnan(x)) return x;
 816         return math_error(_DOMAIN, "acosf", x, 0, 0 / (x - x));
 817     }
 818     /* |x| < 0.5 */
 819     if (ix < 0x3f000000) {
 820         if (ix <= 0x32800000) /* |x| < 2**-26 */
 821             return M_PI_2;
 822         return pio2_hi - (x - (pio2_lo - x * asinf_R(x * x)));
 823     }
 824     /* x < -0.5 */
 825     if (hx >> 31) {
 826         z = (1 + x) * 0.5f;
 827         s = sqrtf(z);
 828         return 2*(pio2_hi - (s + (asinf_R(z) * s - pio2_lo)));
 829     }
 830     /* x > 0.5 */
 831     z = (1 - x) * 0.5f;
 832     s = sqrtf(z);
 833     hx = *(unsigned int*)&s & 0xffff0000;
 834     df = *(float*)&hx;
 835     c = (z - df * df) / (s + df);
 836     w = asinf_R(z) * s + c;
 837     return 2 * (df + w);
 838 }
 839
 840 /*********************************************************************
 841  *      asinf (MSVCRT.@)
 842  *
 843  * Copied from musl: src/math/asinf.c
 844  */
 845 float CDECL asinf( float x )
 846 {
 847     static const double pio2 = 1.570796326794896558e+00;
 848     static const float pio4_hi = 0.785398125648;
 849     static const float pio2_lo = 7.54978941586e-08;
 850
 851     float s, z, f, c;
 852     unsigned int hx, ix;
 853
 854     hx = *(unsigned int*)&x;
 855     ix = hx & 0x7fffffff;
 856     if (ix >= 0x3f800000) {  /* |x| >= 1 */
 857         if (ix == 0x3f800000)  /* |x| == 1 */
 858             return x * pio2 + 7.5231638453e-37;  /* asin(+-1) = +-pi/2 with inexact */
 859         if (isnan(x)) return x;
 860         return math_error(_DOMAIN, "asinf", x, 0, 0 / (x - x));
 861     }
 862     if (ix < 0x3f000000) {  /* |x| < 0.5 */
 863         /* if 0x1p-126 <= |x| < 0x1p-12, avoid raising underflow */
 864         if (ix < 0x39800000 && ix >= 0x00800000)
 865             return x;
 866         return x + x * asinf_R(x * x);
 867     }
 868     /* 1 > |x| >= 0.5 */
 869     z = (1 - fabsf(x)) * 0.5f;
 870     s = sqrtf(z);
 871     /* f+c = sqrt(z) */
 872     *(unsigned int*)&f = *(unsigned int*)&s & 0xffff0000;
 873     c = (z - f * f) / (s + f);
 874     x = pio4_hi - (2 * s * asinf_R(z) - (pio2_lo - 2 * c) - (pio4_hi - 2 * f));
 875     if (hx >> 31)
 876         return -x;
 877     return x;
 878 }
 879
 880 /*********************************************************************
 881  *      atanf (MSVCRT.@)
 882  *
 883  * Copied from musl: src/math/atanf.c
 884  */
 885 float CDECL atanf( float x )
 886 {
 887     static const float atanhi[] = {
 888         4.6364760399e-01,
 889         7.8539812565e-01,
 890         9.8279368877e-01,
 891         1.5707962513e+00,
 892     };
 893     static const float atanlo[] = {
 894         5.0121582440e-09,
 895         3.7748947079e-08,
 896         3.4473217170e-08,
 897         7.5497894159e-08,
 898     };
 899     static const float aT[] = {
 900         3.3333328366e-01,
 901         -1.9999158382e-01,
 902         1.4253635705e-01,
 903         -1.0648017377e-01,
 904         6.1687607318e-02,
 905     };
 906
 907     float w, s1, s2, z;
 908     unsigned int ix, sign;
 909     int id;
 910
 911 #if _MSVCR_VER == 0
 912     if (isnan(x)) return math_error(_DOMAIN, "atanf", x, 0, x);
 913 #endif
 914
 915     ix = *(unsigned int*)&x;
 916     sign = ix >> 31;
 917     ix &= 0x7fffffff;
 918     if (ix >= 0x4c800000) {  /* if |x| >= 2**26 */
 919         if (isnan(x))
 920             return x;
 921         z = atanhi[3] + 7.5231638453e-37;
 922         return sign ? -z : z;
 923     }
 924     if (ix < 0x3ee00000) {   /* |x| < 0.4375 */
 925         if (ix < 0x39800000) {  /* |x| < 2**-12 */
 926             if (ix < 0x00800000)
 927                 /* raise underflow for subnormal x */
 928                 fp_barrierf(x*x);
 929             return x;
 930         }
 931         id = -1;
 932     } else {
 933         x = fabsf(x);
 934         if (ix < 0x3f980000) {  /* |x| < 1.1875 */
 935             if (ix < 0x3f300000) {  /*  7/16 <= |x| < 11/16 */
 936                 id = 0;
 937                 x = (2.0f * x - 1.0f) / (2.0f + x);
 938             } else {                /* 11/16 <= |x| < 19/16 */
 939                 id = 1;
 940                 x = (x - 1.0f) / (x + 1.0f);
 941             }
 942         } else {
 943             if (ix < 0x401c0000) {  /* |x| < 2.4375 */
 944                 id = 2;
 945                 x = (x - 1.5f) / (1.0f + 1.5f * x);
 946             } else {                /* 2.4375 <= |x| < 2**26 */
 947                 id = 3;
 948                 x = -1.0f / x;
 949             }
 950         }
 951     }
 952     /* end of argument reduction */
 953     z = x * x;
 954     w = z * z;
 955     /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
 956     s1 = z * (aT[0] + w * (aT[2] + w * aT[4]));
 957     s2 = w * (aT[1] + w * aT[3]);
 958     if (id < 0)
 959         return x - x * (s1 + s2);
 960     z = atanhi[id] - ((x * (s1 + s2) - atanlo[id]) - x);
 961     return sign ? -z : z;
 962 }
 963
 964 /*********************************************************************
 965  *              atan2f (MSVCRT.@)
 966  *
 967  * Copied from musl: src/math/atan2f.c
 968  */
 969 float CDECL atan2f( float y, float x )
 970 {
 971     static const float pi     = 3.1415927410e+00,
 972                  pi_lo  = -8.7422776573e-08;
 973
 974     float z;
 975     unsigned int m, ix, iy;
 976
 977     if (isnan(x) || isnan(y))
 978         return x + y;
 979     ix = *(unsigned int*)&x;
 980     iy = *(unsigned int*)&y;
 981     if (ix == 0x3f800000)  /* x=1.0 */
 982         return atanf(y);
 983     m = ((iy >> 31) & 1) | ((ix >> 30) & 2);  /* 2*sign(x)+sign(y) */
 984     ix &= 0x7fffffff;
 985     iy &= 0x7fffffff;
 986
 987     /* when y = 0 */
 988     if (iy == 0) {
 989         switch (m) {
 990         case 0:
 991         case 1: return y;   /* atan(+-0,+anything)=+-0 */
 992         case 2: return pi;  /* atan(+0,-anything) = pi */
 993         case 3: return -pi; /* atan(-0,-anything) =-pi */
 994         }
 995     }
 996     /* when x = 0 */
 997     if (ix == 0)
 998         return m & 1 ? -pi / 2 : pi / 2;
 999     /* when x is INF */
1000     if (ix == 0x7f800000) {
1001         if (iy == 0x7f800000) {
1002             switch (m) {
1003             case 0: return pi / 4;      /* atan(+INF,+INF) */
1004             case 1: return -pi / 4;     /* atan(-INF,+INF) */
1005             case 2: return 3 * pi / 4;  /*atan(+INF,-INF)*/
1006             case 3: return -3 * pi / 4; /*atan(-INF,-INF)*/
1007             }
1008         } else {
1009             switch (m) {
1010             case 0: return 0.0f;    /* atan(+...,+INF) */
1011             case 1: return -0.0f;   /* atan(-...,+INF) */
1012             case 2: return pi;      /* atan(+...,-INF) */
1013             case 3: return -pi;     /* atan(-...,-INF) */
1014             }
1015         }
1016     }
1017     /* |y/x| > 0x1p26 */
1018     if (ix + (26 << 23) < iy || iy == 0x7f800000)
1019         return m & 1 ? -pi / 2 : pi / 2;
1020
1021     /* z = atan(|y/x|) with correct underflow */
1022     if ((m & 2) && iy + (26 << 23) < ix)  /*|y/x| < 0x1p-26, x < 0 */
1023         z = 0.0;
1024     else
1025         z = atanf(fabsf(y / x));
1026     switch (m) {
1027     case 0: return z;                /* atan(+,+) */
1028     case 1: return -z;               /* atan(-,+) */
1029     case 2: return pi - (z - pi_lo); /* atan(+,-) */
1030     default: /* case 3 */
1031         return (z - pi_lo) - pi;     /* atan(-,-) */
1032     }
1033 }
1034
/* Copied from musl: src/math/__rem_pio2f.c
 *
 * Argument reduction for the single-precision trigonometric functions:
 * splits x into a multiple of pi/2 and a remainder.
 *
 * x: angle to reduce.
 * y: out parameter receiving the remainder as a double.
 *
 * Returns the multiple n that was removed. When x is infinite or NaN,
 * *y is set to x - x and 0 is returned.
 */
static int __rem_pio2f(float x, double *y)
{
    /* toint:    adding and then subtracting it rounds a double to an integer
     * pio4:     bound used to detect a remainder that landed out of range
     * invpio2:  factor that expresses x in units of pi/2
     * pio2_1 / pio2_1t: pi/2 split into a leading part and a small tail */
    static const double toint = 1.5 / DBL_EPSILON,
        pio4 = 0x1.921fb6p-1,
        invpio2 = 6.36619772367581382433e-01,
        pio2_1 = 1.57079631090164184570e+00,
        pio2_1t = 1.58932547735281966916e-08;

    union {float f; uint32_t i;} u = {x};
    double tx[1], ty[1], fn;
    UINT32 ix;
    int n, sign, e0;

    ix = u.i & 0x7fffffff; /* bit pattern of |x| */
    /* 25+53 bit pi is good enough for medium size */
    if (ix < 0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */
        /* Use a specialized rint() to get fn. */
        fn = fp_barrier(x * invpio2 + toint) - toint;
        n  = (int)fn;
        *y = x - fn * pio2_1 - fn * pio2_1t;
        /* Matters with directed rounding. */
        if (*y < -pio4) {
            /* remainder too negative: use the previous multiple instead */
            n--;
            fn--;
            *y = x - fn * pio2_1 - fn * pio2_1t;
        } else if (*y > pio4) {
            /* remainder too positive: use the next multiple instead */
            n++;
            fn++;
            *y = x - fn * pio2_1 - fn * pio2_1t;
        }
        return n;
    }
    if(ix >= 0x7f800000) { /* x is inf or NaN */
        *y = x - x;
        return 0;
    }
    /* scale x into [2^23, 2^24-1] */
    sign = u.i >> 31;
    e0 = (ix >> 23) - (0x7f + 23); /* e0 = ilogb(|x|)-23, positive */
    u.i = ix - (e0 << 23);
    tx[0] = u.f;
    /* __rem_pio2_large is defined elsewhere in this file; it is handed the
       scaled |x| together with the exponent that was removed */
    n = __rem_pio2_large(tx, ty, e0, 1, 0);
    /* the reduction worked on |x|: mirror both outputs for negative x */
    if (sign) {
        *y = -ty[0];
        return -n;
    }
    *y = ty[0];
    return n;
}
1085
1086 /*********************************************************************
1087  *      cosf (MSVCRT.@)
1088  *
1089  * Copied from musl: src/math/cosf.c
1090  */
1091 float CDECL cosf( float x )
1092 {
1093     static const double c1pio2 = 1*M_PI_2,
1094         c2pio2 = 2*M_PI_2,
1095         c3pio2 = 3*M_PI_2,
1096         c4pio2 = 4*M_PI_2;
1097
1098     double y;
1099     UINT32 ix;
1100     unsigned n, sign;
1101
1102     ix = *(UINT32*)&x;
1103     sign = ix >> 31;
1104     ix &= 0x7fffffff;
1105
1106     if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
1107         if (ix < 0x39800000) { /* |x| < 2**-12 */
1108             /* raise inexact if x != 0 */
1109             fp_barrierf(x + 0x1p120f);
1110             return 1.0f;
1111         }
1112         return __cosdf(x);
1113     }
1114     if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
1115         if (ix > 0x4016cbe3) /* |x| ~> 3*pi/4 */
1116             return -__cosdf(sign ? x + c2pio2 : x - c2pio2);
1117         else {
1118             if (sign)
1119                 return __sindf(x + c1pio2);
1120             else
1121                 return __sindf(c1pio2 - x);
1122         }
1123     }
1124     if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
1125         if (ix > 0x40afeddf) /* |x| ~> 7*pi/4 */
1126             return __cosdf(sign ? x + c4pio2 : x - c4pio2);
1127         else {
1128             if (sign)
1129                 return __sindf(-x - c3pio2);
1130             else
1131                 return __sindf(x - c3pio2);
1132         }
1133     }
1134
1135     /* cos(Inf or NaN) is NaN */
1136     if (isinf(x)) return math_error(_DOMAIN, "cosf", x, 0, x - x);
1137     if (ix >= 0x7f800000)
1138         return x - x;
1139
1140     /* general argument reduction needed */
1141     n = __rem_pio2f(x, &y);
1142     switch (n & 3) {
1143     case 0: return __cosdf(y);
1144     case 1: return __sindf(-y);
1145     case 2: return -__cosdf(y);
1146     default: return __sindf(y);
1147     }
1148 }
1149
1150 /* Copied from musl: src/math/__expo2f.c */
1151 static float __expo2f(float x, float sign)
1152 {
1153     static const int k = 235;
1154     static const float kln2 = 0x1.45c778p+7f;
1155     float scale;
1156
1157     *(UINT32*)&scale = (UINT32)(0x7f + k/2) << 23;
1158     return expf(x - kln2) * (sign * scale) * scale;
1159 }
1160
1161 /*********************************************************************
1162  *      coshf (MSVCRT.@)
1163  *
1164  * Copied from musl: src/math/coshf.c
1165  */
1166 float CDECL coshf( float x )
1167 {
1168     UINT32 ui = *(UINT32*)&x;
1169     UINT32 sign = ui & 0x80000000;
1170     float t;
1171
1172     /* |x| */
1173     ui &= 0x7fffffff;
1174     x = *(float*)&ui;
1175
1176     /* |x| < log(2) */
1177     if (ui < 0x3f317217) {
1178         if (ui < 0x3f800000 - (12 << 23)) {
1179             fp_barrierf(x + 0x1p120f);
1180             return 1;
1181         }
1182         t = __expm1f(x);
1183         return 1 + t * t / (2 * (1 + t));
1184     }
1185
1186     /* |x| < log(FLT_MAX) */
1187     if (ui < 0x42b17217) {
1188         t = expf(x);
1189         return 0.5f * (t + 1 / t);
1190     }
1191
1192     /* |x| > log(FLT_MAX) or nan */
1193     if (ui > 0x7f800000)
1194         *(UINT32*)&t = ui | sign | 0x400000;
1195     else
1196         t = __expo2f(x, 1.0f);
1197     return t;
1198 }
1199
/*********************************************************************
 *      expf (MSVCRT.@)
 *
 * Single-precision e**x, evaluated in double precision.
 *
 * The argument is scaled by N/ln2 (N = 1 << 5) and split into an integer k
 * and a remainder r. The result is s * p(r), where p is a cubic polynomial
 * and s is built from an exp2f_T table entry plus an exponent adjustment.
 *
 * Special cases handled below: -inf returns 0, +inf and NaN return x + x,
 * and overflow / underflow are reported through math_error().
 */
float CDECL expf( float x )
{
    /* polynomial coefficients, pre-divided by powers of N = 1 << 5 because
       r is expressed in units of 1/N */
    static const double C[] = {
        0x1.c6af84b912394p-5 / (1 << 5) / (1 << 5) / (1 << 5),
        0x1.ebfce50fac4f3p-3 / (1 << 5) / (1 << 5),
        0x1.62e42ff0c52d6p-1 / (1 << 5)
    };
    /* N / ln(2) */
    static const double invln2n = 0x1.71547652b82fep+0 * (1 << 5);

    double kd, z, r, r2, y, s;
    UINT32 abstop;
    UINT64 ki, t;

    /* exponent plus the top three mantissa bits of x, sign dropped */
    abstop = (*(UINT32*)&x >> 20) & 0x7ff;
    if (abstop >= 0x42b) {
        /* |x| >= 88 or x is nan.  */
        if (*(UINT32*)&x == 0xff800000) /* x is -inf */
            return 0.0f;
        if (abstop >= 0x7f8) /* +inf or NaN */
            return x + x;
        if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
            return math_error(_OVERFLOW, "expf", x, 0, x * FLT_MAX);
        if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
            return math_error(_UNDERFLOW, "expf", x, 0, fp_barrierf(FLT_MIN) * FLT_MIN);
    }

    /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k.  */
    z = invln2n * x;

    /* Round and convert z to int, the result is in [-150*N, 128*N] and
       ideally ties-to-even rule is used, otherwise the magnitude of r
       can be bigger which gives larger approximation error.  */
    kd = __round(z);
    ki = (INT64)kd; /* a negative k wraps; only its low bits and the shift below are used */
    r = z - kd;

    /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
    t = exp2f_T[ki % (1 << 5)]; /* table defined elsewhere, indexed by k mod N */
    t += ki << (52 - 5);        /* add k/N into the exponent field of the double */
    s = *(double*)&t;
    z = C[0] * r + C[1];
    r2 = r * r;
    y = C[2] * r + 1;
    y = z * r2 + y;
    y = y * s;
    return y; /* narrowed to float on return */
}
1250
/*********************************************************************
 *      fmodf (MSVCRT.@)
 *
 * Single-precision floating-point remainder of x / y, computed with an
 * integer shift-and-subtract loop on the mantissas. A non-zero result
 * carries the sign of x.
 *
 * Special cases handled below:
 *  - infinite x is reported through math_error() as _DOMAIN;
 *  - y == 0, NaN y or NaN x yield (x * y) / (x * y);
 *  - |x| < |y| returns x unchanged, |x| == |y| returns 0 * x.
 *
 * Copied from musl: src/math/fmodf.c
 */
float CDECL fmodf( float x, float y )
{
    UINT32 xi = *(UINT32*)&x;
    UINT32 yi = *(UINT32*)&y;
    int ex = xi>>23 & 0xff;       /* biased exponent of x */
    int ey = yi>>23 & 0xff;       /* biased exponent of y */
    UINT32 sx = xi & 0x80000000;  /* sign bit of x, restored at the end */
    UINT32 i;

    if (isinf(x)) return math_error(_DOMAIN, "fmodf", x, y, (x * y) / (x * y));
    /* y is +-0, y is NaN, or x is NaN (infinite x was handled above) */
    if (yi << 1 == 0 || isnan(y) || ex == 0xff)
        return (x * y) / (x * y);
    /* compare magnitudes with the sign bits shifted out */
    if (xi << 1 <= yi << 1) {
        if (xi << 1 == yi << 1)
            return 0 * x;
        return x;
    }

    /* normalize x and y */
    if (!ex) {
        /* subnormal: count leading zeros of the mantissa into ex (which
           becomes zero or negative), then shift the leading one to bit 23 */
        for (i = xi << 9; i >> 31 == 0; ex--, i <<= 1);
        xi <<= -ex + 1;
    } else {
        /* normal: keep the 23 mantissa bits and add the implicit one */
        xi &= -1U >> 9;
        xi |= 1U << 23;
    }
    if (!ey) {
        for (i = yi << 9; i >> 31 == 0; ey--, i <<= 1);
        yi <<= -ey + 1;
    } else {
        yi &= -1U >> 9;
        yi |= 1U << 23;
    }

    /* x mod y */
    for (; ex > ey; ex--) {
        i = xi - yi;
        if (i >> 31 == 0) { /* xi >= yi: the subtraction did not wrap */
            if (i == 0)
                return 0 * x;
            xi = i;
        }
        xi <<= 1;
    }
    /* final step at equal exponents */
    i = xi - yi;
    if (i >> 31 == 0) {
        if (i == 0)
            return 0 * x;
        xi = i;
    }
    /* renormalize: bring the leading one back to bit 23 */
    for (; xi>>23 == 0; xi <<= 1, ex--);

    /* scale result up */
    if (ex > 0) {
        /* normal result: drop the implicit one and insert the exponent */
        xi -= 1U << 23;
        xi |= (UINT32)ex << 23;
    } else {
        /* subnormal result: shift the mantissa down instead */
        xi >>= -ex + 1;
    }
    xi |= sx;
    return *(float*)&xi;
}
1318
/*********************************************************************
 *      logf (MSVCRT.@)
 *
 * Single-precision natural logarithm, evaluated in double precision.
 *
 * x is decomposed as 2^k * z. A 16-entry table supplies invc and logc for
 * a value c near z, and log1p(z/c - 1) is approximated by a short
 * polynomial.
 *
 * Special cases handled below: +-0 is reported as _SING, negative input
 * other than -0 and NaN as _DOMAIN; +inf and NaN are returned unchanged.
 *
 * Copied from musl: src/math/logf.c src/math/logf_data.c
 */
float CDECL logf( float x )
{
    static const double Ln2 = 0x1.62e42fefa39efp-1;
    /* coefficients of the polynomial approximating log1p(r) */
    static const double A[] = {
        -0x1.00ea348b88334p-2,
        0x1.5575b0be00b6ap-2,
        -0x1.ffffef20a4123p-2
    };
    /* per-subinterval data: invc multiplies z, logc is added to the result */
    static const struct {
        double invc, logc;
    } T[] = {
        { 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 },
        { 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 },
        { 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 },
        { 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 },
        { 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 },
        { 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 },
        { 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 },
        { 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 },
        { 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 },
        { 0x1p+0, 0x0p+0 },
        { 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 },
        { 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 },
        { 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 },
        { 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 },
        { 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 },
        { 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 }
    };

    double z, r, r2, y, y0, invc, logc;
    UINT32 ix, iz, tmp;
    int k, i;

    ix = *(UINT32*)&x;
    /* Fix sign of zero with downward rounding when x==1. */
    if (ix == 0x3f800000)
        return 0;
    /* one unsigned comparison catches everything outside the positive
       normal range: the subtraction wraps for zero and subnormals */
    if (ix - 0x00800000 >= 0x7f800000 - 0x00800000) {
        /* x < 0x1p-126 or inf or nan. */
        if (ix * 2 == 0) /* +-0: the quotient is -inf for either sign */
            return math_error(_SING, "logf", x, 0, (ix & 0x80000000 ? 1.0 : -1.0) / x);
        if (ix == 0x7f800000) /* log(inf) == inf. */
            return x;
        if (ix * 2 > 0xff000000) /* NaN of either sign */
            return x;
        if (ix & 0x80000000) /* any remaining negative value, including -inf */
            return math_error(_DOMAIN, "logf", x, 0, (x - x) / (x - x));
        /* x is subnormal, normalize it. */
        x *= 0x1p23f;
        ix = *(UINT32*)&x;
        ix -= 23 << 23; /* undo the scaling in the exponent field */
    }

    /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
       The range is split into N subintervals.
       The ith subinterval contains z and c is near its center. */
    tmp = ix - 0x3f330000;
    i = (tmp >> (23 - 4)) % (1 << 4);  /* table index from the top 4 mantissa bits of tmp */
    k = (INT32)tmp >> 23; /* arithmetic shift */
    iz = ix - (tmp & (0x1ffu << 23));  /* remove k from the exponent to obtain z */
    invc = T[i].invc;
    logc = T[i].logc;
    z = *(float*)&iz;

    /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
    r = z * invc - 1;
    y0 = logc + (double)k * Ln2;

    /* Pipelined polynomial evaluation to approximate log1p(r). */
    r2 = r * r;
    y = A[1] * r + A[2];
    y = A[0] * r2 + y;
    y = y * r2 + (y0 + r);
    return y; /* narrowed to float on return */
}
1399
/*********************************************************************
 *      log10f (MSVCRT.@)
 *
 * Single-precision base-10 logarithm.
 *
 * x is reduced to 2^k * m with m in [sqrt(2)/2, sqrt(2)]. log(m) is
 * evaluated as a hi/lo pair, and both parts together with k are multiplied
 * by split constants for 1/ln(10) and log10(2).
 *
 * Special cases handled below: +-0 is reported as _SING, negative input
 * other than -0 and NaN as _DOMAIN; +inf and NaN are returned unchanged,
 * and log10f(1) returns 0.
 */
float CDECL log10f( float x )
{
    /* ivln10hi/lo and log10_2hi/lo are hi/lo splits of 1/ln(10) and
       log10(2); Lg1..Lg4 are the polynomial coefficients used for R */
    static const float ivln10hi = 4.3432617188e-01,
        ivln10lo = -3.1689971365e-05,
        log10_2hi = 3.0102920532e-01,
        log10_2lo = 7.9034151668e-07,
        Lg1 = 0xaaaaaa.0p-24,
        Lg2 = 0xccce13.0p-25,
        Lg3 = 0x91e9ee.0p-25,
        Lg4 = 0xf89e26.0p-26;

    union {float f; UINT32 i;} u = {x};
    float hfsq, f, s, z, R, w, t1, t2, dk, hi, lo;
    UINT32 ix;
    int k;

    ix = u.i;
    k = 0;
    if (ix < 0x00800000 || ix >> 31) { /* x < 2**-126 */
        if (ix << 1 == 0) /* +-0 */
            return math_error(_SING, "log10f", x, 0, -1 / (x * x));
        if ((ix & ~(1u << 31)) > 0x7f800000) /* NaN with the sign bit set */
            return x;
        if (ix >> 31) /* any other negative value */
            return math_error(_DOMAIN, "log10f", x, 0, (x - x) / (x - x));
        /* subnormal number, scale up x */
        k -= 25;
        x *= 0x1p25f;
        u.f = x;
        ix = u.i;
    } else if (ix >= 0x7f800000) { /* +inf or positive NaN */
        return x;
    } else if (ix == 0x3f800000) /* x == 1 */
        return 0;

    /* reduce x into [sqrt(2)/2, sqrt(2)] */
    ix += 0x3f800000 - 0x3f3504f3;
    k += (int)(ix >> 23) - 0x7f;
    ix = (ix & 0x007fffff) + 0x3f3504f3;
    u.i = ix;
    x = u.f;

    f = x - 1.0f;
    s = f / (2.0f + f);
    z = s * s;
    w = z * z;
    t1= w * (Lg2 + w * Lg4);
    t2= z * (Lg1 + w * Lg3);
    R = t2 + t1;
    hfsq = 0.5f * f * f;

    /* hi = f - hfsq with its low 12 bits cleared; lo collects the rest */
    hi = f - hfsq;
    u.f = hi;
    u.i &= 0xfffff000;
    hi = u.f;
    lo = f - hi - hfsq + s * (hfsq + R);
    dk = k;
    /* the small terms come first in the sum, the large ones last */
    return dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi + hi * ivln10hi + dk * log10_2hi;
}
1462
/* Computes log2 of the positive float whose bit pattern is ix, scaled by
   POWF_SCALE (which is 1 << 5).

   ix: bit pattern of the argument. Subnormal input must already have been
       normalized by the caller, so that ix carries a negative biased
       exponent.

   Returns POWF_SCALE * log2(x) as a double. */
static double powf_log2(UINT32 ix)
{
    /* per-subinterval data: invc multiplies z, logc is POWF_SCALE * log2(c) */
    static const struct {
        double invc, logc;
    } T[] = {
        { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * (1 << 5) },
        { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * (1 << 5) },
        { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * (1 << 5) },
        { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * (1 << 5) },
        { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * (1 << 5) },
        { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * (1 << 5) },
        { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * (1 << 5) },
        { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * (1 << 5) },
        { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * (1 << 5) },
        /* same scale factor as every other entry; the value is zero anyway */
        { 0x1p+0, 0x0p+0 * (1 << 5) },
        { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * (1 << 5) },
        { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * (1 << 5) },
        { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * (1 << 5) },
        { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * (1 << 5) },
        { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * (1 << 5) },
        { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * (1 << 5) }
    };
    /* coefficients of the polynomial approximating log1p(r)/ln2, scaled */
    static const double A[] = {
        0x1.27616c9496e0bp-2 * (1 << 5), -0x1.71969a075c67ap-2 * (1 << 5),
        0x1.ec70a6ca7baddp-2 * (1 << 5), -0x1.7154748bef6c8p-1 * (1 << 5),
        0x1.71547652ab82bp0 * (1 << 5)
    };

    double z, r, r2, r4, p, q, y, y0, invc, logc;
    UINT32 iz, top, tmp;
    int k, i;

    /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
       The range is split into N subintervals.
       The ith subinterval contains z and c is near its center. */
    tmp = ix - 0x3f330000;
    i = (tmp >> (23 - 4)) % (1 << 4);  /* table index */
    top = tmp & 0xff800000;            /* exponent part of tmp */
    iz = ix - top;                     /* bit pattern of z */
    k = (INT32)top >> (23 - 5); /* arithmetic shift; k is already scaled by 1 << 5 */
    invc = T[i].invc;
    logc = T[i].logc;
    z = *(float*)&iz;

    /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
    r = z * invc - 1;
    y0 = logc + (double)k;

    /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
    r2 = r * r;
    y = A[0] * r + A[1];
    p = A[2] * r + A[3];
    r4 = r2 * r2;
    q = A[4] * r + y0;
    q = p * r2 + q;
    y = y * r4 + q;
    return y;
}
1523
/* The output of log2 and thus the input of exp2 is either scaled by N
   (in case of fast toint intrinsics) or not. The unscaled xd must be
   in [-1021,1023], sign_bias sets the sign of the result.

   xd:        exponent argument; the code below treats it as scaled by
              N = 1 << 5.
   sign_bias: added to the integer part before it is shifted into the bit
              pattern of the scale factor s. powf passes either 0 or
              1 << (5 + 11), which the shift by 52 - 5 moves to bit 63,
              the sign bit of a double.

   Returns the result narrowed to float. */
static float powf_exp2(double xd, UINT32 sign_bias)
{
    /* polynomial coefficients, pre-divided by powers of N = 1 << 5 */
    static const double C[] = {
        0x1.c6af84b912394p-5 / (1 << 5) / (1 << 5) / (1 << 5),
        0x1.ebfce50fac4f3p-3 / (1 << 5) / (1 << 5),
        0x1.62e42ff0c52d6p-1 / (1 << 5)
    };

    UINT64 ki, ski, t;
    double kd, z, r, r2, y, s;

    /* N*x = k + r with r in [-1/2, 1/2] */
    kd = __round(xd); /* k */
    ki = (INT64)kd;   /* a negative k wraps; only its low bits and the shift below are used */
    r = xd - kd;

    /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
    t = exp2f_T[ki % (1 << 5)]; /* table defined elsewhere, indexed by k mod N */
    ski = ki + sign_bias;
    t += ski << (52 - 5);       /* add k/N to the exponent field and set the sign */
    s = *(double*)&t;
    z = C[0] * r + C[1];
    r2 = r * r;
    y = C[2] * r + 1;
    y = z * r2 + y;
    y = y * s;
    return y;
}
1555
/* Classifies the float whose bit pattern is iy, which must be a non-zero
   finite value: 0 = not an integer, 1 = odd integer, 2 = even integer. */
static int powf_checkint(UINT32 iy)
{
    int exponent = (iy >> 23) & 0xff;
    UINT32 unit_bit;

    /* magnitude below 1: cannot be an integer */
    if (exponent < 0x7f)
        return 0;
    /* too large for the mantissa to hold a units bit: always even */
    if (exponent > 0x7f + 23)
        return 2;

    /* position of the bit with weight 1 inside the bit pattern */
    unit_bit = 1 << (0x7f + 23 - exponent);

    /* any bit below the units bit is a fractional part */
    if (iy & (unit_bit - 1))
        return 0;
    return (iy & unit_bit) ? 1 : 2;
}
1571
/*********************************************************************
 *      powf (MSVCRT.@)
 *
 * Single-precision x raised to the power y, computed in double precision
 * as powf_exp2(y * powf_log2(x)).
 *
 * Special operands (zero, infinity, NaN, negative or subnormal x) are
 * sorted out first. A pole (zero base with negative exponent) is reported
 * as _SING, a negative finite base with a non-integer exponent as _DOMAIN,
 * and results out of range as _OVERFLOW / _UNDERFLOW, all through
 * math_error().
 *
 * Copied from musl: src/math/powf.c src/math/powf_data.c
 */
float CDECL powf( float x, float y )
{
    UINT32 sign_bias = 0; /* becomes non-zero when the result must be negative */
    UINT32 ix, iy;
    double logx, ylogx;

    ix = *(UINT32*)&x;
    iy = *(UINT32*)&y;
    /* both tests rely on unsigned wrap-around: the first is true for
       everything but positive normal x, the second for y = 0, inf or NaN */
    if (ix - 0x00800000 >= 0x7f800000 - 0x00800000 ||
            2 * iy - 1 >= 2u * 0x7f800000 - 1) {
        /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */
        if (2 * iy - 1 >= 2u * 0x7f800000 - 1) {
            if (2 * iy == 0) /* y is +-0 */
                return 1.0f;
            if (ix == 0x3f800000) /* x is 1 */
                return 1.0f;
            if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000) /* a NaN operand */
                return x + y;
            if (2 * ix == 2 * 0x3f800000) /* |x| is 1 and y is infinite */
                return 1.0f;
            if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
                return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
            return y * y; /* the remaining infinite-y cases give +inf */
        }
        /* x is 0, inf or NaN (same wrap-around test as for y) */
        if (2 * ix - 1 >= 2u * 0x7f800000 - 1) {
            float x2 = x * x;
            /* a negative base keeps its sign only for odd integer y */
            if (ix & 0x80000000 && powf_checkint(iy) == 1)
                x2 = -x2;
            if (iy & 0x80000000 && x2 == 0.0) /* zero base, negative exponent */
                return math_error(_SING, "powf", x, y, 1 / x2);
            /* Without the barrier some versions of clang hoist the 1/x2 and
               thus division by zero exception can be signaled spuriously. */
            return iy & 0x80000000 ? fp_barrierf(1 / x2) : x2;
        }
        /* x and y are non-zero finite. */
        if (ix & 0x80000000) {
            /* Finite x < 0. */
            int yint = powf_checkint(iy);
            if (yint == 0)
                return math_error(_DOMAIN, "powf", x, y, 0 / (x - x));
            if (yint == 1)
                sign_bias = 1 << (5 + 11);
            ix &= 0x7fffffff; /* continue with |x| */
        }
        if (ix < 0x00800000) {
            /* Normalize subnormal x so exponent becomes negative. */
            x *= 0x1p23f;
            ix = *(UINT32*)&x;
            ix &= 0x7fffffff;
            ix -= 23 << 23;
        }
    }
    logx = powf_log2(ix);
    ylogx = y * logx; /* cannot overflow, y is single prec. */
    /* compare the top bits of ylogx, sign excluded, with those of the
       constant 0x40af800000000000 */
    if ((*(UINT64*)&ylogx >> 47 & 0xffff) >= 0x40af800000000000llu >> 47) {
        /* |y*log(x)| >= 126. */
        if (ylogx > 0x1.fffffffd1d571p+6 * (1 << 5))
            return math_error(_OVERFLOW, "powf", x, y, (sign_bias ? -1.0 : 1.0) * 0x1p1023);
        if (ylogx <= -150.0 * (1 << 5))
            return math_error(_UNDERFLOW, "powf", x, y, (sign_bias ? -1.0 : 1.0) / 0x1p1023);
    }
    return powf_exp2(ylogx, sign_bias);
}
1640
1641 /*********************************************************************
1642  *      sinf (MSVCRT.@)
1643  *
1644  * Copied from musl: src/math/sinf.c
1645  */
1646 float CDECL sinf( float x )
1647 {
1648     static const double s1pio2 = 1*M_PI_2,
1649         s2pio2 = 2*M_PI_2,
1650         s3pio2 = 3*M_PI_2,
1651         s4pio2 = 4*M_PI_2;
1652
1653     double y;
1654     UINT32 ix;
1655     int n, sign;
1656
1657     ix = *(UINT32*)&x;
1658     sign = ix >> 31;
1659     ix &= 0x7fffffff;
1660
1661     if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
1662         if (ix < 0x39800000) { /* |x| < 2**-12 */
1663             /* raise inexact if x!=0 and underflow if subnormal */
1664             fp_barrierf(ix < 0x00800000 ? x / 0x1p120f : x + 0x1p120f);
1665             return x;
1666         }
1667         return __sindf(x);
1668     }
1669     if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
1670         if (ix <= 0x4016cbe3) { /* |x| ~<= 3pi/4 */
1671             if (sign)
1672                 return -__cosdf(x + s1pio2);
1673             else
1674                 return __cosdf(x - s1pio2);
1675         }
1676         return __sindf(sign ? -(x + s2pio2) : -(x - s2pio2));
1677     }
1678     if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
1679         if (ix <= 0x40afeddf) { /* |x| ~<= 7*pi/4 */
1680             if (sign)
1681                 return __cosdf(x + s3pio2);
1682             else
1683                 return -__cosdf(x - s3pio2);
1684         }
1685         return __sindf(sign ? x + s4pio2 : x - s4pio2);
1686     }
1687
1688     /* sin(Inf or NaN) is NaN */
1689     if (isinf(x))
1690         return math_error(_DOMAIN, "sinf", x, 0, x - x);
1691     if (ix >= 0x7f800000)
1692         return x - x;
1693
1694     /* general argument reduction needed */
1695     n = __rem_pio2f(x, &y);
1696     switch (n&3) {
1697     case 0: return __sindf(y);
1698     case 1: return __cosdf(y);
1699     case 2: return __sindf(-y);
1700     default: return -__cosdf(y);
1701     }
1702 }
1703
1704 /*********************************************************************
1705  *      sinhf (MSVCRT.@)
1706  */
1707 float CDECL sinhf( float x )
1708 {
1709     UINT32 ui = *(UINT32*)&x;
1710     float t, h, absx;
1711
1712     h = 0.5;
1713     if (ui >> 31)
1714         h = -h;
1715     /* |x| */
1716     ui &= 0x7fffffff;
1717     absx = *(float*)&ui;
1718
1719     /* |x| < log(FLT_MAX) */
1720     if (ui < 0x42b17217) {
1721         t = __expm1f(absx);
1722         if (ui < 0x3f800000) {
1723             if (ui < 0x3f800000 - (12 << 23))
1724                 return x;
1725             return h * (2 * t - t * t / (t + 1));
1726         }
1727         return h * (t + t / (t + 1));
1728     }
1729
1730     /* |x| > logf(FLT_MAX) or nan */
1731     if (ui > 0x7f800000)
1732         *(DWORD*)&t = *(DWORD*)&x | 0x400000;
1733     else
1734         t = __expo2f(absx, 2 * h);
1735     return t;
1736 }
1737
1738 static BOOL sqrtf_validate( float *x )
1739 {
1740     short c = _fdclass(*x);
1741
1742     if (c == FP_ZERO) return FALSE;
1743     if (c == FP_NAN) return FALSE;
1744     if (signbit(*x))
1745     {
1746         *x = math_error(_DOMAIN, "sqrtf", *x, 0, ret_nan(TRUE));
1747         return FALSE;
1748     }
1749     if (c == FP_INFINITE) return FALSE;
1750     return TRUE;
1751 }
1752
#if defined(__x86_64__) || defined(__i386__)
/* Assembly helper, built only for x86 targets: applies the SSE sqrtss
   instruction to the scalar single in %xmm0 and leaves the result in
   %xmm0. It performs no argument checking of its own. */
float CDECL sse2_sqrtf(float);
__ASM_GLOBAL_FUNC( sse2_sqrtf,
        "sqrtss %xmm0, %xmm0\n\t"
        "ret" )
#endif
1759
/*********************************************************************
 *      sqrtf (MSVCRT.@)
 *
 * Single-precision square root.
 *
 * sqrtf_validate() first deals with zeros, NaNs, infinities and negative
 * input. On x86_64 the remaining values go to the sse2_sqrtf assembly
 * helper; on every other target the root is computed with the integer
 * bit-by-bit algorithm below.
 *
 * Copied from musl: src/math/sqrtf.c
 */
float CDECL sqrtf( float x )
{
#ifdef __x86_64__
    if (!sqrtf_validate(&x))
        return x;

    return sse2_sqrtf(x);
#else
    /* small enough that 1.0f +- tiny only probes the rounding direction */
    static const float tiny = 1.0e-30;

    float z;
    int ix,s,q,m,t,i;
    unsigned int r;

    /* bit pattern of the input, captured before validation may replace x */
    ix = *(int*)&x;

    if (!sqrtf_validate(&x))
        return x;

    /* normalize x */
    m = ix >> 23;
    if (m == 0) {  /* subnormal x */
        for (i = 0; (ix & 0x00800000) == 0; i++)
            ix <<= 1;
        m -= i - 1;
    }
    m -= 127;  /* unbias exponent */
    ix = (ix & 0x007fffff) | 0x00800000;
    if (m & 1)  /* odd m, double x to make it even */
        ix += ix;
    m >>= 1;  /* m = [m/2] */

    /* generate sqrt(x) bit by bit */
    ix += ix;
    q = s = 0;       /* q = sqrt(x) */
    r = 0x01000000;  /* r = moving bit from right to left */

    while (r != 0) {
        t = s + r;
        if (t <= ix) {
            /* the candidate bit fits: accept it and reduce the remainder */
            s = t + r;
            ix -= t;
            q += r;
        }
        ix += ix;
        r >>= 1;
    }

    /* use floating add to find out rounding direction */
    if (ix != 0) {
        /* a non-zero remainder means the root is inexact */
        z = 1.0f - tiny; /* raise inexact flag */
        if (z >= 1.0f) {
            z = 1.0f + tiny;
            if (z > 1.0f)
                q += 2;
            else
                q += q & 1;
        }
    }
    /* drop the extra low bit of q and add the exponent bias and halved exponent */
    ix = (q >> 1) + 0x3f000000;
    r = ix + ((unsigned int)m << 23);
    z = *(float*)&r;
    return z;
#endif
}
1830
/* Copied from musl: src/math/__tandf.c
 *
 * Polynomial tangent kernel working in double precision on an already
 * reduced argument.
 *
 * x:   reduced argument.
 * odd: when non-zero, -1/p(x) is returned instead of p(x), where p is the
 *      odd polynomial evaluated below.
 */
static float __tandf(double x, int odd)
{
    static const double T[] = {
        0x15554d3418c99f.0p-54,
        0x1112fd38999f72.0p-55,
        0x1b54c91d865afe.0p-57,
        0x191df3908c33ce.0p-58,
        0x185dadfcecf44e.0p-61,
        0x1362b9bf971bcd.0p-59,
    };

    double x2, x3, x4, low, mid, high, sum;

    /* the three coefficient pairs are independent of each other */
    x2 = x * x;
    high = T[4] + x2 * T[5];
    mid = T[2] + x2 * T[3];
    x4 = x2 * x2;
    x3 = x2 * x;
    low = T[0] + x2 * T[1];
    sum = (x + x3 * low) + (x3 * x4) * (mid + x4 * high);

    if (odd)
        return -1.0 / sum;
    return sum;
}
1854
1855 /*********************************************************************
1856  *      tanf (MSVCRT.@)
1857  *
1858  * Copied from musl: src/math/tanf.c
1859  */
1860 float CDECL tanf( float x )
1861 {
1862     static const double t1pio2 = 1*M_PI_2,
1863         t2pio2 = 2*M_PI_2,
1864         t3pio2 = 3*M_PI_2,
1865         t4pio2 = 4*M_PI_2;
1866
1867     double y;
1868     UINT32 ix;
1869     unsigned n, sign;
1870
1871     ix = *(UINT32*)&x;
1872     sign = ix >> 31;
1873     ix &= 0x7fffffff;
1874
1875     if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
1876         if (ix < 0x39800000) { /* |x| < 2**-12 */
1877             /* raise inexact if x!=0 and underflow if subnormal */
1878             fp_barrierf(ix < 0x00800000 ? x / 0x1p120f : x + 0x1p120f);
1879             return x;
1880         }
1881         return __tandf(x, 0);
1882     }
1883     if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
1884         if (ix <= 0x4016cbe3) /* |x| ~<= 3pi/4 */
1885             return __tandf((sign ? x + t1pio2 : x - t1pio2), 1);
1886         else
1887             return __tandf((sign ? x + t2pio2 : x - t2pio2), 0);
1888     }
1889     if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
1890         if (ix <= 0x40afeddf) /* |x| ~<= 7*pi/4 */
1891             return __tandf((sign ? x + t3pio2 : x - t3pio2), 1);
1892         else
1893             return __tandf((sign ? x + t4pio2 : x - t4pio2), 0);
1894     }
1895
1896     /* tan(Inf or NaN) is NaN */
1897     if (isinf(x))
1898         return math_error(_DOMAIN, "tanf", x, 0, x - x);
1899     if (ix >= 0x7f800000)
1900         return x - x;
1901
1902     /* argument reduction */
1903     n = __rem_pio2f(x, &y);
1904     return __tandf(y, n & 1);
1905 }
1906
1907 /*********************************************************************
1908  *      tanhf (MSVCRT.@)
1909  */
1910 float CDECL tanhf( float x )
1911 {
1912     UINT32 ui = *(UINT32*)&x;
1913     UINT32 sign = ui & 0x80000000;
1914     float t;
1915
1916     /* x = |x| */
1917     ui &= 0x7fffffff;
1918     x = *(float*)&ui;
1919
1920     if (ui > 0x3f0c9f54) {
1921         /* |x| > log(3)/2 ~= 0.5493 or nan */
1922         if (ui > 0x41200000) {
1923             if (ui > 0x7f800000) {
1924                 *(UINT32*)&x = ui | sign | 0x400000;
1925 #if _MSVCR_VER < 140
1926                 return math_error(_DOMAIN, "tanhf", x, 0, x);
1927 #else
1928                 return x;
1929 #endif
1930             }
1931             /* |x| > 10 */
1932             fp_barrierf(x + 0x1p120f);
1933             t = 1 + 0 / x;
1934         } else {
1935             t = __expm1f(2 * x);
1936             t = 1 - 2 / (t + 2);
1937         }
1938     } else if (ui > 0x3e82c578) {
1939         /* |x| > log(5/3)/2 ~= 0.2554 */
1940         t = __expm1f(2 * x);
1941         t = t / (t + 2);
1942     } else if (ui >= 0x00800000) {
1943         /* |x| >= 0x1p-126 */
1944         t = __expm1f(-2 * x);
1945         t = -t / (t + 2);
1946     } else {
1947         /* |x| is subnormal */
1948         fp_barrierf(x * x);
1949         t = x;
1950     }
1951     return sign ? -t : t;
1952 }
1953
/*********************************************************************
 *      ceilf (MSVCRT.@)
 *
 * Copied from musl: src/math/ceilf.c
 *
 * Rounds x up to an integral value by editing the bit pattern of the
 * float directly.
 */
float CDECL ceilf( float x )
{
    union {float f; UINT32 i;} bits = {x};
    const int exponent = (int)(bits.i >> 23 & 0xff) - 0x7f; /* unbiased */
    const int negative = bits.i >> 31;
    UINT32 frac;

    /* |x| >= 2^23, inf or nan: no fraction bits are stored */
    if (exponent >= 23)
        return x;

    if (exponent < 0) {
        /* |x| < 1: the result is -0, 1 or x itself (+0) */
        if (negative)
            return -0.0;
        if (bits.i << 1)
            return 1.0;
        return bits.f;
    }

    /* mantissa bits that lie below the units place */
    frac = 0x007fffff >> exponent;
    if (!(bits.i & frac))
        return x;
    /* positive values are bumped past the next integer before truncating */
    if (!negative)
        bits.i += frac;
    bits.i &= ~frac;
    return bits.f;
}
1982
/*********************************************************************
 *      floorf (MSVCRT.@)
 *
 * Copied from musl: src/math/floorf.c
 *
 * Rounds x down to an integral value by editing the bit pattern of the
 * float directly.
 */
float CDECL floorf( float x )
{
    union {float f; UINT32 i;} bits = {x};
    const int exponent = (int)(bits.i >> 23 & 0xff) - 0x7f; /* unbiased */
    const int negative = bits.i >> 31;
    UINT32 frac;

    /* |x| >= 2^23, inf or nan: no fraction bits are stored */
    if (exponent >= 23)
        return x;

    if (exponent < 0) {
        /* |x| < 1: the result is +0, -1 or x itself (-0) */
        if (!negative)
            return 0;
        if (bits.i << 1)
            return -1;
        return bits.f;
    }

    /* mantissa bits that lie below the units place */
    frac = 0x007fffff >> exponent;
    if (!(bits.i & frac))
        return x;
    /* negative values are bumped past the next integer before truncating */
    if (negative)
        bits.i += frac;
    bits.i &= ~frac;
    return bits.f;
}
2011
/*********************************************************************
 *      frexpf (MSVCRT.@)
 *
 * Copied from musl: src/math/frexpf.c
 *
 * Splits x into a fraction with magnitude in [0.5, 1) and a power of
 * two, so that x == fraction * 2^(*e).
 *
 * PARAMS
 *  x  [I] value to decompose
 *  e  [O] receives the exponent; set to 0 when x is zero and left
 *         unwritten when x is inf or nan
 *
 * RETURNS
 *  The fraction; x itself when x is zero, inf or nan.
 */
float CDECL frexpf( float x, int *e )
{
    /* a union is used for the float <-> bits conversions so that no
     * object is accessed through a pointer of an incompatible type */
    union {float f; UINT32 i;} u = {x};
    int ee = u.i >> 23 & 0xff;

    if (!ee) {
        if (x) {
            /* subnormal: scale into the normal range and correct the exponent */
            x = frexpf(x * 0x1p64, e);
            *e -= 64;
        } else *e = 0;
        return x;
    } else if (ee == 0xff) {
        /* inf or nan */
        return x;
    }

    *e = ee - 0x7e;
    /* keep sign and mantissa, force the exponent field to that of 0.5 */
    u.i &= 0x807ffffful;
    u.i |= 0x3f000000ul;
    return u.f;
}
2037
/*********************************************************************
 *      modff (MSVCRT.@)
 *
 * Copied from musl: src/math/modff.c
 *
 * Splits x into an integral part, stored in *iptr, and a fractional
 * part, which is returned.  Whenever one of the two parts is zero it is
 * a zero carrying the sign of x.
 */
float CDECL modff( float x, float *iptr )
{
    union {float f; UINT32 i;} val = {x}, zero;
    const int exponent = (int)(val.i >> 23 & 0xff) - 0x7f; /* unbiased */
    UINT32 frac;

    /* zero with the sign of x */
    zero.i = val.i & 0x80000000;

    /* |x| < 1: no integral part */
    if (exponent < 0) {
        *iptr = zero.f;
        return x;
    }

    /* |x| >= 2^23, inf or nan: no fractional part */
    if (exponent >= 23) {
        *iptr = x;
        if (exponent == 0x80 && val.i << 9 != 0) /* nan */
            return x;
        return zero.f;
    }

    /* mantissa bits that lie below the units place */
    frac = 0x007fffff >> exponent;
    if (!(val.i & frac)) {
        /* x is already integral */
        *iptr = x;
        return zero.f;
    }

    val.i &= ~frac;
    *iptr = val.f;
    return x - val.f;
}
2075
2076 #endif
2077
2078 #if !defined(__i386__) && !defined(__x86_64__) && (_MSVCR_VER == 0 || _MSVCR_VER >= 110)
2079
/*********************************************************************
 *      fabsf (MSVCRT.@)
 *
 * Copied from musl: src/math/fabsf.c
 *
 * Returns |x| by clearing the sign bit of the float's bit pattern.
 */
float CDECL fabsf( float x )
{
    union { float f; UINT32 i; } bits;

    bits.f = x;
    bits.i &= ~0x80000000u; /* drop the sign bit, keep everything else */
    return bits.f;
}
2091
2092 #endif
2093
2094 /*********************************************************************
2095  *              acos (MSVCRT.@)
2096  *
2097  * Copied from musl: src/math/acos.c
2098  */
/* Rational function p(z) / q(z) used by acos() below. */
static double acos_R(double z)
{
    /* numerator coefficients, lowest order first (pS0 .. pS5) */
    static const double P[6] = {
         1.66666666666666657415e-01,
        -3.25565818622400915405e-01,
         2.01212532134862925881e-01,
        -4.00555345006794114027e-02,
         7.91534994289814532176e-04,
         3.47933107596021167570e-05,
    };
    /* denominator coefficients, lowest order first (qS1 .. qS4);
     * the constant term of q is 1 */
    static const double Q[4] = {
        -2.40339491173441421878e+00,
         2.02094576023350569471e+00,
        -6.88283971605453293030e-01,
         7.70381505559019352791e-02,
    };

    const double numer =
        z * (P[0] + z * (P[1] + z * (P[2] + z * (P[3] + z * (P[4] + z * P[5])))));
    const double denom =
        1.0 + z * (Q[0] + z * (Q[1] + z * (Q[2] + z * Q[3])));

    return numer / denom;
}
2117
2118 double CDECL acos( double x )
2119 {
2120     static const double pio2_hi = 1.57079632679489655800e+00,
2121                  pio2_lo = 6.12323399573676603587e-17;
2122
2123     double z, w, s, c, df;
2124     unsigned int hx, ix;
2125     ULONGLONG llx;
2126
2127     hx = *(ULONGLONG*)&x >> 32;
2128     ix = hx & 0x7fffffff;
2129     /* |x| >= 1 or nan */
2130     if (ix >= 0x3ff00000) {
2131         unsigned int lx;
2132
2133         lx = *(ULONGLONG*)&x;
2134         if (((ix - 0x3ff00000) | lx) == 0) {
2135             /* acos(1)=0, acos(-1)=pi */
2136             if (hx >> 31)
2137                 return 2 * pio2_hi + 7.5231638452626401e-37;
2138             return 0;
2139         }
2140         if (isnan(x)) return x;
2141         return math_error(_DOMAIN, "acos", x, 0, 0 / (x - x));
2142     }
2143     /* |x| < 0.5 */
2144     if (ix < 0x3fe00000) {
2145         if (ix <= 0x3c600000)  /* |x| < 2**-57 */
2146             return pio2_hi + 7.5231638452626401e-37;
2147         return pio2_hi - (x - (pio2_lo - x * acos_R(x * x)));
2148     }
2149     /* x < -0.5 */
2150     if (hx >> 31) {
2151         z = (1.0 + x) * 0.5;
2152         s = sqrt(z);
2153         w = acos_R(z) * s - pio2_lo;
2154         return 2 * (pio2_hi - (s + w));
2155     }
2156     /* x > 0.5 */
2157     z = (1.0 - x) * 0.5;
2158     s = sqrt(z);
2159     df = s;
2160     llx = (*(ULONGLONG*)&df >> 32) << 32;
2161     df = *(double*)&llx;
2162     c = (z - df * df) / (s + df);
2163     w = acos_R(z) * s + c;
2164     return 2 * (df + w);
2165 }
2166
2167 /*********************************************************************
2168  *              asin (MSVCRT.@)
2169  *
2170  * Copied from musl: src/math/asin.c
2171  */
/* Rational function p(z) / q(z) used by asin() below; asin() evaluates
 * x + x * asin_R(x * x) for small |x|. */
static double asin_R(double z)
{
    /* numerator coefficients */
    static const double n0 =  1.66666666666666657415e-01,
                 n1 = -3.25565818622400915405e-01,
                 n2 =  2.01212532134862925881e-01,
                 n3 = -4.00555345006794114027e-02,
                 n4 =  7.91534994289814532176e-04,
                 n5 =  3.47933107596021167570e-05;
    /* denominator coefficients; the constant term is 1 */
    static const double d1 = -2.40339491173441421878e+00,
                 d2 =  2.02094576023350569471e+00,
                 d3 = -6.88283971605453293030e-01,
                 d4 =  7.70381505559019352791e-02;

    const double numer = z * (n0 + z * (n1 + z * (n2 + z * (n3 + z * (n4 + z * n5)))));
    const double denom = 1.0 + z * (d1 + z * (d2 + z * (d3 + z * d4)));

    return numer / denom;
}
2191
2192 #ifdef __i386__
2193 double CDECL x87_asin(double);
2194 __ASM_GLOBAL_FUNC( x87_asin,
2195         "fldl 4(%esp)\n\t"
2196         SET_X87_CW(~0x37f)
2197         "fld %st\n\t"
2198         "fld1\n\t"
2199         "fsubp\n\t"
2200         "fld1\n\t"
2201         "fadd %st(2)\n\t"
2202         "fmulp\n\t"
2203         "fsqrt\n\t"
2204         "fpatan\n\t"
2205         RESET_X87_CW
2206         "ret" )
2207 #endif
2208
2209 double CDECL asin( double x )
2210 {
2211     static const double pio2_hi = 1.57079632679489655800e+00,
2212                  pio2_lo = 6.12323399573676603587e-17;
2213
2214     double z, r, s;
2215     unsigned int hx, ix;
2216     ULONGLONG llx;
2217 #ifdef __i386__
2218     unsigned int x87_cw, sse2_cw;
2219 #endif
2220
2221     hx = *(ULONGLONG*)&x >> 32;
2222     ix = hx & 0x7fffffff;
2223     /* |x| >= 1 or nan */
2224     if (ix >= 0x3ff00000) {
2225         unsigned int lx;
2226         lx = *(ULONGLONG*)&x;
2227         if (((ix - 0x3ff00000) | lx) == 0)
2228             /* asin(1) = +-pi/2 with inexact */
2229             return x * pio2_hi + 7.5231638452626401e-37;
2230         if (isnan(x))
2231         {
2232 #ifdef __i386__
2233             return math_error(_DOMAIN, "asin", x, 0, x);
2234 #else
2235             return x;
2236 #endif
2237         }
2238         return math_error(_DOMAIN, "asin", x, 0, 0 / (x - x));
2239     }
2240
2241 #ifdef __i386__
2242     __control87_2(0, 0, &x87_cw, &sse2_cw);
2243     if (!sse2_enabled || (x87_cw & _MCW_EM) != _MCW_EM
2244             || (sse2_cw & (_MCW_EM | _MCW_RC)) != _MCW_EM)
2245         return x87_asin(x);
2246 #endif
2247
2248     /* |x| < 0.5 */
2249     if (ix < 0x3fe00000) {
2250         /* if 0x1p-1022 <= |x| < 0x1p-26, avoid raising underflow */
2251         if (ix < 0x3e500000 && ix >= 0x00100000)
2252             return x;
2253         return x + x * asin_R(x * x);
2254     }
2255     /* 1 > |x| >= 0.5 */
2256     z = (1 - fabs(x)) * 0.5;
2257     s = sqrt(z);
2258     r = asin_R(z);
2259     if (ix >= 0x3fef3333) {  /* if |x| > 0.975 */
2260         x = pio2_hi - (2 * (s + s * r) - pio2_lo);
2261     } else {
2262         double f, c;
2263         /* f+c = sqrt(z) */
2264         f = s;
2265         llx = (*(ULONGLONG*)&f >> 32) << 32;
2266         f = *(double*)&llx;
2267         c = (z - f * f) / (s + f);
2268         x = 0.5 * pio2_hi - (2 * s * r - (pio2_lo - 2 * c) - (0.5 * pio2_hi - 2 * f));
2269     }
2270     if (hx >> 31)
2271         return -x;
2272     return x;
2273 }
2274
2275 /*********************************************************************
2276  *              atan (MSVCRT.@)
2277  *
2278  * Copied from musl: src/math/atan.c
2279  */
2280 double CDECL atan( double x )
2281 {
2282     static const double atanhi[] = {
2283         4.63647609000806093515e-01,
2284         7.85398163397448278999e-01,
2285         9.82793723247329054082e-01,
2286         1.57079632679489655800e+00,
2287     };
2288     static const double atanlo[] = {
2289         2.26987774529616870924e-17,
2290         3.06161699786838301793e-17,
2291         1.39033110312309984516e-17,
2292         6.12323399573676603587e-17,
2293     };
2294     static const double aT[] = {
2295         3.33333333333329318027e-01,
2296         -1.99999999998764832476e-01,
2297         1.42857142725034663711e-01,
2298         -1.11111104054623557880e-01,
2299         9.09088713343650656196e-02,
2300         -7.69187620504482999495e-02,
2301         6.66107313738753120669e-02,
2302         -5.83357013379057348645e-02,
2303         4.97687799461593236017e-02,
2304         -3.65315727442169155270e-02,
2305         1.62858201153657823623e-02,
2306     };
2307
2308     double w, s1, s2, z;
2309     unsigned int ix, sign;
2310     int id;
2311
2312 #if _MSVCR_VER == 0
2313     if (isnan(x)) return math_error(_DOMAIN, "atan", x, 0, x);
2314 #endif
2315
2316     ix = *(ULONGLONG*)&x >> 32;
2317     sign = ix >> 31;
2318     ix &= 0x7fffffff;
2319     if (ix >= 0x44100000) {   /* if |x| >= 2^66 */
2320         if (isnan(x))
2321             return x;
2322         z = atanhi[3] + 7.5231638452626401e-37;
2323         return sign ? -z : z;
2324     }
2325     if (ix < 0x3fdc0000) {    /* |x| < 0.4375 */
2326         if (ix < 0x3e400000) {  /* |x| < 2^-27 */
2327             if (ix < 0x00100000)
2328                 /* raise underflow for subnormal x */
2329                 fp_barrierf((float)x);
2330             return x;
2331         }
2332         id = -1;
2333     } else {
2334         x = fabs(x);
2335         if (ix < 0x3ff30000) {  /* |x| < 1.1875 */
2336             if (ix < 0x3fe60000) {  /*  7/16 <= |x| < 11/16 */
2337                 id = 0;
2338                 x = (2.0 * x - 1.0) / (2.0 + x);
2339             } else {                /* 11/16 <= |x| < 19/16 */
2340                 id = 1;
2341                 x = (x - 1.0) / (x + 1.0);
2342             }
2343         } else {
2344             if (ix < 0x40038000) {  /* |x| < 2.4375 */
2345                 id = 2;
2346                 x = (x - 1.5) / (1.0 + 1.5 * x);
2347             } else {                /* 2.4375 <= |x| < 2^66 */
2348                 id = 3;
2349                 x = -1.0 / x;
2350             }
2351         }
2352     }
2353     /* end of argument reduction */
2354     z = x * x;
2355     w = z * z;
2356     /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
2357     s1 = z * (aT[0] + w * (aT[2] + w * (aT[4] + w * (aT[6] + w * (aT[8] + w * aT[10])))));
2358     s2 = w * (aT[1] + w * (aT[3] + w * (aT[5] + w * (aT[7] + w * aT[9]))));
2359     if (id < 0)
2360         return x - x * (s1 + s2);
2361     z = atanhi[id] - (x * (s1 + s2) - atanlo[id] - x);
2362     return sign ? -z : z;
2363 }
2364
2365 /*********************************************************************
2366  *              atan2 (MSVCRT.@)
2367  *
2368  * Copied from musl: src/math/atan2.c
2369  */
2370 double CDECL atan2( double y, double x )
2371 {
2372     static const double pi     = 3.1415926535897931160E+00,
2373                  pi_lo  = 1.2246467991473531772E-16;
2374
2375     double z;
2376     unsigned int m, lx, ly, ix, iy;
2377
2378     if (isnan(x) || isnan(y))
2379         return x+y;
2380     ix = *(ULONGLONG*)&x >> 32;
2381     lx = *(ULONGLONG*)&x;
2382     iy = *(ULONGLONG*)&y >> 32;
2383     ly = *(ULONGLONG*)&y;
2384     if (((ix - 0x3ff00000) | lx) == 0)  /* x = 1.0 */
2385         return atan(y);
2386     m = ((iy >> 31) & 1) | ((ix >> 30) & 2);  /* 2*sign(x)+sign(y) */
2387     ix = ix & 0x7fffffff;
2388     iy = iy & 0x7fffffff;
2389
2390     /* when y = 0 */
2391     if ((iy | ly) == 0) {
2392         switch(m) {
2393         case 0:
2394         case 1: return y;   /* atan(+-0,+anything)=+-0 */
2395         case 2: return pi;  /* atan(+0,-anything) = pi */
2396         case 3: return -pi; /* atan(-0,-anything) =-pi */
2397         }
2398     }
2399     /* when x = 0 */
2400     if ((ix | lx) == 0)
2401         return m & 1 ? -pi / 2 : pi / 2;
2402     /* when x is INF */
2403     if (ix == 0x7ff00000) {
2404         if (iy == 0x7ff00000) {
2405             switch(m) {
2406             case 0: return pi / 4;      /* atan(+INF,+INF) */
2407             case 1: return -pi / 4;     /* atan(-INF,+INF) */
2408             case 2: return 3 * pi / 4;  /* atan(+INF,-INF) */
2409             case 3: return -3 * pi / 4; /* atan(-INF,-INF) */
2410             }
2411         } else {
2412             switch(m) {
2413             case 0: return 0.0;  /* atan(+...,+INF) */
2414             case 1: return -0.0; /* atan(-...,+INF) */
2415             case 2: return pi;   /* atan(+...,-INF) */
2416             case 3: return -pi;  /* atan(-...,-INF) */
2417             }
2418         }
2419     }
2420     /* |y/x| > 0x1p64 */
2421     if (ix + (64 << 20) < iy || iy == 0x7ff00000)
2422         return m & 1 ? -pi / 2 : pi / 2;
2423
2424     /* z = atan(|y/x|) without spurious underflow */
2425     if ((m & 2) && iy + (64 << 20) < ix)  /* |y/x| < 0x1p-64, x<0 */
2426         z = 0;
2427     else
2428         z = atan(fabs(y / x));
2429     switch (m) {
2430     case 0: return z;                /* atan(+,+) */
2431     case 1: return -z;               /* atan(-,+) */
2432     case 2: return pi - (z - pi_lo); /* atan(+,-) */
2433     default: /* case 3 */
2434         return (z - pi_lo) - pi;     /* atan(-,-) */
2435     }
2436 }
2437
2438 /* Copied from musl: src/math/rint.c */
2439 static double __rint(double x)
2440 {
2441     static const double toint = 1 / DBL_EPSILON;
2442
2443     ULONGLONG llx = *(ULONGLONG*)&x;
2444     int e = llx >> 52 & 0x7ff;
2445     int s = llx >> 63;
2446     unsigned cw;
2447     double y;
2448
2449     if (e >= 0x3ff+52)
2450         return x;
2451     cw = _controlfp(0, 0);
2452     if ((cw & _MCW_PC) != _PC_53)
2453         _controlfp(_PC_53, _MCW_PC);
2454     if (s)
2455         y = fp_barrier(x - toint) + toint;
2456     else
2457         y = fp_barrier(x + toint) - toint;
2458     if ((cw & _MCW_PC) != _PC_53)
2459         _controlfp(cw, _MCW_PC);
2460     if (y == 0)
2461         return s ? -0.0 : 0;
2462     return y;
2463 }
2464
2465 /* Copied from musl: src/math/__rem_pio2.c */
2466 static int __rem_pio2(double x, double *y)
2467 {
2468     static const double pio4    = 0x1.921fb54442d18p-1,
2469                  invpio2 = 6.36619772367581382433e-01,
2470                  pio2_1  = 1.57079632673412561417e+00,
2471                  pio2_1t = 6.07710050650619224932e-11,
2472                  pio2_2  = 6.07710050630396597660e-11,
2473                  pio2_2t = 2.02226624879595063154e-21,
2474                  pio2_3  = 2.02226624871116645580e-21,
2475                  pio2_3t = 8.47842766036889956997e-32;
2476
2477     union {double f; UINT64 i;} u = {x};
2478     double z, w, t, r, fn, tx[3], ty[2];
2479     UINT32 ix;
2480     int sign, n, ex, ey, i;
2481
2482     sign = u.i >> 63;
2483     ix = u.i >> 32 & 0x7fffffff;
2484     if (ix <= 0x400f6a7a) { /* |x| ~<= 5pi/4 */
2485         if ((ix & 0xfffff) == 0x921fb) /* |x| ~= pi/2 or 2pi/2 */
2486             goto medium; /* cancellation -- use medium case */
2487         if (ix <= 0x4002d97c) { /* |x| ~<= 3pi/4 */
2488             if (!sign) {
2489                 z = x - pio2_1; /* one round good to 85 bits */
2490                 y[0] = z - pio2_1t;
2491                 y[1] = (z - y[0]) - pio2_1t;
2492                 return 1;
2493             } else {
2494                 z = x + pio2_1;
2495                 y[0] = z + pio2_1t;
2496                 y[1] = (z - y[0]) + pio2_1t;
2497                 return -1;
2498             }
2499         } else {
2500             if (!sign) {
2501                 z = x - 2 * pio2_1;
2502                 y[0] = z - 2 * pio2_1t;
2503                 y[1] = (z - y[0]) - 2 * pio2_1t;
2504                 return 2;
2505             } else {
2506                 z = x + 2 * pio2_1;
2507                 y[0] = z + 2 * pio2_1t;
2508                 y[1] = (z - y[0]) + 2 * pio2_1t;
2509                 return -2;
2510             }
2511         }
2512     }
2513     if (ix <= 0x401c463b) { /* |x| ~<= 9pi/4 */
2514         if (ix <= 0x4015fdbc) { /* |x| ~<= 7pi/4 */
2515             if (ix == 0x4012d97c) /* |x| ~= 3pi/2 */
2516                 goto medium;
2517             if (!sign) {
2518                 z = x - 3 * pio2_1;
2519                 y[0] = z - 3 * pio2_1t;
2520                 y[1] = (z - y[0]) - 3 * pio2_1t;
2521                 return 3;
2522             } else {
2523                 z = x + 3 * pio2_1;
2524                 y[0] = z + 3 * pio2_1t;
2525                 y[1] = (z - y[0]) + 3 * pio2_1t;
2526                 return -3;
2527             }
2528         } else {
2529             if (ix == 0x401921fb) /* |x| ~= 4pi/2 */
2530                 goto medium;
2531             if (!sign) {
2532                 z = x - 4 * pio2_1;
2533                 y[0] = z - 4 * pio2_1t;
2534                 y[1] = (z - y[0]) - 4 * pio2_1t;
2535                 return 4;
2536             } else {
2537                 z = x + 4 * pio2_1;
2538                 y[0] = z + 4 * pio2_1t;
2539                 y[1] = (z - y[0]) + 4 * pio2_1t;
2540                 return -4;
2541             }
2542         }
2543     }
2544     if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */
2545 medium:
2546         fn = __rint(x * invpio2);
2547         n = (INT32)fn;
2548         r = x - fn * pio2_1;
2549         w = fn * pio2_1t; /* 1st round, good to 85 bits */
2550         /* Matters with directed rounding. */
2551         if (r - w < -pio4) {
2552             n--;
2553             fn--;
2554             r = x - fn * pio2_1;
2555             w = fn * pio2_1t;
2556         } else if (r - w > pio4) {
2557             n++;
2558             fn++;
2559             r = x - fn * pio2_1;
2560             w = fn * pio2_1t;
2561         }
2562         y[0] = r - w;
2563         u.f = y[0];
2564         ey = u.i >> 52 & 0x7ff;
2565         ex = ix >> 20;
2566         if (ex - ey > 16) { /* 2nd round, good to 118 bits */
2567             t = r;
2568             w = fn * pio2_2;
2569             r = t - w;
2570             w = fn * pio2_2t - ((t - r) - w);
2571             y[0] = r - w;
2572             u.f = y[0];
2573             ey = u.i >> 52 & 0x7ff;
2574             if (ex - ey > 49) { /* 3rd round, good to 151 bits, covers all cases */
2575                 t = r;
2576                 w = fn * pio2_3;
2577                 r = t - w;
2578                 w = fn * pio2_3t - ((t - r) - w);
2579                 y[0] = r - w;
2580             }
2581         }
2582         y[1] = (r - y[0]) - w;
2583         return n;
2584     }
2585     /*
2586      * all other (large) arguments
2587      */
2588     if (ix >= 0x7ff00000) {  /* x is inf or NaN */
2589         y[0] = y[1] = x - x;
2590         return 0;
2591     }
2592     /* set z = scalbn(|x|,-ilogb(x)+23) */
2593     u.f = x;
2594     u.i &= (UINT64)-1 >> 12;
2595     u.i |= (UINT64)(0x3ff + 23) << 52;
2596     z = u.f;
2597     for (i = 0; i < 2; i++) {
2598         tx[i] = (double)(INT32)z;
2599         z = (z - tx[i]) * 0x1p24;
2600     }
2601     tx[i] = z;
2602     /* skip zero terms, first term is non-zero */
2603     while (tx[i] == 0.0)
2604         i--;
2605     n = __rem_pio2_large(tx, ty, (int)(ix >> 20) - (0x3ff + 23), i + 1, 1);
2606     if (sign) {
2607         y[0] = -ty[0];
2608         y[1] = -ty[1];
2609         return -n;
2610     }
2611     y[0] = ty[0];
2612     y[1] = ty[1];
2613     return n;
2614 }
2615
/* Copied from musl: src/math/__sin.c
 *
 * Polynomial sine kernel.  x is the argument and y a small correction
 * term (cos() below passes the head and tail produced by __rem_pio2()).
 * When iy is 0 the correction term y is ignored.
 */
static double __sin(double x, double y, int iy)
{
    static const double S1  = -1.66666666666666324348e-01,
                 S2  =  8.33333333332248946124e-03,
                 S3  = -1.98412698298579493134e-04,
                 S4  =  2.75573137070700676789e-06,
                 S5  = -2.50507602534068634195e-08,
                 S6  =  1.58969099521155010221e-10;

    const double x2 = x * x;
    const double x4 = x2 * x2;
    const double x3 = x2 * x;
    const double poly = S2 + x2 * (S3 + x2 * S4) + x2 * x4 * (S5 + x2 * S6);

    if (iy != 0)
        return x - ((x2 * (0.5 * y - x3 * poly) - y) - x3 * S1);
    return x + x3 * (S1 + x2 * poly);
}
2637
/* Copied from musl: src/math/__cos.c
 *
 * Polynomial cosine kernel.  x is the argument and y a small correction
 * term (cos() below passes the head and tail produced by __rem_pio2(),
 * or 0 when no reduction was needed).
 */
static double __cos(double x, double y)
{
    static const double C1  =  4.16666666666666019037e-02,
                 C2  = -1.38888888888741095749e-03,
                 C3  =  2.48015872894767294178e-05,
                 C4  = -2.75573143513906633035e-07,
                 C5  =  2.08757232129817482790e-09,
                 C6  = -1.13596475577881948265e-11;

    const double x2 = x * x;
    const double x4 = x2 * x2;
    const double poly = x2 * (C1 + x2 * (C2 + x2 * C3)) + x4 * x4 * (C4 + x2 * (C5 + x2 * C6));
    const double half_x2 = 0.5 * x2;
    const double lead = 1.0 - half_x2;

    /* (1.0 - lead) - half_x2 recovers what was rounded away in lead */
    return lead + (((1.0 - lead) - half_x2) + (x2 * poly - x * y));
}
2656
2657 /*********************************************************************
2658  *              cos (MSVCRT.@)
2659  *
2660  * Copied from musl: src/math/cos.c
2661  */
2662 double CDECL cos( double x )
2663 {
2664     double y[2];
2665     UINT32 ix;
2666     unsigned n;
2667
2668     ix = *(ULONGLONG*)&x >> 32;
2669     ix &= 0x7fffffff;
2670
2671     /* |x| ~< pi/4 */
2672     if (ix <= 0x3fe921fb) {
2673         if (ix < 0x3e46a09e) { /* |x| < 2**-27 * sqrt(2) */
2674             /* raise inexact if x!=0 */
2675             fp_barrier(x + 0x1p120f);
2676             return 1.0;
2677         }
2678         return __cos(x, 0);
2679     }
2680
2681     /* cos(Inf or NaN) is NaN */
2682     if (isinf(x)) return math_error(_DOMAIN, "cos", x, 0, x - x);
2683     if (ix >= 0x7ff00000)
2684         return x - x;
2685
2686     /* argument reduction */
2687     n = __rem_pio2(x, y);
2688     switch (n & 3) {
2689     case 0: return __cos(y[0], y[1]);
2690     case 1: return -__sin(y[0], y[1], 1);
2691     case 2: return -__cos(y[0], y[1]);
2692     default: return __sin(y[0], y[1], 1);
2693     }
2694 }
2695
2696 /* Copied from musl: src/math/expm1.c */
2697 static double __expm1(double x)
2698 {
2699     static const double o_threshold = 7.09782712893383973096e+02,
2700         ln2_hi = 6.93147180369123816490e-01,
2701         ln2_lo = 1.90821492927058770002e-10,
2702         invln2 = 1.44269504088896338700e+00,
2703         Q1 = -3.33333333333331316428e-02,
2704         Q2 = 1.58730158725481460165e-03,
2705         Q3 = -7.93650757867487942473e-05,
2706         Q4 = 4.00821782732936239552e-06,
2707         Q5 = -2.01099218183624371326e-07;
2708
2709     double y, hi, lo, c, t, e, hxs, hfx, r1, twopk;
2710     union {double f; UINT64 i;} u = {x};
2711     UINT32 hx = u.i >> 32 & 0x7fffffff;
2712     int k, sign = u.i >> 63;
2713
2714     /* filter out huge and non-finite argument */
2715     if (hx >= 0x4043687A) { /* if |x|>=56*ln2 */
2716         if (isnan(x))
2717             return x;
2718         if (isinf(x))
2719             return sign ? -1 : x;
2720         if (sign)
2721             return math_error(_UNDERFLOW, "exp", x, 0, -1);
2722         if (x > o_threshold)
2723             return math_error(_OVERFLOW, "exp", x, 0, x * 0x1p1023);
2724     }
2725
2726     /* argument reduction */
2727     if (hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */
2728         if (hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
2729             if (!sign) {
2730                 hi = x - ln2_hi;
2731                 lo = ln2_lo;
2732                 k = 1;
2733             } else {
2734                 hi = x + ln2_hi;
2735                 lo = -ln2_lo;
2736                 k = -1;
2737             }
2738         } else {
2739             k = invln2 * x + (sign ? -0.5 : 0.5);
2740             t = k;
2741             hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
2742             lo = t * ln2_lo;
2743         }
2744         x = hi - lo;
2745         c = (hi - x) - lo;
2746     } else if (hx < 0x3c900000) { /* |x| < 2**-54, return x */
2747         fp_barrier(x + 0x1p120f);
2748         if (hx < 0x00100000)
2749             fp_barrier((float)x);
2750         return x;
2751     } else
2752         k = 0;
2753
2754     /* x is now in primary range */
2755     hfx = 0.5 * x;
2756     hxs = x * hfx;
2757     r1 = 1.0 + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5))));
2758     t = 3.0 - r1 * hfx;
2759     e = hxs * ((r1 - t) / (6.0 - x * t));
2760     if (k == 0) /* c is 0 */
2761         return x - (x * e - hxs);
2762     e = x * (e - c) - c;
2763     e -= hxs;
2764     /* exp(x) ~ 2^k (x_reduced - e + 1) */
2765     if (k == -1)
2766         return 0.5 * (x - e) - 0.5;
2767     if (k == 1) {
2768         if (x < -0.25)
2769             return -2.0 * (e - (x + 0.5));
2770         return 1.0 + 2.0 * (x - e);
2771     }
2772     u.i = (UINT64)(0x3ff + k) << 52; /* 2^k */
2773     twopk = u.f;
2774     if (k < 0 || k > 56) { /* suffice to return exp(x)-1 */
2775         y = x - e + 1.0;
2776         if (k == 1024)
2777             y = y * 2.0 * 0x1p1023;
2778         else
2779             y = y * twopk;
2780         return y - 1.0;
2781     }
2782     u.i = (UINT64)(0x3ff - k) << 52; /* 2^-k */
2783     if (k < 20)
2784         y = (x - e + (1 - u.f)) * twopk;
2785     else
2786         y = (x - (e + u.f) + 1) * twopk;
2787     return y;
2788 }
2789
2790 static double __expo2(double x, double sign)
2791 {
2792     static const int k = 2043;
2793     static const double kln2 = 0x1.62066151add8bp+10;
2794     double scale;
2795
2796     *(UINT64*)&scale = (UINT64)(0x3ff + k / 2) << 52;
2797     return exp(x - kln2) * (sign * scale) * scale;
2798 }
2799
2800 /*********************************************************************
2801  *              cosh (MSVCRT.@)
2802  *
2803  * Copied from musl: src/math/cosh.c
2804  */
2805 double CDECL cosh( double x )
2806 {
2807     UINT64 ux = *(UINT64*)&x;
2808     UINT64 sign = ux & 0x8000000000000000ULL;
2809     UINT32 w;
2810     double t;
2811
2812     /* |x| */
2813     ux &= (uint64_t)-1 / 2;
2814     x = *(double*)&ux;
2815     w = ux >> 32;
2816
2817     /* |x| < log(2) */
2818     if (w < 0x3fe62e42) {
2819         if (w < 0x3ff00000 - (26 << 20)) {
2820             fp_barrier(x + 0x1p120f);
2821             return 1;
2822         }
2823         t = __expm1(x);
2824         return 1 + t * t / (2 * (1 + t));
2825     }
2826
2827     /* |x| < log(DBL_MAX) */
2828     if (w < 0x40862e42) {
2829         t = exp(x);
2830         /* note: if x>log(0x1p26) then the 1/t is not needed */
2831         return 0.5 * (t + 1 / t);
2832     }
2833
2834     /* |x| > log(DBL_MAX) or nan */
2835     /* note: the result is stored to handle overflow */
2836     if (ux > 0x7ff0000000000000ULL)
2837         *(UINT64*)&t = ux | sign | 0x0008000000000000ULL;
2838     else
2839         t = __expo2(x, 1.0);
2840     return t;
2841 }
2842
2843 /* Copied from musl: src/math/exp_data.c */
2844 static const UINT64 exp_T[] = {
2845     0x0ULL, 0x3ff0000000000000ULL,
2846     0x3c9b3b4f1a88bf6eULL, 0x3feff63da9fb3335ULL,
2847     0xbc7160139cd8dc5dULL, 0x3fefec9a3e778061ULL,
2848     0xbc905e7a108766d1ULL, 0x3fefe315e86e7f85ULL,
2849     0x3c8cd2523567f613ULL, 0x3fefd9b0d3158574ULL,
2850     0xbc8bce8023f98efaULL, 0x3fefd06b29ddf6deULL,
2851     0x3c60f74e61e6c861ULL, 0x3fefc74518759bc8ULL,
2852     0x3c90a3e45b33d399ULL, 0x3fefbe3ecac6f383ULL,
2853     0x3c979aa65d837b6dULL, 0x3fefb5586cf9890fULL,
2854     0x3c8eb51a92fdeffcULL, 0x3fefac922b7247f7ULL,
2855     0x3c3ebe3d702f9cd1ULL, 0x3fefa3ec32d3d1a2ULL,
2856     0xbc6a033489906e0bULL, 0x3fef9b66affed31bULL,
2857     0xbc9556522a2fbd0eULL, 0x3fef9301d0125b51ULL,
2858     0xbc5080ef8c4eea55ULL, 0x3fef8abdc06c31ccULL,
2859     0xbc91c923b9d5f416ULL, 0x3fef829aaea92de0ULL,
2860     0x3c80d3e3e95c55afULL, 0x3fef7a98c8a58e51ULL,
2861     0xbc801b15eaa59348ULL, 0x3fef72b83c7d517bULL,
2862     0xbc8f1ff055de323dULL, 0x3fef6af9388c8deaULL,
2863     0x3c8b898c3f1353bfULL, 0x3fef635beb6fcb75ULL,
2864     0xbc96d99c7611eb26ULL, 0x3fef5be084045cd4ULL,
2865     0x3c9aecf73e3a2f60ULL, 0x3fef54873168b9aaULL,
2866     0xbc8fe782cb86389dULL, 0x3fef4d5022fcd91dULL,
2867     0x3c8a6f4144a6c38dULL, 0x3fef463b88628cd6ULL,
2868     0x3c807a05b0e4047dULL, 0x3fef3f49917ddc96ULL,
2869     0x3c968efde3a8a894ULL, 0x3fef387a6e756238ULL,
2870     0x3c875e18f274487dULL, 0x3fef31ce4fb2a63fULL,
2871     0x3c80472b981fe7f2ULL, 0x3fef2b4565e27cddULL,
2872     0xbc96b87b3f71085eULL, 0x3fef24dfe1f56381ULL,
2873     0x3c82f7e16d09ab31ULL, 0x3fef1e9df51fdee1ULL,
2874     0xbc3d219b1a6fbffaULL, 0x3fef187fd0dad990ULL,
2875     0x3c8b3782720c0ab4ULL, 0x3fef1285a6e4030bULL,
2876     0x3c6e149289cecb8fULL, 0x3fef0cafa93e2f56ULL,
2877     0x3c834d754db0abb6ULL, 0x3fef06fe0a31b715ULL,
2878     0x3c864201e2ac744cULL, 0x3fef0170fc4cd831ULL,
2879     0x3c8fdd395dd3f84aULL, 0x3feefc08b26416ffULL,
2880     0xbc86a3803b8e5b04ULL, 0x3feef6c55f929ff1ULL,
2881     0xbc924aedcc4b5068ULL, 0x3feef1a7373aa9cbULL,
2882     0xbc9907f81b512d8eULL, 0x3feeecae6d05d866ULL,
2883     0xbc71d1e83e9436d2ULL, 0x3feee7db34e59ff7ULL,
2884     0xbc991919b3ce1b15ULL, 0x3feee32dc313a8e5ULL,
2885     0x3c859f48a72a4c6dULL, 0x3feedea64c123422ULL,
2886     0xbc9312607a28698aULL, 0x3feeda4504ac801cULL,
2887     0xbc58a78f4817895bULL, 0x3feed60a21f72e2aULL,
2888     0xbc7c2c9b67499a1bULL, 0x3feed1f5d950a897ULL,
2889     0x3c4363ed60c2ac11ULL, 0x3feece086061892dULL,
2890     0x3c9666093b0664efULL, 0x3feeca41ed1d0057ULL,
2891     0x3c6ecce1daa10379ULL, 0x3feec6a2b5c13cd0ULL,
2892     0x3c93ff8e3f0f1230ULL, 0x3feec32af0d7d3deULL,
2893     0x3c7690cebb7aafb0ULL, 0x3feebfdad5362a27ULL,
2894     0x3c931dbdeb54e077ULL, 0x3feebcb299fddd0dULL,
2895     0xbc8f94340071a38eULL, 0x3feeb9b2769d2ca7ULL,
2896     0xbc87deccdc93a349ULL, 0x3feeb6daa2cf6642ULL,
2897     0xbc78dec6bd0f385fULL, 0x3feeb42b569d4f82ULL,
2898     0xbc861246ec7b5cf6ULL, 0x3feeb1a4ca5d920fULL,
2899     0x3c93350518fdd78eULL, 0x3feeaf4736b527daULL,
2900     0x3c7b98b72f8a9b05ULL, 0x3feead12d497c7fdULL,
2901     0x3c9063e1e21c5409ULL, 0x3feeab07dd485429ULL,
2902     0x3c34c7855019c6eaULL, 0x3feea9268a5946b7ULL,
2903     0x3c9432e62b64c035ULL, 0x3feea76f15ad2148ULL,
2904     0xbc8ce44a6199769fULL, 0x3feea5e1b976dc09ULL,
2905     0xbc8c33c53bef4da8ULL, 0x3feea47eb03a5585ULL,
2906     0xbc845378892be9aeULL, 0x3feea34634ccc320ULL,
2907     0xbc93cedd78565858ULL, 0x3feea23882552225ULL,
2908     0x3c5710aa807e1964ULL, 0x3feea155d44ca973ULL,
2909     0xbc93b3efbf5e2228ULL, 0x3feea09e667f3bcdULL,
2910     0xbc6a12ad8734b982ULL, 0x3feea012750bdabfULL,
2911     0xbc6367efb86da9eeULL, 0x3fee9fb23c651a2fULL,
2912     0xbc80dc3d54e08851ULL, 0x3fee9f7df9519484ULL,
2913     0xbc781f647e5a3ecfULL, 0x3fee9f75e8ec5f74ULL,
2914     0xbc86ee4ac08b7db0ULL, 0x3fee9f9a48a58174ULL,
2915     0xbc8619321e55e68aULL, 0x3fee9feb564267c9ULL,
2916     0x3c909ccb5e09d4d3ULL, 0x3feea0694fde5d3fULL,
2917     0xbc7b32dcb94da51dULL, 0x3feea11473eb0187ULL,
2918     0x3c94ecfd5467c06bULL, 0x3feea1ed0130c132ULL,
2919     0x3c65ebe1abd66c55ULL, 0x3feea2f336cf4e62ULL,
2920     0xbc88a1c52fb3cf42ULL, 0x3feea427543e1a12ULL,
2921     0xbc9369b6f13b3734ULL, 0x3feea589994cce13ULL,
2922     0xbc805e843a19ff1eULL, 0x3feea71a4623c7adULL,
2923     0xbc94d450d872576eULL, 0x3feea8d99b4492edULL,
2924     0x3c90ad675b0e8a00ULL, 0x3feeaac7d98a6699ULL,
2925     0x3c8db72fc1f0eab4ULL, 0x3feeace5422aa0dbULL,
2926     0xbc65b6609cc5e7ffULL, 0x3feeaf3216b5448cULL,
2927     0x3c7bf68359f35f44ULL, 0x3feeb1ae99157736ULL,
2928     0xbc93091fa71e3d83ULL, 0x3feeb45b0b91ffc6ULL,
2929     0xbc5da9b88b6c1e29ULL, 0x3feeb737b0cdc5e5ULL,
2930     0xbc6c23f97c90b959ULL, 0x3feeba44cbc8520fULL,
2931     0xbc92434322f4f9aaULL, 0x3feebd829fde4e50ULL,
2932     0xbc85ca6cd7668e4bULL, 0x3feec0f170ca07baULL,
2933     0x3c71affc2b91ce27ULL, 0x3feec49182a3f090ULL,
2934     0x3c6dd235e10a73bbULL, 0x3feec86319e32323ULL,
2935     0xbc87c50422622263ULL, 0x3feecc667b5de565ULL,
2936     0x3c8b1c86e3e231d5ULL, 0x3feed09bec4a2d33ULL,
2937     0xbc91bbd1d3bcbb15ULL, 0x3feed503b23e255dULL,
2938     0x3c90cc319cee31d2ULL, 0x3feed99e1330b358ULL,
2939     0x3c8469846e735ab3ULL, 0x3feede6b5579fdbfULL,
2940     0xbc82dfcd978e9db4ULL, 0x3feee36bbfd3f37aULL,
2941     0x3c8c1a7792cb3387ULL, 0x3feee89f995ad3adULL,
2942     0xbc907b8f4ad1d9faULL, 0x3feeee07298db666ULL,
2943     0xbc55c3d956dcaebaULL, 0x3feef3a2b84f15fbULL,
2944     0xbc90a40e3da6f640ULL, 0x3feef9728de5593aULL,
2945     0xbc68d6f438ad9334ULL, 0x3feeff76f2fb5e47ULL,
2946     0xbc91eee26b588a35ULL, 0x3fef05b030a1064aULL,
2947     0x3c74ffd70a5fddcdULL, 0x3fef0c1e904bc1d2ULL,
2948     0xbc91bdfbfa9298acULL, 0x3fef12c25bd71e09ULL,
2949     0x3c736eae30af0cb3ULL, 0x3fef199bdd85529cULL,
2950     0x3c8ee3325c9ffd94ULL, 0x3fef20ab5fffd07aULL,
2951     0x3c84e08fd10959acULL, 0x3fef27f12e57d14bULL,
2952     0x3c63cdaf384e1a67ULL, 0x3fef2f6d9406e7b5ULL,
2953     0x3c676b2c6c921968ULL, 0x3fef3720dcef9069ULL,
2954     0xbc808a1883ccb5d2ULL, 0x3fef3f0b555dc3faULL,
2955     0xbc8fad5d3ffffa6fULL, 0x3fef472d4a07897cULL,
2956     0xbc900dae3875a949ULL, 0x3fef4f87080d89f2ULL,
2957     0x3c74a385a63d07a7ULL, 0x3fef5818dcfba487ULL,
2958     0xbc82919e2040220fULL, 0x3fef60e316c98398ULL,
2959     0x3c8e5a50d5c192acULL, 0x3fef69e603db3285ULL,
2960     0x3c843a59ac016b4bULL, 0x3fef7321f301b460ULL,
2961     0xbc82d52107b43e1fULL, 0x3fef7c97337b9b5fULL,
2962     0xbc892ab93b470dc9ULL, 0x3fef864614f5a129ULL,
2963     0x3c74b604603a88d3ULL, 0x3fef902ee78b3ff6ULL,
2964     0x3c83c5ec519d7271ULL, 0x3fef9a51fbc74c83ULL,
2965     0xbc8ff7128fd391f0ULL, 0x3fefa4afa2a490daULL,
2966     0xbc8dae98e223747dULL, 0x3fefaf482d8e67f1ULL,
2967     0x3c8ec3bc41aa2008ULL, 0x3fefba1bee615a27ULL,
2968     0x3c842b94c3a9eb32ULL, 0x3fefc52b376bba97ULL,
2969     0x3c8a64a931d185eeULL, 0x3fefd0765b6e4540ULL,
2970     0xbc8e37bae43be3edULL, 0x3fefdbfdad9cbe14ULL,
2971     0x3c77893b4d91cd9dULL, 0x3fefe7c1819e90d8ULL,
2972     0x3c5305c14160cc89ULL, 0x3feff3c22b8f71f1ULL
2973 };
2974
2975 /*********************************************************************
2976  *              exp (MSVCRT.@)
2977  *
2978  * Copied from musl: src/math/exp.c
      *
      * Returns e raised to the power x.
      *
      * The argument is split as x = k*ln2/N + r with N = 1 << 7; 2^(k/N) is
      * read from exp_T as a (tail, scale bits) pair and exp(r) is approximated
      * by a polynomial with the coefficients C[].
      *
      * Special cases handled before the main computation:
      *   - biased exponent below 0x3c9 (|x| < 2^-54): returns 1.0 + x
      *   - -inf: returns 0.0
      *   - NaN: reported through math_error(_DOMAIN) when _MSVCR_VER == 0,
      *     otherwise 1.0 + x is returned
      *   - +inf: returns 1.0 + x
      *   - finite x with |x| >= 1024: reported through math_error as
      *     _UNDERFLOW (x negative) or _OVERFLOW (x positive)
      * Results that overflow to infinity or drop below DBL_MIN in the rescaled
      * path (512 <= |x| < 1024) are also reported through math_error.
2979  */
2980 double CDECL exp( double x )
2981 {
              /* Polynomial coefficients used for exp(r) - 1 - r (see tmp below). */
2982     static const double C[] = {
2983         0x1.ffffffffffdbdp-2,
2984         0x1.555555555543cp-3,
2985         0x1.55555cf172b91p-5,
2986         0x1.1111167a4d017p-7
2987     };
              /* N/ln2 and the hi/lo parts of -ln2/N, with N = 1 << 7. */
2988     static const double invln2N = 0x1.71547652b82fep0 * (1 << 7),
2989         negln2hiN = -0x1.62e42fefa0000p-8,
2990         negln2loN = -0x1.cf79abc9e3b3ap-47;
2991
2992     UINT32 abstop;
2993     UINT64 ki, idx, top, sbits;
2994     double kd, z, r, r2, scale, tail, tmp;
2995
              /* Biased exponent of x with the sign bit masked off. */
2996     abstop = (*(UINT64*)&x >> 52) & 0x7ff;
              /* Unsigned wrap-around makes this true for exponents below 0x3c9 as
                 well as for exponents of 0x408 and above. */
2997     if (abstop -  0x3c9 >= 0x408 - 0x3c9) {
2998         if (abstop - 0x3c9 >= 0x80000000)
2999             /* Avoid spurious underflow for tiny x. */
3000             /* Note: 0 is common input. */
3001             return 1.0 + x;
3002         if (abstop >= 0x409) {
                  /* -inf */
3003             if (*(UINT64*)&x == 0xfff0000000000000ULL)
3004                 return 0.0;
3005 #if _MSVCR_VER == 0
                  /* NaN check. The sign bit is masked off first: the comparison is
                     unsigned, so without the mask every negative x reaching this
                     point would be treated as NaN and reported as _DOMAIN. */
3006             if ((*(UINT64*)&x & ~(1ULL << 63)) > 0x7ff0000000000000ULL)
3007                 return math_error(_DOMAIN, "exp", x, 0, 1.0 + x);
3008 #endif
                  /* +inf, and NaN when the _DOMAIN report above is compiled out. */
3009             if (abstop >= 0x7ff)
3010                 return 1.0 + x;
                  /* Finite |x| >= 1024: the sign of x selects underflow or overflow. */
3011             if (*(UINT64*)&x >> 63)
3012                 return math_error(_UNDERFLOW, "exp", x, 0, fp_barrier(DBL_MIN) * DBL_MIN);
3013             else
3014                 return math_error(_OVERFLOW, "exp", x, 0, fp_barrier(DBL_MAX) * DBL_MAX);
3015         }
3016         /* Large x is special cased below. */
              /* Only exponent 0x408 (512 <= |x| < 1024) reaches this point. */
3017         abstop = 0;
3018     }
3019
3020     /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
3021     /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
3022     z = invln2N * x;
3023     kd = __round(z);
3024     ki = (INT64)kd;
3025
3026     r = x + kd * negln2hiN + kd * negln2loN;
3027     /* 2^(k/N) ~= scale * (1 + tail). */
              /* exp_T stores two 64-bit words per table entry: tail, then scale bits. */
3028     idx = 2 * (ki % (1 << 7));
3029     top = ki << (52 - 7);
3030     tail = *(double*)&exp_T[idx];
3031     /* This is only a valid scale when -1023*N < k < 1024*N. */
3032     sbits = exp_T[idx + 1] + top;
3033     /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
3034     /* Evaluation is optimized assuming superscalar pipelined execution. */
3035     r2 = r * r;
3036     /* Without fma the worst case error is 0.25/N ulp larger. */
3037     /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
3038     tmp = tail + r + r2 * (C[0] + r * C[1]) + r2 * r2 * (C[2] + r * C[3]);
3039     if (abstop == 0) {
3040         /* Handle cases that may overflow or underflow when computing the result that
3041            is scale*(1+TMP) without intermediate rounding. The bit representation of
3042            scale is in SBITS, however it has a computed exponent that may have
3043            overflown into the sign bit so that needs to be adjusted before using it as
3044            a double. (int32_t)KI is the k used in the argument reduction and exponent
3045            adjustment of scale, positive k here means the result may overflow and
3046            negative k means the result may underflow. */
              /* NOTE: this scale shadows the function-level scale declared above. */
3047         double scale, y;
3048
3049         if ((ki & 0x80000000) == 0) {
3050             /* k > 0, the exponent of scale might have overflowed by <= 460. */
3051             sbits -= 1009ull << 52;
3052             scale = *(double*)&sbits;
3053             y = 0x1p1009 * (scale + scale * tmp);
3054             if (isinf(y))
3055                 return math_error(_OVERFLOW, "exp", x, 0, y);
3056             return y;
3057         }
3058         /* k < 0, need special care in the subnormal range. */
3059         sbits += 1022ull << 52;
3060         scale = *(double*)&sbits;
3061         y = scale + scale * tmp;
3062         if (y < 1.0) {
3063             /* Round y to the right precision before scaling it into the subnormal
3064                range to avoid double rounding that can cause 0.5+E/2 ulp error where
3065                E is the worst-case ulp error outside the subnormal range. So this
3066                is only useful if the goal is better than 1 ulp worst-case error. */
3067             double hi, lo;
3068             lo = scale - y + scale * tmp;
3069             hi = 1.0 + y;
3070             lo = 1.0 - hi + y + lo;
3071             y = hi + lo - 1.0;
3072             /* Avoid -0.0 with downward rounding. */
3073             if (y == 0.0)
3074                 y = 0.0;
3075             /* The underflow exception needs to be signaled explicitly. */
3076             fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022);
3077             y = 0x1p-1022 * y;
3078             return math_error(_UNDERFLOW, "exp", x, 0, y);
3079         }
3080         y = 0x1p-1022 * y;
3081         return y;
3082     }
3083     scale = *(double*)&sbits;
3084     /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
3085        is no spurious underflow here even without fma. */
3086     return scale + scale * tmp;
3087 }
3088
3089 /*********************************************************************
3090  *              fmod (MSVCRT.@)
3091  *
3092  * Copied from musl: src/math/fmod.c
      *
      * Returns the remainder of x / y, carrying the sign bit of x. After the
      * special cases below, the remainder is computed on the raw bit patterns
      * with integer shifts and subtractions only.
      *
      * Special cases:
      *   - x infinite: reported through math_error(_DOMAIN)
      *   - y zero, y NaN, or x NaN: returns (x * y) / (x * y)
      *   - |x| == |y|: returns 0 * x
      *   - |x| < |y|: returns x unchanged
3093  */
3094 double CDECL fmod( double x, double y )
3095 {
3096     UINT64 xi = *(UINT64*)&x;
3097     UINT64 yi = *(UINT64*)&y;
              /* Biased exponents of x and y, and the sign bit of x. */
3098     int ex = xi >> 52 & 0x7ff;
3099     int ey = yi >> 52 & 0x7ff;
3100     int sx = xi >> 63;
3101     UINT64 i;
3102
3103     if (isinf(x)) return math_error(_DOMAIN, "fmod", x, y, (x * y) / (x * y));
              /* x infinite was handled above, so ex == 0x7ff means x is NaN here. */
3104     if (yi << 1 == 0 || isnan(y) || ex == 0x7ff)
3105         return (x * y) / (x * y);
              /* Shifting out the sign bit compares the magnitudes of x and y. */
3106     if (xi << 1 <= yi << 1) {
3107         if (xi << 1 == yi << 1)
3108             return 0 * x;
3109         return x;
3110     }
3111
3112     /* normalize x and y */
3113     if (!ex) {
                  /* Subnormal: ex is decremented once per leading zero bit of the
                     mantissa, then the mantissa is shifted so that its leading one
                     ends up in bit 52. */
3114         for (i = xi << 12; i >> 63 == 0; ex--, i <<= 1);
3115         xi <<= -ex + 1;
3116     } else {
                  /* Normal: keep the 52 mantissa bits and set the implicit one. */
3117         xi &= -1ULL >> 12;
3118         xi |= 1ULL << 52;
3119     }
3120     if (!ey) {
                  /* Same normalization for y. */
3121         for (i = yi << 12; i >> 63 == 0; ey--, i <<= 1);
3122         yi <<= -ey + 1;
3123     } else {
3124         yi &= -1ULL >> 12;
3125         yi |= 1ULL << 52;
3126     }
3127
3128     /* x mod y */
              /* One conditional subtract and one left shift per unit of exponent
                 difference; a zero difference means y divides x exactly. */
3129     for (; ex > ey; ex--) {
3130         i = xi - yi;
3131         if (i >> 63 == 0) {
3132             if (i == 0)
3133                 return 0 * x;
3134             xi = i;
3135         }
3136         xi <<= 1;
3137     }
              /* Final conditional subtract at equal exponents. */
3138     i = xi - yi;
3139     if (i >> 63 == 0) {
3140         if (i == 0)
3141             return 0 * x;
3142         xi = i;
3143     }
              /* Shift the remainder up until bit 52 is set again, adjusting ex. */
3144     for (; xi >> 52 == 0; xi <<= 1, ex--);
3145
3146     /* scale result */
3147     if (ex > 0) {
                  /* Normal result: drop the implicit one and insert the exponent. */
3148         xi -= 1ULL << 52;
3149         xi |= (UINT64)ex << 52;
3150     } else {
                  /* Exponent <= 0: shift the mantissa down into subnormal form. */
3151         xi >>= -ex + 1;
3152     }
              /* Restore the sign of x. */
3153     xi |= (UINT64)sx << 63;
3154     return *(double*)&xi;
3155 }
3156
3157 /*********************************************************************
3158  *              log (MSVCRT.@)
3159  *
3160  * Copied from musl: src/math/log.c src/math/log_data.c
      *
      * Returns the natural logarithm of x.
      *
      * Inputs whose bit pattern lies in [0x3fee000000000000,
      * 0x3fee000000000000 + 0x3090000000000) (values close to 1.0) are
      * evaluated with the polynomial B[] in r = x - 1. All other positive
      * finite inputs are written as 2^k * z, z is matched to one of
      * 1 << 7 table entries, and the result is assembled from the table
      * values, k * ln2 and the polynomial A[].
      *
      * Special cases:
      *   - x == 1.0: returns 0
      *   - +0 / -0: reported through math_error(_SING) with +-1.0 / x
      *   - +inf: returns x
      *   - NaN: returns x
      *   - any other value with the sign bit set: reported through
      *     math_error(_DOMAIN)
      *   - subnormal x: multiplied by 2^52 and the exponent corrected by 52
3161  */
3162 double CDECL log( double x )
3163 {
              /* ln2 split into a high and a low part. */
3164     static const double Ln2hi = 0x1.62e42fefa3800p-1,
3165         Ln2lo = 0x1.ef35793c76730p-45;
              /* Polynomial coefficients for the table-driven path. */
3166     static const double A[] = {
3167         -0x1.0000000000001p-1,
3168         0x1.555555551305bp-2,
3169         -0x1.fffffffeb459p-3,
3170         0x1.999b324f10111p-3,
3171         -0x1.55575e506c89fp-3
3172     };
              /* Polynomial coefficients for inputs close to 1.0. */
3173     static const double B[] = {
3174         -0x1p-1,
3175         0x1.5555555555577p-2,
3176         -0x1.ffffffffffdcbp-3,
3177         0x1.999999995dd0cp-3,
3178         -0x1.55555556745a7p-3,
3179         0x1.24924a344de3p-3,
3180         -0x1.fffffa4423d65p-4,
3181         0x1.c7184282ad6cap-4,
3182         -0x1.999eb43b068ffp-4,
3183         0x1.78182f7afd085p-4,
3184         -0x1.5521375d145cdp-4
3185     };
              /* Per-subinterval values, indexed by i below: invc scales (z - c)
                 into r and logc is added to k*Ln2hi (see "r ~= z/c - 1" and
                 "log(c)" in the comments further down). */
3186     static const struct {
3187         double invc, logc;
3188     } T[] = {
3189         {0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
3190         {0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
3191         {0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
3192         {0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2},
3193         {0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2},
3194         {0x1.69147332f0cbap+0, -0x1.602d076180000p-2},
3195         {0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2},
3196         {0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2},
3197         {0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2},
3198         {0x1.614b36b9ddc14p+0, -0x1.49da7fda85000p-2},
3199         {0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2},
3200         {0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2},
3201         {0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2},
3202         {0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2},
3203         {0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2},
3204         {0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2},
3205         {0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2},
3206         {0x1.52aff42064583p+0, -0x1.1e9e129279000p-2},
3207         {0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2},
3208         {0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2},
3209         {0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2},
3210         {0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2},
3211         {0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2},
3212         {0x1.4880524d48434p+0, -0x1.feb224586f000p-3},
3213         {0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3},
3214         {0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3},
3215         {0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3},
3216         {0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3},
3217         {0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3},
3218         {0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3},
3219         {0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3},
3220         {0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3},
3221         {0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3},
3222         {0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3},
3223         {0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3},
3224         {0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3},
3225         {0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3},
3226         {0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3},
3227         {0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3},
3228         {0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3},
3229         {0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3},
3230         {0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3},
3231         {0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3},
3232         {0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3},
3233         {0x1.293726014b530p+0, -0x1.31b996b490000p-3},
3234         {0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3},
3235         {0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3},
3236         {0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3},
3237         {0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3},
3238         {0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3},
3239         {0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4},
3240         {0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4},
3241         {0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4},
3242         {0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4},
3243         {0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4},
3244         {0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4},
3245         {0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4},
3246         {0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4},
3247         {0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4},
3248         {0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4},
3249         {0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4},
3250         {0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4},
3251         {0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4},
3252         {0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4},
3253         {0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5},
3254         {0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5},
3255         {0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5},
3256         {0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5},
3257         {0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5},
3258         {0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5},
3259         {0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5},
3260         {0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5},
3261         {0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6},
3262         {0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6},
3263         {0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6},
3264         {0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6},
3265         {0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7},
3266         {0x1.02865137932a9p+0, -0x1.419355daa0000p-7},
3267         {0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8},
3268         {0x1.008040614b195p+0, -0x1.0040979240000p-9},
3269         {0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9},
3270         {0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7},
3271         {0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6},
3272         {0x1.f25f63ceeadcdp-1, 0x1.b9fc114890000p-6},
3273         {0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5},
3274         {0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5},
3275         {0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5},
3276         {0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5},
3277         {0x1.e01e009609a56p-1, 0x1.07598e598c000p-4},
3278         {0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4},
3279         {0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4},
3280         {0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4},
3281         {0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4},
3282         {0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4},
3283         {0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4},
3284         {0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4},
3285         {0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4},
3286         {0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3},
3287         {0x1.bf583eeece73fp-1, 0x1.147858292b000p-3},
3288         {0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3},
3289         {0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3},
3290         {0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3},
3291         {0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3},
3292         {0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3},
3293         {0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3},
3294         {0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3},
3295         {0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3},
3296         {0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3},
3297         {0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3},
3298         {0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3},
3299         {0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3},
3300         {0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3},
3301         {0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3},
3302         {0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3},
3303         {0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3},
3304         {0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3},
3305         {0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2},
3306         {0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2},
3307         {0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2},
3308         {0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2},
3309         {0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2},
3310         {0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2},
3311         {0x1.8060195f40260p-1, 0x1.2595fd7636800p-2},
3312         {0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2},
3313         {0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2},
3314         {0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
3315         {0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
3316         {0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2}
3317     };
              /* Per-subinterval value c as a chi + clo pair; both parts are
                 subtracted from z when r is computed below. */
3318     static const struct {
3319         double chi, clo;
3320     } T2[] = {
3321         {0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
3322         {0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
3323         {0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
3324         {0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57},
3325         {0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56},
3326         {0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55},
3327         {0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55},
3328         {0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56},
3329         {0x1.710000e86978p-1, 0x1.bff6671097952p-56},
3330         {0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55},
3331         {0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57},
3332         {0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57},
3333         {0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55},
3334         {0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56},
3335         {0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55},
3336         {0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55},
3337         {0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55},
3338         {0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55},
3339         {0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55},
3340         {0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55},
3341         {0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55},
3342         {0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56},
3343         {0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55},
3344         {0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55},
3345         {0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55},
3346         {0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56},
3347         {0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55},
3348         {0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56},
3349         {0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55},
3350         {0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55},
3351         {0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60},
3352         {0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55},
3353         {0x1.a10001145b006p-1, 0x1.4ff489958da56p-56},
3354         {0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55},
3355         {0x1.a500010971d79p-1, 0x1.8fecadd78793p-55},
3356         {0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55},
3357         {0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55},
3358         {0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57},
3359         {0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55},
3360         {0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57},
3361         {0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58},
3362         {0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56},
3363         {0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56},
3364         {0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55},
3365         {0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56},
3366         {0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57},
3367         {0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57},
3368         {0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55},
3369         {0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55},
3370         {0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57},
3371         {0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55},
3372         {0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55},
3373         {0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56},
3374         {0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57},
3375         {0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55},
3376         {0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55},
3377         {0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56},
3378         {0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55},
3379         {0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58},
3380         {0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56},
3381         {0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56},
3382         {0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55},
3383         {0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55},
3384         {0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57},
3385         {0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56},
3386         {0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56},
3387         {0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56},
3388         {0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58},
3389         {0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55},
3390         {0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56},
3391         {0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58},
3392         {0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55},
3393         {0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59},
3394         {0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55},
3395         {0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55},
3396         {0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57},
3397         {0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56},
3398         {0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57},
3399         {0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56},
3400         {0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57},
3401         {0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55},
3402         {0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54},
3403         {0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54},
3404         {0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55},
3405         {0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57},
3406         {0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54},
3407         {0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55},
3408         {0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56},
3409         {0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55},
3410         {0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54},
3411         {0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54},
3412         {0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55},
3413         {0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54},
3414         {0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54},
3415         {0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57},
3416         {0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54},
3417         {0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54},
3418         {0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54},
3419         {0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56},
3420         {0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56},
3421         {0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56},
3422         {0x1.2b00014556313p+0, -0x1.2808233f21f02p-54},
3423         {0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55},
3424         {0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55},
3425         {0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55},
3426         {0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54},
3427         {0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54},
3428         {0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55},
3429         {0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54},
3430         {0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55},
3431         {0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56},
3432         {0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54},
3433         {0x1.410001532aff4p+0, 0x1.7f8375f198524p-57},
3434         {0x1.4300017478b29p+0, 0x1.301e672dc5143p-55},
3435         {0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55},
3436         {0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54},
3437         {0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54},
3438         {0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54},
3439         {0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54},
3440         {0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54},
3441         {0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57},
3442         {0x1.530001605277ap+0, -0x1.6bfcece233209p-54},
3443         {0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55},
3444         {0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54},
3445         {0x1.5900017e61012p+0, 0x1.87ec581afef9p-55},
3446         {0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
3447         {0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
3448         {0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54}
3449     };
3450
3451     double w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
3452     UINT64 ix, iz, tmp;
3453     UINT32 top;
3454     int k, i;
3455
              /* ix: raw bits of x; top: the upper 16 bits (sign, exponent and the
                 four highest mantissa bits). */
3456     ix = *(UINT64*)&x;
3457     top = ix >> 48;
3458     if (ix - 0x3fee000000000000ULL < 0x3090000000000ULL) {
3459         double rhi, rlo;
3460
3461         /* Handle close to 1.0 inputs separately. */
3462         /* Fix sign of zero with downward rounding when x==1. */
3463         if (ix == 0x3ff0000000000000ULL)
3464             return 0;
3465         r = x - 1.0;
3466         r2 = r * r;
3467         r3 = r * r2;
3468         y = r3 * (B[1] + r * B[2] + r2 * B[3] + r3 * (B[4] + r * B[5] + r2 * B[6] +
3469                     r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
3470         /* Worst-case error is around 0.507 ULP. */
                  /* Split r into rhi + rlo so that the B[0] * r^2 term is formed
                     from rhi * rhi plus a correction using rlo. */
3471         w = r * 0x1p27;
3472         rhi = r + w - w;
3473         rlo = r - rhi;
3474         w = rhi * rhi * B[0]; /* B[0] == -0.5. */
3475         hi = r + w;
3476         lo = r - hi + w;
3477         lo += B[0] * rlo * (rhi + r);
3478         y += lo;
3479         y += hi;
3480         return y;
3481     }
3482     if (top - 0x0010 >= 0x7ff0 - 0x0010) {
3483         /* x < 0x1p-1022 or inf or nan. */
                  /* +0 or -0: the numerator sign is chosen so the quotient is negative. */
3484         if (ix * 2 == 0)
3485             return math_error(_SING, "log", x, 0, (top & 0x8000 ? 1.0 : -1.0) / x);
3486         if (ix == 0x7ff0000000000000ULL) /* log(inf) == inf. */
3487             return x;
                  /* NaN of either sign is returned unchanged. */
3488         if ((top & 0x7ff0) == 0x7ff0 && (ix & 0xfffffffffffffULL))
3489             return x;
                  /* Remaining values with the sign bit set are outside the domain. */
3490         if (top & 0x8000)
3491             return math_error(_DOMAIN, "log", x, 0, (x - x) / (x - x));
3492         /* x is subnormal, normalize it. */
3493         x *= 0x1p52;
3494         ix = *(UINT64*)&x;
3495         ix -= 52ULL << 52;
3496     }
3497
3498     /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
3499        The range is split into N subintervals.
3500        The ith subinterval contains z and c is near its center. */
3501     tmp = ix - 0x3fe6000000000000ULL;
3502     i = (tmp >> (52 - 7)) % (1 << 7);
3503     k = (INT64)tmp >> 52; /* arithmetic shift */
3504     iz = ix - (tmp & 0xfffULL << 52);
3505     invc = T[i].invc;
3506     logc = T[i].logc;
3507     z = *(double*)&iz;
3508
3509     /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
3510     /* r ~= z/c - 1, |r| < 1/(2*N). */
3511     r = (z - T2[i].chi - T2[i].clo) * invc;
3512     kd = (double)k;
3513
3514     /* hi + lo = r + log(c) + k*Ln2. */
3515     w = kd * Ln2hi + logc;
3516     hi = w + r;
3517     lo = w - hi + r + kd * Ln2lo;
3518
3519     /* log(x) = lo + (log1p(r) - r) + hi. */
3520     r2 = r * r; /* rounding error: 0x1p-54/N^2. */
3521     /* Worst case error if |y| > 0x1p-5:
3522        0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
3523        Worst case error if |y| > 0x1p-4:
3524        0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
3525     y = lo + r2 * A[0] +
3526         r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
3527     return y;
3528 }
3529
3530 /*********************************************************************
3531  *              log10 (MSVCRT.@)
      *
      * Returns the base-10 logarithm of x.
      *
      * x is reduced to 2^k * m with m in [sqrt(2)/2, sqrt(2)], log(m) is
      * evaluated as a hi + lo pair from the Lg1..Lg7 polynomial, and the
      * result is combined with k * log10(2) using split constants.
      *
      * Special cases:
      *   - +0 / -0: reported through math_error(_SING) with -1 / (x * x)
      *   - NaN with the sign bit set: returns x
      *   - any other value with the sign bit set: reported through
      *     math_error(_DOMAIN)
      *   - subnormal x: scaled by 2^54 with k adjusted by -54
      *   - +inf or NaN without the sign bit: returns x
      *   - x == 1.0: returns 0
3532  */
3533 double CDECL log10( double x )
3534 {
3535     static const double ivln10hi = 4.34294481878168880939e-01,
3536         ivln10lo = 2.50829467116452752298e-11,
3537         log10_2hi = 3.01029995663611771306e-01,
3538         log10_2lo = 3.69423907715893078616e-13,
3539         Lg1 = 6.666666666666735130e-01,
3540         Lg2 = 3.999999999940941908e-01,
3541         Lg3 = 2.857142874366239149e-01,
3542         Lg4 = 2.222219843214978396e-01,
3543         Lg5 = 1.818357216161805012e-01,
3544         Lg6 = 1.531383769920937332e-01,
3545         Lg7 = 1.479819860511658591e-01;
3546
3547     union {double f; UINT64 i;} u = {x};
3548     double hfsq, f, s, z, R, w, t1, t2, dk, y, hi, lo, val_hi, val_lo;
3549     UINT32 hx;
3550     int k;
3551
              /* hx: upper 32 bits of x (sign, exponent, top of the mantissa). */
3552     hx = u.i >> 32;
3553     k = 0;
              /* Taken for zero, subnormal, and any value with the sign bit set. */
3554     if (hx < 0x00100000 || hx >> 31) {
3555         if (u.i << 1 == 0)
3556             return math_error(_SING, "log10", x, 0, -1 / (x * x));
                  /* NaN test with the sign bit masked off. */
3557         if ((u.i & ~(1ULL << 63)) > 0x7ff0000000000000ULL)
3558             return x;
3559         if (hx >> 31)
3560             return math_error(_DOMAIN, "log10", x, 0, (x - x) / (x - x));
3561         /* subnormal number, scale x up */
3562         k -= 54;
3563         x *= 0x1p54;
3564         u.f = x;
3565         hx = u.i >> 32;
3566     } else if (hx >= 0x7ff00000) {
3567         return x;
3568     } else if (hx == 0x3ff00000 && u.i<<32 == 0)
3569         return 0;
3570
3571     /* reduce x into [sqrt(2)/2, sqrt(2)] */
3572     hx += 0x3ff00000 - 0x3fe6a09e;
3573     k += (int)(hx >> 20) - 0x3ff;
3574     hx = (hx & 0x000fffff) + 0x3fe6a09e;
3575     u.i = (UINT64)hx << 32 | (u.i & 0xffffffff);
3576     x = u.f;
3577
3578     f = x - 1.0;
3579     hfsq = 0.5 * f * f;
3580     s = f / (2.0 + f);
3581     z = s * s;
3582     w = z * z;
3583     t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
3584     t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
3585     R = t2 + t1;
3586
3587     /* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */
3588     hi = f - hfsq;
              /* Clear the low 32 bits of hi; the part removed is recovered in lo. */
3589     u.f = hi;
3590     u.i &= (UINT64)-1 << 32;
3591     hi = u.f;
3592     lo = f - hi - hfsq + s * (hfsq + R);
3593
3594     /* val_hi+val_lo ~ log10(1+f) + k*log10(2) */
3595     val_hi = hi * ivln10hi;
3596     dk = k;
3597     y = dk * log10_2hi;
3598     val_lo = dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi;
3599
3600     /*
3601      * Extra precision for adding y is not strictly needed
3602      * since there is no very large cancellation near x = sqrt(2) or
3603      * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs
3604      * with some parallelism and it reduces the error for many args.
3605      */
3606     w = y + val_hi;
3607     val_lo += (y - w) + val_hi;
3608     val_hi = w;
3609
3610     return val_lo + val_hi;
3611 }
3612
/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
   additional 15 bits precision. IX is the bit representation of x, but
   normalized in the subnormal range using the sign bit for the exponent.

   ix   - bit pattern of the (positive) argument, see above.
   tail - out: receives the low-order part of the result.
   Returns the high-order part of log(x).

   In the comments below N is the number of table entries (1 << 7 = 128)
   and OFF is the constant 0x3fe6955500000000 subtracted from ix. */
static double pow_log(UINT64 ix, double *tail)
{
    /* Per-subinterval data, indexed by the 7 mantissa bits extracted below:
       invc is 1/c for the subinterval's reference point c, and
       logc + logctail together give log(c) (logctail is the low part). */
    static const struct {
        double invc, logc, logctail;
    } T[] = {
        {0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48},
        {0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46},
        {0x1.6600000000000p+0, -0x1.5767717455800p-2, -0x1.362a4d5b6506dp-45},
        {0x1.6400000000000p+0, -0x1.51aad872df800p-2, -0x1.684e49eb067d5p-49},
        {0x1.6200000000000p+0, -0x1.4be5f95777800p-2, -0x1.41b6993293ee0p-47},
        {0x1.6000000000000p+0, -0x1.4618bc21c6000p-2, 0x1.3d82f484c84ccp-46},
        {0x1.5e00000000000p+0, -0x1.404308686a800p-2, 0x1.c42f3ed820b3ap-50},
        {0x1.5c00000000000p+0, -0x1.3a64c55694800p-2, 0x1.0b1c686519460p-45},
        {0x1.5a00000000000p+0, -0x1.347dd9a988000p-2, 0x1.5594dd4c58092p-45},
        {0x1.5800000000000p+0, -0x1.2e8e2bae12000p-2, 0x1.67b1e99b72bd8p-45},
        {0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46},
        {0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46},
        {0x1.5400000000000p+0, -0x1.22941fbcf7800p-2, -0x1.65a242853da76p-46},
        {0x1.5200000000000p+0, -0x1.1c898c1699800p-2, -0x1.fafbc68e75404p-46},
        {0x1.5000000000000p+0, -0x1.1675cababa800p-2, 0x1.f1fc63382a8f0p-46},
        {0x1.4e00000000000p+0, -0x1.1058bf9ae4800p-2, -0x1.6a8c4fd055a66p-45},
        {0x1.4c00000000000p+0, -0x1.0a324e2739000p-2, -0x1.c6bee7ef4030ep-47},
        {0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48},
        {0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48},
        {0x1.4800000000000p+0, -0x1.fb9186d5e4000p-3, 0x1.d572aab993c87p-47},
        {0x1.4600000000000p+0, -0x1.ef0adcbdc6000p-3, 0x1.b26b79c86af24p-45},
        {0x1.4400000000000p+0, -0x1.e27076e2af000p-3, -0x1.72f4f543fff10p-46},
        {0x1.4200000000000p+0, -0x1.d5c216b4fc000p-3, 0x1.1ba91bbca681bp-45},
        {0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45},
        {0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45},
        {0x1.3e00000000000p+0, -0x1.bc286742d9000p-3, 0x1.94eb0318bb78fp-46},
        {0x1.3c00000000000p+0, -0x1.af3c94e80c000p-3, 0x1.a4e633fcd9066p-52},
        {0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45},
        {0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45},
        {0x1.3800000000000p+0, -0x1.9525a9cf45000p-3, -0x1.ad1d904c1d4e3p-45},
        {0x1.3600000000000p+0, -0x1.87fa06520d000p-3, 0x1.bbdbf7fdbfa09p-45},
        {0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45},
        {0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45},
        {0x1.3200000000000p+0, -0x1.6d60fe719d000p-3, -0x1.0e46aa3b2e266p-46},
        {0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46},
        {0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46},
        {0x1.2e00000000000p+0, -0x1.526e5e3a1b000p-3, -0x1.0de8b90075b8fp-45},
        {0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46},
        {0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46},
        {0x1.2a00000000000p+0, -0x1.371fc201e9000p-3, 0x1.178864d27543ap-48},
        {0x1.2800000000000p+0, -0x1.29552f81ff000p-3, -0x1.48d301771c408p-45},
        {0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45},
        {0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45},
        {0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47},
        {0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47},
        {0x1.2200000000000p+0, -0x1.fec9131dbe000p-4, -0x1.575545ca333f2p-45},
        {0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45},
        {0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45},
        {0x1.1e00000000000p+0, -0x1.c5e548f5bc000p-4, -0x1.d0c57585fbe06p-46},
        {0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45},
        {0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45},
        {0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46},
        {0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46},
        {0x1.1800000000000p+0, -0x1.6f0d28ae56000p-4, -0x1.69737c93373dap-45},
        {0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46},
        {0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46},
        {0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45},
        {0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45},
        {0x1.1200000000000p+0, -0x1.16536eea38000p-4, 0x1.47c5e768fa309p-46},
        {0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45},
        {0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45},
        {0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46},
        {0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46},
        {0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45},
        {0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45},
        {0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48},
        {0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48},
        {0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45},
        {0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45},
        {0x1.0600000000000p+0, -0x1.7b91b07d58000p-6, -0x1.88d5493faa639p-45},
        {0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50},
        {0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50},
        {0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46},
        {0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46},
        {0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0},
        {0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0},
        {0x1.fc00000000000p-1, 0x1.0101575890000p-7, -0x1.0c76b999d2be8p-46},
        {0x1.f800000000000p-1, 0x1.0205658938000p-6, -0x1.3dc5b06e2f7d2p-45},
        {0x1.f400000000000p-1, 0x1.8492528c90000p-6, -0x1.aa0ba325a0c34p-45},
        {0x1.f000000000000p-1, 0x1.0415d89e74000p-5, 0x1.111c05cf1d753p-47},
        {0x1.ec00000000000p-1, 0x1.466aed42e0000p-5, -0x1.c167375bdfd28p-45},
        {0x1.e800000000000p-1, 0x1.894aa149fc000p-5, -0x1.97995d05a267dp-46},
        {0x1.e400000000000p-1, 0x1.ccb73cdddc000p-5, -0x1.a68f247d82807p-46},
        {0x1.e200000000000p-1, 0x1.eea31c006c000p-5, -0x1.e113e4fc93b7bp-47},
        {0x1.de00000000000p-1, 0x1.1973bd1466000p-4, -0x1.5325d560d9e9bp-45},
        {0x1.da00000000000p-1, 0x1.3bdf5a7d1e000p-4, 0x1.cc85ea5db4ed7p-45},
        {0x1.d600000000000p-1, 0x1.5e95a4d97a000p-4, -0x1.c69063c5d1d1ep-45},
        {0x1.d400000000000p-1, 0x1.700d30aeac000p-4, 0x1.c1e8da99ded32p-49},
        {0x1.d000000000000p-1, 0x1.9335e5d594000p-4, 0x1.3115c3abd47dap-45},
        {0x1.cc00000000000p-1, 0x1.b6ac88dad6000p-4, -0x1.390802bf768e5p-46},
        {0x1.ca00000000000p-1, 0x1.c885801bc4000p-4, 0x1.646d1c65aacd3p-45},
        {0x1.c600000000000p-1, 0x1.ec739830a2000p-4, -0x1.dc068afe645e0p-45},
        {0x1.c400000000000p-1, 0x1.fe89139dbe000p-4, -0x1.534d64fa10afdp-45},
        {0x1.c000000000000p-1, 0x1.1178e8227e000p-3, 0x1.1ef78ce2d07f2p-45},
        {0x1.be00000000000p-1, 0x1.1aa2b7e23f000p-3, 0x1.ca78e44389934p-45},
        {0x1.ba00000000000p-1, 0x1.2d1610c868000p-3, 0x1.39d6ccb81b4a1p-47},
        {0x1.b800000000000p-1, 0x1.365fcb0159000p-3, 0x1.62fa8234b7289p-51},
        {0x1.b400000000000p-1, 0x1.4913d8333b000p-3, 0x1.5837954fdb678p-45},
        {0x1.b200000000000p-1, 0x1.527e5e4a1b000p-3, 0x1.633e8e5697dc7p-45},
        {0x1.ae00000000000p-1, 0x1.6574ebe8c1000p-3, 0x1.9cf8b2c3c2e78p-46},
        {0x1.ac00000000000p-1, 0x1.6f0128b757000p-3, -0x1.5118de59c21e1p-45},
        {0x1.aa00000000000p-1, 0x1.7898d85445000p-3, -0x1.c661070914305p-46},
        {0x1.a600000000000p-1, 0x1.8beafeb390000p-3, -0x1.73d54aae92cd1p-47},
        {0x1.a400000000000p-1, 0x1.95a5adcf70000p-3, 0x1.7f22858a0ff6fp-47},
        {0x1.a000000000000p-1, 0x1.a93ed3c8ae000p-3, -0x1.8724350562169p-45},
        {0x1.9e00000000000p-1, 0x1.b31d8575bd000p-3, -0x1.c358d4eace1aap-47},
        {0x1.9c00000000000p-1, 0x1.bd087383be000p-3, -0x1.d4bc4595412b6p-45},
        {0x1.9a00000000000p-1, 0x1.c6ffbc6f01000p-3, -0x1.1ec72c5962bd2p-48},
        {0x1.9600000000000p-1, 0x1.db13db0d49000p-3, -0x1.aff2af715b035p-45},
        {0x1.9400000000000p-1, 0x1.e530effe71000p-3, 0x1.212276041f430p-51},
        {0x1.9200000000000p-1, 0x1.ef5ade4dd0000p-3, -0x1.a211565bb8e11p-51},
        {0x1.9000000000000p-1, 0x1.f991c6cb3b000p-3, 0x1.bcbecca0cdf30p-46},
        {0x1.8c00000000000p-1, 0x1.07138604d5800p-2, 0x1.89cdb16ed4e91p-48},
        {0x1.8a00000000000p-1, 0x1.0c42d67616000p-2, 0x1.7188b163ceae9p-45},
        {0x1.8800000000000p-1, 0x1.1178e8227e800p-2, -0x1.c210e63a5f01cp-45},
        {0x1.8600000000000p-1, 0x1.16b5ccbacf800p-2, 0x1.b9acdf7a51681p-45},
        {0x1.8400000000000p-1, 0x1.1bf99635a6800p-2, 0x1.ca6ed5147bdb7p-45},
        {0x1.8200000000000p-1, 0x1.214456d0eb800p-2, 0x1.a87deba46baeap-47},
        {0x1.7e00000000000p-1, 0x1.2bef07cdc9000p-2, 0x1.a9cfa4a5004f4p-45},
        {0x1.7c00000000000p-1, 0x1.314f1e1d36000p-2, -0x1.8e27ad3213cb8p-45},
        {0x1.7a00000000000p-1, 0x1.36b6776be1000p-2, 0x1.16ecdb0f177c8p-46},
        {0x1.7800000000000p-1, 0x1.3c25277333000p-2, 0x1.83b54b606bd5cp-46},
        {0x1.7600000000000p-1, 0x1.419b423d5e800p-2, 0x1.8e436ec90e09dp-47},
        {0x1.7400000000000p-1, 0x1.4718dc271c800p-2, -0x1.f27ce0967d675p-45},
        {0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45},
        {0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45},
        {0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46},
        {0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47},
    };
    /* Polynomial coefficients for log1p(r) - r; the explicit factors
       (-2, 4, -8) pre-scale them to match the ar2/ar3 terms used in the
       evaluation, which carry powers of A[0] = -0.5. */
    static const double A[] = {
        -0x1p-1,
        0x1.555555555556p-2 * -2,
        -0x1.0000000000006p-2 * -2,
        0x1.999999959554ep-3 * 4,
        -0x1.555555529a47ap-3 * 4,
        0x1.2495b9b4845e9p-3 * -8,
        -0x1.0002b8b263fc3p-3 * -8
    };
    /* ln(2) split into a high part and a low correction. */
    static const double ln2hi = 0x1.62e42fefa3800p-1,
        ln2lo = 0x1.ef35793c76730p-45;

    double z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
    double zhi, zlo, rhi, rlo, ar, ar2, ar3, lo3, lo4, arhi, arhi2;
    UINT64 iz, tmp;
    int k, i;

    /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
       The range is split into N subintervals.
       The ith subinterval contains z and c is near its center. */
    tmp = ix - 0x3fe6955500000000ULL;
    /* Top 7 mantissa bits of the offset value select the table entry. */
    i = (tmp >> (52 - 7)) % (1 << 7);
    k = (INT64)tmp >> 52; /* arithmetic shift */
    /* Remove the exponent part of tmp from ix, leaving the bits of z. */
    iz = ix - (tmp & 0xfffULL << 52);
    z = *(double*)&iz;
    kd = k;

    /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
    invc = T[i].invc;
    logc = T[i].logc;
    logctail = T[i].logctail;

    /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
     |z/c - 1| < 1/N, so r = z/c - 1 is exactly representable. */
    /* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */
    /* zhi is z rounded to its upper 32 bits (round-half-up on bit 31). */
    iz = (iz + (1ULL << 31)) & (-1ULL << 32);
    zhi = *(double*)&iz;
    zlo = z - zhi;
    rhi = zhi * invc - 1.0;
    rlo = zlo * invc;
    r = rhi + rlo;

    /* k*Ln2 + log(c) + r. */
    t1 = kd * ln2hi + logc;
    t2 = t1 + r;
    lo1 = kd * ln2lo + logctail;
    /* lo2 recovers the rounding error of the t1 + r addition. */
    lo2 = t1 - t2 + r;

    /* Evaluation is optimized assuming superscalar pipelined execution. */
    ar = A[0] * r; /* A[0] = -0.5. */
    ar2 = r * ar;
    ar3 = r * ar2;
    /* k*Ln2 + log(c) + r + A[0]*r*r. */
    arhi = A[0] * rhi;
    arhi2 = rhi * arhi;
    hi = t2 + arhi2;
    /* lo3: the part of A[0]*r*r not covered by arhi2;
       lo4: rounding error of the t2 + arhi2 addition. */
    lo3 = rlo * (ar + arhi);
    lo4 = t2 - hi + arhi2;
    /* p = log1p(r) - r - A[0]*r*r. */
    p = (ar3 * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
    lo = lo1 + lo2 + lo3 + lo4 + p;
    y = hi + lo;
    /* Whatever did not fit into y goes to the caller as the tail. */
    *tail = hi - y + lo;
    return y;
}
3815
/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
   The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1.

   argx, argy - used only as the argument values passed to math_error()
                when overflow or underflow is reported under the name "pow".
   x, xtail   - head and tail of the exponent.
   sign_bias  - 0 for a positive result; pow() passes 0x800 << 7 to request
                a negative one.

   In the comments below N is 1 << 7 = 128. */
static double pow_exp(double argx, double argy, double x, double xtail, UINT32 sign_bias)
{
    /* Coefficients of the polynomial applied to r^2 .. r^5 below. */
    static const double C[] = {
        0x1.ffffffffffdbdp-2,
        0x1.555555555543cp-3,
        0x1.55555cf172b91p-5,
        0x1.1111167a4d017p-7
    };
    /* N/ln2, and -ln2/N split into a high part and a low correction. */
    static const double invln2N = 0x1.71547652b82fep0 * (1 << 7),
        negln2hiN = -0x1.62e42fefa0000p-8,
        negln2loN = -0x1.cf79abc9e3b3ap-47;

    UINT32 abstop;
    UINT64 ki, idx, top, sbits;
    double kd, z, r, r2, scale, tail, tmp;

    /* Biased exponent of x, sign removed. */
    abstop = (*(UINT64*)&x >> 52) & 0x7ff;
    /* True when |x| < 2^-54 (the unsigned subtraction wraps) or |x| >= 2^9. */
    if (abstop - 0x3c9 >= 0x408 - 0x3c9) {
        if (abstop - 0x3c9 >= 0x80000000) {
            /* Avoid spurious underflow for tiny x. */
            /* Note: 0 is common input. */
            double one = 1.0 + x;
            return sign_bias ? -one : one;
        }
        if (abstop >= 0x409) {
            /* |x| >= 2^10: the result cannot be represented. */
            /* Note: inf and nan are already handled. */
            if (*(UINT64*)&x >> 63)
                return math_error(_UNDERFLOW, "pow", argx, argy, (sign_bias ? -DBL_MIN : DBL_MIN) * DBL_MIN);
            return math_error(_OVERFLOW, "pow", argx, argy, (sign_bias ? -DBL_MAX : DBL_MAX) * DBL_MAX);
        }
        /* Large x is special cased below. */
        abstop = 0;
    }

    /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
    /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
    z = invln2N * x;
    kd = __round(z);
    ki = (INT64)kd;
    r = x + kd * negln2hiN + kd * negln2loN;
    /* The code assumes 2^-200 < |xtail| < 2^-8/N. */
    r += xtail;
    /* 2^(k/N) ~= scale * (1 + tail). */
    /* exp_T (defined elsewhere in this file) is read in pairs: the even
       entry is reinterpreted as the double tail, the odd entry holds the
       bits that are combined with the exponent/sign word to form scale. */
    idx = 2 * (ki % (1 << 7));
    top = (ki + sign_bias) << (52 - 7);
    tail = *(double*)&exp_T[idx];
    /* This is only a valid scale when -1023*N < k < 1024*N. */
    sbits = exp_T[idx + 1] + top;
    /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
    /* Evaluation is optimized assuming superscalar pipelined execution. */
    r2 = r * r;
    /* Without fma the worst case error is 0.25/N ulp larger. */
    /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
    tmp = tail + r + r2 * (C[0] + r * C[1]) + r2 * r2 * (C[2] + r * C[3]);
    if (abstop == 0) {
        /* Handle cases that may overflow or underflow when computing the result that
           is scale*(1+TMP) without intermediate rounding. The bit representation of
           scale is in SBITS, however it has a computed exponent that may have
           overflown into the sign bit so that needs to be adjusted before using it as
           a double. (int32_t)KI is the k used in the argument reduction and exponent
           adjustment of scale, positive k here means the result may overflow and
           negative k means the result may underflow. */
        /* Note: these locals shadow the outer 'scale' inside this block. */
        double scale, y;

        if ((ki & 0x80000000) == 0) {
            /* k > 0, the exponent of scale might have overflowed by <= 460. */
            /* Lower the exponent by 1009, then multiply 2^1009 back in. */
            sbits -= 1009ull << 52;
            scale = *(double*)&sbits;
            y = 0x1p1009 * (scale + scale * tmp);
            if (isinf(y))
                return math_error(_OVERFLOW, "pow", argx, argy, y);
            return y;
        }
        /* k < 0, need special care in the subnormal range. */
        /* Raise the exponent by 1022; the 2^-1022 factor is applied last. */
        sbits += 1022ull << 52;
        /* Note: sbits is signed scale. */
        scale = *(double*)&sbits;
        y = scale + scale * tmp;
        if (fabs(y) < 1.0) {
            /* Round y to the right precision before scaling it into the subnormal
               range to avoid double rounding that can cause 0.5+E/2 ulp error where
               E is the worst-case ulp error outside the subnormal range. So this
               is only useful if the goal is better than 1 ulp worst-case error. */
            double hi, lo, one = 1.0;
            if (y < 0.0)
                one = -1.0;
            lo = scale - y + scale * tmp;
            hi = one + y;
            lo = one - hi + y + lo;
            y = hi + lo - one;
            /* Fix the sign of 0. */
            if (y == 0.0) {
                sbits &= 0x8000000000000000ULL;
                y = *(double*)&sbits;
            }
            /* The underflow exception needs to be signaled explicitly. */
            fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022);
            y = 0x1p-1022 * y;
            return math_error(_UNDERFLOW, "pow", argx, argy, y);
        }
        y = 0x1p-1022 * y;
        return y;
    }
    scale = *(double*)&sbits;
    /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
       is no spurious underflow here even without fma. */
    return scale + scale * tmp;
}
3926
3927 /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
3928    the bit representation of a non-zero finite floating-point value. */
3929 static inline int pow_checkint(UINT64 iy)
3930 {
3931     int e = iy >> 52 & 0x7ff;
3932     if (e < 0x3ff)
3933         return 0;
3934     if (e > 0x3ff + 52)
3935         return 2;
3936     if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
3937         return 0;
3938     if (iy & (1ULL << (0x3ff + 52 - e)))
3939         return 1;
3940     return 2;
3941 }
3942
3943 /*********************************************************************
3944  *              pow (MSVCRT.@)
3945  *
3946  * Copied from musl: src/math/pow.c
3947  */
3948 double CDECL pow( double x, double y )
3949 {
3950     UINT32 sign_bias = 0;
3951     UINT64 ix, iy;
3952     UINT32 topx, topy;
3953     double lo, hi, ehi, elo, yhi, ylo, lhi, llo;
3954
3955     ix = *(UINT64*)&x;
3956     iy = *(UINT64*)&y;
3957     topx = ix >> 52;
3958     topy = iy >> 52;
3959     if (topx - 0x001 >= 0x7ff - 0x001 ||
3960             (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
3961         /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
3962            and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
3963         /* Special cases: (x < 0x1p-126 or inf or nan) or
3964            (|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
3965         if (2 * iy - 1 >= 2 * 0x7ff0000000000000ULL - 1) {
3966             if (2 * iy == 0)
3967                 return 1.0;
3968             if (ix == 0x3ff0000000000000ULL)
3969                 return 1.0;
3970             if (2 * ix > 2 * 0x7ff0000000000000ULL ||
3971                     2 * iy > 2 * 0x7ff0000000000000ULL)
3972                 return x + y;
3973             if (2 * ix == 2 * 0x3ff0000000000000ULL)
3974                 return 1.0;
3975             if ((2 * ix < 2 * 0x3ff0000000000000ULL) == !(iy >> 63))
3976                 return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
3977             return y * y;
3978         }
3979         if (2 * ix - 1 >= 2 * 0x7ff0000000000000ULL - 1) {
3980             double x2 = x * x;
3981             if (ix >> 63 && pow_checkint(iy) == 1)
3982                 x2 = -x2;
3983             if (iy & 0x8000000000000000ULL && x2 == 0.0)
3984                 return math_error(_SING, "pow", x, y, 1 / x2);
3985             /* Without the barrier some versions of clang hoist the 1/x2 and
3986                thus division by zero exception can be signaled spuriously. */
3987             return iy >> 63 ? fp_barrier(1 / x2) : x2;
3988         }
3989         /* Here x and y are non-zero finite. */
3990         if (ix >> 63) {
3991             /* Finite x < 0. */
3992             int yint = pow_checkint(iy);
3993             if (yint == 0)
3994                 return math_error(_DOMAIN, "pow", x, y, 0 / (x - x));
3995             if (yint == 1)
3996                 sign_bias = 0x800 << 7;
3997             ix &= 0x7fffffffffffffff;
3998             topx &= 0x7ff;
3999         }
4000         if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
4001             /* Note: sign_bias == 0 here because y is not odd. */
4002             if (ix == 0x3ff0000000000000ULL)
4003                 return 1.0;
4004             if ((topy & 0x7ff) < 0x3be) {
4005                 /* |y| < 2^-65, x^y ~= 1 + y*log(x). */
4006                 return ix > 0x3ff0000000000000ULL ? 1.0 + y : 1.0 - y;
4007             }
4008             if ((ix > 0x3ff0000000000000ULL) == (topy < 0x800))
4009                 return math_error(_OVERFLOW, "pow", x, y, fp_barrier(DBL_MAX) * DBL_MAX);
4010             return math_error(_UNDERFLOW, "pow", x, y, fp_barrier(DBL_MIN) * DBL_MIN);
4011         }
4012         if (topx == 0) {
4013             /* Normalize subnormal x so exponent becomes negative. */
4014             x *= 0x1p52;
4015             ix = *(UINT64*)&x;
4016             ix &= 0x7fffffffffffffff;
4017             ix -= 52ULL << 52;
4018         }
4019     }
4020
4021     hi = pow_log(ix, &lo);
4022     iy &= -1ULL << 27;
4023     yhi = *(double*)&iy;
4024     ylo = y - yhi;
4025     *(UINT64*)&lhi = *(UINT64*)&hi & -1ULL << 27;
4026     llo = fp_barrier(hi - lhi + lo);
4027     ehi = yhi * lhi;
4028     elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */
4029     return pow_exp(x, y, ehi, elo, sign_bias);
4030 }
4031
4032 /*********************************************************************
4033  *              sin (MSVCRT.@)
4034  *
4035  * Copied from musl: src/math/sin.c
4036  */
4037 double CDECL sin( double x )
4038 {
4039     double y[2];
4040     UINT32 ix;
4041     unsigned n;
4042
4043     ix = *(ULONGLONG*)&x >> 32;
4044     ix &= 0x7fffffff;
4045
4046     /* |x| ~< pi/4 */
4047     if (ix <= 0x3fe921fb) {
4048         if (ix < 0x3e500000) { /* |x| < 2**-26 */
4049             /* raise inexact if x != 0 and underflow if subnormal*/
4050             fp_barrier(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f);
4051             return x;
4052         }
4053         return __sin(x, 0.0, 0);
4054     }
4055
4056     /* sin(Inf or NaN) is NaN */
4057     if (isinf(x))
4058         return math_error(_DOMAIN, "sin", x, 0, x - x);
4059     if (ix >= 0x7ff00000)
4060         return x - x;
4061
4062     /* argument reduction needed */
4063     n = __rem_pio2(x, y);
4064     switch (n&3) {
4065     case 0: return  __sin(y[0], y[1], 1);
4066     case 1: return  __cos(y[0], y[1]);
4067     case 2: return -__sin(y[0], y[1], 1);
4068     default: return -__cos(y[0], y[1]);
4069     }
4070 }
4071
4072 /*********************************************************************
4073  *              sinh (MSVCRT.@)
4074  */
4075 double CDECL sinh( double x )
4076 {
4077     UINT64 ux = *(UINT64*)&x;
4078     UINT64 sign = ux & 0x8000000000000000ULL;
4079     UINT32 w;
4080     double t, h, absx;
4081
4082     h = 0.5;
4083     if (ux >> 63)
4084         h = -h;
4085     /* |x| */
4086     ux &= (UINT64)-1 / 2;
4087     absx = *(double*)&ux;
4088     w = ux >> 32;
4089
4090     /* |x| < log(DBL_MAX) */
4091     if (w < 0x40862e42) {
4092         t = __expm1(absx);
4093         if (w < 0x3ff00000) {
4094             if (w < 0x3ff00000 - (26 << 20))
4095                 return x;
4096             return h * (2 * t - t * t / (t + 1));
4097         }
4098         return h * (t + t / (t + 1));
4099     }
4100
4101     /* |x| > log(DBL_MAX) or nan */
4102     /* note: the result is stored to handle overflow */
4103     if (ux > 0x7ff0000000000000ULL)
4104         *(UINT64*)&t = ux | sign | 0x0008000000000000ULL;
4105     else
4106         t = __expo2(absx, 2 * h);
4107     return t;
4108 }
4109
4110 static BOOL sqrt_validate( double *x, BOOL update_sw )
4111 {
4112     short c = _dclass(*x);
4113
4114     if (c == FP_ZERO) return FALSE;
4115     if (c == FP_NAN)
4116     {
4117 #ifdef __i386__
4118         if (update_sw)
4119             *x = math_error(_DOMAIN, "sqrt", *x, 0, *x);
4120 #else
4121         /* set signaling bit */
4122         *(ULONGLONG*)x |= 0x8000000000000ULL;
4123 #endif
4124         return FALSE;
4125     }
4126     if (signbit(*x))
4127     {
4128         *x = math_error(_DOMAIN, "sqrt", *x, 0, ret_nan(update_sw));
4129         return FALSE;
4130     }
4131     if (c == FP_INFINITE) return FALSE;
4132     return TRUE;
4133 }
4134
#if defined(__x86_64__) || defined(__i386__)
/* Assembly helper: executes the SSE2 'sqrtsd' instruction on %xmm0 in
   place and returns. Performs no argument checking of its own; sqrt()
   runs sqrt_validate() before calling it. */
double CDECL sse2_sqrt(double);
__ASM_GLOBAL_FUNC( sse2_sqrt,
        "sqrtsd %xmm0, %xmm0\n\t"
        "ret" )
#endif
4141
#ifdef __i386__
/* Assembly helper (i386 only): loads the double argument from the stack
   at 4(%esp) onto the x87 stack and executes 'fsqrt', leaving the result
   in st(0). The instruction is bracketed by SET_X87_CW(0xc00) and
   RESET_X87_CW, which are defined elsewhere; presumably they adjust and
   then restore the x87 control word around the operation - TODO confirm
   against the macro definitions. */
double CDECL x87_sqrt(double);
__ASM_GLOBAL_FUNC( x87_sqrt,
        "fldl 4(%esp)\n\t"
        SET_X87_CW(0xc00)
        "fsqrt\n\t"
        RESET_X87_CW
        "ret" )
#endif
4151
/*********************************************************************
 *              sqrt (MSVCRT.@)
 *
 * Copied from musl: src/math/sqrt.c
 *
 * Square root of x. In every configuration sqrt_validate() runs first
 * and, when it returns FALSE, the value it left in x is returned as is.
 * Otherwise:
 *   - x86_64: the root is computed by sse2_sqrt();
 *   - i386:   the root is computed by x87_sqrt();
 *   - other:  the root is generated bit by bit with integer arithmetic
 *             (the musl algorithm below).
 */
double CDECL sqrt( double x )
{
#ifdef __x86_64__
    if (!sqrt_validate(&x, TRUE))
        return x;

    return sse2_sqrt(x);
#elif defined( __i386__ )
    if (!sqrt_validate(&x, TRUE))
        return x;

    return x87_sqrt(x);
#else
    static const double tiny = 1.0e-300;

    double z;
    int sign = 0x80000000;
    int ix0,s0,q,m,t,i;
    unsigned int r,t1,s1,ix1,q1;
    ULONGLONG ix;

    if (!sqrt_validate(&x, TRUE))
        return x;

    /* Split the bit pattern into high (ix0) and low (ix1) 32-bit words. */
    ix = *(ULONGLONG*)&x;
    ix0 = ix >> 32;
    ix1 = ix;

    /* normalize x */
    m = ix0 >> 20;
    if (m == 0) {  /* subnormal x */
        /* Shift whole 21-bit chunks up while the high word is empty. */
        while (ix0 == 0) {
            m -= 21;
            ix0 |= (ix1 >> 11);
            ix1 <<= 21;
        }
        /* Then shift bit by bit until the implicit-one position is set. */
        for (i=0; (ix0 & 0x00100000) == 0; i++)
            ix0 <<= 1;
        m -= i - 1;
        ix0 |= ix1 >> (32 - i);
        ix1 <<= i;
    }
    m -= 1023;    /* unbias exponent */
    ix0 = (ix0 & 0x000fffff) | 0x00100000;
    if (m & 1) {  /* odd m, double x to make it even */
        ix0 += ix0 + ((ix1 & sign) >> 31);
        ix1 += ix1;
    }
    m >>= 1;      /* m = [m/2] */

    /* generate sqrt(x) bit by bit */
    ix0 += ix0 + ((ix1 & sign) >> 31);
    ix1 += ix1;
    q = q1 = s0 = s1 = 0;  /* [q,q1] = sqrt(x) */
    r = 0x00200000;        /* r = moving bit from right to left */

    /* First pass: bits of the result that go into q. */
    while (r != 0) {
        t = s0 + r;
        if (t <= ix0) {
            s0   = t + r;
            ix0 -= t;
            q   += r;
        }
        /* Shift the two-word remainder [ix0,ix1] left by one bit. */
        ix0 += ix0 + ((ix1 & sign) >> 31);
        ix1 += ix1;
        r >>= 1;
    }

    /* Second pass: bits of the result that go into q1, using the
       two-word quantities [s0,s1] and [ix0,ix1]. */
    r = sign;
    while (r != 0) {
        t1 = s1 + r;
        t  = s0;
        if (t < ix0 || (t == ix0 && t1 <= ix1)) {
            s1 = t1 + r;
            /* Carry from the low word of s into the high word. */
            if ((t1&sign) == sign && (s1 & sign) == 0)
                s0++;
            ix0 -= t;
            /* Borrow for the low-word subtraction. */
            if (ix1 < t1)
                ix0--;
            ix1 -= t1;
            q1 += r;
        }
        ix0 += ix0 + ((ix1 & sign) >> 31);
        ix1 += ix1;
        r >>= 1;
    }

    /* use floating add to find out rounding direction */
    /* A non-zero remainder means the result is inexact and needs rounding. */
    if ((ix0 | ix1) != 0) {
        z = 1.0 - tiny; /* raise inexact flag */
        if (z >= 1.0) {
            z = 1.0 + tiny;
            if (q1 == (unsigned int)0xffffffff) {
                q1 = 0;
                q++;
            } else if (z > 1.0) {
                if (q1 == (unsigned int)0xfffffffe)
                    q++;
                q1 += 2;
            } else
                q1 += q1 & 1;
        }
    }
    /* Reassemble the double: halve [q,q1], add the exponent bias and m. */
    ix0 = (q >> 1) + 0x3fe00000;
    ix1 = q1 >> 1;
    if (q & 1)
        ix1 |= sign;
    ix = ix0 + ((unsigned int)m << 20);
    ix <<= 32;
    ix |= ix1;
    return *(double*)&ix;
#endif
}
4270
/* Copied from musl: src/math/__tan.c */
/*
 * Tangent kernel shared by tan().
 *
 * x is the argument and y its low-order tail (callers without a tail pass 0.0).
 * With odd == 0 the polynomial value w is returned directly; with odd == 1 the
 * result is -1/w, evaluated with a head/tail split so that the reciprocal does
 * not lose accuracy.  Arguments with |x| >= 0.6744 are first reflected around
 * pi/4 and the result is reconstructed from the reflected value.
 */
static double __tan(double x, double y, int odd)
{
    static const double T[] = {
        3.33333333333334091986e-01,
        1.33333333333201242699e-01,
        5.39682539762260521377e-02,
        2.18694882948595424599e-02,
        8.86323982359930005737e-03,
        3.59207910759131235356e-03,
        1.45620945432529025516e-03,
        5.88041240820264096874e-04,
        2.46463134818469906812e-04,
        7.81794442939557092300e-05,
        7.14072491382608190305e-05,
        -1.85586374855275456654e-05,
        2.59073051863633712884e-05,
    };
    static const double pio4 = 7.85398163397448278999e-01;
    static const double pio4lo = 3.06161699786838301793e-17;

    union { double f; ULONGLONG i; } bits, w_head, inv_head;
    double z, r, v, w, s, inv;
    UINT32 hi_word;
    int big, negative = 0;

    bits.f = x;
    hi_word = bits.i >> 32;
    big = (hi_word & 0x7fffffff) >= 0x3FE59428; /* |x| >= 0.6744 */
    if (big) {
        negative = hi_word >> 31;
        if (negative) {
            x = -x;
            y = -y;
        }
        /* work on pi/4 - |x| instead; the tail is folded into x */
        x = (pio4 - x) + (pio4lo - y);
        y = 0.0;
    }

    /* odd and even polynomial terms are accumulated separately */
    z = x * x;
    w = z * z;
    r = T[1] + w * (T[3] + w * (T[5] + w * (T[7] + w * (T[9] + w * T[11]))));
    v = z * (T[2] + w * (T[4] + w * (T[6] + w * (T[8] + w * (T[10] + w * T[12])))));
    s = z * x;
    r = y + z * (s * (r + v) + y) + s * T[0];
    w = x + r;

    if (big) {
        s = 1 - 2 * odd;
        v = s - 2.0 * (x + (r - w * w / (w + s)));
        return negative ? -v : v;
    }
    if (!odd)
        return w;

    /* -1.0/(x+r) has up to 2ulp error, so compute it accurately */
    w_head.f = w;
    w_head.i &= 0xffffffff00000000ULL;  /* keep only the high word of w */
    v = r - (w_head.f - x);             /* w_head+v = r+x */
    inv = -1.0 / w;
    inv_head.f = inv;
    inv_head.i &= 0xffffffff00000000ULL;
    return inv_head.f + inv * (1.0 + inv_head.f * w_head.f + inv_head.f * v);
}
4329
4330 /*********************************************************************
4331  *              tan (MSVCRT.@)
4332  *
4333  * Copied from musl: src/math/tan.c
4334  */
4335 double CDECL tan( double x )
4336 {
4337     double y[2];
4338     UINT32 ix;
4339     unsigned n;
4340
4341     ix = *(ULONGLONG*)&x >> 32;
4342     ix &= 0x7fffffff;
4343
4344     if (ix <= 0x3fe921fb) { /* |x| ~< pi/4 */
4345         if (ix < 0x3e400000) { /* |x| < 2**-27 */
4346             /* raise inexact if x!=0 and underflow if subnormal */
4347             fp_barrier(ix < 0x00100000 ? x / 0x1p120f : x + 0x1p120f);
4348             return x;
4349         }
4350         return __tan(x, 0.0, 0);
4351     }
4352
4353     if (isinf(x))
4354         return math_error(_DOMAIN, "tan", x, 0, x - x);
4355     if (ix >= 0x7ff00000)
4356         return x - x;
4357
4358     n = __rem_pio2(x, y);
4359     return __tan(y[0], y[1], n & 1);
4360 }
4361
4362 /*********************************************************************
4363  *              tanh (MSVCRT.@)
4364  */
4365 double CDECL tanh( double x )
4366 {
4367     UINT64 ui = *(UINT64*)&x;
4368     UINT64 sign = ui & 0x8000000000000000ULL;
4369     UINT32 w;
4370     double t;
4371
4372     /* x = |x| */
4373     ui &= (UINT64)-1 / 2;
4374     x = *(double*)&ui;
4375     w = ui >> 32;
4376
4377     if (w > 0x3fe193ea) {
4378         /* |x| > log(3)/2 ~= 0.5493 or nan */
4379         if (w > 0x40340000) {
4380             if (ui > 0x7ff0000000000000ULL) {
4381                 *(UINT64*)&x = ui | sign | 0x0008000000000000ULL;
4382 #if _MSVCR_VER < 140
4383                 return math_error(_DOMAIN, "tanh", x, 0, x);
4384 #else
4385                 return x;
4386 #endif
4387             }
4388             /* |x| > 20 */
4389             /* note: this branch avoids raising overflow */
4390             fp_barrier(x + 0x1p120f);
4391             t = 1 - 0 / x;
4392         } else {
4393             t = __expm1(2 * x);
4394             t = 1 - 2 / (t + 2);
4395         }
4396     } else if (w > 0x3fd058ae) {
4397         /* |x| > log(5/3)/2 ~= 0.2554 */
4398         t = __expm1(2 * x);
4399         t = t / (t + 2);
4400     } else if (w >= 0x00100000) {
4401         /* |x| >= 0x1p-1022, up to 2ulp error in [0.1,0.2554] */
4402         t = __expm1(-2 * x);
4403         t = -t / (t + 2);
4404     } else {
4405         /* |x| is subnormal */
4406         /* note: the branch above would not raise underflow in [0x1p-1023,0x1p-1022) */
4407         fp_barrier((float)x);
4408         t = x;
4409     }
4410     return sign ? -t : t;
4411 }
4412
4413
4414 #if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
4415
/*
 * CREATE_FPU_FUNC1(name, call): emit an assembly function `name` that
 * forwards a single argument held in st(0) to the C function `call`.
 *
 * The generated code:
 *  - sets up an %ebp frame and reserves 68 bytes (eight double slots plus
 *    one int at -4(%ebp));
 *  - pops st(0) into slot 0 at (%esp), where the callee finds its argument;
 *  - pops every remaining x87 register into the following slots, using fxam
 *    (C3/C0 set, C2 clear after masking with 0x4500) to detect an empty top
 *    of stack, and keeps the number of used slots in %ecx;
 *  - saves the slot count in the frame, calls `call`, then stores the result
 *    from st(0) into slot 0;
 *  - reloads the slots from the highest index down to 0, so the spilled
 *    registers return in their original order and the result ends up in
 *    st(0).
 */
4416 #define CREATE_FPU_FUNC1(name, call) \
4417     __ASM_GLOBAL_FUNC(name, \
4418             "pushl   %ebp\n\t" \
4419             __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \
4420             __ASM_CFI(".cfi_rel_offset %ebp,0\n\t") \
4421             "movl    %esp, %ebp\n\t" \
4422             __ASM_CFI(".cfi_def_cfa_register %ebp\n\t") \
4423             "subl    $68, %esp\n\t" /* sizeof(double)*8 + sizeof(int) */ \
4424             "fstpl   (%esp)\n\t"    /* store function argument */ \
4425             "fwait\n\t" \
4426             "movl    $1, %ecx\n\t"  /* empty FPU stack */ \
4427             "1:\n\t" \
4428             "fxam\n\t" \
4429             "fstsw   %ax\n\t" \
4430             "and     $0x4500, %ax\n\t" \
4431             "cmp     $0x4100, %ax\n\t" \
4432             "je      2f\n\t" \
4433             "fstpl    (%esp,%ecx,8)\n\t" \
4434             "fwait\n\t" \
4435             "incl    %ecx\n\t" \
4436             "jmp     1b\n\t" \
4437             "2:\n\t" \
4438             "movl    %ecx, -4(%ebp)\n\t" \
4439             "call    " __ASM_NAME( #call ) "\n\t" \
4440             "movl    -4(%ebp), %ecx\n\t" \
4441             "fstpl   (%esp)\n\t"    /* save result */ \
4442             "3:\n\t"                /* restore FPU stack */ \
4443             "decl    %ecx\n\t" \
4444             "fldl    (%esp,%ecx,8)\n\t" \
4445             "cmpl    $0, %ecx\n\t" \
4446             "jne     3b\n\t" \
4447             "leave\n\t" \
4448             __ASM_CFI(".cfi_def_cfa %esp,4\n\t") \
4449             __ASM_CFI(".cfi_same_value %ebp\n\t") \
4450             "ret")
4451
/*
 * CREATE_FPU_FUNC2(name, call): two-argument variant of the x87 forwarding
 * thunk.
 *
 * st(0) is popped into slot 1 at 8(%esp) and the next register into slot 0
 * at (%esp), so the value that was deeper on the x87 stack becomes the first
 * argument of `call`.  Any further x87 registers are spilled to slots 2 and
 * up (fxam detects the empty top of stack) and the slot count is kept in the
 * frame across the call.  Afterwards the result is stored into slot 1 and
 * the slots are reloaded from the highest index down to 1, leaving the
 * spilled registers in their original order with the result in st(0);
 * slot 0 is not reloaded.
 */
4452 #define CREATE_FPU_FUNC2(name, call) \
4453     __ASM_GLOBAL_FUNC(name, \
4454             "pushl   %ebp\n\t" \
4455             __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t") \
4456             __ASM_CFI(".cfi_rel_offset %ebp,0\n\t") \
4457             "movl    %esp, %ebp\n\t" \
4458             __ASM_CFI(".cfi_def_cfa_register %ebp\n\t") \
4459             "subl    $68, %esp\n\t" /* sizeof(double)*8 + sizeof(int) */ \
4460             "fstpl   8(%esp)\n\t"   /* store function argument */ \
4461             "fwait\n\t" \
4462             "fstpl   (%esp)\n\t" \
4463             "fwait\n\t" \
4464             "movl    $2, %ecx\n\t"  /* empty FPU stack */ \
4465             "1:\n\t" \
4466             "fxam\n\t" \
4467             "fstsw   %ax\n\t" \
4468             "and     $0x4500, %ax\n\t" \
4469             "cmp     $0x4100, %ax\n\t" \
4470             "je      2f\n\t" \
4471             "fstpl    (%esp,%ecx,8)\n\t" \
4472             "fwait\n\t" \
4473             "incl    %ecx\n\t" \
4474             "jmp     1b\n\t" \
4475             "2:\n\t" \
4476             "movl    %ecx, -4(%ebp)\n\t" \
4477             "call    " __ASM_NAME( #call ) "\n\t" \
4478             "movl    -4(%ebp), %ecx\n\t" \
4479             "fstpl   8(%esp)\n\t"   /* save result */ \
4480             "3:\n\t"                /* restore FPU stack */ \
4481             "decl    %ecx\n\t" \
4482             "fldl    (%esp,%ecx,8)\n\t" \
4483             "cmpl    $1, %ecx\n\t" \
4484             "jne     3b\n\t" \
4485             "leave\n\t" \
4486             __ASM_CFI(".cfi_def_cfa %esp,4\n\t") \
4487             __ASM_CFI(".cfi_same_value %ebp\n\t") \
4488             "ret")
4489
/*
 * _CI* entry points: each one takes its operand(s) from the x87 register
 * stack and forwards them to the named C math function through the thunks
 * generated by CREATE_FPU_FUNC1 (one operand) or CREATE_FPU_FUNC2 (two
 * operands: atan2, fmod, pow).
 */
4490 CREATE_FPU_FUNC1(_CIacos, acos)
4491 CREATE_FPU_FUNC1(_CIasin, asin)
4492 CREATE_FPU_FUNC1(_CIatan, atan)
4493 CREATE_FPU_FUNC2(_CIatan2, atan2)
4494 CREATE_FPU_FUNC1(_CIcos, cos)
4495 CREATE_FPU_FUNC1(_CIcosh, cosh)
4496 CREATE_FPU_FUNC1(_CIexp, exp)
4497 CREATE_FPU_FUNC2(_CIfmod, fmod)
4498 CREATE_FPU_FUNC1(_CIlog, log)
4499 CREATE_FPU_FUNC1(_CIlog10, log10)
4500 CREATE_FPU_FUNC2(_CIpow, pow)
4501 CREATE_FPU_FUNC1(_CIsin, sin)
4502 CREATE_FPU_FUNC1(_CIsinh, sinh)
4503 CREATE_FPU_FUNC1(_CIsqrt, sqrt)
4504 CREATE_FPU_FUNC1(_CItan, tan)
4505 CREATE_FPU_FUNC1(_CItanh, tanh)
4506
/*
 * _ftol: pop st(0) and return it as a 64-bit integer in %edx:%eax.
 *
 * The current x87 control word is saved at (%esp); a copy with both
 * rounding-control bits set (0xc00, round toward zero) is loaded for the
 * fistpq conversion, and the saved control word is restored before the
 * result is fetched from the stack.
 */
4507 __ASM_GLOBAL_FUNC(_ftol,
4508         "pushl   %ebp\n\t"
4509         __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t")
4510         __ASM_CFI(".cfi_rel_offset %ebp,0\n\t")
4511         "movl    %esp, %ebp\n\t"
4512         __ASM_CFI(".cfi_def_cfa_register %ebp\n\t")
4513         "subl    $12, %esp\n\t"     /* sizeof(LONGLONG) + 2*sizeof(WORD) */
4514         "fnstcw  (%esp)\n\t"
4515         "mov     (%esp), %ax\n\t"
4516         "or      $0xc00, %ax\n\t"
4517         "mov     %ax, 2(%esp)\n\t"
4518         "fldcw   2(%esp)\n\t"
4519         "fistpq  4(%esp)\n\t"
4520         "fldcw   (%esp)\n\t"
4521         "movl    4(%esp), %eax\n\t"
4522         "movl    8(%esp), %edx\n\t"
4523         "leave\n\t"
4524         __ASM_CFI(".cfi_def_cfa %esp,4\n\t")
4525         __ASM_CFI(".cfi_same_value %ebp\n\t")
4526         "ret")
4527
4528 #endif /* (defined(__GNUC__) || defined(__clang__)) && defined(__i386__) */
4529
/*********************************************************************
 *              _fpclass (MSVCRT.@)
 *
 * Classifies num from its raw bit pattern and returns one of the
 * _FPCLASS_* constants (zero, denormal, normal, infinity, quiet or
 * signaling NaN; the sign is ignored for NaNs).
 */
int CDECL _fpclass(double num)
{
    union { double f; UINT64 i; } u = { num };
    const int exponent = u.i >> 52 & 0x7ff;
    const int negative = u.i >> 63;

    if (exponent == 0)
    {
        /* any bit set below the sign bit makes it a denormal, otherwise a zero */
        if (u.i << 1) return negative ? _FPCLASS_ND : _FPCLASS_PD;
        return negative ? _FPCLASS_NZ : _FPCLASS_PZ;
    }

    if (exponent != 0x7ff)
        return negative ? _FPCLASS_NN : _FPCLASS_PN;

    /* all-ones exponent: NaN when the mantissa is non-zero, bit 51 marks a quiet NaN */
    if (u.i << 12) return ((u.i >> 51) & 1) ? _FPCLASS_QNAN : _FPCLASS_SNAN;
    return negative ? _FPCLASS_NINF : _FPCLASS_PINF;
}
4551
/*********************************************************************
 *              _rotl (MSVCRT.@)
 *
 * Rotates the 32-bit value num left by shift bits.  Only the low five
 * bits of shift are used, so negative counts and counts >= 32 wrap.
 */
unsigned int CDECL MSVCRT__rotl(unsigned int num, int shift)
{
  shift &= 31;
  /* mask the complementary count too: a shift by 32 (when shift is 0) is undefined in C */
  return (num << shift) | (num >> ((32 - shift) & 31));
}
4560
/*********************************************************************
 *              _lrotl (MSVCRT.@)
 *
 * Rotates num left by shift bits, treating it as a 32-bit quantity.
 * Only the low five bits of shift are used.
 */
__msvcrt_ulong CDECL MSVCRT__lrotl(__msvcrt_ulong num, int shift)
{
  shift &= 0x1f;
  /* mask the complementary count too: a shift by 32 (when shift is 0) is undefined in C */
  return (num << shift) | (num >> ((32 - shift) & 0x1f));
}
4569
/*********************************************************************
 *              _lrotr (MSVCRT.@)
 *
 * Rotates num right by shift bits, treating it as a 32-bit quantity.
 * Only the low five bits of shift are used.
 */
__msvcrt_ulong CDECL MSVCRT__lrotr(__msvcrt_ulong num, int shift)
{
  shift &= 0x1f;
  /* mask the complementary count too: a shift by 32 (when shift is 0) is undefined in C */
  return (num >> shift) | (num << ((32 - shift) & 0x1f));
}
4578
/*********************************************************************
 *              _rotr (MSVCRT.@)
 *
 * Rotates the 32-bit value num right by shift bits.  Only the low five
 * bits of shift are used, so negative counts and counts >= 32 wrap.
 */
unsigned int CDECL MSVCRT__rotr(unsigned int num, int shift)
{
    shift &= 0x1f;
    /* mask the complementary count too: a shift by 32 (when shift is 0) is undefined in C */
    return (num >> shift) | (num << ((32 - shift) & 0x1f));
}
4587
/*********************************************************************
 *              _rotl64 (MSVCRT.@)
 *
 * Rotates the 64-bit value num left by shift bits.  Only the low six
 * bits of shift are used, so negative counts and counts >= 64 wrap.
 */
unsigned __int64 CDECL MSVCRT__rotl64(unsigned __int64 num, int shift)
{
  shift &= 63;
  /* mask the complementary count too: a shift by 64 (when shift is 0) is undefined in C */
  return (num << shift) | (num >> ((64 - shift) & 63));
}
4596
/*********************************************************************
 *              _rotr64 (MSVCRT.@)
 *
 * Rotates the 64-bit value num right by shift bits.  Only the low six
 * bits of shift are used, so negative counts and counts >= 64 wrap.
 */
unsigned __int64 CDECL MSVCRT__rotr64(unsigned __int64 num, int shift)
{
    shift &= 63;
    /* mask the complementary count too: a shift by 64 (when shift is 0) is undefined in C */
    return (num >> shift) | (num << ((64 - shift) & 63));
}
4605
/*********************************************************************
 *              abs (MSVCRT.@)
 *
 * Returns the absolute value of n.  The most negative value has no
 * positive counterpart and is returned unchanged.
 */
int CDECL abs( int n )
{
    /* negate in unsigned arithmetic: -n overflows (undefined behaviour) for INT_MIN */
    return n >= 0 ? n : (int)(0u - (unsigned int)n);
}
4613
/*********************************************************************
 *              labs (MSVCRT.@)
 *
 * Returns the absolute value of n.  The most negative value has no
 * positive counterpart and is returned unchanged.
 */
__msvcrt_long CDECL labs( __msvcrt_long n )
{
    /* negate in unsigned arithmetic: -n overflows (undefined behaviour) for the minimum value */
    return n >= 0 ? n : (__msvcrt_long)(0 - (__msvcrt_ulong)n);
}
4621
#if _MSVCR_VER>=100
/*********************************************************************
 *              llabs (MSVCR100.@)
 *
 * Returns the absolute value of n.  The most negative value has no
 * positive counterpart and is returned unchanged.
 */
__int64 CDECL llabs( __int64 n )
{
    /* negate in unsigned arithmetic: -n overflows (undefined behaviour) for the minimum value */
    return n >= 0 ? n : (__int64)(0 - (unsigned __int64)n);
}
#endif
4631
#if _MSVCR_VER>=120
/*********************************************************************
 *              imaxabs (MSVCR120.@)
 *
 * Returns the absolute value of n.  The most negative value has no
 * positive counterpart and is returned unchanged.
 */
intmax_t CDECL imaxabs( intmax_t n )
{
    /* negate in unsigned arithmetic: -n overflows (undefined behaviour) for INTMAX_MIN */
    return n >= 0 ? n : (intmax_t)(0 - (uintmax_t)n);
}
#endif
4641
/*********************************************************************
 *              _abs64 (MSVCRT.@)
 *
 * Returns the absolute value of n.  The most negative value has no
 * positive counterpart and is returned unchanged.
 */
__int64 CDECL _abs64( __int64 n )
{
    /* negate in unsigned arithmetic: -n overflows (undefined behaviour) for the minimum value */
    return n >= 0 ? n : (__int64)(0 - (unsigned __int64)n);
}
4649
/* Copied from musl: src/math/ilogb.c */
/*
 * Returns the unbiased binary exponent of x as an int.
 * Zero yields FP_ILOGB0, NaN yields FP_ILOGBNAN and infinity INT_MAX;
 * subnormals are normalized by counting leading zero mantissa bits.
 */
static int __ilogb(double x)
{
    union { double f; UINT64 i; } u = { x };
    int e = u.i >> 52 & 0x7ff;

    if (e == 0x7ff) return u.i << 12 ? FP_ILOGBNAN : INT_MAX;
    if (e) return e - 0x3ff;

    /* zero exponent field: drop sign and exponent, leaving only the mantissa */
    u.i <<= 12;
    if (!u.i) return FP_ILOGB0;

    /* subnormal x */
    e = -0x3ff;
    while (!(u.i >> 63))
    {
        u.i <<= 1;
        e--;
    }
    return e;
}
4667
4668 /*********************************************************************
4669  *              _logb (MSVCRT.@)
4670  *
4671  * Copied from musl: src/math/logb.c
4672  */
4673 double CDECL _logb(double x)
4674 {
4675     if (!isfinite(x))
4676         return x * x;
4677     if (x == 0)
4678         return math_error(_SING, "_logb", x, 0, -1 / (x * x));
4679     return __ilogb(x);
4680 }
4681
/*
 * Squares x into a head/tail pair: *hi receives the rounded product x * x
 * and *lo a correction term built from the split halves of x.
 */
static void sq(double *hi, double *lo, double x)
{
    double head, tail, scaled, err;

    /* split x into a short head and the remaining tail */
    scaled = x * (0x1p27 + 1);
    head = x - scaled + scaled;
    tail = x - head;

    *hi = x * x;

    /* accumulate the partial products in the same order as a single expression would */
    err = head * head - *hi;
    err = err + 2 * head * tail;
    err = err + tail * tail;
    *lo = err;
}
4692
4693 /*********************************************************************
4694  *              _hypot (MSVCRT.@)
4695  *
4696  * Copied from musl: src/math/hypot.c
4697  */
4698 double CDECL _hypot(double x, double y)
4699 {
4700     UINT64 ux = *(UINT64*)&x, uy = *(UINT64*)&y, ut;
4701     double hx, lx, hy, ly, z;
4702     int ex, ey;
4703
4704     /* arrange |x| >= |y| */
4705     ux &= -1ULL >> 1;
4706     uy &= -1ULL >> 1;
4707     if (ux < uy) {
4708         ut = ux;
4709         ux = uy;
4710         uy = ut;
4711     }
4712
4713     /* special cases */
4714     ex = ux >> 52;
4715     ey = uy >> 52;
4716     x = *(double*)&ux;
4717     y = *(double*)&uy;
4718     /* note: hypot(inf,nan) == inf */
4719     if (ey == 0x7ff)
4720         return y;
4721     if (ex == 0x7ff || uy == 0)
4722         return x;
4723     /* note: hypot(x,y) ~= x + y*y/x/2 with inexact for small y/x */
4724     /* 64 difference is enough for ld80 double_t */
4725     if (ex - ey > 64)
4726         return x + y;
4727
4728     /* precise sqrt argument in nearest rounding mode without overflow */
4729     /* xh*xh must not overflow and xl*xl must not underflow in sq */
4730     z = 1;
4731     if (ex > 0x3ff + 510) {
4732         z = 0x1p700;
4733         x *= 0x1p-700;
4734         y *= 0x1p-700;
4735     } else if (ey < 0x3ff - 450) {
4736         z = 0x1p-700;
4737         x *= 0x1p700;
4738         y *= 0x1p700;
4739     }
4740     sq(&hx, &lx, x);
4741     sq(&hy, &ly, y);
4742     return z * sqrt(ly + lx + hy + hx);
4743 }
4744
4745 /*********************************************************************
4746  *      _hypotf (MSVCRT.@)
4747  *
4748  * Copied from musl: src/math/hypotf.c
4749  */
4750 float CDECL _hypotf(float x, float y)
4751 {
4752     UINT32 ux = *(UINT32*)&x, uy = *(UINT32*)&y, ut;
4753     float z;
4754
4755     ux &= -1U >> 1;
4756     uy &= -1U >> 1;
4757     if (ux < uy) {
4758         ut = ux;
4759         ux = uy;
4760         uy = ut;
4761     }
4762
4763     x = *(float*)&ux;
4764     y = *(float*)&uy;
4765     if (uy == 0xff << 23)
4766         return y;
4767     if (ux >= 0xff << 23 || uy == 0 || ux - uy >= 25 << 23)
4768         return x + y;
4769
4770     z = 1;
4771     if (ux >= (0x7f + 60) << 23) {
4772         z = 0x1p90f;
4773         x *= 0x1p-90f;
4774         y *= 0x1p-90f;
4775     } else if (uy < (0x7f - 60) << 23) {
4776         z = 0x1p-90f;
4777         x *= 0x1p90f;
4778         y *= 0x1p90f;
4779     }
4780     return z * sqrtf((double)x * x + (double)y * y);
4781 }
4782
/*********************************************************************
 *              ceil (MSVCRT.@)
 *
 * Based on musl: src/math/ceilf.c
 *
 * Rounds x up to the nearest integral value by clearing the fractional
 * mantissa bits directly in the bit pattern.
 */
double CDECL ceil( double x )
{
    union {double f; UINT64 i;} u = {x};
    const int exp = (u.i >> 52 & 0x7ff) - 0x3ff;
    const int negative = u.i >> 63;
    UINT64 frac;

    /* already integral, infinite or NaN */
    if (exp >= 52)
        return x;

    if (exp < 0) {
        /* |x| < 1 */
        if (negative)
            return -0.0;
        return (u.i << 1) ? 1.0 : u.f;
    }

    frac = 0x000fffffffffffffULL >> exp;
    if (!(u.i & frac))
        return x;
    /* positive values are bumped past the next integer before truncating */
    if (!negative)
        u.i += frac;
    u.i &= ~frac;
    return u.f;
}
4811
/*********************************************************************
 *              floor (MSVCRT.@)
 *
 * Based on musl: src/math/floorf.c
 *
 * Rounds x down to the nearest integral value by clearing the fractional
 * mantissa bits directly in the bit pattern.
 */
double CDECL floor( double x )
{
    union {double f; UINT64 i;} u = {x};
    const int exp = (int)(u.i >> 52 & 0x7ff) - 0x3ff;
    const int negative = u.i >> 63;
    UINT64 frac;

    /* already integral, infinite or NaN */
    if (exp >= 52)
        return x;

    if (exp < 0) {
        /* |x| < 1 */
        if (!negative)
            return 0;
        return (u.i << 1) ? -1 : u.f;
    }

    frac = 0x000fffffffffffffULL >> exp;
    if (!(u.i & frac))
        return x;
    /* negative values are bumped past the next integer before truncating */
    if (negative)
        u.i += frac;
    u.i &= ~frac;
    return u.f;
}
4840
4841 /*********************************************************************
4842  *      fma (MSVCRT.@)
4843  *
4844  * Copied from musl: src/math/fma.c
4845  */
/* A double decomposed for the integer arithmetic in fma(): value = m * 2**e */
struct fma_num
{
    UINT64 m;   /* significand with the leading one at bit 53 and bit 0 clear */
    int e;      /* exponent matching that significand layout */
    int sign;   /* 0x800 when the sign bit of the input is set, otherwise 0 */
};

/*
 * Splits x into sign, exponent and significand.  Subnormals are scaled by
 * 2**63 first; zero ends up with an exponent above the one produced for
 * infinities and NaNs.
 */
static struct fma_num normalize(double x)
{
    union { double f; UINT64 i; } u = { x };
    struct fma_num num;
    int biased = u.i >> 52;

    num.sign = biased & 0x800;
    biased &= 0x7ff;
    if (!biased) {
        /* zero or subnormal: rescale so that a non-zero value gets a normal exponent */
        u.f *= 0x1p63;
        biased = u.i >> 52 & 0x7ff;
        biased = biased ? biased - 63 : 0x800;
    }

    /* mantissa bits plus the implicit one, shifted up by one position */
    num.m = ((u.i & ((1ull << 52) - 1)) | 1ull << 52) << 1;
    num.e = biased - (0x3ff + 52 + 1);
    return num;
}
4877
/*
 * Multiplies x by y into a 128-bit result split across *hi and *lo, using
 * 32-bit halves.  The two cross products are summed in a single 64-bit
 * value, which fma() keeps in range by passing significands whose top
 * bits are clear.
 */
static void mul(UINT64 *hi, UINT64 *lo, UINT64 x, UINT64 y)
{
    const UINT64 x_lo = (UINT32)x, x_hi = x >> 32;
    const UINT64 y_lo = (UINT32)y, y_hi = y >> 32;
    const UINT64 low = x_lo * y_lo;
    const UINT64 cross = x_lo * y_hi + x_hi * y_lo;
    const UINT64 sum = low + (cross << 32);

    *lo = sum;
    /* (low > sum) is the carry out of the low 64-bit addition */
    *hi = x_hi * y_hi + (cross >> 32) + (low > sum);
}
4890
/*
 * Computes x * y + z by operating on the integer significands produced by
 * normalize(): the product is formed exactly with mul(), z is aligned to it
 * (bits shifted out are folded into a sticky bit), and the 128-bit sum is
 * reduced to a 63-bit value that is converted to double and scaled with
 * __scalbn(), so that the result is rounded only once.
 *
 * When x or y is zero, infinite or NaN, or z is zero, the plain expression
 * x * y + z is returned, and errno is set to EDOM if that yields NaN from
 * non-NaN operands.  An infinite or NaN z is returned unchanged.
 */
4891 double CDECL fma( double x, double y, double z )
4892 {
4893     int e, d, sign, samesign, nonzero;
4894     UINT64 rhi, rlo, zhi, zlo;
4895     struct fma_num nx, ny, nz;
4896     double r;
4897     INT64 i;
4898
4899     /* normalize so top 10bits and last bit are 0 */
4900     nx = normalize(x);
4901     ny = normalize(y);
4902     nz = normalize(z);
4903
     /* normalize() gives inf/nan exactly this exponent and zero a larger one */
4904     if (nx.e >= 0x7ff - 0x3ff - 52 - 1 || ny.e >= 0x7ff - 0x3ff - 52 - 1) {
4905         r = x * y + z;
4906         if (!isnan(x) && !isnan(y) && !isnan(z) && isnan(r)) *_errno() = EDOM;
4907         return r;
4908     }
4909     if (nz.e >= 0x7ff - 0x3ff - 52 - 1) {
4910         if (nz.e > 0x7ff - 0x3ff - 52 - 1) {/* z==0 */
4911             r = x * y + z;
4912             if (!isnan(x) && !isnan(y) && isnan(r)) *_errno() = EDOM;
4913             return r;
4914         }
         /* z is infinite or NaN while x and y are finite and non-zero */
4915         return z;
4916     }
4917
4918     /* mul: r = x*y */
4919     mul(&rhi, &rlo, nx.m, ny.m);
4920     /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */
4921
4922     /* align exponents */
4923     e = nx.e + ny.e;
4924     d = nz.e - e;
4925     /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */
4926     if (d > 0) {
4927         if (d < 64) {
4928             zlo = nz.m << d;
4929             zhi = nz.m >> (64 - d);
4930         } else {
4931             zlo = 0;
4932             zhi = nz.m;
4933             e = nz.e - 64;
4934             d -= 64;
4935             if (d < 64 && d) {
                 /* bits shifted out of rlo are kept as a sticky low bit */
4936                 rlo = rhi << (64 - d) | rlo >> d | !!(rlo << (64 - d));
4937                 rhi = rhi >> d;
4938             } else if (d) {
                 /* the product lies entirely below z: only a sticky bit remains */
4939                 rlo = 1;
4940                 rhi = 0;
4941             }
4942         }
4943     } else {
4944         zhi = 0;
4945         d = -d;
4946         if (d == 0) {
4947             zlo = nz.m;
4948         } else if (d < 64) {
4949             zlo = nz.m >> d | !!(nz.m << (64 - d));
4950         } else {
4951             zlo = 1;
4952         }
4953     }
4954
4955     /* add */
4956     sign = nx.sign ^ ny.sign;
4957     samesign = !(sign ^ nz.sign);
4958     nonzero = 1;
4959     if (samesign) {
4960         /* r += z */
4961         rlo += zlo;
4962         rhi += zhi + (rlo < zlo);
4963     } else {
4964         /* r -= z */
4965         UINT64 t = rlo;
4966         rlo -= zlo;
4967         rhi = rhi - zhi - (t < rlo);
         /* a set top bit means the difference went negative: negate it and flip the sign */
4968         if (rhi >> 63) {
4969             rlo = -rlo;
4970             rhi = -rhi - !!rlo;
4971             sign = !sign;
4972         }
4973         nonzero = !!rhi;
4974     }
4975
4976     /* set rhi to top 63bit of the result (last bit is sticky) */
4977     if (nonzero) {
4978         e += 64;
         /* d becomes the left shift that moves the highest set bit of rhi to bit 62 */
4979         if (rhi >> 32) {
4980             BitScanReverse((DWORD*)&d, rhi >> 32);
4981             d = 31 - d - 1;
4982         } else {
4983             BitScanReverse((DWORD*)&d, rhi);
4984             d = 63 - d - 1;
4985         }
4986         /* note: d > 0 */
4987         rhi = rhi << d | rlo >> (64 - d) | !!(rlo << d);
4988     } else if (rlo) {
         /* same normalization applied to rlo; d is -1 when bit 63 of rlo is set */
4989         if (rlo >> 32) {
4990             BitScanReverse((DWORD*)&d, rlo >> 32);
4991             d = 31 - d - 1;
4992         } else {
4993             BitScanReverse((DWORD*)&d, rlo);
4994             d = 63 - d - 1;
4995         }
4996         if (d < 0)
4997             rhi = rlo >> 1 | (rlo & 1);
4998         else
4999             rhi = rlo << d;
5000     } else {
5001         /* exact +-0 */
5002         return x * y + z;
5003     }
5004     e -= d;
5005
5006     /* convert to double */
5007     i = rhi; /* i is in [1<<62,(1<<63)-1] */
5008     if (sign)
5009         i = -i;
5010     r = i; /* |r| is in [0x1p62,0x1p63] */
5011
5012     if (e < -1022 - 62) {
5013         /* result is subnormal before rounding */
5014         if (e == -1022 - 63) {
5015             double c = 0x1p63;
5016             if (sign)
5017                 c = -c;
5018             if (r == c) {
5019                 /* min normal after rounding, underflow depends
5020                    on arch behaviour which can be imitated by
5021                    a double to float conversion */
5022                 float fltmin = 0x0.ffffff8p-63 * FLT_MIN * r;
5023                 return DBL_MIN / FLT_MIN * fltmin;
5024             }
5025             /* one bit is lost when scaled, add another top bit to
5026                only round once at conversion if it is inexact */
5027             if (rhi << 53) {
5028                 double tiny;
5029
5030                 i = rhi >> 1 | (rhi & 1) | 1ull << 62;
5031                 if (sign)
5032                     i = -i;
5033                 r = i;
5034                 r = 2 * r - c; /* remove top bit */
5035
5036                 /* raise underflow portably, such that it
5037                    cannot be optimized away */
5038                 tiny = DBL_MIN / FLT_MIN * r;
5039                 r += (double)(tiny * tiny) * (r - r);
5040             }
5041         } else {
5042             /* only round once when scaled */
5043             d = 10;
5044             i = (rhi >> d | !!(rhi << (64 - d))) << d;
5045             if (sign)
5046                 i = -i;
5047             r = i;
5048         }
5049     }
5050     return __scalbn(r, e);
5051 }
5052
5053 /*********************************************************************
5054  *      fmaf (MSVCRT.@)
5055  *
5056  * Copied from musl: src/math/fmaf.c
5057  */
5058 float CDECL fmaf( float x, float y, float z )
5059 {
5060     union { double f; UINT64 i; } u;
5061     double xy, err;
5062     int e, neg;
5063
5064     xy = (double)x * y;
5065     u.f = xy + z;
5066     e = u.i>>52 & 0x7ff;
5067     /* Common case: The double precision result is fine. */
5068     if ((u.i & 0x1fffffff) != 0x10000000 || /* not a halfway case */
5069             e == 0x7ff || /* NaN */
5070             (u.f - xy == z && u.f - z == xy) || /* exact */
5071             (_controlfp(0, 0) & _MCW_RC) != _RC_NEAR) /* not round-to-nearest */
5072     {
5073         if (!isnan(x) && !isnan(y) && !isnan(z) && isnan(u.f)) *_errno() = EDOM;
5074
5075         /* underflow may not be raised correctly, example:
5076            fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) */
5077         if (e < 0x3ff-126 && e >= 0x3ff-149 && _statusfp() & _SW_INEXACT)
5078             fp_barrierf((float)u.f * (float)u.f);
5079         return u.f;
5080     }
5081
5082     /*
5083      * If result is inexact, and exactly halfway between two float values,
5084      * we need to adjust the low-order bit in the direction of the error.
5085      */
5086     neg = u.i >> 63;
5087     if (neg == (z > xy))
5088         err = xy - u.f + z;
5089     else
5090         err = z - u.f + xy;
5091     if (neg == (err < 0))
5092         u.i++;
5093     else
5094         u.i--;
5095     return u.f;
5096 }
5097
/*********************************************************************
 *              fabs (MSVCRT.@)
 *
 * Copied from musl: src/math/fabsf.c
 *
 * Returns x with its sign bit cleared.
 */
double CDECL fabs( double x )
{
    union { double f; UINT64 i; } u;

    u.f = x;
    u.i &= 0x7fffffffffffffffull;   /* everything except the sign bit */
    return u.f;
}
5109
/*********************************************************************
 *              frexp (MSVCRT.@)
 *
 * Copied from musl: src/math/frexp.c
 *
 * Splits x into a fraction with magnitude in [0.5, 1) and a power of two
 * stored in *e.  Zero gives *e = 0; infinities and NaNs are returned
 * unchanged without writing *e.
 */
double CDECL frexp( double x, int *e )
{
    union { double f; UINT64 i; } u = { x };
    const int biased = u.i >> 52 & 0x7ff;

    if (biased == 0x7ff)
        return x;

    if (biased) {
        *e = biased - 0x3fe;
        /* keep sign and mantissa, force the exponent field to that of 0.5 */
        u.i = (u.i & 0x800fffffffffffffull) | 0x3fe0000000000000ull;
        return u.f;
    }

    if (!x) {
        *e = 0;
        return x;
    }

    /* subnormal: scale into the normal range, then correct the exponent */
    x = frexp(x * 0x1p64, e);
    *e -= 64;
    return x;
}
5135
/*********************************************************************
 *              modf (MSVCRT.@)
 *
 * Copied from musl: src/math/modf.c
 *
 * Stores the integral part of x in *iptr and returns the fractional part.
 * Both parts carry the sign of x; a NaN is returned as both parts.
 */
double CDECL modf( double x, double *iptr )
{
    union {double f; UINT64 i;} u = {x};
    const int exp = (u.i >> 52 & 0x7ff) - 0x3ff;
    UINT64 frac_bits;

    /* no integral part*/
    if (exp < 0) {
        u.i &= 1ULL << 63;
        *iptr = u.f;
        return x;
    }

    /* no fractional part */
    if (exp >= 52) {
        *iptr = x;
        if (exp == 0x400 && u.i << 12 != 0) /* nan */
            return x;
        u.i &= 1ULL << 63;
        return u.f;
    }

    frac_bits = -1ULL >> 12 >> exp;
    if (u.i & frac_bits) {
        /* truncate toward zero and return what was cut off */
        u.i &= ~frac_bits;
        *iptr = u.f;
        return x - u.f;
    }

    /* integral value: the fraction is a zero with the sign of x */
    *iptr = x;
    u.i &= 1ULL << 63;
    return u.f;
}
5173
5174 #if defined(__i386__) || defined(__x86_64__)
5175 static void _setfp_sse( unsigned int *cw, unsigned int cw_mask,
5176         unsigned int *sw, unsigned int sw_mask )
5177 {
5178 #if defined(__GNUC__) || defined(__clang__)
5179     unsigned long old_fpword, fpword;
5180     unsigned int flags;
5181
5182     __asm__ __volatile__( "stmxcsr %0" : "=m" (fpword) );
5183     old_fpword = fpword;
5184
5185     cw_mask &= _MCW_EM | _MCW_RC | _MCW_DN;
5186     sw_mask &= _MCW_EM;
5187
5188     if (sw)
5189     {
5190         flags = 0;
5191         if (fpword & 0x1) flags |= _SW_INVALID;
5192         if (fpword & 0x2) flags |= _SW_DENORMAL;
5193         if (fpword & 0x4) flags |= _SW_ZERODIVIDE;
5194         if (fpword & 0x8) flags |= _SW_OVERFLOW;
5195         if (fpword & 0x10) flags |= _SW_UNDERFLOW;
5196         if (fpword & 0x20) flags |= _SW_INEXACT;
5197
5198         *sw = (flags & ~sw_mask) | (*sw & sw_mask);
5199         TRACE("sse2 update sw %08x to %08x\n", flags, *sw);
5200         fpword &= ~0x3f;
5201         if (*sw & _SW_INVALID) fpword |= 0x1;
5202         if (*sw & _SW_DENORMAL) fpword |= 0x2;
5203         if (*sw & _SW_ZERODIVIDE) fpword |= 0x4;
5204         if (*sw & _SW_OVERFLOW) fpword |= 0x8;
5205         if (*sw & _SW_UNDERFLOW) fpword |= 0x10;
5206         if (*sw & _SW_INEXACT) fpword |= 0x20;
5207         *sw = flags;
5208     }
5209
5210     if (cw)
5211     {
5212         flags = 0;
5213         if (fpword & 0x80) flags |= _EM_INVALID;
5214         if (fpword & 0x100) flags |= _EM_DENORMAL;
5215         if (fpword & 0x200) flags |= _EM_ZERODIVIDE;
5216         if (fpword & 0x400) flags |= _EM_OVERFLOW;
5217         if (fpword & 0x800) flags |= _EM_UNDERFLOW;
5218         if (fpword & 0x1000) flags |= _EM_INEXACT;
5219         switch (fpword & 0x6000)
5220         {
5221         case 0x6000: flags |= _RC_UP|_RC_DOWN; break;
5222         case 0x4000: flags |= _RC_UP; break;
5223         case 0x2000: flags |= _RC_DOWN; break;
5224         }
5225         switch (fpword & 0x8040)
5226         {
5227         case 0x0040: flags |= _DN_FLUSH_OPERANDS_SAVE_RESULTS; break;
5228         case 0x8000: flags |= _DN_SAVE_OPERANDS_FLUSH_RESULTS; break;
5229         case 0x8040: flags |= _DN_FLUSH; break;
5230         }
5231
5232         *cw = (flags & ~cw_mask) | (*cw & cw_mask);
5233         TRACE("sse2 update cw %08x to %08x\n", flags, *cw);
5234         fpword &= ~0xffc0;
5235         if (*cw & _EM_INVALID) fpword |= 0x80;
5236         if (*cw & _EM_DENORMAL) fpword |= 0x100;
5237         if (*cw & _EM_ZERODIVIDE) fpword |= 0x200;
5238         if (*cw & _EM_OVERFLOW) fpword |= 0x400;
5239         if (*cw & _EM_UNDERFLOW) fpword |= 0x800;
5240         if (*cw & _EM_INEXACT) fpword |= 0x1000;
5241         switch (*cw & _MCW_RC)
5242         {
5243         case _RC_UP|_RC_DOWN: fpword |= 0x6000; break;
5244         case _RC_UP: fpword |= 0x4000; break;
5245         case _RC_DOWN: fpword |= 0x2000; break;
5246         }
5247         switch (*cw & _MCW_DN)
5248         {
5249         case _DN_FLUSH_OPERANDS_SAVE_RESULTS: fpword |= 0x0040; break;
5250         case _DN_SAVE_OPERANDS_FLUSH_RESULTS: fpword |= 0x8000; break;
5251         case _DN_FLUSH: fpword |= 0x8040; break;
5252         }
5253
5254         /* clear status word if anything changes */
5255         if (fpword != old_fpword && !sw)
5256         {
5257             TRACE("sse2 clear status word\n");
5258             fpword &= ~0x3f;
5259         }
5260     }
5261
5262     if (fpword != old_fpword)
5263         __asm__ __volatile__( "ldmxcsr %0" : : "m" (fpword) );
5264 #else
5265     FIXME("not implemented\n");
5266     if (cw) *cw = 0;
5267     if (sw) *sw = 0;
5268 #endif
5269 }
5270 #endif
5271
/*
 * Read, and optionally modify, the native floating point control and
 * status registers using the msvcrt flag encodings.
 *
 * cw, sw            in/out control word (_EM_*, _RC_*, _PC_*, _IC_* flags)
 *                   and status word (_SW_* flags); either may be NULL to
 *                   leave the corresponding register untouched
 * cw_mask, sw_mask  bits that are taken from the caller supplied *cw / *sw;
 *                   all other bits keep the value read from the hardware
 *
 * In the branches implemented here *cw holds the merged control word on
 * return, while *sw holds the status flags as they were read before the
 * update. i386 only deals with the x87 unit, x86_64 defers to _setfp_sse().
 * On unsupported targets both outputs are set to 0.
 */
static void _setfp( unsigned int *cw, unsigned int cw_mask,
        unsigned int *sw, unsigned int sw_mask )
{
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
    unsigned long oldcw = 0, newcw = 0;
    unsigned long oldsw = 0, newsw = 0;
    unsigned int flags;

    /* only these bits can be represented in the x87 registers */
    cw_mask &= _MCW_EM | _MCW_IC | _MCW_RC | _MCW_PC;
    sw_mask &= _MCW_EM;

    if (sw)
    {
        __asm__ __volatile__( "fstsw %0" : "=m" (newsw) );
        oldsw = newsw;

        /* x87 status bits -> _SW_* flags */
        flags = 0;
        if (newsw & 0x1) flags |= _SW_INVALID;
        if (newsw & 0x2) flags |= _SW_DENORMAL;
        if (newsw & 0x4) flags |= _SW_ZERODIVIDE;
        if (newsw & 0x8) flags |= _SW_OVERFLOW;
        if (newsw & 0x10) flags |= _SW_UNDERFLOW;
        if (newsw & 0x20) flags |= _SW_INEXACT;

        /* merge the requested bits, then convert back to the x87 layout */
        *sw = (flags & ~sw_mask) | (*sw & sw_mask);
        TRACE("x86 update sw %08x to %08x\n", flags, *sw);
        newsw &= ~0x3f;
        if (*sw & _SW_INVALID) newsw |= 0x1;
        if (*sw & _SW_DENORMAL) newsw |= 0x2;
        if (*sw & _SW_ZERODIVIDE) newsw |= 0x4;
        if (*sw & _SW_OVERFLOW) newsw |= 0x8;
        if (*sw & _SW_UNDERFLOW) newsw |= 0x10;
        if (*sw & _SW_INEXACT) newsw |= 0x20;
        /* the caller gets the status that was read, not the merged one */
        *sw = flags;
    }

    if (cw)
    {
        __asm__ __volatile__( "fstcw %0" : "=m" (newcw) );
        oldcw = newcw;

        /* x87 control bits -> _EM_*, _RC_*, _PC_*, _IC_* flags */
        flags = 0;
        if (newcw & 0x1) flags |= _EM_INVALID;
        if (newcw & 0x2) flags |= _EM_DENORMAL;
        if (newcw & 0x4) flags |= _EM_ZERODIVIDE;
        if (newcw & 0x8) flags |= _EM_OVERFLOW;
        if (newcw & 0x10) flags |= _EM_UNDERFLOW;
        if (newcw & 0x20) flags |= _EM_INEXACT;
        switch (newcw & 0xc00)
        {
        case 0xc00: flags |= _RC_UP|_RC_DOWN; break;
        case 0x800: flags |= _RC_UP; break;
        case 0x400: flags |= _RC_DOWN; break;
        }
        switch (newcw & 0x300)
        {
        case 0x0: flags |= _PC_24; break;
        case 0x200: flags |= _PC_53; break;
        case 0x300: flags |= _PC_64; break;
        }
        if (newcw & 0x1000) flags |= _IC_AFFINE;

        /* merge the requested bits, then convert back to the x87 layout */
        *cw = (flags & ~cw_mask) | (*cw & cw_mask);
        TRACE("x86 update cw %08x to %08x\n", flags, *cw);
        newcw &= ~0x1f3f;
        if (*cw & _EM_INVALID) newcw |= 0x1;
        if (*cw & _EM_DENORMAL) newcw |= 0x2;
        if (*cw & _EM_ZERODIVIDE) newcw |= 0x4;
        if (*cw & _EM_OVERFLOW) newcw |= 0x8;
        if (*cw & _EM_UNDERFLOW) newcw |= 0x10;
        if (*cw & _EM_INEXACT) newcw |= 0x20;
        switch (*cw & _MCW_RC)
        {
        case _RC_UP|_RC_DOWN: newcw |= 0xc00; break;
        case _RC_UP: newcw |= 0x800; break;
        case _RC_DOWN: newcw |= 0x400; break;
        }
        switch (*cw & _MCW_PC)
        {
        case _PC_64: newcw |= 0x300; break;
        case _PC_53: newcw |= 0x200; break;
        case _PC_24: newcw |= 0x0; break;
        }
        if (*cw & _IC_AFFINE) newcw |= 0x1000;
    }

    /* status flags have to be set: reload the whole environment with the
     * new control and status words */
    if (oldsw != newsw && (newsw & 0x3f))
    {
        struct {
            WORD control_word;
            WORD unused1;
            WORD status_word;
            WORD unused2;
            WORD tag_word;
            WORD unused3;
            DWORD instruction_pointer;
            WORD code_segment;
            WORD unused4;
            DWORD operand_addr;
            WORD data_segment;
            WORD unused5;
        } fenv;

        /* newcw is only meaningful when the control word was read above */
        assert(cw);

        __asm__ __volatile__( "fnstenv %0" : "=m" (fenv) );
        fenv.control_word = newcw;
        fenv.status_word = newsw;
        __asm__ __volatile__( "fldenv %0" : : "m" (fenv) : "st", "st(1)",
                "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)" );
        return;
    }

    /* otherwise a changed status word can only mean "all flags cleared" */
    if (oldsw != newsw)
        __asm__ __volatile__( "fnclex" );
    if (oldcw != newcw)
        __asm__ __volatile__( "fldcw %0" : : "m" (newcw) );
#elif defined(__x86_64__)
    _setfp_sse(cw, cw_mask, sw, sw_mask);
#elif defined(__aarch64__)
    ULONG_PTR old_fpsr = 0, fpsr = 0, old_fpcr = 0, fpcr = 0;
    unsigned int flags;

    cw_mask &= _MCW_EM | _MCW_RC;
    sw_mask &= _MCW_EM;

    if (sw)
    {
        __asm__ __volatile__( "mrs %0, fpsr" : "=r" (fpsr) );
        old_fpsr = fpsr;

        /* fpsr bits -> _SW_* flags */
        flags = 0;
        if (fpsr & 0x1) flags |= _SW_INVALID;
        if (fpsr & 0x2) flags |= _SW_ZERODIVIDE;
        if (fpsr & 0x4) flags |= _SW_OVERFLOW;
        if (fpsr & 0x8) flags |= _SW_UNDERFLOW;
        if (fpsr & 0x10) flags |= _SW_INEXACT;
        if (fpsr & 0x80) flags |= _SW_DENORMAL;

        *sw = (flags & ~sw_mask) | (*sw & sw_mask);
        TRACE("aarch64 update sw %08x to %08x\n", flags, *sw);
        fpsr &= ~0x9f;
        if (*sw & _SW_INVALID) fpsr |= 0x1;
        if (*sw & _SW_ZERODIVIDE) fpsr |= 0x2;
        if (*sw & _SW_OVERFLOW) fpsr |= 0x4;
        if (*sw & _SW_UNDERFLOW) fpsr |= 0x8;
        if (*sw & _SW_INEXACT) fpsr |= 0x10;
        if (*sw & _SW_DENORMAL) fpsr |= 0x80;
        /* the caller gets the status that was read, not the merged one */
        *sw = flags;
    }

    if (cw)
    {
        __asm__ __volatile__( "mrs %0, fpcr" : "=r" (fpcr) );
        old_fpcr = fpcr;

        /* a set fpcr bit enables the exception, msvcrt flags mask it */
        flags = 0;
        if (!(fpcr & 0x100)) flags |= _EM_INVALID;
        if (!(fpcr & 0x200)) flags |= _EM_ZERODIVIDE;
        if (!(fpcr & 0x400)) flags |= _EM_OVERFLOW;
        if (!(fpcr & 0x800)) flags |= _EM_UNDERFLOW;
        if (!(fpcr & 0x1000)) flags |= _EM_INEXACT;
        if (!(fpcr & 0x8000)) flags |= _EM_DENORMAL;
        switch (fpcr & 0xc00000)
        {
        case 0x400000: flags |= _RC_UP; break;
        case 0x800000: flags |= _RC_DOWN; break;
        case 0xc00000: flags |= _RC_CHOP; break;
        }

        *cw = (flags & ~cw_mask) | (*cw & cw_mask);
        TRACE("aarch64 update cw %08x to %08x\n", flags, *cw);
        fpcr &= ~0xc09f00ul;
        if (!(*cw & _EM_INVALID)) fpcr |= 0x100;
        if (!(*cw & _EM_ZERODIVIDE)) fpcr |= 0x200;
        if (!(*cw & _EM_OVERFLOW)) fpcr |= 0x400;
        if (!(*cw & _EM_UNDERFLOW)) fpcr |= 0x800;
        if (!(*cw & _EM_INEXACT)) fpcr |= 0x1000;
        if (!(*cw & _EM_DENORMAL)) fpcr |= 0x8000;
        switch (*cw & _MCW_RC)
        {
        case _RC_CHOP: fpcr |= 0xc00000; break;
        case _RC_UP: fpcr |= 0x400000; break;
        case _RC_DOWN: fpcr |= 0x800000; break;
        }
    }

    /* mask exceptions if needed
     *
     * The enable bits in fpcr sit 8 bits above the matching flags in fpsr,
     * so (old_fpcr >> 8) lines them up. If any flag of the new fpsr is still
     * enabled in the old fpcr, disable all exceptions before fpsr is written.
     * Both sides need parentheses: != binds tighter than &. */
    if (old_fpcr != fpcr && (~(old_fpcr >> 8) & fpsr & 0x9f) != (fpsr & 0x9f))
    {
        ULONG_PTR mask = fpcr & ~0x9f00;
        __asm__ __volatile__( "msr fpcr, %0" :: "r" (mask) );
    }

    if (old_fpsr != fpsr)
        __asm__ __volatile__( "msr fpsr, %0" :: "r" (fpsr) );
    if (old_fpcr != fpcr)
        __asm__ __volatile__( "msr fpcr, %0" :: "r" (fpcr) );
#elif defined(__arm__) && !defined(__SOFTFP__)
    DWORD old_fpscr, fpscr;
    unsigned int flags;

    /* control and status bits share the single fpscr register */
    __asm__ __volatile__( "vmrs %0, fpscr" : "=r" (fpscr) );
    old_fpscr = fpscr;

    cw_mask &= _MCW_EM | _MCW_RC;
    sw_mask &= _MCW_EM;

    if (sw)
    {
        flags = 0;
        if (fpscr & 0x1) flags |= _SW_INVALID;
        if (fpscr & 0x2) flags |= _SW_ZERODIVIDE;
        if (fpscr & 0x4) flags |= _SW_OVERFLOW;
        if (fpscr & 0x8) flags |= _SW_UNDERFLOW;
        if (fpscr & 0x10) flags |= _SW_INEXACT;
        if (fpscr & 0x80) flags |= _SW_DENORMAL;

        *sw = (flags & ~sw_mask) | (*sw & sw_mask);
        TRACE("arm update sw %08x to %08x\n", flags, *sw);
        fpscr &= ~0x9f;
        if (*sw & _SW_INVALID) fpscr |= 0x1;
        if (*sw & _SW_ZERODIVIDE) fpscr |= 0x2;
        if (*sw & _SW_OVERFLOW) fpscr |= 0x4;
        if (*sw & _SW_UNDERFLOW) fpscr |= 0x8;
        if (*sw & _SW_INEXACT) fpscr |= 0x10;
        if (*sw & _SW_DENORMAL) fpscr |= 0x80;
        /* the caller gets the status that was read, not the merged one */
        *sw = flags;
    }

    if (cw)
    {
        /* a set fpscr bit enables the exception, msvcrt flags mask it */
        flags = 0;
        if (!(fpscr & 0x100)) flags |= _EM_INVALID;
        if (!(fpscr & 0x200)) flags |= _EM_ZERODIVIDE;
        if (!(fpscr & 0x400)) flags |= _EM_OVERFLOW;
        if (!(fpscr & 0x800)) flags |= _EM_UNDERFLOW;
        if (!(fpscr & 0x1000)) flags |= _EM_INEXACT;
        if (!(fpscr & 0x8000)) flags |= _EM_DENORMAL;
        switch (fpscr & 0xc00000)
        {
        case 0x400000: flags |= _RC_UP; break;
        case 0x800000: flags |= _RC_DOWN; break;
        case 0xc00000: flags |= _RC_CHOP; break;
        }

        *cw = (flags & ~cw_mask) | (*cw & cw_mask);
        TRACE("arm update cw %08x to %08x\n", flags, *cw);
        fpscr &= ~0xc09f00ul;
        if (!(*cw & _EM_INVALID)) fpscr |= 0x100;
        if (!(*cw & _EM_ZERODIVIDE)) fpscr |= 0x200;
        if (!(*cw & _EM_OVERFLOW)) fpscr |= 0x400;
        if (!(*cw & _EM_UNDERFLOW)) fpscr |= 0x800;
        if (!(*cw & _EM_INEXACT)) fpscr |= 0x1000;
        if (!(*cw & _EM_DENORMAL)) fpscr |= 0x8000;
        switch (*cw & _MCW_RC)
        {
        case _RC_CHOP: fpscr |= 0xc00000; break;
        case _RC_UP: fpscr |= 0x400000; break;
        case _RC_DOWN: fpscr |= 0x800000; break;
        }
    }

    if (old_fpscr != fpscr)
        __asm__ __volatile__( "vmsr fpscr, %0" :: "r" (fpscr) );
#else
    FIXME("not implemented\n");
    if (cw) *cw = 0;
    if (sw) *sw = 0;
#endif
}
5543
/**********************************************************************
 *              _statusfp2 (MSVCR80.@)
 *
 * Report the x87 and SSE2 status words separately. Either pointer may be
 * NULL to skip that unit; the SSE2 word is reported as 0 when SSE2 is not
 * supported.
 */
#if defined(__i386__)
void CDECL _statusfp2( unsigned int *x86_sw, unsigned int *sse2_sw )
{
    /* a zero mask means "read only" */
    if (x86_sw) _setfp(NULL, 0, x86_sw, 0);

    if (sse2_sw)
    {
        if (!sse2_supported) *sse2_sw = 0;
        else _setfp_sse(NULL, 0, sse2_sw, 0);
    }
}
#endif
5558
5559 /**********************************************************************
5560  *              _statusfp (MSVCRT.@)
5561  */
5562 unsigned int CDECL _statusfp(void)
5563 {
5564     unsigned int flags = 0;
5565 #if defined(__i386__)
5566     unsigned int x86_sw, sse2_sw;
5567
5568     _statusfp2( &x86_sw, &sse2_sw );
5569     /* FIXME: there's no definition for ambiguous status, just return all status bits for now */
5570     flags = x86_sw | sse2_sw;
5571 #else
5572     _setfp(NULL, 0, &flags, 0);
5573 #endif
5574     return flags;
5575 }
5576
5577 /*********************************************************************
5578  *              _clearfp (MSVCRT.@)
5579  */
5580 unsigned int CDECL _clearfp(void)
5581 {
5582     unsigned int flags = 0;
5583 #ifdef __i386__
5584     _setfp(NULL, 0, &flags, _MCW_EM);
5585     if (sse2_supported)
5586     {
5587         unsigned int sse_sw = 0;
5588
5589         _setfp_sse(NULL, 0, &sse_sw, _MCW_EM);
5590         flags |= sse_sw;
5591     }
5592 #else
5593     _setfp(NULL, 0, &flags, _MCW_EM);
5594 #endif
5595     return flags;
5596 }
5597
5598 /*********************************************************************
5599  *              __fpecode (MSVCRT.@)
5600  */
5601 int * CDECL __fpecode(void)
5602 {
5603     return &msvcrt_get_thread_data()->fpecode;
5604 }
5605
5606 /*********************************************************************
5607  *              ldexp (MSVCRT.@)
5608  */
5609 double CDECL ldexp(double num, int exp)
5610 {
5611   double z = __scalbn(num, exp);
5612
5613   if (isfinite(num) && !isfinite(z))
5614     return math_error(_OVERFLOW, "ldexp", num, exp, z);
5615   if (num && isfinite(num) && !z)
5616     return math_error(_UNDERFLOW, "ldexp", num, exp, z);
5617   return z;
5618 }
5619
5620 /*********************************************************************
5621  *              _cabs (MSVCRT.@)
5622  */
5623 double CDECL _cabs(struct _complex num)
5624 {
5625   return sqrt(num.x * num.x + num.y * num.y);
5626 }
5627
5628 /*********************************************************************
5629  *              _chgsign (MSVCRT.@)
5630  */
5631 double CDECL _chgsign(double num)
5632 {
5633     union { double f; UINT64 i; } u = { num };
5634     u.i ^= 1ull << 63;
5635     return u.f;
5636 }
5637
/*********************************************************************
 *              __control87_2 (MSVCR80.@)
 *
 * Not exported by native msvcrt, added in msvcr80.
 *
 * Apply newval under mask to the x87 and/or SSE2 control word and store
 * the resulting word through the matching pointer. Either pointer may be
 * NULL to skip that unit. Always returns 1.
 */
#ifdef __i386__
int CDECL __control87_2( unsigned int newval, unsigned int mask,
                         unsigned int *x86_cw, unsigned int *sse2_cw )
{
    if (x86_cw)
    {
        *x86_cw = newval;
        _setfp(x86_cw, mask, NULL, 0);
    }

    if (sse2_cw)
    {
        if (!sse2_supported) *sse2_cw = 0;
        else
        {
            *sse2_cw = newval;
            _setfp_sse(sse2_cw, mask, NULL, 0);
        }
    }

    return 1;
}
#endif
5665
5666 /*********************************************************************
5667  *              _control87 (MSVCRT.@)
5668  */
5669 unsigned int CDECL _control87(unsigned int newval, unsigned int mask)
5670 {
5671     unsigned int flags = 0;
5672 #ifdef __i386__
5673     unsigned int sse2_cw;
5674
5675     __control87_2( newval, mask, &flags, &sse2_cw );
5676
5677     if (sse2_supported)
5678     {
5679         if ((flags ^ sse2_cw) & (_MCW_EM | _MCW_RC)) flags |= _EM_AMBIGUOUS;
5680         flags |= sse2_cw;
5681     }
5682 #else
5683     flags = newval;
5684     _setfp(&flags, mask, NULL, 0);
5685 #endif
5686     return flags;
5687 }
5688
5689 /*********************************************************************
5690  *              _controlfp (MSVCRT.@)
5691  */
5692 unsigned int CDECL _controlfp(unsigned int newval, unsigned int mask)
5693 {
5694   return _control87( newval, mask & ~_EM_DENORMAL );
5695 }
5696
5697 /*********************************************************************
5698  *              _set_controlfp (MSVCRT.@)
5699  */
5700 void CDECL _set_controlfp( unsigned int newval, unsigned int mask )
5701 {
5702     _controlfp( newval, mask );
5703 }
5704
5705 /*********************************************************************
5706  *              _controlfp_s (MSVCRT.@)
5707  */
5708 int CDECL _controlfp_s(unsigned int *cur, unsigned int newval, unsigned int mask)
5709 {
5710     static const unsigned int all_flags = (_MCW_EM | _MCW_IC | _MCW_RC |
5711                                            _MCW_PC | _MCW_DN);
5712     unsigned int val;
5713
5714     if (!MSVCRT_CHECK_PMT( !(newval & mask & ~all_flags) ))
5715     {
5716         if (cur) *cur = _controlfp( 0, 0 );  /* retrieve it anyway */
5717         return EINVAL;
5718     }
5719     val = _controlfp( newval, mask );
5720     if (cur) *cur = val;
5721     return 0;
5722 }
5723
5724 #if _MSVCR_VER >= 140 && (defined(__i386__) || defined(__x86_64__))
/*
 * Bit patterns used by fenv_encode()/fenv_decode() below. FENV_X_* values
 * describe the x87 word, FENV_Y_* values the SSE word. Where a flag exists
 * for both, the two constants share their low 16 bits and differ in the
 * high 16 bits.
 */
enum fenv_masks
{
    /* x87: exception flags */
    FENV_X_INVALID    = 0x00100010,
    FENV_X_DENORMAL   = 0x00200020,
    FENV_X_ZERODIVIDE = 0x00080008,
    FENV_X_OVERFLOW   = 0x00040004,
    FENV_X_UNDERFLOW  = 0x00020002,
    FENV_X_INEXACT    = 0x00010001,
    /* x87: infinity, rounding and precision control */
    FENV_X_AFFINE     = 0x00004000,
    FENV_X_UP         = 0x00800200,
    FENV_X_DOWN       = 0x00400100,
    FENV_X_24         = 0x00002000,
    FENV_X_53         = 0x00001000,

    /* sse: exception flags */
    FENV_Y_INVALID    = 0x10000010,
    FENV_Y_DENORMAL   = 0x20000020,
    FENV_Y_ZERODIVIDE = 0x08000008,
    FENV_Y_OVERFLOW   = 0x04000004,
    FENV_Y_UNDERFLOW  = 0x02000002,
    FENV_Y_INEXACT    = 0x01000001,
    /* sse: rounding and denormal control */
    FENV_Y_UP         = 0x80000200,
    FENV_Y_DOWN       = 0x40000100,
    FENV_Y_FLUSH      = 0x00000400,
    FENV_Y_FLUSH_SAVE = 0x00000800
};
5749
5750 /* encodes x87/sse control/status word in ulong */
5751 static __msvcrt_ulong fenv_encode(unsigned int x, unsigned int y)
5752 {
5753     __msvcrt_ulong ret = 0;
5754
5755 #ifdef __i386__
5756     if (x & _EM_INVALID) ret |= FENV_X_INVALID;
5757     if (x & _EM_DENORMAL) ret |= FENV_X_DENORMAL;
5758     if (x & _EM_ZERODIVIDE) ret |= FENV_X_ZERODIVIDE;
5759     if (x & _EM_OVERFLOW) ret |= FENV_X_OVERFLOW;
5760     if (x & _EM_UNDERFLOW) ret |= FENV_X_UNDERFLOW;
5761     if (x & _EM_INEXACT) ret |= FENV_X_INEXACT;
5762     if (x & _IC_AFFINE) ret |= FENV_X_AFFINE;
5763     if (x & _RC_UP) ret |= FENV_X_UP;
5764     if (x & _RC_DOWN) ret |= FENV_X_DOWN;
5765     if (x & _PC_24) ret |= FENV_X_24;
5766     if (x & _PC_53) ret |= FENV_X_53;
5767 #endif
5768     x &= ~(_MCW_EM | _MCW_IC | _MCW_RC | _MCW_PC);
5769
5770     if (y & _EM_INVALID) ret |= FENV_Y_INVALID;
5771     if (y & _EM_DENORMAL) ret |= FENV_Y_DENORMAL;
5772     if (y & _EM_ZERODIVIDE) ret |= FENV_Y_ZERODIVIDE;
5773     if (y & _EM_OVERFLOW) ret |= FENV_Y_OVERFLOW;
5774     if (y & _EM_UNDERFLOW) ret |= FENV_Y_UNDERFLOW;
5775     if (y & _EM_INEXACT) ret |= FENV_Y_INEXACT;
5776     if (y & _RC_UP) ret |= FENV_Y_UP;
5777     if (y & _RC_DOWN) ret |= FENV_Y_DOWN;
5778     if (y & _DN_FLUSH) ret |= FENV_Y_FLUSH;
5779     if (y & _DN_FLUSH_OPERANDS_SAVE_RESULTS) ret |= FENV_Y_FLUSH_SAVE;
5780     y &= ~(_MCW_EM | _MCW_IC | _MCW_RC | _MCW_DN);
5781
5782     if(x || y) FIXME("unsupported flags: %x, %x\n", x, y);
5783     return ret;
5784 }
5785
5786 /* decodes x87/sse control/status word, returns FALSE on error */
5787 static BOOL fenv_decode(__msvcrt_ulong enc, unsigned int *x, unsigned int *y)
5788 {
5789     *x = *y = 0;
5790     if ((enc & FENV_X_INVALID) == FENV_X_INVALID) *x |= _EM_INVALID;
5791     if ((enc & FENV_X_DENORMAL) == FENV_X_DENORMAL) *x |= _EM_DENORMAL;
5792     if ((enc & FENV_X_ZERODIVIDE) == FENV_X_ZERODIVIDE) *x |= _EM_ZERODIVIDE;
5793     if ((enc & FENV_X_OVERFLOW) == FENV_X_OVERFLOW) *x |= _EM_OVERFLOW;
5794     if ((enc & FENV_X_UNDERFLOW) == FENV_X_UNDERFLOW) *x |= _EM_UNDERFLOW;
5795     if ((enc & FENV_X_INEXACT) == FENV_X_INEXACT) *x |= _EM_INEXACT;
5796     if ((enc & FENV_X_AFFINE) == FENV_X_AFFINE) *x |= _IC_AFFINE;
5797     if ((enc & FENV_X_UP) == FENV_X_UP) *x |= _RC_UP;
5798     if ((enc & FENV_X_DOWN) == FENV_X_DOWN) *x |= _RC_DOWN;
5799     if ((enc & FENV_X_24) == FENV_X_24) *x |= _PC_24;
5800     if ((enc & FENV_X_53) == FENV_X_53) *x |= _PC_53;
5801
5802     if ((enc & FENV_Y_INVALID) == FENV_Y_INVALID) *y |= _EM_INVALID;
5803     if ((enc & FENV_Y_DENORMAL) == FENV_Y_DENORMAL) *y |= _EM_DENORMAL;
5804     if ((enc & FENV_Y_ZERODIVIDE) == FENV_Y_ZERODIVIDE) *y |= _EM_ZERODIVIDE;
5805     if ((enc & FENV_Y_OVERFLOW) == FENV_Y_OVERFLOW) *y |= _EM_OVERFLOW;
5806     if ((enc & FENV_Y_UNDERFLOW) == FENV_Y_UNDERFLOW) *y |= _EM_UNDERFLOW;
5807     if ((enc & FENV_Y_INEXACT) == FENV_Y_INEXACT) *y |= _EM_INEXACT;
5808     if ((enc & FENV_Y_UP) == FENV_Y_UP) *y |= _RC_UP;
5809     if ((enc & FENV_Y_DOWN) == FENV_Y_DOWN) *y |= _RC_DOWN;
5810     if ((enc & FENV_Y_FLUSH) == FENV_Y_FLUSH) *y |= _DN_FLUSH;
5811     if ((enc & FENV_Y_FLUSH_SAVE) == FENV_Y_FLUSH_SAVE) *y |= _DN_FLUSH_OPERANDS_SAVE_RESULTS;
5812
5813     if (fenv_encode(*x, *y) != enc)
5814     {
5815         WARN("can't decode: %lx\n", enc);
5816         return FALSE;
5817     }
5818     return TRUE;
5819 }
5820 #elif _MSVCR_VER >= 120
5821 static __msvcrt_ulong fenv_encode(unsigned int x, unsigned int y)
5822 {
5823     if (y & _EM_DENORMAL)
5824         y = (y & ~_EM_DENORMAL) | 0x20;
5825
5826     return x | y;
5827 }
5828
5829 static BOOL fenv_decode(__msvcrt_ulong enc, unsigned int *x, unsigned int *y)
5830 {
5831     if (enc & 0x20)
5832         enc = (enc & ~0x20) | _EM_DENORMAL;
5833
5834     *x = *y = enc;
5835     return TRUE;
5836 }
5837 #endif
5838
5839 #if _MSVCR_VER>=120
5840 /*********************************************************************
5841  *              fegetenv (MSVCR120.@)
5842  */
5843 int CDECL fegetenv(fenv_t *env)
5844 {
5845 #if _MSVCR_VER>=140 && defined(__i386__)
5846     unsigned int x87, sse;
5847     __control87_2(0, 0, &x87, &sse);
5848     env->_Fe_ctl = fenv_encode(x87, sse);
5849     _statusfp2(&x87, &sse);
5850     env->_Fe_stat = fenv_encode(x87, sse);
5851 #elif _MSVCR_VER>=140
5852     env->_Fe_ctl = fenv_encode(0, _control87(0, 0));
5853     env->_Fe_stat = fenv_encode(0, _statusfp());
5854 #else
5855     env->_Fe_ctl = _controlfp(0, 0) & (_EM_INEXACT | _EM_UNDERFLOW |
5856             _EM_OVERFLOW | _EM_ZERODIVIDE | _EM_INVALID | _MCW_RC);
5857     env->_Fe_stat = _statusfp();
5858 #endif
5859     return 0;
5860 }
5861
5862 /*********************************************************************
5863  *              feupdateenv (MSVCR120.@)
5864  */
5865 int CDECL feupdateenv(const fenv_t *env)
5866 {
5867     fenv_t set;
5868     fegetenv(&set);
5869     set._Fe_ctl = env->_Fe_ctl;
5870     set._Fe_stat |= env->_Fe_stat;
5871     return fesetenv(&set);
5872 }
5873
5874 /*********************************************************************
5875  *      fetestexcept (MSVCR120.@)
5876  */
5877 int CDECL fetestexcept(int flags)
5878 {
5879     return _statusfp() & flags;
5880 }
5881
5882 /*********************************************************************
5883  *      fesetexceptflag (MSVCR120.@)
5884  */
5885 int CDECL fesetexceptflag(const fexcept_t *status, int excepts)
5886 {
5887     fenv_t env;
5888
5889     excepts &= FE_ALL_EXCEPT;
5890     if(!excepts)
5891         return 0;
5892
5893     fegetenv(&env);
5894     env._Fe_stat &= ~fenv_encode(excepts, excepts);
5895     env._Fe_stat |= *status & fenv_encode(excepts, excepts);
5896     return fesetenv(&env);
5897 }
5898
5899 /*********************************************************************
5900  *      feraiseexcept (MSVCR120.@)
5901  */
5902 int CDECL feraiseexcept(int flags)
5903 {
5904     fenv_t env;
5905
5906     flags &= FE_ALL_EXCEPT;
5907     fegetenv(&env);
5908     env._Fe_stat |= fenv_encode(flags, flags);
5909     return fesetenv(&env);
5910 }
5911
5912 /*********************************************************************
5913  *      feclearexcept (MSVCR120.@)
5914  */
5915 int CDECL feclearexcept(int flags)
5916 {
5917     fenv_t env;
5918
5919     fegetenv(&env);
5920     flags &= FE_ALL_EXCEPT;
5921     env._Fe_stat &= ~fenv_encode(flags, flags);
5922     return fesetenv(&env);
5923 }
5924
5925 /*********************************************************************
5926  *      fegetexceptflag (MSVCR120.@)
5927  */
5928 int CDECL fegetexceptflag(fexcept_t *status, int excepts)
5929 {
5930 #if _MSVCR_VER>=140 && defined(__i386__)
5931     unsigned int x87, sse;
5932     _statusfp2(&x87, &sse);
5933     *status = fenv_encode(x87 & excepts, sse & excepts);
5934 #else
5935     *status = fenv_encode(0, _statusfp() & excepts);
5936 #endif
5937     return 0;
5938 }
5939 #endif
5940
#if _MSVCR_VER>=140
/*********************************************************************
 *              __fpe_flt_rounds (UCRTBASE.@)
 *
 * Map the current rounding mode to a number:
 * 0 for _RC_CHOP, 1 for _RC_NEAR, 2 for _RC_UP and 3 otherwise.
 */
int CDECL __fpe_flt_rounds(void)
{
    unsigned int rounding = _controlfp(0, 0) & _RC_CHOP;

    TRACE("()\n");

    if (rounding == _RC_CHOP) return 0;
    if (rounding == _RC_NEAR) return 1;
    if (rounding == _RC_UP) return 2;
    return 3;
}
#endif
5959
5960 #if _MSVCR_VER>=120
5961
5962 /*********************************************************************
5963  *              fegetround (MSVCR120.@)
5964  */
5965 int CDECL fegetround(void)
5966 {
5967     return _controlfp(0, 0) & _MCW_RC;
5968 }
5969
5970 /*********************************************************************
5971  *              fesetround (MSVCR120.@)
5972  */
5973 int CDECL fesetround(int round_mode)
5974 {
5975     if (round_mode & (~_MCW_RC))
5976         return 1;
5977     _controlfp(round_mode, _MCW_RC);
5978     return 0;
5979 }
5980
5981 #endif /* _MSVCR_VER>=120 */
5982
5983 /*********************************************************************
5984  *              _copysign (MSVCRT.@)
5985  *
5986  * Copied from musl: src/math/copysign.c
5987  */
5988 double CDECL _copysign( double x, double y )
5989 {
5990     union { double f; UINT64 i; } ux = { x }, uy = { y };
5991     ux.i &= ~0ull >> 1;
5992     ux.i |= uy.i & 1ull << 63;
5993     return ux.f;
5994 }
5995
5996 /*********************************************************************
5997  *              _finite (MSVCRT.@)
5998  */
5999 int CDECL _finite(double num)
6000 {
6001     union { double f; UINT64 i; } u = { num };
6002     return (u.i & ~0ull >> 1) < 0x7ffull << 52;
6003 }
6004
6005 /*********************************************************************
6006  *              _fpreset (MSVCRT.@)
6007  */
6008 void CDECL _fpreset(void)
6009 {
6010 #if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
6011     const unsigned int x86_cw = 0x27f;
6012     __asm__ __volatile__( "fninit; fldcw %0" : : "m" (x86_cw) );
6013     if (sse2_supported)
6014     {
6015         unsigned int cw = _MCW_EM, sw = 0;
6016         _setfp_sse(&cw, ~0, &sw, ~0);
6017     }
6018 #else
6019     unsigned int cw = _MCW_EM, sw = 0;
6020     _setfp(&cw, ~0, &sw, ~0);
6021 #endif
6022 }
6023
#if _MSVCR_VER>=120
/*********************************************************************
 *              fesetenv (MSVCR120.@)
 *
 * Install the environment stored in *env. An all-zero environment resets
 * the floating point state with _fpreset(). Returns 0 on success and 1
 * when the control or status word can't be decoded.
 */
int CDECL fesetenv(const fenv_t *env)
{
#if _MSVCR_VER >= 140
    const unsigned int mask = ~0;
#else
    const unsigned int mask = _EM_INEXACT | _EM_UNDERFLOW | _EM_OVERFLOW
        | _EM_ZERODIVIDE | _EM_INVALID | _MCW_RC;
#endif
    unsigned int x87_cw, cw, x87_stat, stat;

    TRACE( "(%p)\n", env );

    if (!(env->_Fe_ctl || env->_Fe_stat))
    {
        _fpreset();
        return 0;
    }

    if (!fenv_decode(env->_Fe_ctl, &x87_cw, &cw) ||
        !fenv_decode(env->_Fe_stat, &x87_stat, &stat))
        return 1;

#ifdef __i386__
    _setfp(&x87_cw, mask, &x87_stat, ~0);
    if (sse2_supported)
        _setfp_sse(&cw, mask, &stat, ~0);
#else
    _setfp(&cw, mask, &stat, ~0);
#endif
    return 0;
}
#endif
6063
6064 /*********************************************************************
6065  *              _isnan (MSVCRT.@)
6066  */
6067 int CDECL _isnan(double num)
6068 {
6069     union { double f; UINT64 i; } u = { num };
6070     return (u.i & ~0ull >> 1) > 0x7ffull << 52;
6071 }
6072
/*
 * Helper for j0_y0_approx(): evaluates 1 + R(z) / S(z) with z = 1 / x^2.
 * R and S are polynomials whose coefficients are picked by the magnitude
 * of x; the tables are labelled with the interval they were made for and
 * the smallest one starts at |x| = 2. The sign of x does not matter.
 */
static double pzero(double x)
{
    static const double pR8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
        0.00000000000000000000e+00,
        -7.03124999999900357484e-02,
        -8.08167041275349795626e+00,
        -2.57063105679704847262e+02,
        -2.48521641009428822144e+03,
        -5.25304380490729545272e+03,
    }, pS8[5] = {
        1.16534364619668181717e+02,
        3.83374475364121826715e+03,
        4.05978572648472545552e+04,
        1.16752972564375915681e+05,
        4.76277284146730962675e+04,
    }, pR5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
        -1.14125464691894502584e-11,
        -7.03124940873599280078e-02,
        -4.15961064470587782438e+00,
        -6.76747652265167261021e+01,
        -3.31231299649172967747e+02,
        -3.46433388365604912451e+02,
    }, pS5[5] = {
        6.07539382692300335975e+01,
        1.05125230595704579173e+03,
        5.97897094333855784498e+03,
        9.62544514357774460223e+03,
        2.40605815922939109441e+03,
    }, pR3[6] = {/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */
        -2.54704601771951915620e-09,
        -7.03119616381481654654e-02,
        -2.40903221549529611423e+00,
        -2.19659774734883086467e+01,
        -5.80791704701737572236e+01,
        -3.14479470594888503854e+01,
    }, pS3[5] = {
        3.58560338055209726349e+01,
        3.61513983050303863820e+02,
        1.19360783792111533330e+03,
        1.12799679856907414432e+03,
        1.73580930813335754692e+02,
    }, pR2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
        -8.87534333032526411254e-08,
        -7.03030995483624743247e-02,
        -1.45073846780952986357e+00,
        -7.63569613823527770791e+00,
        -1.11931668860356747786e+01,
        -3.23364579351335335033e+00,
    }, pS2[5] = {
        2.22202997532088808441e+01,
        1.36206794218215208048e+02,
        2.70470278658083486789e+02,
        1.53875394208320329881e+02,
        1.46576176948256193810e+01,
    };

    /* read the bits through a union, a pointer cast would break the
     * aliasing rules */
    union { double f; unsigned long long i; } u = { x };
    const double *p, *q;
    double z, r, s;
    unsigned int ix;

    /* high word of x without the sign bit */
    ix = u.i >> 32;
    ix &= 0x7fffffff;
    if (ix >= 0x40200000) {
        p = pR8;
        q = pS8;
    } else if (ix >= 0x40122E8B) {
        p = pR5;
        q = pS5;
    } else if (ix >= 0x4006DB6D) {
        p = pR3;
        q = pS3;
    } else /*ix >= 0x40000000*/ {
        p = pR2;
        q = pS2;
    }

    z = 1.0 / (x * x);
    r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
    s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4]))));
    return 1.0 + r / s;
}
6154
/*
 * Helper for j0_y0_approx(): evaluates (-0.125 + R(z) / S(z)) / x with
 * z = 1 / x^2. R and S are polynomials whose coefficients are picked by
 * the magnitude of x; the tables are labelled with the interval they were
 * made for and the smallest one starts at |x| = 2.
 */
static double qzero(double x)
{
    static const double qR8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
        0.00000000000000000000e+00,
        7.32421874999935051953e-02,
        1.17682064682252693899e+01,
        5.57673380256401856059e+02,
        8.85919720756468632317e+03,
        3.70146267776887834771e+04,
    }, qS8[6] = {
        1.63776026895689824414e+02,
        8.09834494656449805916e+03,
        1.42538291419120476348e+05,
        8.03309257119514397345e+05,
        8.40501579819060512818e+05,
        -3.43899293537866615225e+05,
    }, qR5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
        1.84085963594515531381e-11,
        7.32421766612684765896e-02,
        5.83563508962056953777e+00,
        1.35111577286449829671e+02,
        1.02724376596164097464e+03,
        1.98997785864605384631e+03,
    }, qS5[6] = {
        8.27766102236537761883e+01,
        2.07781416421392987104e+03,
        1.88472887785718085070e+04,
        5.67511122894947329769e+04,
        3.59767538425114471465e+04,
        -5.35434275601944773371e+03,
    }, qR3[6] = {/* for x in [4.547,2.8571]=1/[0.2199,0.35001] */
        4.37741014089738620906e-09,
        7.32411180042911447163e-02,
        3.34423137516170720929e+00,
        4.26218440745412650017e+01,
        1.70808091340565596283e+02,
        1.66733948696651168575e+02,
    }, qS3[6] = {
        4.87588729724587182091e+01,
        7.09689221056606015736e+02,
        3.70414822620111362994e+03,
        6.46042516752568917582e+03,
        2.51633368920368957333e+03,
        -1.49247451836156386662e+02,
    }, qR2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
        1.50444444886983272379e-07,
        7.32234265963079278272e-02,
        1.99819174093815998816e+00,
        1.44956029347885735348e+01,
        3.16662317504781540833e+01,
        1.62527075710929267416e+01,
    }, qS2[6] = {
        3.03655848355219184498e+01,
        2.69348118608049844624e+02,
        8.44783757595320139444e+02,
        8.82935845112488550512e+02,
        2.12666388511798828631e+02,
        -5.31095493882666946917e+00,
    };

    /* read the bits through a union, a pointer cast would break the
     * aliasing rules */
    union { double f; unsigned long long i; } u = { x };
    const double *p, *q;
    double s, r, z;
    unsigned int ix;

    /* high word of x without the sign bit */
    ix = u.i >> 32;
    ix &= 0x7fffffff;
    if (ix >= 0x40200000) {
        p = qR8;
        q = qS8;
    } else if (ix >= 0x40122E8B) {
        p = qR5;
        q = qS5;
    } else if (ix >= 0x4006DB6D) {
        p = qR3;
        q = qS3;
    } else /*ix >= 0x40000000*/ {
        p = qR2;
        q = qS2;
    }

    z = 1.0 / (x * x);
    r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
    s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5])))));
    return (-0.125 + r / s) / x;
}
6240
6241 /* j0 and y0 approximation for |x|>=2 */
6242 static double j0_y0_approx(unsigned int ix, double x, BOOL y0)
6243 {
6244     static const double invsqrtpi = 5.64189583547756279280e-01;
6245
6246     double s, c, ss, cc, z;
6247
6248     s = sin(x);
6249     c = cos(x);
6250     if (y0) c = -c;
6251     cc = s + c;
6252     /* avoid overflow in 2*x, big ulp error when x>=0x1p1023 */
6253     if (ix < 0x7fe00000) {
6254         ss = s - c;
6255         z = -cos(2 * x);
6256         if (s * c < 0) cc = z / ss;
6257         else ss = z / cc;
6258         if (ix < 0x48000000) {
6259             if (y0) ss = -ss;
6260             cc = pzero(x) * cc - qzero(x) * ss;
6261         }
6262     }
6263     return invsqrtpi * cc / sqrt(x);
6264 }
6265
6266 /*********************************************************************
6267  *              _j0 (MSVCRT.@)
6268  *
6269  * Copied from musl: src/math/j0.c
6270  */
6271 double CDECL _j0(double x)
6272 {
6273     static const double R02 =  1.56249999999999947958e-02,
6274             R03 = -1.89979294238854721751e-04,
6275             R04 =  1.82954049532700665670e-06,
6276             R05 = -4.61832688532103189199e-09,
6277             S01 =  1.56191029464890010492e-02,
6278             S02 =  1.16926784663337450260e-04,
6279             S03 =  5.13546550207318111446e-07,
6280             S04 =  1.16614003333790000205e-09;
6281
6282     double z, r, s;
6283     unsigned int ix;
6284
6285     ix = *(ULONGLONG*)&x >> 32;
6286     ix &= 0x7fffffff;
6287
6288     /* j0(+-inf)=0, j0(nan)=nan */
6289     if (ix >= 0x7ff00000)
6290         return math_error(_DOMAIN, "_j0", x, 0, 1 / (x * x));
6291     x = fabs(x);
6292
6293     if (ix >= 0x40000000) {  /* |x| >= 2 */
6294         /* large ulp error near zeros: 2.4, 5.52, 8.6537,.. */
6295         return j0_y0_approx(ix, x, FALSE);
6296     }
6297
6298     if (ix >= 0x3f200000) {  /* |x| >= 2**-13 */
6299         /* up to 4ulp error close to 2 */
6300         z = x * x;
6301         r = z * (R02 + z * (R03 + z * (R04 + z * R05)));
6302         s = 1 + z * (S01 + z * (S02 + z * (S03 + z * S04)));
6303         return (1 + x / 2) * (1 - x / 2) + z * (r / s);
6304     }
6305
6306     /* 1 - x*x/4 */
6307     /* prevent underflow */
6308     /* inexact should be raised when x!=0, this is not done correctly */
6309     if (ix >= 0x38000000)  /* |x| >= 2**-127 */
6310         x = 0.25 * x * x;
6311     return 1 - x;
6312 }
6313
6314 static double pone(double x)
6315 {
6316     static const double pr8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
6317         0.00000000000000000000e+00,
6318         1.17187499999988647970e-01,
6319         1.32394806593073575129e+01,
6320         4.12051854307378562225e+02,
6321         3.87474538913960532227e+03,
6322         7.91447954031891731574e+03,
6323     }, ps8[5] = {
6324         1.14207370375678408436e+02,
6325         3.65093083420853463394e+03,
6326         3.69562060269033463555e+04,
6327         9.76027935934950801311e+04,
6328         3.08042720627888811578e+04,
6329     }, pr5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
6330         1.31990519556243522749e-11,
6331         1.17187493190614097638e-01,
6332         6.80275127868432871736e+00,
6333         1.08308182990189109773e+02,
6334         5.17636139533199752805e+02,
6335         5.28715201363337541807e+02,
6336     }, ps5[5] = {
6337         5.92805987221131331921e+01,
6338         9.91401418733614377743e+02,
6339         5.35326695291487976647e+03,
6340         7.84469031749551231769e+03,
6341         1.50404688810361062679e+03,
6342     }, pr3[6] = {
6343         3.02503916137373618024e-09,
6344         1.17186865567253592491e-01,
6345         3.93297750033315640650e+00,
6346         3.51194035591636932736e+01,
6347         9.10550110750781271918e+01,
6348         4.85590685197364919645e+01,
6349     }, ps3[5] = {
6350         3.47913095001251519989e+01,
6351         3.36762458747825746741e+02,
6352         1.04687139975775130551e+03,
6353         8.90811346398256432622e+02,
6354         1.03787932439639277504e+02,
6355     }, pr2[6] = { /* for x in [2.8570,2]=1/[0.3499,0.5] */
6356         1.07710830106873743082e-07,
6357         1.17176219462683348094e-01,
6358         2.36851496667608785174e+00,
6359         1.22426109148261232917e+01,
6360         1.76939711271687727390e+01,
6361         5.07352312588818499250e+00,
6362     }, ps2[5] = {
6363         2.14364859363821409488e+01,
6364         1.25290227168402751090e+02,
6365         2.32276469057162813669e+02,
6366         1.17679373287147100768e+02,
6367         8.36463893371618283368e+00,
6368     };
6369
6370     const double *p, *q;
6371     double z, r, s;
6372     unsigned int ix;
6373
6374     ix = *(ULONGLONG*)&x >> 32;
6375     ix &= 0x7fffffff;
6376     if (ix >= 0x40200000) {
6377         p = pr8;
6378         q = ps8;
6379     } else if (ix >= 0x40122E8B) {
6380         p = pr5;
6381         q = ps5;
6382     } else if (ix >= 0x4006DB6D) {
6383         p = pr3;
6384         q = ps3;
6385     } else /*ix >= 0x40000000*/ {
6386         p = pr2;
6387         q = ps2;
6388     }
6389     z = 1.0 / (x * x);
6390     r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
6391     s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4]))));
6392     return 1.0 + r / s;
6393 }
6394
6395 static double qone(double x)
6396 {
6397     static const double qr8[6] = { /* for x in [inf, 8]=1/[0,0.125] */
6398         0.00000000000000000000e+00,
6399         -1.02539062499992714161e-01,
6400         -1.62717534544589987888e+01,
6401         -7.59601722513950107896e+02,
6402         -1.18498066702429587167e+04,
6403         -4.84385124285750353010e+04,
6404     }, qs8[6] = {
6405         1.61395369700722909556e+02,
6406         7.82538599923348465381e+03,
6407         1.33875336287249578163e+05,
6408         7.19657723683240939863e+05,
6409         6.66601232617776375264e+05,
6410         -2.94490264303834643215e+05,
6411     }, qr5[6] = { /* for x in [8,4.5454]=1/[0.125,0.22001] */
6412         -2.08979931141764104297e-11,
6413         -1.02539050241375426231e-01,
6414         -8.05644828123936029840e+00,
6415         -1.83669607474888380239e+02,
6416         -1.37319376065508163265e+03,
6417         -2.61244440453215656817e+03,
6418     }, qs5[6] = {
6419         8.12765501384335777857e+01,
6420         1.99179873460485964642e+03,
6421         1.74684851924908907677e+04,
6422         4.98514270910352279316e+04,
6423         2.79480751638918118260e+04,
6424         -4.71918354795128470869e+03,
6425     }, qr3[6] = {
6426         -5.07831226461766561369e-09,
6427         -1.02537829820837089745e-01,
6428         -4.61011581139473403113e+00,
6429         -5.78472216562783643212e+01,
6430         -2.28244540737631695038e+02,
6431         -2.19210128478909325622e+02,
6432     }, qs3[6] = {
6433         4.76651550323729509273e+01,
6434         6.73865112676699709482e+02,
6435         3.38015286679526343505e+03,
6436         5.54772909720722782367e+03,
6437         1.90311919338810798763e+03,
6438         -1.35201191444307340817e+02,
6439     }, qr2[6] = { /* for x in [2.8570,2]=1/[0.3499,0.5] */
6440         -1.78381727510958865572e-07,
6441         -1.02517042607985553460e-01,
6442         -2.75220568278187460720e+00,
6443         -1.96636162643703720221e+01,
6444         -4.23253133372830490089e+01,
6445         -2.13719211703704061733e+01,
6446     }, qs2[6] = {
6447         2.95333629060523854548e+01,
6448         2.52981549982190529136e+02,
6449         7.57502834868645436472e+02,
6450         7.39393205320467245656e+02,
6451         1.55949003336666123687e+02,
6452         -4.95949898822628210127e+00,
6453     };
6454
6455     const double *p, *q;
6456     double s, r, z;
6457     unsigned int ix;
6458
6459     ix = *(ULONGLONG*)&x >> 32;
6460     ix &= 0x7fffffff;
6461     if (ix >= 0x40200000) {
6462         p = qr8;
6463         q = qs8;
6464     } else if (ix >= 0x40122E8B) {
6465         p = qr5;
6466         q = qs5;
6467     } else if (ix >= 0x4006DB6D) {
6468         p = qr3;
6469         q = qs3;
6470     } else /*ix >= 0x40000000*/ {
6471         p = qr2;
6472         q = qs2;
6473     }
6474     z = 1.0 / (x * x);
6475     r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5]))));
6476     s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5])))));
6477     return (0.375 + r / s) / x;
6478 }
6479
6480 static double j1_y1_approx(unsigned int ix, double x, BOOL y1, int sign)
6481 {
6482     static const double invsqrtpi = 5.64189583547756279280e-01;
6483
6484     double z, s, c, ss, cc;
6485
6486     s = sin(x);
6487     if (y1) s = -s;
6488     c = cos(x);
6489     cc = s - c;
6490     if (ix < 0x7fe00000) {
6491         ss = -s - c;
6492         z = cos(2 * x);
6493         if (s * c > 0) cc = z / ss;
6494         else ss = z / cc;
6495         if (ix < 0x48000000) {
6496             if (y1)
6497                 ss = -ss;
6498             cc = pone(x) * cc - qone(x) * ss;
6499         }
6500     }
6501     if (sign)
6502         cc = -cc;
6503     return invsqrtpi * cc / sqrt(x);
6504 }
6505
6506 /*********************************************************************
6507  *              _j1 (MSVCRT.@)
6508  *
6509  * Copied from musl: src/math/j1.c
6510  */
6511 double CDECL _j1(double x)
6512 {
6513     static const double r00 = -6.25000000000000000000e-02,
6514         r01 =  1.40705666955189706048e-03,
6515         r02 = -1.59955631084035597520e-05,
6516         r03 =  4.96727999609584448412e-08,
6517         s01 =  1.91537599538363460805e-02,
6518         s02 =  1.85946785588630915560e-04,
6519         s03 =  1.17718464042623683263e-06,
6520         s04 =  5.04636257076217042715e-09,
6521         s05 =  1.23542274426137913908e-11;
6522
6523     double z, r, s;
6524     unsigned int ix;
6525     int sign;
6526
6527     ix = *(ULONGLONG*)&x >> 32;
6528     sign = ix >> 31;
6529     ix &= 0x7fffffff;
6530     if (ix >= 0x7ff00000)
6531         return math_error(isnan(x) ? 0 : _DOMAIN, "_j1", x, 0, 1 / (x * x));
6532     if (ix >= 0x40000000)  /* |x| >= 2 */
6533         return j1_y1_approx(ix, fabs(x), FALSE, sign);
6534     if (ix >= 0x38000000) {  /* |x| >= 2**-127 */
6535         z = x * x;
6536         r = z * (r00 + z * (r01 + z * (r02 + z * r03)));
6537         s = 1 + z * (s01 + z * (s02 + z * (s03 + z * (s04 + z * s05))));
6538         z = r / s;
6539     } else {
6540         /* avoid underflow, raise inexact if x!=0 */
6541         z = x;
6542     }
6543     return (0.5 + z) * x;
6544 }
6545
6546 /*********************************************************************
6547  *              _jn (MSVCRT.@)
6548  *
6549  * Copied from musl: src/math/jn.c
6550  */
6551 double CDECL _jn(int n, double x)
6552 {
6553     static const double invsqrtpi = 5.64189583547756279280e-01;
6554
6555     unsigned int ix, lx;
6556     int nm1, i, sign;
6557     double a, b, temp;
6558
6559     ix = *(ULONGLONG*)&x >> 32;
6560     lx = *(ULONGLONG*)&x;
6561     sign = ix >> 31;
6562     ix &= 0x7fffffff;
6563
6564     if ((ix | (lx | -lx) >> 31) > 0x7ff00000) /* nan */
6565         return x;
6566
6567     if (n == 0)
6568         return _j0(x);
6569     if (n < 0) {
6570         nm1 = -(n + 1);
6571         x = -x;
6572         sign ^= 1;
6573     } else {
6574         nm1 = n-1;
6575     }
6576     if (nm1 == 0)
6577         return j1(x);
6578
6579     sign &= n;  /* even n: 0, odd n: signbit(x) */
6580     x = fabs(x);
6581     if ((ix | lx) == 0 || ix == 0x7ff00000)  /* if x is 0 or inf */
6582         b = 0.0;
6583     else if (nm1 < x) {
6584         if (ix >= 0x52d00000) { /* x > 2**302 */
6585             switch(nm1 & 3) {
6586             case 0:
6587                 temp = -cos(x) + sin(x);
6588                 break;
6589             case 1:
6590                 temp = -cos(x) - sin(x);
6591                 break;
6592             case 2:
6593                 temp =  cos(x) - sin(x);
6594                 break;
6595             default:
6596                 temp =  cos(x) + sin(x);
6597                 break;
6598             }
6599             b = invsqrtpi * temp / sqrt(x);
6600         } else {
6601             a = _j0(x);
6602             b = _j1(x);
6603             for (i = 0; i < nm1; ) {
6604                 i++;
6605                 temp = b;
6606                 b = b * (2.0 * i / x) - a; /* avoid underflow */
6607                 a = temp;
6608             }
6609         }
6610     } else {
6611         if (ix < 0x3e100000) { /* x < 2**-29 */
6612             if (nm1 > 32)  /* underflow */
6613                 b = 0.0;
6614             else {
6615                 temp = x * 0.5;
6616                 b = temp;
6617                 a = 1.0;
6618                 for (i = 2; i <= nm1 + 1; i++) {
6619                     a *= (double)i; /* a = n! */
6620                     b *= temp;      /* b = (x/2)^n */
6621                 }
6622                 b = b / a;
6623             }
6624         } else {
6625             double t, q0, q1, w, h, z, tmp, nf;
6626             int k;
6627
6628             nf = nm1 + 1.0;
6629             w = 2 * nf / x;
6630             h = 2 / x;
6631             z = w + h;
6632             q0 = w;
6633             q1 = w * z - 1.0;
6634             k = 1;
6635             while (q1 < 1.0e9) {
6636                 k += 1;
6637                 z += h;
6638                 tmp = z * q1 - q0;
6639                 q0 = q1;
6640                 q1 = tmp;
6641             }
6642             for (t = 0.0, i = k; i >= 0; i--)
6643                 t = 1 / (2 * (i + nf) / x - t);
6644             a = t;
6645             b = 1.0;
6646             tmp = nf * log(fabs(w));
6647             if (tmp < 7.09782712893383973096e+02) {
6648                 for (i = nm1; i > 0; i--) {
6649                     temp = b;
6650                     b = b * (2.0 * i) / x - a;
6651                     a = temp;
6652                 }
6653             } else {
6654                 for (i = nm1; i > 0; i--) {
6655                     temp = b;
6656                     b = b * (2.0 * i) / x - a;
6657                     a = temp;
6658                     /* scale b to avoid spurious overflow */
6659                     if (b > 0x1p500) {
6660                         a /= b;
6661                         t /= b;
6662                         b  = 1.0;
6663                     }
6664                 }
6665             }
6666             z = j0(x);
6667             w = j1(x);
6668             if (fabs(z) >= fabs(w))
6669                 b = t * z / b;
6670             else
6671                 b = t * w / a;
6672         }
6673     }
6674     return sign ? -b : b;
6675 }
6676
6677 /*********************************************************************
6678  *              _y0 (MSVCRT.@)
6679  */
6680 double CDECL _y0(double x)
6681 {
6682     static const double tpi = 6.36619772367581382433e-01,
6683         u00  = -7.38042951086872317523e-02,
6684         u01  =  1.76666452509181115538e-01,
6685         u02  = -1.38185671945596898896e-02,
6686         u03  =  3.47453432093683650238e-04,
6687         u04  = -3.81407053724364161125e-06,
6688         u05  =  1.95590137035022920206e-08,
6689         u06  = -3.98205194132103398453e-11,
6690         v01  =  1.27304834834123699328e-02,
6691         v02  =  7.60068627350353253702e-05,
6692         v03  =  2.59150851840457805467e-07,
6693         v04  =  4.41110311332675467403e-10;
6694
6695     double z, u, v;
6696     unsigned int ix, lx;
6697
6698     ix = *(ULONGLONG*)&x >> 32;
6699     lx = *(ULONGLONG*)&x;
6700
6701     /* y0(nan)=nan, y0(<0)=nan, y0(0)=-inf, y0(inf)=0 */
6702     if ((ix << 1 | lx) == 0)
6703         return math_error(_OVERFLOW, "_y0", x, 0, -INFINITY);
6704     if (isnan(x))
6705         return x;
6706     if (ix >> 31)
6707         return math_error(_DOMAIN, "_y0", x, 0, 0 / (x - x));
6708     if (ix >= 0x7ff00000)
6709         return 1 / x;
6710
6711     if (ix >= 0x40000000) {  /* x >= 2 */
6712         /* large ulp errors near zeros: 3.958, 7.086,.. */
6713         return j0_y0_approx(ix, x, TRUE);
6714     }
6715
6716     if (ix >= 0x3e400000) {  /* x >= 2**-27 */
6717         /* large ulp error near the first zero, x ~= 0.89 */
6718         z = x * x;
6719         u = u00 + z * (u01 + z * (u02 + z * (u03 + z * (u04 + z * (u05 + z * u06)))));
6720         v = 1.0 + z * (v01 + z * (v02 + z * (v03 + z * v04)));
6721         return u / v + tpi * (j0(x) * log(x));
6722     }
6723     return u00 + tpi * log(x);
6724 }
6725
6726 /*********************************************************************
6727  *              _y1 (MSVCRT.@)
6728  */
6729 double CDECL _y1(double x)
6730 {
6731     static const double tpi = 6.36619772367581382433e-01,
6732         u00 =  -1.96057090646238940668e-01,
6733         u01 = 5.04438716639811282616e-02,
6734         u02 = -1.91256895875763547298e-03,
6735         u03 = 2.35252600561610495928e-05,
6736         u04 = -9.19099158039878874504e-08,
6737         v00 = 1.99167318236649903973e-02,
6738         v01 = 2.02552581025135171496e-04,
6739         v02 = 1.35608801097516229404e-06,
6740         v03 = 6.22741452364621501295e-09,
6741         v04 = 1.66559246207992079114e-11;
6742
6743     double z, u, v;
6744     unsigned int ix, lx;
6745
6746     ix = *(ULONGLONG*)&x >> 32;
6747     lx = *(ULONGLONG*)&x;
6748
6749     /* y1(nan)=nan, y1(<0)=nan, y1(0)=-inf, y1(inf)=0 */
6750     if ((ix << 1 | lx) == 0)
6751         return math_error(_OVERFLOW, "_y1", x, 0, -INFINITY);
6752     if (isnan(x))
6753         return x;
6754     if (ix >> 31)
6755         return math_error(_DOMAIN, "_y1", x, 0, 0 / (x - x));
6756     if (ix >= 0x7ff00000)
6757         return 1 / x;
6758
6759     if (ix >= 0x40000000)  /* x >= 2 */
6760         return j1_y1_approx(ix, x, TRUE, 0);
6761     if (ix < 0x3c900000)  /* x < 2**-54 */
6762         return -tpi / x;
6763     z = x * x;
6764     u = u00 + z * (u01 + z * (u02 + z * (u03 + z * u04)));
6765     v = 1 + z * (v00 + z * (v01 + z * (v02 + z * (v03 + z * v04))));
6766     return x * (u / v) + tpi * (j1(x) * log(x) - 1 / x);
6767 }
6768
6769 /*********************************************************************
6770  *              _yn (MSVCRT.@)
6771  *
6772  * Copied from musl: src/math/jn.c
6773  */
6774 double CDECL _yn(int n, double x)
6775 {
6776     static const double invsqrtpi = 5.64189583547756279280e-01;
6777
6778     unsigned int ix, lx, ib;
6779     int nm1, sign, i;
6780     double a, b, temp;
6781
6782     ix = *(ULONGLONG*)&x >> 32;
6783     lx = *(ULONGLONG*)&x;
6784     sign = ix >> 31;
6785     ix &= 0x7fffffff;
6786
6787     if ((ix | (lx | -lx) >> 31) > 0x7ff00000) /* nan */
6788         return x;
6789     if (sign && (ix | lx) != 0) /* x < 0 */
6790         return math_error(_DOMAIN, "_y1", x, 0, 0 / (x - x));
6791     if (ix == 0x7ff00000)
6792         return 0.0;
6793
6794     if (n == 0)
6795         return y0(x);
6796     if (n < 0) {
6797         nm1 = -(n + 1);
6798         sign = n & 1;
6799     } else {
6800         nm1 = n - 1;
6801         sign = 0;
6802     }
6803     if (nm1 == 0)
6804         return sign ? -y1(x) : y1(x);
6805
6806     if (ix >= 0x52d00000) { /* x > 2**302 */
6807         switch(nm1 & 3) {
6808         case 0:
6809             temp = -sin(x) - cos(x);
6810             break;
6811         case 1:
6812             temp = -sin(x) + cos(x);
6813             break;
6814         case 2:
6815             temp = sin(x) + cos(x);
6816             break;
6817         default:
6818             temp = sin(x) - cos(x);
6819             break;
6820         }
6821         b = invsqrtpi * temp / sqrt(x);
6822     } else {
6823         a = y0(x);
6824         b = y1(x);
6825         /* quit if b is -inf */
6826         ib = *(ULONGLONG*)&b >> 32;
6827         for (i = 0; i < nm1 && ib != 0xfff00000;) {
6828             i++;
6829             temp = b;
6830             b = (2.0 * i / x) * b - a;
6831             ib = *(ULONGLONG*)&b >> 32;
6832             a = temp;
6833         }
6834     }
6835     return sign ? -b : b;
6836 }
6837
6838 #if _MSVCR_VER>=120
6839
6840 /*********************************************************************
6841  *              _nearbyint (MSVCR120.@)
6842  *
6843  * Based on musl: src/math/nearbyteint.c
6844  */
6845 double CDECL nearbyint(double x)
6846 {
6847     BOOL update_cw, update_sw;
6848     unsigned int cw, sw;
6849
6850     _setfp(&cw, 0, &sw, 0);
6851     update_cw = !(cw & _EM_INEXACT);
6852     update_sw = !(sw & _SW_INEXACT);
6853     if (update_cw)
6854     {
6855         cw |= _EM_INEXACT;
6856         _setfp(&cw, _EM_INEXACT, NULL, 0);
6857     }
6858     x = rint(x);
6859     if (update_cw || update_sw)
6860     {
6861         sw = 0;
6862         cw &= ~_EM_INEXACT;
6863         _setfp(update_cw ? &cw : NULL, _EM_INEXACT,
6864                 update_sw ? &sw : NULL, _SW_INEXACT);
6865     }
6866     return x;
6867 }
6868
6869 /*********************************************************************
6870  *              _nearbyintf (MSVCR120.@)
6871  *
6872  * Based on musl: src/math/nearbyteintf.c
6873  */
6874 float CDECL nearbyintf(float x)
6875 {
6876     BOOL update_cw, update_sw;
6877     unsigned int cw, sw;
6878
6879     _setfp(&cw, 0, &sw, 0);
6880     update_cw = !(cw & _EM_INEXACT);
6881     update_sw = !(sw & _SW_INEXACT);
6882     if (update_cw)
6883     {
6884         cw |= _EM_INEXACT;
6885         _setfp(&cw, _EM_INEXACT, NULL, 0);
6886     }
6887     x = rintf(x);
6888     if (update_cw || update_sw)
6889     {
6890         sw = 0;
6891         cw &= ~_EM_INEXACT;
6892         _setfp(update_cw ? &cw : NULL, _EM_INEXACT,
6893                 update_sw ? &sw : NULL, _SW_INEXACT);
6894     }
6895     return x;
6896 }
6897
6898 /*********************************************************************
6899  *              nexttoward (MSVCR120.@)
6900  */
6901 double CDECL MSVCRT_nexttoward(double num, double next)
6902 {
6903     return _nextafter(num, next);
6904 }
6905
6906 /*********************************************************************
6907  *              nexttowardf (MSVCR120.@)
6908  *
6909  * Copied from musl: src/math/nexttowardf.c
6910  */
6911 float CDECL MSVCRT_nexttowardf(float x, double y)
6912 {
6913     unsigned int ix = *(unsigned int*)&x;
6914     unsigned int e;
6915     float ret;
6916
6917     if (isnan(x) || isnan(y))
6918         return x + y;
6919     if (x == y)
6920         return y;
6921     if (x == 0) {
6922         ix = 1;
6923         if (signbit(y))
6924             ix |= 0x80000000;
6925     } else if (x < y) {
6926         if (signbit(x))
6927             ix--;
6928         else
6929             ix++;
6930     } else {
6931         if (signbit(x))
6932             ix++;
6933         else
6934             ix--;
6935     }
6936     e = ix & 0x7f800000;
6937     /* raise overflow if ix is infinite and x is finite */
6938     if (e == 0x7f800000) {
6939         fp_barrierf(x + x);
6940         *_errno() = ERANGE;
6941     }
6942     ret = *(float*)&ix;
6943     /* raise underflow if ret is subnormal or zero */
6944     if (e == 0) {
6945         fp_barrierf(x * x + ret * ret);
6946         *_errno() = ERANGE;
6947     }
6948     return ret;
6949 }
6950
6951 #endif /* _MSVCR_VER>=120 */
6952
6953 /*********************************************************************
6954  *              _nextafter (MSVCRT.@)
6955  *
6956  * Copied from musl: src/math/nextafter.c
6957  */
6958 double CDECL _nextafter(double x, double y)
6959 {
6960     ULONGLONG llx = *(ULONGLONG*)&x;
6961     ULONGLONG lly = *(ULONGLONG*)&y;
6962     ULONGLONG ax, ay;
6963     int e;
6964
6965     if (isnan(x) || isnan(y))
6966         return x + y;
6967     if (llx == lly) {
6968         if (_fpclass(y) & (_FPCLASS_ND | _FPCLASS_PD | _FPCLASS_NZ | _FPCLASS_PZ ))
6969             *_errno() = ERANGE;
6970         return y;
6971     }
6972     ax = llx & -1ULL / 2;
6973     ay = lly & -1ULL / 2;
6974     if (ax == 0) {
6975         if (ay == 0)
6976             return y;
6977         llx = (lly & 1ULL << 63) | 1;
6978     } else if (ax > ay || ((llx ^ lly) & 1ULL << 63))
6979         llx--;
6980     else
6981         llx++;
6982     e = llx >> 52 & 0x7ff;
6983     /* raise overflow if llx is infinite and x is finite */
6984     if (e == 0x7ff) {
6985         fp_barrier(x + x);
6986         *_errno() = ERANGE;
6987     }
6988     /* raise underflow if llx is subnormal or zero */
6989     y = *(double*)&llx;
6990     if (e == 0) {
6991         fp_barrier(x * x + y * y);
6992         *_errno() = ERANGE;
6993     }
6994     return y;
6995 }
6996
6997 /*********************************************************************
6998  *              _ecvt (MSVCRT.@)
6999  */
7000 char * CDECL _ecvt( double number, int ndigits, int *decpt, int *sign )
7001 {
7002     int prec, len;
7003     thread_data_t *data = msvcrt_get_thread_data();
7004     /* FIXME: check better for overflow (native supports over 300 chars) */
7005     ndigits = min( ndigits, 80 - 8); /* 8 : space for sign, dec point, "e",
7006                                       * 4 for exponent and one for
7007                                       * terminating '\0' */
7008     if (!data->efcvt_buffer)
7009         data->efcvt_buffer = malloc( 80 ); /* ought to be enough */
7010
7011     /* handle cases with zero ndigits or less */
7012     prec = ndigits;
7013     if( prec < 1) prec = 2;
7014     len = _snprintf(data->efcvt_buffer, 80, "%.*le", prec - 1, number);
7015
7016     if (data->efcvt_buffer[0] == '-') {
7017         memmove( data->efcvt_buffer, data->efcvt_buffer + 1, len-- );
7018         *sign = 1;
7019     } else *sign = 0;
7020
7021     /* take the decimal "point away */
7022     if( prec != 1)
7023         memmove( data->efcvt_buffer + 1, data->efcvt_buffer + 2, len - 1 );
7024     /* take the exponential "e" out */
7025     data->efcvt_buffer[ prec] = '\0';
7026     /* read the exponent */
7027     sscanf( data->efcvt_buffer + prec + 1, "%d", decpt);
7028     (*decpt)++;
7029     /* adjust for some border cases */
7030     if( data->efcvt_buffer[0] == '0')/* value is zero */
7031         *decpt = 0;
7032     /* handle cases with zero ndigits or less */
7033     if( ndigits < 1){
7034         if( data->efcvt_buffer[ 0] >= '5')
7035             (*decpt)++;
7036         data->efcvt_buffer[ 0] = '\0';
7037     }
7038     TRACE("out=\"%s\"\n",data->efcvt_buffer);
7039     return data->efcvt_buffer;
7040 }
7041
7042 /*********************************************************************
7043  *              _ecvt_s (MSVCRT.@)
7044  */
7045 int CDECL _ecvt_s( char *buffer, size_t length, double number, int ndigits, int *decpt, int *sign )
7046 {
7047     int prec, len;
7048     char *result;
7049
7050     if (!MSVCRT_CHECK_PMT(buffer != NULL)) return EINVAL;
7051     if (!MSVCRT_CHECK_PMT(decpt != NULL)) return EINVAL;
7052     if (!MSVCRT_CHECK_PMT(sign != NULL)) return EINVAL;
7053     if (!MSVCRT_CHECK_PMT_ERR( length > 2, ERANGE )) return ERANGE;
7054     if (!MSVCRT_CHECK_PMT_ERR(ndigits < (int)length - 1, ERANGE )) return ERANGE;
7055
7056     /* handle cases with zero ndigits or less */
7057     prec = ndigits;
7058     if( prec < 1) prec = 2;
7059     result = malloc(prec + 8);
7060
7061     len = _snprintf(result, prec + 8, "%.*le", prec - 1, number);
7062     if (result[0] == '-') {
7063         memmove( result, result + 1, len-- );
7064         *sign = 1;
7065     } else *sign = 0;
7066
7067     /* take the decimal "point away */
7068     if( prec != 1)
7069         memmove( result + 1, result + 2, len - 1 );
7070     /* take the exponential "e" out */
7071     result[ prec] = '\0';
7072     /* read the exponent */
7073     sscanf( result + prec + 1, "%d", decpt);
7074     (*decpt)++;
7075     /* adjust for some border cases */
7076     if( result[0] == '0')/* value is zero */
7077         *decpt = 0;
7078     /* handle cases with zero ndigits or less */
7079     if( ndigits < 1){
7080         if( result[ 0] >= '5')
7081             (*decpt)++;
7082         result[ 0] = '\0';
7083     }
7084     memcpy( buffer, result, max(ndigits + 1, 1) );
7085     free( result );
7086     return 0;
7087 }
7088
7089 /***********************************************************************
7090  *              _fcvt  (MSVCRT.@)
7091  */
7092 char * CDECL _fcvt( double number, int ndigits, int *decpt, int *sign )
7093 {
7094     thread_data_t *data = msvcrt_get_thread_data();
7095     int stop, dec1, dec2;
7096     char *ptr1, *ptr2, *first;
7097     char buf[80]; /* ought to be enough */
7098     char decimal_separator = get_locinfo()->lconv->decimal_point[0];
7099
7100     if (!data->efcvt_buffer)
7101         data->efcvt_buffer = malloc( 80 ); /* ought to be enough */
7102
7103     stop = _snprintf(buf, 80, "%.*f", ndigits < 0 ? 0 : ndigits, number);
7104     ptr1 = buf;
7105     ptr2 = data->efcvt_buffer;
7106     first = NULL;
7107     dec1 = 0;
7108     dec2 = 0;
7109
7110     if (*ptr1 == '-') {
7111         *sign = 1;
7112         ptr1++;
7113     } else *sign = 0;
7114
7115     /* For numbers below the requested resolution, work out where
7116        the decimal point will be rather than finding it in the string */
7117     if (number < 1.0 && number > 0.0) {
7118         dec2 = log10(number + 1e-10);
7119         if (-dec2 <= ndigits) dec2 = 0;
7120     }
7121
7122     /* If requested digits is zero or less, we will need to truncate
7123      * the returned string */
7124     if (ndigits < 1) {
7125         stop += ndigits;
7126     }
7127
7128     while (*ptr1 == '0') ptr1++; /* Skip leading zeroes */
7129     while (*ptr1 != '\0' && *ptr1 != decimal_separator) {
7130         if (!first) first = ptr2;
7131         if ((ptr1 - buf) < stop) {
7132             *ptr2++ = *ptr1++;
7133         } else {
7134             ptr1++;
7135         }
7136         dec1++;
7137     }
7138
7139     if (ndigits > 0) {
7140         ptr1++;
7141         if (!first) {
7142             while (*ptr1 == '0') { /* Process leading zeroes */
7143                 *ptr2++ = *ptr1++;
7144                 dec1--;
7145             }
7146         }
7147         while (*ptr1 != '\0') {
7148             if (!first) first = ptr2;
7149             *ptr2++ = *ptr1++;
7150         }
7151     }
7152
7153     *ptr2 = '\0';
7154
7155     /* We never found a non-zero digit, then our number is either
7156      * smaller than the requested precision, or 0.0 */
7157     if (!first) {
7158         if (number > 0.0) {
7159             first = ptr2;
7160         } else {
7161             first = data->efcvt_buffer;
7162             dec1 = 0;
7163         }
7164     }
7165
7166     *decpt = dec2 ? dec2 : dec1;
7167     return first;
7168 }
7169
7170 /***********************************************************************
7171  *              _fcvt_s  (MSVCRT.@)
7172  */
7173 int CDECL _fcvt_s(char* outbuffer, size_t size, double number, int ndigits, int *decpt, int *sign)
7174 {
7175     int stop, dec1, dec2;
7176     char *ptr1, *ptr2, *first;
7177     char buf[80]; /* ought to be enough */
7178     char decimal_separator = get_locinfo()->lconv->decimal_point[0];
7179
7180     if (!outbuffer || !decpt || !sign || size == 0)
7181     {
7182         *_errno() = EINVAL;
7183         return EINVAL;
7184     }
7185
7186     stop = _snprintf(buf, 80, "%.*f", ndigits < 0 ? 0 : ndigits, number);
7187     ptr1 = buf;
7188     ptr2 = outbuffer;
7189     first = NULL;
7190     dec1 = 0;
7191     dec2 = 0;
7192
7193     if (*ptr1 == '-') {
7194         *sign = 1;
7195         ptr1++;
7196     } else *sign = 0;
7197
7198     /* For numbers below the requested resolution, work out where
7199        the decimal point will be rather than finding it in the string */
7200     if (number < 1.0 && number > 0.0) {
7201         dec2 = log10(number + 1e-10);
7202         if (-dec2 <= ndigits) dec2 = 0;
7203     }
7204
7205     /* If requested digits is zero or less, we will need to truncate
7206      * the returned string */
7207     if (ndigits < 1) {
7208         stop += ndigits;
7209     }
7210
7211     while (*ptr1 == '0') ptr1++; /* Skip leading zeroes */
7212     while (*ptr1 != '\0' && *ptr1 != decimal_separator) {
7213         if (!first) first = ptr2;
7214         if ((ptr1 - buf) < stop) {
7215             if (size > 1) {
7216                 *ptr2++ = *ptr1++;
7217                 size--;
7218             }
7219         } else {
7220             ptr1++;
7221         }
7222         dec1++;
7223     }
7224
7225     if (ndigits > 0) {
7226         ptr1++;
7227         if (!first) {
7228             while (*ptr1 == '0') { /* Process leading zeroes */
7229                 if (number == 0.0 && size > 1) {
7230                     *ptr2++ = '0';
7231                     size--;
7232                 }
7233                 ptr1++;
7234                 dec1--;
7235             }
7236         }
7237         while (*ptr1 != '\0') {
7238             if (!first) first = ptr2;
7239             if (size > 1) {
7240                 *ptr2++ = *ptr1++;
7241                 size--;
7242             }
7243         }
7244     }
7245
7246     *ptr2 = '\0';
7247
7248     /* We never found a non-zero digit, then our number is either
7249      * smaller than the requested precision, or 0.0 */
7250     if (!first && (number <= 0.0))
7251         dec1 = 0;
7252
7253     *decpt = dec2 ? dec2 : dec1;
7254     return 0;
7255 }
7256
7257 /***********************************************************************
7258  *              _gcvt  (MSVCRT.@)
7259  */
7260 char * CDECL _gcvt( double number, int ndigit, char *buff )
7261 {
7262     if(!buff) {
7263         *_errno() = EINVAL;
7264         return NULL;
7265     }
7266
7267     if(ndigit < 0) {
7268         *_errno() = ERANGE;
7269         return NULL;
7270     }
7271
7272     sprintf(buff, "%.*g", ndigit, number);
7273     return buff;
7274 }
7275
7276 /***********************************************************************
7277  *              _gcvt_s  (MSVCRT.@)
7278  */
7279 int CDECL _gcvt_s(char *buff, size_t size, double number, int digits)
7280 {
7281     int len;
7282
7283     if(!buff) {
7284         *_errno() = EINVAL;
7285         return EINVAL;
7286     }
7287
7288     if( digits<0 || digits>=size) {
7289         if(size)
7290             buff[0] = '\0';
7291
7292         *_errno() = ERANGE;
7293         return ERANGE;
7294     }
7295
7296     len = _scprintf("%.*g", digits, number);
7297     if(len > size) {
7298         buff[0] = '\0';
7299         *_errno() = ERANGE;
7300         return ERANGE;
7301     }
7302
7303     sprintf(buff, "%.*g", digits, number);
7304     return 0;
7305 }
7306
7307 #include <stdlib.h> /* div_t, ldiv_t */
7308
7309 /*********************************************************************
7310  *              div (MSVCRT.@)
7311  * VERSION
7312  *      [i386] Windows binary compatible - returns the struct in eax/edx.
7313  */
7314 #ifdef __i386__
7315 unsigned __int64 CDECL div(int num, int denom)
7316 {
7317     union {
7318         div_t div;
7319         unsigned __int64 uint64;
7320     } ret;
7321
7322     ret.div.quot = num / denom;
7323     ret.div.rem = num % denom;
7324     return ret.uint64;
7325 }
7326 #else
7327 /*********************************************************************
7328  *              div (MSVCRT.@)
7329  * VERSION
7330  *      [!i386] Non-x86 can't run win32 apps so we don't need binary compatibility
7331  */
7332 div_t CDECL div(int num, int denom)
7333 {
7334     div_t ret;
7335
7336     ret.quot = num / denom;
7337     ret.rem = num % denom;
7338     return ret;
7339 }
7340 #endif /* ifdef __i386__ */
7341
7342
7343 /*********************************************************************
7344  *              ldiv (MSVCRT.@)
7345  * VERSION
7346  *      [i386] Windows binary compatible - returns the struct in eax/edx.
7347  */
7348 #ifdef __i386__
7349 unsigned __int64 CDECL ldiv(__msvcrt_long num, __msvcrt_long denom)
7350 {
7351     union {
7352         ldiv_t ldiv;
7353         unsigned __int64 uint64;
7354     } ret;
7355
7356     ret.ldiv.quot = num / denom;
7357     ret.ldiv.rem = num % denom;
7358     return ret.uint64;
7359 }
7360 #else
7361 /*********************************************************************
7362  *              ldiv (MSVCRT.@)
7363  * VERSION
7364  *      [!i386] Non-x86 can't run win32 apps so we don't need binary compatibility
7365  */
7366 ldiv_t CDECL ldiv(__msvcrt_long num, __msvcrt_long denom)
7367 {
7368     ldiv_t ret;
7369
7370     ret.quot = num / denom;
7371     ret.rem = num % denom;
7372     return ret;
7373 }
7374 #endif /* ifdef __i386__ */
7375
#if _MSVCR_VER>=100
/*********************************************************************
 *		lldiv (MSVCR100.@)
 *
 * Returns both the quotient and the remainder of num / denom.
 */
lldiv_t CDECL lldiv(__int64 num, __int64 denom)
{
  const lldiv_t result = { .quot = num / denom, .rem = num % denom };

  return result;
}
#endif
7390
7391 #ifdef __i386__
7392
/*********************************************************************
 *		_adjust_fdiv (MSVCRT.@)
 * Used by the MSVC compiler to work around the Pentium FDIV bug.
 *
 * Defined as 0 here.
 * NOTE(review): presumably 0 tells compiled code that no FDIV
 * adjustment is needed - confirm against the MSVC runtime.
 */
int MSVCRT__adjust_fdiv = 0;
7398
/***********************************************************************
 *		_adj_fdiv_m16i (MSVCRT.@)
 *
 * Stub: the body only emits a trace message; arg is never read and
 * nothing is computed.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void __stdcall _adj_fdiv_m16i( short arg )
{
  TRACE("(): stub\n");
}
7410
/***********************************************************************
 *		_adj_fdiv_m32 (MSVCRT.@)
 *
 * Stub: the body only emits a trace message; arg is never read and
 * nothing is computed.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void __stdcall _adj_fdiv_m32( unsigned int arg )
{
  TRACE("(): stub\n");
}
7422
/***********************************************************************
 *		_adj_fdiv_m32i (MSVCRT.@)
 *
 * Stub: the body only emits a trace message; arg is never read and
 * nothing is computed.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void __stdcall _adj_fdiv_m32i( int arg )
{
  TRACE("(): stub\n");
}
7434
/***********************************************************************
 *		_adj_fdiv_m64 (MSVCRT.@)
 *
 * Stub: the body only emits a trace message; arg is never read and
 * nothing is computed.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void __stdcall _adj_fdiv_m64( unsigned __int64 arg )
{
  TRACE("(): stub\n");
}
7446
/***********************************************************************
 *		_adj_fdiv_r (MSVCRT.@)
 *
 * Stub: the body only emits a trace message and computes nothing.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void _adj_fdiv_r(void)
{
  TRACE("(): stub\n");
}
7460
/***********************************************************************
 *		_adj_fdivr_m16i (MSVCRT.@)
 *
 * Stub: the body only emits a trace message; arg is never read and
 * nothing is computed.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void __stdcall _adj_fdivr_m16i( short arg )
{
  TRACE("(): stub\n");
}
7472
/***********************************************************************
 *		_adj_fdivr_m32 (MSVCRT.@)
 *
 * Stub: the body only emits a trace message; arg is never read and
 * nothing is computed.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void __stdcall _adj_fdivr_m32( unsigned int arg )
{
  TRACE("(): stub\n");
}
7484
/***********************************************************************
 *		_adj_fdivr_m32i (MSVCRT.@)
 *
 * Stub: the body only emits a trace message; arg is never read and
 * nothing is computed.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void __stdcall _adj_fdivr_m32i( int arg )
{
  TRACE("(): stub\n");
}
7496
/***********************************************************************
 *		_adj_fdivr_m64 (MSVCRT.@)
 *
 * Stub: the body only emits a trace message; arg is never read and
 * nothing is computed.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void __stdcall _adj_fdivr_m64( unsigned __int64 arg )
{
  TRACE("(): stub\n");
}
7508
/***********************************************************************
 *		_adj_fpatan (MSVCRT.@)
 *
 * Stub: the body only emits a trace message and computes nothing.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 *    NOTE(review): the name suggests the fpatan instruction rather than
 *    fdiv - confirm.
 */
void _adj_fpatan(void)
{
  TRACE("(): stub\n");
}
7522
/***********************************************************************
 *		_adj_fprem (MSVCRT.@)
 *
 * Stub: the body only emits a trace message and computes nothing.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 *    NOTE(review): the name suggests the fprem instruction rather than
 *    fdiv - confirm.
 */
void _adj_fprem(void)
{
  TRACE("(): stub\n");
}
7536
/***********************************************************************
 *		_adj_fprem1 (MSVCRT.@)
 *
 * Stub: the body only emits a trace message and computes nothing.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 *    NOTE(review): the name suggests the fprem1 instruction rather than
 *    fdiv - confirm.
 */
void _adj_fprem1(void)
{
  TRACE("(): stub\n");
}
7550
/***********************************************************************
 *		_adj_fptan (MSVCRT.@)
 *
 * Stub: the body only emits a trace message and computes nothing.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 *    NOTE(review): the name suggests the fptan instruction rather than
 *    fdiv - confirm.
 */
void _adj_fptan(void)
{
  TRACE("(): stub\n");
}
7564
/***********************************************************************
 *		_safe_fdiv (MSVCRT.@)
 *
 * Stub: the body only emits a trace message and computes nothing.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void _safe_fdiv(void)
{
  TRACE("(): stub\n");
}
7578
/***********************************************************************
 *		_safe_fdivr (MSVCRT.@)
 *
 * Stub: the body only emits a trace message and computes nothing.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 */
void _safe_fdivr(void)
{
  TRACE("(): stub\n");
}
7592
/***********************************************************************
 *		_safe_fprem (MSVCRT.@)
 *
 * Stub: the body only emits a trace message and computes nothing.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 *    NOTE(review): the name suggests the fprem instruction rather than
 *    fdiv - confirm.
 */
void _safe_fprem(void)
{
  TRACE("(): stub\n");
}
7606
/***********************************************************************
 *		_safe_fprem1 (MSVCRT.@)
 *
 * Stub: the body only emits a trace message and computes nothing.
 *
 * FIXME
 *    This function is likely to have the wrong number of arguments.
 *
 * NOTE
 *    I _think_ this function is intended to work around the Pentium
 *    fdiv bug.
 *    NOTE(review): the name suggests the fprem1 instruction rather than
 *    fdiv - confirm.
 */
void _safe_fprem1(void)
{
  TRACE("(): stub\n");
}
7621
/***********************************************************************
 *		__libm_sse2_acos   (MSVCRT.@)
 *
 * acos() with the argument taken from, and the result left in, the low
 * 64 bits of xmm0; hence no C-level parameters or return value.
 */
void __cdecl __libm_sse2_acos(void)
{
    double d;
    /* Copy the argument out of xmm0 into a local. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = acos( d );
    /* Load the result back into xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7632
/***********************************************************************
 *		__libm_sse2_acosf   (MSVCRT.@)
 *
 * acosf() with the argument taken from, and the result left in, the low
 * 32 bits of xmm0; hence no C-level parameters or return value.
 */
void __cdecl __libm_sse2_acosf(void)
{
    float f;
    /* Copy the argument out of xmm0 into a local. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = acosf( f );
    /* Load the result back into xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7643
/***********************************************************************
 *		__libm_sse2_asin   (MSVCRT.@)
 *
 * asin() with the argument taken from, and the result left in, the low
 * 64 bits of xmm0; hence no C-level parameters or return value.
 */
void __cdecl __libm_sse2_asin(void)
{
    double d;
    /* Copy the argument out of xmm0 into a local. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = asin( d );
    /* Load the result back into xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7654
/***********************************************************************
 *		__libm_sse2_asinf   (MSVCRT.@)
 *
 * asinf() with the argument taken from, and the result left in, the low
 * 32 bits of xmm0; hence no C-level parameters or return value.
 */
void __cdecl __libm_sse2_asinf(void)
{
    float f;
    /* Copy the argument out of xmm0 into a local. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = asinf( f );
    /* Load the result back into xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7665
/***********************************************************************
 *		__libm_sse2_atan   (MSVCRT.@)
 *
 * atan() with the argument taken from, and the result left in, the low
 * 64 bits of xmm0; hence no C-level parameters or return value.
 */
void __cdecl __libm_sse2_atan(void)
{
    double d;
    /* Copy the argument out of xmm0 into a local. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = atan( d );
    /* Load the result back into xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7676
/***********************************************************************
 *		__libm_sse2_atan2   (MSVCRT.@)
 *
 * atan2() with the first argument taken from xmm0, the second from xmm1
 * and the result left in xmm0 (low 64 bits in each case).
 */
void __cdecl __libm_sse2_atan2(void)
{
    double d1, d2;
    /* Copy both arguments out of xmm0/xmm1 into locals. */
    __asm__ __volatile__( "movq %%xmm0,%0; movq %%xmm1,%1 " : "=m" (d1), "=m" (d2) );
    d1 = atan2( d1, d2 );
    /* Load the result back into xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d1) );
}
7687
/***********************************************************************
 *		__libm_sse2_atanf   (MSVCRT.@)
 *
 * atanf() with the argument taken from, and the result left in, the low
 * 32 bits of xmm0; hence no C-level parameters or return value.
 */
void __cdecl __libm_sse2_atanf(void)
{
    float f;
    /* Copy the argument out of xmm0 into a local. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = atanf( f );
    /* Load the result back into xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7698
/***********************************************************************
 *		__libm_sse2_cos   (MSVCRT.@)
 *
 * cos() with the argument taken from, and the result left in, the low
 * 64 bits of xmm0; hence no C-level parameters or return value.
 */
void __cdecl __libm_sse2_cos(void)
{
    double d;
    /* Copy the argument out of xmm0 into a local. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = cos( d );
    /* Load the result back into xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7709
/***********************************************************************
 *		__libm_sse2_cosf   (MSVCRT.@)
 *
 * cosf() with the argument taken from, and the result left in, the low
 * 32 bits of xmm0; hence no C-level parameters or return value.
 */
void __cdecl __libm_sse2_cosf(void)
{
    float f;
    /* Copy the argument out of xmm0 into a local. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = cosf( f );
    /* Load the result back into xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7720
/***********************************************************************
 *		__libm_sse2_exp   (MSVCRT.@)
 *
 * exp() with the argument taken from, and the result left in, the low
 * 64 bits of xmm0; hence no C-level parameters or return value.
 */
void __cdecl __libm_sse2_exp(void)
{
    double d;
    /* Copy the argument out of xmm0 into a local. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = exp( d );
    /* Load the result back into xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7731
/***********************************************************************
 *		__libm_sse2_expf   (MSVCRT.@)
 *
 * expf() with the argument taken from, and the result left in, the low
 * 32 bits of xmm0; hence no C-level parameters or return value.
 */
void __cdecl __libm_sse2_expf(void)
{
    float f;
    /* Copy the argument out of xmm0 into a local. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = expf( f );
    /* Load the result back into xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7742
/***********************************************************************
 *		__libm_sse2_log   (MSVCRT.@)
 *
 * log() with the argument taken from, and the result left in, the low
 * 64 bits of xmm0; hence no C-level parameters or return value.
 */
void __cdecl __libm_sse2_log(void)
{
    double d;
    /* Copy the argument out of xmm0 into a local. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = log( d );
    /* Load the result back into xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7753
/***********************************************************************
 *		__libm_sse2_log10   (MSVCRT.@)
 *
 * log10() with the argument taken from, and the result left in, the low
 * 64 bits of xmm0; hence no C-level parameters or return value.
 */
void __cdecl __libm_sse2_log10(void)
{
    double d;
    /* Copy the argument out of xmm0 into a local. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = log10( d );
    /* Load the result back into xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7764
/***********************************************************************
 *		__libm_sse2_log10f   (MSVCRT.@)
 *
 * log10f() with the argument taken from, and the result left in, the
 * low 32 bits of xmm0; hence no C-level parameters or return value.
 */
void __cdecl __libm_sse2_log10f(void)
{
    float f;
    /* Copy the argument out of xmm0 into a local. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = log10f( f );
    /* Load the result back into xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7775
/***********************************************************************
 *		__libm_sse2_logf   (MSVCRT.@)
 *
 * logf() with the argument taken from, and the result left in, the low
 * 32 bits of xmm0; hence no C-level parameters or return value.
 */
void __cdecl __libm_sse2_logf(void)
{
    float f;
    /* Copy the argument out of xmm0 into a local. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = logf( f );
    /* Load the result back into xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7786
/***********************************************************************
 *		__libm_sse2_pow   (MSVCRT.@)
 *
 * pow() with the base taken from xmm0, the exponent from xmm1 and the
 * result left in xmm0 (low 64 bits in each case).
 */
void __cdecl __libm_sse2_pow(void)
{
    double d1, d2;
    /* Copy both arguments out of xmm0/xmm1 into locals. */
    __asm__ __volatile__( "movq %%xmm0,%0; movq %%xmm1,%1 " : "=m" (d1), "=m" (d2) );
    d1 = pow( d1, d2 );
    /* Load the result back into xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d1) );
}
7797
/***********************************************************************
 *		__libm_sse2_powf   (MSVCRT.@)
 *
 * powf() with the base taken from xmm0, the exponent from xmm1 and the
 * result left in xmm0 (low 32 bits in each case).
 */
void __cdecl __libm_sse2_powf(void)
{
    float f1, f2;
    /* Copy both arguments out of xmm0/xmm1 into locals. */
    __asm__ __volatile__( "movd %%xmm0,%0; movd %%xmm1,%1" : "=g" (f1), "=g" (f2) );
    f1 = powf( f1, f2 );
    /* Load the result back into xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f1) );
}
7808
/***********************************************************************
 *		__libm_sse2_sin   (MSVCRT.@)
 *
 * sin() with the argument taken from, and the result left in, the low
 * 64 bits of xmm0; hence no C-level parameters or return value.
 */
void __cdecl __libm_sse2_sin(void)
{
    double d;
    /* Copy the argument out of xmm0 into a local. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = sin( d );
    /* Load the result back into xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7819
/***********************************************************************
 *		__libm_sse2_sinf   (MSVCRT.@)
 *
 * sinf() with the argument taken from, and the result left in, the low
 * 32 bits of xmm0; hence no C-level parameters or return value.
 */
void __cdecl __libm_sse2_sinf(void)
{
    float f;
    /* Copy the argument out of xmm0 into a local. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = sinf( f );
    /* Load the result back into xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7830
/***********************************************************************
 *		__libm_sse2_tan   (MSVCRT.@)
 *
 * tan() with the argument taken from, and the result left in, the low
 * 64 bits of xmm0; hence no C-level parameters or return value.
 */
void __cdecl __libm_sse2_tan(void)
{
    double d;
    /* Copy the argument out of xmm0 into a local. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    d = tan( d );
    /* Load the result back into xmm0. */
    __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
}
7841
/***********************************************************************
 *		__libm_sse2_tanf   (MSVCRT.@)
 *
 * tanf() with the argument taken from, and the result left in, the low
 * 32 bits of xmm0; hence no C-level parameters or return value.
 */
void __cdecl __libm_sse2_tanf(void)
{
    float f;
    /* Copy the argument out of xmm0 into a local. */
    __asm__ __volatile__( "movd %%xmm0,%0" : "=g" (f) );
    f = tanf( f );
    /* Load the result back into xmm0. */
    __asm__ __volatile__( "movd %0,%%xmm0" : : "g" (f) );
}
7852
/***********************************************************************
 *		__libm_sse2_sqrt_precise   (MSVCR110.@)
 *
 * Square root with the argument taken from, and the result left in, the
 * low 64 bits of xmm0.  Three paths:
 *  - any rounding-control bit set in the control word: use sqrt();
 *  - sqrt_validate() returns zero: hand back d as sqrt_validate() left it;
 *  - otherwise: call the sse2_sqrt routine.
 */
void __cdecl __libm_sse2_sqrt_precise(void)
{
    unsigned int cw;
    double d;

    /* Copy the argument out of xmm0 into a local. */
    __asm__ __volatile__( "movq %%xmm0,%0" : "=m" (d) );
    /* New value and mask are both 0, so nothing is modified.
     * NOTE(review): with a NULL first pointer this presumably reports
     * only the SSE2 control word in cw - confirm. */
    __control87_2(0, 0, NULL, &cw);
    if (cw & _MCW_RC)
    {
        d = sqrt(d);
        __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
        return;
    }

    /* sqrt_validate() gets a pointer to d and may rewrite it; on a zero
     * return the (possibly updated) d is the final result. */
    if (!sqrt_validate(&d, FALSE))
    {
        __asm__ __volatile__( "movq %0,%%xmm0" : : "m" (d) );
        return;
    }
    /* NOTE(review): sse2_sqrt is defined elsewhere; it presumably takes
     * its operand from xmm0 and leaves the result there - confirm, and
     * confirm xmm0 still holds the argument at this point. */
    __asm__ __volatile__( "call " __ASM_NAME( "sse2_sqrt" ) );
}
7877 #endif  /* __i386__ */
7878
7879 #if _MSVCR_VER>=120
7880
7881 /*********************************************************************
7882  *      cbrt (MSVCR120.@)
7883  *
7884  * Copied from musl: src/math/cbrt.c
7885  */
7886 double CDECL cbrt(double x)
7887 {
7888     static const UINT32 B1 = 715094163, B2 = 696219795;
7889     static const double P0 =  1.87595182427177009643,
7890                  P1 = -1.88497979543377169875,
7891                  P2 =  1.621429720105354466140,
7892                  P3 = -0.758397934778766047437,
7893                  P4 =  0.145996192886612446982;
7894
7895     union {double f; UINT64 i;} u = {x};
7896     double r,s,t,w;
7897     UINT32 hx = u.i >> 32 & 0x7fffffff;
7898
7899     if (hx >= 0x7ff00000)  /* cbrt(NaN,INF) is itself */
7900         return x + x;
7901
7902     if (hx < 0x00100000) { /* zero or subnormal? */
7903         u.f = x * 0x1p54;
7904         hx = u.i>>32 & 0x7fffffff;
7905         if (hx == 0)
7906             return x;
7907         hx = hx / 3 + B2;
7908     } else
7909         hx = hx / 3 + B1;
7910     u.i &= 1ULL << 63;
7911     u.i |= (UINT64)hx << 32;
7912     t = u.f;
7913
7914     r = (t * t) * (t / x);
7915     t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4));
7916
7917     u.f = t;
7918     u.i = (u.i + 0x80000000) & 0xffffffffc0000000ULL;
7919     t = u.f;
7920
7921     s = t * t;
7922     r = x / s;
7923     w = t + t;
7924     r = (r - t) / (w + r);
7925     t = t + t * r;
7926     return t;
7927 }
7928
7929 /*********************************************************************
7930  *      cbrtf (MSVCR120.@)
7931  *
7932  * Copied from musl: src/math/cbrtf.c
7933  */
7934 float CDECL cbrtf(float x)
7935 {
7936     static const unsigned B1 = 709958130, B2 = 642849266;
7937
7938     double r,T;
7939     union {float f; UINT32 i;} u = {x};
7940     UINT32 hx = u.i & 0x7fffffff;
7941
7942     if (hx >= 0x7f800000)
7943         return x + x;
7944
7945     if (hx < 0x00800000) {  /* zero or subnormal? */
7946         if (hx == 0)
7947             return x;
7948         u.f = x * 0x1p24f;
7949         hx = u.i & 0x7fffffff;
7950         hx = hx / 3 + B2;
7951     } else
7952         hx = hx / 3 + B1;
7953     u.i &= 0x80000000;
7954     u.i |= hx;
7955
7956     T = u.f;
7957     r = T * T * T;
7958     T = T * (x + x + r) / (x + r + r);
7959
7960     r = T * T * T;
7961     T = T * (x + x + r) / (x + r + r);
7962     return T;
7963 }
7964
7965 /*********************************************************************
7966  *      exp2 (MSVCR120.@)
7967  *
7968  * Copied from musl: src/math/exp2.c
7969  */
7970 double CDECL exp2(double x)
7971 {
7972     static const double C[] = {
7973         0x1.62e42fefa39efp-1,
7974         0x1.ebfbdff82c424p-3,
7975         0x1.c6b08d70cf4b5p-5,
7976         0x1.3b2abd24650ccp-7,
7977         0x1.5d7e09b4e3a84p-10
7978     };
7979
7980     UINT32 abstop;
7981     UINT64 ki, idx, top, sbits;
7982     double kd, r, r2, scale, tail, tmp;
7983
7984     abstop = (*(UINT64*)&x >> 52) & 0x7ff;
7985     if (abstop - 0x3c9 >= 0x408 - 0x3c9) {
7986         if (abstop - 0x3c9 >= 0x80000000) {
7987             /* Avoid spurious underflow for tiny x. */
7988             /* Note: 0 is common input. */
7989             return 1.0 + x;
7990         }
7991         if (abstop >= 409) {
7992             if (*(UINT64*)&x == 0xfff0000000000000ull)
7993                 return 0.0;
7994             if (abstop >= 0x7ff)
7995                 return 1.0 + x;
7996             if (!(*(UINT64*)&x >> 63)) {
7997                 *_errno() = ERANGE;
7998                 return fp_barrier(DBL_MAX) * DBL_MAX;
7999             }
8000             else if (x <= -2147483648.0) {
8001                 fp_barrier(x + 0x1p120f);
8002                 return 0;
8003             }
8004             else if (*(UINT64*)&x >= 0xc090cc0000000000ull) {
8005                 *_errno() = ERANGE;
8006                 fp_barrier(x + 0x1p120f);
8007                 return 0;
8008             }
8009         }
8010         if (2 * *(UINT64*)&x > 2 * 0x408d000000000000ull)
8011             /* Large x is special cased below. */
8012             abstop = 0;
8013     }
8014
8015     /* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */
8016     /* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */
8017     kd = fp_barrier(x + 0x1.8p52 / (1 << 7));
8018     ki = *(UINT64*)&kd; /* k. */
8019     kd -= 0x1.8p52 / (1 << 7); /* k/N for int k. */
8020     r = x - kd;
8021     /* 2^(k/N) ~= scale * (1 + tail). */
8022     idx = 2 * (ki % (1 << 7));
8023     top = ki << (52 - 7);
8024     tail = *(double*)&exp_T[idx];
8025     /* This is only a valid scale when -1023*N < k < 1024*N. */
8026     sbits = exp_T[idx + 1] + top;
8027     /* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */
8028     /* Evaluation is optimized assuming superscalar pipelined execution. */
8029     r2 = r * r;
8030     /* Without fma the worst case error is 0.5/N ulp larger. */
8031     /* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */
8032     tmp = tail + r * C[0] + r2 * (C[1] + r * C[2]) + r2 * r2 * (C[3] + r * C[4]);
8033     if (abstop == 0)
8034     {
8035         /* Handle cases that may overflow or underflow when computing the result that
8036            is scale*(1+TMP) without intermediate rounding. The bit representation of
8037            scale is in SBITS, however it has a computed exponent that may have
8038            overflown into the sign bit so that needs to be adjusted before using it as
8039            a double. (int32_t)KI is the k used in the argument reduction and exponent
8040            adjustment of scale, positive k here means the result may overflow and
8041            negative k means the result may underflow. */
8042         double scale, y;
8043
8044         if ((ki & 0x80000000) == 0) {
8045             /* k > 0, the exponent of scale might have overflowed by 1. */
8046             sbits -= 1ull << 52;
8047             scale = *(double*)&sbits;
8048             y = 2 * (scale + scale * tmp);
8049             return y;
8050         }
8051         /* k < 0, need special care in the subnormal range. */
8052         sbits += 1022ull << 52;
8053         scale = *(double*)&sbits;
8054         y = scale + scale * tmp;
8055         if (y < 1.0) {
8056             /* Round y to the right precision before scaling it into the subnormal
8057                range to avoid double rounding that can cause 0.5+E/2 ulp error where
8058                E is the worst-case ulp error outside the subnormal range. So this
8059                is only useful if the goal is better than 1 ulp worst-case error. */
8060             double hi, lo;
8061             lo = scale - y + scale * tmp;
8062             hi = 1.0 + y;
8063             lo = 1.0 - hi + y + lo;
8064             y = hi + lo - 1.0;
8065             /* Avoid -0.0 with downward rounding. */
8066             if (y == 0.0)
8067                 y = 0.0;
8068             /* The underflow exception needs to be signaled explicitly. */
8069             fp_barrier(fp_barrier(0x1p-1022) * 0x1p-1022);
8070         }
8071         y = 0x1p-1022 * y;
8072         return y;
8073     }
8074     scale = *(double*)&sbits;
8075     /* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
8076        is no spurious underflow here even without fma. */
8077     return scale + scale * tmp;
8078 }
8079
/*********************************************************************
 *      exp2f (MSVCR120.@)
 *
 * Computes 2 raised to the power x, evaluating in double precision.
 *
 * RETURNS
 *  2**x.  For x == -INF returns 0, for NaN/+INF returns x + x.  For
 *  finite x >= 128 errno is set to ERANGE and the overflowed product
 *  x * FLT_MAX is returned.  For x <= -150 returns 0.
 *
 * Copied from musl: src/math/exp2f.c
 */
float CDECL exp2f(float x)
{
    static const double C[] = {
        0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1
    };
    static const double shift = 0x1.8p+52 / (1 << 5);

    double kd, xd, z, r, r2, y, s;
    UINT32 abstop;
    UINT64 ki, t;

    xd = x;
    /* Top 12 bits of the float with the sign masked off. */
    abstop = (*(UINT32*)&x >> 20) & 0x7ff;
    if (abstop >= 0x430) {
        /* |x| >= 128 or x is nan.  */
        if (*(UINT32*)&x == 0xff800000) /* -INF */
            return 0.0f;
        if (abstop >= 0x7f8) /* +INF or NaN */
            return x + x;
        if (x > 0.0f) {
            *_errno() = ERANGE;
            return fp_barrierf(x * FLT_MAX);
        }
        if (x <= -150.0f) {
            fp_barrierf(x - 0x1p120);
            return 0;
        }
    }

    /* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k, N = 1 << 5. */
    kd = xd + shift;
    ki = *(UINT64*)&kd;
    kd -= shift; /* k/(1<<5) for int k.  */
    r = xd - kd;

    /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
    /* exp2f_T is a lookup table defined outside this function. */
    t = exp2f_T[ki % (1 << 5)];
    t += ki << (52 - 5);
    s = *(double*)&t;
    z = C[0] * r + C[1];
    r2 = r * r;
    y = C[2] * r + 1;
    y = z * r2 + y;
    y = y * s;
    return y;
}
8131
/*********************************************************************
 *      expm1 (MSVCR120.@)
 *
 * Exported entry point: forwards x unchanged to __expm1(), which is
 * defined outside this section, and returns its result.
 */
double CDECL expm1(double x)
{
    return __expm1(x);
}
8139
/*********************************************************************
 *      expm1f (MSVCR120.@)
 *
 * Exported entry point: forwards x unchanged to __expm1f(), which is
 * defined outside this section, and returns its result.
 */
float CDECL expm1f(float x)
{
    return __expm1f(x);
}
8147
/*********************************************************************
 *      log1p (MSVCR120.@)
 *
 * Computes log(1 + x).
 *
 * RETURNS
 *  log(1 + x).  For x == -1 errno is set to ERANGE and -INF is returned.
 *  For any other input whose high word is >= 0xbff00000 (x < -1) errno
 *  is set to EDOM and NaN is returned.  For |x| < 2**-53 and for inputs
 *  with a non-negative high word >= 0x7ff00000 (+INF, NaN) x itself is
 *  returned.
 *
 * Copied from musl: src/math/log1p.c
 */
double CDECL log1p(double x)
{
    static const double ln2_hi = 6.93147180369123816490e-01,
        ln2_lo = 1.90821492927058770002e-10,
        Lg1 = 6.666666666666735130e-01,
        Lg2 = 3.999999999940941908e-01,
        Lg3 = 2.857142874366239149e-01,
        Lg4 = 2.222219843214978396e-01,
        Lg5 = 1.818357216161805012e-01,
        Lg6 = 1.531383769920937332e-01,
        Lg7 = 1.479819860511658591e-01;

    union {double f; UINT64 i;} u = {x};
    double hfsq, f, c, s, z, R, w, t1, t2, dk;
    UINT32 hx, hu;
    int k;

    hx = u.i >> 32; /* high word of x, sign included */
    k = 1;
    if (hx < 0x3fda827a || hx >> 31) { /* 1+x < sqrt(2)+ */
        if (hx >= 0xbff00000) { /* x <= -1.0 */
            if (x == -1) {
                *_errno() = ERANGE;
                return x / 0.0; /* log1p(-1) = -inf */
            }
            *_errno() = EDOM;
            return (x-x) / 0.0; /* log1p(x<-1) = NaN */
        }
        if (hx << 1 < 0x3ca00000 << 1) { /* |x| < 2**-53 */
            fp_barrier(x + 0x1p120f);
            /* underflow if subnormal */
            if ((hx & 0x7ff00000) == 0)
                fp_barrierf(x);
            return x;
        }
        if (hx <= 0xbfd2bec4) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
            /* No argument reduction needed: k, c and f are final here. */
            k = 0;
            c = 0;
            f = x;
        }
    } else if (hx >= 0x7ff00000)
        return x;
    if (k) {
        /* Argument reduction: write 1+x as 2**k * u with u near 1. */
        u.f = 1 + x;
        hu = u.i >> 32;
        hu += 0x3ff00000 - 0x3fe6a09e;
        k = (int)(hu >> 20) - 0x3ff;
        /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
        if (k < 54) {
            c = k >= 2 ? 1 - (u.f - x) : x - (u.f - 1);
            c /= u.f;
        } else
            c = 0;
        /* reduce u into [sqrt(2)/2, sqrt(2)] */
        hu = (hu & 0x000fffff) + 0x3fe6a09e;
        u.i = (UINT64)hu << 32 | (u.i & 0xffffffff);
        f = u.f - 1;
    }
    /* Polynomial evaluation in s = f / (2 + f), split into even (t1) and
     * odd (t2) coefficient groups. */
    hfsq = 0.5 * f * f;
    s = f / (2.0 + f);
    z = s * s;
    w = z * z;
    t1 = w * (Lg2 + w * (Lg4 + w * Lg6));
    t2 = z * (Lg1 + w * (Lg3 + w * (Lg5 + w * Lg7)));
    R = t2 + t1;
    dk = k;
    /* Recombine with k*ln2, kept as separate hi/lo parts. */
    return s * (hfsq + R) + (dk * ln2_lo + c) - hfsq + f + dk * ln2_hi;
}
8221
8222 /*********************************************************************
8223  *      log1pf (MSVCR120.@)
8224  *
8225  * Copied from musl: src/math/log1pf.c
8226  */
8227 float CDECL log1pf(float x)
8228 {
8229     static const float ln2_hi = 6.9313812256e-01,
8230         ln2_lo = 9.0580006145e-06,
8231         Lg1 = 0xaaaaaa.0p-24,
8232         Lg2 = 0xccce13.0p-25,
8233         Lg3 = 0x91e9ee.0p-25,
8234         Lg4 = 0xf89e26.0p-26;
8235
8236     union {float f; UINT32 i;} u = {x};
8237     float hfsq, f, c, s, z, R, w, t1, t2, dk;
8238     UINT32 ix, iu;
8239     int k;
8240
8241     ix = u.i;
8242     k = 1;
8243     if (ix < 0x3ed413d0 || ix >> 31) { /* 1+x < sqrt(2)+ */
8244         if (ix >= 0xbf800000) { /* x <= -1.0 */
8245             if (x == -1) {
8246                 *_errno() = ERANGE;
8247                 return x / 0.0f; /* log1p(-1)=+inf */
8248             }
8249             *_errno() = EDOM;
8250             return (x - x) / 0.0f; /* log1p(x<-1)=NaN */
8251         }
8252         if (ix<<1 < 0x33800000<<1) { /* |x| < 2**-24 */
8253             /* underflow if subnormal */
8254             if ((ix & 0x7f800000) == 0)
8255                 fp_barrierf(x * x);
8256             return x;
8257         }
8258         if (ix <= 0xbe95f619) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */
8259             k = 0;
8260             c = 0;
8261             f = x;
8262         }
8263     } else if (ix >= 0x7f800000)
8264         return x;
8265     if (k) {
8266         u.f = 1 + x;
8267         iu = u.i;
8268         iu += 0x3f800000 - 0x3f3504f3;
8269         k = (int)(iu >> 23) - 0x7f;
8270         /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
8271         if (k < 25) {
8272             c = k >= 2 ? 1 - (u.f - x) : x - (u.f - 1);
8273             c /= u.f;
8274         } else
8275             c = 0;
8276         /* reduce u into [sqrt(2)/2, sqrt(2)] */
8277         iu = (iu & 0x007fffff) + 0x3f3504f3;
8278         u.i = iu;
8279         f = u.f - 1;
8280     }
8281     s = f / (2.0f + f);
8282     z = s * s;
8283     w = z * z;
8284     t1= w * (Lg2 + w * Lg4);
8285     t2= z * (Lg1 + w * Lg3);
8286     R = t2 + t1;
8287     hfsq = 0.5f * f * f;
8288     dk = k;
8289     return s * (hfsq + R) + (dk * ln2_lo + c) - hfsq + f + dk * ln2_hi;
8290 }
8291
8292 /*********************************************************************
8293  *      log2 (MSVCR120.@)
8294  *
8295  * Copied from musl: src/math/log2.c
8296  */
8297 double CDECL log2(double x)
8298 {
8299     static const double invln2hi = 0x1.7154765200000p+0,
8300         invln2lo = 0x1.705fc2eefa200p-33;
8301     static const double A[] = {
8302         -0x1.71547652b8339p-1,
8303         0x1.ec709dc3a04bep-2,
8304         -0x1.7154764702ffbp-2,
8305         0x1.2776c50034c48p-2,
8306         -0x1.ec7b328ea92bcp-3,
8307         0x1.a6225e117f92ep-3
8308     };
8309     static const double B[] = {
8310         -0x1.71547652b82fep-1,
8311         0x1.ec709dc3a03f7p-2,
8312         -0x1.71547652b7c3fp-2,
8313         0x1.2776c50f05be4p-2,
8314         -0x1.ec709dd768fe5p-3,
8315         0x1.a61761ec4e736p-3,
8316         -0x1.7153fbc64a79bp-3,
8317         0x1.484d154f01b4ap-3,
8318         -0x1.289e4a72c383cp-3,
8319         0x1.0b32f285aee66p-3
8320     };
8321     static const struct {
8322         double invc, logc;
8323     } T[] = {
8324         {0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1},
8325         {0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1},
8326         {0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1},
8327         {0x1.661ec32d06c85p+0, -0x1.efec5360b4000p-2},
8328         {0x1.623fa951198f8p+0, -0x1.dfdd91ab7e000p-2},
8329         {0x1.5e75ba4cf026cp+0, -0x1.cffae0cc79000p-2},
8330         {0x1.5ac055a214fb8p+0, -0x1.c043811fda000p-2},
8331         {0x1.571ed0f166e1ep+0, -0x1.b0b67323ae000p-2},
8332         {0x1.53909590bf835p+0, -0x1.a152f5a2db000p-2},
8333         {0x1.5014fed61adddp+0, -0x1.9217f5af86000p-2},
8334         {0x1.4cab88e487bd0p+0, -0x1.8304db0719000p-2},
8335         {0x1.49539b4334feep+0, -0x1.74189f9a9e000p-2},
8336         {0x1.460cbdfafd569p+0, -0x1.6552bb5199000p-2},
8337         {0x1.42d664ee4b953p+0, -0x1.56b23a29b1000p-2},
8338         {0x1.3fb01111dd8a6p+0, -0x1.483650f5fa000p-2},
8339         {0x1.3c995b70c5836p+0, -0x1.39de937f6a000p-2},
8340         {0x1.3991c4ab6fd4ap+0, -0x1.2baa1538d6000p-2},
8341         {0x1.3698e0ce099b5p+0, -0x1.1d98340ca4000p-2},
8342         {0x1.33ae48213e7b2p+0, -0x1.0fa853a40e000p-2},
8343         {0x1.30d191985bdb1p+0, -0x1.01d9c32e73000p-2},
8344         {0x1.2e025cab271d7p+0, -0x1.e857da2fa6000p-3},
8345         {0x1.2b404cf13cd82p+0, -0x1.cd3c8633d8000p-3},
8346         {0x1.288b02c7ccb50p+0, -0x1.b26034c14a000p-3},
8347         {0x1.25e2263944de5p+0, -0x1.97c1c2f4fe000p-3},
8348         {0x1.234563d8615b1p+0, -0x1.7d6023f800000p-3},
8349         {0x1.20b46e33eaf38p+0, -0x1.633a71a05e000p-3},
8350         {0x1.1e2eefdcda3ddp+0, -0x1.494f5e9570000p-3},
8351         {0x1.1bb4a580b3930p+0, -0x1.2f9e424e0a000p-3},
8352         {0x1.19453847f2200p+0, -0x1.162595afdc000p-3},
8353         {0x1.16e06c0d5d73cp+0, -0x1.f9c9a75bd8000p-4},
8354         {0x1.1485f47b7e4c2p+0, -0x1.c7b575bf9c000p-4},
8355         {0x1.12358ad0085d1p+0, -0x1.960c60ff48000p-4},
8356         {0x1.0fef00f532227p+0, -0x1.64ce247b60000p-4},
8357         {0x1.0db2077d03a8fp+0, -0x1.33f78b2014000p-4},
8358         {0x1.0b7e6d65980d9p+0, -0x1.0387d1a42c000p-4},
8359         {0x1.0953efe7b408dp+0, -0x1.a6f9208b50000p-5},
8360         {0x1.07325cac53b83p+0, -0x1.47a954f770000p-5},
8361         {0x1.05197e40d1b5cp+0, -0x1.d23a8c50c0000p-6},
8362         {0x1.03091c1208ea2p+0, -0x1.16a2629780000p-6},
8363         {0x1.0101025b37e21p+0, -0x1.720f8d8e80000p-8},
8364         {0x1.fc07ef9caa76bp-1, 0x1.6fe53b1500000p-7},
8365         {0x1.f4465d3f6f184p-1, 0x1.11ccce10f8000p-5},
8366         {0x1.ecc079f84107fp-1, 0x1.c4dfc8c8b8000p-5},
8367         {0x1.e573a99975ae8p-1, 0x1.3aa321e574000p-4},
8368         {0x1.de5d6f0bd3de6p-1, 0x1.918a0d08b8000p-4},
8369         {0x1.d77b681ff38b3p-1, 0x1.e72e9da044000p-4},
8370         {0x1.d0cb5724de943p-1, 0x1.1dcd2507f6000p-3},
8371         {0x1.ca4b2dc0e7563p-1, 0x1.476ab03dea000p-3},
8372         {0x1.c3f8ee8d6cb51p-1, 0x1.7074377e22000p-3},
8373         {0x1.bdd2b4f020c4cp-1, 0x1.98ede8ba94000p-3},
8374         {0x1.b7d6c006015cap-1, 0x1.c0db86ad2e000p-3},
8375         {0x1.b20366e2e338fp-1, 0x1.e840aafcee000p-3},
8376         {0x1.ac57026295039p-1, 0x1.0790ab4678000p-2},
8377         {0x1.a6d01bc2731ddp-1, 0x1.1ac056801c000p-2},
8378         {0x1.a16d3bc3ff18bp-1, 0x1.2db11d4fee000p-2},
8379         {0x1.9c2d14967feadp-1, 0x1.406464ec58000p-2},
8380         {0x1.970e4f47c9902p-1, 0x1.52dbe093af000p-2},
8381         {0x1.920fb3982bcf2p-1, 0x1.651902050d000p-2},
8382         {0x1.8d30187f759f1p-1, 0x1.771d2cdeaf000p-2},
8383         {0x1.886e5ebb9f66dp-1, 0x1.88e9c857d9000p-2},
8384         {0x1.83c97b658b994p-1, 0x1.9a80155e16000p-2},
8385         {0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2},
8386         {0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2},
8387         {0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2}
8388     };
8389     static const struct {
8390         double chi, clo;
8391     } T2[] = {
8392         {0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
8393         {0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57},
8394         {0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55},
8395         {0x1.6e00038b95a04p-1, 0x1.8ff8856739326p-55},
8396         {0x1.71fffe09994e3p-1, 0x1.afd40275f82b1p-55},
8397         {0x1.7600015590e1p-1, -0x1.2fd75b4238341p-56},
8398         {0x1.7a00012655bd5p-1, 0x1.808e67c242b76p-56},
8399         {0x1.7e0003259e9a6p-1, -0x1.208e426f622b7p-57},
8400         {0x1.81fffedb4b2d2p-1, -0x1.402461ea5c92fp-55},
8401         {0x1.860002dfafcc3p-1, 0x1.df7f4a2f29a1fp-57},
8402         {0x1.89ffff78c6b5p-1, -0x1.e0453094995fdp-55},
8403         {0x1.8e00039671566p-1, -0x1.a04f3bec77b45p-55},
8404         {0x1.91fffe2bf1745p-1, -0x1.7fa34400e203cp-56},
8405         {0x1.95fffcc5c9fd1p-1, -0x1.6ff8005a0695dp-56},
8406         {0x1.9a0003bba4767p-1, 0x1.0f8c4c4ec7e03p-56},
8407         {0x1.9dfffe7b92da5p-1, 0x1.e7fd9478c4602p-55},
8408         {0x1.a1fffd72efdafp-1, -0x1.a0c554dcdae7ep-57},
8409         {0x1.a5fffde04ff95p-1, 0x1.67da98ce9b26bp-55},
8410         {0x1.a9fffca5e8d2bp-1, -0x1.284c9b54c13dep-55},
8411         {0x1.adfffddad03eap-1, 0x1.812c8ea602e3cp-58},
8412         {0x1.b1ffff10d3d4dp-1, -0x1.efaddad27789cp-55},
8413         {0x1.b5fffce21165ap-1, 0x1.3cb1719c61237p-58},
8414         {0x1.b9fffd950e674p-1, 0x1.3f7d94194cep-56},
8415         {0x1.be000139ca8afp-1, 0x1.50ac4215d9bcp-56},
8416         {0x1.c20005b46df99p-1, 0x1.beea653e9c1c9p-57},
8417         {0x1.c600040b9f7aep-1, -0x1.c079f274a70d6p-56},
8418         {0x1.ca0006255fd8ap-1, -0x1.a0b4076e84c1fp-56},
8419         {0x1.cdfffd94c095dp-1, 0x1.8f933f99ab5d7p-55},
8420         {0x1.d1ffff975d6cfp-1, -0x1.82c08665fe1bep-58},
8421         {0x1.d5fffa2561c93p-1, -0x1.b04289bd295f3p-56},
8422         {0x1.d9fff9d228b0cp-1, 0x1.70251340fa236p-55},
8423         {0x1.de00065bc7e16p-1, -0x1.5011e16a4d80cp-56},
8424         {0x1.e200002f64791p-1, 0x1.9802f09ef62ep-55},
8425         {0x1.e600057d7a6d8p-1, -0x1.e0b75580cf7fap-56},
8426         {0x1.ea00027edc00cp-1, -0x1.c848309459811p-55},
8427         {0x1.ee0006cf5cb7cp-1, -0x1.f8027951576f4p-55},
8428         {0x1.f2000782b7dccp-1, -0x1.f81d97274538fp-55},
8429         {0x1.f6000260c450ap-1, -0x1.071002727ffdcp-59},
8430         {0x1.f9fffe88cd533p-1, -0x1.81bdce1fda8bp-58},
8431         {0x1.fdfffd50f8689p-1, 0x1.7f91acb918e6ep-55},
8432         {0x1.0200004292367p+0, 0x1.b7ff365324681p-54},
8433         {0x1.05fffe3e3d668p+0, 0x1.6fa08ddae957bp-55},
8434         {0x1.0a0000a85a757p+0, -0x1.7e2de80d3fb91p-58},
8435         {0x1.0e0001a5f3fccp+0, -0x1.1823305c5f014p-54},
8436         {0x1.11ffff8afbaf5p+0, -0x1.bfabb6680bac2p-55},
8437         {0x1.15fffe54d91adp+0, -0x1.d7f121737e7efp-54},
8438         {0x1.1a00011ac36e1p+0, 0x1.c000a0516f5ffp-54},
8439         {0x1.1e00019c84248p+0, -0x1.082fbe4da5dap-54},
8440         {0x1.220000ffe5e6ep+0, -0x1.8fdd04c9cfb43p-55},
8441         {0x1.26000269fd891p+0, 0x1.cfe2a7994d182p-55},
8442         {0x1.2a00029a6e6dap+0, -0x1.00273715e8bc5p-56},
8443         {0x1.2dfffe0293e39p+0, 0x1.b7c39dab2a6f9p-54},
8444         {0x1.31ffff7dcf082p+0, 0x1.df1336edc5254p-56},
8445         {0x1.35ffff05a8b6p+0, -0x1.e03564ccd31ebp-54},
8446         {0x1.3a0002e0eaeccp+0, 0x1.5f0e74bd3a477p-56},
8447         {0x1.3e000043bb236p+0, 0x1.c7dcb149d8833p-54},
8448         {0x1.4200002d187ffp+0, 0x1.e08afcf2d3d28p-56},
8449         {0x1.460000d387cb1p+0, 0x1.20837856599a6p-55},
8450         {0x1.4a00004569f89p+0, -0x1.9fa5c904fbcd2p-55},
8451         {0x1.4e000043543f3p+0, -0x1.81125ed175329p-56},
8452         {0x1.51fffcc027f0fp+0, 0x1.883d8847754dcp-54},
8453         {0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55},
8454         {0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55},
8455         {0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54}
8456     };
8457
8458     double z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p, rhi, rlo;
8459     UINT64 ix, iz, tmp;
8460     UINT32 top;
8461     int k, i;
8462
8463     ix = *(UINT64*)&x;
8464     top = ix >> 48;
8465     if (ix - 0x3feea4af00000000ULL < 0x210aa00000000ULL) {
8466         /* Handle close to 1.0 inputs separately.  */
8467         /* Fix sign of zero with downward rounding when x==1.  */
8468         if (ix == 0x3ff0000000000000ULL)
8469             return 0;
8470         r = x - 1.0;
8471         *(UINT64*)&rhi = *(UINT64*)&r & -1ULL << 32;
8472         rlo = r - rhi;
8473         hi = rhi * invln2hi;
8474         lo = rlo * invln2hi + r * invln2lo;
8475         r2 = r * r; /* rounding error: 0x1p-62.  */
8476         r4 = r2 * r2;
8477         /* Worst-case error is less than 0.54 ULP (0.55 ULP without fma).  */
8478         p = r2 * (B[0] + r * B[1]);
8479         y = hi + p;
8480         lo += hi - y + p;
8481         lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) +
8482                 r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
8483         y += lo;
8484         return y;
8485     }
8486     if (top - 0x0010 >= 0x7ff0 - 0x0010) {
8487         /* x < 0x1p-1022 or inf or nan.  */
8488         if (ix * 2 == 0) {
8489             *_errno() = ERANGE;
8490             return -1.0 / x;
8491         }
8492         if (ix == 0x7ff0000000000000ULL) /* log(inf) == inf.  */
8493             return x;
8494         if ((top & 0x7ff0) == 0x7ff0 && (ix & 0xfffffffffffffULL))
8495             return x;
8496         if (top & 0x8000) {
8497             *_errno() = EDOM;
8498             return (x - x) / (x - x);
8499         }
8500         /* x is subnormal, normalize it.  */
8501         x *= 0x1p52;
8502         ix = *(UINT64*)&x;
8503         ix -= 52ULL << 52;
8504     }
8505
8506     /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
8507        The range is split into N subintervals.
8508        The ith subinterval contains z and c is near its center.  */
8509     tmp = ix - 0x3fe6000000000000ULL;
8510     i = (tmp >> (52 - 6)) % (1 << 6);
8511     k = (INT64)tmp >> 52; /* arithmetic shift */
8512     iz = ix - (tmp & 0xfffULL << 52);
8513     invc = T[i].invc;
8514     logc = T[i].logc;
8515     z = *(double*)&iz;
8516     kd = k;
8517
8518     /* log2(x) = log2(z/c) + log2(c) + k.  */
8519     /* r ~= z/c - 1, |r| < 1/(2*N).  */
8520     /* rounding error: 0x1p-55/N + 0x1p-65.  */
8521     r = (z - T2[i].chi - T2[i].clo) * invc;
8522     *(UINT64*)&rhi = *(UINT64*)&r & -1ULL << 32;
8523     rlo = r - rhi;
8524     t1 = rhi * invln2hi;
8525     t2 = rlo * invln2hi + r * invln2lo;
8526
8527     /* hi + lo = r/ln2 + log2(c) + k.  */
8528     t3 = kd + logc;
8529     hi = t3 + t1;
8530     lo = t3 - hi + t1 + t2;
8531
8532     /* log2(r+1) = r/ln2 + r^2*poly(r).  */
8533     /* Evaluation is optimized assuming superscalar pipelined execution.  */
8534     r2 = r * r; /* rounding error: 0x1p-54/N^2.  */
8535     r4 = r2 * r2;
8536     /* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
8537        ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma).  */
8538     p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
8539     y = lo + r2 * p + hi;
8540     return y;
8541 }
8542
8543 /*********************************************************************
8544  *      log2f (MSVCR120.@)
8545  *
8546  * Copied from musl: src/math/log2f.c
8547  */
8548 float CDECL log2f(float x)
8549 {
8550     static const double A[] = {
8551         -0x1.712b6f70a7e4dp-2,
8552         0x1.ecabf496832ep-2,
8553         -0x1.715479ffae3dep-1,
8554         0x1.715475f35c8b8p0
8555     };
8556     static const struct {
8557         double invc, logc;
8558     } T[] = {
8559         { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },
8560         { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 },
8561         { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 },
8562         { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 },
8563         { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 },
8564         { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 },
8565         { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 },
8566         { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 },
8567         { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 },
8568         { 0x1p+0, 0x0p+0 },
8569         { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 },
8570         { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 },
8571         { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 },
8572         { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 },
8573         { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 },
8574         { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 }
8575     };
8576
8577     double z, r, r2, p, y, y0, invc, logc;
8578     UINT32 ix, iz, top, tmp;
8579     int k, i;
8580
8581     ix = *(UINT32*)&x;
8582     /* Fix sign of zero with downward rounding when x==1. */
8583     if (ix == 0x3f800000)
8584         return 0;
8585     if (ix - 0x00800000 >= 0x7f800000 - 0x00800000) {
8586         /* x < 0x1p-126 or inf or nan. */
8587         if (ix * 2 == 0) {
8588             *_errno() = ERANGE;
8589             return -1.0f / x;
8590         }
8591         if (ix == 0x7f800000) /* log2(inf) == inf. */
8592             return x;
8593         if (ix * 2 > 0xff000000)
8594             return x;
8595         if (ix & 0x80000000) {
8596             *_errno() = EDOM;
8597             return (x - x) / (x - x);
8598         }
8599         /* x is subnormal, normalize it. */
8600         x *= 0x1p23f;
8601         ix = *(UINT32*)&x;
8602         ix -= 23 << 23;
8603     }
8604
8605     /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
8606        The range is split into N subintervals.
8607        The ith subinterval contains z and c is near its center. */
8608     tmp = ix - 0x3f330000;
8609     i = (tmp >> (23 - 4)) % (1 << 4);
8610     top = tmp & 0xff800000;
8611     iz = ix - top;
8612     k = (INT32)tmp >> 23; /* arithmetic shift */
8613     invc = T[i].invc;
8614     logc = T[i].logc;
8615     z = *(float*)&iz;
8616
8617     /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
8618     r = z * invc - 1;
8619     y0 = logc + (double)k;
8620
8621     /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
8622     r2 = r * r;
8623     y = A[1] * r + A[2];
8624     y = A[0] * r2 + y;
8625     p = A[3] * r + y0;
8626     y = y * r2 + p;
8627     return y;
8628 }
8629
/*********************************************************************
 *      rint (MSVCR120.@)
 *
 * Thin exported wrapper: forwards x to __rint() and returns its
 * result unchanged.
 */
double CDECL rint(double x)
{
    return __rint(x);
}
8637
8638 /*********************************************************************
8639  *      rintf (MSVCR120.@)
8640  *
8641  * Copied from musl: src/math/rintf.c
8642  */
8643 float CDECL rintf(float x)
8644 {
8645     static const float toint = 1 / FLT_EPSILON;
8646
8647     unsigned int ix = *(unsigned int*)&x;
8648     int e = ix >> 23 & 0xff;
8649     int s = ix >> 31;
8650     float y;
8651
8652     if (e >= 0x7f + 23)
8653         return x;
8654     if (s)
8655         y = fp_barrierf(x - toint) + toint;
8656     else
8657         y = fp_barrierf(x + toint) - toint;
8658     if (y == 0)
8659         return s ? -0.0f : 0.0f;
8660     return y;
8661 }
8662
8663 /*********************************************************************
8664  *      lrint (MSVCR120.@)
8665  */
8666 __msvcrt_long CDECL lrint(double x)
8667 {
8668     double d;
8669
8670     d = rint(x);
8671     if ((d < 0 && d != (double)(__msvcrt_long)d)
8672             || (d >= 0 && d != (double)(__msvcrt_ulong)d)) {
8673         *_errno() = EDOM;
8674         return 0;
8675     }
8676     return d;
8677 }
8678
8679 /*********************************************************************
8680  *      lrintf (MSVCR120.@)
8681  */
8682 __msvcrt_long CDECL lrintf(float x)
8683 {
8684     float f;
8685
8686     f = rintf(x);
8687     if ((f < 0 && f != (float)(__msvcrt_long)f)
8688             || (f >= 0 && f != (float)(__msvcrt_ulong)f)) {
8689         *_errno() = EDOM;
8690         return 0;
8691     }
8692     return f;
8693 }
8694
8695 /*********************************************************************
8696  *      llrint (MSVCR120.@)
8697  */
8698 __int64 CDECL llrint(double x)
8699 {
8700     double d;
8701
8702     d = rint(x);
8703     if ((d < 0 && d != (double)(__int64)d)
8704             || (d >= 0 && d != (double)(unsigned __int64)d)) {
8705         *_errno() = EDOM;
8706         return 0;
8707     }
8708     return d;
8709 }
8710
8711 /*********************************************************************
8712  *      llrintf (MSVCR120.@)
8713  */
8714 __int64 CDECL llrintf(float x)
8715 {
8716     float f;
8717
8718     f = rintf(x);
8719     if ((f < 0 && f != (float)(__int64)f)
8720             || (f >= 0 && f != (float)(unsigned __int64)f)) {
8721         *_errno() = EDOM;
8722         return 0;
8723     }
8724     return f;
8725 }
8726
/*********************************************************************
 *      round (MSVCR120.@)
 *
 * Thin exported wrapper: forwards x to __round() and returns its
 * result unchanged.
 */
double CDECL round(double x)
{
    return __round(x);
}
8734
8735 /*********************************************************************
8736  *      roundf (MSVCR120.@)
8737  *
8738  * Copied from musl: src/math/roundf.c
8739  */
8740 float CDECL roundf(float x)
8741 {
8742     static const float toint = 1 / FLT_EPSILON;
8743
8744     unsigned int ix = *(unsigned int*)&x;
8745     int e = ix >> 23 & 0xff;
8746     float y;
8747
8748     if (e >= 0x7f + 23)
8749         return x;
8750     if (ix >> 31)
8751         x = -x;
8752     if (e < 0x7f - 1)
8753         return 0 * *(float*)&ix;
8754     y = fp_barrierf(x + toint) - toint - x;
8755     if (y > 0.5f)
8756         y = y + x - 1;
8757     else if (y <= -0.5f)
8758         y = y + x + 1;
8759     else
8760         y = y + x;
8761     if (ix >> 31)
8762         y = -y;
8763     return y;
8764 }
8765
8766 /*********************************************************************
8767  *      lround (MSVCR120.@)
8768  *
8769  * Copied from musl: src/math/lround.c
8770  */
8771 __msvcrt_long CDECL lround(double x)
8772 {
8773     double d = round(x);
8774     if (d != (double)(__msvcrt_long)d) {
8775         *_errno() = EDOM;
8776         return 0;
8777     }
8778     return d;
8779 }
8780
8781 /*********************************************************************
8782  *      lroundf (MSVCR120.@)
8783  *
8784  * Copied from musl: src/math/lroundf.c
8785  */
8786 __msvcrt_long CDECL lroundf(float x)
8787 {
8788     float f = roundf(x);
8789     if (f != (float)(__msvcrt_long)f) {
8790         *_errno() = EDOM;
8791         return 0;
8792     }
8793     return f;
8794 }
8795
8796 /*********************************************************************
8797  *      llround (MSVCR120.@)
8798  *
8799  * Copied from musl: src/math/llround.c
8800  */
8801 __int64 CDECL llround(double x)
8802 {
8803     double d = round(x);
8804     if (d != (double)(__int64)d) {
8805         *_errno() = EDOM;
8806         return 0;
8807     }
8808     return d;
8809 }
8810
8811 /*********************************************************************
8812  *      llroundf (MSVCR120.@)
8813  *
8814  * Copied from musl: src/math/llroundf.c
8815  */
8816 __int64 CDECL llroundf(float x)
8817 {
8818     float f = roundf(x);
8819     if (f != (float)(__int64)f) {
8820         *_errno() = EDOM;
8821         return 0;
8822     }
8823     return f;
8824 }
8825
8826 /*********************************************************************
8827  *      trunc (MSVCR120.@)
8828  *
8829  * Copied from musl: src/math/trunc.c
8830  */
8831 double CDECL trunc(double x)
8832 {
8833     union {double f; UINT64 i;} u = {x};
8834     int e = (u.i >> 52 & 0x7ff) - 0x3ff + 12;
8835     UINT64 m;
8836
8837     if (e >= 52 + 12)
8838         return x;
8839     if (e < 12)
8840         e = 1;
8841     m = -1ULL >> e;
8842     if ((u.i & m) == 0)
8843         return x;
8844     u.i &= ~m;
8845     return u.f;
8846 }
8847
8848 /*********************************************************************
8849  *      truncf (MSVCR120.@)
8850  *
8851  * Copied from musl: src/math/truncf.c
8852  */
8853 float CDECL truncf(float x)
8854 {
8855     union {float f; UINT32 i;} u = {x};
8856     int e = (u.i >> 23 & 0xff) - 0x7f + 9;
8857     UINT32 m;
8858
8859     if (e >= 23 + 9)
8860         return x;
8861     if (e < 9)
8862         e = 1;
8863     m = -1U >> e;
8864     if ((u.i & m) == 0)
8865         return x;
8866     u.i &= ~m;
8867     return u.f;
8868 }
8869
/*********************************************************************
 *      _dtest (MSVCR120.@)
 *
 * Pointer-taking variant of _dclass(): reads the double that x points
 * to and returns _dclass() of that value. x is dereferenced without a
 * NULL check.
 */
short CDECL _dtest(double *x)
{
    return _dclass(*x);
}
8877
/*********************************************************************
 *      _fdtest (MSVCR120.@)
 *
 * Pointer-taking variant of _fdclass(): reads the float that x points
 * to and returns _fdclass() of that value. x is dereferenced without
 * a NULL check.
 */
short CDECL _fdtest(float *x)
{
    return _fdclass(*x);
}
8885
/* Helper for erfc2(), which dispatches here when |x| < 1.25.
 *
 * Evaluates 1 - erx - P(s)/Q(s) with s = |x| - 1, where P and Q are
 * the polynomials whose coefficients are stored in pa[] and qa[].
 * Only |x| is used, so the sign of x does not affect the result. */
static double erfc1(double x)
{
    static const double erx = 8.45062911510467529297e-01;
    /* numerator coefficients pa0..pa6 */
    static const double pa[] = {
        -2.36211856075265944077e-03,
         4.14856118683748331666e-01,
        -3.72207876035701323847e-01,
         3.18346619901161753674e-01,
        -1.10894694282396677476e-01,
         3.54783043256182359371e-02,
        -2.16637559486879084300e-03
    };
    /* denominator coefficients; qa[0] is the constant term 1 */
    static const double qa[] = {
         1.0,
         1.06420880400844228286e-01,
         5.40397917702171048937e-01,
         7.18286544141962662868e-02,
         1.26171219808761642112e-01,
         1.36370839120290507362e-02,
         1.19844998467991074170e-02
    };

    double s, num, den;

    s = fabs(x) - 1;
    num = pa[0] + s * (pa[1] + s * (pa[2] + s * (pa[3] + s * (pa[4] + s * (pa[5] + s * pa[6])))));
    den = qa[0] + s * (qa[1] + s * (qa[2] + s * (qa[3] + s * (qa[4] + s * (qa[5] + s * qa[6])))));
    return 1 - erx - num / den;
}
8910
8911 static double erfc2(UINT32 ix, double x)
8912 {
8913     static const double ra0  = -9.86494403484714822705e-03,
8914                  ra1  = -6.93858572707181764372e-01,
8915                  ra2  = -1.05586262253232909814e+01,
8916                  ra3  = -6.23753324503260060396e+01,
8917                  ra4  = -1.62396669462573470355e+02,
8918                  ra5  = -1.84605092906711035994e+02,
8919                  ra6  = -8.12874355063065934246e+01,
8920                  ra7  = -9.81432934416914548592e+00,
8921                  sa1  =  1.96512716674392571292e+01,
8922                  sa2  =  1.37657754143519042600e+02,
8923                  sa3  =  4.34565877475229228821e+02,
8924                  sa4  =  6.45387271733267880336e+02,
8925                  sa5  =  4.29008140027567833386e+02,
8926                  sa6  =  1.08635005541779435134e+02,
8927                  sa7  =  6.57024977031928170135e+00,
8928                  sa8  = -6.04244152148580987438e-02,
8929                  rb0  = -9.86494292470009928597e-03,
8930                  rb1  = -7.99283237680523006574e-01,
8931                  rb2  = -1.77579549177547519889e+01,
8932                  rb3  = -1.60636384855821916062e+02,
8933                  rb4  = -6.37566443368389627722e+02,
8934                  rb5  = -1.02509513161107724954e+03,
8935                  rb6  = -4.83519191608651397019e+02,
8936                  sb1  =  3.03380607434824582924e+01,
8937                  sb2  =  3.25792512996573918826e+02,
8938                  sb3  =  1.53672958608443695994e+03,
8939                  sb4  =  3.19985821950859553908e+03,
8940                  sb5  =  2.55305040643316442583e+03,
8941                  sb6  =  4.74528541206955367215e+02,
8942                  sb7  = -2.24409524465858183362e+01;
8943
8944     double s, R, S, z;
8945     UINT64 iz;
8946
8947     if (ix < 0x3ff40000) /* |x| < 1.25 */
8948         return erfc1(x);
8949
8950     x = fabs(x);
8951     s = 1 / (x * x);
8952     if (ix < 0x4006db6d) { /* |x| < 1/.35 ~ 2.85714 */
8953         R = ra0 + s * (ra1 + s * (ra2 + s * (ra3 + s * (ra4 + s *
8954                             (ra5 + s * (ra6 + s * ra7))))));
8955         S = 1.0 + s * (sa1 + s * (sa2 + s * (sa3 + s * (sa4 + s *
8956                             (sa5 + s * (sa6 + s * (sa7 + s * sa8)))))));
8957     } else { /* |x| > 1/.35 */
8958         R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * (rb4 + s *
8959                             (rb5 + s * rb6)))));
8960         S = 1.0 + s * (sb1 + s * (sb2 + s * (sb3 + s * (sb4 + s *
8961                             (sb5 + s * (sb6 + s * sb7))))));
8962     }
8963     z = x;
8964     iz = *(ULONGLONG*)&z;
8965     iz &= 0xffffffff00000000ULL;
8966     z = *(double*)&iz;
8967     return exp(-z * z - 0.5625) * exp((z - x) * (z + x) + R / S) / x;
8968 }
8969
8970 /*********************************************************************
8971  *      erf (MSVCR120.@)
8972  */
8973 double CDECL erf(double x)
8974 {
8975     static const double efx8 =  1.02703333676410069053e+00,
8976                  pp0  =  1.28379167095512558561e-01,
8977                  pp1  = -3.25042107247001499370e-01,
8978                  pp2  = -2.84817495755985104766e-02,
8979                  pp3  = -5.77027029648944159157e-03,
8980                  pp4  = -2.37630166566501626084e-05,
8981                  qq1  =  3.97917223959155352819e-01,
8982                  qq2  =  6.50222499887672944485e-02,
8983                  qq3  =  5.08130628187576562776e-03,
8984                  qq4  =  1.32494738004321644526e-04,
8985                  qq5  = -3.96022827877536812320e-06;
8986
8987     double r, s, z, y;
8988     UINT32 ix;
8989     int sign;
8990
8991     ix = *(UINT64*)&x >> 32;
8992     sign = ix >> 31;
8993     ix &= 0x7fffffff;
8994     if (ix >= 0x7ff00000) {
8995         /* erf(nan)=nan, erf(+-inf)=+-1 */
8996         return 1 - 2 * sign + 1 / x;
8997     }
8998     if (ix < 0x3feb0000) { /* |x| < 0.84375 */
8999         if (ix < 0x3e300000) { /* |x| < 2**-28 */
9000             /* avoid underflow */
9001             return 0.125 * (8 * x + efx8 * x);
9002         }
9003         z = x * x;
9004         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
9005         s = 1.0 + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
9006         y = r / s;
9007         return x + x * y;
9008     }
9009     if (ix < 0x40180000) /* 0.84375 <= |x| < 6 */
9010         y = 1 - erfc2(ix, x);
9011     else
9012         y = 1 - DBL_MIN;
9013     return sign ? -y : y;
9014 }
9015
/* Single-precision counterpart of erfc1().
 *
 * Evaluates 1 - erx - P(s)/Q(s) with s = |x| - 1, where P and Q are
 * the polynomials whose coefficients are stored in pa[] and qa[].
 * Only |x| is used, so the sign of x does not affect the result. */
static float erfc1f(float x)
{
    static const float erx = 8.4506291151e-01;
    /* numerator coefficients pa0..pa6 */
    static const float pa[] = {
        -2.3621185683e-03,
         4.1485610604e-01,
        -3.7220788002e-01,
         3.1834661961e-01,
        -1.1089469492e-01,
         3.5478305072e-02,
        -2.1663755178e-03
    };
    /* denominator coefficients; qa[0] is the constant term 1 */
    static const float qa[] = {
         1.0f,
         1.0642088205e-01,
         5.4039794207e-01,
         7.1828655899e-02,
         1.2617121637e-01,
         1.3637083583e-02,
         1.1984500103e-02
    };

    float s, num, den;

    s = fabsf(x) - 1;
    num = pa[0] + s * (pa[1] + s * (pa[2] + s * (pa[3] + s * (pa[4] + s * (pa[5] + s * pa[6])))));
    den = qa[0] + s * (qa[1] + s * (qa[2] + s * (qa[3] + s * (qa[4] + s * (qa[5] + s * qa[6])))));
    return 1 - erx - num / den;
}
9040
9041 static float erfc2f(UINT32 ix, float x)
9042 {
9043     static const float ra0  = -9.8649440333e-03,
9044                  ra1  = -6.9385856390e-01,
9045                  ra2  = -1.0558626175e+01,
9046                  ra3  = -6.2375331879e+01,
9047                  ra4  = -1.6239666748e+02,
9048                  ra5  = -1.8460508728e+02,
9049                  ra6  = -8.1287437439e+01,
9050                  ra7  = -9.8143291473e+00,
9051                  sa1  =  1.9651271820e+01,
9052                  sa2  =  1.3765776062e+02,
9053                  sa3  =  4.3456588745e+02,
9054                  sa4  =  6.4538726807e+02,
9055                  sa5  =  4.2900814819e+02,
9056                  sa6  =  1.0863500214e+02,
9057                  sa7  =  6.5702495575e+00,
9058                  sa8  = -6.0424413532e-02,
9059                  rb0  = -9.8649431020e-03,
9060                  rb1  = -7.9928326607e-01,
9061                  rb2  = -1.7757955551e+01,
9062                  rb3  = -1.6063638306e+02,
9063                  rb4  = -6.3756646729e+02,
9064                  rb5  = -1.0250950928e+03,
9065                  rb6  = -4.8351919556e+02,
9066                  sb1  =  3.0338060379e+01,
9067                  sb2  =  3.2579251099e+02,
9068                  sb3  =  1.5367296143e+03,
9069                  sb4  =  3.1998581543e+03,
9070                  sb5  =  2.5530502930e+03,
9071                  sb6  =  4.7452853394e+02,
9072                  sb7  = -2.2440952301e+01;
9073
9074     float s, R, S, z;
9075
9076     if (ix < 0x3fa00000) /* |x| < 1.25 */
9077         return erfc1f(x);
9078
9079     x = fabsf(x);
9080     s = 1 / (x * x);
9081     if (ix < 0x4036db6d) { /* |x| < 1/0.35 */
9082         R = ra0 + s * (ra1 + s * (ra2 + s * (ra3 + s * (ra4 + s *
9083                             (ra5 + s * (ra6 + s * ra7))))));
9084         S = 1.0f + s * (sa1 + s * (sa2 + s * (sa3 + s * (sa4 + s *
9085                             (sa5 + s * (sa6 + s * (sa7 + s * sa8)))))));
9086     } else { /* |x| >= 1/0.35 */
9087         R = rb0 + s * (rb1 + s * (rb2 + s * (rb3 + s * (rb4 + s * (rb5 + s * rb6)))));
9088         S = 1.0f + s * (sb1 + s * (sb2 + s * (sb3 + s * (sb4 + s *
9089                             (sb5 + s * (sb6 + s * sb7))))));
9090     }
9091
9092     ix = *(UINT32*)&x & 0xffffe000;
9093     z = *(float*)&ix;
9094     return expf(-z * z - 0.5625f) * expf((z - x) * (z + x) + R / S) / x;
9095 }
9096
9097 /*********************************************************************
9098  *      erff (MSVCR120.@)
9099  *
9100  * Copied from musl: src/math/erff.c
9101  */
9102 float CDECL erff(float x)
9103 {
9104     static const float efx8 =  1.0270333290e+00,
9105                  pp0  =  1.2837916613e-01,
9106                  pp1  = -3.2504209876e-01,
9107                  pp2  = -2.8481749818e-02,
9108                  pp3  = -5.7702702470e-03,
9109                  pp4  = -2.3763017452e-05,
9110                  qq1  =  3.9791721106e-01,
9111                  qq2  =  6.5022252500e-02,
9112                  qq3  =  5.0813062117e-03,
9113                  qq4  =  1.3249473704e-04,
9114                  qq5  = -3.9602282413e-06;
9115
9116     float r, s, z, y;
9117     UINT32 ix;
9118     int sign;
9119
9120     ix = *(UINT32*)&x;
9121     sign = ix >> 31;
9122     ix &= 0x7fffffff;
9123     if (ix >= 0x7f800000) {
9124         /* erf(nan)=nan, erf(+-inf)=+-1 */
9125         return 1 - 2 * sign + 1 / x;
9126     }
9127     if (ix < 0x3f580000) { /* |x| < 0.84375 */
9128         if (ix < 0x31800000) { /* |x| < 2**-28 */
9129             /*avoid underflow */
9130             return 0.125f * (8 * x + efx8 * x);
9131         }
9132         z = x * x;
9133         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
9134         s = 1 + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
9135         y = r / s;
9136         return x + x * y;
9137     }
9138     if (ix < 0x40c00000) /* |x| < 6 */
9139         y = 1 - erfc2f(ix, x);
9140     else
9141         y = 1 - FLT_MIN;
9142     return sign ? -y : y;
9143 }
9144
9145 /*********************************************************************
9146  *      erfc (MSVCR120.@)
9147  *
9148  * Copied from musl: src/math/erf.c
9149  */
9150 double CDECL erfc(double x)
9151 {
9152     static const double pp0  =  1.28379167095512558561e-01,
9153                  pp1  = -3.25042107247001499370e-01,
9154                  pp2  = -2.84817495755985104766e-02,
9155                  pp3  = -5.77027029648944159157e-03,
9156                  pp4  = -2.37630166566501626084e-05,
9157                  qq1  =  3.97917223959155352819e-01,
9158                  qq2  =  6.50222499887672944485e-02,
9159                  qq3  =  5.08130628187576562776e-03,
9160                  qq4  =  1.32494738004321644526e-04,
9161                  qq5  = -3.96022827877536812320e-06;
9162
9163     double r, s, z, y;
9164     UINT32 ix;
9165     int sign;
9166
9167     ix = *(ULONGLONG*)&x >> 32;
9168     sign = ix >> 31;
9169     ix &= 0x7fffffff;
9170     if (ix >= 0x7ff00000) {
9171         /* erfc(nan)=nan, erfc(+-inf)=0,2 */
9172         return 2 * sign + 1 / x;
9173     }
9174     if (ix < 0x3feb0000) { /* |x| < 0.84375 */
9175         if (ix < 0x3c700000) /* |x| < 2**-56 */
9176             return 1.0 - x;
9177         z = x * x;
9178         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
9179         s = 1.0 + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
9180         y = r / s;
9181         if (sign || ix < 0x3fd00000) { /* x < 1/4 */
9182             return 1.0 - (x + x * y);
9183         }
9184         return 0.5 - (x - 0.5 + x * y);
9185     }
9186     if (ix < 0x403c0000) { /* 0.84375 <= |x| < 28 */
9187         return sign ? 2 - erfc2(ix, x) : erfc2(ix, x);
9188     }
9189     if (sign)
9190         return 2 - DBL_MIN;
9191     *_errno() = ERANGE;
9192     return fp_barrier(DBL_MIN) * DBL_MIN;
9193 }
9194
9195 /*********************************************************************
9196  *      erfcf (MSVCR120.@)
9197  *
9198  * Copied from musl: src/math/erff.c
9199  */
9200 float CDECL erfcf(float x)
9201 {
9202     static const float pp0  =  1.2837916613e-01,
9203                  pp1  = -3.2504209876e-01,
9204                  pp2  = -2.8481749818e-02,
9205                  pp3  = -5.7702702470e-03,
9206                  pp4  = -2.3763017452e-05,
9207                  qq1  =  3.9791721106e-01,
9208                  qq2  =  6.5022252500e-02,
9209                  qq3  =  5.0813062117e-03,
9210                  qq4  =  1.3249473704e-04,
9211                  qq5  = -3.9602282413e-06;
9212
9213     float r, s, z, y;
9214     UINT32 ix;
9215     int sign;
9216
9217     ix = *(UINT32*)&x;
9218     sign = ix >> 31;
9219     ix &= 0x7fffffff;
9220     if (ix >= 0x7f800000) {
9221         /* erfc(nan)=nan, erfc(+-inf)=0,2 */
9222         return 2 * sign + 1 / x;
9223     }
9224
9225     if (ix < 0x3f580000) { /* |x| < 0.84375 */
9226         if (ix < 0x23800000) /* |x| < 2**-56 */
9227             return 1.0f - x;
9228         z = x * x;
9229         r = pp0 + z * (pp1 + z * (pp2 + z * (pp3 + z * pp4)));
9230         s = 1.0f + z * (qq1 + z * (qq2 + z * (qq3 + z * (qq4 + z * qq5))));
9231         y = r / s;
9232         if (sign || ix < 0x3e800000) /* x < 1/4 */
9233             return 1.0f - (x + x * y);
9234         return 0.5f - (x - 0.5f + x * y);
9235     }
9236     if (ix < 0x41e00000) { /* |x| < 28 */
9237         return sign ? 2 - erfc2f(ix, x) : erfc2f(ix, x);
9238     }
9239     if (sign)
9240         return 2 - FLT_MIN;
9241     *_errno() = ERANGE;
9242     return FLT_MIN * FLT_MIN;
9243 }
9244
9245 /*********************************************************************
9246  *      fmaxf (MSVCR120.@)
9247  */
9248 float CDECL fmaxf(float x, float y)
9249 {
9250     if(isnan(x))
9251         return y;
9252     if(isnan(y))
9253         return x;
9254     if(x==0 && y==0)
9255         return signbit(x) ? y : x;
9256     return x<y ? y : x;
9257 }
9258
9259 /*********************************************************************
9260  *      fmax (MSVCR120.@)
9261  */
9262 double CDECL fmax(double x, double y)
9263 {
9264     if(isnan(x))
9265         return y;
9266     if(isnan(y))
9267         return x;
9268     if(x==0 && y==0)
9269         return signbit(x) ? y : x;
9270     return x<y ? y : x;
9271 }
9272
9273 /*********************************************************************
9274  *      fdimf (MSVCR120.@)
9275  */
9276 float CDECL fdimf(float x, float y)
9277 {
9278     if(isnan(x))
9279         return x;
9280     if(isnan(y))
9281         return y;
9282     return x>y ? x-y : 0;
9283 }
9284
9285 /*********************************************************************
9286  *      fdim (MSVCR120.@)
9287  */
9288 double CDECL fdim(double x, double y)
9289 {
9290     if(isnan(x))
9291         return x;
9292     if(isnan(y))
9293         return y;
9294     return x>y ? x-y : 0;
9295 }
9296
9297 /*********************************************************************
9298  *      _fdsign (MSVCR120.@)
9299  */
9300 int CDECL _fdsign(float x)
9301 {
9302     union { float f; UINT32 i; } u = { x };
9303     return (u.i >> 16) & 0x8000;
9304 }
9305
9306 /*********************************************************************
9307  *      _dsign (MSVCR120.@)
9308  */
9309 int CDECL _dsign(double x)
9310 {
9311     union { double f; UINT64 i; } u = { x };
9312     return (u.i >> 48) & 0x8000;
9313 }
9314
9315
9316 /*********************************************************************
9317  *      _dpcomp (MSVCR120.@)
9318  */
9319 int CDECL _dpcomp(double x, double y)
9320 {
9321     if(isnan(x) || isnan(y))
9322         return 0;
9323
9324     if(x == y) return 2;
9325     return x < y ? 1 : 4;
9326 }
9327
9328 /*********************************************************************
9329  *      _fdpcomp (MSVCR120.@)
9330  */
9331 int CDECL _fdpcomp(float x, float y)
9332 {
9333     return _dpcomp(x, y);
9334 }
9335
9336 /*********************************************************************
9337  *      fminf (MSVCR120.@)
9338  */
9339 float CDECL fminf(float x, float y)
9340 {
9341     if(isnan(x))
9342         return y;
9343     if(isnan(y))
9344         return x;
9345     if(x==0 && y==0)
9346         return signbit(x) ? x : y;
9347     return x<y ? x : y;
9348 }
9349
9350 /*********************************************************************
9351  *      fmin (MSVCR120.@)
9352  */
9353 double CDECL fmin(double x, double y)
9354 {
9355     if(isnan(x))
9356         return y;
9357     if(isnan(y))
9358         return x;
9359     if(x==0 && y==0)
9360         return signbit(x) ? x : y;
9361     return x<y ? x : y;
9362 }
9363
9364 /*********************************************************************
9365  *      asinh (MSVCR120.@)
9366  *
9367  * Copied from musl: src/math/asinh.c
9368  */
9369 double CDECL asinh(double x)
9370 {
9371     UINT64 ux = *(UINT64*)&x;
9372     int e = ux >> 52 & 0x7ff;
9373     int s = ux >> 63;
9374
9375     /* |x| */
9376     ux &= (UINT64)-1 / 2;
9377     x = *(double*)&ux;
9378
9379     if (e >= 0x3ff + 26) /* |x| >= 0x1p26 or inf or nan */
9380         x = log(x) + 0.693147180559945309417232121458176568;
9381     else if (e >= 0x3ff + 1) /* |x| >= 2 */
9382         x = log(2 * x + 1 / (sqrt(x * x + 1) + x));
9383     else if (e >= 0x3ff - 26) /* |x| >= 0x1p-26 */
9384         x = log1p(x + x * x / (sqrt(x * x + 1) + 1));
9385     else /* |x| < 0x1p-26, raise inexact if x != 0 */
9386         fp_barrier(x + 0x1p120f);
9387     return s ? -x : x;
9388 }
9389
9390 /*********************************************************************
9391  *      asinhf (MSVCR120.@)
9392  *
9393  * Copied from musl: src/math/asinhf.c
9394  */
9395 float CDECL asinhf(float x)
9396 {
9397     UINT32 ux = *(UINT32*)&x;
9398     UINT32 i = ux & 0x7fffffff;
9399     int s = ux >> 31;
9400
9401     /* |x| */
9402     x = *(float*)&i;
9403
9404     if (i >= 0x3f800000 + (12 << 23))/* |x| >= 0x1p12 or inf or nan */
9405         x = logf(x) + 0.693147180559945309417232121458176568f;
9406     else if (i >= 0x3f800000 + (1 << 23)) /* |x| >= 2 */
9407         x = logf(2 * x + 1 / (sqrtf(x * x + 1) + x));
9408     else if (i >= 0x3f800000 - (12 << 23)) /* |x| >= 0x1p-12 */
9409         x = log1pf(x + x * x / (sqrtf(x * x + 1) + 1));
9410     else /* |x| < 0x1p-12, raise inexact if x!=0 */
9411         fp_barrierf(x + 0x1p120f);
9412     return s ? -x : x;
9413 }
9414
9415 /*********************************************************************
9416  *      acosh (MSVCR120.@)
9417  *
9418  * Copied from musl: src/math/acosh.c
9419  */
9420 double CDECL acosh(double x)
9421 {
9422     int e = *(UINT64*)&x >> 52 & 0x7ff;
9423
9424     if (x < 1)
9425     {
9426         *_errno() = EDOM;
9427         feraiseexcept(FE_INVALID);
9428         return NAN;
9429     }
9430
9431     if (e < 0x3ff + 1) /* |x| < 2, up to 2ulp error in [1,1.125] */
9432         return log1p(x - 1 + sqrt((x - 1) * (x - 1) + 2 * (x - 1)));
9433     if (e < 0x3ff + 26) /* |x| < 0x1p26 */
9434         return log(2 * x - 1 / (x + sqrt(x * x - 1)));
9435     /* |x| >= 0x1p26 or nan */
9436     return log(x) + 0.693147180559945309417232121458176568;
9437 }
9438
9439 /*********************************************************************
9440  *      acoshf (MSVCR120.@)
9441  *
9442  * Copied from musl: src/math/acoshf.c
9443  */
9444 float CDECL acoshf(float x)
9445 {
9446     UINT32 a = *(UINT32*)&x & 0x7fffffff;
9447
9448     if (x < 1)
9449     {
9450         *_errno() = EDOM;
9451         feraiseexcept(FE_INVALID);
9452         return NAN;
9453     }
9454
9455     if (a < 0x3f800000 + (1 << 23)) /* |x| < 2, up to 2ulp error in [1,1.125] */
9456         return log1pf(x - 1 + sqrtf((x - 1) * (x - 1) + 2 * (x - 1)));
9457     if (*(UINT32*)&x < 0x3f800000 + (12 << 23)) /* 2 <= x < 0x1p12 */
9458         return logf(2 * x - 1 / (x + sqrtf(x * x - 1)));
9459     /* x >= 0x1p12 or x <= -2 or nan */
9460     return logf(x) + 0.693147180559945309417232121458176568f;
9461 }
9462
9463 /*********************************************************************
9464  *      atanh (MSVCR120.@)
9465  *
9466  * Copied from musl: src/math/atanh.c
9467  */
9468 double CDECL atanh(double x)
9469 {
9470     UINT64 ux = *(UINT64*)&x;
9471     int e = ux >> 52 & 0x7ff;
9472     int s = ux >> 63;
9473
9474     /* |x| */
9475     ux &= (UINT64)-1 / 2;
9476     x = *(double*)&ux;
9477
9478     if (x > 1) {
9479         *_errno() = EDOM;
9480         feraiseexcept(FE_INVALID);
9481         return NAN;
9482     }
9483
9484     if (e < 0x3ff - 1) {
9485         if (e < 0x3ff - 32) {
9486             fp_barrier(x + 0x1p120f);
9487             if (e == 0) /* handle underflow */
9488                 fp_barrier(x * x);
9489         } else { /* |x| < 0.5, up to 1.7ulp error */
9490             x = 0.5 * log1p(2 * x + 2 * x * x / (1 - x));
9491         }
9492     } else { /* avoid overflow */
9493         x = 0.5 * log1p(2 * (x / (1 - x)));
9494         if (isinf(x)) *_errno() = ERANGE;
9495     }
9496     return s ? -x : x;
9497 }
9498
9499 /*********************************************************************
9500  *      atanhf (MSVCR120.@)
9501  *
9502  * Copied from musl: src/math/atanhf.c
9503  */
9504 float CDECL atanhf(float x)
9505 {
9506     UINT32 ux = *(UINT32*)&x;
9507     int s = ux >> 31;
9508
9509     /* |x| */
9510     ux &= 0x7fffffff;
9511     x = *(float*)&ux;
9512
9513     if (x > 1) {
9514         *_errno() = EDOM;
9515         feraiseexcept(FE_INVALID);
9516         return NAN;
9517     }
9518
9519     if (ux < 0x3f800000 - (1 << 23)) {
9520         if (ux < 0x3f800000 - (32 << 23)) {
9521             fp_barrierf(x + 0x1p120f);
9522             if (ux < (1 << 23)) /* handle underflow */
9523                 fp_barrierf(x * x);
9524         } else { /* |x| < 0.5, up to 1.7ulp error */
9525             x = 0.5f * log1pf(2 * x + 2 * x * x / (1 - x));
9526         }
9527     } else { /* avoid overflow */
9528         x = 0.5f * log1pf(2 * (x / (1 - x)));
9529         if (isinf(x)) *_errno() = ERANGE;
9530     }
9531     return s ? -x : x;
9532 }
9533
9534 #endif /* _MSVCR_VER>=120 */
9535
9536 /*********************************************************************
9537  *      _scalb  (MSVCRT.@)
9538  *      scalbn  (MSVCR120.@)
9539  *      scalbln (MSVCR120.@)
9540  */
9541 double CDECL _scalb(double num, __msvcrt_long power)
9542 {
9543   return ldexp(num, power);
9544 }
9545
9546 /*********************************************************************
9547  *      _scalbf  (MSVCRT.@)
9548  *      scalbnf  (MSVCR120.@)
9549  *      scalblnf (MSVCR120.@)
9550  */
9551 float CDECL _scalbf(float num, __msvcrt_long power)
9552 {
9553   return ldexp(num, power);
9554 }
9555
9556 #if _MSVCR_VER>=120
9557
9558 /*********************************************************************
9559  *      remainder (MSVCR120.@)
9560  *
9561  * Copied from musl: src/math/remainder.c
9562  */
9563 double CDECL remainder(double x, double y)
9564 {
9565     int q;
9566 #if _MSVCR_VER == 120 && defined(__x86_64__)
9567     if (isnan(x) || isnan(y)) *_errno() = EDOM;
9568 #endif
9569     return remquo(x, y, &q);
9570 }
9571
9572 /*********************************************************************
9573  *      remainderf (MSVCR120.@)
9574  *
9575  * Copied from musl: src/math/remainderf.c
9576  */
9577 float CDECL remainderf(float x, float y)
9578 {
9579     int q;
9580 #if _MSVCR_VER == 120 && defined(__x86_64__)
9581     if (isnan(x) || isnan(y)) *_errno() = EDOM;
9582 #endif
9583     return remquof(x, y, &q);
9584 }
9585
/*********************************************************************
 *      remquo (MSVCR120.@)
 *
 * Copied from musl: src/math/remquo.c
 *
 * Computes the remainder of x / y with the quotient rounded to the
 * nearest integer (ties go to the even quotient) using integer
 * shift-and-subtract on the mantissas.
 *
 * PARAMS
 *  x   [I] dividend
 *  y   [I] divisor
 *  quo [O] receives the low 31 bits of the integer quotient, negated
 *          when x and y have different signs; set to 0 on the early
 *          return paths
 *
 * RETURNS
 *  The remainder, carrying the sign of x.  When y is zero or NaN, or x
 *  is infinite or NaN, the result is (x * y) / (x * y).  errno is set
 *  to EDOM when y == 0 or x is infinite.
 */
double CDECL remquo(double x, double y, int *quo)
{
    UINT64 uxi = *(UINT64*)&x;
    UINT64 uyi = *(UINT64*)&y;
    int ex = uxi >> 52 & 0x7ff;  /* biased exponents */
    int ey = uyi >> 52 & 0x7ff;
    int sx = uxi >> 63;          /* sign bits */
    int sy = uyi >> 63;
    UINT32 q;
    UINT64 i;

    *quo = 0;
    if (y == 0 || isinf(x)) *_errno() = EDOM;
    /* y is +-0 or NaN, or x is inf/NaN */
    if (uyi << 1 == 0 || isnan(y) || ex == 0x7ff)
        return (x * y) / (x * y);
    /* x is +-0 */
    if (uxi << 1 == 0)
        return x;

    /* normalize x and y */
    if (!ex) {
        /* subnormal: count leading zeros into a non-positive exponent */
        for (i = uxi << 12; i >> 63 == 0; ex--, i <<= 1);
        uxi <<= -ex + 1;
    } else {
        /* normal: strip sign/exponent and add the implicit leading one */
        uxi &= -1ULL >> 12;
        uxi |= 1ULL << 52;
    }
    if (!ey) {
        for (i = uyi << 12; i >> 63 == 0; ey--, i <<= 1);
        uyi <<= -ey + 1;
    } else {
        uyi &= -1ULL >> 12;
        uyi |= 1ULL << 52;
    }

    q = 0;
    if (ex < ey) {
        /* |x| < |y|: only when the exponents are adjacent can |x| exceed
         * |y|/2, which is decided after the end label */
        if (ex+1 == ey)
            goto end;
        return x;
    }

    /* x mod y */
    /* one quotient bit per iteration; q is 32 bits wide, so only the low
     * bits of a large quotient survive */
    for (; ex > ey; ex--) {
        i = uxi - uyi;
        if (i >> 63 == 0) {
            uxi = i;
            q++;
        }
        uxi <<= 1;
        q <<= 1;
    }
    i = uxi - uyi;
    if (i >> 63 == 0) {
        uxi = i;
        q++;
    }
    if (uxi == 0)
        /* zero remainder: with this exponent the shift in the else branch
         * below is applied to a zero mantissa */
        ex = -60;
    else
        /* renormalize the remainder */
        for (; uxi >> 52 == 0; uxi <<= 1, ex--);
end:
    /* scale result and decide between |x| and |x|-|y| */
    if (ex > 0) {
        uxi -= 1ULL << 52;
        uxi |= (UINT64)ex << 52;
    } else {
        uxi >>= -ex + 1;
    }
    x = *(double*)&uxi;
    if (sy)
        y = -y;
    /* from here on x and y hold magnitudes; subtract y once more when the
     * remainder is above y/2, or exactly y/2 with an odd quotient */
    if (ex == ey || (ex + 1 == ey && (2 * x > y || (2 * x == y && q % 2)))) {
        x -= y;
        q++;
    }
    q &= 0x7fffffff;
    *quo = sx ^ sy ? -(int)q : (int)q;
    return sx ? -x : x;
}
9670
/*********************************************************************
 *      remquof (MSVCR120.@)
 *
 * Copied from musl: src/math/remquof.c
 *
 * Computes the remainder of x / y with the quotient rounded to the
 * nearest integer (ties go to the even quotient) using integer
 * shift-and-subtract on the mantissas.
 *
 * PARAMS
 *  x   [I] dividend
 *  y   [I] divisor
 *  quo [O] receives the low 31 bits of the integer quotient, negated
 *          when x and y have different signs; set to 0 on the early
 *          return paths
 *
 * RETURNS
 *  The remainder, carrying the sign of x.  When y is zero or NaN, or x
 *  is infinite or NaN, the result is (x * y) / (x * y).  errno is set
 *  to EDOM when y == 0 or x is infinite.
 */
float CDECL remquof(float x, float y, int *quo)
{
    UINT32 uxi = *(UINT32*)&x;
    UINT32 uyi = *(UINT32*)&y;
    int ex = uxi >> 23 & 0xff;  /* biased exponents */
    int ey = uyi >> 23 & 0xff;
    int sx = uxi >> 31;         /* sign bits */
    int sy = uyi>> 31;
    UINT32 q, i;

    *quo = 0;
    if (y == 0 || isinf(x)) *_errno() = EDOM;
    /* y is +-0 or NaN, or x is inf/NaN */
    if (uyi << 1 == 0 || isnan(y) || ex == 0xff)
        return (x * y) / (x * y);
    /* x is +-0 */
    if (uxi << 1 == 0)
        return x;

    /* normalize x and y */
    if (!ex) {
        /* subnormal: count leading zeros into a non-positive exponent */
        for (i = uxi << 9; i >> 31 == 0; ex--, i <<= 1);
        uxi <<= -ex + 1;
    } else {
        /* normal: strip sign/exponent and add the implicit leading one */
        uxi &= -1U >> 9;
        uxi |= 1U << 23;
    }
    if (!ey) {
        for (i = uyi << 9; i >> 31 == 0; ey--, i <<= 1);
        uyi <<= -ey + 1;
    } else {
        uyi &= -1U >> 9;
        uyi |= 1U << 23;
    }

    q = 0;
    if (ex < ey) {
        /* |x| < |y|: only when the exponents are adjacent can |x| exceed
         * |y|/2, which is decided after the end label */
        if (ex + 1 == ey)
            goto end;
        return x;
    }

    /* x mod y */
    /* one quotient bit per iteration; q is 32 bits wide, so only the low
     * bits of a large quotient survive */
    for (; ex > ey; ex--) {
        i = uxi - uyi;
        if (i >> 31 == 0) {
            uxi = i;
            q++;
        }
        uxi <<= 1;
        q <<= 1;
    }
    i = uxi - uyi;
    if (i >> 31 == 0) {
        uxi = i;
        q++;
    }
    if (uxi == 0)
        /* zero remainder: with this exponent the shift in the else branch
         * below is applied to a zero mantissa */
        ex = -30;
    else
        /* renormalize the remainder */
        for (; uxi >> 23 == 0; uxi <<= 1, ex--);
end:
    /* scale result and decide between |x| and |x|-|y| */
    if (ex > 0) {
        uxi -= 1U << 23;
        uxi |= (UINT32)ex << 23;
    } else {
        uxi >>= -ex + 1;
    }
    x = *(float*)&uxi;
    if (sy)
        y = -y;
    /* from here on x and y hold magnitudes; subtract y once more when the
     * remainder is above y/2, or exactly y/2 with an odd quotient */
    if (ex == ey || (ex + 1 == ey && (2 * x > y || (2 * x == y && q % 2)))) {
        x -= y;
        q++;
    }
    q &= 0x7fffffff;
    *quo = sx ^ sy ? -(int)q : (int)q;
    return sx ? -x : x;
}
9754
9755 /* sin(pi*x) assuming x > 2^-100, if sin(pi*x)==0 the sign is arbitrary */
9756 static double sin_pi(double x)
9757 {
9758     int n;
9759
9760     /* spurious inexact if odd int */
9761     x = 2.0 * (x * 0.5 - floor(x * 0.5)); /* x mod 2.0 */
9762
9763     n = x * 4.0;
9764     n = (n + 1) / 2;
9765     x -= n * 0.5f;
9766     x *= M_PI;
9767
9768     switch (n) {
9769     default: /* case 4: */
9770     case 0: return __sin(x, 0.0, 0);
9771     case 1: return __cos(x, 0.0);
9772     case 2: return __sin(-x, 0.0, 0);
9773     case 3: return -__cos(x, 0.0);
9774     }
9775 }
9776
/*********************************************************************
 *      lgamma (MSVCR120.@)
 *
 * Copied from musl: src/math/lgamma_r.c
 *
 * Log-gamma of a double.  The argument is classified by the high 32 bits
 * of its representation:
 *   - inf / NaN: returns x * x
 *   - |x| < 2**-70: returns -log(|x|)
 *   - negative x: the value for |x| is computed and then reflected using
 *     sin_pi(); for negative integers errno is set to ERANGE and
 *     1.0 / 0.0 is returned
 *   - x < 2: polynomial / rational approximations on three sub-intervals
 *   - x < 8: rational approximation on the fractional part plus the log
 *     of a product
 *   - x < 2**58: asymptotic series in 1/x
 *   - otherwise: x * (log(x) - 1)
 */
double CDECL lgamma(double x)
{
    static const double pi = 3.14159265358979311600e+00,
        a0 = 7.72156649015328655494e-02,
        a1 = 3.22467033424113591611e-01,
        a2 = 6.73523010531292681824e-02,
        a3 = 2.05808084325167332806e-02,
        a4 = 7.38555086081402883957e-03,
        a5 = 2.89051383673415629091e-03,
        a6 = 1.19270763183362067845e-03,
        a7 = 5.10069792153511336608e-04,
        a8 = 2.20862790713908385557e-04,
        a9 = 1.08011567247583939954e-04,
        a10 = 2.52144565451257326939e-05,
        a11 = 4.48640949618915160150e-05,
        tc = 1.46163214496836224576e+00,
        tf = -1.21486290535849611461e-01,
        tt = -3.63867699703950536541e-18,
        t0 = 4.83836122723810047042e-01,
        t1 = -1.47587722994593911752e-01,
        t2 = 6.46249402391333854778e-02,
        t3 = -3.27885410759859649565e-02,
        t4 = 1.79706750811820387126e-02,
        t5 = -1.03142241298341437450e-02,
        t6 = 6.10053870246291332635e-03,
        t7 = -3.68452016781138256760e-03,
        t8 = 2.25964780900612472250e-03,
        t9 = -1.40346469989232843813e-03,
        t10 = 8.81081882437654011382e-04,
        t11 = -5.38595305356740546715e-04,
        t12 = 3.15632070903625950361e-04,
        t13 = -3.12754168375120860518e-04,
        t14 = 3.35529192635519073543e-04,
        u0 = -7.72156649015328655494e-02,
        u1 = 6.32827064025093366517e-01,
        u2 = 1.45492250137234768737e+00,
        u3 = 9.77717527963372745603e-01,
        u4 = 2.28963728064692451092e-01,
        u5 = 1.33810918536787660377e-02,
        v1 = 2.45597793713041134822e+00,
        v2 = 2.12848976379893395361e+00,
        v3 = 7.69285150456672783825e-01,
        v4 = 1.04222645593369134254e-01,
        v5 = 3.21709242282423911810e-03,
        s0 = -7.72156649015328655494e-02,
        s1 = 2.14982415960608852501e-01,
        s2 = 3.25778796408930981787e-01,
        s3 = 1.46350472652464452805e-01,
        s4 = 2.66422703033638609560e-02,
        s5 = 1.84028451407337715652e-03,
        s6 = 3.19475326584100867617e-05,
        r1 = 1.39200533467621045958e+00,
        r2 = 7.21935547567138069525e-01,
        r3 = 1.71933865632803078993e-01,
        r4 = 1.86459191715652901344e-02,
        r5 = 7.77942496381893596434e-04,
        r6 = 7.32668430744625636189e-06,
        w0 = 4.18938533204672725052e-01,
        w1 = 8.33333333333329678849e-02,
        w2 = -2.77777777728775536470e-03,
        w3 = 7.93650558643019558500e-04,
        w4 = -5.95187557450339963135e-04,
        w5 = 8.36339918996282139126e-04,
        w6 = -1.63092934096575273989e-03;

    union {double f; UINT64 i;} u = {x};
    double t, y, z, nadj, p, p1, p2, p3, q, r, w;
    UINT32 ix;
    int sign,i;

    /* purge off +-inf, NaN, +-0, tiny and negative arguments */
    sign = u.i >> 63;
    ix = u.i >> 32 & 0x7fffffff; /* high word of |x| */
    if (ix >= 0x7ff00000)
        return x * x;
    if (ix < (0x3ff - 70) << 20) { /* |x|<2**-70, return -log(|x|) */
        if(sign)
            x = -x;
        return -log(x);
    }
    if (sign) {
        /* continue with |x|; nadj holds the reflection term that is
         * combined with the result for |x| at the very end */
        x = -x;
        t = sin_pi(x);
        if (t == 0.0) { /* -integer */
            *_errno() = ERANGE;
            return 1.0 / (x - x);
        }
        if (t <= 0.0)
            t = -t;
        nadj = log(pi / (t * x));
    }

    /* purge off 1 and 2 */
    if ((ix == 0x3ff00000 || ix == 0x40000000) && (UINT32)u.i == 0)
        r = 0;
    /* for x < 2.0 */
    else if (ix < 0x40000000) {
        /* pick the sub-interval: i selects the approximation used in the
         * switch below, y is its argument */
        if (ix <= 0x3feccccc) { /* lgamma(x) = lgamma(x+1)-log(x) */
            r = -log(x);
            if (ix >= 0x3FE76944) {
                y = 1.0 - x;
                i = 0;
            } else if (ix >= 0x3FCDA661) {
                y = x - (tc - 1.0);
                i = 1;
            } else {
                y = x;
                i = 2;
            }
        } else {
            r = 0.0;
            if (ix >= 0x3FFBB4C3) { /* [1.7316,2] */
                y = 2.0 - x;
                i = 0;
            } else if(ix >= 0x3FF3B4C4) { /* [1.23,1.73] */
                y = x - tc;
                i = 1;
            } else {
                y = x - 1.0;
                i = 2;
            }
        }
        switch (i) {
        case 0:
            /* polynomial in y with coefficients a0..a11, even and odd
             * indices accumulated separately */
            z = y * y;
            p1 = a0 + z * (a2 + z * (a4 + z * (a6 + z * (a8 + z * a10))));
            p2 = z * (a1 + z * (a3 + z * (a5 + z * (a7 + z * (a9 + z * a11)))));
            p = y * p1 + p2;
            r += (p - 0.5 * y);
            break;
        case 1:
            /* polynomial around tc with coefficients t0..t14, plus the
             * constants tf and tt */
            z = y * y;
            w = z * y;
            p1 = t0 + w * (t3 + w * (t6 + w * (t9 + w * t12))); /* parallel comp */
            p2 = t1 + w * (t4 + w * (t7 + w * (t10 + w * t13)));
            p3 = t2 + w * (t5 + w * (t8 + w * (t11 + w * t14)));
            p = z * p1 - (tt - w * (p2 + y * p3));
            r += tf + p;
            break;
        case 2:
            /* rational approximation u(y) / v(y) */
            p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * u5)))));
            p2 = 1.0 + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * v5))));
            r += -0.5 * y + p1 / p2;
        }
    } else if (ix < 0x40200000) { /* x < 8.0 */
        /* split x into integer part i and fractional part y */
        i = (int)x;
        y = x - (double)i;
        p = y * (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6))))));
        q = 1.0 + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * r6)))));
        r = 0.5 * y + p / q;
        z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */
        /* accumulate (y+2)*(y+3)*...*(y+i-1); i == 2 adds nothing */
        switch (i) {
        case 7: z *= y + 6.0; /* fall through */
        case 6: z *= y + 5.0; /* fall through */
        case 5: z *= y + 4.0; /* fall through */
        case 4: z *= y + 3.0; /* fall through */
        case 3:
            z *= y + 2.0;
            r += log(z);
            break;
        }
    } else if (ix < 0x43900000) { /* 8.0 <= x < 2**58 */
        t = log(x);
        z = 1.0 / x;
        y = z * z;
        w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * w6)))));
        r = (x - 0.5) * (t - 1.0) + w;
    } else /* 2**58 <= x <= inf */
        r = x * (log(x) - 1.0);
    /* negative argument: apply the reflection term computed above */
    if (sign)
        r = nadj - r;
    return r;
}
9955
9956 /* sin(pi*x) assuming x > 2^-100, if sin(pi*x)==0 the sign is arbitrary */
9957 static float sinf_pi(float x)
9958 {
9959     double y;
9960     int n;
9961
9962     /* spurious inexact if odd int */
9963     x = 2 * (x * 0.5f - floorf(x * 0.5f)); /* x mod 2.0 */
9964
9965     n = (int)(x * 4);
9966     n = (n + 1) / 2;
9967     y = x - n * 0.5f;
9968     y *= M_PI;
9969     switch (n) {
9970     default: /* case 4: */
9971     case 0: return __sindf(y);
9972     case 1: return __cosdf(y);
9973     case 2: return __sindf(-y);
9974     case 3: return -__cosdf(y);
9975     }
9976 }
9977
/*********************************************************************
 *      lgammaf (MSVCR120.@)
 *
 * Copied from musl: src/math/lgammaf_r.c
 *
 * Single-precision log-gamma.  Only the float result is returned; the
 * sign of gamma(x) is not reported to the caller.
 *
 * Special cases handled explicitly below:
 *  - NaN and +-inf return x * x.
 *  - |x| < 2**-21 (including +-0) returns -logf(|x|).
 *  - negative integers set errno to ERANGE and return 1.0f / (x - x),
 *    i.e. a division by zero.
 *  - x == 1 and x == 2 return 0.
 * Other negative arguments are evaluated on |x| and then adjusted with
 * the term nadj = logf(pi / (|sin(pi*x)| * |x|)).
 */
float CDECL lgammaf(float x)
{
    static const float pi = 3.1415927410e+00,
        /* a0..a11: polynomial used by the i == 0 branch for x < 2 */
        a0 = 7.7215664089e-02,
        a1 = 3.2246702909e-01,
        a2 = 6.7352302372e-02,
        a3 = 2.0580807701e-02,
        a4 = 7.3855509982e-03,
        a5 = 2.8905137442e-03,
        a6 = 1.1927076848e-03,
        a7 = 5.1006977446e-04,
        a8 = 2.2086278477e-04,
        a9 = 1.0801156895e-04,
        a10 = 2.5214456400e-05,
        a11 = 4.4864096708e-05,
        /* tc, tf, tt, t0..t14: expansion around tc used by the i == 1 branch */
        tc = 1.4616321325e+00,
        tf = -1.2148628384e-01,
        tt = 6.6971006518e-09,
        t0 = 4.8383611441e-01,
        t1 = -1.4758771658e-01,
        t2 = 6.4624942839e-02,
        t3 = -3.2788541168e-02,
        t4 = 1.7970675603e-02,
        t5 = -1.0314224288e-02,
        t6 = 6.1005386524e-03,
        t7 = -3.6845202558e-03,
        t8 = 2.2596477065e-03,
        t9 = -1.4034647029e-03,
        t10 = 8.8108185446e-04,
        t11 = -5.3859531181e-04,
        t12 = 3.1563205994e-04,
        t13 = -3.1275415677e-04,
        t14 = 3.3552918467e-04,
        /* u0..u5 / v1..v5: numerator / denominator of the i == 2 rational */
        u0 = -7.7215664089e-02,
        u1 = 6.3282704353e-01,
        u2 = 1.4549225569e+00,
        u3 = 9.7771751881e-01,
        u4 = 2.2896373272e-01,
        u5 = 1.3381091878e-02,
        v1 = 2.4559779167e+00,
        v2 = 2.1284897327e+00,
        v3 = 7.6928514242e-01,
        v4 = 1.0422264785e-01,
        v5 = 3.2170924824e-03,
        /* s0..s6 / r1..r6: numerator / denominator used for 2 < x < 8 */
        s0 = -7.7215664089e-02,
        s1 = 2.1498242021e-01,
        s2 = 3.2577878237e-01,
        s3 = 1.4635047317e-01,
        s4 = 2.6642270386e-02,
        s5 = 1.8402845599e-03,
        s6 = 3.1947532989e-05,
        r1 = 1.3920053244e+00,
        r2 = 7.2193557024e-01,
        r3 = 1.7193385959e-01,
        r4 = 1.8645919859e-02,
        r5 = 7.7794247773e-04,
        r6 = 7.3266842264e-06,
        /* w0..w6: series in 1/x used for 8 <= x < 2**58 */
        w0 = 4.1893854737e-01,
        w1 = 8.3333335817e-02,
        w2 = -2.7777778450e-03,
        w3 = 7.9365057172e-04,
        w4 = -5.9518753551e-04,
        w5 = 8.3633989561e-04,
        w6 = -1.6309292987e-03;

    /* the union gives access to the raw bit pattern of x */
    union {float f; UINT32 i;} u = {x};
    /* nadj is only written, and only read, when x is negative */
    float t, y, z, nadj, p, p1, p2, p3, q, r, w;
    UINT32 ix;
    int i, sign;

    /* purge off +-inf, NaN, +-0, tiny and negative arguments */
    sign = u.i >> 31;       /* 1 when the sign bit of x is set */
    ix = u.i & 0x7fffffff;  /* bit pattern of |x| */
    if (ix >= 0x7f800000)
        return x * x;
    if (ix < 0x35000000) { /* |x| < 2**-21, return -log(|x|) */
        if (sign)
            x = -x;
        return -logf(x);
    }
    if (sign) {
        /* from here on x holds |x|; the negative result is rebuilt at the end */
        x = -x;
        t = sinf_pi(x);
        if (t == 0.0f) { /* -integer */
            *_errno() = ERANGE;
            return 1.0f / (x - x);
        }
        /* only the magnitude of sin(pi*x) is used */
        if (t <= 0.0f)
            t = -t;
        nadj = logf(pi / (t * x));
    }

    /* purge off 1 and 2 */
    if (ix == 0x3f800000 || ix == 0x40000000)
        r = 0;
    /* for x < 2.0 */
    else if (ix < 0x40000000) {
        /* pick the approximation (i) and its reduced argument (y) */
        if (ix <= 0x3f666666) { /* lgamma(x) = lgamma(x+1)-log(x) */
            r = -logf(x);
            if (ix >= 0x3f3b4a20) {
                y = 1.0f - x;
                i = 0;
            } else if (ix >= 0x3e6d3308) {
                y = x - (tc - 1.0f);
                i = 1;
            } else {
                y = x;
                i = 2;
            }
        } else {
            r = 0.0f;
            if (ix >= 0x3fdda618) { /* [1.7316,2] */
                y = 2.0f - x;
                i = 0;
            } else if (ix >= 0x3F9da620) { /* [1.23,1.73] */
                y = x - tc;
                i = 1;
            } else {
                y = x - 1.0f;
                i = 2;
            }
        }
        switch(i) {
        case 0:
            /* even (p1) and odd (p2) powers of y are summed separately */
            z = y * y;
            p1 = a0 + z * (a2 + z * (a4 + z * (a6 + z * (a8 + z * a10))));
            p2 = z * (a1 + z * (a3 + z * (a5 + z * (a7 + z * (a9 + z * a11)))));
            p = y * p1 + p2;
            r += p - 0.5f * y;
            break;
        case 1:
            z = y * y;
            w = z * y;
            p1 = t0 + w * (t3 + w * (t6 + w * (t9 + w * t12))); /* parallel comp */
            p2 = t1 + w * (t4 + w * (t7 + w * (t10 + w * t13)));
            p3 = t2 + w * (t5 + w * (t8 + w * (t11 + w * t14)));
            p = z * p1 - (tt - w * (p2 + y * p3));
            r += (tf + p);
            break;
        case 2:
            /* rational approximation p1 / p2 in y */
            p1 = y * (u0 + y * (u1 + y * (u2 + y * (u3 + y * (u4 + y * u5)))));
            p2 = 1.0f + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * v5))));
            r += -0.5f * y + p1 / p2;
        }
    } else if (ix < 0x41000000) { /* x < 8.0 */
        /* split x into its integer part i and fractional part y */
        i = (int)x;
        y = x - (float)i;
        p = y * (s0 + y * (s1 + y * (s2 + y * (s3 + y * (s4 + y * (s5 + y * s6))))));
        q = 1.0f + y * (r1 + y * (r2 + y * (r3 + y * (r4 + y * (r5 + y * r6)))));
        r = 0.5f * y + p / q;
        z = 1.0f; /* lgamma(1+s) = log(s) + lgamma(s) */
        /* accumulate (y+2)*(y+3)*...*(y+i-1) in z; nothing is added for i == 2 */
        switch (i) {
        case 7: z *= y + 6.0f; /* fall through */
        case 6: z *= y + 5.0f; /* fall through */
        case 5: z *= y + 4.0f; /* fall through */
        case 4: z *= y + 3.0f; /* fall through */
        case 3:
            z *= y + 2.0f;
            r += logf(z);
            break;
        }
    } else if (ix < 0x5c800000) { /* 8.0 <= x < 2**58 */
        t = logf(x);
        z = 1.0f / x;
        y = z * z;
        w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * w6)))));
        r = (x - 0.5f) * (t - 1.0f) + w;
    } else /* 2**58 <= x <= inf */
        r = x * (logf(x) - 1.0f);
    /* negative input: combine the value for |x| with the adjustment term */
    if (sign)
        r = nadj - r;
    return r;
}
10156
/*
 * Helper for tgamma(): evaluates the ratio of two polynomials in x whose
 * coefficients are stored lowest order first in Snum and Sden.
 */
static double tgamma_S(double x)
{
    static const double Snum[] = {
        23531376880.410759688572007674451636754734846804940,
        42919803642.649098768957899047001988850926355848959,
        35711959237.355668049440185451547166705960488635843,
        17921034426.037209699919755754458931112671403265390,
        6039542586.3520280050642916443072979210699388420708,
        1439720407.3117216736632230727949123939715485786772,
        248874557.86205415651146038641322942321632125127801,
        31426415.585400194380614231628318205362874684987640,
        2876370.6289353724412254090516208496135991145378768,
        186056.26539522349504029498971604569928220784236328,
        8071.6720023658162106380029022722506138218516325024,
        210.82427775157934587250973392071336271166969580291,
        2.5066282746310002701649081771338373386264310793408,
    };
    static const double Sden[] = {
        0, 39916800, 120543840, 150917976, 105258076, 45995730, 13339535,
        2637558, 357423, 32670, 1925, 66, 1,
    };

    double top = 0, bottom = 0;
    int k;

    if (x < 8) {
        /* Horner evaluation, starting from the highest-order coefficient */
        k = ARRAY_SIZE(Snum);
        while (k-- > 0) {
            top = top * x + Snum[k];
            bottom = bottom * x + Sden[k];
        }
        return top / bottom;
    }

    /*
     * Larger x would overflow the direct evaluation, so both polynomials are
     * accumulated in powers of 1/x instead; the common power of x cancels
     * in the final ratio.
     */
    for (k = 0; k != ARRAY_SIZE(Snum); k++) {
        top = top / x + Snum[k];
        bottom = bottom / x + Sden[k];
    }
    return top / bottom;
}
10195
10196 /*********************************************************************
10197  *      tgamma (MSVCR120.@)
10198  *
10199  * Copied from musl: src/math/tgamma.c
10200  */
10201 double CDECL tgamma(double x)
10202 {
10203     static const double gmhalf = 5.524680040776729583740234375;
10204     static const double fact[] = {
10205         1, 1, 2, 6, 24, 120, 720, 5040.0, 40320.0, 362880.0, 3628800.0, 39916800.0,
10206         479001600.0, 6227020800.0, 87178291200.0, 1307674368000.0, 20922789888000.0,
10207         355687428096000.0, 6402373705728000.0, 121645100408832000.0,
10208         2432902008176640000.0, 51090942171709440000.0, 1124000727777607680000.0,
10209     };
10210
10211     union {double f; UINT64 i;} u = {x};
10212     double absx, y, dy, z, r;
10213     UINT32 ix = u.i >> 32 & 0x7fffffff;
10214     int sign = u.i >> 63;
10215
10216     /* special cases */
10217     if (ix >= 0x7ff00000) {
10218         /* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */
10219         if (u.i == 0xfff0000000000000ULL)
10220             *_errno() = EDOM;
10221         return x + INFINITY;
10222     }
10223     if (ix < (0x3ff - 54) << 20) {
10224         /* |x| < 2^-54: tgamma(x) ~ 1/x, +-0 raises div-by-zero */
10225         if (x == 0.0)
10226             *_errno() = ERANGE;
10227         return 1 / x;
10228     }
10229
10230     /* integer arguments */
10231     /* raise inexact when non-integer */
10232     if (x == floor(x)) {
10233         if (sign) {
10234             *_errno() = EDOM;
10235             return 0 / (x - x);
10236         }
10237         if (x <= ARRAY_SIZE(fact))
10238             return fact[(int)x - 1];
10239     }
10240
10241     /* x >= 172: tgamma(x)=inf with overflow */
10242     /* x =< -184: tgamma(x)=+-0 with underflow */
10243     if (ix >= 0x40670000) { /* |x| >= 184 */
10244         *_errno() = ERANGE;
10245         if (sign) {
10246             fp_barrierf(0x1p-126 / x);
10247             return 0;
10248         }
10249         x *= 0x1p1023;
10250         return x;
10251     }
10252
10253     absx = sign ? -x : x;
10254
10255     /* handle the error of x + g - 0.5 */
10256     y = absx + gmhalf;
10257     if (absx > gmhalf) {
10258         dy = y - absx;
10259         dy -= gmhalf;
10260     } else {
10261         dy = y - gmhalf;
10262         dy -= absx;
10263     }
10264
10265     z = absx - 0.5;
10266     r = tgamma_S(absx) * exp(-y);
10267     if (x < 0) {
10268         /* reflection formula for negative x */
10269         /* sinpi(absx) is not 0, integers are already handled */
10270         r = -M_PI / (sin_pi(absx) * absx * r);
10271         dy = -dy;
10272         z = -z;
10273     }
10274     r += dy * (gmhalf + 0.5) * r / y;
10275     z = pow(y, 0.5 * z);
10276     y = r * z * z;
10277     return y;
10278 }
10279
10280 /*********************************************************************
10281  *      tgammaf (MSVCR120.@)
10282  *
10283  * Copied from musl: src/math/tgammaf.c
10284  */
10285 float CDECL tgammaf(float x)
10286 {
10287     return tgamma(x);
10288 }
10289
10290 /*********************************************************************
10291  *      nan (MSVCR120.@)
10292  */
10293 double CDECL nan(const char *tagp)
10294 {
10295     /* Windows ignores input (MSDN) */
10296     return NAN;
10297 }
10298
10299 /*********************************************************************
10300  *      nanf (MSVCR120.@)
10301  */
10302 float CDECL nanf(const char *tagp)
10303 {
10304     return NAN;
10305 }
10306
10307 /*********************************************************************
10308  *      _except1 (MSVCR120.@)
10309  *  TODO:
10310  *   - find meaning of ignored cw and operation bits
10311  *   - unk parameter
10312  */
10313 double CDECL _except1(DWORD fpe, _FP_OPERATION_CODE op, double arg, double res, DWORD cw, void *unk)
10314 {
10315     ULONG_PTR exception_arg;
10316     DWORD exception = 0;
10317     unsigned int fpword = 0;
10318     WORD operation;
10319     int raise = 0;
10320
10321     TRACE("(%lx %x %lf %lf %lx %p)\n", fpe, op, arg, res, cw, unk);
10322
10323 #ifdef _WIN64
10324     cw = ((cw >> 7) & 0x3f) | ((cw >> 3) & 0xc00);
10325 #endif
10326     operation = op << 5;
10327     exception_arg = (ULONG_PTR)&operation;
10328
10329     if (fpe & 0x1) { /* overflow */
10330         if ((fpe == 0x1 && (cw & 0x8)) || (fpe==0x11 && (cw & 0x28))) {
10331             /* 32-bit version also sets SW_INEXACT here */
10332             raise |= FE_OVERFLOW;
10333             if (fpe & 0x10) raise |= FE_INEXACT;
10334             res = signbit(res) ? -INFINITY : INFINITY;
10335         } else {
10336             exception = EXCEPTION_FLT_OVERFLOW;
10337         }
10338     } else if (fpe & 0x2) { /* underflow */
10339         if ((fpe == 0x2 && (cw & 0x10)) || (fpe==0x12 && (cw & 0x30))) {
10340             raise |= FE_UNDERFLOW;
10341             if (fpe & 0x10) raise |= FE_INEXACT;
10342             res = signbit(res) ? -0.0 : 0.0;
10343         } else {
10344             exception = EXCEPTION_FLT_UNDERFLOW;
10345         }
10346     } else if (fpe & 0x4) { /* zerodivide */
10347         if ((fpe == 0x4 && (cw & 0x4)) || (fpe==0x14 && (cw & 0x24))) {
10348             raise |= FE_DIVBYZERO;
10349             if (fpe & 0x10) raise |= FE_INEXACT;
10350         } else {
10351             exception = EXCEPTION_FLT_DIVIDE_BY_ZERO;
10352         }
10353     } else if (fpe & 0x8) { /* invalid */
10354         if (fpe == 0x8 && (cw & 0x1)) {
10355             raise |= FE_INVALID;
10356         } else {
10357             exception = EXCEPTION_FLT_INVALID_OPERATION;
10358         }
10359     } else if (fpe & 0x10) { /* inexact */
10360         if (fpe == 0x10 && (cw & 0x20)) {
10361             raise |= FE_INEXACT;
10362         } else {
10363             exception = EXCEPTION_FLT_INEXACT_RESULT;
10364         }
10365     }
10366
10367     if (exception)
10368         raise = 0;
10369     feraiseexcept(raise);
10370     if (exception)
10371         RaiseException(exception, 0, 1, &exception_arg);
10372
10373     if (cw & 0x1) fpword |= _EM_INVALID;
10374     if (cw & 0x2) fpword |= _EM_DENORMAL;
10375     if (cw & 0x4) fpword |= _EM_ZERODIVIDE;
10376     if (cw & 0x8) fpword |= _EM_OVERFLOW;
10377     if (cw & 0x10) fpword |= _EM_UNDERFLOW;
10378     if (cw & 0x20) fpword |= _EM_INEXACT;
10379     switch (cw & 0xc00)
10380     {
10381         case 0xc00: fpword |= _RC_UP|_RC_DOWN; break;
10382         case 0x800: fpword |= _RC_UP; break;
10383         case 0x400: fpword |= _RC_DOWN; break;
10384     }
10385     switch (cw & 0x300)
10386     {
10387         case 0x0:   fpword |= _PC_24; break;
10388         case 0x200: fpword |= _PC_53; break;
10389         case 0x300: fpword |= _PC_64; break;
10390     }
10391     if (cw & 0x1000) fpword |= _IC_AFFINE;
10392     _setfp(&fpword, _MCW_EM | _MCW_RC | _MCW_PC | _MCW_IC, NULL, 0);
10393
10394     return res;
10395 }
10396
10397 _Dcomplex* CDECL _Cbuild(_Dcomplex *ret, double r, double i)
10398 {
10399     ret->_Val[0] = r;
10400     ret->_Val[1] = i;
10401     return ret;
10402 }
10403
10404 double CDECL MSVCR120_creal(_Dcomplex z)
10405 {
10406     return z._Val[0];
10407 }
10408
/*********************************************************************
 *      ilogb (MSVCR120.@)
 *
 * Double-precision entry point: forwards x to __ilogb() and returns
 * its int result unchanged.
 */
int CDECL ilogb(double x)
{
    return __ilogb(x);
}
10416
/*********************************************************************
 *      ilogbf (MSVCR120.@)
 *
 * Single-precision entry point: forwards x to __ilogbf() and returns
 * its int result unchanged.
 */
int CDECL ilogbf(float x)
{
    return __ilogbf(x);
}
10424 #endif /* _MSVCR_VER>=120 */